Program Language/Python

(python) 두 개 엑셀 파일 비교하기

야곰야곰+책벌레 2023. 11. 28. 11:27
728x90
반응형
import pandas as pd
import xlwings as xw

excel1 = '분개장_1.xlsx'
excel2 = '분개장_2.xlsx'

df_excel1 = pd.read_excel(excel1, 'Sheet1', na_values=['NA'])
df_excel2 = pd.read_excel(excel2, 'Sheet1', na_values=['NA'])

df_excel1['version'] = 'cur'
df_excel2['version'] = 'new'

gb_excel1 = df_excel1.groupby(['전표일자', '전표번호'])
gb_excel2 = df_excel2.groupby(['전표일자', '전표번호'])

df_excel1['전표번호seq'] = gb_excel1.cumcount()
df_excel2['전표번호seq'] = gb_excel2.cumcount()

df_excel1['version'] = 'cur'
df_excel2['version'] = 'new'

df_excel1['전표일자번호'] = df_excel1['전표일자'] + df_excel1['전표번호'].astype(str) + df_excel1['전표번호seq'].astype(str)
df_excel2['전표일자번호'] = df_excel2['전표일자'] + df_excel2['전표번호'].astype(str) + df_excel2['전표번호seq'].astype(str)

excel1_date_all = set(df_excel1['전표일자번호'])
excel2_date_all = set(df_excel2['전표일자번호'])

remove_items = excel1_date_all - excel2_date_all
add_items = excel2_date_all - excel1_date_all
all_items = pd.concat([df_excel1, df_excel2], ignore_index=True)

changes = all_items.drop_duplicates(subset=['전표일자', '전표번호', '계정코드', '계정과목', '차변금액', '대변금액', '거래처', '승인일자', '프로젝트코드', '전표일자번호'], keep=False)

#xw.view(changes)

change_excel2 = changes[(changes['version'] == 'new')]
change_excel1 = changes[(changes['version'] == 'cur')]
change_excel2 = change_excel2.drop(['version'], axis=1)
change_excel1 = change_excel1.drop(['version'], axis=1)
change_excel2.set_index('전표일자번호', inplace=True)
change_excel1.set_index('전표일자번호', inplace=True)

df_all_changes = pd.concat([change_excel1, change_excel2], axis=1, keys=['curr', 'new'], join='outer')

#xw.view(df_all_changes)

def report_diff(x) :
    return x[0] if x[0] == x[1] else '{} ---> {}'.format(*x)

df_all_changes = df_all_changes.swaplevel(axis=1)[change_excel2.columns[0:]]

#xw.view(df_all_changes)

df_changed = df_all_changes.groupby(level=0, axis=1).apply(lambda x: x.apply(report_diff, axis=1))

#xw.view(df_all_changes)

cols = list(change_excel1.columns)
df_changed = df_changed[cols]
df_changed = df_changed.reset_index()
df_removed = changes[changes['전표일자번호'].isin(remove_items)]
df_added = changes[changes['전표일자번호'].isin(add_items)]

dfs = [df_changed, df_removed, df_added]

import os
base_dir = os.getcwd()
wb = xw.Book('분개장차이.xlsx')

for i in range(len(dfs)) :
    sht = wb.sheets[i]
    sht.range('A1').value = dfs[i]
728x90
반응형