ERP_Preprocessor / data_processor.py
hariqueen's picture
Setup ERP Preprocessor files
bd6d9a2 verified
raw
history blame
2.99 kB
"""
데이터 μ „μ²˜λ¦¬ 및 가곡 λͺ¨λ“ˆ
"""
import pandas as pd
from typing import Dict, List, Any, Tuple
import mapping_utils
def load_and_preprocess_data(input_file: str, config: Dict[str, Any], mapping_dict: Dict[str, Dict[str, str]]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Load a CSV export and derive the amount, team, mapping and code columns.

    Args:
        input_file: Path of the CSV file; it is read as UTF-8.
        config: Settings dict. The keys read here are 'amount_field',
            'team_fields', 'note_prefix', 'cost_center' and 'partner_code'.
        mapping_dict: Mapping table forwarded unchanged to
            mapping_utils.apply_mapping for every row.

    Returns:
        A (full, filtered) pair of DataFrames: the frame carrying every
        derived column, and a copy restricted to the rows whose CD_ACCT and
        CD_PJT are both different from the empty string.
    """
    # Progress messages are printed before and after the read.
    print(f"'{input_file}' 파일 λ‘œλ”© 쀑...")
    source_frame = pd.read_csv(input_file, encoding='utf-8')
    print(f"λ‘œλ”© μ™„λ£Œ: {len(source_frame)}개 ν–‰ 발견")

    # Work on a copy holding only the fixed columns plus the configured ones.
    wanted_columns = ["λͺ¨λΈλͺ…", config['amount_field'], "μ˜μ—…λΆ„λ₯˜", "κ΄€λ¦¬λΆ€μ„œ", "거래처λͺ…", "관리지점"]
    wanted_columns = wanted_columns + config['team_fields']
    work = source_frame[wanted_columns].copy()

    # Amount: drop comma separators, then convert via float to int.
    amount_text = work[config['amount_field']].replace(",", "", regex=True)
    work["κΈˆμ•‘"] = amount_text.astype(float).astype(int)

    # Team name by priority: the first configured field wins and each later
    # field only fills the positions that are still missing.
    raw_team_col = "μ›λ³ΈνŒ€λͺ…"
    work[raw_team_col] = work[config['team_fields'][0]].copy()
    for fallback_field in config['team_fields'][1:]:
        work[raw_team_col] = work[raw_team_col].combine_first(work[fallback_field])

    # Resolve every raw team name through the mapping helper.
    mapping_col = "맀핑정보"
    work[mapping_col] = work[raw_team_col].apply(
        lambda raw_name: mapping_utils.apply_mapping(raw_name, mapping_dict)
    )

    # Spread the entries of each mapping result into columns of their own.
    team_col = "νŒ€λͺ…"
    for target_col, info_key in ((team_col, "present"), ("CD_ACCT", "CD_ACCT"), ("CD_PJT", "CD_PJT")):
        work[target_col] = work[mapping_col].apply(lambda info, key=info_key: info[key])

    # Note text has the form "<note_prefix>(<team name>)".
    note_head = f"{config['note_prefix']}("
    work["μ μš”"] = note_head + work[team_col] + ")"

    # Management codes are constants taken from the config.
    work["CD_MNG1"] = config['cost_center']  # cost center
    work["CD_MNG3"] = config['partner_code']  # partner code

    # Keep only the rows where both the account and the project code are set.
    has_account = work["CD_ACCT"] != ""
    has_project = work["CD_PJT"] != ""
    mapped_rows = work[has_account & has_project].copy()
    print(f"λ§€ν•‘λœ ν•­λͺ©: {len(mapped_rows)}개 / 전체 {len(work)}개")

    return work, mapped_rows
def summarize_data(df_filtered: pd.DataFrame, mapping_dict: Dict[str, Dict[str, str]]) -> Dict[str, Any]:
    """
    Build summary figures for the filtered rows.

    Args:
        df_filtered: Filtered DataFrame; the amount column and 'CD_ACCT'
            are read from it.
        mapping_dict: Mapping table forwarded unchanged to
            mapping_utils.get_mapping_summary.

    Returns:
        Dict with the keys 'total_count' (row count), 'total_amount' (sum of
        the amount column), 'account_counts' (occurrences per CD_ACCT value)
        and 'mapping_summary' (whatever get_mapping_summary returns).
    """
    amount_sum = df_filtered["κΈˆμ•‘"].sum()

    # Summary of the mapping result, produced by the mapping helper module.
    per_mapping = mapping_utils.get_mapping_summary(df_filtered, mapping_dict)

    # Usage count per account code.
    account_series = df_filtered['CD_ACCT']
    per_account = account_series.value_counts().to_dict()

    row_count = len(df_filtered)
    return dict(
        total_count=row_count,
        total_amount=amount_sum,
        account_counts=per_account,
        mapping_summary=per_mapping,
    )