Spaces:
Sleeping
Sleeping
File size: 2,989 Bytes
bd6d9a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
"""
Data preprocessing and transformation module.
"""
import pandas as pd
from typing import Dict, List, Any, Tuple
import mapping_utils
def load_and_preprocess_data(input_file: str, config: Dict[str, Any], mapping_dict: Dict[str, Dict[str, str]]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Load a rental CSV file and derive the columns needed downstream.

    Steps performed, in order: read the CSV as UTF-8, keep a fixed set of
    columns plus the configured amount/team columns, build an integer amount
    column, coalesce the team columns by priority, apply the mapping to each
    team value, build the note column, set the management codes, and finally
    keep only the rows whose mapping produced both codes.

    Args:
        input_file: Path of the input CSV file.
        config: Rental-company settings. The keys read here are
            'amount_field', 'team_fields', 'note_prefix', 'cost_center'
            and 'partner_code'.
        mapping_dict: Mapping dictionary, passed unchanged to
            mapping_utils.apply_mapping for every row.

    Returns:
        A tuple (df, df_filtered): the fully preprocessed DataFrame and the
        copy restricted to rows where both 'CD_ACCT' and 'CD_PJT' are not
        the empty string.
    """
    # NOTE(review): in this copy of the file the non-ASCII string literals are
    # mis-encoded and several of them are broken across physical lines. The
    # continuation lines are deliberately left at column 0 so the literal
    # contents are not altered; restore them from the original UTF-8 source.

    # Load the CSV file
    print(f"'{input_file}' νμΌ λ‘λ© μ€...")
    rental_df = pd.read_csv(input_file, encoding='utf-8')
    print(f"λ‘λ© μλ£: {len(rental_df)}κ° ν λ°κ²¬")
    # Select only the required fields (fixed columns + configured amount/team columns)
    df = rental_df[["λͺ¨λΈλͺ
", config['amount_field'], "μμ
λΆλ₯", "κ΄λ¦¬λΆμ", "κ±°λμ²λͺ
", "κ΄λ¦¬μ§μ "] + config['team_fields']].copy()
    # Convert the amount field to an integer type: commas are stripped first,
    # then the value goes through float before int
    df["κΈμ‘"] = df[config['amount_field']].replace(",", "", regex=True).astype(float).astype(int)
    # Team-name handling (by priority): start from the first configured team
    # field and fill its missing values from each following field in order
    df["μλ³Ένλͺ
"] = df[config['team_fields'][0]].copy()
    for field in config['team_fields'][1:]:
        df["μλ³Ένλͺ
"] = df["μλ³Ένλͺ
"].combine_first(df[field])
    # Apply the mapping to every coalesced team value
    df["λ§€νμ 보"] = df["μλ³Ένλͺ
"].apply(lambda x: mapping_utils.apply_mapping(x, mapping_dict))
    # Extract fields from the mapping result; each result is indexed with the
    # keys "present", "CD_ACCT" and "CD_PJT"
    df["νλͺ
"] = df["λ§€νμ 보"].apply(lambda x: x["present"])
    df["CD_ACCT"] = df["λ§€νμ 보"].apply(lambda x: x["CD_ACCT"])
    df["CD_PJT"] = df["λ§€νμ 보"].apply(lambda x: x["CD_PJT"])
    # Build the note text: "<note_prefix>(<mapped team name>)"
    df["μ μ"] = f"{config['note_prefix']}(" + df["νλͺ
"] + ")"
    # Set the MNG codes (same value for every row)
    df["CD_MNG1"] = config['cost_center'] # cost center
    df["CD_MNG3"] = config['partner_code'] # partner (customer) code
    # Keep only mapped items (rows that have both CD_ACCT and CD_PJT)
    df_filtered = df[(df["CD_ACCT"] != "") & (df["CD_PJT"] != "")].copy()
    print(f"λ§€νλ νλͺ©: {len(df_filtered)}κ° / μ 체 {len(df)}κ°")
    return df, df_filtered
def summarize_data(df_filtered: pd.DataFrame, mapping_dict: Dict[str, Dict[str, str]]) -> Dict[str, Any]:
    """
    Build summary information for the filtered data.

    Args:
        df_filtered: Filtered DataFrame; the amount column and the
            'CD_ACCT' column are read from it.
        mapping_dict: Mapping dictionary, forwarded unchanged to
            mapping_utils.get_mapping_summary.

    Returns:
        A dict with the keys 'total_count' (number of rows),
        'total_amount' (sum of the amount column), 'account_counts'
        (occurrences per 'CD_ACCT' value) and 'mapping_summary'
        (whatever mapping_utils.get_mapping_summary returns).
    """
    # Sum of the amount column over all filtered rows
    amount_sum = df_filtered["κΈμ‘"].sum()

    # Summary of the mapping results, delegated to mapping_utils
    mapping_overview = mapping_utils.get_mapping_summary(df_filtered, mapping_dict)

    # Usage count per account code
    per_account = df_filtered['CD_ACCT'].value_counts().to_dict()

    return dict(
        total_count=len(df_filtered),
        total_amount=amount_sum,
        account_counts=per_account,
        mapping_summary=mapping_overview,
    )