Spaces:
Sleeping
Sleeping
| """ | |
| Data preprocessing and transformation module. | |
| """ | |
| import pandas as pd | |
| from typing import Dict, List, Any, Tuple | |
| import mapping_utils | |
def load_and_preprocess_data(input_file: str, config: Dict[str, Any], mapping_dict: Dict[str, Dict[str, str]]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Load the rental CSV and derive the columns needed downstream.

    The file is read as UTF-8, narrowed to a fixed set of columns plus the
    configured amount and team columns, and then enriched with: an integer
    amount column, a coalesced team-name column, the mapping result for that
    team name (and the "present", "CD_ACCT", "CD_PJT" values pulled out of
    it), a note column, and two constant management-code columns.

    NOTE(review): the non-ASCII column names and console messages in this
    copy of the file look mis-encoded; they are reproduced verbatim here.
    Confirm them against the original source before relying on them.

    Args:
        input_file: Path of the CSV file to read.
        config: Settings dict. The keys read here are 'amount_field',
            'team_fields' (a list, in priority order), 'note_prefix',
            'cost_center' and 'partner_code'.
        mapping_dict: Mapping dictionary handed to
            mapping_utils.apply_mapping for every row.

    Returns:
        A tuple (all_rows, mapped_rows): the fully preprocessed frame, and a
        copy restricted to rows whose CD_ACCT and CD_PJT are both different
        from the empty string.
    """
    # Load the CSV file.
    print(f"'{input_file}' νμΌ λ‘λ© μ€...")
    source_frame = pd.read_csv(input_file, encoding='utf-8')
    print(f"λ‘λ© μλ£: {len(source_frame)}κ° ν λ°κ²¬")

    # Keep only the columns that are needed.
    amount_field = config['amount_field']
    team_fields = config['team_fields']
    wanted_columns = ["λͺ¨λΈλͺ ", amount_field, "μμ λΆλ₯", "κ΄λ¦¬λΆμ", "κ±°λμ²λͺ ", "κ΄λ¦¬μ§μ "] + team_fields
    df = source_frame[wanted_columns].copy()

    # Amount: strip commas from string values, then go through float so
    # values written with a decimal part still convert to int.
    amount_without_commas = df[amount_field].replace(",", "", regex=True)
    df["κΈμ‘"] = amount_without_commas.astype(float).astype(int)

    # Team name: start from the first team field and fill its missing
    # values from each later field in turn (priority order).
    df["μλ³Ένλͺ "] = df[team_fields[0]].copy()
    for fallback_field in team_fields[1:]:
        df["μλ³Ένλͺ "] = df["μλ³Ένλͺ "].combine_first(df[fallback_field])

    # Apply the mapping to every coalesced team name.
    def lookup(team_name):
        return mapping_utils.apply_mapping(team_name, mapping_dict)

    df["λ§€νμ 보"] = df["μλ³Ένλͺ "].apply(lookup)

    # Pull individual fields out of each mapping result.
    extracted_fields = (
        ("νλͺ ", "present"),
        ("CD_ACCT", "CD_ACCT"),
        ("CD_PJT", "CD_PJT"),
    )
    for target_column, info_key in extracted_fields:
        # Bind info_key as a default so each lambda keeps its own key.
        df[target_column] = df["λ§€νμ 보"].apply(lambda info, info_key=info_key: info[info_key])

    # Note text: "<note_prefix>(<team name>)".
    note_opening = f"{config['note_prefix']}("
    df["μ μ"] = note_opening + df["νλͺ "] + ")"

    # Management codes, identical for every row.
    df["CD_MNG1"] = config['cost_center']  # cost center
    df["CD_MNG3"] = config['partner_code']  # partner code

    # Keep only mapped rows: both CD_ACCT and CD_PJT must be non-empty.
    is_mapped = df["CD_ACCT"].ne("") & df["CD_PJT"].ne("")
    df_filtered = df.loc[is_mapped].copy()
    print(f"λ§€νλ νλͺ©: {len(df_filtered)}κ° / μ 체 {len(df)}κ°")

    return df, df_filtered
def summarize_data(df_filtered: pd.DataFrame, mapping_dict: Dict[str, Dict[str, str]]) -> Dict[str, Any]:
    """
    Build summary figures for the filtered data.

    Args:
        df_filtered: Filtered DataFrame; the amount column and 'CD_ACCT'
            are read from it.
        mapping_dict: Mapping dictionary, passed through to
            mapping_utils.get_mapping_summary.

    Returns:
        Dict with the keys 'total_count' (number of rows), 'total_amount'
        (sum of the amount column), 'account_counts' (occurrences of each
        CD_ACCT value) and 'mapping_summary' (whatever
        mapping_utils.get_mapping_summary returns).
    """
    # Compute the parts first, in the same order the values are needed.
    amount_sum = df_filtered["κΈμ‘"].sum()

    # Summary of the mapping results.
    per_mapping = mapping_utils.get_mapping_summary(df_filtered, mapping_dict)

    # How often each account code is used.
    per_account = df_filtered['CD_ACCT'].value_counts().to_dict()

    summary: Dict[str, Any] = {}
    summary['total_count'] = len(df_filtered)
    summary['total_amount'] = amount_sum
    summary['account_counts'] = per_account
    summary['mapping_summary'] = per_mapping
    return summary