Spaces:

rairo
/

OneExcelZimraAI

Build error

App Files Community

rairo committed on Feb 24, 2025

Commit

8e09dee

verified ·

1 Parent(s): a66eb56

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -7

app.py CHANGED Viewed

@@ -55,12 +55,11 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
     df.columns = [clean_column_name(col) for col in df.columns]
     # Build a rename map for TIN and salary synonyms.
-    # Note: This will capture PAYE's "tin_or_personal_id_of_employee" too.
     rename_map = {}
     for col in df.columns:
         if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
             rename_map[col] = 'tin'
-        elif 'tin' in col and 'tin' not in rename_map.get(col, ''):
             rename_map[col] = 'tin'
         if any(keyword in col for keyword in ['salary', 'wage', 'earning', 'commission', 'fee', 'payment', 'compensation']):
             rename_map[col] = 'salary'
@@ -184,7 +183,9 @@ def merge_with_master(processed_files):
     1. Use the earnings file as master and drop its inaccurate 'tin' column.
     2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
        The trusted 'tin' comes from the template file.
-    3. Merge the combined earnings–template DataFrame with the PAYE file using 'tin'.
     """
     earnings_file = None
     paye_file = None
@@ -209,7 +210,7 @@ def merge_with_master(processed_files):
     # Drop the inaccurate 'tin' column from earnings if it exists.
     if 'tin' in earnings_df.columns:
         earnings_df = earnings_df.drop(columns=['tin'])
-    # Double-check removal of any middle_name column (should be done in standardization).
     if 'middle_name' in earnings_df.columns:
         earnings_df = earnings_df.drop(columns=['middle_name'])
@@ -219,6 +220,11 @@ def merge_with_master(processed_files):
     if template_file is not None:
         st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
         template_df = template_file["df"]
         # Drop any middle_name column from the template file.
         if 'middle_name' in template_df.columns:
             template_df = template_df.drop(columns=['middle_name'])
@@ -232,9 +238,10 @@ def merge_with_master(processed_files):
     else:
         st.warning("No template file detected. Cannot proceed without a trusted TIN from the template.")
-    # After merging, check that a trusted 'tin' is present from the template.
-    if 'tin' not in merged_df.columns:
-        st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge.")
         return merged_df
     # Merge PAYE figures onto the merged DataFrame using 'tin'

     df.columns = [clean_column_name(col) for col in df.columns]
     # Build a rename map for TIN and salary synonyms.
     rename_map = {}
     for col in df.columns:
         if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
             rename_map[col] = 'tin'
+        elif 'tin' in col:
             rename_map[col] = 'tin'
         if any(keyword in col for keyword in ['salary', 'wage', 'earning', 'commission', 'fee', 'payment', 'compensation']):
             rename_map[col] = 'salary'
     1. Use the earnings file as master and drop its inaccurate 'tin' column.
     2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
        The trusted 'tin' comes from the template file.
+       In the template file, force the first column to be the trusted 'tin'.
+    3. Check that the merged earnings-template data has a 'tin' column.
+       If present, merge the resulting DataFrame with the PAYE file using 'tin'.
     """
     earnings_file = None
     paye_file = None
     # Drop the inaccurate 'tin' column from earnings if it exists.
     if 'tin' in earnings_df.columns:
         earnings_df = earnings_df.drop(columns=['tin'])
+    # Double-check removal of any middle_name column.
     if 'middle_name' in earnings_df.columns:
         earnings_df = earnings_df.drop(columns=['middle_name'])
     if template_file is not None:
         st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
         template_df = template_file["df"]
+        # Force the first column of the template file to be 'tin'
+        if not template_df.empty:
+            cols = list(template_df.columns)
+            cols[0] = "tin"
+            template_df.columns = cols
         # Drop any middle_name column from the template file.
         if 'middle_name' in template_df.columns:
             template_df = template_df.drop(columns=['middle_name'])
     else:
         st.warning("No template file detected. Cannot proceed without a trusted TIN from the template.")
+    # Check that a trusted 'tin' column exists from the template merge.
+    if 'tin' not in merged_df.columns or merged_df['tin'].isnull().all():
+        st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge. "
+                 "Please ensure the template file's first column holds the trusted TIN and is properly standardized.")
         return merged_df
     # Merge PAYE figures onto the merged DataFrame using 'tin'