Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -55,12 +55,11 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 55 |
df.columns = [clean_column_name(col) for col in df.columns]
|
| 56 |
|
| 57 |
# Build a rename map for TIN and salary synonyms.
|
| 58 |
-
# Note: This will capture PAYE's "tin_or_personal_id_of_employee" too.
|
| 59 |
rename_map = {}
|
| 60 |
for col in df.columns:
|
| 61 |
if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
|
| 62 |
rename_map[col] = 'tin'
|
| 63 |
-
elif 'tin' in col
|
| 64 |
rename_map[col] = 'tin'
|
| 65 |
if any(keyword in col for keyword in ['salary', 'wage', 'earning', 'commission', 'fee', 'payment', 'compensation']):
|
| 66 |
rename_map[col] = 'salary'
|
|
@@ -184,7 +183,9 @@ def merge_with_master(processed_files):
|
|
| 184 |
1. Use the earnings file as master and drop its inaccurate 'tin' column.
|
| 185 |
2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
|
| 186 |
The trusted 'tin' comes from the template file.
|
| 187 |
-
|
|
|
|
|
|
|
| 188 |
"""
|
| 189 |
earnings_file = None
|
| 190 |
paye_file = None
|
|
@@ -209,7 +210,7 @@ def merge_with_master(processed_files):
|
|
| 209 |
# Drop the inaccurate 'tin' column from earnings if it exists.
|
| 210 |
if 'tin' in earnings_df.columns:
|
| 211 |
earnings_df = earnings_df.drop(columns=['tin'])
|
| 212 |
-
# Double-check removal of any middle_name column
|
| 213 |
if 'middle_name' in earnings_df.columns:
|
| 214 |
earnings_df = earnings_df.drop(columns=['middle_name'])
|
| 215 |
|
|
@@ -219,6 +220,11 @@ def merge_with_master(processed_files):
|
|
| 219 |
if template_file is not None:
|
| 220 |
st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
|
| 221 |
template_df = template_file["df"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
# Drop any middle_name column from the template file.
|
| 223 |
if 'middle_name' in template_df.columns:
|
| 224 |
template_df = template_df.drop(columns=['middle_name'])
|
|
@@ -232,9 +238,10 @@ def merge_with_master(processed_files):
|
|
| 232 |
else:
|
| 233 |
st.warning("No template file detected. Cannot proceed without a trusted TIN from the template.")
|
| 234 |
|
| 235 |
-
#
|
| 236 |
-
if 'tin' not in merged_df.columns:
|
| 237 |
-
st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge."
|
|
|
|
| 238 |
return merged_df
|
| 239 |
|
| 240 |
# Merge PAYE figures onto the merged DataFrame using 'tin'
|
|
|
|
| 55 |
df.columns = [clean_column_name(col) for col in df.columns]
|
| 56 |
|
| 57 |
# Build a rename map for TIN and salary synonyms.
|
|
|
|
| 58 |
rename_map = {}
|
| 59 |
for col in df.columns:
|
| 60 |
if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
|
| 61 |
rename_map[col] = 'tin'
|
| 62 |
+
elif 'tin' in col:
|
| 63 |
rename_map[col] = 'tin'
|
| 64 |
if any(keyword in col for keyword in ['salary', 'wage', 'earning', 'commission', 'fee', 'payment', 'compensation']):
|
| 65 |
rename_map[col] = 'salary'
|
|
|
|
| 183 |
1. Use the earnings file as master and drop its inaccurate 'tin' column.
|
| 184 |
2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
|
| 185 |
The trusted 'tin' comes from the template file.
|
| 186 |
+
In the template file, force the first column to be the trusted 'tin'.
|
| 187 |
+
3. Check that the merged earnings-template data has a 'tin' column.
|
| 188 |
+
If present, merge the resulting DataFrame with the PAYE file using 'tin'.
|
| 189 |
"""
|
| 190 |
earnings_file = None
|
| 191 |
paye_file = None
|
|
|
|
| 210 |
# Drop the inaccurate 'tin' column from earnings if it exists.
|
| 211 |
if 'tin' in earnings_df.columns:
|
| 212 |
earnings_df = earnings_df.drop(columns=['tin'])
|
| 213 |
+
# Double-check removal of any middle_name column.
|
| 214 |
if 'middle_name' in earnings_df.columns:
|
| 215 |
earnings_df = earnings_df.drop(columns=['middle_name'])
|
| 216 |
|
|
|
|
| 220 |
if template_file is not None:
|
| 221 |
st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
|
| 222 |
template_df = template_file["df"]
|
| 223 |
+
# Force the first column of the template file to be 'tin'
|
| 224 |
+
if not template_df.empty:
|
| 225 |
+
cols = list(template_df.columns)
|
| 226 |
+
cols[0] = "tin"
|
| 227 |
+
template_df.columns = cols
|
| 228 |
# Drop any middle_name column from the template file.
|
| 229 |
if 'middle_name' in template_df.columns:
|
| 230 |
template_df = template_df.drop(columns=['middle_name'])
|
|
|
|
| 238 |
else:
|
| 239 |
st.warning("No template file detected. Cannot proceed without a trusted TIN from the template.")
|
| 240 |
|
| 241 |
+
# Check that a trusted 'tin' column exists from the template merge.
|
| 242 |
+
if 'tin' not in merged_df.columns or merged_df['tin'].isnull().all():
|
| 243 |
+
st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge. "
|
| 244 |
+
"Please ensure the template file's first column holds the trusted TIN and is properly standardized.")
|
| 245 |
return merged_df
|
| 246 |
|
| 247 |
# Merge PAYE figures onto the merged DataFrame using 'tin'
|