rairo committed on
Commit
8e09dee
·
verified ·
1 Parent(s): a66eb56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -55,12 +55,11 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
55
  df.columns = [clean_column_name(col) for col in df.columns]
56
 
57
  # Build a rename map for TIN and salary synonyms.
58
- # Note: This will capture PAYE's "tin_or_personal_id_of_employee" too.
59
  rename_map = {}
60
  for col in df.columns:
61
  if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
62
  rename_map[col] = 'tin'
63
- elif 'tin' in col and 'tin' not in rename_map.get(col, ''):
64
  rename_map[col] = 'tin'
65
  if any(keyword in col for keyword in ['salary', 'wage', 'earning', 'commission', 'fee', 'payment', 'compensation']):
66
  rename_map[col] = 'salary'
@@ -184,7 +183,9 @@ def merge_with_master(processed_files):
184
  1. Use the earnings file as master and drop its inaccurate 'tin' column.
185
  2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
186
  The trusted 'tin' comes from the template file.
187
- 3. Merge the combined earnings–template DataFrame with the PAYE file using 'tin'.
 
 
188
  """
189
  earnings_file = None
190
  paye_file = None
@@ -209,7 +210,7 @@ def merge_with_master(processed_files):
209
  # Drop the inaccurate 'tin' column from earnings if it exists.
210
  if 'tin' in earnings_df.columns:
211
  earnings_df = earnings_df.drop(columns=['tin'])
212
- # Double-check removal of any middle_name column (should be done in standardization).
213
  if 'middle_name' in earnings_df.columns:
214
  earnings_df = earnings_df.drop(columns=['middle_name'])
215
 
@@ -219,6 +220,11 @@ def merge_with_master(processed_files):
219
  if template_file is not None:
220
  st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
221
  template_df = template_file["df"]
 
 
 
 
 
222
  # Drop any middle_name column from the template file.
223
  if 'middle_name' in template_df.columns:
224
  template_df = template_df.drop(columns=['middle_name'])
@@ -232,9 +238,10 @@ def merge_with_master(processed_files):
232
  else:
233
  st.warning("No template file detected. Cannot proceed without a trusted TIN from the template.")
234
 
235
- # After merging, check that a trusted 'tin' is present from the template.
236
- if 'tin' not in merged_df.columns:
237
- st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge.")
 
238
  return merged_df
239
 
240
  # Merge PAYE figures onto the merged DataFrame using 'tin'
 
55
  df.columns = [clean_column_name(col) for col in df.columns]
56
 
57
  # Build a rename map for TIN and salary synonyms.
 
58
  rename_map = {}
59
  for col in df.columns:
60
  if col in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col:
61
  rename_map[col] = 'tin'
62
+ elif 'tin' in col:
63
  rename_map[col] = 'tin'
64
  if any(keyword in col for keyword in ['salary', 'wage', 'earning', 'commission', 'fee', 'payment', 'compensation']):
65
  rename_map[col] = 'salary'
 
183
  1. Use the earnings file as master and drop its inaccurate 'tin' column.
184
  2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
185
  The trusted 'tin' comes from the template file.
186
+ In the template file, force the first column to be the trusted 'tin'.
187
+ 3. Check that the merged earnings-template data has a 'tin' column.
188
+ If present, merge the resulting DataFrame with the PAYE file using 'tin'.
189
  """
190
  earnings_file = None
191
  paye_file = None
 
210
  # Drop the inaccurate 'tin' column from earnings if it exists.
211
  if 'tin' in earnings_df.columns:
212
  earnings_df = earnings_df.drop(columns=['tin'])
213
+ # Double-check removal of any middle_name column.
214
  if 'middle_name' in earnings_df.columns:
215
  earnings_df = earnings_df.drop(columns=['middle_name'])
216
 
 
220
  if template_file is not None:
221
  st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
222
  template_df = template_file["df"]
223
+ # Force the first column of the template file to be 'tin'
224
+ if not template_df.empty:
225
+ cols = list(template_df.columns)
226
+ cols[0] = "tin"
227
+ template_df.columns = cols
228
  # Drop any middle_name column from the template file.
229
  if 'middle_name' in template_df.columns:
230
  template_df = template_df.drop(columns=['middle_name'])
 
238
  else:
239
  st.warning("No template file detected. Cannot proceed without a trusted TIN from the template.")
240
 
241
+ # Check that a trusted 'tin' column exists from the template merge.
242
+ if 'tin' not in merged_df.columns or merged_df['tin'].isnull().all():
243
+ st.error("No trusted 'tin' column found in the merged earnings-template data. Aborting further merge. "
244
+ "Please ensure the template file's first column holds the trusted TIN and is properly standardized.")
245
  return merged_df
246
 
247
  # Merge PAYE figures onto the merged DataFrame using 'tin'