rairo committed on
Commit
d2c0f12
·
verified ·
1 Parent(s): 8e09dee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -183,8 +183,8 @@ def merge_with_master(processed_files):
183
  1. Use the earnings file as master and drop its inaccurate 'tin' column.
184
  2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
185
  The trusted 'tin' comes from the template file.
186
- In the template file, force the first column to be the trusted 'tin'.
187
- 3. Check that the merged earnings-template data has a 'tin' column.
188
  If present, merge the resulting DataFrame with the PAYE file using 'tin'.
189
  """
190
  earnings_file = None
@@ -210,7 +210,7 @@ def merge_with_master(processed_files):
210
  # Drop the inaccurate 'tin' column from earnings if it exists.
211
  if 'tin' in earnings_df.columns:
212
  earnings_df = earnings_df.drop(columns=['tin'])
213
- # Double-check removal of any middle_name column.
214
  if 'middle_name' in earnings_df.columns:
215
  earnings_df = earnings_df.drop(columns=['middle_name'])
216
 
@@ -219,18 +219,21 @@ def merge_with_master(processed_files):
219
  # Merge template info onto earnings using 'employee_name'
220
  if template_file is not None:
221
  st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
222
- template_df = template_file["df"]
223
  # Force the first column of the template file to be 'tin'
224
  if not template_df.empty:
225
  cols = list(template_df.columns)
226
  cols[0] = "tin"
227
  template_df.columns = cols
228
- # Drop any middle_name column from the template file.
 
229
  if 'middle_name' in template_df.columns:
230
  template_df = template_df.drop(columns=['middle_name'])
231
- # Ensure template has an 'employee_name' column (construct if necessary).
232
  if 'employee_name' not in template_df.columns and 'first_name' in template_df.columns and 'last_name' in template_df.columns:
233
  template_df['employee_name'] = template_df['first_name'].astype(str).str.strip() + ' ' + template_df['last_name'].astype(str).str.strip()
 
 
234
  if 'employee_name' in merged_df.columns and 'employee_name' in template_df.columns:
235
  merged_df = merged_df.merge(template_df, on='employee_name', how='left', suffixes=('', '_template'))
236
  else:
 
183
  1. Use the earnings file as master and drop its inaccurate 'tin' column.
184
  2. Merge template info onto earnings using 'employee_name' (the key provided by "Employee Name").
185
  The trusted 'tin' comes from the template file.
186
+ For the template file, force its first column (which is "Personal ID of Employee") to be 'tin'.
187
+ 3. Check that the merged earnings-template data has a 'tin' column populated.
188
  If present, merge the resulting DataFrame with the PAYE file using 'tin'.
189
  """
190
  earnings_file = None
 
210
  # Drop the inaccurate 'tin' column from earnings if it exists.
211
  if 'tin' in earnings_df.columns:
212
  earnings_df = earnings_df.drop(columns=['tin'])
213
+ # Remove any middle_name column.
214
  if 'middle_name' in earnings_df.columns:
215
  earnings_df = earnings_df.drop(columns=['middle_name'])
216
 
 
219
  # Merge template info onto earnings using 'employee_name'
220
  if template_file is not None:
221
  st.write(f"Merging template info from '{template_file['filename']}' using key 'employee_name'.")
222
+ template_df = template_file["df"].copy()
223
  # Force the first column of the template file to be 'tin'
224
  if not template_df.empty:
225
  cols = list(template_df.columns)
226
  cols[0] = "tin"
227
  template_df.columns = cols
228
+
229
+ # Remove any middle_name column from the template file.
230
  if 'middle_name' in template_df.columns:
231
  template_df = template_df.drop(columns=['middle_name'])
232
+ # Ensure the template has an 'employee_name' column.
233
  if 'employee_name' not in template_df.columns and 'first_name' in template_df.columns and 'last_name' in template_df.columns:
234
  template_df['employee_name'] = template_df['first_name'].astype(str).str.strip() + ' ' + template_df['last_name'].astype(str).str.strip()
235
+ # If after standardization the template still doesn't have employee_name,
236
+ # you may need to construct it manually if possible.
237
  if 'employee_name' in merged_df.columns and 'employee_name' in template_df.columns:
238
  merged_df = merged_df.merge(template_df, on='employee_name', how='left', suffixes=('', '_template'))
239
  else: