rairo committed on
Commit
9c17313
·
verified ·
1 Parent(s): c71c6c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -23
app.py CHANGED
@@ -38,7 +38,13 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
38
  - Creates an 'employee_name' column if missing but first_name and last_name exist.
39
  - Combines duplicate key columns (e.g., multiple 'salary' or 'tin' columns) into one.
40
  - Forces the key columns 'tin' and 'employee_name' to be strings.
 
41
  """
 
 
 
 
 
42
  rename_map = {}
43
  for col in df.columns:
44
  col_lower = col.lower()
@@ -150,48 +156,65 @@ def read_excel_file(file) -> pd.DataFrame:
150
 
151
  def merge_with_master(processed_files):
152
  """
153
- Merge multiple DataFrames by using the earnings schedule file as the master.
154
- This modified logic looks for files whose names include 'earnings', 'paye', or 'template'.
155
- The PAYE figures and the template info are merged onto the earnings sheet using the 'tin' key.
 
156
  """
157
- master_file = None
158
  paye_file = None
159
  template_file = None
160
  for file_info in processed_files:
161
  lower_filename = file_info["filename"].lower()
162
  if "earnings" in lower_filename:
163
- master_file = file_info
164
  elif "paye" in lower_filename:
165
  paye_file = file_info
166
  elif "template" in lower_filename:
167
  template_file = file_info
168
- if not master_file:
169
  st.warning("No earnings file found as master. Using the first file as master.")
170
- master_file = processed_files[0]
171
 
172
- master_df = master_file["df"]
173
- merged_df = master_df.copy()
 
 
 
 
 
 
174
 
175
- # Merge PAYE figures onto the earnings sheet
176
- if paye_file is not None:
177
- st.write(f"Merging PAYE figures from '{paye_file['filename']}' onto the earnings sheet using key 'tin'.")
178
- if 'tin' in merged_df.columns and 'tin' in paye_file["df"].columns:
179
- merged_df = merged_df.merge(paye_file["df"], on='tin', how='left', suffixes=('', '_paye'))
180
- else:
181
- st.warning("Column 'tin' missing in either the earnings or PAYE file. Skipping PAYE merge.")
182
- else:
183
- st.warning("No PAYE file detected.")
184
 
185
- # Merge template info onto the earnings sheet
186
  if template_file is not None:
187
- st.write(f"Merging template info from '{template_file['filename']}' onto the earnings sheet using key 'tin'.")
188
- if 'tin' in merged_df.columns and 'tin' in template_file["df"].columns:
189
- merged_df = merged_df.merge(template_file["df"], on='tin', how='left', suffixes=('', '_template'))
 
 
 
 
 
 
 
190
  else:
191
- st.warning("Column 'tin' missing in either the earnings or template file. Skipping template merge.")
192
  else:
193
  st.warning("No template file detected.")
194
 
 
 
 
 
 
 
 
 
 
 
 
195
  return merged_df
196
 
197
  def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
 
38
  - Creates an 'employee_name' column if missing but first_name and last_name exist.
39
  - Combines duplicate key columns (e.g., multiple 'salary' or 'tin' columns) into one.
40
  - Forces the key columns 'tin' and 'employee_name' to be strings.
41
+ - Drops any middle name columns.
42
  """
43
+ # Drop any column that appears to be a middle name
44
+ middle_name_cols = [col for col in df.columns if 'middle_name' in col.lower()]
45
+ if middle_name_cols:
46
+ df = df.drop(columns=middle_name_cols)
47
+
48
  rename_map = {}
49
  for col in df.columns:
50
  col_lower = col.lower()
 
156
 
157
  def merge_with_master(processed_files):
158
  """
159
+ Merge multiple DataFrames using a three-step process:
160
+ 1. Use the earnings file as the master. Drop its inaccurate 'tin' column.
161
+ 2. Merge template info onto earnings via 'employee_name' (constructed from first and last names).
162
+ 3. Then merge the resulting DataFrame with the PAYE file using the (correct) 'tin' key.
163
  """
164
+ earnings_file = None
165
  paye_file = None
166
  template_file = None
167
  for file_info in processed_files:
168
  lower_filename = file_info["filename"].lower()
169
  if "earnings" in lower_filename:
170
+ earnings_file = file_info
171
  elif "paye" in lower_filename:
172
  paye_file = file_info
173
  elif "template" in lower_filename:
174
  template_file = file_info
175
+ if not earnings_file:
176
  st.warning("No earnings file found as master. Using the first file as master.")
177
+ earnings_file = processed_files[0]
178
 
179
+ # Start with the earnings DataFrame as master
180
+ earnings_df = earnings_file["df"]
181
+ # Drop the inaccurate 'tin' column from earnings, if present
182
+ if 'tin' in earnings_df.columns:
183
+ earnings_df = earnings_df.drop(columns=['tin'])
184
+ # Ensure any middle_name column is dropped (already handled in standardization, but double-check)
185
+ if 'middle_name' in earnings_df.columns:
186
+ earnings_df = earnings_df.drop(columns=['middle_name'])
187
 
188
+ merged_df = earnings_df.copy()
 
 
 
 
 
 
 
 
189
 
190
+ # Merge template info onto earnings using 'employee_name' as key
191
  if template_file is not None:
192
+ st.write(f"Merging template info from '{template_file['filename']}' onto the earnings sheet using key 'employee_name'.")
193
+ template_df = template_file["df"]
194
+ # Drop any middle_name column from the template file
195
+ if 'middle_name' in template_df.columns:
196
+ template_df = template_df.drop(columns=['middle_name'])
197
+ # Ensure template has an 'employee_name' column (constructed if necessary)
198
+ if 'employee_name' not in template_df.columns and 'first_name' in template_df.columns and 'last_name' in template_df.columns:
199
+ template_df['employee_name'] = template_df['first_name'].astype(str).str.strip() + ' ' + template_df['last_name'].astype(str).str.strip()
200
+ if 'employee_name' in merged_df.columns and 'employee_name' in template_df.columns:
201
+ merged_df = merged_df.merge(template_df, on='employee_name', how='left', suffixes=('', '_template'))
202
  else:
203
+ st.warning("Column 'employee_name' missing in either the earnings or template file. Skipping template merge.")
204
  else:
205
  st.warning("No template file detected.")
206
 
207
+ # Merge PAYE figures onto the merged DataFrame using 'tin'
208
+ if paye_file is not None:
209
+ st.write(f"Merging PAYE figures from '{paye_file['filename']}' onto the merged sheet using key 'tin'.")
210
+ paye_df = paye_file["df"]
211
+ if 'tin' in merged_df.columns and 'tin' in paye_df.columns:
212
+ merged_df = merged_df.merge(paye_df, on='tin', how='left', suffixes=('', '_paye'))
213
+ else:
214
+ st.warning("Column 'tin' missing in either the merged or PAYE file. Skipping PAYE merge.")
215
+ else:
216
+ st.warning("No PAYE file detected.")
217
+
218
  return merged_df
219
 
220
  def safe_display_df(df: pd.DataFrame) -> pd.DataFrame: