Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,7 +38,13 @@ def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 38 |
- Creates an 'employee_name' column if missing but first_name and last_name exist.
|
| 39 |
- Combines duplicate key columns (e.g., multiple 'salary' or 'tin' columns) into one.
|
| 40 |
- Forces the key columns 'tin' and 'employee_name' to be strings.
|
|
|
|
| 41 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
rename_map = {}
|
| 43 |
for col in df.columns:
|
| 44 |
col_lower = col.lower()
|
|
@@ -150,48 +156,65 @@ def read_excel_file(file) -> pd.DataFrame:
|
|
| 150 |
|
| 151 |
def merge_with_master(processed_files):
|
| 152 |
"""
|
| 153 |
-
Merge multiple DataFrames
|
| 154 |
-
|
| 155 |
-
|
|
|
|
| 156 |
"""
|
| 157 |
-
|
| 158 |
paye_file = None
|
| 159 |
template_file = None
|
| 160 |
for file_info in processed_files:
|
| 161 |
lower_filename = file_info["filename"].lower()
|
| 162 |
if "earnings" in lower_filename:
|
| 163 |
-
|
| 164 |
elif "paye" in lower_filename:
|
| 165 |
paye_file = file_info
|
| 166 |
elif "template" in lower_filename:
|
| 167 |
template_file = file_info
|
| 168 |
-
if not
|
| 169 |
st.warning("No earnings file found as master. Using the first file as master.")
|
| 170 |
-
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
|
| 176 |
-
if paye_file is not None:
|
| 177 |
-
st.write(f"Merging PAYE figures from '{paye_file['filename']}' onto the earnings sheet using key 'tin'.")
|
| 178 |
-
if 'tin' in merged_df.columns and 'tin' in paye_file["df"].columns:
|
| 179 |
-
merged_df = merged_df.merge(paye_file["df"], on='tin', how='left', suffixes=('', '_paye'))
|
| 180 |
-
else:
|
| 181 |
-
st.warning("Column 'tin' missing in either the earnings or PAYE file. Skipping PAYE merge.")
|
| 182 |
-
else:
|
| 183 |
-
st.warning("No PAYE file detected.")
|
| 184 |
|
| 185 |
-
# Merge template info onto
|
| 186 |
if template_file is not None:
|
| 187 |
-
st.write(f"Merging template info from '{template_file['filename']}' onto the earnings sheet using key '
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
else:
|
| 191 |
-
st.warning("Column '
|
| 192 |
else:
|
| 193 |
st.warning("No template file detected.")
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
return merged_df
|
| 196 |
|
| 197 |
def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 38 |
- Creates an 'employee_name' column if missing but first_name and last_name exist.
|
| 39 |
- Combines duplicate key columns (e.g., multiple 'salary' or 'tin' columns) into one.
|
| 40 |
- Forces the key columns 'tin' and 'employee_name' to be strings.
|
| 41 |
+
- Drops any middle name columns.
|
| 42 |
"""
|
| 43 |
+
# Drop any column that appears to be a middle name
|
| 44 |
+
middle_name_cols = [col for col in df.columns if 'middle_name' in col.lower()]
|
| 45 |
+
if middle_name_cols:
|
| 46 |
+
df = df.drop(columns=middle_name_cols)
|
| 47 |
+
|
| 48 |
rename_map = {}
|
| 49 |
for col in df.columns:
|
| 50 |
col_lower = col.lower()
|
|
|
|
| 156 |
|
| 157 |
def merge_with_master(processed_files):
    """
    Merge the uploaded sheets into one DataFrame, using the earnings file as master.

    The merge is a three-step process:
    1. Use the earnings file as the master. Drop its inaccurate 'tin' column.
    2. Merge template info onto earnings via 'employee_name' (constructed from
       first and last names when the template lacks that column).
    3. Merge the resulting DataFrame with the PAYE file using the (correct)
       'tin' key.

    Files are classified by filename substring, tested in the order
    "earnings", "paye", "template"; when several files match the same role,
    the last one wins. Progress and skipped steps are reported through
    ``st.write`` / ``st.warning``.

    Args:
        processed_files: Sequence of dicts, each holding a "filename" string
            and a "df" DataFrame. The DataFrames are not modified.

    Returns:
        The merged DataFrame. An empty DataFrame is returned when
        ``processed_files`` is empty.
    """
    if not processed_files:
        # There is no "first file" to fall back on; without this guard the
        # fallback below would fail with an IndexError.
        st.warning("No files were provided to merge.")
        return pd.DataFrame()

    earnings_file = None
    paye_file = None
    template_file = None
    for file_info in processed_files:
        lower_filename = file_info["filename"].lower()
        if "earnings" in lower_filename:
            earnings_file = file_info
        elif "paye" in lower_filename:
            paye_file = file_info
        elif "template" in lower_filename:
            template_file = file_info

    if earnings_file is None:
        st.warning("No earnings file found as master. Using the first file as master.")
        earnings_file = processed_files[0]

    # Start from the earnings sheet without its inaccurate 'tin' column and
    # without any 'middle_name' column. errors="ignore" makes both drops
    # no-ops when the column is absent; copy() keeps the caller's frame intact.
    merged_df = (
        earnings_file["df"]
        .drop(columns=["tin", "middle_name"], errors="ignore")
        .copy()
    )

    # Merge template info onto earnings using 'employee_name' as key
    if template_file is not None:
        st.write(f"Merging template info from '{template_file['filename']}' onto the earnings sheet using key 'employee_name'.")
        template_df = template_file["df"].drop(columns=["middle_name"], errors="ignore")

        can_build_name = (
            "employee_name" not in template_df.columns
            and "first_name" in template_df.columns
            and "last_name" in template_df.columns
        )
        if can_build_name:
            # assign() returns a new frame, so the DataFrame stored in
            # processed_files is never written to.
            template_df = template_df.assign(
                employee_name=(
                    template_df["first_name"].astype(str).str.strip()
                    + " "
                    + template_df["last_name"].astype(str).str.strip()
                )
            )

        if "employee_name" in merged_df.columns and "employee_name" in template_df.columns:
            # NOTE(review): a left merge repeats earnings rows when the
            # template holds duplicate employee_name values - confirm the
            # template is unique per employee.
            merged_df = merged_df.merge(
                template_df, on="employee_name", how="left", suffixes=("", "_template")
            )
        else:
            st.warning("Column 'employee_name' missing in either the earnings or template file. Skipping template merge.")
    else:
        st.warning("No template file detected.")

    # Merge PAYE figures onto the merged DataFrame using 'tin'. Because the
    # earnings 'tin' was dropped above, the key can only come from the
    # template merge.
    if paye_file is not None:
        st.write(f"Merging PAYE figures from '{paye_file['filename']}' onto the merged sheet using key 'tin'.")
        paye_df = paye_file["df"]
        if "tin" in merged_df.columns and "tin" in paye_df.columns:
            merged_df = merged_df.merge(
                paye_df, on="tin", how="left", suffixes=("", "_paye")
            )
        else:
            st.warning("Column 'tin' missing in either the merged or PAYE file. Skipping PAYE merge.")
    else:
        st.warning("No PAYE file detected.")

    return merged_df
|
| 219 |
|
| 220 |
def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
|