rairo committed on
Commit
42db88a
·
verified ·
1 Parent(s): 96d2597

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -9
app.py CHANGED
@@ -11,6 +11,29 @@ def clean_column_name(col_name):
11
  cleaned = re.sub(r"[^\w\s]", " ", col_name)
12
  return re.sub(r"\s+", "_", cleaned.strip().lower())
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def read_file(file) -> pd.DataFrame:
15
  """Read a CSV or Excel file into a DataFrame."""
16
  try:
@@ -28,12 +51,16 @@ def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
28
 
29
  def main():
30
  st.title("Merge Employee Name from Earnings into PAYE Sheet")
31
- st.write("Upload an Earnings Sheet and a PAYE Sheet. The app will extract the first two columns (TIN and Employee Name) from the Earnings Sheet and merge the Employee Name into the PAYE sheet based on matching TIN.")
 
 
 
32
 
33
  earnings_file = st.file_uploader("Upload Earnings Sheet", type=["csv", "xlsx", "xls"], key="earnings")
34
  paye_file = st.file_uploader("Upload PAYE Sheet", type=["csv", "xlsx", "xls"], key="paye")
35
 
36
  if earnings_file and paye_file:
 
37
  earnings_df = read_file(earnings_file)
38
  paye_df = read_file(paye_file)
39
 
@@ -41,32 +68,42 @@ def main():
41
  st.error("One of the files could not be read. Please check the files and try again.")
42
  return
43
 
44
- # Clean column names for both dataframes
45
  earnings_df.columns = [clean_column_name(col) for col in earnings_df.columns]
 
46
  paye_df.columns = [clean_column_name(col) for col in paye_df.columns]
 
47
 
48
- # Check that earnings file has at least two columns
49
  if earnings_df.shape[1] < 2:
50
  st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
51
  return
52
 
53
- # Extract the first two columns from the earnings sheet.
54
- # We assume the first column is TIN and the second is Employee Name.
55
  earnings_subset = earnings_df.iloc[:, :2].copy()
56
  earnings_subset.columns = ["tin", "employee_name"]
 
 
 
 
57
  st.write("Preview of extracted TIN and Employee Name from Earnings Sheet:")
58
  st.dataframe(safe_display_df(earnings_subset.head()))
59
 
60
- # Merge the PAYE sheet with the extracted employee names based on the 'tin' column.
61
  if "tin" not in paye_df.columns:
62
- st.error("The PAYE sheet does not have a 'tin' column to merge on.")
63
  return
 
 
 
64
 
 
65
  merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
66
  st.write("### Merged PAYE Sheet with Employee Name")
67
  st.dataframe(safe_display_df(merged_df.head()))
68
 
69
- # Provide option to download the merged data as CSV.
70
  csv_data = merged_df.to_csv(index=False).encode("utf-8")
71
  st.download_button(
72
  label="Download Merged CSV",
@@ -74,7 +111,6 @@ def main():
74
  file_name="merged_paye.csv",
75
  mime="text/csv"
76
  )
77
-
78
  st.write(f"Total rows in merged data: {len(merged_df)}")
79
  else:
80
  st.info("Please upload both an Earnings Sheet and a PAYE Sheet.")
 
11
  cleaned = re.sub(r"[^\w\s]", " ", col_name)
12
  return re.sub(r"\s+", "_", cleaned.strip().lower())
13
 
14
def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with its TIN column named 'tin' and string cells stripped.

    Column detection:
      * If a column is already named exactly 'tin', no column is renamed.
      * Otherwise the first column (left to right) that looks like a TIN
        column is renamed to 'tin'. A column qualifies when its lowercased
        name is one of the known aliases ('personal id', 'personal_id',
        'tax id', 'taxid'), contains 'personal_id_of_employee', or has
        'tin' as a whole word (e.g. 'employee_tin', 'tin_no').

    At most one column is renamed, so the result never gains a second
    'tin' column from renaming. 'tin' must be a whole word so that names
    which merely contain those letters (e.g. 'rating', 'starting_date')
    are left alone.

    Value cleaning: in every object-dtype column, leading and trailing
    whitespace is removed from values that are ``str`` instances. Non-string
    values (numbers, missing values) are left untouched rather than being
    converted to text such as "nan".

    Args:
        df: The DataFrame to standardize. It is not modified.

    Returns:
        A new DataFrame with the rename and whitespace cleanup applied.
    """
    tin_aliases = {"personal id", "personal_id", "tax id", "taxid"}

    def _is_tin_column(name) -> bool:
        # str() keeps this safe for non-string labels (e.g. integer headers).
        lowered = str(name).lower()
        if lowered in tin_aliases or "personal_id_of_employee" in lowered:
            return True
        # Whole-word match: split the name on anything that is not a letter
        # or digit and look for a token that is exactly "tin".
        return "tin" in re.split(r"[^a-z0-9]+", lowered)

    columns = list(df.columns)
    rename_map = {}
    if "tin" not in columns:
        # Only the first candidate is renamed; renaming several columns to
        # the same label would make ``frame["tin"]`` ambiguous.
        candidate = next((col for col in columns if _is_tin_column(col)), None)
        if candidate is not None:
            rename_map[candidate] = "tin"

    # rename() returns a new DataFrame even for an empty mapping, so the
    # caller's frame is never modified by the column assignments below.
    result = df.rename(columns=rename_map)

    # NOTE(review): only object-dtype columns are cleaned; columns stored with
    # a dedicated pandas string dtype are skipped -- confirm whether that
    # matters for the pandas version in use.
    for col in result.columns:
        if result[col].dtype == object:
            result[col] = result[col].map(
                lambda value: value.strip() if isinstance(value, str) else value
            )
    return result
36
+
37
  def read_file(file) -> pd.DataFrame:
38
  """Read a CSV or Excel file into a DataFrame."""
39
  try:
 
51
 
52
  def main():
53
  st.title("Merge Employee Name from Earnings into PAYE Sheet")
54
+ st.write(
55
+ "Upload an Earnings Sheet and a PAYE Sheet. The app will extract the first two columns "
56
+ "(TIN and Employee Name) from the Earnings Sheet and merge the Employee Name into the PAYE sheet based on matching TIN values."
57
+ )
58
 
59
  earnings_file = st.file_uploader("Upload Earnings Sheet", type=["csv", "xlsx", "xls"], key="earnings")
60
  paye_file = st.file_uploader("Upload PAYE Sheet", type=["csv", "xlsx", "xls"], key="paye")
61
 
62
  if earnings_file and paye_file:
63
+ # Read the files
64
  earnings_df = read_file(earnings_file)
65
  paye_df = read_file(paye_file)
66
 
 
68
  st.error("One of the files could not be read. Please check the files and try again.")
69
  return
70
 
71
+ # Clean and standardize column names and values
72
  earnings_df.columns = [clean_column_name(col) for col in earnings_df.columns]
73
+ earnings_df = standardize_dataframe(earnings_df)
74
  paye_df.columns = [clean_column_name(col) for col in paye_df.columns]
75
+ paye_df = standardize_dataframe(paye_df)
76
 
77
+ # Ensure earnings file has at least two columns
78
  if earnings_df.shape[1] < 2:
79
  st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
80
  return
81
 
82
+ # Extract first two columns from the earnings sheet.
83
+ # Assume the first column is TIN and the second is Employee Name.
84
  earnings_subset = earnings_df.iloc[:, :2].copy()
85
  earnings_subset.columns = ["tin", "employee_name"]
86
+ # Ensure trailing spaces are removed
87
+ earnings_subset["tin"] = earnings_subset["tin"].astype(str).str.strip()
88
+ earnings_subset["employee_name"] = earnings_subset["employee_name"].astype(str).str.strip()
89
+
90
  st.write("Preview of extracted TIN and Employee Name from Earnings Sheet:")
91
  st.dataframe(safe_display_df(earnings_subset.head()))
92
 
93
+ # Check for the 'tin' column in the PAYE sheet.
94
  if "tin" not in paye_df.columns:
95
+ st.error("The PAYE sheet does not have a recognized TIN column (e.g., 'tin' or 'personal_id_of_employee').")
96
  return
97
+ else:
98
+ # Ensure trailing spaces are removed from PAYE tin values.
99
+ paye_df["tin"] = paye_df["tin"].astype(str).str.strip()
100
 
101
+ # Merge the PAYE sheet with the earnings subset on the 'tin' column.
102
  merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
103
  st.write("### Merged PAYE Sheet with Employee Name")
104
  st.dataframe(safe_display_df(merged_df.head()))
105
 
106
+ # Option to download the merged data as CSV.
107
  csv_data = merged_df.to_csv(index=False).encode("utf-8")
108
  st.download_button(
109
  label="Download Merged CSV",
 
111
  file_name="merged_paye.csv",
112
  mime="text/csv"
113
  )
 
114
  st.write(f"Total rows in merged data: {len(merged_df)}")
115
  else:
116
  st.info("Please upload both an Earnings Sheet and a PAYE Sheet.")