rairo committed on
Commit
0c23633
·
verified ·
1 Parent(s): 680d9f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -19
app.py CHANGED
@@ -12,7 +12,7 @@ def clean_column_name(col_name):
12
  def standardize_tin_column(df: pd.DataFrame) -> pd.DataFrame:
13
  """
14
  Clean column names and rename any column that contains 'tin'
15
- or both 'personal' and 'id' to 'tin'. Then strip extra spaces.
16
  """
17
  df.columns = [clean_column_name(col) for col in df.columns]
18
  rename_map = {}
@@ -22,19 +22,22 @@ def standardize_tin_column(df: pd.DataFrame) -> pd.DataFrame:
22
  rename_map[col] = "tin"
23
  if rename_map:
24
  df = df.rename(columns=rename_map)
25
- # Strip trailing spaces from string columns
26
  for col in df.columns:
27
  if df[col].dtype == object:
28
  df[col] = df[col].astype(str).str.strip()
29
  return df
30
 
31
- def read_file(file) -> pd.DataFrame:
32
- """Read a CSV or Excel file into a DataFrame."""
 
 
 
33
  try:
34
  if file.name.endswith((".xlsx", ".xls")):
35
- return pd.read_excel(file)
36
  else:
37
- return pd.read_csv(file)
38
  except Exception as e:
39
  st.error(f"Error reading {file.name}: {str(e)}")
40
  return None
@@ -46,8 +49,8 @@ def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
46
  def main():
47
  st.title("Merge Employee Name from Earnings into PAYE Sheet")
48
  st.write(
49
- "Upload an Earnings Sheet and a PAYE Sheet. The app will extract the first two columns "
50
- "(TIN and Employee Name) from the Earnings Sheet, skipping the first row which contains currency labels, "
51
  "and merge the Employee Name onto the PAYE sheet using the cleaned TIN."
52
  )
53
 
@@ -55,32 +58,37 @@ def main():
55
  paye_file = st.file_uploader("Upload PAYE Sheet", type=["csv", "xlsx", "xls"], key="paye")
56
 
57
  if earnings_file and paye_file:
58
- # Read the files
59
- earnings_df = read_file(earnings_file)
60
- paye_df = read_file(paye_file)
61
 
62
  if earnings_df is None or paye_df is None:
63
  st.error("One of the files could not be read. Please check the files and try again.")
64
  return
65
 
66
- # Standardize columns for both files
67
  earnings_df = standardize_tin_column(earnings_df)
68
  paye_df = standardize_tin_column(paye_df)
69
 
70
- # Check that the earnings file has at least two columns
 
 
 
 
 
 
 
71
  if earnings_df.shape[1] < 2:
72
  st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
73
  return
74
 
75
- # Extract first two columns from earnings file.
76
- # Skip the first row (assumed to contain currency labels) using .iloc[1:]
77
- earnings_subset = earnings_df.iloc[1:, :2].copy()
78
  earnings_subset.columns = ["tin", "employee_name"]
79
- # Ensure values are stripped of trailing spaces
80
  earnings_subset["tin"] = earnings_subset["tin"].astype(str).str.strip()
81
  earnings_subset["employee_name"] = earnings_subset["employee_name"].astype(str).str.strip()
82
 
83
- st.write("Preview of extracted TIN and Employee Name from Earnings Sheet (first 5 rows after skipping currency row):")
84
  st.dataframe(safe_display_df(earnings_subset.head()))
85
 
86
  # Verify the PAYE sheet has a 'tin' column.
@@ -90,7 +98,7 @@ def main():
90
  else:
91
  paye_df["tin"] = paye_df["tin"].astype(str).str.strip()
92
 
93
- # Merge the employee name from earnings_subset onto the PAYE sheet using 'tin'
94
  merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
95
  st.write("### Merged PAYE Sheet with Employee Name")
96
  st.dataframe(safe_display_df(merged_df.head()))
 
12
  def standardize_tin_column(df: pd.DataFrame) -> pd.DataFrame:
13
  """
14
  Clean column names and rename any column that contains 'tin'
15
+ or both 'personal' and 'id' to 'tin'. Then strip extra spaces from all string values.
16
  """
17
  df.columns = [clean_column_name(col) for col in df.columns]
18
  rename_map = {}
 
22
  rename_map[col] = "tin"
23
  if rename_map:
24
  df = df.rename(columns=rename_map)
25
+ # Remove trailing and leading spaces in string cells
26
  for col in df.columns:
27
  if df[col].dtype == object:
28
  df[col] = df[col].astype(str).str.strip()
29
  return df
30
 
31
+ def read_file(file, skip_first_row=False) -> pd.DataFrame:
32
+ """
33
+ Read a CSV or Excel file into a DataFrame.
34
+ For the earnings file, skip_first_row=True will skip the first row (with currency labels).
35
+ """
36
  try:
37
  if file.name.endswith((".xlsx", ".xls")):
38
+ return pd.read_excel(file, skiprows=1 if skip_first_row else None)
39
  else:
40
+ return pd.read_csv(file, skiprows=1 if skip_first_row else None)
41
  except Exception as e:
42
  st.error(f"Error reading {file.name}: {str(e)}")
43
  return None
 
49
  def main():
50
  st.title("Merge Employee Name from Earnings into PAYE Sheet")
51
  st.write(
52
+ "Upload an Earnings Sheet and a PAYE Sheet. The Earnings Sheet is assumed to have a first row with currency labels "
53
+ "which will be skipped. The app will extract the first two columns (TIN and Employee Name) from the Earnings Sheet, "
54
  "and merge the Employee Name onto the PAYE sheet using the cleaned TIN."
55
  )
56
 
 
58
  paye_file = st.file_uploader("Upload PAYE Sheet", type=["csv", "xlsx", "xls"], key="paye")
59
 
60
  if earnings_file and paye_file:
61
+ # Read the earnings file with the first row skipped and the PAYE file normally.
62
+ earnings_df = read_file(earnings_file, skip_first_row=True)
63
+ paye_df = read_file(paye_file, skip_first_row=False)
64
 
65
  if earnings_df is None or paye_df is None:
66
  st.error("One of the files could not be read. Please check the files and try again.")
67
  return
68
 
69
+ # Standardize columns and TIN values for both files.
70
  earnings_df = standardize_tin_column(earnings_df)
71
  paye_df = standardize_tin_column(paye_df)
72
 
73
+ # Debug: display unique TIN values from both files
74
+ st.write("Unique TIN values in Earnings file:", earnings_df.iloc[:, 0].unique())
75
+ if "tin" in paye_df.columns:
76
+ st.write("Unique TIN values in PAYE file:", paye_df["tin"].unique())
77
+ else:
78
+ st.write("PAYE file columns:", list(paye_df.columns))
79
+
80
+ # Check that the earnings file has at least two columns.
81
  if earnings_df.shape[1] < 2:
82
  st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
83
  return
84
 
85
+ # Extract the first two columns from the earnings file.
86
+ earnings_subset = earnings_df.iloc[:, :2].copy()
 
87
  earnings_subset.columns = ["tin", "employee_name"]
 
88
  earnings_subset["tin"] = earnings_subset["tin"].astype(str).str.strip()
89
  earnings_subset["employee_name"] = earnings_subset["employee_name"].astype(str).str.strip()
90
 
91
+ st.write("Preview of extracted TIN and Employee Name from Earnings Sheet:")
92
  st.dataframe(safe_display_df(earnings_subset.head()))
93
 
94
  # Verify the PAYE sheet has a 'tin' column.
 
98
  else:
99
  paye_df["tin"] = paye_df["tin"].astype(str).str.strip()
100
 
101
+ # Merge the employee name from earnings_subset onto the PAYE sheet using 'tin'.
102
  merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
103
  st.write("### Merged PAYE Sheet with Employee Name")
104
  st.dataframe(safe_display_df(merged_df.head()))