rairo committed on
Commit
a35b8e4
·
verified ·
1 Parent(s): 42db88a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -29
app.py CHANGED
@@ -1,7 +1,5 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import os
4
- from io import BytesIO
5
  import re
6
 
7
  def clean_column_name(col_name):
@@ -11,24 +9,20 @@ def clean_column_name(col_name):
11
  cleaned = re.sub(r"[^\w\s]", " ", col_name)
12
  return re.sub(r"\s+", "_", cleaned.strip().lower())
13
 
14
- def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
15
  """
16
- Standardize DataFrame column names.
17
- If a column name contains "tin" or variants of personal id,
18
- rename it to 'tin'. Also, strip trailing spaces from all string values.
19
  """
 
20
  rename_map = {}
21
  for col in df.columns:
22
  col_lower = col.lower()
23
- # Check for various forms of TIN column name.
24
- if col_lower in ['personal id', 'personal_id', 'tax id', 'taxid'] or "personal_id_of_employee" in col_lower:
25
- rename_map[col] = 'tin'
26
- elif 'tin' in col_lower:
27
- rename_map[col] = 'tin'
28
  if rename_map:
29
  df = df.rename(columns=rename_map)
30
-
31
- # Strip trailing spaces from string values in every column.
32
  for col in df.columns:
33
  if df[col].dtype == object:
34
  df[col] = df[col].astype(str).str.strip()
@@ -46,14 +40,15 @@ def read_file(file) -> pd.DataFrame:
46
  return None
47
 
48
  def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
49
- """Convert DataFrame to string values to ensure safe display."""
50
  return df.astype(str).replace({"nan": "", "None": ""})
51
 
52
  def main():
53
  st.title("Merge Employee Name from Earnings into PAYE Sheet")
54
  st.write(
55
- "Upload an Earnings Sheet and a PAYE Sheet. The app will extract the first two columns "
56
- "(TIN and Employee Name) from the Earnings Sheet and merge the Employee Name into the PAYE sheet based on matching TIN values."
 
57
  )
58
 
59
  earnings_file = st.file_uploader("Upload Earnings Sheet", type=["csv", "xlsx", "xls"], key="earnings")
@@ -68,37 +63,34 @@ def main():
68
  st.error("One of the files could not be read. Please check the files and try again.")
69
  return
70
 
71
- # Clean and standardize column names and values
72
- earnings_df.columns = [clean_column_name(col) for col in earnings_df.columns]
73
- earnings_df = standardize_dataframe(earnings_df)
74
- paye_df.columns = [clean_column_name(col) for col in paye_df.columns]
75
- paye_df = standardize_dataframe(paye_df)
76
 
77
- # Ensure earnings file has at least two columns
78
  if earnings_df.shape[1] < 2:
79
  st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
80
  return
81
 
82
- # Extract first two columns from the earnings sheet.
83
- # Assume the first column is TIN and the second is Employee Name.
84
  earnings_subset = earnings_df.iloc[:, :2].copy()
85
  earnings_subset.columns = ["tin", "employee_name"]
86
- # Ensure trailing spaces are removed
87
  earnings_subset["tin"] = earnings_subset["tin"].astype(str).str.strip()
88
  earnings_subset["employee_name"] = earnings_subset["employee_name"].astype(str).str.strip()
89
 
90
  st.write("Preview of extracted TIN and Employee Name from Earnings Sheet:")
91
  st.dataframe(safe_display_df(earnings_subset.head()))
92
 
93
- # Check for the 'tin' column in the PAYE sheet.
94
  if "tin" not in paye_df.columns:
95
- st.error("The PAYE sheet does not have a recognized TIN column (e.g., 'tin' or 'personal_id_of_employee').")
96
  return
97
  else:
98
- # Ensure trailing spaces are removed from PAYE tin values.
99
  paye_df["tin"] = paye_df["tin"].astype(str).str.strip()
100
 
101
- # Merge the PAYE sheet with the earnings subset on the 'tin' column.
102
  merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
103
  st.write("### Merged PAYE Sheet with Employee Name")
104
  st.dataframe(safe_display_df(merged_df.head()))
 
1
  import streamlit as st
2
  import pandas as pd
 
 
3
  import re
4
 
5
  def clean_column_name(col_name):
 
9
  cleaned = re.sub(r"[^\w\s]", " ", col_name)
10
  return re.sub(r"\s+", "_", cleaned.strip().lower())
11
 
12
+ def standardize_tin_column(df: pd.DataFrame) -> pd.DataFrame:
13
  """
14
+ Clean column names and rename any column that contains 'tin'
15
+ or both 'personal' and 'id' to 'tin'. Then strip extra spaces.
 
16
  """
17
+ df.columns = [clean_column_name(col) for col in df.columns]
18
  rename_map = {}
19
  for col in df.columns:
20
  col_lower = col.lower()
21
+ if "tin" in col_lower or (("personal" in col_lower) and ("id" in col_lower)):
22
+ rename_map[col] = "tin"
 
 
 
23
  if rename_map:
24
  df = df.rename(columns=rename_map)
25
+ # Strip trailing spaces from string columns
 
26
  for col in df.columns:
27
  if df[col].dtype == object:
28
  df[col] = df[col].astype(str).str.strip()
 
40
  return None
41
 
42
  def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
43
+ """Convert DataFrame values to strings for safe display."""
44
  return df.astype(str).replace({"nan": "", "None": ""})
45
 
46
  def main():
47
  st.title("Merge Employee Name from Earnings into PAYE Sheet")
48
  st.write(
49
+ "Upload an Earnings Sheet and a PAYE Sheet. "
50
+ "The app will extract the first two columns (TIN and Employee Name) from the Earnings Sheet, "
51
+ "clean and standardize the TIN values, and then merge the Employee Name onto the PAYE sheet using the TIN."
52
  )
53
 
54
  earnings_file = st.file_uploader("Upload Earnings Sheet", type=["csv", "xlsx", "xls"], key="earnings")
 
63
  st.error("One of the files could not be read. Please check the files and try again.")
64
  return
65
 
66
+ # Standardize columns for both files
67
+ earnings_df = standardize_tin_column(earnings_df)
68
+ paye_df = standardize_tin_column(paye_df)
 
 
69
 
70
+ # Check that the earnings file has at least two columns
71
  if earnings_df.shape[1] < 2:
72
  st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
73
  return
74
 
75
+ # Extract first two columns from earnings file.
76
+ # Assume first column is TIN and second is Employee Name.
77
  earnings_subset = earnings_df.iloc[:, :2].copy()
78
  earnings_subset.columns = ["tin", "employee_name"]
79
+ # Ensure values are stripped of trailing spaces
80
  earnings_subset["tin"] = earnings_subset["tin"].astype(str).str.strip()
81
  earnings_subset["employee_name"] = earnings_subset["employee_name"].astype(str).str.strip()
82
 
83
  st.write("Preview of extracted TIN and Employee Name from Earnings Sheet:")
84
  st.dataframe(safe_display_df(earnings_subset.head()))
85
 
86
+ # Verify the PAYE sheet has a 'tin' column
87
  if "tin" not in paye_df.columns:
88
+ st.error("The PAYE sheet does not have a recognized TIN column (e.g., 'tin' or 'personal id').")
89
  return
90
  else:
 
91
  paye_df["tin"] = paye_df["tin"].astype(str).str.strip()
92
 
93
+ # Merge the employee name from earnings_subset onto the PAYE sheet using 'tin'
94
  merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
95
  st.write("### Merged PAYE Sheet with Employee Name")
96
  st.dataframe(safe_display_df(merged_df.head()))