Spaces:

rairo
/

OneExcelZimra

Sleeping

App Files Files Community

rairo committed on Feb 21, 2025

Commit

0c23633

verified ·

1 Parent(s): 680d9f2

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -19

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ def clean_column_name(col_name):
 def standardize_tin_column(df: pd.DataFrame) -> pd.DataFrame:
     """
     Clean column names and rename any column that contains 'tin'
-    or both 'personal' and 'id' to 'tin'. Then strip extra spaces.
     """
     df.columns = [clean_column_name(col) for col in df.columns]
     rename_map = {}
@@ -22,19 +22,22 @@ def standardize_tin_column(df: pd.DataFrame) -> pd.DataFrame:
             rename_map[col] = "tin"
     if rename_map:
         df = df.rename(columns=rename_map)
-    # Strip trailing spaces from string columns
     for col in df.columns:
         if df[col].dtype == object:
             df[col] = df[col].astype(str).str.strip()
     return df
-def read_file(file) -> pd.DataFrame:
-    """Read a CSV or Excel file into a DataFrame."""
     try:
         if file.name.endswith((".xlsx", ".xls")):
-            return pd.read_excel(file)
         else:
-            return pd.read_csv(file)
     except Exception as e:
         st.error(f"Error reading {file.name}: {str(e)}")
         return None
@@ -46,8 +49,8 @@ def safe_display_df(df: pd.DataFrame) -> pd.DataFrame:
 def main():
     st.title("Merge Employee Name from Earnings into PAYE Sheet")
     st.write(
-        "Upload an Earnings Sheet and a PAYE Sheet. The app will extract the first two columns "
-        "(TIN and Employee Name) from the Earnings Sheet, skipping the first row which contains currency labels, "
         "and merge the Employee Name onto the PAYE sheet using the cleaned TIN."
     )
@@ -55,32 +58,37 @@ def main():
     paye_file = st.file_uploader("Upload PAYE Sheet", type=["csv", "xlsx", "xls"], key="paye")
     if earnings_file and paye_file:
-        # Read the files
-        earnings_df = read_file(earnings_file)
-        paye_df = read_file(paye_file)
         if earnings_df is None or paye_df is None:
             st.error("One of the files could not be read. Please check the files and try again.")
             return
-        # Standardize columns for both files
         earnings_df = standardize_tin_column(earnings_df)
         paye_df = standardize_tin_column(paye_df)
-        # Check that the earnings file has at least two columns
         if earnings_df.shape[1] < 2:
             st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
             return
-        # Extract first two columns from earnings file.
-        # Skip the first row (assumed to contain currency labels) using .iloc[1:]
-        earnings_subset = earnings_df.iloc[1:, :2].copy()
         earnings_subset.columns = ["tin", "employee_name"]
-        # Ensure values are stripped of trailing spaces
         earnings_subset["tin"] = earnings_subset["tin"].astype(str).str.strip()
         earnings_subset["employee_name"] = earnings_subset["employee_name"].astype(str).str.strip()
-        st.write("Preview of extracted TIN and Employee Name from Earnings Sheet (first 5 rows after skipping currency row):")
         st.dataframe(safe_display_df(earnings_subset.head()))
         # Verify the PAYE sheet has a 'tin' column.
@@ -90,7 +98,7 @@ def main():
         else:
             paye_df["tin"] = paye_df["tin"].astype(str).str.strip()
-        # Merge the employee name from earnings_subset onto the PAYE sheet using 'tin'
         merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
         st.write("### Merged PAYE Sheet with Employee Name")
         st.dataframe(safe_display_df(merged_df.head()))

 def standardize_tin_column(df: pd.DataFrame) -> pd.DataFrame:
     """
     Clean column names and rename any column that contains 'tin'
+    or both 'personal' and 'id' to 'tin'. Then strip extra spaces from all string values.
     """
     df.columns = [clean_column_name(col) for col in df.columns]
     rename_map = {}
             rename_map[col] = "tin"
     if rename_map:
         df = df.rename(columns=rename_map)
+    # Remove trailing and leading spaces in string cells
     for col in df.columns:
         if df[col].dtype == object:
             df[col] = df[col].astype(str).str.strip()
     return df
def read_file(file, skip_first_row=False) -> "pd.DataFrame | None":
    """
    Read an uploaded CSV or Excel file into a DataFrame.

    Args:
        file: File-like object exposing a ``name`` attribute; the name picks
            the parser and is echoed in the error message.
        skip_first_row: When True, the first row of the file is skipped
            before parsing (used for the earnings file, whose first row
            holds currency labels).

    Returns:
        The parsed DataFrame, or None if reading failed. On failure the
        error is reported to the user through ``st.error``.
    """
    skiprows = 1 if skip_first_row else None
    try:
        # Compare the extension case-insensitively so that a name such as
        # "REPORT.XLSX" is not mistakenly handed to the CSV parser.
        is_excel = file.name.lower().endswith((".xlsx", ".xls"))
        reader = pd.read_excel if is_excel else pd.read_csv
        return reader(file, skiprows=skiprows)
    except Exception as e:
        # Deliberate best-effort boundary: surface the problem in the UI and
        # let the caller check for None instead of crashing the app.
        st.error(f"Error reading {file.name}: {str(e)}")
        return None
 def main():
     st.title("Merge Employee Name from Earnings into PAYE Sheet")
     st.write(
+        "Upload an Earnings Sheet and a PAYE Sheet. The Earnings Sheet is assumed to have a first row with currency labels "
+        "which will be skipped. The app will extract the first two columns (TIN and Employee Name) from the Earnings Sheet, "
         "and merge the Employee Name onto the PAYE sheet using the cleaned TIN."
     )
     paye_file = st.file_uploader("Upload PAYE Sheet", type=["csv", "xlsx", "xls"], key="paye")
     if earnings_file and paye_file:
+        # Read the earnings file with the first row skipped and the PAYE file normally.
+        earnings_df = read_file(earnings_file, skip_first_row=True)
+        paye_df = read_file(paye_file, skip_first_row=False)
         if earnings_df is None or paye_df is None:
             st.error("One of the files could not be read. Please check the files and try again.")
             return
+        # Standardize columns and TIN values for both files.
         earnings_df = standardize_tin_column(earnings_df)
         paye_df = standardize_tin_column(paye_df)
+        # Debug: display unique TIN values from both files
+        st.write("Unique TIN values in Earnings file:", earnings_df.iloc[:, 0].unique())
+        if "tin" in paye_df.columns:
+            st.write("Unique TIN values in PAYE file:", paye_df["tin"].unique())
+        else:
+            st.write("PAYE file columns:", list(paye_df.columns))
+        # Check that the earnings file has at least two columns.
         if earnings_df.shape[1] < 2:
             st.error("Earnings sheet must have at least two columns (TIN and Employee Name).")
             return
+        # Extract the first two columns from the earnings file.
+        earnings_subset = earnings_df.iloc[:, :2].copy()
         earnings_subset.columns = ["tin", "employee_name"]
         earnings_subset["tin"] = earnings_subset["tin"].astype(str).str.strip()
         earnings_subset["employee_name"] = earnings_subset["employee_name"].astype(str).str.strip()
+        st.write("Preview of extracted TIN and Employee Name from Earnings Sheet:")
         st.dataframe(safe_display_df(earnings_subset.head()))
         # Verify the PAYE sheet has a 'tin' column.
         else:
             paye_df["tin"] = paye_df["tin"].astype(str).str.strip()
+        # Merge the employee name from earnings_subset onto the PAYE sheet using 'tin'.
         merged_df = paye_df.merge(earnings_subset, on="tin", how="left")
         st.write("### Merged PAYE Sheet with Employee Name")
         st.dataframe(safe_display_df(merged_df.head()))