Spaces:

rairo
/

OneExcelZimra

Sleeping

App Files Community

rairo committed on Feb 11, 2025

Commit

955b70b

verified ·

1 Parent(s): e19ef0e

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -37

app.py CHANGED Viewed

@@ -9,9 +9,7 @@ def standardize_tin(tin):
     if pd.isna(tin):
         return ""
     tin = str(tin).strip()
-    # Remove all spaces and standardize
-    tin = re.sub(r'\s+', '', tin)
-    # Add spaces back in standard format if it matches the pattern
     if re.match(r'^\d{2}-?\d{6}[A-Z]\d{2}$', tin):
         return f"{tin[:2]}-{tin[2:8]} {tin[8]} {tin[9:11]}"
     return tin
@@ -24,14 +22,7 @@ def clean_name(name):
 def process_employee_data(df):
     """Process employee personal information."""
-    # Standardize column names
-    df.columns = [col.strip() for col in df.columns]
-    # Extract employee details
-    required_columns = [
-        'TIN', 'First Name', 'Middle Name', 'Last Name',
-        'Birth Date', 'Employed From date', 'Employed To date', 'Position'
-    ]
     # Create Employee Name
     if 'First Name' in df.columns and 'Last Name' in df.columns:
@@ -41,46 +32,65 @@ def process_employee_data(df):
         )
     # Clean TIN
-    if 'TIN' in df.columns or 'Personal ID of Employee' in df.columns:
-        tin_col = 'TIN' if 'TIN' in df.columns else 'Personal ID of Employee'
         df['TIN'] = df[tin_col].apply(standardize_tin)
     return df
 def process_salary_data(df):
     """Process salary and deductions data."""
-    # Standardize column names
-    df.columns = [col.strip() for col in df.columns]
-    # Clean TIN column if present
-    if 'TIN' in df.columns or 'TIN or Personal ID of Employee' in df.columns:
-        tin_col = 'TIN' if 'TIN' in df.columns else 'TIN or Personal ID of Employee'
         df['TIN'] = df[tin_col].apply(standardize_tin)
-    # Convert numeric columns
     numeric_columns = df.select_dtypes(include=[np.number]).columns
-    for col in numeric_columns:
-        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
     return df
-def merge_dataframes(employee_df, salary_df):
-    """Merge employee and salary information."""
-    # Ensure TIN columns are standardized
-    employee_df['TIN'] = employee_df['TIN'].apply(standardize_tin)
-    salary_df['TIN'] = salary_df['TIN'].apply(standardize_tin)
-    # Merge on TIN
     merged_df = pd.merge(
         employee_df,
         salary_df,
         on='TIN',
         how='outer',
-        suffixes=('', '_y')
     )
     # Drop duplicate columns
-    duplicate_cols = [col for col in merged_df.columns if col.endswith('_y')]
     merged_df.drop(columns=duplicate_cols, inplace=True)
     # Fill missing numeric values with 0
@@ -94,14 +104,16 @@ def main():
     st.write("""
     Upload:
-    1. Employee Information File (with personal details)
-    2. Salary Information File (with financial data)
     """)
     employee_file = st.file_uploader("Upload Employee Information", type=['xlsx', 'xls'])
     salary_file = st.file_uploader("Upload Salary Information", type=['xlsx', 'xls'])
-    if employee_file and salary_file:
         try:
             # Process employee data
             employee_df = pd.read_excel(employee_file)
@@ -113,8 +125,13 @@ def main():
             salary_df = process_salary_data(salary_df)
             st.write("Salary data processed successfully")
             # Merge the dataframes
-            final_df = merge_dataframes(employee_df, salary_df)
             # Organize columns in desired order
             column_order = [
@@ -122,12 +139,9 @@ def main():
                 'Birth Date', 'Employed From date', 'Employed To date', 'Position'
             ]
-            # Add remaining columns in their original order
             remaining_cols = [col for col in final_df.columns if col not in column_order]
             column_order.extend(remaining_cols)
-            # Reorder columns
-            final_df = final_df[column_order]
             st.subheader("Master Payroll Data Preview")
             st.dataframe(final_df)

     if pd.isna(tin):
         return ""
     tin = str(tin).strip()
+    tin = re.sub(r'\s+', '', tin)  # Remove all spaces
     if re.match(r'^\d{2}-?\d{6}[A-Z]\d{2}$', tin):
         return f"{tin[:2]}-{tin[2:8]} {tin[8]} {tin[9:11]}"
     return tin
 def process_employee_data(df):
     """Process employee personal information."""
+    df.columns = [col.strip() for col in df.columns]  # Standardize column names
     # Create Employee Name
     if 'First Name' in df.columns and 'Last Name' in df.columns:
         )
     # Clean TIN
+    tin_col = 'TIN' if 'TIN' in df.columns else 'Personal ID of Employee'
+    if tin_col in df.columns:
         df['TIN'] = df[tin_col].apply(standardize_tin)
     return df
 def process_salary_data(df):
     """Process salary and deductions data."""
+    df.columns = [col.strip() for col in df.columns]  # Standardize column names
+    tin_col = 'TIN' if 'TIN' in df.columns else 'TIN or Personal ID of Employee'
+    if tin_col in df.columns:
+        df['TIN'] = df[tin_col].apply(standardize_tin)
+    numeric_columns = df.select_dtypes(include=[np.number]).columns
+    df[numeric_columns] = df[numeric_columns].fillna(0)
+    return df
+def process_paye_data(df):
+    """Process PAYE data."""
+    df.columns = [col.strip() for col in df.columns]  # Standardize column names
+    tin_col = 'TIN' if 'TIN' in df.columns else 'TIN or Personal ID of Employee'
+    if tin_col in df.columns:
         df['TIN'] = df[tin_col].apply(standardize_tin)
     numeric_columns = df.select_dtypes(include=[np.number]).columns
+    df[numeric_columns] = df[numeric_columns].fillna(0)
     return df
+def merge_dataframes(employee_df, salary_df, paye_df):
+    """Merge employee, salary, and PAYE information."""
+    # Standardize TIN columns
+    for df in [employee_df, salary_df, paye_df]:
+        if 'TIN' in df.columns:
+            df['TIN'] = df['TIN'].apply(standardize_tin)
+    # Merge salary into employee data
     merged_df = pd.merge(
         employee_df,
         salary_df,
         on='TIN',
         how='outer',
+        suffixes=('', '_salary')
+    )
+    # Merge PAYE into the merged dataset
+    merged_df = pd.merge(
+        merged_df,
+        paye_df,
+        on='TIN',
+        how='outer',
+        suffixes=('', '_paye')
     )
     # Drop duplicate columns
+    duplicate_cols = [col for col in merged_df.columns if col.endswith(('_salary', '_paye'))]
     merged_df.drop(columns=duplicate_cols, inplace=True)
     # Fill missing numeric values with 0
     st.write("""
     Upload:
+    1. Employee Information File (Template)
+    2. Salary (Earnings) Information File
+    3. PAYE Information File
     """)
     employee_file = st.file_uploader("Upload Employee Information", type=['xlsx', 'xls'])
     salary_file = st.file_uploader("Upload Salary Information", type=['xlsx', 'xls'])
+    paye_file = st.file_uploader("Upload PAYE Information", type=['xlsx', 'xls'])
+    if employee_file and salary_file and paye_file:
         try:
             # Process employee data
             employee_df = pd.read_excel(employee_file)
             salary_df = process_salary_data(salary_df)
             st.write("Salary data processed successfully")
+            # Process PAYE data
+            paye_df = pd.read_excel(paye_file)
+            paye_df = process_paye_data(paye_df)
+            st.write("PAYE data processed successfully")
             # Merge the dataframes
+            final_df = merge_dataframes(employee_df, salary_df, paye_df)
             # Organize columns in desired order
             column_order = [
                 'Birth Date', 'Employed From date', 'Employed To date', 'Position'
             ]
             remaining_cols = [col for col in final_df.columns if col not in column_order]
             column_order.extend(remaining_cols)
+            final_df = final_df[column_order]  # Reorder columns
             st.subheader("Master Payroll Data Preview")
             st.dataframe(final_df)