rairo committed on
Commit
955b70b
·
verified ·
1 Parent(s): e19ef0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -37
app.py CHANGED
@@ -9,9 +9,7 @@ def standardize_tin(tin):
9
  if pd.isna(tin):
10
  return ""
11
  tin = str(tin).strip()
12
- # Remove all spaces and standardize
13
- tin = re.sub(r'\s+', '', tin)
14
- # Add spaces back in standard format if it matches the pattern
15
  if re.match(r'^\d{2}-?\d{6}[A-Z]\d{2}$', tin):
16
  return f"{tin[:2]}-{tin[2:8]} {tin[8]} {tin[9:11]}"
17
  return tin
@@ -24,14 +22,7 @@ def clean_name(name):
24
 
25
  def process_employee_data(df):
26
  """Process employee personal information."""
27
- # Standardize column names
28
- df.columns = [col.strip() for col in df.columns]
29
-
30
- # Extract employee details
31
- required_columns = [
32
- 'TIN', 'First Name', 'Middle Name', 'Last Name',
33
- 'Birth Date', 'Employed From date', 'Employed To date', 'Position'
34
- ]
35
 
36
  # Create Employee Name
37
  if 'First Name' in df.columns and 'Last Name' in df.columns:
@@ -41,46 +32,65 @@ def process_employee_data(df):
41
  )
42
 
43
  # Clean TIN
44
- if 'TIN' in df.columns or 'Personal ID of Employee' in df.columns:
45
- tin_col = 'TIN' if 'TIN' in df.columns else 'Personal ID of Employee'
46
  df['TIN'] = df[tin_col].apply(standardize_tin)
47
 
48
  return df
49
 
50
  def process_salary_data(df):
51
  """Process salary and deductions data."""
52
- # Standardize column names
53
- df.columns = [col.strip() for col in df.columns]
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- # Clean TIN column if present
56
- if 'TIN' in df.columns or 'TIN or Personal ID of Employee' in df.columns:
57
- tin_col = 'TIN' if 'TIN' in df.columns else 'TIN or Personal ID of Employee'
58
  df['TIN'] = df[tin_col].apply(standardize_tin)
59
 
60
- # Convert numeric columns
61
  numeric_columns = df.select_dtypes(include=[np.number]).columns
62
- for col in numeric_columns:
63
- df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
64
 
65
  return df
66
 
67
- def merge_dataframes(employee_df, salary_df):
68
- """Merge employee and salary information."""
69
- # Ensure TIN columns are standardized
70
- employee_df['TIN'] = employee_df['TIN'].apply(standardize_tin)
71
- salary_df['TIN'] = salary_df['TIN'].apply(standardize_tin)
 
72
 
73
- # Merge on TIN
74
  merged_df = pd.merge(
75
  employee_df,
76
  salary_df,
77
  on='TIN',
78
  how='outer',
79
- suffixes=('', '_y')
 
 
 
 
 
 
 
 
 
80
  )
81
 
82
  # Drop duplicate columns
83
- duplicate_cols = [col for col in merged_df.columns if col.endswith('_y')]
84
  merged_df.drop(columns=duplicate_cols, inplace=True)
85
 
86
  # Fill missing numeric values with 0
@@ -94,14 +104,16 @@ def main():
94
 
95
  st.write("""
96
  Upload:
97
- 1. Employee Information File (with personal details)
98
- 2. Salary Information File (with financial data)
 
99
  """)
100
 
101
  employee_file = st.file_uploader("Upload Employee Information", type=['xlsx', 'xls'])
102
  salary_file = st.file_uploader("Upload Salary Information", type=['xlsx', 'xls'])
 
103
 
104
- if employee_file and salary_file:
105
  try:
106
  # Process employee data
107
  employee_df = pd.read_excel(employee_file)
@@ -113,8 +125,13 @@ def main():
113
  salary_df = process_salary_data(salary_df)
114
  st.write("Salary data processed successfully")
115
 
 
 
 
 
 
116
  # Merge the dataframes
117
- final_df = merge_dataframes(employee_df, salary_df)
118
 
119
  # Organize columns in desired order
120
  column_order = [
@@ -122,12 +139,9 @@ def main():
122
  'Birth Date', 'Employed From date', 'Employed To date', 'Position'
123
  ]
124
 
125
- # Add remaining columns in their original order
126
  remaining_cols = [col for col in final_df.columns if col not in column_order]
127
  column_order.extend(remaining_cols)
128
-
129
- # Reorder columns
130
- final_df = final_df[column_order]
131
 
132
  st.subheader("Master Payroll Data Preview")
133
  st.dataframe(final_df)
 
9
  if pd.isna(tin):
10
  return ""
11
  tin = str(tin).strip()
12
+ tin = re.sub(r'\s+', '', tin) # Remove all spaces
 
 
13
  if re.match(r'^\d{2}-?\d{6}[A-Z]\d{2}$', tin):
14
  return f"{tin[:2]}-{tin[2:8]} {tin[8]} {tin[9:11]}"
15
  return tin
 
22
 
23
  def process_employee_data(df):
24
  """Process employee personal information."""
25
+ df.columns = [col.strip() for col in df.columns] # Standardize column names
 
 
 
 
 
 
 
26
 
27
  # Create Employee Name
28
  if 'First Name' in df.columns and 'Last Name' in df.columns:
 
32
  )
33
 
34
  # Clean TIN
35
+ tin_col = 'TIN' if 'TIN' in df.columns else 'Personal ID of Employee'
36
+ if tin_col in df.columns:
37
  df['TIN'] = df[tin_col].apply(standardize_tin)
38
 
39
  return df
40
 
41
  def process_salary_data(df):
42
  """Process salary and deductions data."""
43
+ df.columns = [col.strip() for col in df.columns] # Standardize column names
44
+
45
+ tin_col = 'TIN' if 'TIN' in df.columns else 'TIN or Personal ID of Employee'
46
+ if tin_col in df.columns:
47
+ df['TIN'] = df[tin_col].apply(standardize_tin)
48
+
49
+ numeric_columns = df.select_dtypes(include=[np.number]).columns
50
+ df[numeric_columns] = df[numeric_columns].fillna(0)
51
+
52
+ return df
53
+
54
+ def process_paye_data(df):
55
+ """Process PAYE data."""
56
+ df.columns = [col.strip() for col in df.columns] # Standardize column names
57
 
58
+ tin_col = 'TIN' if 'TIN' in df.columns else 'TIN or Personal ID of Employee'
59
+ if tin_col in df.columns:
 
60
  df['TIN'] = df[tin_col].apply(standardize_tin)
61
 
 
62
  numeric_columns = df.select_dtypes(include=[np.number]).columns
63
+ df[numeric_columns] = df[numeric_columns].fillna(0)
 
64
 
65
  return df
66
 
67
+ def merge_dataframes(employee_df, salary_df, paye_df):
68
+ """Merge employee, salary, and PAYE information."""
69
+ # Standardize TIN columns
70
+ for df in [employee_df, salary_df, paye_df]:
71
+ if 'TIN' in df.columns:
72
+ df['TIN'] = df['TIN'].apply(standardize_tin)
73
 
74
+ # Merge salary into employee data
75
  merged_df = pd.merge(
76
  employee_df,
77
  salary_df,
78
  on='TIN',
79
  how='outer',
80
+ suffixes=('', '_salary')
81
+ )
82
+
83
+ # Merge PAYE into the merged dataset
84
+ merged_df = pd.merge(
85
+ merged_df,
86
+ paye_df,
87
+ on='TIN',
88
+ how='outer',
89
+ suffixes=('', '_paye')
90
  )
91
 
92
  # Drop duplicate columns
93
+ duplicate_cols = [col for col in merged_df.columns if col.endswith(('_salary', '_paye'))]
94
  merged_df.drop(columns=duplicate_cols, inplace=True)
95
 
96
  # Fill missing numeric values with 0
 
104
 
105
  st.write("""
106
  Upload:
107
+ 1. Employee Information File (Template)
108
+ 2. Salary (Earnings) Information File
109
+ 3. PAYE Information File
110
  """)
111
 
112
  employee_file = st.file_uploader("Upload Employee Information", type=['xlsx', 'xls'])
113
  salary_file = st.file_uploader("Upload Salary Information", type=['xlsx', 'xls'])
114
+ paye_file = st.file_uploader("Upload PAYE Information", type=['xlsx', 'xls'])
115
 
116
+ if employee_file and salary_file and paye_file:
117
  try:
118
  # Process employee data
119
  employee_df = pd.read_excel(employee_file)
 
125
  salary_df = process_salary_data(salary_df)
126
  st.write("Salary data processed successfully")
127
 
128
+ # Process PAYE data
129
+ paye_df = pd.read_excel(paye_file)
130
+ paye_df = process_paye_data(paye_df)
131
+ st.write("PAYE data processed successfully")
132
+
133
  # Merge the dataframes
134
+ final_df = merge_dataframes(employee_df, salary_df, paye_df)
135
 
136
  # Organize columns in desired order
137
  column_order = [
 
139
  'Birth Date', 'Employed From date', 'Employed To date', 'Position'
140
  ]
141
 
 
142
  remaining_cols = [col for col in final_df.columns if col not in column_order]
143
  column_order.extend(remaining_cols)
144
+ final_df = final_df[column_order] # Reorder columns
 
 
145
 
146
  st.subheader("Master Payroll Data Preview")
147
  st.dataframe(final_df)