rairo committed on
Commit
9f460af
·
verified ·
1 Parent(s): 6876163

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -20,9 +20,14 @@ def clean_name(name):
20
  return ""
21
  return " ".join(str(name).upper().strip().split())
22
 
 
 
 
 
 
23
  def process_employee_data(df):
24
  """Process employee personal information."""
25
- df.columns = [col.strip() for col in df.columns]
26
 
27
  # Create Employee Name if possible
28
  if 'First Name' in df.columns and 'Last Name' in df.columns:
@@ -43,7 +48,7 @@ def process_employee_data(df):
43
 
44
  def process_salary_data(df):
45
  """Process salary and deductions data."""
46
- df.columns = [col.strip() for col in df.columns]
47
 
48
  if 'TIN' in df.columns:
49
  df['TIN'] = df['TIN'].apply(standardize_tin)
@@ -52,7 +57,6 @@ def process_salary_data(df):
52
  else:
53
  raise KeyError("Salary data must contain a 'TIN' or 'TIN or Personal ID of Employee' column.")
54
 
55
- # Convert numeric columns and fill NaNs with 0
56
  numeric_columns = df.select_dtypes(include=[np.number]).columns
57
  df[numeric_columns] = df[numeric_columns].fillna(0)
58
 
@@ -60,7 +64,7 @@ def process_salary_data(df):
60
 
61
  def process_paye_data(df):
62
  """Process PAYE data."""
63
- df.columns = [col.strip() for col in df.columns]
64
 
65
  if 'TIN' in df.columns:
66
  df['TIN'] = df['TIN'].apply(standardize_tin)
 
20
  return ""
21
  return " ".join(str(name).upper().strip().split())
22
 
23
+ def normalize_columns(df):
24
+ """Replace newline characters and extra spaces in column headers."""
25
+ df.columns = [col.replace("\n", " ").strip() for col in df.columns]
26
+ return df
27
+
28
  def process_employee_data(df):
29
  """Process employee personal information."""
30
+ df = normalize_columns(df)
31
 
32
  # Create Employee Name if possible
33
  if 'First Name' in df.columns and 'Last Name' in df.columns:
 
48
 
49
  def process_salary_data(df):
50
  """Process salary and deductions data."""
51
+ df = normalize_columns(df)
52
 
53
  if 'TIN' in df.columns:
54
  df['TIN'] = df['TIN'].apply(standardize_tin)
 
57
  else:
58
  raise KeyError("Salary data must contain a 'TIN' or 'TIN or Personal ID of Employee' column.")
59
 
 
60
  numeric_columns = df.select_dtypes(include=[np.number]).columns
61
  df[numeric_columns] = df[numeric_columns].fillna(0)
62
 
 
64
 
65
  def process_paye_data(df):
66
  """Process PAYE data."""
67
+ df = normalize_columns(df)
68
 
69
  if 'TIN' in df.columns:
70
  df['TIN'] = df['TIN'].apply(standardize_tin)