PeacebinfLow committed on
Commit
89b38f6
·
verified ·
1 Parent(s): dbf3464

Update normalize.py

Browse files
Files changed (1) hide show
  1. normalize.py +25 -9
normalize.py CHANGED
@@ -1,11 +1,27 @@
1
  import pandas as pd
2
- from typing import List
3
 
4
def normalize_csv_order(csv_path: str, out_path: str, expected_cols: List[str]) -> str:
    """Rewrite a CSV so that it contains exactly ``expected_cols``, in order.

    The file at ``csv_path`` is loaded with pandas; any expected column that
    is absent is created and filled with ``None``; columns not listed in
    ``expected_cols`` are dropped. The result is written to ``out_path``
    without the index column.

    Returns:
        ``out_path``, unchanged, so the call can be chained.
    """
    frame = pd.read_csv(csv_path)
    # Collect the absent names up front, then create them as empty columns.
    absent = [name for name in expected_cols if name not in frame.columns]
    for name in absent:
        frame[name] = None
    # Selecting by the expected list both reorders and drops extra columns.
    frame[expected_cols].to_csv(out_path, index=False)
    return out_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
+ from typing import List, Optional
3
 
4
def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with cleaned-up column names.

    Gentle normalization, applied to every column label:
    - convert the label to ``str``
    - remove any byte-order mark (U+FEFF)
    - strip leading and trailing whitespace

    The input frame is not modified.
    """
    df = df.copy()
    # Remove the BOM *before* stripping: U+FEFF is not whitespace for
    # str.strip(), so stripping first would leave any whitespace that sits
    # between the BOM and the real name (e.g. "\ufeff  id" -> "  id").
    df.columns = [str(c).replace("\ufeff", "").strip() for c in df.columns]
    return df
13
+
14
def ensure_expected_columns(df: pd.DataFrame, expected: Optional[List[str]] = None) -> pd.DataFrame:
    """Conform ``df`` to an expected column layout.

    If ``expected`` is provided (and non-empty):
    - missing columns are added, filled with the empty string
    - extra columns not in ``expected`` are dropped
    - columns are returned in the order given by ``expected``

    If ``expected`` is ``None`` or empty, ``df`` itself is returned unchanged
    (the same object, not a copy). Otherwise the input frame is left
    untouched and a new frame is returned.
    """
    if not expected:
        return df
    # Materialize as a list: pandas treats a tuple passed to ``df[...]`` as a
    # single column key, so a tuple of names would otherwise raise KeyError.
    wanted = list(expected)
    result = df.copy()
    for col in wanted:
        if col not in result.columns:
            result[col] = ""
    return result[wanted]