PeacebinfLow's picture
Update normalize.py
89b38f6 verified
raw
history blame contribute delete
643 Bytes
import pandas as pd
from typing import List, Optional
def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of *df* with gently normalized column names.

    Each column label is converted with ``str()``, has every byte-order
    mark (U+FEFF) removed, and is then stripped of leading and trailing
    whitespace. The input frame is not modified.

    Args:
        df: Frame whose column labels should be cleaned.

    Returns:
        A copy of ``df`` whose columns are the cleaned string labels.
    """
    cleaned = df.copy()
    # Drop the BOM *before* stripping: str.strip() does not treat U+FEFF as
    # whitespace, so stripping first would leave any whitespace that sat
    # between the BOM and the real name (e.g. "\ufeff name" -> " name").
    cleaned.columns = [
        str(label).replace("\ufeff", "").strip() for label in cleaned.columns
    ]
    return cleaned
def ensure_expected_columns(df: pd.DataFrame, expected: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Align the columns of *df* with an expected column list.

    When ``expected`` is given and non-empty, the result is built from a
    copy of ``df``:
    - every expected column that is absent is added, filled with ""
    - only the expected columns are kept, in the order given

    When ``expected`` is ``None`` or empty, ``df`` itself is returned
    unchanged (no copy is made).
    """
    if expected:
        aligned = df.copy()
        absent = [name for name in expected if name not in aligned.columns]
        for name in absent:
            # Missing columns are materialized as empty strings.
            aligned[name] = ""
        # Selecting by the expected list drops extras and fixes the order.
        return aligned[expected]
    return df