| import pandas as pd | |
| from typing import List, Optional | |
def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with gently normalized column names.

    Each column label is converted with ``str()``, has every U+FEFF
    (byte-order mark) character removed, and is then stripped of leading
    and trailing whitespace. The input frame itself is not modified.
    """
    df = df.copy()
    # Remove the BOM *before* stripping: U+FEFF is not whitespace, so a
    # leading or trailing BOM would otherwise shield adjacent whitespace
    # from strip() and leave it behind once the BOM is removed.
    df.columns = [str(c).replace("\ufeff", "").strip() for c in df.columns]
    return df
def ensure_expected_columns(df: pd.DataFrame, expected: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Conform ``df`` to the column layout given by ``expected``.

    When ``expected`` is falsy (``None`` or empty) the input frame is
    returned as-is. Otherwise a new frame is returned whose columns are
    exactly ``expected``, in that order: columns missing from ``df`` are
    added filled with empty strings, and columns not listed in
    ``expected`` are dropped. The input frame is not modified.
    """
    if not expected:
        return df

    # Work on a copy so that adding placeholder columns never touches the
    # caller's frame.
    result = df.copy()
    absent = [name for name in expected if name not in result.columns]
    for name in absent:
        result[name] = ""

    # Selecting by the expected list both reorders and drops the extras.
    return result[expected]