Spaces:
Running
Running
"""Build the preprocessing transformer consumed by the API and write it to disk.

Re-run this script whenever `data/processed/features_train.csv` changes, so that
the serialized preprocessor stored at `models/preprocessor.joblib` is regenerated.
"""
from pathlib import Path

import joblib

from src.preprocessing import RawToModelTransformer

# Destination of the serialized transformer, relative to the working directory.
MODEL_DIR = Path("models")
PREPROC_PATH = MODEL_DIR.joinpath("preprocessor.joblib")

# Create the output directory up front; a no-op when it already exists.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Instantiate the transformer and report how many feature names it exposes.
pre = RawToModelTransformer()
print(f"Inferred {len(pre.get_feature_names_out())} expected features")

# Persist the transformer and echo the absolute path it was written to.
joblib.dump(pre, PREPROC_PATH)
print(f"Preprocessor saved to {PREPROC_PATH.resolve()}")