OC_P8_prod / models /export_preprocessor.py
GitHub Actions
Sync to HF Spaces [no-ci]
178345a
raw
history blame contribute delete
621 Bytes
"""Create and persist the preprocessing transformer used by the API.
Run this script after you change `data/processed/features_train.csv` to refresh the
serialized preprocessor at `models/preprocessor.joblib`.
"""
from pathlib import Path
import joblib
from src.preprocessing import RawToModelTransformer
# Output location for the serialized preprocessor. Both paths are relative,
# so they resolve against the current working directory at run time.
MODEL_DIR = Path("models")
PREPROC_PATH = MODEL_DIR.joinpath("preprocessor.joblib")

# Make sure the target directory exists before anything is built or written.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): the transformer is constructed with no arguments; presumably it
# infers its expected features from the processed training data -- confirm in
# src.preprocessing.
pre = RawToModelTransformer()
_feature_names = pre.get_feature_names_out()
print(f"Inferred {len(_feature_names)} expected features")

# Persist the transformer, then report the absolute path that was written.
joblib.dump(pre, PREPROC_PATH)
_saved_to = PREPROC_PATH.resolve()
print(f"Preprocessor saved to {_saved_to}")