File size: 621 Bytes
178345a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
"""Create and persist the preprocessing transformer used by the API.

Run this script after you change `data/processed/features_train.csv` to refresh the
serialized preprocessor at `models/preprocessor.joblib`.
"""
from pathlib import Path
import joblib

from src.preprocessing import RawToModelTransformer

# Output location of the serialized preprocessor.
MODEL_DIR = Path("models")
PREPROC_PATH = MODEL_DIR.joinpath("preprocessor.joblib")

# Create the target directory (and any missing parents) before writing;
# an already-existing directory is not an error.
MODEL_DIR.mkdir(exist_ok=True, parents=True)

# Build the transformer with its default arguments and report how many output
# features it exposes. No fit call happens in this script.
# NOTE(review): presumably the constructor derives the expected features
# itself (see src.preprocessing) -- confirm.
pre = RawToModelTransformer()
print(f"Inferred {len(pre.get_feature_names_out())} expected features")

# Persist the transformer and echo the absolute path that was written.
joblib.dump(value=pre, filename=PREPROC_PATH)
print(f"Preprocessor saved to {PREPROC_PATH.resolve()}")