import functools
import json

import joblib
import pandas as pd
import yaml
from huggingface_hub import hf_hub_download

from src.preprocess import preprocess_input
|
|
|
|
def load_config(config_path: str = "config/config.yaml") -> dict:
    """Read the YAML file at ``config_path`` and return its parsed content.

    Args:
        config_path: Location of the YAML configuration file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.
    """
    with open(config_path, encoding="utf-8") as config_file:
        parsed = yaml.safe_load(config_file)
    return parsed
|
|
|
|
@functools.lru_cache(maxsize=1)
def load_model_and_info():
    """Fetch the model and its metadata from the Hugging Face Hub.

    The repository id and the two file names are read from the ``model``
    section of the configuration returned by ``load_config()``.

    The result is memoized: the first call resolves both files and
    deserializes the model, later calls return the same ``(model, model_info)``
    pair without touching the Hub or the disk again. Treat the returned
    objects as read-only, and call ``load_model_and_info.cache_clear()`` to
    force a reload (for example after the configuration changed).

    Returns:
        A ``(model, model_info)`` tuple: the object loaded with ``joblib`` and
        the dict parsed from the JSON info file.

    Raises:
        KeyError: If the configuration lacks ``model.repo_id``,
            ``model.filename`` or ``model.info_filename``.
    """
    model_config = load_config()["model"]
    repo_id = model_config["repo_id"]

    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_config["filename"],
        repo_type="model",
    )
    info_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_config["info_filename"],
        repo_type="model",
    )

    # SECURITY NOTE: joblib.load unpickles the file, which can execute
    # arbitrary code. Only point ``model.repo_id`` at a repository you trust.
    model = joblib.load(model_path)

    with open(info_path, "r", encoding="utf-8") as info_file:
        model_info = json.load(info_file)

    return model, model_info
|
|
|
|
def align_features_for_inference(input_df: pd.DataFrame, feature_columns: list[str]) -> pd.DataFrame:
    """Shape ``input_df`` into the column layout given by ``feature_columns``.

    Column names are stripped, lower-cased and have spaces replaced by
    underscores; the frame is then passed through ``pd.get_dummies`` and
    reindexed to ``feature_columns``. Columns absent from the encoded frame
    are filled with 0, and columns not listed are dropped.

    Args:
        input_df: Frame to align. It is not modified.
        feature_columns: Column names, in order, of the returned frame.

    Returns:
        A new DataFrame whose columns are exactly ``feature_columns``.
    """
    normalized_names = [
        name.strip().lower().replace(" ", "_") for name in input_df.columns
    ]

    # Work on a copy so the caller's frame keeps its original labels.
    frame = input_df.copy()
    frame.columns = normalized_names

    encoded = pd.get_dummies(frame, drop_first=False)
    return encoded.reindex(columns=feature_columns, fill_value=0)
|
|
|
|
def predict_input(input_df: pd.DataFrame) -> dict:
    """Run the model on ``input_df`` and report the result for its first row.

    The input goes through ``preprocess_input``, is aligned to the
    ``feature_columns`` list stored in the model info, and is then passed to
    the model's ``predict`` (and ``predict_proba`` when the model has one).

    Args:
        input_df: Raw input rows. Only the first row is reported.

    Returns:
        A dict with:
          - ``"prediction"``: the prediction for the first row, converted to
            a native Python value when the model returns a NumPy type.
          - ``"processed_input"``: the first aligned feature row as a dict.
          - ``"probabilities"``: the first row of ``predict_proba`` as a
            list; present only when the model exposes ``predict_proba``.

    Raises:
        ValueError: If no rows are left to predict on after preprocessing.
        KeyError: If the model info has no ``"feature_columns"`` entry.
    """
    model, model_info = load_model_and_info()

    processed_df = preprocess_input(input_df)
    aligned_df = align_features_for_inference(
        processed_df, model_info["feature_columns"]
    )

    # Fail with a clear message instead of an IndexError on ``[0]`` below.
    if len(aligned_df.index) == 0:
        raise ValueError("input_df must contain at least one row to predict on")

    first_prediction = model.predict(aligned_df)[0]
    # NumPy scalars/arrays expose ``tolist``; converting keeps the result
    # made of native Python values, like the other entries of the dict.
    if hasattr(first_prediction, "tolist"):
        first_prediction = first_prediction.tolist()

    result = {
        "prediction": first_prediction,
        "processed_input": aligned_df.to_dict(orient="records")[0],
    }

    if hasattr(model, "predict_proba"):
        probabilities = model.predict_proba(aligned_df)
        result["probabilities"] = probabilities[0].tolist()

    return result
|
|