import os
import pickle

import pandas as pd

from app.core.config import settings


class MLService:
    """Load a pickled model from disk and serve single-record predictions.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self) -> None:
        """Initialise empty state and attempt to load the model immediately."""
        # The unpickled model object; stays None when the model file is absent.
        self.model = None
        # Column names the model was fitted on (taken from the model's
        # ``feature_names_in_`` attribute when it has one), otherwise None.
        self.expected_features = None
        self.load_model()

    def load_model(self) -> None:
        """Unpickle the model found at ``settings.MODEL_PATH``.

        If the file does not exist a warning is printed and the current state
        is left untouched. Errors raised while opening or unpickling the file
        are not caught here and propagate to the caller.
        """
        model_path = settings.MODEL_PATH
        if not os.path.exists(model_path):
            print(f"Warning: Model file not found at {model_path}")
            return

        print(f"Loading model from {model_path}...")
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # The path comes from configuration; it must only ever point at a
        # trusted artifact.
        with open(model_path, 'rb') as f:
            self.model = pickle.load(f)

        if hasattr(self.model, "feature_names_in_"):
            self.expected_features = self.model.feature_names_in_
            print(f"Model expects {len(self.expected_features)} features.")
        else:
            # Clear any value left over from a previously loaded model so that
            # predict() does not align input to the wrong set of columns.
            self.expected_features = None
            print("Warning: Model does not have feature_names_in_. Preprocessing might fail.")
        print("Model loaded successfully.")

    def predict(self, input_data: dict) -> tuple:
        """Run the model on a single record.

        Args:
            input_data: Mapping of raw feature name to value for one record.

        Returns:
            A ``(prediction, probability)`` tuple. ``prediction`` is the
            model's first output converted to ``int``. ``probability`` is
            ``probs[0][1]`` from ``predict_proba`` as a ``float`` (this assumes
            a binary classifier), or ``None`` when the model has no
            ``predict_proba`` or the call fails.

        Raises:
            RuntimeError: If no model has been loaded.
        """
        # Identity check rather than truthiness: a loaded model object may
        # define __len__/__bool__ and must not be mistaken for "not loaded".
        if self.model is None:
            raise RuntimeError("Model is not loaded")

        # One-row frame built from the input record.
        df = pd.DataFrame([input_data])

        # One-hot encode categorical columns.
        df_encoded = pd.get_dummies(df)

        if self.expected_features is not None:
            # reindex adds columns the model expects but the input lacks
            # (filled with 0), drops columns the model does not know, and puts
            # the columns in the model's order.
            df_final = df_encoded.reindex(columns=self.expected_features, fill_value=0)
        else:
            df_final = df_encoded

        prediction = self.model.predict(df_final)[0]

        # Probability is best-effort: any failure is reported and the result
        # falls back to None instead of failing the whole prediction.
        probability = None
        if hasattr(self.model, "predict_proba"):
            try:
                probs = self.model.predict_proba(df_final)
                probability = float(probs[0][1])  # Assuming binary classification
            except Exception as e:
                print(f"Could not get probability: {e}")

        return int(prediction), probability


# Module-level instance; constructing it triggers load_model() at import time.
ml_service = MLService()