prediction-api / app /services /ml_service.py
3v324v23's picture
feat(api): Implement FastAPI endpoints and ML service
48c8b68
import pickle
import pandas as pd
import os
from app.core.config import settings
class MLService:
    """Load a pickled model and serve single-record predictions.

    Attributes set by :meth:`load_model`:

    * ``model`` -- the unpickled model object, or ``None`` while no model
      file has been loaded.
    * ``expected_features`` -- the model's ``feature_names_in_`` when it
      exposes that attribute, otherwise ``None``.
    """

    def __init__(self):
        self.model = None
        self.expected_features = None
        self.load_model()

    def load_model(self):
        """Load the model stored at ``settings.MODEL_PATH``.

        A missing model file is reported on stdout and leaves the current
        state untouched, so the service can still be constructed without a
        model. Errors raised while opening or unpickling the file propagate
        to the caller.
        """
        model_path = settings.MODEL_PATH
        # isfile (not exists): a directory at this path is not a loadable
        # model and would otherwise fail inside open().
        if not os.path.isfile(model_path):
            print(f"Warning: Model file not found at {model_path}")
            return

        print(f"Loading model from {model_path}...")
        # SECURITY: unpickling can execute arbitrary code. MODEL_PATH must
        # point to a trusted artifact, never to user-supplied content.
        with open(model_path, 'rb') as f:
            loaded = pickle.load(f)

        self.model = loaded
        # Always overwrite the feature list so a reload never keeps the
        # column names of a previously loaded model.
        self.expected_features = getattr(loaded, "feature_names_in_", None)

        if self.expected_features is not None:
            print(f"Model expects {len(self.expected_features)} features.")
        else:
            print("Warning: Model does not have feature_names_in_. Preprocessing might fail.")
        print("Model loaded successfully.")

    def _prepare_features(self, input_data: dict):
        """Turn one input record into the single-row frame given to the model."""
        # One-hot encode the categorical (string) columns of the record.
        encoded = pd.get_dummies(pd.DataFrame([input_data]))
        if self.expected_features is None:
            return encoded
        # reindex adds expected columns that are absent (filled with 0),
        # drops columns the model does not expect, and puts the columns in
        # the model's order.
        return encoded.reindex(columns=self.expected_features, fill_value=0)

    def predict(self, input_data: dict) -> tuple:
        """Predict for a single record.

        Args:
            input_data: Mapping of raw feature name to value for one record.

        Returns:
            A ``(prediction, probability)`` tuple. ``prediction`` is the
            model output converted with ``int``. ``probability`` is the
            second column of ``predict_proba`` for the record as a float
            (this assumes binary classification), or ``None`` when the
            model has no ``predict_proba`` or the call fails.

        Raises:
            RuntimeError: If no model has been loaded.
        """
        # Compare against None explicitly: a loaded model may be falsy
        # (for example when it defines __len__), which must not be treated
        # as "not loaded".
        if self.model is None:
            raise RuntimeError("Model is not loaded")

        df_final = self._prepare_features(input_data)
        prediction = self.model.predict(df_final)[0]

        probability = None
        if hasattr(self.model, "predict_proba"):
            # Best effort: the probability is optional, so a failure here is
            # reported and the prediction is still returned.
            try:
                probs = self.model.predict_proba(df_final)
                probability = float(probs[0][1])  # Assuming binary classification
            except Exception as e:
                print(f"Could not get probability: {e}")

        return int(prediction), probability
# Module-level service instance. Constructing it calls MLService.load_model(),
# so the model load is attempted when this module is imported.
ml_service = MLService()