File size: 2,396 Bytes
48c8b68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pickle
import pandas as pd
import os
from app.core.config import settings

class MLService:
    """Load a pickled model and serve single-record predictions.

    Attributes:
        model: The unpickled model object, or ``None`` when no model file
            was found at ``settings.MODEL_PATH``.
        expected_features: The model's ``feature_names_in_`` (column names
            and order), or ``None`` when the model does not expose it.
    """

    def __init__(self):
        """Initialise empty state and try to load the model right away."""
        self.model = None
        self.expected_features = None
        self.load_model()

    def load_model(self):
        """Load the model from ``settings.MODEL_PATH`` and cache its features.

        A missing file is reported with a warning and leaves the current
        state untouched. Errors raised while opening or unpickling the file
        propagate to the caller.
        """
        model_path = settings.MODEL_PATH
        if not os.path.exists(model_path):
            print(f"Warning: Model file not found at {model_path}")
            return

        print(f"Loading model from {model_path}...")
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point MODEL_PATH at artifacts from a trusted source.
        with open(model_path, 'rb') as f:
            self.model = pickle.load(f)

        # Always overwrite the cached feature list so that a reload never
        # keeps the column layout of a previously loaded model.
        self.expected_features = getattr(self.model, "feature_names_in_", None)
        if self.expected_features is not None:
            print(f"Model expects {len(self.expected_features)} features.")
        else:
            print("Warning: Model does not have feature_names_in_. Preprocessing might fail.")

        print("Model loaded successfully.")

    def _prepare_features(self, input_data: dict):
        """Turn one input record into the one-row frame passed to the model."""
        frame = pd.DataFrame([input_data])

        # One-hot encode the categorical columns. With a single row only the
        # observed category of each column gets a dummy column; the reindex
        # below supplies the remaining ones.
        encoded = pd.get_dummies(frame)

        if self.expected_features is None:
            return encoded

        # reindex adds missing columns (filled with 0), drops columns the
        # model does not know, and puts the columns in the model's order.
        return encoded.reindex(columns=self.expected_features, fill_value=0)

    def predict(self, input_data: dict) -> tuple:
        """Predict the class for a single record.

        Args:
            input_data: Mapping of raw feature name to value for one record.

        Returns:
            A ``(prediction, probability)`` tuple. ``prediction`` is the
            model output converted with ``int``. ``probability`` is the
            second column of ``predict_proba`` as a float (binary
            classification is assumed), or ``None`` when the model has no
            ``predict_proba`` or the call fails.

        Raises:
            RuntimeError: If no model has been loaded.
        """
        # Compare against None explicitly: a loaded model object that defines
        # __len__ or __bool__ may be falsy and must not be treated as missing.
        if self.model is None:
            raise RuntimeError("Model is not loaded")

        features = self._prepare_features(input_data)
        prediction = self.model.predict(features)[0]

        # The probability is best-effort: a failure here is reported but
        # must not discard the prediction itself.
        probability = None
        if hasattr(self.model, "predict_proba"):
            try:
                probs = self.model.predict_proba(features)
                probability = float(probs[0][1])  # Assuming binary classification
            except Exception as e:
                print(f"Could not get probability: {e}")

        return int(prediction), probability

# Module-level instance created at import time. Constructing it calls
# load_model(), so importing this module reads settings.MODEL_PATH and, if the
# file exists, unpickles the model.
ml_service = MLService()