Spaces:
Sleeping
Sleeping
Update inference/predictor.py
Browse files- inference/predictor.py +95 -55
inference/predictor.py
CHANGED
|
@@ -1,55 +1,95 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
from src.
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
MODEL_DIR = "models"
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
class CreditRiskPredictor:
|
| 12 |
-
def __init__(self):
|
| 13 |
-
self.model_path = get_latest_file(MODEL_DIR, "credit_model")
|
| 14 |
-
self.scaler_path = get_latest_file(MODEL_DIR, "scaler")
|
| 15 |
-
self.columns_path = get_latest_file(MODEL_DIR, "columns")
|
| 16 |
-
|
| 17 |
-
self.model = joblib.load(self.model_path)
|
| 18 |
-
self.scaler = joblib.load(self.scaler_path)
|
| 19 |
-
self.columns = joblib.load(self.columns_path)
|
| 20 |
-
|
| 21 |
-
print(f"Loaded model: {self.model_path}")
|
| 22 |
-
print(f"Loaded scaler: {self.scaler_path}")
|
| 23 |
-
print(f"Loaded columns: {self.columns_path}")
|
| 24 |
-
|
| 25 |
-
def predict(self, input_dict: dict):
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
#
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
#
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import joblib
|
| 4 |
+
|
| 5 |
+
from src.utils import get_latest_file
|
| 6 |
+
from src.preprocessing import clean_and_engineer
|
| 7 |
+
|
| 8 |
+
# Directory handed to get_latest_file() when locating the model, scaler and columns artifacts.
MODEL_DIR = "models"
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class CreditRiskPredictor:
    """Score a single applicant with the persisted credit-risk model.

    On construction the model, scaler and training column list are located
    through ``get_latest_file`` under ``MODEL_DIR`` and loaded with joblib.
    ``predict`` then turns one raw input record into a default probability,
    a credit score and a rating label.
    """

    def __init__(self):
        """Locate and load the model, scaler and training-column artifacts."""
        # Presumably get_latest_file returns the path of the newest artifact
        # whose name starts with the given prefix -- confirm in src.utils.
        self.model_path = get_latest_file(MODEL_DIR, "credit_model")
        self.scaler_path = get_latest_file(MODEL_DIR, "scaler")
        self.columns_path = get_latest_file(MODEL_DIR, "columns")

        # NOTE(security): joblib.load unpickles its input, which can execute
        # arbitrary code. Only artifacts produced by a trusted training run
        # should ever be placed in MODEL_DIR.
        self.model = joblib.load(self.model_path)
        self.scaler = joblib.load(self.scaler_path)
        self.columns = joblib.load(self.columns_path)

        print(f"Loaded model: {self.model_path}")
        print(f"Loaded scaler: {self.scaler_path}")
        print(f"Loaded columns: {self.columns_path}")

    def predict(self, input_dict: dict):
        """Score one applicant record.

        Args:
            input_dict: Raw feature values for a single applicant, keyed by
                column name. It becomes a one-row DataFrame.

        Returns:
            A ``(default_probability, credit_score, rating)`` tuple of
            ``(float, int, str)``.
        """
        # 1. Convert input to a one-row DataFrame
        df = pd.DataFrame([input_dict])

        # 2. Apply the shared preprocessing / feature engineering
        df = clean_and_engineer(df)

        # 3 + 4. One-hot encode and align with the training columns
        df = self._encode_and_align(df)

        # 5. Scale
        X_scaled = self.scaler.transform(df)

        # 6. Scorecard logic
        probability, credit_score, rating = self._calculate_scorecard_output(X_scaled)

        return probability, credit_score, rating

    def _encode_and_align(self, df):
        """One-hot encode ``df`` and reorder it to match ``self.columns``.

        ``drop_first`` is deliberately NOT used here. A one-row frame has a
        single level per categorical column, so ``drop_first=True`` would drop
        that only indicator and the reindex would zero-fill it, scoring every
        applicant as the baseline category. Encoding every level and then
        reindexing against the training columns gives the intended result:
        indicators known to the model are kept, while the baseline level (and
        any level unseen in training) has no matching column and is discarded.
        """
        encoded = pd.get_dummies(df)
        return encoded.reindex(columns=self.columns, fill_value=0)

    def _calculate_scorecard_output(self, X_scaled, base_score=300, scale_length=600):
        """Convert scaled features into probability, score and rating.

        Computes, for the first row of ``X_scaled``:
            x      = X_scaled . coef_.T + intercept_
            PD     = sigmoid(x)
            score  = base_score + (1 - PD) * scale_length   (truncated to int)
            rating = bucket(score)

        Args:
            X_scaled: 2-D array of scaled features; only the first row's
                result is returned.
            base_score: Score assigned when the default probability is 1.
            scale_length: Width of the score range above ``base_score``.

        Returns:
            ``(default_probability, credit_score, rating)``.
        """
        # Linear combination using the fitted model's weights
        x = np.dot(X_scaled, self.model.coef_.T) + self.model.intercept_

        # Sigmoid -> default probability
        default_probability = 1 / (1 + np.exp(-x))

        # Non-default probability
        non_default_probability = 1 - default_probability

        # Credit score: int() truncates toward zero rather than rounding
        credit_score = base_score + non_default_probability.flatten() * scale_length
        credit_score = int(credit_score[0])

        # Rating bucket
        rating = self._get_rating(credit_score)

        return float(default_probability.flatten()[0]), credit_score, rating

    def _get_rating(self, score) -> str:
        """Map a credit score to its rating label.

        Buckets: [300, 500) 'Poor', [500, 650) 'Average', [650, 750) 'Good',
        [750, 900] 'Excellent'. Anything outside 300-900 is 'Undefined'.
        """
        if 300 <= score < 500:
            return 'Poor'
        elif 500 <= score < 650:
            return 'Average'
        elif 650 <= score < 750:
            return 'Good'
        elif 750 <= score <= 900:
            return 'Excellent'
        else:
            return 'Undefined'
|