junaid17 committed on
Commit
795b3fe
·
verified ·
1 Parent(s): 0c661f9

Update inference/predictor.py

Browse files
Files changed (1) hide show
  1. inference/predictor.py +95 -55
inference/predictor.py CHANGED
@@ -1,55 +1,95 @@
1
- import pandas as pd
2
- import joblib
3
-
4
- from src.utils import get_latest_file
5
- from src.preprocessing import clean_and_engineer
6
-
7
-
8
- MODEL_DIR = "models"
9
-
10
-
11
class CreditRiskPredictor:
    """Score one applicant using the saved model, scaler and column list.

    The three artifacts are located with ``get_latest_file`` inside
    ``MODEL_DIR`` and loaded with ``joblib`` when the object is created.
    """

    def __init__(self):
        # Resolve the artifact paths first so they can be reported below.
        self.model_path = get_latest_file(MODEL_DIR, "credit_model")
        self.scaler_path = get_latest_file(MODEL_DIR, "scaler")
        self.columns_path = get_latest_file(MODEL_DIR, "columns")

        self.model = joblib.load(self.model_path)
        self.scaler = joblib.load(self.scaler_path)
        self.columns = joblib.load(self.columns_path)

        print(f"Loaded model: {self.model_path}")
        print(f"Loaded scaler: {self.scaler_path}")
        print(f"Loaded columns: {self.columns_path}")

    def predict(self, input_dict: dict):
        """Return ``(probability, credit_score, rating)`` for one applicant.

        Args:
            input_dict: Raw applicant fields; it becomes a one-row DataFrame
                that is passed through ``clean_and_engineer``.

        Returns:
            A tuple of the class-1 probability from ``predict_proba``, an
            integer score computed as ``900 - probability * 600``, and a
            rating label ("Good", "Average" or "High Risk").
        """
        df = pd.DataFrame([input_dict])

        # Apply the same preprocessing function used elsewhere in the project.
        df = clean_and_engineer(df)

        df = self._encode_and_align(df)

        X_scaled = self.scaler.transform(df)

        # Column 1 of predict_proba is taken as the default probability.
        probability = self.model.predict_proba(X_scaled)[0][1]

        # Simple linear mapping of probability onto a 300-900 score.
        credit_score = int(900 - (probability * 600))

        if probability < 0.3:
            rating = "Good"
        elif probability < 0.6:
            rating = "Average"
        else:
            rating = "High Risk"

        return probability, credit_score, rating

    def _encode_and_align(self, df):
        """One-hot encode ``df`` and align it to ``self.columns``."""
        # A one-row frame has exactly one level per categorical column, so
        # drop_first=True would drop that level and every one-hot feature
        # would end up 0 after the reindex. Encode every level instead and let
        # reindex discard the columns that are not in self.columns.
        # NOTE(review): assumes self.columns holds the post-encoding training
        # column names -- confirm against the training script.
        encoded = pd.get_dummies(df, drop_first=False)
        return encoded.reindex(columns=self.columns, fill_value=0)
55
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+
5
+ from src.utils import get_latest_file
6
+ from src.preprocessing import clean_and_engineer
7
+
8
+ MODEL_DIR = "models"
9
+
10
+
11
class CreditRiskPredictor:
    """Score one applicant using the saved model, scaler and column list.

    The three artifacts are located with ``get_latest_file`` inside
    ``MODEL_DIR`` and loaded with ``joblib`` when the object is created.
    The loaded model must expose ``coef_`` and ``intercept_`` because the
    score is computed directly from the linear decision function.
    """

    def __init__(self):
        # Resolve the artifact paths first so they can be reported below.
        self.model_path = get_latest_file(MODEL_DIR, "credit_model")
        self.scaler_path = get_latest_file(MODEL_DIR, "scaler")
        self.columns_path = get_latest_file(MODEL_DIR, "columns")

        self.model = joblib.load(self.model_path)
        self.scaler = joblib.load(self.scaler_path)
        self.columns = joblib.load(self.columns_path)

        print(f"Loaded model: {self.model_path}")
        print(f"Loaded scaler: {self.scaler_path}")
        print(f"Loaded columns: {self.columns_path}")

    def predict(self, input_dict: dict):
        """Return ``(default_probability, credit_score, rating)`` for one applicant.

        Args:
            input_dict: Raw applicant fields; it becomes a one-row DataFrame
                that is passed through ``clean_and_engineer``.

        Returns:
            A tuple of the default probability (float), the integer credit
            score and the rating label.
        """
        # 1. Convert input to DataFrame
        df = pd.DataFrame([input_dict])

        # 2. Apply preprocessing
        df = clean_and_engineer(df)

        # 3-4. One-hot encode and align columns
        df = self._encode_and_align(df)

        # 5. Scale
        X_scaled = self.scaler.transform(df)

        # 6. Scorecard logic
        return self._calculate_scorecard_output(X_scaled)

    def _encode_and_align(self, df):
        """One-hot encode ``df`` and align it to ``self.columns``."""
        # A one-row frame has exactly one level per categorical column, so
        # drop_first=True would drop that level and every one-hot feature
        # would end up 0 after the reindex. Encode every level instead and let
        # reindex discard the columns that are not in self.columns.
        # NOTE(review): assumes self.columns holds the post-encoding training
        # column names -- confirm against the training script.
        encoded = pd.get_dummies(df, drop_first=False)
        return encoded.reindex(columns=self.columns, fill_value=0)

    def _calculate_scorecard_output(self, X_scaled, base_score=300, scale_length=600):
        """Turn scaled features into a probability, score and rating.

        Steps::

            x      = X_scaled . coef_.T + intercept_
            PD     = sigmoid(x)
            score  = base_score + (1 - PD) * scale_length
            rating = bucket(score)

        Only the first row of ``X_scaled`` is reported.

        Args:
            X_scaled: 2-D array of scaled features.
            base_score: Score assigned when the default probability is 1.
            scale_length: Width of the score range above ``base_score``.
                The rating buckets in ``_get_rating`` are fixed to the
                300-900 range, so other values can yield 'Undefined'.

        Returns:
            ``(default_probability, credit_score, rating)`` with a Python
            float, a truncated int and a string label.
        """
        # Linear combination
        x = np.dot(X_scaled, self.model.coef_.T) + self.model.intercept_

        # Sigmoid -> default probability. For very negative x, exp(-x)
        # overflows to inf and the probability saturates at 0.0, which is the
        # correct limit; silence the overflow so strict NumPy error settings
        # do not turn it into an exception.
        with np.errstate(over="ignore"):
            default_probability = 1 / (1 + np.exp(-x))

        # Non-default probability
        non_default_probability = 1 - default_probability

        # Credit score calculation (truncated to an integer)
        credit_score = base_score + non_default_probability.flatten() * scale_length
        credit_score = int(credit_score[0])

        rating = self._get_rating(credit_score)

        return float(default_probability.flatten()[0]), credit_score, rating

    def _get_rating(self, score):
        """Map a credit score to its rating bucket.

        Returns 'Poor' for [300, 500), 'Average' for [500, 650), 'Good' for
        [650, 750), 'Excellent' for [750, 900] and 'Undefined' otherwise.
        """
        if 300 <= score < 500:
            return 'Poor'
        elif 500 <= score < 650:
            return 'Average'
        elif 650 <= score < 750:
            return 'Good'
        elif 750 <= score <= 900:
            return 'Excellent'
        else:
            return 'Undefined'