File size: 1,874 Bytes
795b3fe
 
 
 
 
 
 
 
 
e5057ff
795b3fe
 
 
 
 
e5057ff
795b3fe
 
 
 
 
 
 
 
 
 
 
 
 
3e5dd6d
 
795b3fe
 
 
3e5dd6d
 
 
 
 
 
b6d1423
3e5dd6d
795b3fe
 
 
 
 
 
 
 
 
 
 
3e5dd6d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
import joblib

from src.utils import get_latest_file
from src.preprocessing import clean_and_engineer

# Directory searched by CreditRiskPredictor for the serialized model, scaler
# and column-list artifacts. The path is relative, so it is resolved against
# the process's current working directory.
MODEL_DIR = "models"


class CreditRiskPredictor:
    """Score a single applicant with the persisted credit-risk artifacts.

    On construction the predictor loads three joblib artifacts from
    ``MODEL_DIR``: the model (must expose ``coef_`` and ``intercept_``), a
    scaler (must expose ``transform``) and the list of feature columns the
    scaler/model expect.
    """

    def __init__(self):
        """Locate and load the model, scaler and column-list artifacts."""
        # NOTE(review): get_latest_file presumably returns the path of the
        # newest artifact whose name matches the given prefix -- confirm in
        # src.utils.
        self.model_path = get_latest_file(MODEL_DIR, "credit_model")
        self.scaler_path = get_latest_file(MODEL_DIR, "scaler")
        self.columns_path = get_latest_file(MODEL_DIR, "columns")

        # joblib.load unpickles its input, so these files must come from a
        # trusted location.
        self.model = joblib.load(self.model_path)
        self.scaler = joblib.load(self.scaler_path)
        self.columns = joblib.load(self.columns_path)

    def predict(self, input_dict: dict):
        """Score one applicant described by ``input_dict``.

        Args:
            input_dict: Mapping of raw feature name to value for a single
                applicant; it becomes a one-row DataFrame.

        Returns:
            A ``(probability, credit_score, rating)`` tuple where
            ``probability`` is the default probability rounded to 4 decimal
            places, ``credit_score`` is an int and ``rating`` is a label from
            ``_get_rating``.
        """
        frame = pd.DataFrame([input_dict])
        frame = clean_and_engineer(frame)
        frame = self._encode_features(frame)
        X_scaled = self.scaler.transform(frame)

        probability, credit_score, rating = self._calculate_scorecard_output(X_scaled)

        return round(probability, 4), credit_score, rating

    def _encode_features(self, df):
        """One-hot encode ``df`` and align it to ``self.columns``.

        ``drop_first`` is deliberately NOT used here: on a single-row frame
        every categorical column has exactly one level, so dropping the first
        level would remove every indicator and the row would always be scored
        as the baseline category. Instead, all indicators are generated and
        ``reindex`` keeps only the columns listed in ``self.columns``
        (presumably the training-time feature list -- confirm against the
        training code). Indicators for levels absent from that list are
        discarded, and listed columns missing from the row are filled with 0.
        """
        encoded = pd.get_dummies(df)
        return encoded.reindex(columns=self.columns, fill_value=0)

    def _calculate_scorecard_output(self, X_scaled, base_score=300, scale_length=600):
        """Turn scaled features into a default probability, score and rating.

        Args:
            X_scaled: 2-D array of scaled features; only the first row's
                result is returned.
            base_score: Score assigned when the non-default probability is 0.
            scale_length: Width of the score range above ``base_score``.

        Returns:
            ``(default_probability, credit_score, rating)`` with the
            probability as a float, the score truncated to an int, and the
            rating taken from ``_get_rating``.
        """
        logit = np.dot(X_scaled, self.model.coef_.T) + self.model.intercept_
        # For very negative logits exp(-logit) overflows to inf; the quotient
        # is still the correct limit (0.0), so only the warning is silenced.
        with np.errstate(over="ignore"):
            default_probability = 1 / (1 + np.exp(-logit))
        non_default_probability = 1 - default_probability

        scores = base_score + non_default_probability.flatten() * scale_length
        credit_score = int(scores[0])  # truncates toward zero
        rating = self._get_rating(credit_score)

        return float(default_probability.flatten()[0]), credit_score, rating

    def _get_rating(self, score):
        """Map a credit score to its rating band.

        Bands: [300, 500) 'Poor', [500, 650) 'Average', [650, 750) 'Good',
        [750, 900] 'Excellent'. Anything outside 300-900 is 'Undefined'.
        """
        if not (300 <= score <= 900):
            return 'Undefined'
        if score < 500:
            return 'Poor'
        if score < 650:
            return 'Average'
        if score < 750:
            return 'Good'
        return 'Excellent'