File size: 5,372 Bytes
cb74148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import pandas as pd
import joblib

# Integer codes for each categorical feature, keyed first by feature name and
# then by the lower-case category label. Built once at import time instead of
# on every lookup.
_ENCODINGS = {
    'person_gender': {
        'female': 0,
        'male': 1,
    },
    'person_education': {
        'associate': 0,
        'bachelor': 1,
        'doctorate': 2,
        'high school': 3,
        'master': 4,
    },
    'person_home_ownership': {
        'mortgage': 0,
        'other': 1,
        'own': 2,
        'rent': 3,
    },
    'previous_loan_defaults_on_file': {
        'no': 0,
        'yes': 1,
        # Already-encoded integer flags are passed through unchanged.
        0: 0,
        1: 1,
    },
    'loan_intent': {
        'debt_consolidation': 0,
        'education': 1,
        'home_improvement': 2,
        'medical': 3,
        'personal': 4,
        'venture': 5,
    },
}


def get_encoding(category, value):
    """Return the integer code of ``value`` for the categorical feature ``category``.

    String values are matched case-insensitively and with surrounding
    whitespace ignored, so ``'Male'`` and ``' male '`` both resolve like
    ``'male'``. Non-string values (such as the ``0``/``1`` flags accepted for
    ``previous_loan_defaults_on_file``) are looked up as given.

    Args:
        category: Name of the categorical feature, e.g. ``'person_gender'``.
        value: The raw category label to encode.

    Returns:
        The integer code, or ``None`` when either the category or the value
        is not in the encoding table.
    """
    if isinstance(value, str):
        # The table keys are lower-case; normalise so casing and padding in
        # caller-supplied text do not cause a silent miss.
        value = value.strip().lower()
    return _ENCODINGS.get(category, {}).get(value)

def classify_loan(
        person_age: float,
        person_gender: str,
        person_education: str,
        person_income: float,
        person_emp_exp: float,
        person_home_ownership: str,
        loan_amnt: float,
        loan_intent: str,
        loan_int_rate: float,
        loan_percent_income: float,
        cb_person_cred_hist_length: float,
        credit_score: float,
        previous_loan_defaults_on_file: str
) -> str:
    """Classify a loan application as approved or rejected.

    The inputs are assembled into a single-row DataFrame, the numeric
    features are standardized with the means and standard deviations stored
    in ``data/means_stds.csv``, the categorical features are integer-encoded
    with ``get_encoding``, and the result is passed to the classifier loaded
    from ``model/random_forest_model.pkl``. Both paths are relative, so they
    are resolved against the current working directory. The raw and the
    processed input frames are printed to stdout.

    Args:
        person_age: The age of the borrower.
        person_gender: Gender of the borrower. Either `male` or `female`.
        person_education: The education level of the borrower. Either
            `associate`, `bachelor`, `doctorate`, `master` or `high school`.
        person_income: The income of the borrower.
        person_emp_exp: The years of employment experience of the borrower.
        person_home_ownership: The home ownership status of the borrower.
            Either `mortgage`, `other`, `own` or `rent`.
        loan_amnt: The amount of loan requested.
        loan_intent: The intent of the loan. Either `debt_consolidation`,
            `education`, `home_improvement`, `medical`, `personal` or
            `venture`.
        loan_int_rate: The interest rate of the loan.
        loan_percent_income: The loan amount as a percentage of annual income.
        cb_person_cred_hist_length: The length of the credit history of the
            borrower.
        credit_score: The credit score of the borrower.
        previous_loan_defaults_on_file: The indicator of previous loan
            defaults. Either `yes` or `no`.

    Returns:
        A human-readable message stating whether the loan application has
        been approved or rejected.

    Raises:
        ValueError: If a categorical argument holds a value that
            ``get_encoding`` does not recognise.
    """
    # Assemble the inputs into a single-row DataFrame.
    input_data = {
        "person_age": person_age,
        "person_gender": person_gender,
        "person_education": person_education,
        "person_income": person_income,
        "person_emp_exp": person_emp_exp,
        "person_home_ownership": person_home_ownership,
        "loan_amnt": loan_amnt,
        "loan_intent": loan_intent,
        "loan_int_rate": loan_int_rate,
        "loan_percent_income": loan_percent_income,
        "cb_person_cred_hist_length": cb_person_cred_hist_length,
        "credit_score": credit_score,
        "previous_loan_defaults_on_file": previous_loan_defaults_on_file,
    }
    input_df = pd.DataFrame([input_data])
    print("### This is the input data:")
    print(input_df.head())

    # Standardize the numeric features: (value - mean) / std, using the
    # per-column statistics stored in the CSV.
    means_stds = pd.read_csv("data/means_stds.csv")
    means_stds.set_index('column', inplace=True)
    numeric_columns = [
        "person_age", "person_income", "person_emp_exp", "loan_amnt",
        "loan_int_rate", "loan_percent_income", "cb_person_cred_hist_length",
        "credit_score",
    ]
    for column in numeric_columns:
        mean = means_stds.loc[column, 'mean']
        std = means_stds.loc[column, 'std']
        input_df[column] = (input_df[column] - mean) / std

    # Replace each categorical label with its integer code.
    categorical_columns = [
        "person_gender", "person_education", "person_home_ownership",
        "loan_intent", "previous_loan_defaults_on_file",
    ]
    for column in categorical_columns:
        # Bind `column` as a default so the lambda does not rely on the loop
        # variable's late binding.
        encoded = input_df[column].apply(
            lambda value, column=column: get_encoding(column, value)
        )
        # get_encoding returns None for unknown labels; fail loudly here
        # rather than handing a missing value to the classifier.
        if encoded.isna().any():
            raise ValueError(
                f"Unsupported value {input_data[column]!r} for '{column}'."
            )
        input_df[column] = encoded

    print("### This is the processed input data: ")
    print(input_df.head())

    # Load the trained classifier from model/random_forest_model.pkl.
    # NOTE(review): joblib.load unpickles the file, so the model file must
    # come from a trusted source.
    classifier = joblib.load("model/random_forest_model.pkl")

    # Reorder the columns to match the training data.
    ordered_columns = [
        "person_gender",
        "person_education",
        "person_home_ownership",
        "loan_intent",
        "previous_loan_defaults_on_file",
        "person_age",
        "person_income",
        "person_emp_exp",
        "loan_amnt",
        "loan_int_rate",
        "loan_percent_income",
        "cb_person_cred_hist_length",
        "credit_score",
    ]
    input_df = input_df[ordered_columns]

    # A predicted class of 1 means the application is approved.
    prediction = classifier.predict(input_df)

    if prediction[0] == 1:
        return "Your loan application has been approved."
    else:
        return "Your loan application has been rejected."