File size: 5,372 Bytes
cb74148 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import pandas as pd
import joblib
def get_encoding(category, value):
encodings = {
'person_gender': {
'female': 0,
'male': 1
},
'person_education': {
'associate': 0,
'bachelor': 1,
'doctorate': 2,
'high school': 3,
'master': 4
},
'person_home_ownership': {
'mortgage': 0,
'other': 1,
'own': 2,
'rent': 3
},
'previous_loan_defaults_on_file': {
'no': 0,
'yes': 1,
0: 0,
1: 1
},
'loan_intent': {
'debt_consolidation': 0,
'education': 1,
'home_improvement': 2,
'medical': 3,
'personal': 4,
'venture': 5
}
}
# Look up the encoding
return encodings.get(category, {}).get(value, None)
def classify_loan(
    person_age: float,
    person_gender: str,
    person_education: str,
    person_income: float,
    person_emp_exp: float,
    person_home_ownership: str,
    loan_amnt: float,
    loan_intent: str,
    loan_int_rate: float,
    loan_percent_income: float,
    cb_person_cred_hist_length: float,
    credit_score: float,
    previous_loan_defaults_on_file: str
) -> str:
    """Classify a loan application as approved or rejected (mock API).

    The numeric inputs are standardised with the per-column mean and standard
    deviation read from ``data/means_stds.csv``, the categorical inputs are
    converted to integer codes with ``get_encoding``, and the resulting
    single-row frame is passed to the model loaded from
    ``model/random_forest_model.pkl``. The raw and the processed input frames
    are printed to stdout.

    Args:
        person_age: The age of the borrower.
        person_gender: Gender of the borrower. Either `male` or `female`.
        person_education: The education level of the borrower. Either
            `associate`, `bachelor`, `doctorate`, `master` or `high school`.
        person_income: The income of the borrower.
        person_emp_exp: The years of employment experience of the borrower.
        person_home_ownership: The home ownership status of the borrower.
            Either `mortgage`, `other`, `own` or `rent`.
        loan_amnt: The amount of loan requested.
        loan_intent: The intent of the loan. Either `debt_consolidation`,
            `education`, `home_improvement`, `medical`, `personal` or
            `venture`.
        loan_int_rate: The interest rate of the loan.
        loan_percent_income: The loan amount as a percentage of annual income.
        cb_person_cred_hist_length: The length of the credit history of the
            borrower.
        credit_score: The credit score of the borrower.
        previous_loan_defaults_on_file: The indicator of previous loan
            defaults. Either `yes` or `no`.

    Returns:
        A sentence stating whether the loan application has been approved or
        rejected.

    Raises:
        ValueError: If a categorical argument is not one of the values
            known to ``get_encoding``.
    """
    # make the input data into a single-row dataframe
    input_data = {
        "person_age": person_age,
        "person_gender": person_gender,
        "person_education": person_education,
        "person_income": person_income,
        "person_emp_exp": person_emp_exp,
        "person_home_ownership": person_home_ownership,
        "loan_amnt": loan_amnt,
        "loan_intent": loan_intent,
        "loan_int_rate": loan_int_rate,
        "loan_percent_income": loan_percent_income,
        "cb_person_cred_hist_length": cb_person_cred_hist_length,
        "credit_score": credit_score,
        "previous_loan_defaults_on_file": previous_loan_defaults_on_file,
    }
    input_df = pd.DataFrame([input_data])
    print("### This is the input data:")
    print(input_df.head())

    # scale the numeric columns: z = (x - mean) / std, using the statistics
    # stored per column name in the CSV
    means_stds = pd.read_csv("data/means_stds.csv").set_index('column')
    numeric_columns = [
        "person_age", "person_income", "person_emp_exp", "loan_amnt",
        "loan_int_rate", "loan_percent_income", "cb_person_cred_hist_length",
        "credit_score",
    ]
    for column in numeric_columns:
        mean = means_stds.loc[column, 'mean']
        std = means_stds.loc[column, 'std']
        input_df[column] = (input_df[column] - mean) / std

    # convert the categorical variables to their integer class codes
    categorical_columns = [
        "person_gender", "person_education", "person_home_ownership",
        "loan_intent", "previous_loan_defaults_on_file",
    ]
    for column in categorical_columns:
        raw_value = input_data[column]
        code = get_encoding(column, raw_value)
        if code is None:
            # Fail with a clear message instead of handing a missing value
            # to the model.
            raise ValueError(
                f"Unsupported value {raw_value!r} for '{column}'."
            )
        input_df[column] = code
    print("### This is the processed input data: ")
    print(input_df.head())

    # load classifier at model/random_forest_model.pkl
    # NOTE: joblib.load unpickles the file, so the model file must come from
    # a trusted source.
    classifier = joblib.load("model/random_forest_model.pkl")

    # reorder the columns to match the training data
    ordered_columns = categorical_columns + numeric_columns
    input_df = input_df[ordered_columns]

    # make prediction
    prediction = classifier.predict(input_df)
    if prediction[0] == 1:
        return "Your loan application has been approved."
    return "Your loan application has been rejected."