Spaces:
Sleeping
Sleeping
File size: 7,412 Bytes
d1a10f2 033f8d8 d1a10f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
# ==========================================
# Loan Approval Prediction App (Gradio)
# ==========================================
# Step 1: Install dependencies
# Dependencies are installed in a separate notebook cell
# Step 2: Import libraries
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import os
# Step 3: Load trained model and dataset
print("Loading trained model and dataset...")
model = joblib.load("loan_model.joblib")
df = pd.read_csv("loan_acceptance_dataset.csv")

# One-hot encode the dataset's predictor columns (everything except the
# applicant identifier and the approval label) and remember the resulting
# column order, so inputs encoded later can be aligned to the same layout.
X = pd.get_dummies(
    df.drop(columns=['applicant_id', 'loan_approved']),
    drop_first=True,
)
feature_names = list(X.columns)
# Step 4: Define helper for confidence messaging
def confidence_message(pred: int, prob: float, *,
                       high_threshold: float = 0.95,
                       low_threshold: float = 0.05) -> str:
    """Describe how confident the model is in a single prediction.

    Args:
        pred: Predicted class label; ``1`` is treated as approval, any other
            value as rejection.
        prob: Probability of approval for the same record.
        high_threshold: An approval counts as "very confident" only when
            ``prob`` is strictly above this value.
        low_threshold: A rejection counts as "very confident" only when
            ``prob`` is strictly below this value.

    Returns:
        One of four fixed, human-readable sentences.
    """
    if pred == 1:
        if prob > high_threshold:
            return "The model is VERY confident that this loan will be APPROVED."
        return "The model predicts APPROVAL, but with moderate confidence."

    # Everything that is not an approval is reported as a rejection; only a
    # label of exactly 0 with a very low approval probability is "very confident".
    if pred == 0 and prob < low_threshold:
        return "The model is VERY confident that this loan will be REJECTED."
    return "The model predicts REJECTION, but with moderate confidence."
# Step 5: Define single prediction function
def predict_loan(age, income, employment_years, credit_score, loan_amount,
                 loan_term_months, existing_loans_count, marital_status,
                 education_level, loan_purpose):
    """Predict the outcome for a single applicant entered in the form.

    Args:
        age: Applicant age.
        income: Annual income.
        employment_years: Years of employment.
        credit_score: Credit score.
        loan_amount: Requested loan amount.
        loan_term_months: Loan term in months.
        existing_loans_count: Number of existing loans.
        marital_status: Marital status category.
        education_level: Education level category.
        loan_purpose: Loan purpose category.

    Returns:
        A text block with the decision, the approval probability and a
        confidence sentence, or a prompt naming the fields left empty.
    """
    record = {
        'age': age,
        'income': income,
        'employment_years': employment_years,
        'credit_score': credit_score,
        'loan_amount': loan_amount,
        'loan_term_months': loan_term_months,
        'existing_loans_count': existing_loans_count,
        'marital_status': marital_status,
        'education_level': education_level,
        'loan_purpose': loan_purpose,
    }

    # An empty field arrives as None. Left in, it would turn its column into
    # an object column that is dummy-encoded away and then zero-filled, i.e.
    # silently scored as 0. Ask for the value instead.
    missing = [name for name, value in record.items() if value is None]
    if missing:
        return "Please provide a value for: " + ", ".join(missing)

    input_df = pd.DataFrame([record])

    # Encode WITHOUT drop_first: a one-row frame has exactly one level per
    # categorical column, so drop_first would discard every dummy and the
    # applicant would always be scored as the baseline category. Reindexing
    # onto the training columns drops the baseline dummies, zero-fills the
    # categories this applicant does not have, and fixes the column order.
    input_encoded = pd.get_dummies(input_df).reindex(
        columns=feature_names, fill_value=0
    )

    pred = model.predict(input_encoded)[0]
    prob = model.predict_proba(input_encoded)[0][1]

    result = "Loan Approved" if pred == 1 else "Loan Rejected"
    msg = confidence_message(pred, prob)
    return f"{result}\n\nApproval Probability: {prob:.2%}\n{msg}"
# Step 6: Define batch prediction function (learned from standalone script)
def predict_from_file(file_obj):
    """Score every applicant in an uploaded CSV or Excel file.

    Args:
        file_obj: The uploaded file, or ``None`` when nothing was uploaded.
            Either a path string or an object whose ``.name`` attribute is
            the path is accepted.

    Returns:
        A ``(preview_html, csv_path)`` pair, one value per output component
        of the batch interface. On success the first item is an HTML table of
        the first five result rows and the second is the path of a CSV with
        all results. On failure the first item is an explanatory message and
        the second is ``None``.
    """
    import html  # local import: only used to escape error text shown as HTML

    if file_obj is None:
        return "Please upload a CSV or Excel file.", None

    # Work from the path so the same code handles a plain path string and a
    # file wrapper that exposes its path as ``.name``.
    file_path = getattr(file_obj, "name", file_obj)

    # Load the uploaded file
    try:
        file_ext = os.path.splitext(file_path)[1].lower()
        if file_ext == ".csv":
            new_data = pd.read_csv(file_path)
        elif file_ext in (".xls", ".xlsx"):
            new_data = pd.read_excel(file_path)
        else:
            return "Unsupported file format. Please upload a CSV or Excel file.", None
    except Exception as e:  # UI boundary: report any reader failure to the user
        return f"Error reading file: {html.escape(str(e))}", None

    if new_data.empty:
        return "The uploaded file contains no rows to score.", None

    # Encode WITHOUT drop_first: which level drop_first removes depends on the
    # categories present in this particular file, so a file lacking some
    # category would be encoded differently from the training data.
    # Reindexing onto the training columns drops the baseline dummies and any
    # extra columns, zero-fills absent ones, and fixes the column order.
    new_data_encoded = pd.get_dummies(new_data).reindex(
        columns=feature_names, fill_value=0
    )

    # Predictions
    try:
        pred_labels = model.predict(new_data_encoded)
        pred_probs = model.predict_proba(new_data_encoded)[:, 1]
    except ValueError as e:
        # e.g. missing or non-numeric values the model cannot consume
        return f"Error generating predictions: {html.escape(str(e))}", None

    # Combine with input
    results_df = new_data.copy()
    results_df["Prediction"] = np.where(pred_labels == 1, "Loan Approved", "Loan Rejected")
    results_df["Approval_Probability"] = pred_probs
    results_df["Confidence_Message"] = [
        confidence_message(pred, prob) for pred, prob in zip(pred_labels, pred_probs)
    ]

    # Reserve a temp path and close the handle before writing to it; writing
    # by name to a still-open temp file fails on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        output_path = temp_file.name
    results_df.to_csv(output_path, index=False)

    # Return preview and downloadable file
    preview_html = results_df.head().to_html(index=False)
    return preview_html, output_path
# Step 7: Define feature importance visualization
def feature_importance_plot():
    """Draw the model coefficients as a horizontal bar chart.

    The chart is written to ``feature_importance.png`` in the current working
    directory, overwriting any previous file of that name.

    Returns:
        The path of the saved image, ``"feature_importance.png"``.
    """
    coef_df = pd.DataFrame({
        'Feature': feature_names,
        'Coefficient': model.coef_[0],
    }).sort_values(by='Coefficient', ascending=False)

    # Use an explicit figure/axes instead of pyplot's implicit "current
    # figure", and always close it: figures created through pyplot stay
    # registered until closed, so one would otherwise leak on every call.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.barh(coef_df['Feature'], coef_df['Coefficient'], color='skyblue')
        ax.set_xlabel("Coefficient Value (Impact Strength)")
        ax.set_title("Feature Importance (Logistic Regression)")
        fig.tight_layout()
        # Rows are sorted descending; inverting puts the largest on top.
        ax.invert_yaxis()
        fig.savefig("feature_importance.png", bbox_inches='tight')
    finally:
        plt.close(fig)
    return "feature_importance.png"
# Step 8: Gradio UI Components
# Numeric applicant details
age = gr.Slider(minimum=18, maximum=70, step=1, label="Applicant Age")
income = gr.Number(label="Annual Income (KWD)")
employment_years = gr.Slider(minimum=0, maximum=40, step=1, label="Years of Employment")
credit_score = gr.Slider(minimum=300, maximum=850, step=1, label="Credit Score")

# Loan request: whole-number amount that starts at, and may not go below, 5000
loan_amount = gr.Number(
    label="Loan Amount Requested (KWD)",
    precision=0,
    step=100,
    minimum=5000,
    value=5000,
)
loan_term_months = gr.Slider(minimum=6, maximum=60, step=6, label="Loan Term (Months)")
existing_loans_count = gr.Slider(minimum=0, maximum=5, step=1, label="Existing Loans")

# Categorical applicant details
marital_status = gr.Dropdown(
    choices=["Single", "Married", "Divorced"],
    label="Marital Status",
)
education_level = gr.Dropdown(
    choices=["High School", "Bachelor", "Master", "PhD"],
    label="Education Level",
)
loan_purpose = gr.Dropdown(
    choices=["Home", "Car", "Personal"],
    label="Loan Purpose",
)
# Step 9: Gradio Interfaces
# Tab 1: single applicant form -> text result
predict_interface = gr.Interface(
    title="Loan Approval Prediction",
    description="Predict whether a loan will be approved using a trained Logistic Regression model.",
    fn=predict_loan,
    inputs=[
        age,
        income,
        employment_years,
        credit_score,
        loan_amount,
        loan_term_months,
        existing_loans_count,
        marital_status,
        education_level,
        loan_purpose,
    ],
    # Tall text box so the multi-line result is visible without scrolling
    outputs=gr.Textbox(
        label="Prediction Result",
        placeholder="Prediction will appear here...",
        lines=8,
        max_lines=12,
    ),
    theme="gradio/soft",
)

# Tab 2: uploaded file -> HTML preview plus downloadable CSV
batch_interface = gr.Interface(
    title="Batch Loan Predictions",
    description="Upload a CSV or Excel file containing applicant data to get loan approval predictions.",
    fn=predict_from_file,
    inputs=gr.File(label="Upload CSV or Excel File"),
    outputs=[
        gr.HTML(label="Preview of Results (first 5 rows)"),
        gr.File(label="Download Full Predictions (CSV)"),
    ],
)

# Tab 3: no inputs -> coefficient chart image
feature_interface = gr.Interface(
    title="Model Feature Importance",
    description="Visualizes the coefficient impact strength of each feature in the logistic regression model.",
    fn=feature_importance_plot,
    inputs=None,
    outputs=gr.Image(label="Feature Importance Chart", type="filepath"),
)
# Step 10: Combine Tabs and Launch (Colab-safe)
# Bundle the three interfaces into one app, one tab each (names in tab order)
demo = gr.TabbedInterface(
    interface_list=[predict_interface, batch_interface, feature_interface],
    tab_names=["Single Loan Prediction", "Batch Predictions", "Feature Importance"],
)

# Start the app: request a public share link and render inline in notebooks
demo.launch(inline=True, share=True)
|