# acceptance / app.py
# Uploaded by altararwa ("Upload app.py"), commit 033f8d8 (verified)
# ==========================================
# Loan Approval Prediction App (Gradio)
# ==========================================
# Step 1: Install dependencies
# Dependencies are installed in a separate notebook cell
# Step 2: Import libraries
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import os
# Step 3: Load trained model and dataset
# Start-up work: both artifacts are read from the current working directory.
print("Loading trained model and dataset...")
model = joblib.load("loan_model.joblib")
df = pd.read_csv("loan_acceptance_dataset.csv")

# One-hot encode the predictor columns of the dataset and remember the
# resulting column names/order; prediction inputs are aligned to this list.
# NOTE(review): presumably mirrors the encoding used when the model was
# trained -- confirm against the training script.
_predictors = df.drop(columns=['applicant_id', 'loan_approved'])
X = pd.get_dummies(_predictors, drop_first=True)
feature_names = list(X.columns)
# Step 4: Define helper for confidence messaging
def confidence_message(pred, prob, *, high=0.95, low=0.05):
    """Describe how confident the model is in a single prediction.

    Args:
        pred: Predicted class label. ``1`` is reported as an approval; any
            other value is reported as a rejection.
        prob: Predicted probability of approval (class ``1``).
        high: An approval is called "VERY confident" only when ``prob`` is
            strictly above this value. Defaults to 0.95.
        low: A rejection (``pred == 0``) is called "VERY confident" only
            when ``prob`` is strictly below this value. Defaults to 0.05.

    Returns:
        A one-sentence, human-readable summary of the prediction.
    """
    if pred == 1:
        if prob > high:
            return "The model is VERY confident that this loan will be APPROVED."
        return "The model predicts APPROVAL, but with moderate confidence."

    # Only an explicit 0 label can earn the "VERY confident" rejection text;
    # every remaining case falls through to the moderate rejection message.
    if pred == 0 and prob < low:
        return "The model is VERY confident that this loan will be REJECTED."
    return "The model predicts REJECTION, but with moderate confidence."
# Step 5: Define single prediction function
def predict_loan(age, income, employment_years, credit_score, loan_amount,
                 loan_term_months, existing_loans_count, marital_status,
                 education_level, loan_purpose):
    """Predict loan approval for a single applicant.

    Each argument is the raw value of the like-named input column; the
    three categorical ones (``marital_status``, ``education_level``,
    ``loan_purpose``) are one-hot encoded before being passed to the model.

    Returns:
        A multi-line string containing the decision, the approval
        probability and a confidence sentence. If any argument is ``None``
        the model is not called and a prompt naming the missing inputs is
        returned instead.
    """
    record = {
        'age': age,
        'income': income,
        'employment_years': employment_years,
        'credit_score': credit_score,
        'loan_amount': loan_amount,
        'loan_term_months': loan_term_months,
        'existing_loans_count': existing_loans_count,
        'marital_status': marital_status,
        'education_level': education_level,
        'loan_purpose': loan_purpose,
    }

    # A None value would otherwise be silently encoded as 0 / baseline
    # category and produce a prediction for data the user never entered.
    unset = [name for name, value in record.items() if value is None]
    if unset:
        return "Please provide a value for: " + ", ".join(unset)

    input_df = pd.DataFrame([record])

    # Encode WITHOUT drop_first: in a one-row frame each categorical column
    # has exactly one category, so drop_first=True would drop that single
    # dummy and every categorical input would be ignored. Reindexing to
    # feature_names discards dummies the model does not know (including the
    # baseline categories) and zero-fills the ones absent from this row.
    input_encoded = pd.get_dummies(input_df).reindex(
        columns=feature_names, fill_value=0
    )

    # Predict
    pred = model.predict(input_encoded)[0]
    prob = model.predict_proba(input_encoded)[0][1]

    result = "Loan Approved" if pred == 1 else "Loan Rejected"
    msg = confidence_message(pred, prob)
    return f"{result}\n\nApproval Probability: {prob:.2%}\n{msg}"
# Step 6: Define batch prediction function (learned from standalone script)
def predict_from_file(file_obj):
    """Score every row of an uploaded CSV or Excel file.

    Args:
        file_obj: The uploaded file, or ``None`` when nothing was uploaded.
            Its path is taken from ``file_obj.name`` when that attribute
            exists, otherwise ``file_obj`` itself is used as the path.
            NOTE(review): the exact object type depends on the Gradio
            version -- confirm.

    Returns:
        A ``(preview_html, csv_path)`` tuple: an HTML table of the first
        rows of the results and the path of a temporary CSV holding all of
        them. On any failure the tuple is ``(error_message, None)`` so the
        number of returned values always matches the two declared outputs.
    """
    if file_obj is None:
        return "Please upload a CSV or Excel file.", None

    path = getattr(file_obj, "name", file_obj)

    # Load the uploaded file
    try:
        file_ext = os.path.splitext(path)[1].lower()
        if file_ext == ".csv":
            new_data = pd.read_csv(path)
        elif file_ext in (".xls", ".xlsx"):
            new_data = pd.read_excel(path)
        else:
            return "Unsupported file format. Please upload a CSV or Excel file.", None
    except Exception as e:  # UI boundary: report any read/parse failure to the user
        return f"Error reading file: {str(e)}", None

    if new_data.empty:
        return "The uploaded file contains no rows.", None

    # Encode WITHOUT drop_first: drop_first=True removes the first category
    # *present in this upload*, which need not be the category that was
    # dropped when feature_names was built, so rows could be mis-encoded.
    # Reindexing keeps exactly the model's columns, in the model's order,
    # and zero-fills dummies that do not occur in this file.
    new_data_encoded = pd.get_dummies(new_data).reindex(
        columns=feature_names, fill_value=0
    )

    # Predictions
    try:
        pred_labels = model.predict(new_data_encoded)
        pred_probs = model.predict_proba(new_data_encoded)[:, 1]
    except (ValueError, TypeError) as e:
        # e.g. blank or non-numeric cells the model cannot score
        return f"Error generating predictions: {str(e)}", None

    # Combine with input
    results_df = new_data.copy()
    results_df["Prediction"] = np.where(pred_labels == 1, "Loan Approved", "Loan Rejected")
    results_df["Approval_Probability"] = pred_probs
    results_df["Confidence_Message"] = [
        confidence_message(pred, prob) for pred, prob in zip(pred_labels, pred_probs)
    ]

    # Save for download. Write through the open handle (instead of reopening
    # the file by name while it is still open) and let `with` close it.
    # delete=False keeps the file on disk so it can be served afterwards.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as temp_file:
        results_df.to_csv(temp_file, index=False)

    # Return preview and downloadable file
    preview_html = results_df.head().to_html(index=False)
    return preview_html, temp_file.name
# Step 7: Define feature importance visualization
def feature_importance_plot():
    """Draw the model coefficients as a horizontal bar chart.

    Pairs each entry of ``feature_names`` with the matching value in
    ``model.coef_[0]``, sorts by coefficient (largest first) and saves the
    chart to ``feature_importance.png`` in the working directory.

    Returns:
        The path of the saved image, ``"feature_importance.png"``.
    """
    coef_df = pd.DataFrame({
        'Feature': feature_names,
        'Coefficient': model.coef_[0]
    }).sort_values(by='Coefficient', ascending=False)

    # Use an explicit figure/axes instead of pyplot's implicit "current
    # figure", and always close it: pyplot keeps every figure alive until
    # closed, so each call would otherwise leak one figure.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.barh(coef_df['Feature'], coef_df['Coefficient'], color='skyblue')
        ax.set_xlabel("Coefficient Value (Impact Strength)")
        ax.set_title("Feature Importance (Logistic Regression)")
        ax.invert_yaxis()  # barh draws bottom-up; flip so the largest value is on top
        fig.tight_layout()
        fig.savefig("feature_importance.png", bbox_inches='tight')
    finally:
        plt.close(fig)
    return "feature_importance.png"
# Step 8: Gradio UI Components
# --- Numeric inputs -------------------------------------------------------
age = gr.Slider(minimum=18, maximum=70, step=1, label="Applicant Age")
income = gr.Number(label="Annual Income (KWD)")
employment_years = gr.Slider(
    minimum=0, maximum=40, step=1, label="Years of Employment"
)
credit_score = gr.Slider(minimum=300, maximum=850, step=1, label="Credit Score")
loan_amount = gr.Number(
    label="Loan Amount Requested (KWD)",
    minimum=5000,   # smallest amount the form accepts
    value=5000,     # field starts at the minimum
    step=100,       # increment used by the field's stepper
    precision=0,    # whole numbers only
)
loan_term_months = gr.Slider(
    minimum=6, maximum=60, step=6, label="Loan Term (Months)"
)
existing_loans_count = gr.Slider(
    minimum=0, maximum=5, step=1, label="Existing Loans"
)

# --- Categorical inputs ---------------------------------------------------
marital_status = gr.Dropdown(
    choices=["Single", "Married", "Divorced"],
    label="Marital Status",
)
education_level = gr.Dropdown(
    choices=["High School", "Bachelor", "Master", "PhD"],
    label="Education Level",
)
loan_purpose = gr.Dropdown(
    choices=["Home", "Car", "Personal"],
    label="Loan Purpose",
)
# Step 9: Gradio Interfaces
# Tab 1: single applicant. The input order must match the positional
# parameters of predict_loan.
_single_inputs = [
    age,
    income,
    employment_years,
    credit_score,
    loan_amount,
    loan_term_months,
    existing_loans_count,
    marital_status,
    education_level,
    loan_purpose,
]
_result_box = gr.Textbox(
    label="Prediction Result",
    placeholder="Prediction will appear here...",
    lines=8,       # initial height of the box
    max_lines=12,  # upper bound on the box height
)
predict_interface = gr.Interface(
    fn=predict_loan,
    inputs=_single_inputs,
    outputs=_result_box,
    title="Loan Approval Prediction",
    description="Predict whether a loan will be approved using a trained Logistic Regression model.",
    theme="gradio/soft",
)

# Tab 2: batch scoring. Two outputs: an HTML preview and a downloadable file.
_batch_outputs = [
    gr.HTML(label="Preview of Results (first 5 rows)"),
    gr.File(label="Download Full Predictions (CSV)"),
]
batch_interface = gr.Interface(
    fn=predict_from_file,
    inputs=gr.File(label="Upload CSV or Excel File"),
    outputs=_batch_outputs,
    title="Batch Loan Predictions",
    description="Upload a CSV or Excel file containing applicant data to get loan approval predictions.",
)

# Tab 3: coefficient chart. Takes no inputs; the function returns an image path.
feature_interface = gr.Interface(
    fn=feature_importance_plot,
    inputs=None,
    outputs=gr.Image(type="filepath", label="Feature Importance Chart"),
    title="Model Feature Importance",
    description="Visualizes the coefficient impact strength of each feature in the logistic regression model.",
)
# Step 10: Combine Tabs and Launch (Colab-safe)
# Pair each interface with the title of the tab it is shown under.
_tabs = [predict_interface, batch_interface, feature_interface]
_tab_titles = ["Single Loan Prediction", "Batch Predictions", "Feature Importance"]

demo = gr.TabbedInterface(_tabs, _tab_titles)

# share=True requests a public link; inline=True embeds the UI in a notebook cell.
demo.launch(share=True, inline=True)