Spaces:

altararwa
/

acceptance

Sleeping

File size: 7,412 Bytes

d1a10f2
 
 
 
 
033f8d8
d1a10f2

# ==========================================
# Loan Approval Prediction App (Gradio)
# ==========================================

# Step 1: Install dependencies
# Dependencies are installed in a separate notebook cell

# Step 2: Import libraries
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import os

# Step 3: Load the trained model and the dataset it is paired with
print("Loading trained model and dataset...")
model = joblib.load("loan_model.joblib")
df = pd.read_csv("loan_acceptance_dataset.csv")

# Re-create the one-hot encoded feature frame from the dataset (identifier and
# target columns removed) to capture its column names and their order; the
# prediction functions below align their inputs to this list.
X = pd.get_dummies(
    df.drop(columns=['applicant_id', 'loan_approved']),
    drop_first=True,
)
feature_names = list(X.columns)

# Step 4: Define helper for confidence messaging
def confidence_message(pred, prob):
    """Return a sentence describing how confident a prediction is.

    Args:
        pred: Predicted class label; 1 is reported as an approval, anything
            else as a rejection.
        prob: Probability of approval (class 1).

    Returns:
        "VERY confident" wording when an approval has ``prob`` above 0.95 or
        a label-0 rejection has ``prob`` below 0.05; otherwise the
        "moderate confidence" wording for the predicted outcome.
    """
    if pred == 1:
        if prob > 0.95:
            return "The model is VERY confident that this loan will be APPROVED."
        return "The model predicts APPROVAL, but with moderate confidence."

    # Everything below is reported as a rejection.
    if pred == 0 and prob < 0.05:
        return "The model is VERY confident that this loan will be REJECTED."
    return "The model predicts REJECTION, but with moderate confidence."

# Step 5: Define single prediction function
def predict_loan(age, income, employment_years, credit_score, loan_amount,
                 loan_term_months, existing_loans_count, marital_status,
                 education_level, loan_purpose):
    """Predict approval for a single applicant and return a text summary.

    The applicant is turned into a one-row frame, one-hot encoded, aligned to
    the module-level ``feature_names`` and scored with the module-level
    ``model``.

    Args:
        age, income, employment_years, credit_score, loan_amount,
        loan_term_months, existing_loans_count: Numeric applicant fields.
        marital_status, education_level, loan_purpose: Categorical applicant
            fields (strings).

    Returns:
        A multi-line string with the decision, the approval probability and a
        confidence sentence; or, when any field is ``None``, a prompt naming
        the fields that still need a value.
    """
    record = {
        'age': age,
        'income': income,
        'employment_years': employment_years,
        'credit_score': credit_score,
        'loan_amount': loan_amount,
        'loan_term_months': loan_term_months,
        'existing_loans_count': existing_loans_count,
        'marital_status': marital_status,
        'education_level': education_level,
        'loan_purpose': loan_purpose,
    }

    # A field left empty in the form may arrive as None; scoring it would
    # either fail on a missing number or silently fall back to the baseline
    # category, so ask for the value instead.
    empty_fields = [name for name, value in record.items() if value is None]
    if empty_fields:
        return "Please provide a value for: " + ", ".join(empty_fields)

    input_df = pd.DataFrame([record])

    # Encode WITHOUT drop_first: in a one-row frame every categorical column
    # has exactly one level, so drop_first would discard all of its dummies
    # and the categorical choices would never reach the model. Reindexing to
    # the training columns drops the training baseline level and adds the
    # levels not present in this row as 0.
    input_encoded = pd.get_dummies(input_df).reindex(
        columns=feature_names, fill_value=0
    )

    # Predict
    pred = model.predict(input_encoded)[0]
    prob = model.predict_proba(input_encoded)[0][1]

    result = "Loan Approved" if pred == 1 else "Loan Rejected"
    msg = confidence_message(pred, prob)
    return f"{result}\n\nApproval Probability: {prob:.2%}\n{msg}"

# Step 6: Define batch prediction function (learned from standalone script)
def predict_from_file(file_obj):
    """Score every row of an uploaded CSV/Excel file.

    Args:
        file_obj: The upload, either a filesystem path or an object exposing
            the path through a ``name`` attribute; ``None`` when nothing was
            uploaded.

    Returns:
        A ``(html, path)`` pair. On success ``html`` is an HTML table of the
        first rows of the results and ``path`` points to a CSV holding all
        results. On any problem ``html`` is a message and ``path`` is
        ``None`` so that both declared outputs always receive a value.
    """
    if file_obj is None:
        return "Please upload a CSV or Excel file.", None

    # Accept both a plain path and a file-like wrapper carrying the path.
    file_path = getattr(file_obj, "name", file_obj)
    file_ext = os.path.splitext(str(file_path))[1].lower()
    if file_ext == ".csv":
        reader = pd.read_csv
    elif file_ext in (".xls", ".xlsx"):
        reader = pd.read_excel
    else:
        return "Unsupported file format. Please upload a CSV or Excel file.", None

    # Boundary with user-supplied data: report any parsing failure to the user.
    try:
        new_data = reader(file_path)
    except Exception as e:
        return f"Error reading file: {e}", None

    if new_data.empty:
        return "The uploaded file contains no rows.", None

    # Identifier/target columns are not model inputs (same columns that are
    # removed when feature_names is built).
    features = new_data.drop(
        columns=['applicant_id', 'loan_approved'], errors='ignore'
    )

    # Encode WITHOUT drop_first: drop_first removes the first level present in
    # *this* file, which need not be the baseline level dropped in training,
    # and would encode those rows as the wrong category.
    encoded = pd.get_dummies(features)

    # A training feature missing from the encoded frame is harmless when it is
    # a dummy level of a column that was uploaded. Otherwise the input itself
    # is absent (or not numeric) and zero-filling it would fabricate data.
    uploaded_prefixes = tuple(f"{col}_" for col in features.columns)
    absent_inputs = [
        name for name in feature_names
        if name not in encoded.columns and not name.startswith(uploaded_prefixes)
    ]
    if absent_inputs:
        return (
            "Missing or non-numeric required column(s): "
            + ", ".join(absent_inputs),
            None,
        )

    # Keep exactly the training columns, in training order; unseen levels -> 0.
    new_data_encoded = encoded.reindex(columns=feature_names, fill_value=0)

    # Predictions
    try:
        pred_labels = model.predict(new_data_encoded)
        pred_probs = model.predict_proba(new_data_encoded)[:, 1]
    except ValueError as e:
        return f"Error generating predictions: {e}", None

    # Combine with input
    results_df = new_data.copy()
    results_df["Prediction"] = np.where(pred_labels == 1, "Loan Approved", "Loan Rejected")
    results_df["Approval_Probability"] = pred_probs
    results_df["Confidence_Message"] = [
        confidence_message(pred, prob) for pred, prob in zip(pred_labels, pred_probs)
    ]

    # Write through the open handle (rather than re-opening the file by name
    # while it is still open) and keep the file on disk for download.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as temp_file:
        results_df.to_csv(temp_file, index=False)

    # Return preview and downloadable file
    preview_html = results_df.head().to_html(index=False)
    return preview_html, temp_file.name

# Step 7: Define feature importance visualization
def feature_importance_plot():
    """Draw the model coefficients as a horizontal bar chart and save it.

    Pairs each entry of the module-level ``feature_names`` with the matching
    value of ``model.coef_[0]`` and plots them sorted from largest to
    smallest coefficient (largest at the top).

    Returns:
        The path of the written image, ``"feature_importance.png"``.
    """
    coef_df = pd.DataFrame({
        'Feature': feature_names,
        'Coefficient': model.coef_[0]
    }).sort_values(by='Coefficient', ascending=False)

    # Use an explicit figure and always close it: figures created through
    # pyplot stay registered until closed, so each call would otherwise leak
    # one, and relying on the implicit "current figure" is fragile when this
    # runs as a server callback.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.barh(coef_df['Feature'], coef_df['Coefficient'], color='skyblue')
        ax.set_xlabel("Coefficient Value (Impact Strength)")
        ax.set_title("Feature Importance (Logistic Regression)")
        # barh places the first row at the bottom; invert so the largest
        # coefficient is shown first.
        ax.invert_yaxis()
        fig.tight_layout()
        fig.savefig("feature_importance.png", bbox_inches='tight')
    finally:
        plt.close(fig)

    return "feature_importance.png"

# Step 8: Gradio UI Components
# Numeric applicant fields
age = gr.Slider(minimum=18, maximum=70, step=1, label="Applicant Age")
income = gr.Number(label="Annual Income (KWD)")
employment_years = gr.Slider(minimum=0, maximum=40, step=1, label="Years of Employment")
credit_score = gr.Slider(minimum=300, maximum=850, step=1, label="Credit Score")
# Whole-number amount: starts at 5000, may not go below 5000, moves in steps of 100.
loan_amount = gr.Number(
    value=5000,
    minimum=5000,
    step=100,
    precision=0,
    label="Loan Amount Requested (KWD)",
)
loan_term_months = gr.Slider(minimum=6, maximum=60, step=6, label="Loan Term (Months)")
existing_loans_count = gr.Slider(minimum=0, maximum=5, step=1, label="Existing Loans")

# Categorical applicant fields
marital_status = gr.Dropdown(
    choices=["Single", "Married", "Divorced"],
    label="Marital Status",
)
education_level = gr.Dropdown(
    choices=["High School", "Bachelor", "Master", "PhD"],
    label="Education Level",
)
loan_purpose = gr.Dropdown(
    choices=["Home", "Car", "Personal"],
    label="Loan Purpose",
)

# Step 9: Gradio Interfaces
# Single-applicant form: ten inputs in, one text summary out.
predict_interface = gr.Interface(
    title="Loan Approval Prediction",
    description="Predict whether a loan will be approved using a trained Logistic Regression model.",
    theme="gradio/soft",
    fn=predict_loan,
    # Listed in the same order as predict_loan's parameters.
    inputs=[
        age,
        income,
        employment_years,
        credit_score,
        loan_amount,
        loan_term_months,
        existing_loans_count,
        marital_status,
        education_level,
        loan_purpose,
    ],
    outputs=gr.Textbox(
        label="Prediction Result",
        placeholder="Prediction will appear here...",
        lines=8,        # initial height of the output box
        max_lines=12,   # upper bound before the box scrolls
    ),
)

# Batch scoring: one uploaded file in, an HTML preview plus a downloadable CSV out.
batch_interface = gr.Interface(
    title="Batch Loan Predictions",
    description="Upload a CSV or Excel file containing applicant data to get loan approval predictions.",
    fn=predict_from_file,
    inputs=gr.File(label="Upload CSV or Excel File"),
    # On success predict_from_file returns a (preview HTML, file path) pair,
    # which is mapped onto these two outputs in order.
    outputs=[
        gr.HTML(label="Preview of Results (first 5 rows)"),
        gr.File(label="Download Full Predictions (CSV)"),
    ],
)

# Coefficient chart: takes no inputs and shows the image file written by
# feature_importance_plot.
feature_interface = gr.Interface(
    title="Model Feature Importance",
    description="Visualizes the coefficient impact strength of each feature in the logistic regression model.",
    fn=feature_importance_plot,
    inputs=None,
    outputs=gr.Image(label="Feature Importance Chart", type="filepath"),
)

# Step 10: Combine Tabs and Launch (Colab-safe)
# One tab per interface; tab_names are given in the same order as interface_list.
demo = gr.TabbedInterface(
    interface_list=[predict_interface, batch_interface, feature_interface],
    tab_names=["Single Loan Prediction", "Batch Predictions", "Feature Importance"],
)

# share=True requests a public share link; inline=True asks for the UI to be
# embedded in the notebook output.
demo.launch(inline=True, share=True)