File size: 7,412 Bytes
d1a10f2
 
 
 
 
033f8d8
d1a10f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# ==========================================
# Loan Approval Prediction App (Gradio)
# ==========================================

# Step 1: Install dependencies
# Dependencies are installed in a separate notebook cell

# Step 2: Import libraries
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import os

# Step 3: Load trained model and dataset
print("Loading trained model and dataset...")
model = joblib.load("loan_model.joblib")
df = pd.read_csv("loan_acceptance_dataset.csv")

# Rebuild the one-hot encoded feature matrix from the dataset (identifier and
# target columns removed) so that incoming records can be aligned to exactly
# these columns, in this order, before being passed to the model.
# NOTE(review): presumably this mirrors the encoding used when the model was
# fitted -- confirm against the training script.
X = pd.get_dummies(
    df.drop(columns=['applicant_id', 'loan_approved']),
    drop_first=True,
)
feature_names = list(X.columns)

# Step 4: Define helper for confidence messaging
def confidence_message(pred, prob):
    """Describe a prediction and how confident the model is in it.

    Args:
        pred: Predicted class label. ``1`` selects the approval wording;
            every other value ends up with rejection wording.
        prob: Predicted probability of approval.

    Returns:
        One of four fixed sentences. "VERY confident" is used only when the
        probability is above 0.95 for an approval, or below 0.05 for a
        rejection (label ``0``); otherwise the "moderate confidence" variant
        is returned.
    """
    if pred == 1:
        if prob > 0.95:
            return "The model is VERY confident that this loan will be APPROVED."
        return "The model predicts APPROVAL, but with moderate confidence."

    # Everything that is not an approval is reported as a rejection.
    if pred == 0 and prob < 0.05:
        return "The model is VERY confident that this loan will be REJECTED."
    return "The model predicts REJECTION, but with moderate confidence."

# Step 5: Define single prediction function
def predict_loan(age, income, employment_years, credit_score, loan_amount,
                 loan_term_months, existing_loans_count, marital_status,
                 education_level, loan_purpose):
    """Predict approval for a single applicant and format the result as text.

    The ten arguments are the raw applicant attributes; each is placed in a
    one-row DataFrame under a column of the same name, one-hot encoded,
    aligned to the module-level ``feature_names`` and scored with the
    module-level ``model``.

    Returns:
        A multi-line string with the decision ("Loan Approved" or
        "Loan Rejected"), the approval probability as a percentage and the
        sentence produced by ``confidence_message``. If any argument is
        ``None`` a message listing the missing fields is returned instead
        and the model is not called.
    """
    record = {
        'age': age,
        'income': income,
        'employment_years': employment_years,
        'credit_score': credit_score,
        'loan_amount': loan_amount,
        'loan_term_months': loan_term_months,
        'existing_loans_count': existing_loans_count,
        'marital_status': marital_status,
        'education_level': education_level,
        'loan_purpose': loan_purpose,
    }

    # A field left blank in the form may arrive as None. Scoring it would
    # silently treat the value as 0 / the baseline category, so ask the user
    # to complete the form instead.
    missing = [name for name, value in record.items() if value is None]
    if missing:
        return ("Please fill in all fields before predicting. Missing: "
                + ", ".join(missing))

    input_df = pd.DataFrame([record])

    # Encode every category level here. With a single row each categorical
    # column has exactly one level, so drop_first=True would discard it and
    # every applicant would be scored as the baseline category regardless of
    # the selection. feature_names was built with drop_first=True, so
    # reindexing to it discards the baseline-level columns and zero-fills the
    # levels this applicant does not have.
    input_encoded = pd.get_dummies(input_df).reindex(
        columns=feature_names, fill_value=0
    )

    # Predict
    pred = model.predict(input_encoded)[0]
    prob = model.predict_proba(input_encoded)[0][1]

    result = "Loan Approved" if pred == 1 else "Loan Rejected"
    msg = confidence_message(pred, prob)
    return f"{result}\n\nApproval Probability: {prob:.2%}\n{msg}"

# Step 6: Define batch prediction function (adapted from the standalone script)
def predict_from_file(file_obj):
    """Score every row of an uploaded CSV/Excel file.

    Args:
        file_obj: The uploaded file, either as a path string or as an object
            exposing the path through a ``name`` attribute; ``None`` when
            nothing was uploaded.

    Returns:
        A 2-tuple ``(html, csv_path)``, matching the two outputs (HTML
        preview, downloadable file) this function is wired to:

        * on success, an HTML table of the first five result rows and the
          path of a temporary CSV holding all rows plus the ``Prediction``,
          ``Approval_Probability`` and ``Confidence_Message`` columns;
        * on any input problem, a human-readable message and ``None``.
    """
    if file_obj is None:
        return "Please upload a CSV or Excel file.", None

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    file_path = getattr(file_obj, "name", file_obj)

    # Load the uploaded file
    try:
        file_ext = os.path.splitext(file_path)[1].lower()
        if file_ext == ".csv":
            new_data = pd.read_csv(file_path)
        elif file_ext in (".xls", ".xlsx"):
            new_data = pd.read_excel(file_path)
        else:
            return "Unsupported file format. Please upload a CSV or Excel file.", None
    except Exception as e:
        # Boundary with user-supplied data: report any parser failure as text.
        return f"Error reading file: {str(e)}", None

    if new_data.empty:
        return "The uploaded file contains no rows to score.", None

    # Encode every category level and then align to the training columns.
    # drop_first=True must not be used here: it drops the first level *present
    # in the upload*, which is not necessarily the level dropped when
    # feature_names was built, so rows could be encoded as the wrong category.
    # Reindexing discards columns the model does not know and zero-fills
    # feature columns absent from the upload.
    new_data_encoded = pd.get_dummies(new_data).reindex(
        columns=feature_names, fill_value=0
    )

    # Predictions
    pred_labels = model.predict(new_data_encoded)
    pred_probs = model.predict_proba(new_data_encoded)[:, 1]

    # Combine with input
    results_df = new_data.copy()
    results_df["Prediction"] = np.where(pred_labels == 1, "Loan Approved", "Loan Rejected")
    results_df["Approval_Probability"] = pred_probs
    results_df["Confidence_Message"] = [
        confidence_message(pred, prob) for pred, prob in zip(pred_labels, pred_probs)
    ]

    # Reserve a temp path and release the handle before writing to it, so the
    # file is not opened twice at once. delete=False keeps it for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        output_path = temp_file.name
    results_df.to_csv(output_path, index=False)

    # Return preview and downloadable file
    preview_html = results_df.head().to_html(index=False)
    return preview_html, output_path

# Step 7: Define feature importance visualization
def feature_importance_plot():
    """Draw the model coefficients as a horizontal bar chart.

    Pairs each entry of the module-level ``feature_names`` with the matching
    value in ``model.coef_[0]``, sorts by coefficient (largest at the top)
    and saves the chart to ``feature_importance.png`` in the working
    directory.

    Returns:
        The path of the saved image, ``"feature_importance.png"``.
    """
    coef_df = pd.DataFrame({
        'Feature': feature_names,
        'Coefficient': model.coef_[0]
    }).sort_values(by='Coefficient', ascending=False)

    # Draw on an explicit figure/axes rather than pyplot's implicit "current
    # figure", and always close the figure: pyplot keeps every figure alive
    # until it is closed, so each call would otherwise leak one.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.barh(coef_df['Feature'], coef_df['Coefficient'], color='skyblue')
        ax.set_xlabel("Coefficient Value (Impact Strength)")
        ax.set_title("Feature Importance (Logistic Regression)")
        # barh plots the first row at the bottom; flip so the largest
        # coefficient is on top.
        ax.invert_yaxis()
        fig.tight_layout()
        fig.savefig("feature_importance.png", bbox_inches='tight')
    finally:
        plt.close(fig)

    return "feature_importance.png"

# Step 8: Gradio UI Components
# Numeric applicant attributes (sliders start at their minimum).
age = gr.Slider(minimum=18, maximum=70, step=1, label="Applicant Age")
income = gr.Number(label="Annual Income (KWD)")
employment_years = gr.Slider(minimum=0, maximum=40, step=1, label="Years of Employment")
credit_score = gr.Slider(minimum=300, maximum=850, step=1, label="Credit Score")

# Loan request: whole-number amount, starting at and not below 5000,
# adjusted in steps of 100.
loan_amount = gr.Number(
    label="Loan Amount Requested (KWD)",
    value=5000,
    minimum=5000,
    step=100,
    precision=0,
)
loan_term_months = gr.Slider(minimum=6, maximum=60, step=6, label="Loan Term (Months)")
existing_loans_count = gr.Slider(minimum=0, maximum=5, step=1, label="Existing Loans")

# Categorical applicant attributes.
marital_status = gr.Dropdown(
    choices=["Single", "Married", "Divorced"],
    label="Marital Status",
)
education_level = gr.Dropdown(
    choices=["High School", "Bachelor", "Master", "PhD"],
    label="Education Level",
)
loan_purpose = gr.Dropdown(
    choices=["Home", "Car", "Personal"],
    label="Loan Purpose",
)

# Step 9: Gradio Interfaces
# Tab 1: form-based prediction for one applicant. The inputs are listed in the
# same order as the parameters of predict_loan.
predict_interface = gr.Interface(
    title="Loan Approval Prediction",
    description="Predict whether a loan will be approved using a trained Logistic Regression model.",
    theme="gradio/soft",
    fn=predict_loan,
    inputs=[
        age,
        income,
        employment_years,
        credit_score,
        loan_amount,
        loan_term_months,
        existing_loans_count,
        marital_status,
        education_level,
        loan_purpose,
    ],
    # Taller text box (8 lines, growing to 12) for the multi-line result.
    outputs=gr.Textbox(
        label="Prediction Result",
        placeholder="Prediction will appear here...",
        lines=8,
        max_lines=12,
    ),
)

# Tab 2: file upload in, HTML preview plus downloadable CSV out.
batch_interface = gr.Interface(
    title="Batch Loan Predictions",
    description="Upload a CSV or Excel file containing applicant data to get loan approval predictions.",
    fn=predict_from_file,
    inputs=gr.File(label="Upload CSV or Excel File"),
    outputs=[
        gr.HTML(label="Preview of Results (first 5 rows)"),
        gr.File(label="Download Full Predictions (CSV)"),
    ],
)

# Tab 3: no inputs; shows the image file written by feature_importance_plot.
feature_interface = gr.Interface(
    title="Model Feature Importance",
    description="Visualizes the coefficient impact strength of each feature in the logistic regression model.",
    fn=feature_importance_plot,
    inputs=None,
    outputs=gr.Image(type="filepath", label="Feature Importance Chart"),
)

# Step 10: Combine Tabs and Launch (Colab-safe)
# Assemble the three interfaces into one tabbed app; tab titles are given in
# the same order as the interfaces.
demo = gr.TabbedInterface(
    [predict_interface, batch_interface, feature_interface],
    ["Single Loan Prediction", "Batch Predictions", "Feature Importance"]
)

# Start the server only when this file is executed directly (script or
# notebook cell), so importing the module -- e.g. to reuse `demo` or the
# prediction functions -- does not launch a web server as a side effect.
if __name__ == "__main__":
    demo.launch(share=True, inline=True)