Spaces:
Runtime error
Runtime error
Upload 9 main files
Browse files- README.md +28 -8
- app.py +460 -0
- imputer.pkl +3 -0
- predictor.py +434 -0
- requirements.txt +7 -0
- scaler.pkl +3 -0
- training_features.csv +99 -0
- training_features.json +155 -0
- xgb_best_model.pkl +3 -0
README.md
CHANGED
|
@@ -1,14 +1,34 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
license:
|
| 11 |
-
short_description: Predict loan defaults with 92.3% accuracy using machine lear
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Credit Risk Predictor
|
| 3 |
+
emoji: 🏦
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 3.50.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 🏦 Credit Risk Prediction System
|
| 14 |
+
|
| 15 |
+
Predict loan defaults with a 92.3% AUC-ROC score using machine learning.
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
- 92.3% AUC-ROC (beats the reference paper's 86-87%)
|
| 19 |
+
- Business-optimized for maximum profit
|
| 20 |
+
- Real-time predictions with visualizations
|
| 21 |
+
- Based on 358,244 real loans
|
| 22 |
+
|
| 23 |
+
## How to Use
|
| 24 |
+
1. Fill in the loan application details
|
| 25 |
+
2. Click "Assess Credit Risk"
|
| 26 |
+
3. Get instant approval/rejection with risk analysis
|
| 27 |
+
|
| 28 |
+
## Model Details
|
| 29 |
+
- **Algorithm**: XGBoost with enhanced features
|
| 30 |
+
- **Training Data**: Lending Club (2013-2014)
|
| 31 |
+
- **Key Improvement**: +0.010 AUC over baseline
|
| 32 |
+
- **Business Impact**: Optimized threshold at 28%
|
| 33 |
+
|
| 34 |
+
*For research purposes only. Not financial advice.*
|
app.py
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# deployment/gradio_app.py
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import json
|
| 5 |
+
from predictor import CreditRiskPredictor
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
# Initialize predictor.
# The artifacts (model/scaler/imputer pickles and training_features.json) are
# looked up in a "model_artifacts" folder first, preserving the original
# behaviour. When that folder does not exist we fall back to the directory
# containing this script, so a flat upload (artifacts next to app.py) no longer
# crashes at start-up with "No model files found".
from pathlib import Path

_artifact_dir = Path("model_artifacts")
if not _artifact_dir.is_dir():
    _artifact_dir = Path(__file__).resolve().parent

predictor = CreditRiskPredictor(str(_artifact_dir))

# Get the actual base features needed from the predictor
if hasattr(predictor, 'base_features_needed') and predictor.base_features_needed:
    print(f"📋 Model needs these base features: {predictor.base_features_needed}")
else:
    print("⚠️ Could not determine base features needed")
|
| 17 |
+
|
| 18 |
+
# Human-readable descriptions shown as the `info` tooltip of each input widget.
# Keys are the raw loan-application column names used throughout this file.
FEATURE_INFO = dict(
    # Required loan / borrower fields
    loan_amnt="Total amount of the loan applied for",
    int_rate="Interest rate on the loan",
    grade="LC assigned loan grade (A=best, G=worst)",
    emp_length="Employment length in years",
    annual_inc="Self-reported annual income",
    dti="Debt-to-income ratio",
    revol_util="Revolving line utilization rate",
    delinq_2yrs="Number of delinquencies in past 2 years",
    inq_last_6mths="Number of credit inquiries in past 6 months",
    open_acc="Number of open credit lines",
    total_acc="Total number of credit lines",
    # Additional (optional) credit-history fields
    revol_bal="Total credit revolving balance",
    total_bc_limit="Total bankcard limit",
    total_bal_ex_mort="Total balance excluding mortgage",
    avg_cur_bal="Average current balance",
    mo_sin_old_il_acct="Months since oldest installment account opened",
    mo_sin_old_rev_tl_op="Months since oldest revolving account opened",
    mo_sin_rcnt_rev_tl_op="Months since most recent revolving account opened",
    mths_since_recent_bc="Months since most recent bankcard account opened",
    mths_since_recent_inq="Months since most recent inquiry",
    pct_tl_nvr_dlq="Percent of trades never delinquent",
    last_fico_range_low="Lower bound of the last FICO range",
    last_fico_range_high="Upper bound of the last FICO range",
    years_since_earliest_cr="Years since earliest credit line opened",
    # Categorical / free-text fields
    addr_state="State of the borrower (2-letter code)",
    home_ownership="Home ownership status",
    purpose="Purpose of the loan",
    verification_status="Income verification status",
    title="Loan title/description",
)
|
| 51 |
+
|
| 52 |
+
def create_visualization(default_prob, threshold=0.28):
    """Draw a horizontal risk bar with the decision threshold and the prediction.

    Args:
        default_prob: Predicted default probability, plotted on a 0-1 x-axis.
        threshold: Decision threshold drawn as a dashed vertical line.

    Returns:
        The matplotlib ``Figure``. It is detached from pyplot's global figure
        registry before being returned, so repeated calls in a long-running
        server do not accumulate open figures; the figure object itself stays
        fully renderable.
    """
    fig, ax = plt.subplots(figsize=(8, 2))
    try:
        # Gradient risk bar: RdYlGn_r runs from green (low values) to red
        # (high values), i.e. low default probability is shown as "safe".
        x = np.linspace(0, 1, 100)
        colors = plt.cm.RdYlGn_r(x)
        for left, right, color in zip(x[:-1], x[1:], colors):
            ax.fill_between([left, right], 0, 1, color=color, alpha=0.7)

        # Mark threshold
        ax.axvline(x=threshold, color='black', linestyle='--', linewidth=2,
                   label=f'Threshold ({threshold:.0%})')

        # Mark prediction
        ax.plot(default_prob, 0.5, 'ro', markersize=15,
                label=f'Prediction ({default_prob:.1%})')

        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_xlabel('Default Probability')
        ax.set_title('Risk Assessment')
        ax.legend(loc='upper right')
        ax.set_yticks([])

        # Use the figure's own layout call rather than pyplot's implicit
        # "current figure", which is not guaranteed to be this one.
        fig.tight_layout()
    finally:
        # Release pyplot's reference whether or not drawing succeeded.
        plt.close(fig)
    return fig
|
| 78 |
+
|
| 79 |
+
def predict_loan(loan_amnt, int_rate, grade, emp_length, annual_inc,
                 dti, revol_util, delinq_2yrs, inq_last_6mths,
                 open_acc, total_acc, revol_bal=5000, total_bc_limit=20000,
                 total_bal_ex_mort=30000, avg_cur_bal=2500,
                 mo_sin_old_il_acct=60, mo_sin_old_rev_tl_op=48,
                 mo_sin_rcnt_rev_tl_op=12, mths_since_recent_bc=6,
                 mths_since_recent_inq=3, pct_tl_nvr_dlq=95,
                 last_fico_range_low=680, last_fico_range_high=684,
                 years_since_earliest_cr=10, addr_state="CA",
                 home_ownership="RENT", purpose="debt_consolidation",
                 verification_status="Verified",
                 title="Debt consolidation loan"):
    """Score one loan application and format the result for the UI.

    The arguments are the raw form values, in the same order as the input
    components wired to the submit button. They are coerced to the types put
    into the payload below and passed to ``predictor.predict``.

    Returns:
        A 4-tuple ``(decision_html, results_markdown, color, figure)``. The
        tuple always has four entries, also on failure, because the click
        handler is bound to four output components; on failure the markdown and
        figure slots are ``None``.
    """
    # Prepare input with ALL features the model expects
    loan_data = {
        # Basic loan info
        'loan_amnt': float(loan_amnt),
        'int_rate': float(int_rate),
        'grade': grade,
        'emp_length': emp_length,
        'annual_inc': float(annual_inc),
        'dti': float(dti),
        # Sent as a percent string, e.g. "45%".
        # NOTE(review): presumably the predictor strips the "%" - confirm.
        'revol_util': f"{revol_util}%",
        'delinq_2yrs': int(delinq_2yrs),
        'inq_last_6mths': int(inq_last_6mths),
        'open_acc': int(open_acc),
        'total_acc': int(total_acc),

        # Additional credit features
        'revol_bal': float(revol_bal),
        'total_bc_limit': float(total_bc_limit),
        'total_bal_ex_mort': float(total_bal_ex_mort),
        'avg_cur_bal': float(avg_cur_bal),
        'mo_sin_old_il_acct': float(mo_sin_old_il_acct),
        'mo_sin_old_rev_tl_op': float(mo_sin_old_rev_tl_op),
        'mo_sin_rcnt_rev_tl_op': float(mo_sin_rcnt_rev_tl_op),
        'mths_since_recent_bc': float(mths_since_recent_bc),
        'mths_since_recent_inq': float(mths_since_recent_inq),
        'pct_tl_nvr_dlq': float(pct_tl_nvr_dlq) / 100.0,  # Convert to decimal
        'last_fico_range_low': float(last_fico_range_low),
        'last_fico_range_high': float(last_fico_range_high),
        'years_since_earliest_cr': float(years_since_earliest_cr),

        # Categorical features for one-hot encoding.
        # The state comes from a free-text box, so normalise "ca " -> "CA".
        'addr_state': str(addr_state).strip().upper(),
        'home_ownership': str(home_ownership),
        'purpose': str(purpose),
        'verification_status': str(verification_status),
        'title': str(title)
    }

    # Get prediction
    result = predictor.predict(loan_data)

    if not result['success']:
        # Four values to match the four bound outputs (no markdown, no plot).
        return f"❌ Error: {result['error']}", None, "red", None

    # Format results
    if result['decision'] == 'APPROVE':
        decision_html = """
        <div style='background-color: #d4edda; padding: 20px; border-radius: 10px; border: 2px solid #c3e6cb;'>
        <h2 style='color: #155724; margin: 0;'>✅ LOAN APPROVED</h2>
        </div>
        """
        color = "green"
    else:
        decision_html = """
        <div style='background-color: #f8d7da; padding: 20px; border-radius: 10px; border: 2px solid #f5c6cb;'>
        <h2 style='color: #721c24; margin: 0;'>❌ LOAN REJECTED</h2>
        </div>
        """
        color = "red"

    # Create results table
    results_md = f"""
    ## 📊 Prediction Results

    | Metric | Value |
    |--------|-------|
    | **Default Probability** | {result['default_probability']:.2%} |
    | **Risk Level** | {result['risk_level']} |
    | **Confidence** | {result['confidence']:.0%} |
    | **Optimal Threshold** | {result['optimal_threshold']:.0%} |

    ### 💡 Explanation
    {result['explanation']}

    ### 🔧 Model Info
    - **Features used**: {len(predictor.feature_list) if predictor.feature_list else 'Unknown'}
    - **Features provided**: {len(loan_data)}
    - **Threshold optimized for profit**: {result['optimal_threshold']:.0%}

    ---
    *Model accuracy: 92.3% AUC-ROC | Trained on 358,244 loans*
    """

    # Create visualization
    fig = create_visualization(result['default_probability'], result['optimal_threshold'])

    return decision_html, results_md, color, fig
|
| 180 |
+
|
| 181 |
+
# Create Gradio interface.
# Layout: header, collapsible "advanced" inputs, required inputs, action
# buttons, example presets, result widgets, footer. All event wiring sits at
# the end of the block because listeners must be registered inside the
# `gr.Blocks` context.
with gr.Blocks(title="Credit Risk Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🏦 Credit Risk Prediction System
    *Predict loan defaults with 92.3% accuracy using machine learning*

    Based on research: *"Credit scoring for peer-to-peer lending using machine learning techniques"*
    (Quantitative Finance and Economics, Volume 6, Issue 2) with enhancements.
    """)

    # Advanced features accordion (kept in a variable so that the
    # "Simple Mode" button can collapse it).
    with gr.Accordion("🔧 Advanced Features (Optional)", open=False) as advanced_accordion:
        gr.Markdown("""
        **Default values are set to typical/average levels.**
        These additional features improve prediction accuracy but are optional.
        """)

        with gr.Row():
            with gr.Column():
                revol_bal = gr.Slider(0, 100000, 5000, step=1000,
                                      label="Revolving Balance ($)",
                                      info=FEATURE_INFO['revol_bal'])

                total_bc_limit = gr.Slider(0, 100000, 20000, step=1000,
                                           label="Total Bankcard Limit ($)",
                                           info=FEATURE_INFO['total_bc_limit'])

                total_bal_ex_mort = gr.Slider(0, 200000, 30000, step=1000,
                                              label="Total Balance Excl. Mortgage ($)",
                                              info=FEATURE_INFO['total_bal_ex_mort'])

                avg_cur_bal = gr.Slider(0, 50000, 2500, step=100,
                                        label="Average Current Balance ($)",
                                        info=FEATURE_INFO['avg_cur_bal'])

            with gr.Column():
                mo_sin_old_il_acct = gr.Slider(0, 300, 60, step=1,
                                               label="Months since oldest installment account",
                                               info=FEATURE_INFO['mo_sin_old_il_acct'])

                mo_sin_old_rev_tl_op = gr.Slider(0, 300, 48, step=1,
                                                 label="Months since oldest revolving account",
                                                 info=FEATURE_INFO['mo_sin_old_rev_tl_op'])

                mo_sin_rcnt_rev_tl_op = gr.Slider(0, 300, 12, step=1,
                                                  label="Months since newest revolving account",
                                                  info=FEATURE_INFO['mo_sin_rcnt_rev_tl_op'])

                mths_since_recent_bc = gr.Slider(0, 120, 6, step=1,
                                                 label="Months since newest bankcard",
                                                 info=FEATURE_INFO['mths_since_recent_bc'])

        with gr.Row():
            with gr.Column():
                mths_since_recent_inq = gr.Slider(0, 120, 3, step=1,
                                                  label="Months since newest inquiry",
                                                  info=FEATURE_INFO['mths_since_recent_inq'])

                pct_tl_nvr_dlq = gr.Slider(0, 100, 95, step=1,
                                           label="% of trades never delinquent",
                                           info=FEATURE_INFO['pct_tl_nvr_dlq'])

                last_fico_range_low = gr.Slider(300, 850, 680, step=10,
                                                label="Lowest recent FICO score",
                                                info=FEATURE_INFO['last_fico_range_low'])

                # step=1: the default (684) and the example presets (724) are
                # not multiples of 10 above the minimum, so a step of 10
                # could not represent them.
                last_fico_range_high = gr.Slider(300, 850, 684, step=1,
                                                 label="Highest recent FICO score",
                                                 info=FEATURE_INFO['last_fico_range_high'])

            with gr.Column():
                years_since_earliest_cr = gr.Slider(0, 50, 10, step=1,
                                                    label="Years since first credit line",
                                                    info=FEATURE_INFO['years_since_earliest_cr'])

                addr_state = gr.Textbox(value="CA", label="State (2 letters)",
                                        info=FEATURE_INFO['addr_state'])

                home_ownership = gr.Dropdown(["RENT", "MORTGAGE", "OWN", "OTHER"],
                                             value="RENT", label="Home Ownership",
                                             info=FEATURE_INFO['home_ownership'])

        with gr.Row():
            purpose = gr.Dropdown(["debt_consolidation", "credit_card", "home_improvement",
                                   "major_purchase", "medical", "car", "wedding"],
                                  value="debt_consolidation", label="Loan Purpose",
                                  info=FEATURE_INFO['purpose'])

            verification_status = gr.Dropdown(["Verified", "Source Verified", "Not Verified"],
                                              value="Verified", label="Income Verification",
                                              info=FEATURE_INFO['verification_status'])

            title = gr.Textbox(value="Debt consolidation loan", label="Loan Title",
                               info=FEATURE_INFO['title'])

    # Main form
    gr.Markdown("## 📝 Required Loan Information")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Loan Application")

            with gr.Group():
                loan_amnt = gr.Slider(1000, 40000, 15000, step=500,
                                      label="Loan Amount ($)",
                                      info=FEATURE_INFO['loan_amnt'])

                int_rate = gr.Slider(5.0, 30.0, 12.5, step=0.1,
                                     label="Interest Rate (%)",
                                     info=FEATURE_INFO['int_rate'])

                grade = gr.Radio(["A", "B", "C", "D", "E", "F", "G"], value="C",
                                 label="Loan Grade",
                                 info=FEATURE_INFO['grade'])

            with gr.Group():
                emp_length = gr.Dropdown(["< 1 year", "1 year", "2 years", "3 years",
                                          "4 years", "5 years", "6 years", "7 years",
                                          "8 years", "9 years", "10+ years"],
                                         value="5 years",
                                         label="Employment Length",
                                         info=FEATURE_INFO['emp_length'])

                annual_inc = gr.Slider(20000, 1000000, 75000, step=1000,
                                       label="Annual Income ($)",
                                       info=FEATURE_INFO['annual_inc'])

                dti = gr.Slider(0, 40, 18.5, step=0.1,
                                label="Debt-to-Income Ratio",
                                info=FEATURE_INFO['dti'])

        with gr.Column(scale=1):
            gr.Markdown("### Credit History")

            with gr.Group():
                revol_util = gr.Slider(0, 100, 45, step=1,
                                       label="Credit Utilization (%)",
                                       info=FEATURE_INFO['revol_util'])

                delinq_2yrs = gr.Slider(0, 10, 0, step=1,
                                        label="Delinquencies (last 2 years)",
                                        info=FEATURE_INFO['delinq_2yrs'])

                inq_last_6mths = gr.Slider(0, 10, 2, step=1,
                                           label="Credit Inquiries (last 6 months)",
                                           info=FEATURE_INFO['inq_last_6mths'])

            with gr.Group():
                open_acc = gr.Slider(0, 50, 8, step=1,
                                     label="Open Credit Lines",
                                     info=FEATURE_INFO['open_acc'])

                total_acc = gr.Slider(0, 100, 25, step=1,
                                      label="Total Credit Lines",
                                      info=FEATURE_INFO['total_acc'])

    with gr.Row():
        submit_btn = gr.Button("🔍 Assess Credit Risk", variant="primary", size="lg")
        clear_btn = gr.Button("🔄 Clear Form", variant="secondary")
        simple_mode_btn = gr.Button("📱 Simple Mode", variant="secondary")

    # Example buttons
    gr.Markdown("### 🚀 Quick Examples")
    with gr.Row():
        low_risk_btn = gr.Button("👍 Low Risk Example", variant="secondary", size="sm")
        high_risk_btn = gr.Button("👎 High Risk Example", variant="secondary", size="sm")
        borderline_btn = gr.Button("⚖️ Borderline Example", variant="secondary", size="sm")

    # Results section
    gr.Markdown("## 📈 Assessment Results")

    with gr.Row():
        decision_output = gr.HTML(label="Decision")
        color_indicator = gr.HTML(visible=False)

    with gr.Row():
        with gr.Column(scale=2):
            results_output = gr.Markdown(label="Detailed Results")
        with gr.Column(scale=1):
            plot_output = gr.Plot(label="Risk Visualization")

    # Footer
    gr.Markdown("""
    ---
    ### ℹ️ About This Model
    - **Accuracy**: 92.3% AUC-ROC (beats paper's 86-87%)
    - **Training Data**: 358,244 loans from Lending Club (2013-2014)
    - **Key Features**: 98 engineered features including credit history and financial ratios
    - **Business Impact**: Optimized for maximum profit (threshold: 28%)
    - **Improvements**: No undersampling, time-based validation, enhanced features

    *For research purposes only. Not financial advice.*
    """)

    # Example presets. 'basic' holds the 11 required inputs and 'advanced' the
    # 18 optional ones, both in the same order as `all_inputs` below.
    examples = {
        'low': {
            'basic': [10000, 8.5, 'A', '10+ years', 120000, 12.0, 30, 0, 1, 5, 20],
            'advanced': [3000, 15000, 25000, 3000, 120, 96, 24, 12, 6, 98, 720, 724, 15,
                         "CA", "OWN", "debt_consolidation", "Verified", "Debt consolidation"]
        },
        'high': {
            'basic': [35000, 25.0, 'F', '< 1 year', 30000, 35.0, 95, 3, 8, 15, 40],
            'advanced': [20000, 5000, 10000, 1000, 6, 12, 1, 1, 1, 60, 580, 590, 2,
                         "NV", "RENT", "credit_card", "Not Verified", "Credit card payoff"]
        },
        'borderline': {
            'basic': [20000, 15.0, 'D', '3 years', 55000, 22.0, 75, 1, 4, 10, 30],
            'advanced': [10000, 10000, 20000, 2000, 36, 48, 6, 6, 3, 85, 650, 660, 5,
                         "TX", "MORTGAGE", "home_improvement", "Source Verified", "Home renovation loan"]
        }
    }

    def get_example(example_type):
        """Return the 29 input values of the preset named ``example_type``."""
        preset = examples[example_type]
        return preset['basic'] + preset['advanced']

    # Every input component, in the positional order `predict_loan` expects.
    all_inputs = [loan_amnt, int_rate, grade, emp_length, annual_inc,
                  dti, revol_util, delinq_2yrs, inq_last_6mths,
                  open_acc, total_acc, revol_bal, total_bc_limit,
                  total_bal_ex_mort, avg_cur_bal, mo_sin_old_il_acct,
                  mo_sin_old_rev_tl_op, mo_sin_rcnt_rev_tl_op,
                  mths_since_recent_bc, mths_since_recent_inq,
                  pct_tl_nvr_dlq, last_fico_range_low, last_fico_range_high,
                  years_since_earliest_cr, addr_state, home_ownership,
                  purpose, verification_status, title]

    # The four result widgets, in the order `predict_loan` returns its values.
    result_outputs = [decision_output, results_output, color_indicator, plot_output]

    submit_btn.click(
        fn=predict_loan,
        inputs=all_inputs,
        outputs=result_outputs
    )

    def clear_form():
        """Return the default value of every input plus ``None`` per result widget."""
        basic_defaults = [15000, 12.5, 'C', '5 years', 75000, 18.5, 45, 0, 2, 8, 25]
        advanced_defaults = [5000, 20000, 30000, 2500, 60, 48, 12, 6, 3, 95, 680, 684,
                             10, "CA", "RENT", "debt_consolidation", "Verified",
                             "Debt consolidation loan"]
        return basic_defaults + advanced_defaults + [None, None, None, None]

    # The number of outputs must equal the number of returned values
    # (29 inputs + 4 result widgets); previously one widget was missing.
    clear_btn.click(
        fn=clear_form,
        outputs=all_inputs + result_outputs
    )

    # Example buttons
    low_risk_btn.click(
        fn=lambda: get_example('low'),
        outputs=all_inputs
    )

    high_risk_btn.click(
        fn=lambda: get_example('high'),
        outputs=all_inputs
    )

    borderline_btn.click(
        fn=lambda: get_example('borderline'),
        outputs=all_inputs
    )

    # Simple mode button: collapse the advanced-features accordion. The update
    # has to target the accordion itself; with no outputs bound, the returned
    # value was simply discarded.
    simple_mode_btn.click(
        fn=lambda: gr.update(open=False),
        outputs=[advanced_accordion]
    )
|
| 450 |
+
|
| 451 |
+
# Script entry point: report the model's feature count, then serve the UI on
# all interfaces at port 7860.
if __name__ == "__main__":
    print("🚀 Starting Credit Risk Predictor...")
    feature_count = len(predictor.feature_list) if predictor.feature_list else 'Unknown'
    print(f"📊 Model features: {feature_count}")

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    demo.launch(**launch_options)
|
imputer.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43ccf3bf22bce59767331fdda0a5fa8c5ef839774b42b05f1746f162a13b119f
|
| 3 |
+
size 3319
|
predictor.py
ADDED
|
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# deployment/predictor.py
|
| 2 |
+
import joblib
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import re
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
class CreditRiskPredictor:
|
| 10 |
+
"""Predictor using your actual trained model features"""
|
| 11 |
+
|
| 12 |
+
def __init__(self, model_dir="model_artifacts"):
|
| 13 |
+
self.model_dir = Path(model_dir)
|
| 14 |
+
self.model = None
|
| 15 |
+
self.scaler = None
|
| 16 |
+
self.imputer = None
|
| 17 |
+
self.optimal_threshold = 0.28
|
| 18 |
+
|
| 19 |
+
# Load the ACTUAL feature list from your JSON
|
| 20 |
+
self.feature_list = self._load_actual_features()
|
| 21 |
+
print(f"📋 Using {len(self.feature_list)} ACTUAL features")
|
| 22 |
+
|
| 23 |
+
# Extract base features needed from user input
|
| 24 |
+
self.base_features_needed = self._extract_base_features()
|
| 25 |
+
print(f"📋 Expecting {len(self.base_features_needed)} base input features")
|
| 26 |
+
|
| 27 |
+
self.load_artifacts()
|
| 28 |
+
|
| 29 |
+
def _load_actual_features(self):
|
| 30 |
+
"""Load the actual features used in training"""
|
| 31 |
+
feature_file = self.model_dir / "training_features.json"
|
| 32 |
+
if not feature_file.exists():
|
| 33 |
+
print(f"⚠️ {feature_file} not found")
|
| 34 |
+
return []
|
| 35 |
+
|
| 36 |
+
with open(feature_file, 'r') as f:
|
| 37 |
+
data = json.load(f)
|
| 38 |
+
|
| 39 |
+
# Your JSON has 'feature_names' key
|
| 40 |
+
if 'feature_names' in data:
|
| 41 |
+
features = data['feature_names']
|
| 42 |
+
if isinstance(features, list):
|
| 43 |
+
return features
|
| 44 |
+
elif 'enhanced_features' in data:
|
| 45 |
+
features = data['enhanced_features']
|
| 46 |
+
if isinstance(features, list):
|
| 47 |
+
return features
|
| 48 |
+
|
| 49 |
+
print(f"❌ Could not find feature list in JSON. Keys: {list(data.keys())}")
|
| 50 |
+
return []
|
| 51 |
+
|
| 52 |
+
def _extract_base_features(self):
|
| 53 |
+
"""Extract base features from one-hot encoded feature list"""
|
| 54 |
+
if not self.feature_list:
|
| 55 |
+
return []
|
| 56 |
+
|
| 57 |
+
base_features = set()
|
| 58 |
+
for feature in self.feature_list:
|
| 59 |
+
# Handle one-hot encoded features
|
| 60 |
+
if feature.startswith('addr_state_'):
|
| 61 |
+
base_features.add('addr_state')
|
| 62 |
+
elif feature.startswith('home_ownership_'):
|
| 63 |
+
base_features.add('home_ownership')
|
| 64 |
+
elif feature.startswith('purpose_'):
|
| 65 |
+
base_features.add('purpose')
|
| 66 |
+
elif feature.startswith('verification_status_'):
|
| 67 |
+
base_features.add('verification_status')
|
| 68 |
+
elif feature.startswith('title_has_'):
|
| 69 |
+
# These are title-based engineered features
|
| 70 |
+
base_features.add('title')
|
| 71 |
+
elif '_' in feature and not feature.replace('_', '').isnumeric():
|
| 72 |
+
# Other potential categoricals
|
| 73 |
+
parts = feature.split('_')
|
| 74 |
+
if len(parts) > 1:
|
| 75 |
+
base_features.add(parts[0])
|
| 76 |
+
else:
|
| 77 |
+
# Regular feature
|
| 78 |
+
base_features.add(feature)
|
| 79 |
+
|
| 80 |
+
# Filter out features that don't make sense as user inputs
|
| 81 |
+
user_input_features = []
|
| 82 |
+
for feature in base_features:
|
| 83 |
+
if feature not in ['purpose_debt_consolidation', 'verification_status_Verified',
|
| 84 |
+
'verification_status_Source', 'title_has_car', 'title_has_medical',
|
| 85 |
+
'title_has_credit', 'title_has_home', 'title_has_consolidation',
|
| 86 |
+
'title_has_debt', 'title_has_card'] and not any(feature + '_' in f for f in self.feature_list):
|
| 87 |
+
user_input_features.append(feature)
|
| 88 |
+
|
| 89 |
+
return user_input_features
|
| 90 |
+
|
| 91 |
+
def load_artifacts(self):
    """Load the model, scaler and imputer pickles from ``self.model_dir``.

    Files are located with the glob patterns ``*xgb*.pkl``, ``*scaler*.pkl``
    and ``*imputer*.pkl``. The model is mandatory; the scaler and imputer are
    loaded only when a matching file exists. When the loaded model exposes
    ``n_features_in_`` it is compared with ``len(self.feature_list)`` and a
    warning is printed on mismatch.

    Raises:
        FileNotFoundError: If no ``*xgb*.pkl`` file is found.
        Exception: Any other loading error is printed and re-raised.
    """
    def newest(pattern):
        # glob() yields paths in arbitrary, OS-dependent order, so taking
        # element 0 is not reproducible when several files match. Pick the
        # most recently modified match; the file name breaks ties.
        matches = list(self.model_dir.glob(pattern))
        if not matches:
            return None
        return max(matches, key=lambda path: (path.stat().st_mtime, path.name))

    try:
        model_file = newest("*xgb*.pkl")
        scaler_file = newest("*scaler*.pkl")
        imputer_file = newest("*imputer*.pkl")

        if model_file is None:
            raise FileNotFoundError("No model files found")

        # NOTE: joblib.load unpickles arbitrary objects; only point
        # model_dir at trusted artifact files.
        self.model = joblib.load(model_file)
        print(f"✅ Loaded model: {model_file.name}")

        if scaler_file is not None:
            self.scaler = joblib.load(scaler_file)
            print(f"✅ Loaded scaler: {scaler_file.name}")

        if imputer_file is not None:
            self.imputer = joblib.load(imputer_file)
            print(f"✅ Loaded imputer: {imputer_file.name}")

        # Verify feature count
        if hasattr(self.model, 'n_features_in_'):
            print(f"📊 Model expects {self.model.n_features_in_} features")
            print(f"📊 We have {len(self.feature_list)} features in our list")

            if self.model.n_features_in_ != len(self.feature_list):
                print("⚠️ WARNING: Feature count mismatch!")

    except Exception as e:
        print(f"❌ Error loading artifacts: {e}")
        raise
|
| 125 |
+
|
| 126 |
+
def _engineer_features(self, df):
|
| 127 |
+
"""Create all features including one-hot encoded"""
|
| 128 |
+
if not self.feature_list:
|
| 129 |
+
raise ValueError("No feature list available!")
|
| 130 |
+
|
| 131 |
+
# First, ensure we have all base features (fill missing with defaults)
|
| 132 |
+
for feature in self.base_features_needed:
|
| 133 |
+
if feature not in df.columns:
|
| 134 |
+
# Set appropriate defaults based on feature type
|
| 135 |
+
if feature in ['loan_amnt', 'annual_inc', 'int_rate', 'dti', 'total_acc',
|
| 136 |
+
'revol_bal', 'total_bc_limit', 'total_bal_ex_mort', 'avg_cur_bal',
|
| 137 |
+
'mo_sin_old_il_acct', 'mo_sin_old_rev_tl_op', 'mo_sin_rcnt_rev_tl_op',
|
| 138 |
+
'mths_since_recent_bc', 'mths_since_recent_inq', 'last_fico_range_low',
|
| 139 |
+
'last_fico_range_high', 'years_since_earliest_cr']:
|
| 140 |
+
df[feature] = 0 # Numerical defaults
|
| 141 |
+
elif feature in ['addr_state', 'home_ownership', 'purpose', 'verification_status', 'title']:
|
| 142 |
+
df[feature] = 'unknown' # Categorical defaults
|
| 143 |
+
elif feature in ['grade_numeric', 'emp_length_numeric', 'revol_util_decimal',
|
| 144 |
+
'loan_to_income', 'int_rate_times_loan', 'subprime_high_dti',
|
| 145 |
+
'pct_tl_nvr_dlq', 'title_length', 'title_word_count']:
|
| 146 |
+
df[feature] = 0 # Engineered feature defaults
|
| 147 |
+
elif feature in ['delinq_2yrs', 'inq_last_6mths', 'open_acc', 'has_delinq_history']:
|
| 148 |
+
df[feature] = 0 # Credit history defaults
|
| 149 |
+
else:
|
| 150 |
+
df[feature] = 0
|
| 151 |
+
|
| 152 |
+
# Convert categorical to one-hot
|
| 153 |
+
df = self._create_one_hot_features(df)
|
| 154 |
+
|
| 155 |
+
# Engineered features
|
| 156 |
+
df = self._create_engineered_features(df)
|
| 157 |
+
|
| 158 |
+
return df
|
| 159 |
+
|
| 160 |
+
def _create_one_hot_features(self, df):
|
| 161 |
+
"""Create one-hot encoded features from categorical variables"""
|
| 162 |
+
if not self.feature_list:
|
| 163 |
+
return df
|
| 164 |
+
|
| 165 |
+
for feature in self.feature_list:
|
| 166 |
+
# Handle different categorical encodings
|
| 167 |
+
if feature.startswith('addr_state_'):
|
| 168 |
+
state_code = feature.replace('addr_state_', '')
|
| 169 |
+
if 'addr_state' in df.columns:
|
| 170 |
+
df[feature] = (df['addr_state'].astype(str).str.upper() == state_code).astype(int)
|
| 171 |
+
else:
|
| 172 |
+
df[feature] = 0
|
| 173 |
+
|
| 174 |
+
elif feature.startswith('home_ownership_'):
|
| 175 |
+
ownership_type = feature.replace('home_ownership_', '')
|
| 176 |
+
if 'home_ownership' in df.columns:
|
| 177 |
+
df[feature] = (df['home_ownership'].astype(str).str.upper() == ownership_type).astype(int)
|
| 178 |
+
else:
|
| 179 |
+
df[feature] = 0
|
| 180 |
+
|
| 181 |
+
elif feature.startswith('purpose_'):
|
| 182 |
+
purpose_type = feature.replace('purpose_', '')
|
| 183 |
+
if 'purpose' in df.columns:
|
| 184 |
+
df[feature] = (df['purpose'].astype(str).str.lower().replace(' ', '_') == purpose_type).astype(int)
|
| 185 |
+
else:
|
| 186 |
+
df[feature] = 0
|
| 187 |
+
|
| 188 |
+
elif feature.startswith('verification_status_'):
|
| 189 |
+
status_type = feature.replace('verification_status_', '')
|
| 190 |
+
if 'verification_status' in df.columns:
|
| 191 |
+
df[feature] = (df['verification_status'].astype(str).str.replace(' ', '_') == status_type).astype(int)
|
| 192 |
+
else:
|
| 193 |
+
df[feature] = 0
|
| 194 |
+
|
| 195 |
+
elif feature.startswith('title_has_'):
|
| 196 |
+
# These are title-based engineered features
|
| 197 |
+
keyword = feature.replace('title_has_', '')
|
| 198 |
+
if 'title' in df.columns:
|
| 199 |
+
title_str = str(df['title'].iloc[0]).lower() if len(df) > 0 else ''
|
| 200 |
+
df[feature] = 1 if keyword in title_str else 0
|
| 201 |
+
else:
|
| 202 |
+
df[feature] = 0
|
| 203 |
+
|
| 204 |
+
return df
|
| 205 |
+
|
| 206 |
+
def _create_engineered_features(self, df):
|
| 207 |
+
"""Create engineered features"""
|
| 208 |
+
# Grade to numeric (if grade is provided)
|
| 209 |
+
if 'grade' in df.columns:
|
| 210 |
+
grade_map = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7}
|
| 211 |
+
df['grade_numeric'] = df['grade'].map(grade_map).fillna(4)
|
| 212 |
+
|
| 213 |
+
# Employment length to numeric
|
| 214 |
+
if 'emp_length' in df.columns:
|
| 215 |
+
df['emp_length_numeric'] = df['emp_length'].apply(self._convert_emp_length)
|
| 216 |
+
|
| 217 |
+
# Credit utilization to decimal
|
| 218 |
+
if 'revol_util' in df.columns:
|
| 219 |
+
df['revol_util_decimal'] = df['revol_util'].astype(str).str.replace('%', '', regex=False).astype(float) / 100
|
| 220 |
+
|
| 221 |
+
# Financial ratios
|
| 222 |
+
if 'loan_amnt' in df.columns and 'annual_inc' in df.columns:
|
| 223 |
+
df['loan_to_income'] = df['loan_amnt'] / (df['annual_inc'].replace(0, 1) + 1)
|
| 224 |
+
|
| 225 |
+
if 'int_rate' in df.columns and 'loan_amnt' in df.columns:
|
| 226 |
+
df['int_rate_times_loan'] = df['int_rate'] * df['loan_amnt'] / 1000
|
| 227 |
+
|
| 228 |
+
# Credit flags
|
| 229 |
+
if 'delinq_2yrs' in df.columns:
|
| 230 |
+
df['has_delinq_history'] = (df['delinq_2yrs'] > 0).astype(int)
|
| 231 |
+
|
| 232 |
+
# Subprime indicator
|
| 233 |
+
if 'grade_numeric' in df.columns and 'dti' in df.columns:
|
| 234 |
+
df['subprime_high_dti'] = ((df['grade_numeric'] >= 4) & (df['dti'] > 20)).astype(int)
|
| 235 |
+
|
| 236 |
+
# Title-based features
|
| 237 |
+
if 'title' in df.columns:
|
| 238 |
+
title_str = str(df['title'].iloc[0]).lower() if len(df) > 0 else ''
|
| 239 |
+
df['title_length'] = len(title_str)
|
| 240 |
+
df['title_word_count'] = len(title_str.split())
|
| 241 |
+
|
| 242 |
+
# Years since earliest credit line (simplified)
|
| 243 |
+
if 'years_since_earliest_cr' not in df.columns:
|
| 244 |
+
df['years_since_earliest_cr'] = 10 # Default value
|
| 245 |
+
|
| 246 |
+
# Set defaults for any missing engineered features
|
| 247 |
+
for feature in self.feature_list:
|
| 248 |
+
if feature not in df.columns and not feature.startswith(('addr_state_', 'home_ownership_',
|
| 249 |
+
'purpose_', 'verification_status_', 'title_has_')):
|
| 250 |
+
# Default values based on feature type
|
| 251 |
+
if 'fico' in feature.lower():
|
| 252 |
+
df[feature] = 700 # Average FICO score
|
| 253 |
+
elif any(x in feature for x in ['rate', 'util', 'pct', 'ratio']):
|
| 254 |
+
df[feature] = 0.5 # Percentage default
|
| 255 |
+
elif any(x in feature for x in ['loan', 'amt', 'bal', 'limit', 'inc']):
|
| 256 |
+
df[feature] = 0 # Monetary default
|
| 257 |
+
elif any(x in feature for x in ['month', 'mo', 'mth', 'year']):
|
| 258 |
+
df[feature] = 0 # Time default
|
| 259 |
+
else:
|
| 260 |
+
df[feature] = 0
|
| 261 |
+
|
| 262 |
+
return df
|
| 263 |
+
|
| 264 |
+
def _convert_emp_length(self, val):
|
| 265 |
+
"""Convert employment length string to numeric"""
|
| 266 |
+
if pd.isna(val):
|
| 267 |
+
return 3.0 # Default
|
| 268 |
+
val = str(val).lower()
|
| 269 |
+
if '10+' in val:
|
| 270 |
+
return 10.0
|
| 271 |
+
elif '< 1' in val:
|
| 272 |
+
return 0.5
|
| 273 |
+
else:
|
| 274 |
+
numbers = re.findall(r'\d+', val)
|
| 275 |
+
return float(numbers[0]) if numbers else 3.0
|
| 276 |
+
|
| 277 |
+
def preprocess_input(self, input_dict):
    """Convert one raw loan application into the model's feature matrix.

    The input is wrapped in a one-row DataFrame, expanded by
    ``_engineer_features``, restricted to ``self.feature_list`` (in that
    order, with absent features filled with 0), and then passed through
    ``self.imputer`` and ``self.scaler`` when they are set.

    Args:
        input_dict: Mapping of raw field name to value for a single loan.

    Returns:
        numpy.ndarray: The ``.values`` of the processed frame, with one
        column per entry of ``self.feature_list``.

    Raises:
        ValueError: If ``self.feature_list`` is empty.
    """
    if not self.feature_list:
        raise ValueError("No feature list available!")

    # Engineer all features including one-hot
    df = self._engineer_features(pd.DataFrame([input_dict]))

    # Select the trained columns in training order; absent ones become 0.
    # reindex keeps the existing row index, whereas filling an index-less
    # frame column by column turns scalar-filled columns into NaN as soon as
    # a later array assignment creates the index.
    processed_df = df.reindex(columns=self.feature_list, fill_value=0)

    # Debug: Show we have the right number of features
    print(f"🔧 Created dataframe with {len(processed_df.columns)} features")

    # Imputation first, then scaling. A failing transformer is reported and
    # skipped (best effort), so the returned features may be untransformed.
    for label, transformer in (("Imputer", self.imputer), ("Scaler", self.scaler)):
        if transformer is None or processed_df.empty:
            continue
        try:
            processed_df = pd.DataFrame(
                transformer.transform(processed_df),
                columns=self.feature_list
            )
        except Exception as e:
            print(f"⚠️ {label} error: {e}")

    return processed_df.values
|
| 321 |
+
|
| 322 |
+
def predict(self, input_dict):
    """Score a single loan application.

    Args:
        input_dict: Raw loan fields, forwarded to ``preprocess_input``.

    Returns:
        dict: On success, ``success`` (True), ``default_probability``,
        ``decision`` ("APPROVE" when the probability is below
        ``self.optimal_threshold``, otherwise "REJECT"), ``risk_level``,
        ``confidence``, ``optimal_threshold`` and ``explanation``. If any
        exception occurs it is printed with its traceback and the result is
        ``{'success': False, 'error': <message>, 'decision': 'ERROR'}``.
    """
    try:
        features = self.preprocess_input(input_dict)
        if features.size == 0:
            raise ValueError("No features generated!")

        # Debug info
        print(f"🔧 Processed features shape: {features.shape}")

        # Probability of the positive (default) class for the single row.
        class_probabilities = self.model.predict_proba(features)
        default_prob = class_probabilities[0, 1]

        if default_prob < self.optimal_threshold:
            decision = "APPROVE"
        else:
            decision = "REJECT"

        result = {'success': True}
        result['default_probability'] = float(default_prob)
        result['decision'] = decision
        result['risk_level'] = self._get_risk_level(default_prob)
        result['confidence'] = self._get_confidence(default_prob)
        result['optimal_threshold'] = self.optimal_threshold
        result['explanation'] = (
            f"Default probability: {default_prob:.1%} "
            f"(threshold: {self.optimal_threshold:.1%})"
        )
        return result

    except Exception as e:
        import traceback
        print(f"❌ Prediction error: {e}")
        traceback.print_exc()
        failure = {'success': False}
        failure['error'] = str(e)
        failure['decision'] = 'ERROR'
        return failure
|
| 359 |
+
|
| 360 |
+
def _get_risk_level(self, prob):
|
| 361 |
+
if prob < 0.2: return "LOW"
|
| 362 |
+
elif prob < 0.4: return "MEDIUM"
|
| 363 |
+
elif prob < 0.6: return "HIGH"
|
| 364 |
+
else: return "VERY HIGH"
|
| 365 |
+
|
| 366 |
+
def _get_confidence(self, prob):
|
| 367 |
+
distance = abs(prob - self.optimal_threshold)
|
| 368 |
+
return max(0.5, 1.0 - distance * 2)
|
| 369 |
+
|
| 370 |
+
# Test with the exact features your model expects
|
| 371 |
+
if __name__ == "__main__":
    # Manual smoke test: build a predictor from the "model_artifacts"
    # directory and score one hand-written loan application.
    print("🧪 Testing CreditRiskPredictor...")
    print("=" * 60)

    predictor = CreditRiskPredictor("model_artifacts")

    if predictor.feature_list:
        test_loan = dict(
            # Basic loan info
            loan_amnt=15000,
            int_rate=12.5,

            # Categorical inputs (one-hot encoded by the predictor)
            addr_state='CA',
            home_ownership='RENT',
            purpose='debt_consolidation',
            verification_status='Verified',

            # Free-text title feeding the title-based features
            title='Debt consolidation loan for credit card payoff',

            # Credit features
            dti=18.5,
            annual_inc=75000,
            revol_util='45%',
            delinq_2yrs=0,
            inq_last_6mths=2,
            open_acc=8,
            total_acc=25,
            revol_bal=5000,
            total_bc_limit=20000,
            total_bal_ex_mort=30000,
            avg_cur_bal=2500,
            mo_sin_old_il_acct=60,
            mo_sin_old_rev_tl_op=48,
            mo_sin_rcnt_rev_tl_op=12,
            mths_since_recent_bc=6,
            mths_since_recent_inq=3,
            pct_tl_nvr_dlq=0.95,
            last_fico_range_low=680,
            last_fico_range_high=684,

            # Additional fields that might be needed
            grade='C',
            emp_length='5 years',
            years_since_earliest_cr=10,
        )

        print("\n📊 Making test prediction...")
        print(f"Using input with {len(test_loan)} fields")

        result = predictor.predict(test_loan)

        divider = "=" * 60
        print("\n" + divider)
        print("📈 PREDICTION RESULTS:")
        print(divider)
        for key, value in result.items():
            # Hide the explanation line for failed predictions.
            if key == 'explanation' and not result['success']:
                continue
            print(f"{key:25}: {value}")
    else:
        print("\n❌ Cannot proceed without features!")
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==3.50.0
|
| 2 |
+
pandas==1.5.0
|
| 3 |
+
numpy==1.24.0
|
| 4 |
+
scikit-learn==1.2.0
|
| 5 |
+
xgboost==1.7.6
|
| 6 |
+
joblib==1.2.0
|
| 7 |
+
matplotlib==3.7.0
|
scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c4b10627fd238874896d566a58e6d09c1a24275882a4c8aec293ff0cad2e9b0
|
| 3 |
+
size 2935
|
training_features.csv
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
feature_name
|
| 2 |
+
mths_since_recent_inq
|
| 3 |
+
purpose_debt_consolidation
|
| 4 |
+
addr_state_NE
|
| 5 |
+
purpose_other
|
| 6 |
+
purpose_wedding
|
| 7 |
+
title_length
|
| 8 |
+
purpose_vacation
|
| 9 |
+
addr_state_SD
|
| 10 |
+
addr_state_OH
|
| 11 |
+
addr_state_VT
|
| 12 |
+
addr_state_WY
|
| 13 |
+
purpose_home_improvement
|
| 14 |
+
addr_state_AR
|
| 15 |
+
dti
|
| 16 |
+
purpose_small_business
|
| 17 |
+
last_fico_range_low
|
| 18 |
+
addr_state_KY
|
| 19 |
+
emp_length_numeric
|
| 20 |
+
addr_state_TN
|
| 21 |
+
addr_state_UT
|
| 22 |
+
title_has_car
|
| 23 |
+
addr_state_WA
|
| 24 |
+
addr_state_KS
|
| 25 |
+
addr_state_VA
|
| 26 |
+
pct_tl_nvr_dlq
|
| 27 |
+
int_rate_times_loan
|
| 28 |
+
addr_state_ID
|
| 29 |
+
loan_to_income
|
| 30 |
+
addr_state_MA
|
| 31 |
+
addr_state_ME
|
| 32 |
+
mths_since_recent_bc
|
| 33 |
+
addr_state_DE
|
| 34 |
+
addr_state_MT
|
| 35 |
+
addr_state_DC
|
| 36 |
+
home_ownership_MORTGAGE
|
| 37 |
+
addr_state_IA
|
| 38 |
+
addr_state_LA
|
| 39 |
+
mo_sin_old_rev_tl_op
|
| 40 |
+
title_has_medical
|
| 41 |
+
addr_state_NY
|
| 42 |
+
addr_state_IL
|
| 43 |
+
title_has_credit
|
| 44 |
+
purpose_major_purchase
|
| 45 |
+
addr_state_AL
|
| 46 |
+
addr_state_CA
|
| 47 |
+
verification_status_Verified
|
| 48 |
+
verification_status_Source Verified
|
| 49 |
+
total_bal_ex_mort
|
| 50 |
+
addr_state_MD
|
| 51 |
+
purpose_medical
|
| 52 |
+
addr_state_MS
|
| 53 |
+
revol_util_decimal
|
| 54 |
+
addr_state_CT
|
| 55 |
+
years_since_earliest_cr
|
| 56 |
+
title_has_home
|
| 57 |
+
mo_sin_old_il_acct
|
| 58 |
+
addr_state_NC
|
| 59 |
+
addr_state_RI
|
| 60 |
+
addr_state_CO
|
| 61 |
+
addr_state_OR
|
| 62 |
+
addr_state_AZ
|
| 63 |
+
addr_state_NV
|
| 64 |
+
addr_state_MI
|
| 65 |
+
addr_state_IN
|
| 66 |
+
total_bc_limit
|
| 67 |
+
home_ownership_OWN
|
| 68 |
+
addr_state_SC
|
| 69 |
+
subprime_high_dti
|
| 70 |
+
home_ownership_RENT
|
| 71 |
+
mo_sin_rcnt_rev_tl_op
|
| 72 |
+
title_has_consolidation
|
| 73 |
+
has_delinq_history
|
| 74 |
+
addr_state_FL
|
| 75 |
+
title_has_debt
|
| 76 |
+
addr_state_NJ
|
| 77 |
+
addr_state_WV
|
| 78 |
+
addr_state_NH
|
| 79 |
+
addr_state_HI
|
| 80 |
+
title_has_card
|
| 81 |
+
addr_state_TX
|
| 82 |
+
annual_inc
|
| 83 |
+
addr_state_GA
|
| 84 |
+
addr_state_WI
|
| 85 |
+
addr_state_MO
|
| 86 |
+
addr_state_MN
|
| 87 |
+
total_acc
|
| 88 |
+
grade_numeric
|
| 89 |
+
addr_state_PA
|
| 90 |
+
addr_state_NM
|
| 91 |
+
purpose_renewable_energy
|
| 92 |
+
addr_state_OK
|
| 93 |
+
last_fico_range_high
|
| 94 |
+
revol_bal
|
| 95 |
+
purpose_house
|
| 96 |
+
purpose_moving
|
| 97 |
+
title_word_count
|
| 98 |
+
avg_cur_bal
|
| 99 |
+
purpose_credit_card
|
training_features.json
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"feature_names": [
|
| 3 |
+
"mths_since_recent_inq",
|
| 4 |
+
"purpose_debt_consolidation",
|
| 5 |
+
"addr_state_NE",
|
| 6 |
+
"purpose_other",
|
| 7 |
+
"purpose_wedding",
|
| 8 |
+
"title_length",
|
| 9 |
+
"purpose_vacation",
|
| 10 |
+
"addr_state_SD",
|
| 11 |
+
"addr_state_OH",
|
| 12 |
+
"addr_state_VT",
|
| 13 |
+
"addr_state_WY",
|
| 14 |
+
"purpose_home_improvement",
|
| 15 |
+
"addr_state_AR",
|
| 16 |
+
"dti",
|
| 17 |
+
"purpose_small_business",
|
| 18 |
+
"last_fico_range_low",
|
| 19 |
+
"addr_state_KY",
|
| 20 |
+
"emp_length_numeric",
|
| 21 |
+
"addr_state_TN",
|
| 22 |
+
"addr_state_UT",
|
| 23 |
+
"title_has_car",
|
| 24 |
+
"addr_state_WA",
|
| 25 |
+
"addr_state_KS",
|
| 26 |
+
"addr_state_VA",
|
| 27 |
+
"pct_tl_nvr_dlq",
|
| 28 |
+
"int_rate_times_loan",
|
| 29 |
+
"addr_state_ID",
|
| 30 |
+
"loan_to_income",
|
| 31 |
+
"addr_state_MA",
|
| 32 |
+
"addr_state_ME",
|
| 33 |
+
"mths_since_recent_bc",
|
| 34 |
+
"addr_state_DE",
|
| 35 |
+
"addr_state_MT",
|
| 36 |
+
"addr_state_DC",
|
| 37 |
+
"home_ownership_MORTGAGE",
|
| 38 |
+
"addr_state_IA",
|
| 39 |
+
"addr_state_LA",
|
| 40 |
+
"mo_sin_old_rev_tl_op",
|
| 41 |
+
"title_has_medical",
|
| 42 |
+
"addr_state_NY",
|
| 43 |
+
"addr_state_IL",
|
| 44 |
+
"title_has_credit",
|
| 45 |
+
"purpose_major_purchase",
|
| 46 |
+
"addr_state_AL",
|
| 47 |
+
"addr_state_CA",
|
| 48 |
+
"verification_status_Verified",
|
| 49 |
+
"verification_status_Source Verified",
|
| 50 |
+
"total_bal_ex_mort",
|
| 51 |
+
"addr_state_MD",
|
| 52 |
+
"purpose_medical",
|
| 53 |
+
"addr_state_MS",
|
| 54 |
+
"revol_util_decimal",
|
| 55 |
+
"addr_state_CT",
|
| 56 |
+
"years_since_earliest_cr",
|
| 57 |
+
"title_has_home",
|
| 58 |
+
"mo_sin_old_il_acct",
|
| 59 |
+
"addr_state_NC",
|
| 60 |
+
"addr_state_RI",
|
| 61 |
+
"addr_state_CO",
|
| 62 |
+
"addr_state_OR",
|
| 63 |
+
"addr_state_AZ",
|
| 64 |
+
"addr_state_NV",
|
| 65 |
+
"addr_state_MI",
|
| 66 |
+
"addr_state_IN",
|
| 67 |
+
"total_bc_limit",
|
| 68 |
+
"home_ownership_OWN",
|
| 69 |
+
"addr_state_SC",
|
| 70 |
+
"subprime_high_dti",
|
| 71 |
+
"home_ownership_RENT",
|
| 72 |
+
"mo_sin_rcnt_rev_tl_op",
|
| 73 |
+
"title_has_consolidation",
|
| 74 |
+
"has_delinq_history",
|
| 75 |
+
"addr_state_FL",
|
| 76 |
+
"title_has_debt",
|
| 77 |
+
"addr_state_NJ",
|
| 78 |
+
"addr_state_WV",
|
| 79 |
+
"addr_state_NH",
|
| 80 |
+
"addr_state_HI",
|
| 81 |
+
"title_has_card",
|
| 82 |
+
"addr_state_TX",
|
| 83 |
+
"annual_inc",
|
| 84 |
+
"addr_state_GA",
|
| 85 |
+
"addr_state_WI",
|
| 86 |
+
"addr_state_MO",
|
| 87 |
+
"addr_state_MN",
|
| 88 |
+
"total_acc",
|
| 89 |
+
"grade_numeric",
|
| 90 |
+
"addr_state_PA",
|
| 91 |
+
"addr_state_NM",
|
| 92 |
+
"purpose_renewable_energy",
|
| 93 |
+
"addr_state_OK",
|
| 94 |
+
"last_fico_range_high",
|
| 95 |
+
"revol_bal",
|
| 96 |
+
"purpose_house",
|
| 97 |
+
"purpose_moving",
|
| 98 |
+
"title_word_count",
|
| 99 |
+
"avg_cur_bal",
|
| 100 |
+
"purpose_credit_card"
|
| 101 |
+
],
|
| 102 |
+
"feature_count": 98,
|
| 103 |
+
"categorical_features": [
|
| 104 |
+
"purpose_debt_consolidation",
|
| 105 |
+
"addr_state_NE",
|
| 106 |
+
"purpose_other",
|
| 107 |
+
"purpose_wedding",
|
| 108 |
+
"purpose_vacation",
|
| 109 |
+
"addr_state_SD",
|
| 110 |
+
"addr_state_OH",
|
| 111 |
+
"addr_state_VT",
|
| 112 |
+
"addr_state_WY",
|
| 113 |
+
"purpose_home_improvement",
|
| 114 |
+
"addr_state_AR",
|
| 115 |
+
"purpose_small_business",
|
| 116 |
+
"addr_state_KY",
|
| 117 |
+
"addr_state_TN",
|
| 118 |
+
"addr_state_UT",
|
| 119 |
+
"addr_state_WA",
|
| 120 |
+
"addr_state_KS",
|
| 121 |
+
"addr_state_VA",
|
| 122 |
+
"addr_state_ID",
|
| 123 |
+
"addr_state_MA",
|
| 124 |
+
"addr_state_ME",
|
| 125 |
+
"addr_state_DE",
|
| 126 |
+
"addr_state_MT",
|
| 127 |
+
"addr_state_DC",
|
| 128 |
+
"home_ownership_MORTGAGE",
|
| 129 |
+
"addr_state_IA",
|
| 130 |
+
"addr_state_LA",
|
| 131 |
+
"addr_state_NY",
|
| 132 |
+
"addr_state_IL",
|
| 133 |
+
"purpose_major_purchase",
|
| 134 |
+
"addr_state_AL",
|
| 135 |
+
"addr_state_CA",
|
| 136 |
+
"verification_status_Verified",
|
| 137 |
+
"verification_status_Source Verified",
|
| 138 |
+
"addr_state_MD",
|
| 139 |
+
"purpose_medical",
|
| 140 |
+
"addr_state_MS",
|
| 141 |
+
"addr_state_CT",
|
| 142 |
+
"addr_state_NC",
|
| 143 |
+
"addr_state_RI",
|
| 144 |
+
"addr_state_CO",
|
| 145 |
+
"addr_state_OR",
|
| 146 |
+
"addr_state_AZ",
|
| 147 |
+
"addr_state_NV",
|
| 148 |
+
"addr_state_MI",
|
| 149 |
+
"addr_state_IN",
|
| 150 |
+
"home_ownership_OWN",
|
| 151 |
+
"addr_state_SC",
|
| 152 |
+
"home_ownership_RENT",
|
| 153 |
+
"addr_state_FL"
|
| 154 |
+
]
|
| 155 |
+
}
|
xgb_best_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bb8b39bb961670917a07abb4a256d08bc70c7245a59df0bee99820e709f5b61
|
| 3 |
+
size 2583734
|