Upload 17 files
Browse files- .gitattributes +3 -0
- Dockerfile +25 -0
- README.md +8 -0
- app.py +9 -0
- data/loan_applications.csv +3 -0
- emi_feature_engineered.csv +3 -0
- feature_builder.py +46 -0
- gitattributes +38 -0
- inference.py +88 -0
- mlflow.db +3 -0
- models/emi_classifier_final.pkl +3 -0
- models/emi_model_optimized.pkl +3 -0
- models/label_encoder.pkl +3 -0
- pages/1_Predict_EMI.py +160 -0
- pages/2_Data_Exploration.py +164 -0
- pages/3_Model_Monitoring.py +29 -0
- pages/4_Admin_Panel.py +19 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
data/loan_applications.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
emi_feature_engineered.csv filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
mlflow.db filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use an official lightweight Python image
|
| 2 |
+
FROM python:3.10-slim
|
| 3 |
+
|
| 4 |
+
# Set the working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Install system dependencies (Required for XGBoost)
|
| 8 |
+
RUN apt-get update && apt-get install -y \
|
| 9 |
+
build-essential \
|
| 10 |
+
libgomp1 \
|
| 11 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
+
|
| 13 |
+
# Copy requirements and install
|
| 14 |
+
COPY requirements.txt .
|
| 15 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 16 |
+
|
| 17 |
+
# Copy everything (this includes your 'pages/' folder and 'models/' folder)
|
| 18 |
+
COPY . .
|
| 19 |
+
|
| 20 |
+
# Expose Hugging Face's default port
|
| 21 |
+
EXPOSE 7860
|
| 22 |
+
|
| 23 |
+
# Run the main entry point (usually named app.py or main.py)
|
| 24 |
+
# Streamlit automatically detects the /pages folder for the sidebar
|
| 25 |
+
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: EMI Eligibility Predictor
|
| 3 |
+
emoji: 💰
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
---
|
app.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
st.set_page_config(
|
| 4 |
+
page_title="EMI Prediction System",
|
| 5 |
+
layout="wide"
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
st.title("EMI Prediction & Eligibility Platform")
|
| 9 |
+
st.write("Use the sidebar to navigate between modules.")
|
data/loan_applications.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcbe93afbb6f8a2db378d0e5f576c9f18699e29ca488522155ebd7720dccadd1
|
| 3 |
+
size 102077185
|
emi_feature_engineered.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcbe93afbb6f8a2db378d0e5f576c9f18699e29ca488522155ebd7720dccadd1
|
| 3 |
+
size 102077185
|
feature_builder.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
def prepare_input_features(raw_input: dict) -> pd.DataFrame:
    """Turn one raw application record into a single-row feature frame.

    The input mapping becomes one DataFrame row, to which the derived
    columns below are appended in this order:

    * ``total_monthly_expenses`` - sum of the six expense fields
    * ``debt_to_income``, ``expense_to_income``, ``savings_ratio`` - each
      numerator divided by ``monthly_salary``
    * ``credit_risk_bucket`` - ``credit_score`` binned into
      (0, 600] / (600, 700] / (700, 900]
    * ``employment_stability_score`` - ``years_of_employment`` scaled by a
      per-employment-type weight

    Args:
        raw_input: Mapping of field name to value for one applicant. It must
            contain every field read below; a missing one raises ``KeyError``.

    Returns:
        A one-row DataFrame holding the original fields plus the derived ones.
    """
    features = pd.DataFrame([raw_input])

    # --- Expense aggregation -------------------------------------------
    # Added left to right, in the same order as the column list.
    expense_columns = (
        "school_fees",
        "college_fees",
        "travel_expenses",
        "groceries_utilities",
        "other_monthly_expenses",
        "monthly_rent",
    )
    first_column, *remaining_columns = expense_columns
    running_total = features[first_column]
    for column in remaining_columns:
        running_total = running_total + features[column]
    features["total_monthly_expenses"] = running_total

    # --- Financial ratios (all relative to monthly salary) ---------------
    ratio_sources = (
        ("debt_to_income", "current_emi_amount"),
        ("expense_to_income", "total_monthly_expenses"),
        ("savings_ratio", "bank_balance"),
    )
    for ratio_name, numerator in ratio_sources:
        features[ratio_name] = features[numerator] / features["monthly_salary"]

    # --- Credit risk bucket ----------------------------------------------
    # pd.cut uses right-closed intervals by default, so 600 falls in the
    # first bucket and 700 in the second.
    bucket_edges = [0, 600, 700, 900]
    bucket_names = ["Low", "Medium", "High"]
    features["credit_risk_bucket"] = pd.cut(
        features["credit_score"],
        bins=bucket_edges,
        labels=bucket_names,
    )

    # --- Employment stability score ---------------------------------------
    # Employment types missing from the table map to NaN, so the score is NaN.
    stability_weight = {
        "Government": 1.5,
        "Private": 1.0,
        "Self-employed": 0.8,
    }
    type_weight = features["employment_type"].map(stability_weight)
    features["employment_stability_score"] = (
        features["years_of_employment"] * type_weight
    )

    return features
|
gitattributes
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
emi_feature_engineered.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
mlflow.db filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
data/loan_applications.csv filter=lfs diff=lfs merge=lfs -text
|
inference.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import joblib
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from feature_builder import prepare_input_features
|
| 6 |
+
|
| 7 |
+
# --------------------------------
|
| 8 |
+
# PATHS
|
| 9 |
+
# --------------------------------
|
| 10 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 11 |
+
|
| 12 |
+
# Ensure these match the new XGBoost models you trained
|
| 13 |
+
CLASSIFIER_PATH = os.path.join(BASE_DIR, "models", "emi_classifier_final.pkl")
|
| 14 |
+
REGRESSOR_PATH = os.path.join(BASE_DIR, "models", "emi_model_optimized.pkl")
|
| 15 |
+
LABEL_ENCODER_PATH = os.path.join(BASE_DIR, "models", "label_encoder.pkl")
|
| 16 |
+
|
| 17 |
+
# --------------------------------
|
| 18 |
+
# LOAD MODELS (ONCE)
|
| 19 |
+
# --------------------------------
|
| 20 |
+
classifier = joblib.load(CLASSIFIER_PATH)
|
| 21 |
+
regressor = joblib.load(REGRESSOR_PATH)
|
| 22 |
+
# We need the label encoder to know which index corresponds to "Eligible"
|
| 23 |
+
label_encoder = joblib.load(LABEL_ENCODER_PATH)
|
| 24 |
+
|
| 25 |
+
# --------------------------------
|
| 26 |
+
# PREDICTION FUNCTION
|
| 27 |
+
# --------------------------------
|
| 28 |
+
def predict_emi(raw_input: dict):
    """Score one loan application and return its verdict and EMI ceiling.

    Steps, in order:
      1. Build model features with ``prepare_input_features``.
      2. Get class probabilities from the module-level ``classifier`` and
         key them by the class names recovered from ``label_encoder``.
      3. Apply fixed probability thresholds to pick a model verdict.
      4. Predict the EMI amount with the module-level ``regressor``,
         clamped so it is never negative.
      5. Apply the hard business rules, which override the model verdict.

    Args:
        raw_input: Mapping of raw applicant fields, as expected by
            ``prepare_input_features``.

    Returns:
        tuple: ``(eligibility_label, max_emi)`` where the label is one of
        "Eligible", "High Risk" or "Not Eligible", and ``max_emi`` is a
        float rounded to 2 decimals (always 0.0 for "Not Eligible").
    """
    # 1. Feature engineering
    features = prepare_input_features(raw_input)

    # 2. Class probabilities for this single row, keyed by class name,
    #    e.g. {'Eligible': 0.38, 'Not_Eligible': 0.62}
    class_probs = classifier.predict_proba(features)[0]
    prob_by_label = {}
    for class_index, probability in enumerate(class_probs):
        class_name = label_encoder.inverse_transform([class_index])[0]
        prob_by_label[class_name] = probability

    # 3. Threshold logic. The original author's note cites a 4.5:1 class
    #    imbalance as the reason "Eligible" needs only 35% rather than a
    #    majority. "Eligible" is checked first and wins when both
    #    thresholds are met.
    p_eligible = prob_by_label.get("Eligible", 0)
    p_high_risk = prob_by_label.get("High_Risk", 0)

    if p_eligible > 0.35:
        model_verdict = "Eligible"
    elif p_high_risk > 0.40:
        model_verdict = "High Risk"
    else:
        model_verdict = "Not Eligible"

    # 4. Regression prediction of the amount, clamped at zero
    predicted_emi = max(float(regressor.predict(features)[0]), 0.0)

    # 5. Hard business rules: these override the model when the credit
    #    score is very low or the debt-to-income ratio is very high.
    score = features["credit_score"].iloc[0]
    dti = features.get("debt_to_income", pd.Series([0])).iloc[0]
    hard_reject = score < 400 or dti > 0.85

    # 6. Final verdict: a hard reject always yields "Not Eligible"
    final_verdict = "Not Eligible" if hard_reject else model_verdict

    # A rejected application never carries a non-zero EMI
    if final_verdict == "Not Eligible":
        predicted_emi = 0.0

    return final_verdict, round(predicted_emi, 2)
|
mlflow.db
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c511fe076991c7283c6c6dea06de1483695e47bced88645e9684a802433c3a17
|
| 3 |
+
size 667648
|
models/emi_classifier_final.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e38f0d3074a6d56bf6069d81591cfaad4c99fa024041bba4e6f645235c1e9601
|
| 3 |
+
size 1954853
|
models/emi_model_optimized.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20b975fc14733370812ae43702901275de249f7fb677b1d9bcbaa54de3d31920
|
| 3 |
+
size 9835116
|
models/label_encoder.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b292508e70165f8e5a96cde70b326e21d5a0021868801711c839ef178b2fbb97
|
| 3 |
+
size 511
|
pages/1_Predict_EMI.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from inference import predict_emi # Ensure inference.py is in the same folder
|
| 5 |
+
|
| 6 |
+
# -------------------------------
|
| 7 |
+
# PAGE CONFIG
|
| 8 |
+
# -------------------------------
|
| 9 |
+
st.set_page_config(
|
| 10 |
+
page_title="EMI Eligibility Pro",
|
| 11 |
+
page_icon="💰",
|
| 12 |
+
layout="wide"
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
# Custom CSS for better styling
|
| 16 |
+
st.markdown("""
|
| 17 |
+
<style>
|
| 18 |
+
.main {
|
| 19 |
+
background-color: #f5f7f9;
|
| 20 |
+
}
|
| 21 |
+
.stMetric {
|
| 22 |
+
background-color: #ffffff;
|
| 23 |
+
padding: 15px;
|
| 24 |
+
border-radius: 10px;
|
| 25 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 26 |
+
}
|
| 27 |
+
</style>
|
| 28 |
+
""", unsafe_allow_html=True)
|
| 29 |
+
|
| 30 |
+
st.title("🚀 EMI Eligibility & Risk Prediction")
|
| 31 |
+
st.write("Fill in the details below to check your loan eligibility and maximum safe EMI.")
|
| 32 |
+
|
| 33 |
+
# -------------------------------
|
| 34 |
+
# INPUT FORM
|
| 35 |
+
# -------------------------------
|
| 36 |
+
with st.container():
|
| 37 |
+
# SECTION 1: Personal & Demographic
|
| 38 |
+
st.subheader("👤 Personal Information")
|
| 39 |
+
col1, col2, col3 = st.columns(3)
|
| 40 |
+
with col1:
|
| 41 |
+
age = st.number_input("Age", 18, 70, 30)
|
| 42 |
+
gender = st.selectbox("Gender", ["Male", "Female"])
|
| 43 |
+
marital_status = st.selectbox("Marital Status", ["Single", "Married"])
|
| 44 |
+
with col2:
|
| 45 |
+
education = st.selectbox("Education", ["High School", "Graduate", "Post Graduate", "Professional"])
|
| 46 |
+
family_size = st.number_input("Family Size", 1, 10, 3)
|
| 47 |
+
dependents = st.number_input("Dependents", 0, 10, 1)
|
| 48 |
+
with col3:
|
| 49 |
+
house_type = st.selectbox("House Type", ["Rented", "Own", "Family"])
|
| 50 |
+
company_type = st.selectbox("Company Type", ["Startup", "SME", "MNC", "Government"])
|
| 51 |
+
|
| 52 |
+
st.divider()
|
| 53 |
+
|
| 54 |
+
# SECTION 2: Employment & Income
|
| 55 |
+
st.subheader("💼 Employment & Financials")
|
| 56 |
+
col4, col5, col6 = st.columns(3)
|
| 57 |
+
with col4:
|
| 58 |
+
employment_type = st.selectbox("Employment Type", ["Private", "Government", "Self-employed"])
|
| 59 |
+
years_of_employment = st.number_input("Years of Employment", 0, 40, 5)
|
| 60 |
+
with col5:
|
| 61 |
+
monthly_salary = st.number_input("Monthly Salary (INR)", 10000, 500000, 50000, step=5000)
|
| 62 |
+
credit_score = st.number_input("Credit Score", 300, 900, 700)
|
| 63 |
+
with col6:
|
| 64 |
+
bank_balance = st.number_input("Bank Balance (INR)", 0, 10000000, 200000)
|
| 65 |
+
existing_loans = st.selectbox("Existing Loans", ["No", "Yes"])
|
| 66 |
+
|
| 67 |
+
st.divider()
|
| 68 |
+
|
| 69 |
+
# SECTION 3: Expenses & Current Debt
|
| 70 |
+
st.subheader("📉 Monthly Outgoings")
|
| 71 |
+
col7, col8, col9 = st.columns(3)
|
| 72 |
+
with col7:
|
| 73 |
+
monthly_rent = st.number_input("Monthly Rent (INR)", 0, 100000, 10000)
|
| 74 |
+
current_emi_amount = st.number_input("Current EMI Totals", 0, 100000, 0)
|
| 75 |
+
with col8:
|
| 76 |
+
groceries_utilities = st.number_input("Groceries & Utilities", 0, 50000, 8000)
|
| 77 |
+
travel_expenses = st.number_input("Travel Expenses", 0, 50000, 3000)
|
| 78 |
+
with col9:
|
| 79 |
+
school_college_fees = st.number_input("Education Fees (Total)", 0, 150000, 0)
|
| 80 |
+
other_monthly_expenses = st.number_input("Other Expenses", 0, 50000, 5000)
|
| 81 |
+
|
| 82 |
+
st.divider()
|
| 83 |
+
|
| 84 |
+
# SECTION 4: Loan Details
|
| 85 |
+
st.subheader("📝 Loan Application Details")
|
| 86 |
+
col10, col11, col12 = st.columns(3)
|
| 87 |
+
with col10:
|
| 88 |
+
emi_scenario = st.selectbox("EMI Type", ["Personal Loan EMI", "Vehicle EMI", "Home Appliances EMI", "Education EMI", "E-commerce Shopping EMI"])
|
| 89 |
+
with col11:
|
| 90 |
+
requested_amount = st.number_input("Requested Loan Amount (INR)", 10000, 20000000, 300000)
|
| 91 |
+
with col12:
|
| 92 |
+
requested_tenure = st.number_input("Requested Tenure (Months)", 3, 84, 24)
|
| 93 |
+
|
| 94 |
+
# -------------------------------
|
| 95 |
+
# PREDICTION ENGINE
|
| 96 |
+
# -------------------------------
|
| 97 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 98 |
+
|
| 99 |
+
if st.button("Analyze Eligibility", use_container_width=True, type="primary"):
|
| 100 |
+
|
| 101 |
+
# Bundle input for Inference
|
| 102 |
+
raw_input = {
|
| 103 |
+
"age": age,
|
| 104 |
+
"gender": gender,
|
| 105 |
+
"marital_status": marital_status,
|
| 106 |
+
"education": education,
|
| 107 |
+
"monthly_salary": monthly_salary,
|
| 108 |
+
"employment_type": employment_type,
|
| 109 |
+
"years_of_employment": years_of_employment,
|
| 110 |
+
"company_type": company_type,
|
| 111 |
+
"house_type": house_type,
|
| 112 |
+
"monthly_rent": monthly_rent,
|
| 113 |
+
"family_size": family_size,
|
| 114 |
+
"dependents": dependents,
|
| 115 |
+
"school_fees": school_college_fees * 0.4, # Heuristic split if your model expects separate
|
| 116 |
+
"college_fees": school_college_fees * 0.6,
|
| 117 |
+
"travel_expenses": travel_expenses,
|
| 118 |
+
"groceries_utilities": groceries_utilities,
|
| 119 |
+
"other_monthly_expenses": other_monthly_expenses,
|
| 120 |
+
"existing_loans": existing_loans,
|
| 121 |
+
"current_emi_amount": current_emi_amount,
|
| 122 |
+
"credit_score": credit_score,
|
| 123 |
+
"bank_balance": bank_balance,
|
| 124 |
+
"emergency_fund": bank_balance * 0.2, # Assumption if not provided
|
| 125 |
+
"emi_scenario": emi_scenario,
|
| 126 |
+
"requested_amount": requested_amount,
|
| 127 |
+
"requested_tenure": requested_tenure
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
with st.spinner("Consulting the AI Risk Model..."):
|
| 131 |
+
eligibility, max_emi = predict_emi(raw_input)
|
| 132 |
+
|
| 133 |
+
st.markdown("---")
|
| 134 |
+
|
| 135 |
+
# DISPLAY RESULTS
|
| 136 |
+
res_col1, res_col2 = st.columns([1, 2])
|
| 137 |
+
|
| 138 |
+
with res_col1:
|
| 139 |
+
if eligibility == "Eligible":
|
| 140 |
+
st.success(f"### Result: {eligibility} ✅")
|
| 141 |
+
st.metric("Safe EMI Limit", f"₹ {max_emi:,.2f}")
|
| 142 |
+
elif eligibility == "High Risk":
|
| 143 |
+
st.warning(f"### Result: {eligibility} ⚠️")
|
| 144 |
+
st.metric("Risk-Adjusted EMI", f"₹ {max_emi:,.2f}")
|
| 145 |
+
else:
|
| 146 |
+
st.error(f"### Result: {eligibility} ❌")
|
| 147 |
+
st.metric("Approved EMI", "₹ 0.00")
|
| 148 |
+
|
| 149 |
+
with res_col2:
|
| 150 |
+
st.write("#### AI Analysis Summary")
|
| 151 |
+
if eligibility == "Eligible":
|
| 152 |
+
st.write(f"Based on your credit score of **{credit_score}** and disposable income, you are highly likely to be approved. Your monthly surplus supports an EMI of up to **₹{max_emi:,.2f}**.")
|
| 153 |
+
elif eligibility == "High Risk":
|
| 154 |
+
st.write("You are borderline eligible. We recommend either increasing your loan tenure (to lower the monthly burden) or closing small existing debts to improve your score.")
|
| 155 |
+
else:
|
| 156 |
+
st.write("Unfortunately, based on current debt-to-income ratios and credit history, we cannot approve this loan. Try again after improving your credit score or increasing your monthly bank balance.")
|
| 157 |
+
|
| 158 |
+
# Show "What-If" Analysis
|
| 159 |
+
if eligibility != "Eligible":
|
| 160 |
+
st.info("💡 **Pro-Tip:** Lowering your 'Requested Amount' or increasing your 'Tenure' usually improves eligibility results.")
|
pages/2_Data_Exploration.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
|
| 5 |
+
st.set_page_config(layout="wide")
|
| 6 |
+
st.title("Financial Data Exploration & Insights")
|
| 7 |
+
|
| 8 |
+
# -----------------------------------
|
| 9 |
+
# DATA LOADING
|
| 10 |
+
# -----------------------------------
|
| 11 |
+
@st.cache_data
def load_data():
    """Read the loan-applications CSV into a DataFrame.

    Wrapped in ``st.cache_data`` so the file is not re-read on every
    Streamlit rerun. The path is relative to the working directory.
    """
    dataset_path = "data/loan_applications.csv"
    loan_frame = pd.read_csv(dataset_path)
    return loan_frame
|
| 14 |
+
|
| 15 |
+
df = load_data()
|
| 16 |
+
|
| 17 |
+
st.subheader("Dataset Overview")
|
| 18 |
+
st.write(f"Total Records: {len(df)}")
|
| 19 |
+
st.dataframe(df.head(50), use_container_width=True)
|
| 20 |
+
|
| 21 |
+
# -----------------------------------
|
| 22 |
+
# SCHEMA VALIDATION
|
| 23 |
+
# -----------------------------------
|
| 24 |
+
required_columns = {
|
| 25 |
+
"monthly_salary",
|
| 26 |
+
"max_monthly_emi",
|
| 27 |
+
"credit_score",
|
| 28 |
+
"emi_eligibility",
|
| 29 |
+
"debt_to_income",
|
| 30 |
+
"expense_to_income",
|
| 31 |
+
"savings_ratio",
|
| 32 |
+
"credit_risk_bucket"
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
missing_cols = required_columns - set(df.columns)
|
| 36 |
+
if missing_cols:
|
| 37 |
+
st.error(f"Missing required columns: {missing_cols}")
|
| 38 |
+
st.stop()
|
| 39 |
+
|
| 40 |
+
# -----------------------------------
|
| 41 |
+
# INTERACTIVE FILTERS
|
| 42 |
+
# -----------------------------------
|
| 43 |
+
st.markdown("---")
|
| 44 |
+
st.subheader("Data Filters")
|
| 45 |
+
|
| 46 |
+
col1, col2, col3 = st.columns(3)
|
| 47 |
+
|
| 48 |
+
with col1:
|
| 49 |
+
credit_range = st.slider(
|
| 50 |
+
"Credit Score Range",
|
| 51 |
+
int(df.credit_score.min()),
|
| 52 |
+
int(df.credit_score.max()),
|
| 53 |
+
(600, 800)
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
with col2:
|
| 57 |
+
eligibility_filter = st.multiselect(
|
| 58 |
+
"EMI Eligibility",
|
| 59 |
+
options=df["emi_eligibility"].unique().tolist(),
|
| 60 |
+
default=df["emi_eligibility"].unique().tolist()
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
with col3:
|
| 64 |
+
risk_filter = st.multiselect(
|
| 65 |
+
"Credit Risk Bucket",
|
| 66 |
+
options=df["credit_risk_bucket"].unique().tolist(),
|
| 67 |
+
default=df["credit_risk_bucket"].unique().tolist()
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
filtered_df = df[
|
| 71 |
+
(df["credit_score"].between(*credit_range)) &
|
| 72 |
+
(df["emi_eligibility"].isin(eligibility_filter)) &
|
| 73 |
+
(df["credit_risk_bucket"].isin(risk_filter))
|
| 74 |
+
]
|
| 75 |
+
|
| 76 |
+
st.write(f"Filtered Records: {len(filtered_df)}")
|
| 77 |
+
|
| 78 |
+
# -----------------------------------
|
| 79 |
+
# VISUALIZATIONS
|
| 80 |
+
# -----------------------------------
|
| 81 |
+
st.markdown("---")
|
| 82 |
+
st.subheader("Key Financial Visualizations")
|
| 83 |
+
|
| 84 |
+
# NOTE: figures created through pyplot stay registered in pyplot's global
# state until they are closed. Streamlit re-executes this script on every
# widget interaction, so each figure is closed right after it has been
# rendered; otherwise open figures accumulate for the life of the process.

# ---- Salary vs Max EMI
st.markdown("### Monthly Salary vs Maximum Safe EMI")

fig, ax = plt.subplots()
ax.scatter(
    filtered_df["monthly_salary"],
    filtered_df["max_monthly_emi"]
)
ax.set_xlabel("Monthly Salary (INR)")
ax.set_ylabel("Maximum Safe EMI (INR)")
st.pyplot(fig)
plt.close(fig)  # release the figure once Streamlit has rendered it

# ---- Credit Score vs EMI
st.markdown("### Credit Score vs EMI Capacity")

fig, ax = plt.subplots()
ax.scatter(
    filtered_df["credit_score"],
    filtered_df["max_monthly_emi"]
)
ax.set_xlabel("Credit Score")
ax.set_ylabel("Maximum Safe EMI (INR)")
st.pyplot(fig)
plt.close(fig)

# ---- Debt to Income Distribution
st.markdown("### Debt-to-Income Ratio Distribution")

fig, ax = plt.subplots()
ax.hist(filtered_df["debt_to_income"], bins=30)
ax.set_xlabel("Debt-to-Income Ratio")
st.pyplot(fig)
plt.close(fig)
|
| 115 |
+
|
| 116 |
+
# ---- EMI Eligibility Breakdown
|
| 117 |
+
st.markdown("### EMI Eligibility Distribution")
|
| 118 |
+
eligibility_counts = filtered_df["emi_eligibility"].value_counts()
|
| 119 |
+
st.bar_chart(eligibility_counts)
|
| 120 |
+
|
| 121 |
+
# ---- Credit Risk Bucket Distribution
|
| 122 |
+
st.markdown("### Credit Risk Bucket Distribution")
|
| 123 |
+
risk_counts = filtered_df["credit_risk_bucket"].value_counts()
|
| 124 |
+
st.bar_chart(risk_counts)
|
| 125 |
+
|
| 126 |
+
# -----------------------------------
|
| 127 |
+
# SUMMARY STATISTICS
|
| 128 |
+
# -----------------------------------
|
| 129 |
+
st.markdown("---")
|
| 130 |
+
st.subheader("Summary Statistics")
|
| 131 |
+
|
| 132 |
+
col4, col5, col6 = st.columns(3)
|
| 133 |
+
|
| 134 |
+
with col4:
|
| 135 |
+
st.metric(
|
| 136 |
+
"Average Salary",
|
| 137 |
+
f"₹ {filtered_df['monthly_salary'].mean():,.0f}"
|
| 138 |
+
)
|
| 139 |
+
|
| 140 |
+
with col5:
|
| 141 |
+
st.metric(
|
| 142 |
+
"Average Max EMI",
|
| 143 |
+
f"₹ {filtered_df['max_monthly_emi'].mean():,.0f}"
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
with col6:
|
| 147 |
+
st.metric(
|
| 148 |
+
"Average Credit Score",
|
| 149 |
+
f"{filtered_df['credit_score'].mean():.0f}"
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
# -----------------------------------
|
| 153 |
+
# EXPORT OPTION
|
| 154 |
+
# -----------------------------------
|
| 155 |
+
st.markdown("---")
|
| 156 |
+
st.subheader("Export Filtered Data")
|
| 157 |
+
|
| 158 |
+
csv = filtered_df.to_csv(index=False).encode("utf-8")
|
| 159 |
+
st.download_button(
|
| 160 |
+
"Download Filtered Dataset",
|
| 161 |
+
data=csv,
|
| 162 |
+
file_name="filtered_loan_data.csv",
|
| 163 |
+
mime="text/csv"
|
| 164 |
+
)
|
pages/3_Model_Monitoring.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import mlflow
|
| 3 |
+
from mlflow.tracking import MlflowClient
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
st.title("Model Performance & Monitoring")
|
| 7 |
+
|
| 8 |
+
# Connect to whichever MLflow tracking backend is configured for this process.
client = MlflowClient()
experiments = client.search_experiments()

# Let the user pick an experiment by name.
exp_names = []
for experiment in experiments:
    exp_names.append(experiment.name)
selected_exp = st.selectbox("Select Experiment", exp_names)

# Resolve the chosen name back to an experiment and fetch its runs.
exp = client.get_experiment_by_name(selected_exp)
runs = client.search_runs(exp.experiment_id)

# One table row per run; a metric that was not logged shows up as None.
run_rows = []
for r in runs:
    logged_metrics = r.data.metrics
    run_rows.append(
        {
            "run_id": r.info.run_id,
            "status": r.info.status,
            "accuracy": logged_metrics.get("accuracy"),
            "rmse": logged_metrics.get("rmse"),
        }
    )
runs_df = pd.DataFrame(run_rows)

st.dataframe(runs_df)

st.markdown("For full details, access the MLflow UI at http://127.0.0.1:5000")
|
pages/4_Admin_Panel.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
st.title("Administrative Panel")
|
| 5 |
+
st.warning("Restricted Access Authorized Users Only")
|
| 6 |
+
|
| 7 |
+
uploaded_file = st.file_uploader(
|
| 8 |
+
"Upload New Loan Dataset (CSV)",
|
| 9 |
+
type=["csv"]
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
# NOTE(review): the banner above says access is restricted, but no
# authentication or authorisation check is visible in this page, so anyone
# who can open it can overwrite the dataset. Confirm access is enforced
# elsewhere before exposing this page.
if uploaded_file:
    # Parse the upload and show a short preview before anything is written.
    df = pd.read_csv(uploaded_file)
    st.success("Dataset uploaded successfully")
    st.dataframe(df.head())

    if st.button("Persist Dataset"):
        # Overwrite the dataset that the Data Exploration page reads.
        df.to_csv("data/loan_applications.csv", index=False)
        # That page loads the CSV through st.cache_data with no expiry, so
        # drop the cached copy; otherwise the old data keeps being served
        # until the process restarts.
        st.cache_data.clear()
        st.success("Dataset saved and ready for retraining")
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
scikit-learn
|
| 5 |
+
xgboost
|
| 6 |
+
joblib
|
| 7 |
+
matplotlib
|
| 8 |
+
mlflow
|