asmithaaa committed on
Commit
5a9137a
·
verified ·
1 Parent(s): 17117fb

Upload 17 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/loan_applications.csv filter=lfs diff=lfs merge=lfs -text
37
+ emi_feature_engineered.csv filter=lfs diff=lfs merge=lfs -text
38
+ mlflow.db filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official lightweight Python image
FROM python:3.10-slim

# Set the working directory
WORKDIR /app

# Install system dependencies (libgomp1 is the OpenMP runtime XGBoost links to).
# --no-install-recommends keeps apt from pulling optional packages into the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy everything (this includes the 'pages/' folder and 'models/' folder)
COPY . .

# Expose Hugging Face's default port
EXPOSE 7860

# Run the main entry point; Streamlit discovers the pages/ folder next to
# app.py and lists those scripts in the sidebar.
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: EMI Eligibility Predictor
3
+ emoji: 💰
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
app.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
# Landing page of the multipage app: set the browser-tab title and wide
# layout, then show a heading and a pointer to the sidebar navigation.
st.set_page_config(page_title="EMI Prediction System", layout="wide")

st.title("EMI Prediction & Eligibility Platform")
st.write("Use the sidebar to navigate between modules.")
data/loan_applications.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcbe93afbb6f8a2db378d0e5f576c9f18699e29ca488522155ebd7720dccadd1
3
+ size 102077185
emi_feature_engineered.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcbe93afbb6f8a2db378d0e5f576c9f18699e29ca488522155ebd7720dccadd1
3
+ size 102077185
feature_builder.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def prepare_input_features(raw_input: dict) -> pd.DataFrame:
    """Turn one raw loan application into a single-row feature frame.

    The raw fields are kept as columns and the following derived columns
    are appended: ``total_monthly_expenses``, ``debt_to_income``,
    ``expense_to_income``, ``savings_ratio``, ``credit_risk_bucket`` and
    ``employment_stability_score``.

    Args:
        raw_input: Mapping of field name to value for one applicant. Must
            contain the expense fields, ``monthly_salary``,
            ``current_emi_amount``, ``bank_balance``, ``credit_score``,
            ``years_of_employment`` and ``employment_type``.

    Returns:
        A one-row DataFrame with the raw and derived columns.

    Raises:
        KeyError: If a required field is missing from ``raw_input``.
        ValueError: If ``monthly_salary`` is zero or negative, since every
            ratio below divides by it.
    """
    df = pd.DataFrame([raw_input])

    # -------------------------------
    # EXPENSE AGGREGATION
    # -------------------------------
    expense_columns = [
        "school_fees",
        "college_fees",
        "travel_expenses",
        "groceries_utilities",
        "other_monthly_expenses",
        "monthly_rent",
    ]
    total = df[expense_columns[0]]
    for column in expense_columns[1:]:
        total = total + df[column]
    df["total_monthly_expenses"] = total

    # -------------------------------
    # FINANCIAL RATIOS
    # -------------------------------
    salary = df["monthly_salary"]
    # Dividing by a non-positive salary would silently yield inf/NaN or
    # negative ratios; fail loudly instead of feeding those to the models.
    if (salary <= 0).any():
        raise ValueError("monthly_salary must be greater than zero")

    df["debt_to_income"] = df["current_emi_amount"] / salary
    df["expense_to_income"] = df["total_monthly_expenses"] / salary
    df["savings_ratio"] = df["bank_balance"] / salary

    # -------------------------------
    # CREDIT RISK BUCKET
    # -------------------------------
    # Intervals are right-inclusive: (0, 600] -> "Low", (600, 700] ->
    # "Medium", (700, 900] -> "High". Scores outside (0, 900] become NaN.
    df["credit_risk_bucket"] = pd.cut(
        df["credit_score"],
        bins=[0, 600, 700, 900],
        labels=["Low", "Medium", "High"],
    )

    # -------------------------------
    # EMPLOYMENT STABILITY SCORE
    # -------------------------------
    # Tenure weighted by employer type; an unlisted type maps to NaN.
    employment_weight = df["employment_type"].map({
        "Government": 1.5,
        "Private": 1.0,
        "Self-employed": 0.8,
    })
    df["employment_stability_score"] = df["years_of_employment"] * employment_weight

    return df
gitattributes ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ emi_feature_engineered.csv filter=lfs diff=lfs merge=lfs -text
37
+ mlflow.db filter=lfs diff=lfs merge=lfs -text
38
+ data/loan_applications.csv filter=lfs diff=lfs merge=lfs -text
inference.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ from feature_builder import prepare_input_features
6
+
7
# --------------------------------
# MODEL ARTIFACT LOCATIONS
# --------------------------------
# Everything is resolved relative to this file so the working directory of
# the process does not matter.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
_MODELS_DIR = os.path.join(BASE_DIR, "models")

# File names must match the artifacts produced by training.
CLASSIFIER_PATH = os.path.join(_MODELS_DIR, "emi_classifier_final.pkl")
REGRESSOR_PATH = os.path.join(_MODELS_DIR, "emi_model_optimized.pkl")
LABEL_ENCODER_PATH = os.path.join(_MODELS_DIR, "label_encoder.pkl")

# --------------------------------
# LOAD MODELS (ONCE, AT IMPORT TIME)
# --------------------------------
# NOTE: joblib.load unpickles the files, so only trusted artifacts belong in
# models/. The label encoder maps class indices back to class names.
classifier, regressor, label_encoder = (
    joblib.load(artifact_path)
    for artifact_path in (CLASSIFIER_PATH, REGRESSOR_PATH, LABEL_ENCODER_PATH)
)
24
+
25
+ # --------------------------------
26
+ # PREDICTION FUNCTION
27
+ # --------------------------------
28
def predict_emi(raw_input: dict):
    """Score one loan application.

    Combines the classifier's class probabilities (with custom thresholds),
    the regressor's EMI estimate, and two hard rejection rules.

    Args:
        raw_input: Raw applicant fields, passed to prepare_input_features.

    Returns:
        A tuple ``(eligibility_label, max_emi)`` where the label is one of
        "Eligible", "High Risk" or "Not Eligible", and ``max_emi`` is the
        predicted maximum EMI rounded to 2 decimals (0.0 when not eligible).
    """
    # 1. Feature engineering
    features = prepare_input_features(raw_input)

    # 2. Class probabilities, keyed by the decoded class name
    class_probs = classifier.predict_proba(features)[0]
    prob_by_label = {}
    for class_idx, class_prob in enumerate(class_probs):
        class_name = label_encoder.inverse_transform([class_idx])[0]
        prob_by_label[class_name] = class_prob

    # 3. Thresholds: "Eligible" only needs > 35% (and is checked first),
    #    "High_Risk" needs > 40%; anything else is a rejection.
    if prob_by_label.get("Eligible", 0) > 0.35:
        ml_label = "Eligible"
    elif prob_by_label.get("High_Risk", 0) > 0.40:
        ml_label = "High Risk"
    else:
        ml_label = "Not Eligible"

    # 4. Regressed EMI amount, never negative
    predicted_emi = max(float(regressor.predict(features)[0]), 0.0)

    # 5. Hard business rules that override the model
    score = features["credit_score"].iloc[0]
    debt_ratio = features.get("debt_to_income", pd.Series([0])).iloc[0]
    hard_reject = score < 400 or debt_ratio > 0.85

    # 6. Final decision; a rejection always reports an EMI of zero
    final_label = "Not Eligible" if hard_reject else ml_label
    if final_label == "Not Eligible":
        return final_label, 0.0

    return final_label, round(predicted_emi, 2)
mlflow.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c511fe076991c7283c6c6dea06de1483695e47bced88645e9684a802433c3a17
3
+ size 667648
models/emi_classifier_final.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e38f0d3074a6d56bf6069d81591cfaad4c99fa024041bba4e6f645235c1e9601
3
+ size 1954853
models/emi_model_optimized.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20b975fc14733370812ae43702901275de249f7fb677b1d9bcbaa54de3d31920
3
+ size 9835116
models/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b292508e70165f8e5a96cde70b326e21d5a0021868801711c839ef178b2fbb97
3
+ size 511
pages/1_Predict_EMI.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from inference import predict_emi # Ensure inference.py is in the same folder
5
+
6
# -------------------------------
# PAGE CONFIG
# -------------------------------
st.set_page_config(page_title="EMI Eligibility Pro", page_icon="💰", layout="wide")

# Light page background and card-like styling for metric widgets.
_PAGE_CSS = """
<style>
.main {
background-color: #f5f7f9;
}
.stMetric {
background-color: #ffffff;
padding: 15px;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

st.title("🚀 EMI Eligibility & Risk Prediction")
st.write("Fill in the details below to check your loan eligibility and maximum safe EMI.")

# -------------------------------
# INPUT FORM
# -------------------------------
with st.container():
    # SECTION 1: personal and demographic details
    st.subheader("👤 Personal Information")
    person_a, person_b, person_c = st.columns(3)
    with person_a:
        age = st.number_input("Age", min_value=18, max_value=70, value=30)
        gender = st.selectbox("Gender", ["Male", "Female"])
        marital_status = st.selectbox("Marital Status", ["Single", "Married"])
    with person_b:
        education = st.selectbox(
            "Education",
            ["High School", "Graduate", "Post Graduate", "Professional"],
        )
        family_size = st.number_input("Family Size", min_value=1, max_value=10, value=3)
        dependents = st.number_input("Dependents", min_value=0, max_value=10, value=1)
    with person_c:
        house_type = st.selectbox("House Type", ["Rented", "Own", "Family"])
        company_type = st.selectbox("Company Type", ["Startup", "SME", "MNC", "Government"])

    st.divider()

    # SECTION 2: employment and income
    st.subheader("💼 Employment & Financials")
    job_a, job_b, job_c = st.columns(3)
    with job_a:
        employment_type = st.selectbox(
            "Employment Type", ["Private", "Government", "Self-employed"]
        )
        years_of_employment = st.number_input(
            "Years of Employment", min_value=0, max_value=40, value=5
        )
    with job_b:
        monthly_salary = st.number_input(
            "Monthly Salary (INR)", min_value=10000, max_value=500000, value=50000, step=5000
        )
        credit_score = st.number_input("Credit Score", min_value=300, max_value=900, value=700)
    with job_c:
        bank_balance = st.number_input(
            "Bank Balance (INR)", min_value=0, max_value=10000000, value=200000
        )
        existing_loans = st.selectbox("Existing Loans", ["No", "Yes"])

    st.divider()

    # SECTION 3: expenses and current debt
    st.subheader("📉 Monthly Outgoings")
    spend_a, spend_b, spend_c = st.columns(3)
    with spend_a:
        monthly_rent = st.number_input(
            "Monthly Rent (INR)", min_value=0, max_value=100000, value=10000
        )
        current_emi_amount = st.number_input(
            "Current EMI Totals", min_value=0, max_value=100000, value=0
        )
    with spend_b:
        groceries_utilities = st.number_input(
            "Groceries & Utilities", min_value=0, max_value=50000, value=8000
        )
        travel_expenses = st.number_input(
            "Travel Expenses", min_value=0, max_value=50000, value=3000
        )
    with spend_c:
        school_college_fees = st.number_input(
            "Education Fees (Total)", min_value=0, max_value=150000, value=0
        )
        other_monthly_expenses = st.number_input(
            "Other Expenses", min_value=0, max_value=50000, value=5000
        )

    st.divider()

    # SECTION 4: the loan being requested
    st.subheader("📝 Loan Application Details")
    loan_a, loan_b, loan_c = st.columns(3)
    with loan_a:
        emi_scenario = st.selectbox(
            "EMI Type",
            [
                "Personal Loan EMI",
                "Vehicle EMI",
                "Home Appliances EMI",
                "Education EMI",
                "E-commerce Shopping EMI",
            ],
        )
    with loan_b:
        requested_amount = st.number_input(
            "Requested Loan Amount (INR)", min_value=10000, max_value=20000000, value=300000
        )
    with loan_c:
        requested_tenure = st.number_input(
            "Requested Tenure (Months)", min_value=3, max_value=84, value=24
        )

# -------------------------------
# PREDICTION ENGINE
# -------------------------------
st.markdown("<br>", unsafe_allow_html=True)

analyze_clicked = st.button("Analyze Eligibility", use_container_width=True, type="primary")

if analyze_clicked:
    # The form collects one combined education figure; it is split 40/60
    # into the separate school/college fields the feature builder reads.
    school_share = school_college_fees * 0.4
    college_share = school_college_fees * 0.6
    # Emergency fund is not asked for; it is assumed to be 20% of the balance.
    assumed_emergency_fund = bank_balance * 0.2

    # Key order is kept stable because it becomes the column order of the
    # frame handed to the models.
    raw_input = {
        "age": age,
        "gender": gender,
        "marital_status": marital_status,
        "education": education,
        "monthly_salary": monthly_salary,
        "employment_type": employment_type,
        "years_of_employment": years_of_employment,
        "company_type": company_type,
        "house_type": house_type,
        "monthly_rent": monthly_rent,
        "family_size": family_size,
        "dependents": dependents,
        "school_fees": school_share,
        "college_fees": college_share,
        "travel_expenses": travel_expenses,
        "groceries_utilities": groceries_utilities,
        "other_monthly_expenses": other_monthly_expenses,
        "existing_loans": existing_loans,
        "current_emi_amount": current_emi_amount,
        "credit_score": credit_score,
        "bank_balance": bank_balance,
        "emergency_fund": assumed_emergency_fund,
        "emi_scenario": emi_scenario,
        "requested_amount": requested_amount,
        "requested_tenure": requested_tenure,
    }

    with st.spinner("Consulting the AI Risk Model..."):
        eligibility, max_emi = predict_emi(raw_input)

    st.markdown("---")

    is_eligible = eligibility == "Eligible"
    is_high_risk = eligibility == "High Risk"

    # DISPLAY RESULTS: verdict card on the left, explanation on the right
    verdict_col, summary_col = st.columns([1, 2])

    with verdict_col:
        if is_eligible:
            st.success(f"### Result: {eligibility} ✅")
            st.metric("Safe EMI Limit", f"₹ {max_emi:,.2f}")
        elif is_high_risk:
            st.warning(f"### Result: {eligibility} ⚠️")
            st.metric("Risk-Adjusted EMI", f"₹ {max_emi:,.2f}")
        else:
            st.error(f"### Result: {eligibility} ❌")
            st.metric("Approved EMI", "₹ 0.00")

    with summary_col:
        st.write("#### AI Analysis Summary")
        if is_eligible:
            st.write(f"Based on your credit score of **{credit_score}** and disposable income, you are highly likely to be approved. Your monthly surplus supports an EMI of up to **₹{max_emi:,.2f}**.")
        elif is_high_risk:
            st.write("You are borderline eligible. We recommend either increasing your loan tenure (to lower the monthly burden) or closing small existing debts to improve your score.")
        else:
            st.write("Unfortunately, based on current debt-to-income ratios and credit history, we cannot approve this loan. Try again after improving your credit score or increasing your monthly bank balance.")

    # "What-if" hint for anyone who was not cleanly approved
    if not is_eligible:
        st.info("💡 **Pro-Tip:** Lowering your 'Requested Amount' or increasing your 'Tenure' usually improves eligibility results.")
pages/2_Data_Exploration.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+
5
st.set_page_config(layout="wide")
st.title("Financial Data Exploration & Insights")


# -----------------------------------
# DATA LOADING
# -----------------------------------
@st.cache_data
def load_data():
    """Read the loan applications CSV; the result is cached by Streamlit.

    The path is relative to the process working directory.
    """
    return pd.read_csv("data/loan_applications.csv")


def _show_and_close(fig):
    """Render a matplotlib figure, then close it.

    Figures created through pyplot stay registered until closed, so without
    this every rerun of the page would leave three more figures in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


df = load_data()

st.subheader("Dataset Overview")
st.write(f"Total Records: {len(df)}")
st.dataframe(df.head(50), use_container_width=True)

# -----------------------------------
# SCHEMA VALIDATION
# -----------------------------------
# Columns the filters, charts and metrics below read; stop early with a
# clear message instead of failing later with a KeyError.
required_columns = {
    "monthly_salary",
    "max_monthly_emi",
    "credit_score",
    "emi_eligibility",
    "debt_to_income",
    "expense_to_income",
    "savings_ratio",
    "credit_risk_bucket",
}

missing_cols = required_columns - set(df.columns)
if missing_cols:
    st.error(f"Missing required columns: {missing_cols}")
    st.stop()

# -----------------------------------
# INTERACTIVE FILTERS
# -----------------------------------
st.markdown("---")
st.subheader("Data Filters")

col1, col2, col3 = st.columns(3)

with col1:
    score_min = int(df.credit_score.min())
    score_max = int(df.credit_score.max())
    # Preferred default is 600-800, but the slider rejects a default outside
    # [min, max]; clamp it, and fall back to the full range if nothing of
    # 600-800 overlaps the data.
    default_low = max(600, score_min)
    default_high = min(800, score_max)
    if default_low > default_high:
        default_low, default_high = score_min, score_max

    credit_range = st.slider(
        "Credit Score Range",
        score_min,
        score_max,
        (default_low, default_high),
    )

with col2:
    eligibility_options = df["emi_eligibility"].unique().tolist()
    eligibility_filter = st.multiselect(
        "EMI Eligibility",
        options=eligibility_options,
        default=eligibility_options,
    )

with col3:
    risk_options = df["credit_risk_bucket"].unique().tolist()
    risk_filter = st.multiselect(
        "Credit Risk Bucket",
        options=risk_options,
        default=risk_options,
    )

filtered_df = df[
    (df["credit_score"].between(*credit_range))
    & (df["emi_eligibility"].isin(eligibility_filter))
    & (df["credit_risk_bucket"].isin(risk_filter))
]

st.write(f"Filtered Records: {len(filtered_df)}")

# -----------------------------------
# VISUALIZATIONS
# -----------------------------------
st.markdown("---")
st.subheader("Key Financial Visualizations")

# ---- Salary vs Max EMI
st.markdown("### Monthly Salary vs Maximum Safe EMI")

fig, ax = plt.subplots()
ax.scatter(filtered_df["monthly_salary"], filtered_df["max_monthly_emi"])
ax.set_xlabel("Monthly Salary (INR)")
ax.set_ylabel("Maximum Safe EMI (INR)")
_show_and_close(fig)

# ---- Credit Score vs EMI
st.markdown("### Credit Score vs EMI Capacity")

fig, ax = plt.subplots()
ax.scatter(filtered_df["credit_score"], filtered_df["max_monthly_emi"])
ax.set_xlabel("Credit Score")
ax.set_ylabel("Maximum Safe EMI (INR)")
_show_and_close(fig)

# ---- Debt to Income Distribution
st.markdown("### Debt-to-Income Ratio Distribution")

fig, ax = plt.subplots()
ax.hist(filtered_df["debt_to_income"], bins=30)
ax.set_xlabel("Debt-to-Income Ratio")
_show_and_close(fig)

# ---- EMI Eligibility Breakdown
st.markdown("### EMI Eligibility Distribution")
eligibility_counts = filtered_df["emi_eligibility"].value_counts()
st.bar_chart(eligibility_counts)

# ---- Credit Risk Bucket Distribution
st.markdown("### Credit Risk Bucket Distribution")
risk_counts = filtered_df["credit_risk_bucket"].value_counts()
st.bar_chart(risk_counts)

# -----------------------------------
# SUMMARY STATISTICS
# -----------------------------------
st.markdown("---")
st.subheader("Summary Statistics")

col4, col5, col6 = st.columns(3)

with col4:
    st.metric(
        "Average Salary",
        f"₹ {filtered_df['monthly_salary'].mean():,.0f}",
    )

with col5:
    st.metric(
        "Average Max EMI",
        f"₹ {filtered_df['max_monthly_emi'].mean():,.0f}",
    )

with col6:
    st.metric(
        "Average Credit Score",
        f"{filtered_df['credit_score'].mean():.0f}",
    )

# -----------------------------------
# EXPORT OPTION
# -----------------------------------
st.markdown("---")
st.subheader("Export Filtered Data")

csv = filtered_df.to_csv(index=False).encode("utf-8")
st.download_button(
    "Download Filtered Dataset",
    data=csv,
    file_name="filtered_loan_data.csv",
    mime="text/csv",
)
pages/3_Model_Monitoring.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import mlflow
3
+ from mlflow.tracking import MlflowClient
4
+ import pandas as pd
5
+
6
st.title("Model Performance & Monitoring")

# NOTE(review): MlflowClient() is created without an explicit tracking URI,
# so it uses whatever MLflow resolves by default (e.g. MLFLOW_TRACKING_URI).
# Confirm that this points at the store holding the training runs.
client = MlflowClient()
experiments = client.search_experiments()

# Without experiments the selectbox has nothing to offer and the lookup
# below would fail on None; show a message and end the page instead.
if not experiments:
    st.info("No MLflow experiments were found in the configured tracking store.")
    st.stop()

exp_names = [exp.name for exp in experiments]
selected_exp = st.selectbox("Select Experiment", exp_names)

exp = client.get_experiment_by_name(selected_exp)
if exp is None:
    st.warning(f"Experiment '{selected_exp}' could not be loaded.")
    st.stop()

# search_runs is documented to take a list of experiment IDs.
runs = client.search_runs([exp.experiment_id])

# One row per run; metrics that a run did not log show up as missing values.
# Passing the column names keeps the table header visible when there are no runs.
runs_df = pd.DataFrame(
    [
        {
            "run_id": r.info.run_id,
            "status": r.info.status,
            "accuracy": r.data.metrics.get("accuracy"),
            "rmse": r.data.metrics.get("rmse"),
        }
        for r in runs
    ],
    columns=["run_id", "status", "accuracy", "rmse"],
)

st.dataframe(runs_df)

st.markdown("For full details, access the MLflow UI at http://127.0.0.1:5000")
pages/4_Admin_Panel.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
st.title("Administrative Panel")
# NOTE(security): the banner below is informational only. Nothing on this
# page authenticates the visitor, so anyone who can open the app can replace
# the dataset. Put real access control in front of this page before exposing it.
st.warning("Restricted Access Authorized Users Only")

uploaded_file = st.file_uploader(
    "Upload New Loan Dataset (CSV)",
    type=["csv"],
)

if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # A malformed upload should produce a readable message, not a traceback.
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

    st.success("Dataset uploaded successfully")
    st.dataframe(df.head())

    if st.button("Persist Dataset"):
        # Path is relative to the process working directory and overwrites
        # the dataset the exploration page reads.
        df.to_csv("data/loan_applications.csv", index=False)
        # Other pages cache this CSV with st.cache_data; drop those cached
        # copies so they reload the file that was just written.
        st.cache_data.clear()
        st.success("Dataset saved and ready for retraining")
+ st.success("Dataset saved and ready for retraining")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ xgboost
6
+ joblib
7
+ matplotlib
8
+ mlflow