File size: 4,222 Bytes
5a9137a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

# Page-level setup: request the "wide" layout, then render the app title.
# NOTE(review): Streamlit has historically required set_page_config to run
# before other st.* calls — keep it first; confirm against the installed version.
st.set_page_config(layout="wide")
st.title("Financial Data Exploration & Insights")

# -----------------------------------
# DATA LOADING
# -----------------------------------
@st.cache_data
def load_data(path: str = "data/loan_applications.csv") -> pd.DataFrame:
    """Read the loan-applications dataset from a CSV file.

    The function is decorated with ``st.cache_data``, so the file is not
    re-read on every script rerun for the same ``path``.

    Args:
        path: Location of the CSV file. Defaults to the path the app has
            always used, so existing ``load_data()`` calls are unaffected.

    Returns:
        The parsed CSV contents as a DataFrame.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        pandas.errors.EmptyDataError: If the file has no content to parse.
    """
    return pd.read_csv(path)

# Load the dataset; report an unreadable file as an in-app error (same
# st.error + st.stop pattern used for schema validation) instead of a traceback.
try:
    df = load_data()
except (FileNotFoundError, pd.errors.EmptyDataError) as exc:
    st.error(f"Could not load the loan applications dataset: {exc}")
    st.stop()

st.subheader("Dataset Overview")
st.write(f"Total Records: {len(df)}")
# Only the first 50 rows are shown as a preview; the full frame stays in `df`.
st.dataframe(df.head(50), use_container_width=True)

# -----------------------------------
# SCHEMA VALIDATION
# -----------------------------------
# Columns read by the filters, charts, and summary metrics further down.
required_columns = {
    "credit_score",
    "credit_risk_bucket",
    "emi_eligibility",
    "monthly_salary",
    "max_monthly_emi",
    "debt_to_income",
    "expense_to_income",
    "savings_ratio",
}

# Collect every required column that the loaded dataset does not provide.
available_columns = set(df.columns)
missing_cols = {
    name for name in required_columns if name not in available_columns
}

if missing_cols:
    # Report the gap and halt the script so later sections never touch
    # a column that is not there.
    st.error(f"Missing required columns: {missing_cols}")
    st.stop()

# -----------------------------------
# INTERACTIVE FILTERS
# -----------------------------------
st.markdown("---")
st.subheader("Data Filters")

# Preferred starting selection for the credit-score slider.
DEFAULT_SCORE_WINDOW = (600, 800)

# Slider bounds come from the scores actually present. Without any valid
# score, min()/max() would be NaN and int() would raise, so stop early.
valid_scores = df["credit_score"].dropna()
if valid_scores.empty:
    st.warning("No credit score values are available to filter on.")
    st.stop()

score_min = int(valid_scores.min())
score_max = int(valid_scores.max())
if score_max <= score_min:
    # Single distinct (or sub-integer) score range: widen by one so the
    # slider keeps a non-empty span and the top score stays selectable.
    score_max = score_min + 1

# Keep the default selection inside the observed range; if the preferred
# window does not overlap the data at all, fall back to the full range so
# the initial view is not empty.
default_low = max(DEFAULT_SCORE_WINDOW[0], score_min)
default_high = min(DEFAULT_SCORE_WINDOW[1], score_max)
if default_low > default_high:
    default_low, default_high = score_min, score_max

# Distinct category values, computed once and reused as both the options
# and the default (everything selected).
eligibility_options = df["emi_eligibility"].unique().tolist()
risk_options = df["credit_risk_bucket"].unique().tolist()

col1, col2, col3 = st.columns(3)

with col1:
    credit_range = st.slider(
        "Credit Score Range",
        score_min,
        score_max,
        (default_low, default_high),
    )

with col2:
    eligibility_filter = st.multiselect(
        "EMI Eligibility",
        options=eligibility_options,
        default=eligibility_options,
    )

with col3:
    risk_filter = st.multiselect(
        "Credit Risk Bucket",
        options=risk_options,
        default=risk_options,
    )

# A row is kept only when it satisfies all three filters; the score bounds
# are inclusive on both ends (Series.between default).
filtered_df = df[
    (df["credit_score"].between(*credit_range)) &
    (df["emi_eligibility"].isin(eligibility_filter)) &
    (df["credit_risk_bucket"].isin(risk_filter))
]

st.write(f"Filtered Records: {len(filtered_df)}")

# -----------------------------------
# VISUALIZATIONS
# -----------------------------------
st.markdown("---")
st.subheader("Key Financial Visualizations")


def _show_figure(fig):
    """Render a Matplotlib figure in the app, then close it."""
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; without
    # this, each script rerun would leave three more open figures behind.
    plt.close(fig)


def _scatter_chart(data, x_col, y_col, x_label, y_label):
    """Draw a scatter plot of ``data[y_col]`` against ``data[x_col]``."""
    fig, ax = plt.subplots()
    ax.scatter(data[x_col], data[y_col])
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    _show_figure(fig)


# ---- Salary vs Max EMI
st.markdown("### Monthly Salary vs Maximum Safe EMI")
_scatter_chart(
    filtered_df,
    "monthly_salary",
    "max_monthly_emi",
    "Monthly Salary (INR)",
    "Maximum Safe EMI (INR)",
)

# ---- Credit Score vs EMI
st.markdown("### Credit Score vs EMI Capacity")
_scatter_chart(
    filtered_df,
    "credit_score",
    "max_monthly_emi",
    "Credit Score",
    "Maximum Safe EMI (INR)",
)

# ---- Debt to Income Distribution
st.markdown("### Debt-to-Income Ratio Distribution")

fig, ax = plt.subplots()
ax.hist(filtered_df["debt_to_income"], bins=30)
ax.set_xlabel("Debt-to-Income Ratio")
# Label the count axis too, so every chart in this section has both axes named.
ax.set_ylabel("Number of Records")
_show_figure(fig)

# ---- EMI Eligibility Breakdown
st.markdown("### EMI Eligibility Distribution")
eligibility_counts = filtered_df["emi_eligibility"].value_counts()
st.bar_chart(eligibility_counts)

# ---- Credit Risk Bucket Distribution
st.markdown("### Credit Risk Bucket Distribution")
risk_counts = filtered_df["credit_risk_bucket"].value_counts()
st.bar_chart(risk_counts)

# -----------------------------------
# SUMMARY STATISTICS
# -----------------------------------
st.markdown("---")
st.subheader("Summary Statistics")


def _format_mean(series, template):
    """Format the mean of ``series`` using ``template``.

    Returns "N/A" when the mean is NaN (for example, when the filters leave
    no rows), so the metric never displays a literal "nan".
    """
    mean_value = series.mean()
    if pd.isna(mean_value):
        return "N/A"
    return template.format(mean_value)


col4, col5, col6 = st.columns(3)

with col4:
    st.metric(
        "Average Salary",
        _format_mean(filtered_df["monthly_salary"], "₹ {:,.0f}"),
    )

with col5:
    st.metric(
        "Average Max EMI",
        _format_mean(filtered_df["max_monthly_emi"], "₹ {:,.0f}"),
    )

with col6:
    st.metric(
        "Average Credit Score",
        _format_mean(filtered_df["credit_score"], "{:.0f}"),
    )

# -----------------------------------
# EXPORT OPTION
# -----------------------------------
st.markdown("---")
st.subheader("Export Filtered Data")

# Serialize the current selection without the index column, then encode it
# as UTF-8 bytes for the download widget.
csv_text = filtered_df.to_csv(index=False)
csv_bytes = csv_text.encode("utf-8")

st.download_button(
    label="Download Filtered Dataset",
    data=csv_bytes,
    file_name="filtered_loan_data.csv",
    mime="text/csv",
)