File size: 4,222 Bytes
5a9137a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
# Page-level configuration is the first Streamlit call in the script; it
# selects the "wide" layout. The dashboard title is rendered right after.
st.set_page_config(layout="wide")
st.title("Financial Data Exploration & Insights")
# -----------------------------------
# DATA LOADING
# -----------------------------------
@st.cache_data
def load_data(path: str = "data/loan_applications.csv") -> pd.DataFrame:
    """Read the loan-applications CSV into a DataFrame.

    The function is decorated with ``st.cache_data``, so the result is
    cached by Streamlit and reused for repeated calls with the same
    ``path``.

    Args:
        path: Location of the CSV file to read. Defaults to the path the
            app originally hard-coded, so a bare ``load_data()`` call
            behaves exactly as before.

    Returns:
        The file contents as parsed by ``pandas.read_csv``.

    Raises:
        FileNotFoundError: If ``path`` does not exist (raised by pandas).
    """
    return pd.read_csv(path)
# Load the dataset once; every later section reads from ``df``.
df = load_data()

# Overview: total row count followed by a preview of the first 50 rows.
st.subheader("Dataset Overview")
record_count = len(df)
st.write(f"Total Records: {record_count}")
preview_rows = df.head(50)
st.dataframe(preview_rows, use_container_width=True)
# -----------------------------------
# SCHEMA VALIDATION
# -----------------------------------
# Columns the dashboard expects to find in the loaded data.
required_columns = {
    "monthly_salary",
    "max_monthly_emi",
    "credit_score",
    "emi_eligibility",
    "debt_to_income",
    "expense_to_income",
    "savings_ratio",
    "credit_risk_bucket",
}

# Whatever is required but absent from the loaded frame.
present_columns = set(df.columns)
missing_cols = required_columns - present_columns

if missing_cols:
    # Report the gap and halt the script here; nothing below is executed.
    st.error(f"Missing required columns: {missing_cols}")
    st.stop()
# -----------------------------------
# INTERACTIVE FILTERS
# -----------------------------------
st.markdown("---")
st.subheader("Data Filters")

# The slider bounds come from the data itself (truncated to integers).
score_min = int(df["credit_score"].min())
score_max = int(df["credit_score"].max())

# The preferred initial window is 600-800. The data is not guaranteed to
# span that window, so it is clamped to the observed range; if the window
# and the data do not overlap at all, fall back to selecting everything.
default_low = max(score_min, 600)
default_high = min(score_max, 800)
if default_low > default_high:
    default_low, default_high = score_min, score_max

# Distinct category values, computed once and reused both as the list of
# choices and as the initial selection (everything selected).
eligibility_options = df["emi_eligibility"].unique().tolist()
risk_options = df["credit_risk_bucket"].unique().tolist()

col1, col2, col3 = st.columns(3)

with col1:
    credit_range = st.slider(
        "Credit Score Range",
        score_min,
        score_max,
        (default_low, default_high),
    )

with col2:
    eligibility_filter = st.multiselect(
        "EMI Eligibility",
        options=eligibility_options,
        default=eligibility_options,
    )

with col3:
    risk_filter = st.multiselect(
        "Credit Risk Bucket",
        options=risk_options,
        default=risk_options,
    )

# Keep only rows that satisfy all three filters. ``credit_range`` is the
# (low, high) pair from the slider and is unpacked into ``between``.
filtered_df = df[
    df["credit_score"].between(*credit_range)
    & df["emi_eligibility"].isin(eligibility_filter)
    & df["credit_risk_bucket"].isin(risk_filter)
]

st.write(f"Filtered Records: {len(filtered_df)}")
# -----------------------------------
# VISUALIZATIONS
# -----------------------------------
def _show_figure(figure):
    """Render *figure* in the app, then close it.

    Figures created through ``plt.subplots()`` stay registered with pyplot
    until they are explicitly closed. This script creates three per run,
    so without the close they would pile up across reruns.
    """
    st.pyplot(figure)
    plt.close(figure)


st.markdown("---")
st.subheader("Key Financial Visualizations")

# ---- Salary vs Max EMI
st.markdown("### Monthly Salary vs Maximum Safe EMI")
fig, ax = plt.subplots()
ax.scatter(
    filtered_df["monthly_salary"],
    filtered_df["max_monthly_emi"],
)
ax.set_xlabel("Monthly Salary (INR)")
ax.set_ylabel("Maximum Safe EMI (INR)")
_show_figure(fig)

# ---- Credit Score vs EMI
st.markdown("### Credit Score vs EMI Capacity")
fig, ax = plt.subplots()
ax.scatter(
    filtered_df["credit_score"],
    filtered_df["max_monthly_emi"],
)
ax.set_xlabel("Credit Score")
ax.set_ylabel("Maximum Safe EMI (INR)")
_show_figure(fig)

# ---- Debt to Income Distribution
st.markdown("### Debt-to-Income Ratio Distribution")
fig, ax = plt.subplots()
ax.hist(filtered_df["debt_to_income"], bins=30)
ax.set_xlabel("Debt-to-Income Ratio")
_show_figure(fig)
# Row counts per category in the filtered data, one series per chart.
eligibility_counts = filtered_df["emi_eligibility"].value_counts()
risk_counts = filtered_df["credit_risk_bucket"].value_counts()

# ---- EMI Eligibility Breakdown
st.markdown("### EMI Eligibility Distribution")
st.bar_chart(eligibility_counts)

# ---- Credit Risk Bucket Distribution
st.markdown("### Credit Risk Bucket Distribution")
st.bar_chart(risk_counts)
# -----------------------------------
# SUMMARY STATISTICS
# -----------------------------------
def _format_mean(series, spec, prefix=""):
    """Return the formatted mean of *series*, or "N/A" if it is undefined.

    The mean is NaN when the filters leave no rows (or no non-null values).
    Formatting it directly would show "nan" in the metric, so a readable
    placeholder is returned instead.

    Args:
        series: Column to average.
        spec: Format specification applied to the mean, e.g. ``",.0f"``.
        prefix: Text placed in front of the formatted number.
    """
    mean_value = series.mean()
    if pd.isna(mean_value):
        return "N/A"
    return f"{prefix}{mean_value:{spec}}"


st.markdown("---")
st.subheader("Summary Statistics")

col4, col5, col6 = st.columns(3)

with col4:
    st.metric(
        "Average Salary",
        _format_mean(filtered_df["monthly_salary"], ",.0f", prefix="₹ "),
    )

with col5:
    st.metric(
        "Average Max EMI",
        _format_mean(filtered_df["max_monthly_emi"], ",.0f", prefix="₹ "),
    )

with col6:
    st.metric(
        "Average Credit Score",
        _format_mean(filtered_df["credit_score"], ".0f"),
    )
# -----------------------------------
# EXPORT OPTION
# -----------------------------------
st.markdown("---")
st.subheader("Export Filtered Data")

# Serialise the filtered rows (without the index) to UTF-8 encoded CSV
# bytes and offer them as a download.
csv_text = filtered_df.to_csv(index=False)
csv = csv_text.encode("utf-8")

st.download_button(
    label="Download Filtered Dataset",
    data=csv,
    file_name="filtered_loan_data.csv",
    mime="text/csv",
)
|