# emi_predict/pages/2_Data_Exploration.py
# asmithaaa's picture
# Upload 13 files
# 80bb933 verified
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
# Page setup: wide layout, then the page heading.
PAGE_TITLE = "Financial Data Exploration & Insights"

st.set_page_config(layout="wide")
st.title(PAGE_TITLE)
# -----------------------------------
# DATA LOADING
# -----------------------------------
@st.cache_data
def load_data():
    """Read the loan applications CSV into a DataFrame.

    The path is relative, so it is resolved against the working directory
    of the running process. The function is wrapped by ``st.cache_data``.

    Returns:
        The parsed contents of ``data/loan_applications.csv``.
    """
    csv_path = "data/loan_applications.csv"
    applications = pd.read_csv(csv_path)
    return applications
# Load the dataset; the sections further down read from `df`.
df = load_data()

st.subheader("Dataset Overview")
st.write(f"Total Records: {len(df)}")

# Show only the first 50 rows as a preview rather than the whole table.
preview_rows = df.head(50)
st.dataframe(preview_rows, use_container_width=True)
# -----------------------------------
# SCHEMA VALIDATION
# -----------------------------------
# Columns this page expects to find in the dataset.
required_columns = {
    "credit_risk_bucket",
    "credit_score",
    "debt_to_income",
    "emi_eligibility",
    "expense_to_income",
    "max_monthly_emi",
    "monthly_salary",
    "savings_ratio",
}

# Anything expected but not present in the loaded frame.
present_columns = set(df.columns)
missing_cols = required_columns - present_columns

# Report the gap and halt the script before any section below indexes
# into the frame by column name.
if missing_cols:
    st.error(f"Missing required columns: {missing_cols}")
    st.stop()
# -----------------------------------
# INTERACTIVE FILTERS
# -----------------------------------
st.markdown("---")
st.subheader("Data Filters")

# Slider bounds come from the data. min()/max() are NaN when the column has
# no usable values (e.g. an empty dataset), and int(NaN) raises ValueError,
# so stop with a readable message instead of a traceback.
score_floor = df["credit_score"].min()
score_ceiling = df["credit_score"].max()
if pd.isna(score_floor) or pd.isna(score_ceiling):
    st.warning("No credit score values are available to filter on.")
    st.stop()
score_floor, score_ceiling = int(score_floor), int(score_ceiling)

# Preferred starting window is 600-800, narrowed to what the data covers.
# If the data lies entirely outside that window, start with the full range
# so the default view is not empty.
default_low = max(600, score_floor)
default_high = min(800, score_ceiling)
if default_low > default_high:
    default_low, default_high = score_floor, score_ceiling

# Compute each option list once; it feeds both `options` and `default`.
eligibility_options = df["emi_eligibility"].unique().tolist()
risk_options = df["credit_risk_bucket"].unique().tolist()

col1, col2, col3 = st.columns(3)

with col1:
    credit_range = st.slider(
        "Credit Score Range",
        score_floor,
        score_ceiling,
        (default_low, default_high),
    )

with col2:
    eligibility_filter = st.multiselect(
        "EMI Eligibility",
        options=eligibility_options,
        default=eligibility_options,
    )

with col3:
    risk_filter = st.multiselect(
        "Credit Risk Bucket",
        options=risk_options,
        default=risk_options,
    )

# Keep rows that satisfy all three filters; `between` is inclusive on both
# ends of the selected credit score range.
filtered_df = df[
    (df["credit_score"].between(*credit_range))
    & (df["emi_eligibility"].isin(eligibility_filter))
    & (df["credit_risk_bucket"].isin(risk_filter))
]

st.write(f"Filtered Records: {len(filtered_df)}")
# -----------------------------------
# VISUALIZATIONS
# -----------------------------------
st.markdown("---")
st.subheader("Key Financial Visualizations")


def _render_and_close(figure):
    """Draw *figure* on the page, then close it.

    Figures created with ``plt.subplots()`` stay registered with pyplot
    until they are closed explicitly. This script creates three of them on
    every run, so leaving them open makes memory grow with each rerun.
    """
    try:
        st.pyplot(figure)
    finally:
        plt.close(figure)


# ---- Salary vs Max EMI
st.markdown("### Monthly Salary vs Maximum Safe EMI")
fig, ax = plt.subplots()
ax.scatter(
    filtered_df["monthly_salary"],
    filtered_df["max_monthly_emi"],
)
ax.set_xlabel("Monthly Salary (INR)")
ax.set_ylabel("Maximum Safe EMI (INR)")
_render_and_close(fig)

# ---- Credit Score vs EMI
st.markdown("### Credit Score vs EMI Capacity")
fig, ax = plt.subplots()
ax.scatter(
    filtered_df["credit_score"],
    filtered_df["max_monthly_emi"],
)
ax.set_xlabel("Credit Score")
ax.set_ylabel("Maximum Safe EMI (INR)")
_render_and_close(fig)

# ---- Debt to Income Distribution
st.markdown("### Debt-to-Income Ratio Distribution")
fig, ax = plt.subplots()
ax.hist(filtered_df["debt_to_income"], bins=30)
ax.set_xlabel("Debt-to-Income Ratio")
_render_and_close(fig)

# ---- EMI Eligibility Breakdown
st.markdown("### EMI Eligibility Distribution")
eligibility_counts = filtered_df["emi_eligibility"].value_counts()
st.bar_chart(eligibility_counts)

# ---- Credit Risk Bucket Distribution
st.markdown("### Credit Risk Bucket Distribution")
risk_counts = filtered_df["credit_risk_bucket"].value_counts()
st.bar_chart(risk_counts)
# -----------------------------------
# SUMMARY STATISTICS
# -----------------------------------
st.markdown("---")
st.subheader("Summary Statistics")


def _format_average(series, template):
    """Return the mean of *series* rendered with *template*, or "N/A".

    ``Series.mean()`` is NaN when there is nothing to average, for example
    when the filters exclude every row. Formatting NaN directly would show
    the literal text "nan" in the metric, so a placeholder is used instead.
    """
    average = series.mean()
    if pd.isna(average):
        return "N/A"
    return template.format(average)


col4, col5, col6 = st.columns(3)

with col4:
    st.metric(
        "Average Salary",
        _format_average(filtered_df["monthly_salary"], "₹ {:,.0f}"),
    )

with col5:
    st.metric(
        "Average Max EMI",
        _format_average(filtered_df["max_monthly_emi"], "₹ {:,.0f}"),
    )

with col6:
    st.metric(
        "Average Credit Score",
        _format_average(filtered_df["credit_score"], "{:.0f}"),
    )
# -----------------------------------
# EXPORT OPTION
# -----------------------------------
st.markdown("---")
st.subheader("Export Filtered Data")

# Serialise the filtered rows (index column omitted) to CSV text, then to
# UTF-8 bytes for the download payload.
csv_text = filtered_df.to_csv(index=False)
csv = csv_text.encode("utf-8")

st.download_button(
    label="Download Filtered Dataset",
    data=csv,
    file_name="filtered_loan_data.csv",
    mime="text/csv",
)