# eda.py
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.impute import SimpleImputer


def _impute_missing(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """Return an imputed copy of *df*, or ``None`` when nothing can be filled.

    Only columns that contain both missing and observed values are touched:
    numeric ones are filled with the column mean, all others with the most
    frequent value. Columns with no observed values are left alone, because
    ``SimpleImputer`` drops such features by default and the result could
    not be assigned back column-for-column. Complete columns are skipped so
    their dtype (e.g. integer) is not needlessly changed.

    The input frame is never modified.
    """
    has_missing = df.isna().any()
    has_values = df.notna().any()
    fillable = df.columns[(has_missing & has_values).to_numpy()]
    if len(fillable) == 0:
        return None

    result = df.copy()
    candidates = result[fillable]
    numeric_cols = candidates.select_dtypes(include=np.number).columns
    other_cols = candidates.select_dtypes(exclude=np.number).columns

    if len(numeric_cols):
        result[numeric_cols] = SimpleImputer(strategy="mean").fit_transform(
            result[numeric_cols]
        )
    if len(other_cols):
        result[other_cols] = SimpleImputer(
            strategy="most_frequent"
        ).fit_transform(result[other_cols])
    return result


def _render_plot(df: pd.DataFrame, plot: str, target: Optional[str]) -> None:
    """Draw the selected plot for *df* and hand it to Streamlit."""
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        if plot == "Correlation Heatmap":
            corr = df.select_dtypes(np.number).corr()
            if corr.empty:
                # An empty correlation matrix cannot be drawn as a heatmap.
                st.info("No numeric columns available for a correlation heatmap.")
                return
            sns.heatmap(corr, annot=True, ax=ax)
        elif plot == "Target Distribution":
            if target is None:
                st.info("Select a target column to plot its distribution.")
                return
            series = df[target]
            # A density estimate is only meaningful for numeric data.
            show_kde = pd.api.types.is_numeric_dtype(series)
            sns.histplot(series, kde=show_kde, ax=ax)
        st.pyplot(fig)
    finally:
        # Always release the figure, even if plotting raised.
        plt.close(fig)


def run_eda() -> None:
    """Render the exploratory-data-analysis page.

    Reads ``original_df`` and ``processed_df`` from ``st.session_state``,
    shows previews of both, stores the chosen target/feature columns in
    ``st.session_state.target_col`` / ``st.session_state.feature_cols``,
    optionally imputes missing values (writing the result back to
    ``st.session_state.processed_df``), and draws the selected plot.
    """
    # Emoji are written as escapes so the source stays ASCII and cannot be
    # damaged by a wrong file encoding.
    st.header("\U0001F4CA Exploratory Data Analysis")

    df_raw = st.session_state.get("original_df")
    df = st.session_state.get("processed_df")
    if df_raw is None or df is None:
        st.warning("No dataset found in session state; load data first.")
        return

    raw_col, processed_col = st.columns(2)
    with raw_col:
        st.subheader("Raw Data")
        st.dataframe(df_raw.head(10))
        st.write(df_raw.describe(include="all"))
    with processed_col:
        st.subheader("Processed Data")
        st.dataframe(df.head(10))

    st.subheader("\U0001F3AF Feature & Target Selection")
    cols = df.columns.tolist()
    target = st.selectbox("Target", cols)
    features = st.multiselect("Features", [c for c in cols if c != target])
    st.session_state.target_col = target
    st.session_state.feature_cols = features

    st.subheader("\U0001F9F9 Cleaning")
    if st.checkbox("Apply smart imputation"):
        imputed = _impute_missing(df)
        if imputed is not None:
            st.session_state.processed_df = imputed
            # Rerun once so the preview above reflects the filled values.
            # The next run finds nothing left to fill and falls through,
            # so a ticked checkbox no longer reruns the script forever.
            st.rerun()
        st.success("Imputation complete")

    st.subheader("\U0001F4C8 Visuals")
    plot = st.selectbox(
        "Plot type",
        ["Correlation Heatmap", "Target Distribution"],
    )
    _render_plot(df, plot, target)