Spaces:
Sleeping
Sleeping
| # eda.py | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from sklearn.impute import SimpleImputer | |
def _smart_impute(df):
    """Return an imputed copy of ``df`` and the number of cells that were filled.

    Numeric columns are filled with the column mean, all other columns with
    their most frequent value. Only columns that contain both missing and
    observed values are touched, so complete columns keep their dtype.
    """
    imputed = df.copy()  # never mutate the frame stored in session state
    missing = imputed.isna()
    # SimpleImputer drops columns without any observed value from its output,
    # which would break the column-wise assignment below, so skip them.
    fixable = missing.any() & ~missing.all()
    strategies = (
        (imputed.select_dtypes(include=np.number).columns, "mean"),
        (imputed.select_dtypes(exclude=np.number).columns, "most_frequent"),
    )
    for columns, strategy in strategies:
        targets = [col for col in columns if fixable[col]]
        if targets:
            imputer = SimpleImputer(strategy=strategy)
            imputed[targets] = imputer.fit_transform(imputed[targets])
    filled = int(missing.sum().sum() - imputed.isna().sum().sum())
    return imputed, filled


def _render_plot(df, plot, target):
    """Draw the selected plot for ``df`` into the Streamlit page."""
    corr = None
    if plot == "Correlation Heatmap":
        corr = df.select_dtypes(include=np.number).corr()
        if corr.empty:
            # seaborn cannot draw a heatmap from an empty matrix.
            st.info("No numeric columns available for a correlation heatmap.")
            return
    elif plot == "Target Distribution" and target is None:
        # The target selectbox yields None when the frame has no columns.
        st.info("Select a target column to plot its distribution.")
        return

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        if plot == "Correlation Heatmap":
            sns.heatmap(corr, annot=True, ax=ax)
        elif plot == "Target Distribution":
            sns.histplot(df[target], kde=True, ax=ax)
        st.pyplot(fig)
    finally:
        # Release the figure even when plotting fails.
        plt.close(fig)


def run_eda():
    """Render the exploratory-data-analysis page.

    Reads ``st.session_state.original_df`` and
    ``st.session_state.processed_df`` (both must already be set), shows
    previews of them, stores the chosen target and feature columns in
    ``st.session_state.target_col`` / ``st.session_state.feature_cols``,
    optionally imputes missing values into ``processed_df`` and draws the
    selected plot.
    """
    # NOTE(review): the heading strings below look like mis-decoded emoji;
    # they are kept verbatim - confirm against the original file.
    st.header("π Exploratory Data Analysis")
    df_raw = st.session_state.original_df
    df = st.session_state.processed_df

    c1, c2 = st.columns(2)
    with c1:
        st.subheader("Raw Data")
        st.dataframe(df_raw.head(10))
        st.write(df_raw.describe(include="all"))
    with c2:
        st.subheader("Processed Data")
        st.dataframe(df.head(10))

    st.subheader("π― Feature & Target Selection")
    cols = df.columns.tolist()
    target = st.selectbox("Target", cols)
    features = st.multiselect("Features", [c for c in cols if c != target])
    st.session_state.target_col = target
    st.session_state.feature_cols = features

    st.subheader("π§Ή Cleaning")
    if st.checkbox("Apply smart imputation"):
        imputed, filled = _smart_impute(df)
        if filled:
            # Rerun only when data actually changed so the previews above
            # refresh; an unconditional rerun would loop forever because the
            # checkbox stays ticked across runs.
            st.session_state.processed_df = imputed
            st.rerun()
        st.success("Imputation complete")

    st.subheader("π Visuals")
    plot = st.selectbox(
        "Plot type",
        ["Correlation Heatmap", "Target Distribution"],
    )
    _render_plot(df, plot, target)