import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go from io import BytesIO st.set_page_config(page_title="Excel Explorer Dashboard", layout="wide") st.title("📊 Excel Explorer — Interactive Dashboard") st.caption("Upload any .xlsx file, pick a sheet, filter columns, and auto-generate charts.") # --------- Sidebar: File & Sheet --------- with st.sidebar: st.header("1) Upload Excel") uploaded = st.file_uploader("Choose an Excel (.xlsx)", type=["xlsx"]) date_parse = st.checkbox("Parse dates automatically", value=True) st.markdown("---") st.header("2) Sheet & Options") sheet_name = None sample_n = st.number_input("Preview Rows", min_value=5, max_value=200, value=20, step=5) st.markdown("---") st.header("3) Chart Defaults") default_agg = st.selectbox("Default aggregation for numeric", ["sum", "mean", "count"], index=0) top_n = st.slider("Top N for category charts", min_value=5, max_value=50, value=20, step=5) def load_excel(file, parse_dates=True): xls = pd.ExcelFile(file) sheets = xls.sheet_names frames = {} for s in sheets: df = pd.read_excel(file, sheet_name=s) # Basic cleanup hints # Try to convert object columns that look like dates if parse_dates: for c in df.columns: if df[c].dtype == "object": try: df[c] = pd.to_datetime(df[c]) except Exception: pass frames[s] = df return frames def detect_types(df: pd.DataFrame): cols = df.columns.tolist() cat_cols, num_cols, dt_cols, bool_cols = [], [], [], [] for c in cols: s = df[c] if pd.api.types.is_datetime64_any_dtype(s): dt_cols.append(c) elif pd.api.types.is_bool_dtype(s): bool_cols.append(c) elif pd.api.types.is_numeric_dtype(s): num_cols.append(c) else: # Treat low-cardinality object columns as categories if s.dtype == "object" or pd.api.types.is_categorical_dtype(s): if s.nunique(dropna=True) <= max(50, int(len(df) * 0.3)): cat_cols.append(c) else: # high-cardinality text: keep as category candidate but mark separately cat_cols.append(c) else: cat_cols.append(c) return cat_cols, num_cols, dt_cols, bool_cols def apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols): st.subheader("🔎 Filters") with st.expander("Show/Hide Filters", expanded=False): filtered = df.copy() cols = st.columns(3) # Categorical filters with cols[0]: for c in cat_cols[:10]: # avoid too many widgets at once vals = sorted([v for v in filtered[c].dropna().unique()])[:500] if len(vals) > 0: sel = st.multiselect(f"Filter: {c}", vals, default=[]) if len(sel) > 0: filtered = filtered[filtered[c].isin(sel)] # Numeric filters with cols[1]: for c in num_cols[:10]: min_v, max_v = pd.to_numeric(filtered[c], errors="coerce").min(), pd.to_numeric(filtered[c], errors="coerce").max() if pd.isna(min_v) or pd.isna(max_v): continue rng = st.slider(f"Range: {c}", float(min_v), float(max_v), (float(min_v), float(max_v))) filtered = filtered[(pd.to_numeric(filtered[c], errors="coerce") >= rng[0]) & (pd.to_numeric(filtered[c], errors="coerce") <= rng[1])] # Datetime filters with cols[2]: for c in dt_cols[:5]: min_d, max_d = pd.to_datetime(filtered[c]).min(), pd.to_datetime(filtered[c]).max() if pd.isna(min_d) or pd.isna(max_d): continue dt = st.date_input(f"Date range: {c}", (min_d.date(), max_d.date())) if isinstance(dt, tuple) and len(dt) == 2: start, end = pd.to_datetime(dt[0]), pd.to_datetime(dt[1]) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1) filtered = filtered[(pd.to_datetime(filtered[c]) >= start) & (pd.to_datetime(filtered[c]) <= end)] return filtered def recommend_charts(df, cat_cols, num_cols, dt_cols, default_agg="sum", top_n=20): tabs = st.tabs(["📋 Table", "📈 Category", "📊 Numeric by Category", "⏱️ Time Series", "🔁 Pivot Builder"]) with tabs[0]: st.caption("Preview (filtered)") st.dataframe(df.head(1000)) st.caption(f"Rows: {len(df):,} | Columns: {len(df.columns)}") with tabs[1]: st.caption("Count by a categorical column") if len(cat_cols) == 0: st.info("No categorical-like columns detected.") else: cat = st.selectbox("Category column", cat_cols, index=0) top = st.slider("Top N", 5, 100, min(top_n, 20), 5) vc = df[cat].astype("object").value_counts(dropna=False).reset_index() vc.columns = [cat, "count"] vc = vc.head(top) c1, c2 = st.columns(2) with c1: fig = px.bar(vc, x="count", y=cat, orientation="h", title=f"Count by {cat}") st.plotly_chart(fig, use_container_width=True) with c2: fig = px.treemap(vc, path=[cat], values="count", title=f"Treemap — {cat}") st.plotly_chart(fig, use_container_width=True) with tabs[2]: st.caption("Aggregate a numeric column by a category") if len(cat_cols) == 0 or len(num_cols) == 0: st.info("Need at least one category and one numeric column.") else: cat = st.selectbox("Group by (category)", cat_cols, key="num_cat") num = st.selectbox("Value (numeric)", num_cols, key="num_val") agg = st.selectbox("Aggregation", ["sum", "mean", "count"], index=["sum","mean","count"].index(default_agg)) g = None if agg == "sum": g = df.groupby(cat, dropna=False)[num].sum().reset_index(name="value") elif agg == "mean": g = df.groupby(cat, dropna=False)[num].mean().reset_index(name="value") else: g = df.groupby(cat, dropna=False)[num].count().reset_index(name="value") g = g.sort_values("value", ascending=False).head(top_n) fig = px.bar(g, x="value", y=cat, orientation="h", title=f"{agg} of {num} by {cat}") st.plotly_chart(fig, use_container_width=True) with tabs[3]: st.caption("Time series from a date/datetime column") if len(dt_cols) == 0: st.info("No datetime columns detected.") else: dtc = st.selectbox("Datetime column", dt_cols) mode = st.selectbox("Aggregation", ["count rows", "count by category", "sum numeric", "mean numeric"]) freq = st.selectbox("Resample frequency", ["D","W","M","Q","Y"], index=2, help="D=Day, W=Week, M=Month, Q=Quarter, Y=Year") df2 = df.dropna(subset=[dtc]).copy() df2[dtc] = pd.to_datetime(df2[dtc]) df2 = df2.set_index(dtc).sort_index() if mode == "count rows": ts = df2.resample(freq).size().reset_index(name="count") fig = px.line(ts, x=dtc, y="count", markers=True, title=f"Row count over time ({freq})") st.plotly_chart(fig, use_container_width=True) elif mode == "count by category": cat_choices = [c for c in df.columns if c not in dt_cols] if len(cat_choices) == 0: st.info("No category columns available.") else: cat = st.selectbox("Category", cat_choices) ts = df2.groupby([pd.Grouper(freq=freq), cat]).size().reset_index(name="count") fig = px.line(ts, x=dtc, y="count", color=cat, markers=True, title=f"Count over time by {cat}") st.plotly_chart(fig, use_container_width=True) elif mode in ["sum numeric", "mean numeric"]: nums = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])] if len(nums) == 0: st.info("No numeric columns available.") else: num = st.selectbox("Numeric column", nums) if mode == "sum numeric": ts = df2[num].resample(freq).sum().reset_index(name="value") ttl = f"Sum of {num} over time ({freq})" else: ts = df2[num].resample(freq).mean().reset_index(name="value") ttl = f"Mean of {num} over time ({freq})" fig = px.line(ts, x=dtc, y="value", markers=True, title=ttl) st.plotly_chart(fig, use_container_width=True) with tabs[4]: st.caption("Build a quick pivot") cat_options = [c for c in df.columns if not pd.api.types.is_numeric_dtype(df[c])] num_options = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])] col1, col2, col3 = st.columns(3) rows = col1.multiselect("Rows (categories)", cat_options[:50]) cols = col2.multiselect("Columns (categories)", cat_options[:50]) vals = col3.multiselect("Values (numeric)", num_options[:10]) aggfunc = st.selectbox("Aggregation", ["sum","mean","count"], index=["sum","mean","count"].index(default_agg)) if len(vals) == 0: st.info("Pick at least one numeric value field.") else: func = {"sum": "sum", "mean": "mean", "count": "count"}[aggfunc] try: pv = pd.pivot_table(df, index=rows if rows else None, columns=cols if cols else None, values=vals, aggfunc=func, margins=True, dropna=False, observed=False) st.dataframe(pv) except Exception as e: st.error(f"Pivot error: {e}") # --------- Main flow --------- if uploaded is None: st.info("Upload an Excel file to begin.") st.stop() try: frames = load_excel(uploaded, parse_dates=date_parse) except Exception as e: st.error(f"Failed to read Excel: {e}") st.stop() with st.sidebar: sheet_name = st.selectbox("Select sheet", list(frames.keys())) df = frames[sheet_name] st.subheader(f"📄 Sheet: {sheet_name}") st.write(df.head(sample_n)) # Detect types & filter cat_cols, num_cols, dt_cols, bool_cols = detect_types(df) st.markdown(f"**Detected types:** 🏷️ Category: {len(cat_cols)} | 🔢 Numeric: {len(num_cols)} | 🗓️ Datetime: {len(dt_cols)} | ✅ Bool: {len(bool_cols)}") filtered_df = apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols) st.markdown("---") st.subheader("📈 Recommended Charts") recommend_charts(filtered_df, cat_cols, num_cols, dt_cols, default_agg=default_agg, top_n=top_n) # Download filtered data st.markdown("---") st.download_button("⬇️ Download filtered CSV", data=filtered_df.to_csv(index=False).encode("utf-8-sig"), file_name=f"{sheet_name}_filtered.csv", mime="text/csv") st.caption("Tips: Use the filters to focus your view, then switch tabs for different visualizations.")