Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from io import BytesIO | |
| st.set_page_config(page_title="Excel Explorer Dashboard", layout="wide") | |
| st.title("π Excel Explorer β Interactive Dashboard") | |
| st.caption("Upload any .xlsx file, pick a sheet, filter columns, and auto-generate charts.") | |
| # --------- Sidebar: File & Sheet --------- | |
| with st.sidebar: | |
| st.header("1) Upload Excel") | |
| uploaded = st.file_uploader("Choose an Excel (.xlsx)", type=["xlsx"]) | |
| date_parse = st.checkbox("Parse dates automatically", value=True) | |
| st.markdown("---") | |
| st.header("2) Sheet & Options") | |
| sheet_name = None | |
| sample_n = st.number_input("Preview Rows", min_value=5, max_value=200, value=20, step=5) | |
| st.markdown("---") | |
| st.header("3) Chart Defaults") | |
| default_agg = st.selectbox("Default aggregation for numeric", ["sum", "mean", "count"], index=0) | |
| top_n = st.slider("Top N for category charts", min_value=5, max_value=50, value=20, step=5) | |
| def load_excel(file, parse_dates=True): | |
| xls = pd.ExcelFile(file) | |
| sheets = xls.sheet_names | |
| frames = {} | |
| for s in sheets: | |
| df = pd.read_excel(file, sheet_name=s) | |
| # Basic cleanup hints | |
| # Try to convert object columns that look like dates | |
| if parse_dates: | |
| for c in df.columns: | |
| if df[c].dtype == "object": | |
| try: | |
| df[c] = pd.to_datetime(df[c]) | |
| except Exception: | |
| pass | |
| frames[s] = df | |
| return frames | |
| def detect_types(df: pd.DataFrame): | |
| cols = df.columns.tolist() | |
| cat_cols, num_cols, dt_cols, bool_cols = [], [], [], [] | |
| for c in cols: | |
| s = df[c] | |
| if pd.api.types.is_datetime64_any_dtype(s): | |
| dt_cols.append(c) | |
| elif pd.api.types.is_bool_dtype(s): | |
| bool_cols.append(c) | |
| elif pd.api.types.is_numeric_dtype(s): | |
| num_cols.append(c) | |
| else: | |
| # Treat low-cardinality object columns as categories | |
| if s.dtype == "object" or pd.api.types.is_categorical_dtype(s): | |
| if s.nunique(dropna=True) <= max(50, int(len(df) * 0.3)): | |
| cat_cols.append(c) | |
| else: | |
| # high-cardinality text: keep as category candidate but mark separately | |
| cat_cols.append(c) | |
| else: | |
| cat_cols.append(c) | |
| return cat_cols, num_cols, dt_cols, bool_cols | |
| def apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols): | |
| st.subheader("π Filters") | |
| with st.expander("Show/Hide Filters", expanded=False): | |
| filtered = df.copy() | |
| cols = st.columns(3) | |
| # Categorical filters | |
| with cols[0]: | |
| for c in cat_cols[:10]: # avoid too many widgets at once | |
| vals = sorted([v for v in filtered[c].dropna().unique()])[:500] | |
| if len(vals) > 0: | |
| sel = st.multiselect(f"Filter: {c}", vals, default=[]) | |
| if len(sel) > 0: | |
| filtered = filtered[filtered[c].isin(sel)] | |
| # Numeric filters | |
| with cols[1]: | |
| for c in num_cols[:10]: | |
| min_v, max_v = pd.to_numeric(filtered[c], errors="coerce").min(), pd.to_numeric(filtered[c], errors="coerce").max() | |
| if pd.isna(min_v) or pd.isna(max_v): | |
| continue | |
| rng = st.slider(f"Range: {c}", float(min_v), float(max_v), (float(min_v), float(max_v))) | |
| filtered = filtered[(pd.to_numeric(filtered[c], errors="coerce") >= rng[0]) & | |
| (pd.to_numeric(filtered[c], errors="coerce") <= rng[1])] | |
| # Datetime filters | |
| with cols[2]: | |
| for c in dt_cols[:5]: | |
| min_d, max_d = pd.to_datetime(filtered[c]).min(), pd.to_datetime(filtered[c]).max() | |
| if pd.isna(min_d) or pd.isna(max_d): | |
| continue | |
| dt = st.date_input(f"Date range: {c}", (min_d.date(), max_d.date())) | |
| if isinstance(dt, tuple) and len(dt) == 2: | |
| start, end = pd.to_datetime(dt[0]), pd.to_datetime(dt[1]) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1) | |
| filtered = filtered[(pd.to_datetime(filtered[c]) >= start) & (pd.to_datetime(filtered[c]) <= end)] | |
| return filtered | |
| def recommend_charts(df, cat_cols, num_cols, dt_cols, default_agg="sum", top_n=20): | |
| tabs = st.tabs(["π Table", "π Category", "π Numeric by Category", "β±οΈ Time Series", "π Pivot Builder"]) | |
| with tabs[0]: | |
| st.caption("Preview (filtered)") | |
| st.dataframe(df.head(1000)) | |
| st.caption(f"Rows: {len(df):,} | Columns: {len(df.columns)}") | |
| with tabs[1]: | |
| st.caption("Count by a categorical column") | |
| if len(cat_cols) == 0: | |
| st.info("No categorical-like columns detected.") | |
| else: | |
| cat = st.selectbox("Category column", cat_cols, index=0) | |
| top = st.slider("Top N", 5, 100, min(top_n, 20), 5) | |
| vc = df[cat].astype("object").value_counts(dropna=False).reset_index() | |
| vc.columns = [cat, "count"] | |
| vc = vc.head(top) | |
| c1, c2 = st.columns(2) | |
| with c1: | |
| fig = px.bar(vc, x="count", y=cat, orientation="h", title=f"Count by {cat}") | |
| st.plotly_chart(fig, use_container_width=True) | |
| with c2: | |
| fig = px.treemap(vc, path=[cat], values="count", title=f"Treemap β {cat}") | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tabs[2]: | |
| st.caption("Aggregate a numeric column by a category") | |
| if len(cat_cols) == 0 or len(num_cols) == 0: | |
| st.info("Need at least one category and one numeric column.") | |
| else: | |
| cat = st.selectbox("Group by (category)", cat_cols, key="num_cat") | |
| num = st.selectbox("Value (numeric)", num_cols, key="num_val") | |
| agg = st.selectbox("Aggregation", ["sum", "mean", "count"], index=["sum","mean","count"].index(default_agg)) | |
| g = None | |
| if agg == "sum": | |
| g = df.groupby(cat, dropna=False)[num].sum().reset_index(name="value") | |
| elif agg == "mean": | |
| g = df.groupby(cat, dropna=False)[num].mean().reset_index(name="value") | |
| else: | |
| g = df.groupby(cat, dropna=False)[num].count().reset_index(name="value") | |
| g = g.sort_values("value", ascending=False).head(top_n) | |
| fig = px.bar(g, x="value", y=cat, orientation="h", title=f"{agg} of {num} by {cat}") | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tabs[3]: | |
| st.caption("Time series from a date/datetime column") | |
| if len(dt_cols) == 0: | |
| st.info("No datetime columns detected.") | |
| else: | |
| dtc = st.selectbox("Datetime column", dt_cols) | |
| mode = st.selectbox("Aggregation", ["count rows", "count by category", "sum numeric", "mean numeric"]) | |
| freq = st.selectbox("Resample frequency", ["D","W","M","Q","Y"], index=2, help="D=Day, W=Week, M=Month, Q=Quarter, Y=Year") | |
| df2 = df.dropna(subset=[dtc]).copy() | |
| df2[dtc] = pd.to_datetime(df2[dtc]) | |
| df2 = df2.set_index(dtc).sort_index() | |
| if mode == "count rows": | |
| ts = df2.resample(freq).size().reset_index(name="count") | |
| fig = px.line(ts, x=dtc, y="count", markers=True, title=f"Row count over time ({freq})") | |
| st.plotly_chart(fig, use_container_width=True) | |
| elif mode == "count by category": | |
| cat_choices = [c for c in df.columns if c not in dt_cols] | |
| if len(cat_choices) == 0: | |
| st.info("No category columns available.") | |
| else: | |
| cat = st.selectbox("Category", cat_choices) | |
| ts = df2.groupby([pd.Grouper(freq=freq), cat]).size().reset_index(name="count") | |
| fig = px.line(ts, x=dtc, y="count", color=cat, markers=True, title=f"Count over time by {cat}") | |
| st.plotly_chart(fig, use_container_width=True) | |
| elif mode in ["sum numeric", "mean numeric"]: | |
| nums = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])] | |
| if len(nums) == 0: | |
| st.info("No numeric columns available.") | |
| else: | |
| num = st.selectbox("Numeric column", nums) | |
| if mode == "sum numeric": | |
| ts = df2[num].resample(freq).sum().reset_index(name="value") | |
| ttl = f"Sum of {num} over time ({freq})" | |
| else: | |
| ts = df2[num].resample(freq).mean().reset_index(name="value") | |
| ttl = f"Mean of {num} over time ({freq})" | |
| fig = px.line(ts, x=dtc, y="value", markers=True, title=ttl) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tabs[4]: | |
| st.caption("Build a quick pivot") | |
| cat_options = [c for c in df.columns if not pd.api.types.is_numeric_dtype(df[c])] | |
| num_options = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])] | |
| col1, col2, col3 = st.columns(3) | |
| rows = col1.multiselect("Rows (categories)", cat_options[:50]) | |
| cols = col2.multiselect("Columns (categories)", cat_options[:50]) | |
| vals = col3.multiselect("Values (numeric)", num_options[:10]) | |
| aggfunc = st.selectbox("Aggregation", ["sum","mean","count"], index=["sum","mean","count"].index(default_agg)) | |
| if len(vals) == 0: | |
| st.info("Pick at least one numeric value field.") | |
| else: | |
| func = {"sum": "sum", "mean": "mean", "count": "count"}[aggfunc] | |
| try: | |
| pv = pd.pivot_table(df, index=rows if rows else None, columns=cols if cols else None, | |
| values=vals, aggfunc=func, margins=True, dropna=False, observed=False) | |
| st.dataframe(pv) | |
| except Exception as e: | |
| st.error(f"Pivot error: {e}") | |
| # --------- Main flow --------- | |
| if uploaded is None: | |
| st.info("Upload an Excel file to begin.") | |
| st.stop() | |
| try: | |
| frames = load_excel(uploaded, parse_dates=date_parse) | |
| except Exception as e: | |
| st.error(f"Failed to read Excel: {e}") | |
| st.stop() | |
| with st.sidebar: | |
| sheet_name = st.selectbox("Select sheet", list(frames.keys())) | |
| df = frames[sheet_name] | |
| st.subheader(f"π Sheet: {sheet_name}") | |
| st.write(df.head(sample_n)) | |
| # Detect types & filter | |
| cat_cols, num_cols, dt_cols, bool_cols = detect_types(df) | |
| st.markdown(f"**Detected types:** π·οΈ Category: {len(cat_cols)} | π’ Numeric: {len(num_cols)} | ποΈ Datetime: {len(dt_cols)} | β Bool: {len(bool_cols)}") | |
| filtered_df = apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols) | |
| st.markdown("---") | |
| st.subheader("π Recommended Charts") | |
| recommend_charts(filtered_df, cat_cols, num_cols, dt_cols, default_agg=default_agg, top_n=top_n) | |
| # Download filtered data | |
| st.markdown("---") | |
| st.download_button("β¬οΈ Download filtered CSV", data=filtered_df.to_csv(index=False).encode("utf-8-sig"), | |
| file_name=f"{sheet_name}_filtered.csv", mime="text/csv") | |
| st.caption("Tips: Use the filters to focus your view, then switch tabs for different visualizations.") |