Spaces:

Rathapoom
/

DashBoardGen

Sleeping

File size: 11,458 Bytes

8ca5246

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from io import BytesIO

st.set_page_config(page_title="Excel Explorer Dashboard", layout="wide")

st.title("📊 Excel Explorer — Interactive Dashboard")
st.caption("Upload any .xlsx file, pick a sheet, filter columns, and auto-generate charts.")

# --------- Sidebar: File & Sheet ---------
with st.sidebar:
    st.header("1) Upload Excel")
    uploaded = st.file_uploader("Choose an Excel (.xlsx)", type=["xlsx"])
    date_parse = st.checkbox("Parse dates automatically", value=True)
    st.markdown("---")
    st.header("2) Sheet & Options")
    sheet_name = None
    sample_n = st.number_input("Preview Rows", min_value=5, max_value=200, value=20, step=5)
    st.markdown("---")
    st.header("3) Chart Defaults")
    default_agg = st.selectbox("Default aggregation for numeric", ["sum", "mean", "count"], index=0)
    top_n = st.slider("Top N for category charts", min_value=5, max_value=50, value=20, step=5)

def load_excel(file, parse_dates=True):
    xls = pd.ExcelFile(file)
    sheets = xls.sheet_names
    frames = {}
    for s in sheets:
        df = pd.read_excel(file, sheet_name=s)
        # Basic cleanup hints
        # Try to convert object columns that look like dates
        if parse_dates:
            for c in df.columns:
                if df[c].dtype == "object":
                    try:
                        df[c] = pd.to_datetime(df[c])
                    except Exception:
                        pass
        frames[s] = df
    return frames

def detect_types(df: pd.DataFrame):
    cols = df.columns.tolist()
    cat_cols, num_cols, dt_cols, bool_cols = [], [], [], []
    for c in cols:
        s = df[c]
        if pd.api.types.is_datetime64_any_dtype(s):
            dt_cols.append(c)
        elif pd.api.types.is_bool_dtype(s):
            bool_cols.append(c)
        elif pd.api.types.is_numeric_dtype(s):
            num_cols.append(c)
        else:
            # Treat low-cardinality object columns as categories
            if s.dtype == "object" or pd.api.types.is_categorical_dtype(s):
                if s.nunique(dropna=True) <= max(50, int(len(df) * 0.3)):
                    cat_cols.append(c)
                else:
                    # high-cardinality text: keep as category candidate but mark separately
                    cat_cols.append(c)
            else:
                cat_cols.append(c)
    return cat_cols, num_cols, dt_cols, bool_cols

def apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols):
    st.subheader("🔎 Filters")
    with st.expander("Show/Hide Filters", expanded=False):
        filtered = df.copy()
        cols = st.columns(3)
        # Categorical filters
        with cols[0]:
            for c in cat_cols[:10]:  # avoid too many widgets at once
                vals = sorted([v for v in filtered[c].dropna().unique()])[:500]
                if len(vals) > 0:
                    sel = st.multiselect(f"Filter: {c}", vals, default=[])
                    if len(sel) > 0:
                        filtered = filtered[filtered[c].isin(sel)]
        # Numeric filters
        with cols[1]:
            for c in num_cols[:10]:
                min_v, max_v = pd.to_numeric(filtered[c], errors="coerce").min(), pd.to_numeric(filtered[c], errors="coerce").max()
                if pd.isna(min_v) or pd.isna(max_v):
                    continue
                rng = st.slider(f"Range: {c}", float(min_v), float(max_v), (float(min_v), float(max_v)))
                filtered = filtered[(pd.to_numeric(filtered[c], errors="coerce") >= rng[0]) &
                                    (pd.to_numeric(filtered[c], errors="coerce") <= rng[1])]
        # Datetime filters
        with cols[2]:
            for c in dt_cols[:5]:
                min_d, max_d = pd.to_datetime(filtered[c]).min(), pd.to_datetime(filtered[c]).max()
                if pd.isna(min_d) or pd.isna(max_d):
                    continue
                dt = st.date_input(f"Date range: {c}", (min_d.date(), max_d.date()))
                if isinstance(dt, tuple) and len(dt) == 2:
                    start, end = pd.to_datetime(dt[0]), pd.to_datetime(dt[1]) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
                    filtered = filtered[(pd.to_datetime(filtered[c]) >= start) & (pd.to_datetime(filtered[c]) <= end)]
        return filtered

def recommend_charts(df, cat_cols, num_cols, dt_cols, default_agg="sum", top_n=20):
    tabs = st.tabs(["📋 Table", "📈 Category", "📊 Numeric by Category", "⏱️ Time Series", "🔁 Pivot Builder"])
    with tabs[0]:
        st.caption("Preview (filtered)")
        st.dataframe(df.head(1000))
        st.caption(f"Rows: {len(df):,} | Columns: {len(df.columns)}")

    with tabs[1]:
        st.caption("Count by a categorical column")
        if len(cat_cols) == 0:
            st.info("No categorical-like columns detected.")
        else:
            cat = st.selectbox("Category column", cat_cols, index=0)
            top = st.slider("Top N", 5, 100, min(top_n, 20), 5)
            vc = df[cat].astype("object").value_counts(dropna=False).reset_index()
            vc.columns = [cat, "count"]
            vc = vc.head(top)
            c1, c2 = st.columns(2)
            with c1:
                fig = px.bar(vc, x="count", y=cat, orientation="h", title=f"Count by {cat}")
                st.plotly_chart(fig, use_container_width=True)
            with c2:
                fig = px.treemap(vc, path=[cat], values="count", title=f"Treemap — {cat}")
                st.plotly_chart(fig, use_container_width=True)

    with tabs[2]:
        st.caption("Aggregate a numeric column by a category")
        if len(cat_cols) == 0 or len(num_cols) == 0:
            st.info("Need at least one category and one numeric column.")
        else:
            cat = st.selectbox("Group by (category)", cat_cols, key="num_cat")
            num = st.selectbox("Value (numeric)", num_cols, key="num_val")
            agg = st.selectbox("Aggregation", ["sum", "mean", "count"], index=["sum","mean","count"].index(default_agg))
            g = None
            if agg == "sum":
                g = df.groupby(cat, dropna=False)[num].sum().reset_index(name="value")
            elif agg == "mean":
                g = df.groupby(cat, dropna=False)[num].mean().reset_index(name="value")
            else:
                g = df.groupby(cat, dropna=False)[num].count().reset_index(name="value")
            g = g.sort_values("value", ascending=False).head(top_n)
            fig = px.bar(g, x="value", y=cat, orientation="h", title=f"{agg} of {num} by {cat}")
            st.plotly_chart(fig, use_container_width=True)

    with tabs[3]:
        st.caption("Time series from a date/datetime column")
        if len(dt_cols) == 0:
            st.info("No datetime columns detected.")
        else:
            dtc = st.selectbox("Datetime column", dt_cols)
            mode = st.selectbox("Aggregation", ["count rows", "count by category", "sum numeric", "mean numeric"])
            freq = st.selectbox("Resample frequency", ["D","W","M","Q","Y"], index=2, help="D=Day, W=Week, M=Month, Q=Quarter, Y=Year")
            df2 = df.dropna(subset=[dtc]).copy()
            df2[dtc] = pd.to_datetime(df2[dtc])
            df2 = df2.set_index(dtc).sort_index()

            if mode == "count rows":
                ts = df2.resample(freq).size().reset_index(name="count")
                fig = px.line(ts, x=dtc, y="count", markers=True, title=f"Row count over time ({freq})")
                st.plotly_chart(fig, use_container_width=True)

            elif mode == "count by category":
                cat_choices = [c for c in df.columns if c not in dt_cols]
                if len(cat_choices) == 0:
                    st.info("No category columns available.")
                else:
                    cat = st.selectbox("Category", cat_choices)
                    ts = df2.groupby([pd.Grouper(freq=freq), cat]).size().reset_index(name="count")
                    fig = px.line(ts, x=dtc, y="count", color=cat, markers=True, title=f"Count over time by {cat}")
                    st.plotly_chart(fig, use_container_width=True)

            elif mode in ["sum numeric", "mean numeric"]:
                nums = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
                if len(nums) == 0:
                    st.info("No numeric columns available.")
                else:
                    num = st.selectbox("Numeric column", nums)
                    if mode == "sum numeric":
                        ts = df2[num].resample(freq).sum().reset_index(name="value")
                        ttl = f"Sum of {num} over time ({freq})"
                    else:
                        ts = df2[num].resample(freq).mean().reset_index(name="value")
                        ttl = f"Mean of {num} over time ({freq})"
                    fig = px.line(ts, x=dtc, y="value", markers=True, title=ttl)
                    st.plotly_chart(fig, use_container_width=True)

    with tabs[4]:
        st.caption("Build a quick pivot")
        cat_options = [c for c in df.columns if not pd.api.types.is_numeric_dtype(df[c])]
        num_options = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        col1, col2, col3 = st.columns(3)
        rows = col1.multiselect("Rows (categories)", cat_options[:50])
        cols = col2.multiselect("Columns (categories)", cat_options[:50])
        vals = col3.multiselect("Values (numeric)", num_options[:10])
        aggfunc = st.selectbox("Aggregation", ["sum","mean","count"], index=["sum","mean","count"].index(default_agg))
        if len(vals) == 0:
            st.info("Pick at least one numeric value field.")
        else:
            func = {"sum": "sum", "mean": "mean", "count": "count"}[aggfunc]
            try:
                pv = pd.pivot_table(df, index=rows if rows else None, columns=cols if cols else None,
                                    values=vals, aggfunc=func, margins=True, dropna=False, observed=False)
                st.dataframe(pv)
            except Exception as e:
                st.error(f"Pivot error: {e}")

# --------- Main flow ---------
if uploaded is None:
    st.info("Upload an Excel file to begin.")
    st.stop()

try:
    frames = load_excel(uploaded, parse_dates=date_parse)
except Exception as e:
    st.error(f"Failed to read Excel: {e}")
    st.stop()

with st.sidebar:
    sheet_name = st.selectbox("Select sheet", list(frames.keys()))

df = frames[sheet_name]
st.subheader(f"📄 Sheet: {sheet_name}")
st.write(df.head(sample_n))

# Detect types & filter
cat_cols, num_cols, dt_cols, bool_cols = detect_types(df)
st.markdown(f"**Detected types:** 🏷️ Category: {len(cat_cols)} | 🔢 Numeric: {len(num_cols)} | 🗓️ Datetime: {len(dt_cols)} | ✅ Bool: {len(bool_cols)}")

filtered_df = apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols)

st.markdown("---")
st.subheader("📈 Recommended Charts")
recommend_charts(filtered_df, cat_cols, num_cols, dt_cols, default_agg=default_agg, top_n=top_n)

# Download filtered data
st.markdown("---")
st.download_button("⬇️ Download filtered CSV", data=filtered_df.to_csv(index=False).encode("utf-8-sig"),
                   file_name=f"{sheet_name}_filtered.csv", mime="text/csv")

st.caption("Tips: Use the filters to focus your view, then switch tabs for different visualizations.")