DashBoardGen / app.py
Rathapoom's picture
Rename app (5).py to app.py
178a336 verified
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from io import BytesIO
st.set_page_config(page_title="Excel Explorer Dashboard", layout="wide")
st.title("πŸ“Š Excel Explorer β€” Interactive Dashboard")
st.caption("Upload any .xlsx file, pick a sheet, filter columns, and auto-generate charts.")
# --------- Sidebar: File & Sheet ---------
with st.sidebar:
st.header("1) Upload Excel")
uploaded = st.file_uploader("Choose an Excel (.xlsx)", type=["xlsx"])
date_parse = st.checkbox("Parse dates automatically", value=True)
st.markdown("---")
st.header("2) Sheet & Options")
sheet_name = None
sample_n = st.number_input("Preview Rows", min_value=5, max_value=200, value=20, step=5)
st.markdown("---")
st.header("3) Chart Defaults")
default_agg = st.selectbox("Default aggregation for numeric", ["sum", "mean", "count"], index=0)
top_n = st.slider("Top N for category charts", min_value=5, max_value=50, value=20, step=5)
def load_excel(file, parse_dates=True):
xls = pd.ExcelFile(file)
sheets = xls.sheet_names
frames = {}
for s in sheets:
df = pd.read_excel(file, sheet_name=s)
# Basic cleanup hints
# Try to convert object columns that look like dates
if parse_dates:
for c in df.columns:
if df[c].dtype == "object":
try:
df[c] = pd.to_datetime(df[c])
except Exception:
pass
frames[s] = df
return frames
def detect_types(df: pd.DataFrame):
cols = df.columns.tolist()
cat_cols, num_cols, dt_cols, bool_cols = [], [], [], []
for c in cols:
s = df[c]
if pd.api.types.is_datetime64_any_dtype(s):
dt_cols.append(c)
elif pd.api.types.is_bool_dtype(s):
bool_cols.append(c)
elif pd.api.types.is_numeric_dtype(s):
num_cols.append(c)
else:
# Treat low-cardinality object columns as categories
if s.dtype == "object" or pd.api.types.is_categorical_dtype(s):
if s.nunique(dropna=True) <= max(50, int(len(df) * 0.3)):
cat_cols.append(c)
else:
# high-cardinality text: keep as category candidate but mark separately
cat_cols.append(c)
else:
cat_cols.append(c)
return cat_cols, num_cols, dt_cols, bool_cols
def apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols):
st.subheader("πŸ”Ž Filters")
with st.expander("Show/Hide Filters", expanded=False):
filtered = df.copy()
cols = st.columns(3)
# Categorical filters
with cols[0]:
for c in cat_cols[:10]: # avoid too many widgets at once
vals = sorted([v for v in filtered[c].dropna().unique()])[:500]
if len(vals) > 0:
sel = st.multiselect(f"Filter: {c}", vals, default=[])
if len(sel) > 0:
filtered = filtered[filtered[c].isin(sel)]
# Numeric filters
with cols[1]:
for c in num_cols[:10]:
min_v, max_v = pd.to_numeric(filtered[c], errors="coerce").min(), pd.to_numeric(filtered[c], errors="coerce").max()
if pd.isna(min_v) or pd.isna(max_v):
continue
rng = st.slider(f"Range: {c}", float(min_v), float(max_v), (float(min_v), float(max_v)))
filtered = filtered[(pd.to_numeric(filtered[c], errors="coerce") >= rng[0]) &
(pd.to_numeric(filtered[c], errors="coerce") <= rng[1])]
# Datetime filters
with cols[2]:
for c in dt_cols[:5]:
min_d, max_d = pd.to_datetime(filtered[c]).min(), pd.to_datetime(filtered[c]).max()
if pd.isna(min_d) or pd.isna(max_d):
continue
dt = st.date_input(f"Date range: {c}", (min_d.date(), max_d.date()))
if isinstance(dt, tuple) and len(dt) == 2:
start, end = pd.to_datetime(dt[0]), pd.to_datetime(dt[1]) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
filtered = filtered[(pd.to_datetime(filtered[c]) >= start) & (pd.to_datetime(filtered[c]) <= end)]
return filtered
def recommend_charts(df, cat_cols, num_cols, dt_cols, default_agg="sum", top_n=20):
tabs = st.tabs(["πŸ“‹ Table", "πŸ“ˆ Category", "πŸ“Š Numeric by Category", "⏱️ Time Series", "πŸ” Pivot Builder"])
with tabs[0]:
st.caption("Preview (filtered)")
st.dataframe(df.head(1000))
st.caption(f"Rows: {len(df):,} | Columns: {len(df.columns)}")
with tabs[1]:
st.caption("Count by a categorical column")
if len(cat_cols) == 0:
st.info("No categorical-like columns detected.")
else:
cat = st.selectbox("Category column", cat_cols, index=0)
top = st.slider("Top N", 5, 100, min(top_n, 20), 5)
vc = df[cat].astype("object").value_counts(dropna=False).reset_index()
vc.columns = [cat, "count"]
vc = vc.head(top)
c1, c2 = st.columns(2)
with c1:
fig = px.bar(vc, x="count", y=cat, orientation="h", title=f"Count by {cat}")
st.plotly_chart(fig, use_container_width=True)
with c2:
fig = px.treemap(vc, path=[cat], values="count", title=f"Treemap β€” {cat}")
st.plotly_chart(fig, use_container_width=True)
with tabs[2]:
st.caption("Aggregate a numeric column by a category")
if len(cat_cols) == 0 or len(num_cols) == 0:
st.info("Need at least one category and one numeric column.")
else:
cat = st.selectbox("Group by (category)", cat_cols, key="num_cat")
num = st.selectbox("Value (numeric)", num_cols, key="num_val")
agg = st.selectbox("Aggregation", ["sum", "mean", "count"], index=["sum","mean","count"].index(default_agg))
g = None
if agg == "sum":
g = df.groupby(cat, dropna=False)[num].sum().reset_index(name="value")
elif agg == "mean":
g = df.groupby(cat, dropna=False)[num].mean().reset_index(name="value")
else:
g = df.groupby(cat, dropna=False)[num].count().reset_index(name="value")
g = g.sort_values("value", ascending=False).head(top_n)
fig = px.bar(g, x="value", y=cat, orientation="h", title=f"{agg} of {num} by {cat}")
st.plotly_chart(fig, use_container_width=True)
with tabs[3]:
st.caption("Time series from a date/datetime column")
if len(dt_cols) == 0:
st.info("No datetime columns detected.")
else:
dtc = st.selectbox("Datetime column", dt_cols)
mode = st.selectbox("Aggregation", ["count rows", "count by category", "sum numeric", "mean numeric"])
freq = st.selectbox("Resample frequency", ["D","W","M","Q","Y"], index=2, help="D=Day, W=Week, M=Month, Q=Quarter, Y=Year")
df2 = df.dropna(subset=[dtc]).copy()
df2[dtc] = pd.to_datetime(df2[dtc])
df2 = df2.set_index(dtc).sort_index()
if mode == "count rows":
ts = df2.resample(freq).size().reset_index(name="count")
fig = px.line(ts, x=dtc, y="count", markers=True, title=f"Row count over time ({freq})")
st.plotly_chart(fig, use_container_width=True)
elif mode == "count by category":
cat_choices = [c for c in df.columns if c not in dt_cols]
if len(cat_choices) == 0:
st.info("No category columns available.")
else:
cat = st.selectbox("Category", cat_choices)
ts = df2.groupby([pd.Grouper(freq=freq), cat]).size().reset_index(name="count")
fig = px.line(ts, x=dtc, y="count", color=cat, markers=True, title=f"Count over time by {cat}")
st.plotly_chart(fig, use_container_width=True)
elif mode in ["sum numeric", "mean numeric"]:
nums = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
if len(nums) == 0:
st.info("No numeric columns available.")
else:
num = st.selectbox("Numeric column", nums)
if mode == "sum numeric":
ts = df2[num].resample(freq).sum().reset_index(name="value")
ttl = f"Sum of {num} over time ({freq})"
else:
ts = df2[num].resample(freq).mean().reset_index(name="value")
ttl = f"Mean of {num} over time ({freq})"
fig = px.line(ts, x=dtc, y="value", markers=True, title=ttl)
st.plotly_chart(fig, use_container_width=True)
with tabs[4]:
st.caption("Build a quick pivot")
cat_options = [c for c in df.columns if not pd.api.types.is_numeric_dtype(df[c])]
num_options = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
col1, col2, col3 = st.columns(3)
rows = col1.multiselect("Rows (categories)", cat_options[:50])
cols = col2.multiselect("Columns (categories)", cat_options[:50])
vals = col3.multiselect("Values (numeric)", num_options[:10])
aggfunc = st.selectbox("Aggregation", ["sum","mean","count"], index=["sum","mean","count"].index(default_agg))
if len(vals) == 0:
st.info("Pick at least one numeric value field.")
else:
func = {"sum": "sum", "mean": "mean", "count": "count"}[aggfunc]
try:
pv = pd.pivot_table(df, index=rows if rows else None, columns=cols if cols else None,
values=vals, aggfunc=func, margins=True, dropna=False, observed=False)
st.dataframe(pv)
except Exception as e:
st.error(f"Pivot error: {e}")
# --------- Main flow ---------
if uploaded is None:
st.info("Upload an Excel file to begin.")
st.stop()
try:
frames = load_excel(uploaded, parse_dates=date_parse)
except Exception as e:
st.error(f"Failed to read Excel: {e}")
st.stop()
with st.sidebar:
sheet_name = st.selectbox("Select sheet", list(frames.keys()))
df = frames[sheet_name]
st.subheader(f"πŸ“„ Sheet: {sheet_name}")
st.write(df.head(sample_n))
# Detect types & filter
cat_cols, num_cols, dt_cols, bool_cols = detect_types(df)
st.markdown(f"**Detected types:** 🏷️ Category: {len(cat_cols)} | πŸ”’ Numeric: {len(num_cols)} | πŸ—“οΈ Datetime: {len(dt_cols)} | βœ… Bool: {len(bool_cols)}")
filtered_df = apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols)
st.markdown("---")
st.subheader("πŸ“ˆ Recommended Charts")
recommend_charts(filtered_df, cat_cols, num_cols, dt_cols, default_agg=default_agg, top_n=top_n)
# Download filtered data
st.markdown("---")
st.download_button("⬇️ Download filtered CSV", data=filtered_df.to_csv(index=False).encode("utf-8-sig"),
file_name=f"{sheet_name}_filtered.csv", mime="text/csv")
st.caption("Tips: Use the filters to focus your view, then switch tabs for different visualizations.")