Spaces:

Rathapoom
/

DashBoardGen

Sleeping

App Files Files Community

DashBoardGen / app.py

Rathapoom

Rename app (5).py to app.py

178a336 verified 5 months ago

raw

history blame contribute delete

11.5 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from io import BytesIO

	st.set_page_config(page_title="Excel Explorer Dashboard", layout="wide")

	st.title("📊 Excel Explorer — Interactive Dashboard")
	st.caption("Upload any .xlsx file, pick a sheet, filter columns, and auto-generate charts.")

	# --------- Sidebar: File & Sheet ---------
	with st.sidebar:
	st.header("1) Upload Excel")
	uploaded = st.file_uploader("Choose an Excel (.xlsx)", type=["xlsx"])
	date_parse = st.checkbox("Parse dates automatically", value=True)
	st.markdown("---")
	st.header("2) Sheet & Options")
	sheet_name = None
	sample_n = st.number_input("Preview Rows", min_value=5, max_value=200, value=20, step=5)
	st.markdown("---")
	st.header("3) Chart Defaults")
	default_agg = st.selectbox("Default aggregation for numeric", ["sum", "mean", "count"], index=0)
	top_n = st.slider("Top N for category charts", min_value=5, max_value=50, value=20, step=5)

	def load_excel(file, parse_dates=True):
	xls = pd.ExcelFile(file)
	sheets = xls.sheet_names
	frames = {}
	for s in sheets:
	df = pd.read_excel(file, sheet_name=s)
	# Basic cleanup hints
	# Try to convert object columns that look like dates
	if parse_dates:
	for c in df.columns:
	if df[c].dtype == "object":
	try:
	df[c] = pd.to_datetime(df[c])
	except Exception:
	pass
	frames[s] = df
	return frames

	def detect_types(df: pd.DataFrame):
	cols = df.columns.tolist()
	cat_cols, num_cols, dt_cols, bool_cols = [], [], [], []
	for c in cols:
	s = df[c]
	if pd.api.types.is_datetime64_any_dtype(s):
	dt_cols.append(c)
	elif pd.api.types.is_bool_dtype(s):
	bool_cols.append(c)
	elif pd.api.types.is_numeric_dtype(s):
	num_cols.append(c)
	else:
	# Treat low-cardinality object columns as categories
	if s.dtype == "object" or pd.api.types.is_categorical_dtype(s):
	if s.nunique(dropna=True) <= max(50, int(len(df) * 0.3)):
	cat_cols.append(c)
	else:
	# high-cardinality text: keep as category candidate but mark separately
	cat_cols.append(c)
	else:
	cat_cols.append(c)
	return cat_cols, num_cols, dt_cols, bool_cols

	def apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols):
	st.subheader("🔎 Filters")
	with st.expander("Show/Hide Filters", expanded=False):
	filtered = df.copy()
	cols = st.columns(3)
	# Categorical filters
	with cols[0]:
	for c in cat_cols[:10]: # avoid too many widgets at once
	vals = sorted([v for v in filtered[c].dropna().unique()])[:500]
	if len(vals) > 0:
	sel = st.multiselect(f"Filter: {c}", vals, default=[])
	if len(sel) > 0:
	filtered = filtered[filtered[c].isin(sel)]
	# Numeric filters
	with cols[1]:
	for c in num_cols[:10]:
	min_v, max_v = pd.to_numeric(filtered[c], errors="coerce").min(), pd.to_numeric(filtered[c], errors="coerce").max()
	if pd.isna(min_v) or pd.isna(max_v):
	continue
	rng = st.slider(f"Range: {c}", float(min_v), float(max_v), (float(min_v), float(max_v)))
	filtered = filtered[(pd.to_numeric(filtered[c], errors="coerce") >= rng[0]) &
	(pd.to_numeric(filtered[c], errors="coerce") <= rng[1])]
	# Datetime filters
	with cols[2]:
	for c in dt_cols[:5]:
	min_d, max_d = pd.to_datetime(filtered[c]).min(), pd.to_datetime(filtered[c]).max()
	if pd.isna(min_d) or pd.isna(max_d):
	continue
	dt = st.date_input(f"Date range: {c}", (min_d.date(), max_d.date()))
	if isinstance(dt, tuple) and len(dt) == 2:
	start, end = pd.to_datetime(dt[0]), pd.to_datetime(dt[1]) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
	filtered = filtered[(pd.to_datetime(filtered[c]) >= start) & (pd.to_datetime(filtered[c]) <= end)]
	return filtered

	def recommend_charts(df, cat_cols, num_cols, dt_cols, default_agg="sum", top_n=20):
	tabs = st.tabs(["📋 Table", "📈 Category", "📊 Numeric by Category", "⏱️ Time Series", "🔁 Pivot Builder"])
	with tabs[0]:
	st.caption("Preview (filtered)")
	st.dataframe(df.head(1000))
	st.caption(f"Rows: {len(df):,} \| Columns: {len(df.columns)}")

	with tabs[1]:
	st.caption("Count by a categorical column")
	if len(cat_cols) == 0:
	st.info("No categorical-like columns detected.")
	else:
	cat = st.selectbox("Category column", cat_cols, index=0)
	top = st.slider("Top N", 5, 100, min(top_n, 20), 5)
	vc = df[cat].astype("object").value_counts(dropna=False).reset_index()
	vc.columns = [cat, "count"]
	vc = vc.head(top)
	c1, c2 = st.columns(2)
	with c1:
	fig = px.bar(vc, x="count", y=cat, orientation="h", title=f"Count by {cat}")
	st.plotly_chart(fig, use_container_width=True)
	with c2:
	fig = px.treemap(vc, path=[cat], values="count", title=f"Treemap — {cat}")
	st.plotly_chart(fig, use_container_width=True)

	with tabs[2]:
	st.caption("Aggregate a numeric column by a category")
	if len(cat_cols) == 0 or len(num_cols) == 0:
	st.info("Need at least one category and one numeric column.")
	else:
	cat = st.selectbox("Group by (category)", cat_cols, key="num_cat")
	num = st.selectbox("Value (numeric)", num_cols, key="num_val")
	agg = st.selectbox("Aggregation", ["sum", "mean", "count"], index=["sum","mean","count"].index(default_agg))
	g = None
	if agg == "sum":
	g = df.groupby(cat, dropna=False)[num].sum().reset_index(name="value")
	elif agg == "mean":
	g = df.groupby(cat, dropna=False)[num].mean().reset_index(name="value")
	else:
	g = df.groupby(cat, dropna=False)[num].count().reset_index(name="value")
	g = g.sort_values("value", ascending=False).head(top_n)
	fig = px.bar(g, x="value", y=cat, orientation="h", title=f"{agg} of {num} by {cat}")
	st.plotly_chart(fig, use_container_width=True)

	with tabs[3]:
	st.caption("Time series from a date/datetime column")
	if len(dt_cols) == 0:
	st.info("No datetime columns detected.")
	else:
	dtc = st.selectbox("Datetime column", dt_cols)
	mode = st.selectbox("Aggregation", ["count rows", "count by category", "sum numeric", "mean numeric"])
	freq = st.selectbox("Resample frequency", ["D","W","M","Q","Y"], index=2, help="D=Day, W=Week, M=Month, Q=Quarter, Y=Year")
	df2 = df.dropna(subset=[dtc]).copy()
	df2[dtc] = pd.to_datetime(df2[dtc])
	df2 = df2.set_index(dtc).sort_index()

	if mode == "count rows":
	ts = df2.resample(freq).size().reset_index(name="count")
	fig = px.line(ts, x=dtc, y="count", markers=True, title=f"Row count over time ({freq})")
	st.plotly_chart(fig, use_container_width=True)

	elif mode == "count by category":
	cat_choices = [c for c in df.columns if c not in dt_cols]
	if len(cat_choices) == 0:
	st.info("No category columns available.")
	else:
	cat = st.selectbox("Category", cat_choices)
	ts = df2.groupby([pd.Grouper(freq=freq), cat]).size().reset_index(name="count")
	fig = px.line(ts, x=dtc, y="count", color=cat, markers=True, title=f"Count over time by {cat}")
	st.plotly_chart(fig, use_container_width=True)

	elif mode in ["sum numeric", "mean numeric"]:
	nums = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
	if len(nums) == 0:
	st.info("No numeric columns available.")
	else:
	num = st.selectbox("Numeric column", nums)
	if mode == "sum numeric":
	ts = df2[num].resample(freq).sum().reset_index(name="value")
	ttl = f"Sum of {num} over time ({freq})"
	else:
	ts = df2[num].resample(freq).mean().reset_index(name="value")
	ttl = f"Mean of {num} over time ({freq})"
	fig = px.line(ts, x=dtc, y="value", markers=True, title=ttl)
	st.plotly_chart(fig, use_container_width=True)

	with tabs[4]:
	st.caption("Build a quick pivot")
	cat_options = [c for c in df.columns if not pd.api.types.is_numeric_dtype(df[c])]
	num_options = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
	col1, col2, col3 = st.columns(3)
	rows = col1.multiselect("Rows (categories)", cat_options[:50])
	cols = col2.multiselect("Columns (categories)", cat_options[:50])
	vals = col3.multiselect("Values (numeric)", num_options[:10])
	aggfunc = st.selectbox("Aggregation", ["sum","mean","count"], index=["sum","mean","count"].index(default_agg))
	if len(vals) == 0:
	st.info("Pick at least one numeric value field.")
	else:
	func = {"sum": "sum", "mean": "mean", "count": "count"}[aggfunc]
	try:
	pv = pd.pivot_table(df, index=rows if rows else None, columns=cols if cols else None,
	values=vals, aggfunc=func, margins=True, dropna=False, observed=False)
	st.dataframe(pv)
	except Exception as e:
	st.error(f"Pivot error: {e}")

	# --------- Main flow ---------
	if uploaded is None:
	st.info("Upload an Excel file to begin.")
	st.stop()

	try:
	frames = load_excel(uploaded, parse_dates=date_parse)
	except Exception as e:
	st.error(f"Failed to read Excel: {e}")
	st.stop()

	with st.sidebar:
	sheet_name = st.selectbox("Select sheet", list(frames.keys()))

	df = frames[sheet_name]
	st.subheader(f"📄 Sheet: {sheet_name}")
	st.write(df.head(sample_n))

	# Detect types & filter
	cat_cols, num_cols, dt_cols, bool_cols = detect_types(df)
	st.markdown(f"Detected types: 🏷️ Category: {len(cat_cols)} \| 🔢 Numeric: {len(num_cols)} \| 🗓️ Datetime: {len(dt_cols)} \| ✅ Bool: {len(bool_cols)}")

	filtered_df = apply_filters(df, cat_cols, num_cols, dt_cols, bool_cols)

	st.markdown("---")
	st.subheader("📈 Recommended Charts")
	recommend_charts(filtered_df, cat_cols, num_cols, dt_cols, default_agg=default_agg, top_n=top_n)

	# Download filtered data
	st.markdown("---")
	st.download_button("⬇️ Download filtered CSV", data=filtered_df.to_csv(index=False).encode("utf-8-sig"),
	file_name=f"{sheet_name}_filtered.csv", mime="text/csv")

	st.caption("Tips: Use the filters to focus your view, then switch tabs for different visualizations.")