import math
from datetime import date

import pandas as pd
import streamlit as st

from src import onboarding
from src.constants import APP_NAME, DEFAULT_LOOKBACK_YEARS, MONTH_KEY_FORMAT, ORDERS_ZIP
from src.data import load_data
from src.plots import monthly_spend, top_products


def _resolve_zip_bytes() -> bytes | None:
    """Return the raw bytes of the orders zip, or ``None`` if none is available."""
    # Disk wins so local users keep the "drop once into data/" UX. Session-state
    # is the upload path used on Hugging Face Spaces (and any other hosted
    # deployment) where there's no persistent filesystem.
    if ORDERS_ZIP.exists():
        return ORDERS_ZIP.read_bytes()
    return st.session_state.get("uploaded_zip")


# (divisor, suffix) tiers, smallest first. The loop tries each tier in order
# and bails out at the first one whose formatted representation fits the
# 3-digit budget — so 999,999 falls through K (would round to "1000") and
# lands cleanly in M as "1M" (trailing ".0" is stripped).
_TIERS = ((1_000, "K"), (1_000_000, "M"), (1_000_000_000, "B"))


def _compact(value: float) -> str:
    """Format a number to ≤3 digits with K/M/B suffixes (≤4 below 10,000).

    Values whose *rounded* magnitude is below 10,000 are shown as a plain
    integer with a thousands separator (``1234`` -> ``"1,234"``). Larger
    values are scaled by the first tier in ``_TIERS`` whose rendering uses at
    most three digits (``12_345`` -> ``"12.3K"``, ``999_999`` -> ``"1M"``).
    Values too large even for the last tier are rendered as whole units of
    that tier, however many digits that takes.

    Non-finite input (NaN, ±inf) is returned as its bare text (``"nan"``,
    ``"inf"``, ``"-inf"``) with no suffix.
    """
    # NaN would otherwise slip through every comparison and come out as
    # "nanK"; ±inf would raise OverflowError inside round().
    if not math.isfinite(value):
        return f"{value:.0f}"

    # Test the rounded magnitude, not the raw one: 9,999.6 formats as
    # "10,000" (five digits), so it must go to the K tier instead.
    if abs(round(value)) < 10_000:
        return f"{value:,.0f}"

    for divisor, suffix in _TIERS:
        scaled = value / divisor
        if abs(scaled) >= 100:
            text = f"{round(scaled):d}"
        else:
            # One decimal place, with a trailing ".0" trimmed ("1.0" -> "1").
            text = f"{round(scaled, 1):.1f}".rstrip("0").rstrip(".")
        if sum(c.isdigit() for c in text) <= 3:
            return f"{text}{suffix}"

    # Nothing fit the digit budget: fall back to whole units of the top tier.
    divisor, suffix = _TIERS[-1]
    return f"{value / divisor:.0f}{suffix}"


def _rows_in_range(frame: pd.DataFrame, start: date, end_exclusive: date) -> pd.DataFrame:
    """Return rows of *frame* whose "Order Date" lies in ``[start, end_exclusive)``."""
    order_days = frame["Order Date"].dt.date
    return frame.loc[(order_days >= start) & (order_days < end_exclusive)]


def run() -> None:
    """Render the dashboard page.

    Shows the onboarding screen when no orders zip is available. Otherwise
    loads orders and refunds, lays out the metric and chart placeholders,
    renders the month-range slider (kept in sync with the chart's box
    selection), and fills the metrics, monthly-spend chart and top-products
    view for the selected range.
    """
    st.set_page_config(page_title=APP_NAME, layout="wide")
    st.title(APP_NAME)

    zip_bytes = _resolve_zip_bytes()
    if zip_bytes is None:
        onboarding.render()
        return

    orders, refunds = load_data(zip_bytes)
    full_net = monthly_spend.compute_full_net(orders, refunds)
    sma = monthly_spend.compute_sma(full_net)

    min_date = orders["Order Date"].min().date()
    max_date = orders["Order Date"].max().date()
    default_start = max(
        min_date, (pd.Timestamp(max_date) - pd.DateOffset(years=DEFAULT_LOOKBACK_YEARS)).date()
    )

    # Placeholders are created before the slider so they sit above it on the
    # page; they are filled in once the selected range is known.
    c1, c2, c3, c4, c5 = st.columns(5)
    net_slot = c1.empty()
    avg_slot = c2.empty()
    refunded_slot = c3.empty()
    orders_slot = c4.empty()
    items_slot = c5.empty()
    chart_slot = st.empty()

    # Slider options append one sentinel month past the last data month so
    # the right handle is exclusive — a single-month selection still spans a
    # tick instead of collapsing both handles onto the same point.
    month_options = [d.strftime(MONTH_KEY_FORMAT) for d in full_net.index]
    month_options.append(
        (full_net.index.max() + pd.offsets.MonthBegin(1)).strftime(MONTH_KEY_FORMAT)
    )
    default_start_month = pd.Timestamp(default_start).to_period("M").strftime(MONTH_KEY_FORMAT)
    default_end_month = month_options[-1]

    # Sync slider to chart's box-selection. Chart x uses MONTH_LABEL_FORMAT
    # ("Apr 2024"); convert to match month_options. _last_chart_selection
    # prevents the still-present selection from clobbering manual slider moves.
    points = (st.session_state.get("monthly_chart") or {}).get("selection", {}).get("points", [])
    selected_keys = sorted({pd.Timestamp(p["x"]).strftime(MONTH_KEY_FORMAT) for p in points})
    if selected_keys != st.session_state.get("_last_chart_selection"):
        if selected_keys:
            # The right handle is exclusive, so it goes one tick past the
            # last selected month.
            end_idx = month_options.index(selected_keys[-1]) + 1
            st.session_state["date_range"] = (selected_keys[0], month_options[end_idx])
        st.session_state["_last_chart_selection"] = selected_keys

    # Enforce a 1-month minimum span — st.select_slider lets the user drag
    # both handles onto the same tick, which would collapse the range and
    # produce empty queries. Nudge the right handle out by one (or the left
    # in, if we're already at the sentinel).
    rng = st.session_state.get("date_range")
    if rng and rng[0] == rng[1]:
        idx = month_options.index(rng[0])
        if idx + 1 < len(month_options):
            st.session_state["date_range"] = (rng[0], month_options[idx + 1])
        else:
            st.session_state["date_range"] = (month_options[idx - 1], rng[1])

    start_label, end_label = st.select_slider(
        "Date range",
        options=month_options,
        value=(default_start_month, default_end_month),
        key="date_range",
    )

    start = pd.Timestamp(start_label).date()
    end_exclusive = pd.Timestamp(end_label).date()
    orders_v = _rows_in_range(orders, start, end_exclusive)
    refunds_v = _rows_in_range(refunds, start, end_exclusive)

    gross = orders_v["Total Amount"].sum()
    refunded = refunds_v["Refund Amount"].sum()
    net = gross - refunded
    # Number of slider ticks between the handles, i.e. months in the range.
    n_months = month_options.index(end_label) - month_options.index(start_label)

    net_slot.metric("Net spent", f"${_compact(net)}")
    refunded_slot.metric("Refunded", f"${_compact(refunded)}")
    orders_slot.metric("Orders", _compact(orders_v["Order ID"].nunique()))
    items_slot.metric("Items", _compact(len(orders_v)))
    avg_slot.metric("Avg/month", f"${_compact(net / n_months)}")

    # The chart takes an inclusive last month, so step back one from the
    # exclusive slider end.
    start_month = pd.Period(start_label, freq="M").to_timestamp()
    end_month = (pd.Period(end_label, freq="M") - 1).to_timestamp()
    monthly_spend.render(chart_slot, full_net, sma, start_month, end_month)

    top_products.render(orders_v)