Spaces:
Sleeping
Sleeping
File size: 5,504 Bytes
fecd12b c366581 fecd12b db10938 7997da9 fecd12b db10938 c366581 db10938 fecd12b 488b6b0 fecd12b 488b6b0 fecd12b b4aedd0 fecd12b b4aedd0 fecd12b b4aedd0 fecd12b b4aedd0 fecd12b b4aedd0 fecd12b b4aedd0 fecd12b b4aedd0 fecd12b b4aedd0 fecd12b 7997da9 fecd12b b4aedd0 fecd12b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | import pandas as pd
import streamlit as st
from src import onboarding
from src.constants import APP_NAME, DEFAULT_LOOKBACK_YEARS, MONTH_KEY_FORMAT, ORDERS_ZIP
from src.data import load_data
from src.plots import monthly_spend, top_products
def _resolve_zip_bytes() -> bytes | None:
    """Return the orders archive bytes, or ``None`` if no archive is available.

    The on-disk archive at ``ORDERS_ZIP`` takes precedence; otherwise whatever
    is stored under the ``"uploaded_zip"`` session-state key is returned
    (``None`` when that key is absent).
    """
    # Session state is the upload path for Hugging Face Spaces and any other
    # hosted deployment without a persistent filesystem. The disk copy is
    # preferred so local users keep the "drop once into data/" UX.
    if not ORDERS_ZIP.exists():
        return st.session_state.get("uploaded_zip")
    return ORDERS_ZIP.read_bytes()
# (divisor, suffix) tiers, smallest first. The loop tries each tier in order
# and bails out at the first one whose formatted representation fits the
# 3-digit budget — so 999,999 falls through K (would round to "1000") and
# lands cleanly in M as "1.0M".
_TIERS = ((1_000, "K"), (1_000_000, "M"), (1_000_000_000, "B"))
def _compact(value: float) -> str:
"""Format a number to ≤3 digits with K/M/B suffixes (≤4 below 10,000)."""
if abs(value) < 10_000:
return f"{value:,.0f}"
for divisor, suffix in _TIERS:
scaled = value / divisor
if abs(scaled) >= 100:
text = f"{round(scaled):d}"
else:
text = f"{round(scaled, 1):.1f}".rstrip("0").rstrip(".")
if sum(c.isdigit() for c in text) <= 3:
return f"{text}{suffix}"
divisor, suffix = _TIERS[-1]
return f"{value / divisor:.0f}{suffix}"
def run() -> None:
    """Render the Streamlit page.

    Shows the onboarding view when no orders archive is available. Otherwise
    loads orders and refunds, then draws five summary metrics, the monthly
    spend chart, a month-range slider and the top-products view, all filtered
    to the slider's half-open ``[start, end)`` month range.
    """
    st.set_page_config(page_title=APP_NAME, layout="wide")
    st.title(APP_NAME)

    zip_bytes = _resolve_zip_bytes()
    if zip_bytes is None:
        # No archive on disk or in session state: show onboarding and stop.
        onboarding.render()
        return

    orders, refunds = load_data(zip_bytes)
    full_net = monthly_spend.compute_full_net(orders, refunds)
    sma = monthly_spend.compute_sma(full_net)

    # Default window: the last DEFAULT_LOOKBACK_YEARS before the newest order,
    # clamped so it never starts before the oldest order.
    min_date = orders["Order Date"].min().date()
    max_date = orders["Order Date"].max().date()
    default_start = max(
        min_date, (pd.Timestamp(max_date) - pd.DateOffset(years=DEFAULT_LOOKBACK_YEARS)).date()
    )

    # Placeholders for the metrics and the chart are created before the
    # slider and only filled further down, once the selected range is known —
    # presumably so they sit above the slider on the page.
    c1, c2, c3, c4, c5 = st.columns(5)
    net_slot = c1.empty()
    avg_slot = c2.empty()
    refunded_slot = c3.empty()
    orders_slot = c4.empty()
    items_slot = c5.empty()
    chart_slot = st.empty()

    # Slider options append one sentinel month past the last data month so
    # the right handle is exclusive — a single-month selection still spans a
    # tick instead of collapsing both handles onto the same point.
    month_options = [d.strftime(MONTH_KEY_FORMAT) for d in full_net.index]
    month_options.append(
        (full_net.index.max() + pd.offsets.MonthBegin(1)).strftime(MONTH_KEY_FORMAT)
    )
    default_start_month = pd.Timestamp(default_start).to_period("M").strftime(MONTH_KEY_FORMAT)
    default_end_month = month_options[-1]

    # Sync slider to chart's box-selection. Chart x uses MONTH_LABEL_FORMAT
    # ("Apr 2024"); convert to match month_options. _last_chart_selection
    # prevents the still-present selection from clobbering manual slider moves.
    points = (st.session_state.get("monthly_chart") or {}).get("selection", {}).get("points", [])
    selected_keys = sorted({pd.Timestamp(p["x"]).strftime(MONTH_KEY_FORMAT) for p in points})
    if selected_keys != st.session_state.get("_last_chart_selection"):
        if selected_keys:
            # The right handle is exclusive, so it goes one tick past the last
            # selected month. NOTE(review): this index() raises ValueError if
            # a selected month is not among month_options.
            end_idx = month_options.index(selected_keys[-1]) + 1
            st.session_state["date_range"] = (selected_keys[0], month_options[end_idx])
        st.session_state["_last_chart_selection"] = selected_keys

    # Enforce a 1-month minimum span — st.select_slider lets the user drag
    # both handles onto the same tick, which would collapse the range and
    # produce empty queries. Nudge the right handle out by one (or the left
    # in, if we're already at the sentinel).
    rng = st.session_state.get("date_range")
    if rng and rng[0] == rng[1]:
        idx = month_options.index(rng[0])
        if idx + 1 < len(month_options):
            st.session_state["date_range"] = (rng[0], month_options[idx + 1])
        else:
            st.session_state["date_range"] = (month_options[idx - 1], rng[1])

    # The session-state writes above share this widget's key ("date_range")
    # and are made before the widget is created.
    start_label, end_label = st.select_slider(
        "Date range",
        options=month_options,
        value=(default_start_month, default_end_month),
        key="date_range",
    )

    # Assumes MONTH_KEY_FORMAT strings parse back via pd.Timestamp — TODO confirm.
    start = pd.Timestamp(start_label).date()
    end_exclusive = pd.Timestamp(end_label).date()

    # Half-open filter: start inclusive, end exclusive. Refunds are filtered
    # on their own "Order Date" column.
    orders_v = orders.loc[
        (orders["Order Date"].dt.date >= start) & (orders["Order Date"].dt.date < end_exclusive)
    ]
    refunds_v = refunds.loc[
        (refunds["Order Date"].dt.date >= start) & (refunds["Order Date"].dt.date < end_exclusive)
    ]
    gross = orders_v["Total Amount"].sum()
    refunded = refunds_v["Refund Amount"].sum()
    net = gross - refunded
    # Number of slider ticks between the two handles, i.e. the number of
    # selected months given the exclusive right handle.
    n_months = month_options.index(end_label) - month_options.index(start_label)

    net_slot.metric("Net spent", f"${_compact(net)}")
    refunded_slot.metric("Refunded", f"${_compact(refunded)}")
    # "Orders" counts distinct Order IDs; "Items" counts rows of the filtered frame.
    orders_slot.metric("Orders", _compact(orders_v["Order ID"].nunique()))
    items_slot.metric("Items", _compact(len(orders_v)))
    avg_slot.metric("Avg/month", f"${_compact(net / n_months)}")

    # The chart receives an inclusive last month, so step the exclusive end
    # label back by one period.
    start_month = pd.Period(start_label, freq="M").to_timestamp()
    end_month = (pd.Period(end_label, freq="M") - 1).to_timestamp()
    monthly_spend.render(chart_slot, full_net, sma, start_month, end_month)

    top_products.render(orders_v)
|