File size: 5,504 Bytes
fecd12b
 
 
c366581
 
fecd12b
 
 
 
db10938
 
 
 
 
 
 
 
 
7997da9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fecd12b
 
 
 
db10938
 
c366581
 
 
db10938
fecd12b
 
 
 
 
 
 
 
 
 
488b6b0
fecd12b
488b6b0
 
 
 
fecd12b
 
 
b4aedd0
 
 
fecd12b
b4aedd0
 
 
fecd12b
b4aedd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fecd12b
 
 
 
 
b4aedd0
fecd12b
 
b4aedd0
fecd12b
 
b4aedd0
fecd12b
 
b4aedd0
fecd12b
 
 
 
 
b4aedd0
fecd12b
7997da9
 
 
 
 
fecd12b
b4aedd0
 
fecd12b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import pandas as pd
import streamlit as st

from src import onboarding
from src.constants import APP_NAME, DEFAULT_LOOKBACK_YEARS, MONTH_KEY_FORMAT, ORDERS_ZIP
from src.data import load_data
from src.plots import monthly_spend, top_products


def _resolve_zip_bytes() -> bytes | None:
    """Return the raw orders ZIP, or ``None`` when no archive is available.

    Two sources are consulted, in priority order:

    1. ``ORDERS_ZIP`` on disk, so local users keep the "drop once into
       data/" UX.
    2. The ``"uploaded_zip"`` entry of ``st.session_state``, which is the
       upload path used on Hugging Face Spaces (and any other hosted
       deployment) where there's no persistent filesystem.
    """
    if not ORDERS_ZIP.exists():
        # Nothing on disk: fall back to the in-session upload, if any.
        return st.session_state.get("uploaded_zip")
    return ORDERS_ZIP.read_bytes()


# (divisor, suffix) tiers, smallest first. The loop tries each tier in order
# and bails out at the first one whose formatted representation fits the
# 3-digit budget — so 999,999 falls through K (would round to "1000") and
# lands cleanly in M as "1.0M".
_TIERS = ((1_000, "K"), (1_000_000, "M"), (1_000_000_000, "B"))


def _compact(value: float) -> str:
    """Format a number to ≤3 digits with K/M/B suffixes (≤4 below 10,000)."""
    if abs(value) < 10_000:
        return f"{value:,.0f}"
    for divisor, suffix in _TIERS:
        scaled = value / divisor
        if abs(scaled) >= 100:
            text = f"{round(scaled):d}"
        else:
            text = f"{round(scaled, 1):.1f}".rstrip("0").rstrip(".")
        if sum(c.isdigit() for c in text) <= 3:
            return f"{text}{suffix}"
    divisor, suffix = _TIERS[-1]
    return f"{value / divisor:.0f}{suffix}"


def run() -> None:
    """Render the dashboard page: headline metrics, date slider and charts.

    Flow, in order:

    1. Configure the page and resolve the orders ZIP; when none is available
       the onboarding view is rendered instead and the function returns.
    2. Load orders and refunds, then compute the monthly net series and its
       moving average via ``monthly_spend``.
    3. Create empty placeholders for the five metrics and the chart. They are
       created before the slider call and filled in afterwards, once the
       selected range is known.
    4. Build the month slider (session key ``"date_range"``), first syncing
       it with any box selection on the ``"monthly_chart"`` element and
       enforcing a minimum span of one month.
    5. Filter orders and refunds to the half-open range
       ``[start, end_exclusive)``, fill the metrics, and render the monthly
       chart and the top-products view.
    """
    st.set_page_config(page_title=APP_NAME, layout="wide")
    st.title(APP_NAME)

    zip_bytes = _resolve_zip_bytes()
    if zip_bytes is None:
        # No archive on disk or in the session: show onboarding and stop.
        onboarding.render()
        return

    orders, refunds = load_data(zip_bytes)

    full_net = monthly_spend.compute_full_net(orders, refunds)
    sma = monthly_spend.compute_sma(full_net)

    # Default window: DEFAULT_LOOKBACK_YEARS back from the latest order date,
    # clamped so it never starts before the earliest order date.
    min_date = orders["Order Date"].min().date()
    max_date = orders["Order Date"].max().date()
    default_start = max(
        min_date, (pd.Timestamp(max_date) - pd.DateOffset(years=DEFAULT_LOOKBACK_YEARS)).date()
    )

    # Placeholders for the metrics and the chart. They are filled near the end
    # of this function, after the slider has produced the selected range.
    c1, c2, c3, c4, c5 = st.columns(5)
    net_slot = c1.empty()
    avg_slot = c2.empty()
    refunded_slot = c3.empty()
    orders_slot = c4.empty()
    items_slot = c5.empty()

    chart_slot = st.empty()

    # Slider options append one sentinel month past the last data month so
    # the right handle is exclusive — a single-month selection still spans a
    # tick instead of collapsing both handles onto the same point.
    # NOTE(review): assumes full_net is indexed by one timestamp per month
    # with no gaps, so every month key looked up below is present — confirm.
    month_options = [d.strftime(MONTH_KEY_FORMAT) for d in full_net.index]
    month_options.append(
        (full_net.index.max() + pd.offsets.MonthBegin(1)).strftime(MONTH_KEY_FORMAT)
    )
    default_start_month = pd.Timestamp(default_start).to_period("M").strftime(MONTH_KEY_FORMAT)
    default_end_month = month_options[-1]

    # Sync slider to chart's box-selection. Chart x uses MONTH_LABEL_FORMAT
    # ("Apr 2024"); convert to match month_options. _last_chart_selection
    # prevents the still-present selection from clobbering manual slider moves.
    points = (st.session_state.get("monthly_chart") or {}).get("selection", {}).get("points", [])
    selected_keys = sorted({pd.Timestamp(p["x"]).strftime(MONTH_KEY_FORMAT) for p in points})
    if selected_keys != st.session_state.get("_last_chart_selection"):
        if selected_keys:
            # The slider's right end is exclusive, so point it one option past
            # the last selected month.
            end_idx = month_options.index(selected_keys[-1]) + 1
            st.session_state["date_range"] = (selected_keys[0], month_options[end_idx])
        st.session_state["_last_chart_selection"] = selected_keys

    # Enforce a 1-month minimum span — st.select_slider lets the user drag
    # both handles onto the same tick, which would collapse the range and
    # produce empty queries. Nudge the right handle out by one (or the left
    # in, if we're already at the sentinel).
    rng = st.session_state.get("date_range")
    if rng and rng[0] == rng[1]:
        idx = month_options.index(rng[0])
        if idx + 1 < len(month_options):
            st.session_state["date_range"] = (rng[0], month_options[idx + 1])
        else:
            st.session_state["date_range"] = (month_options[idx - 1], rng[1])

    # The session-state writes above happen before the widget is created, so
    # the slider picks up the adjusted range under its "date_range" key.
    start_label, end_label = st.select_slider(
        "Date range",
        options=month_options,
        value=(default_start_month, default_end_month),
        key="date_range",
    )
    start = pd.Timestamp(start_label).date()
    end_exclusive = pd.Timestamp(end_label).date()

    # Keep rows whose order date falls in the half-open range
    # [start, end_exclusive).
    orders_v = orders.loc[
        (orders["Order Date"].dt.date >= start) & (orders["Order Date"].dt.date < end_exclusive)
    ]
    refunds_v = refunds.loc[
        (refunds["Order Date"].dt.date >= start) & (refunds["Order Date"].dt.date < end_exclusive)
    ]

    gross = orders_v["Total Amount"].sum()
    refunded = refunds_v["Refund Amount"].sum()
    net = gross - refunded
    # Both labels are entries of month_options, so their index distance is the
    # number of months covered by the half-open selection.
    n_months = month_options.index(end_label) - month_options.index(start_label)

    net_slot.metric("Net spent", f"${_compact(net)}")
    refunded_slot.metric("Refunded", f"${_compact(refunded)}")
    orders_slot.metric("Orders", _compact(orders_v["Order ID"].nunique()))
    # "Items" is the row count of the filtered orders frame — presumably one
    # row per purchased item; verify against load_data.
    items_slot.metric("Items", _compact(len(orders_v)))
    avg_slot.metric("Avg/month", f"${_compact(net / n_months)}")

    # end_label is exclusive, so step back one month to get the last month
    # that is actually part of the selection.
    start_month = pd.Period(start_label, freq="M").to_timestamp()
    end_month = (pd.Period(end_label, freq="M") - 1).to_timestamp()

    monthly_spend.render(chart_slot, full_net, sma, start_month, end_month)
    top_products.render(orders_v)