sukiboo committed on
Commit
db10938
·
1 Parent(s): c366581

support hf hosting

Browse files
Files changed (5) hide show
  1. README.md +15 -1
  2. requirements.txt +3 -3
  3. src/data.py +4 -16
  4. src/main.py +12 -2
  5. src/onboarding.py +16 -10
README.md CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Amazon Spending Visualizer
2
 
3
  Have you ever wondered how much you spend on Amazon? Wonder no more!
@@ -18,7 +30,9 @@ sanctioned way to get the data is the GDPR-style "Request My Data" export.
18
  - Amazon emails a download link within a few hours to a few days
19
  2. Drop the resulting `Your Orders.zip` into `data/`. The app reads
20
  `Your Amazon Orders/Order History.csv` straight out of the zip, so the zip is
21
- always the source of truth — replace it to refresh the data.
 
 
22
  3. Install deps:
23
  ```
24
  python3 -m venv .venv
 
1
+ ---
2
+ title: Amazon Spending Visualizer
3
+ emoji: 📦
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.56.0
8
+ app_file: app.py
9
+ pinned: false
10
+ python_version: "3.12"
11
+ ---
12
+
13
  # Amazon Spending Visualizer
14
 
15
  Have you ever wondered how much you spend on Amazon? Wonder no more!
 
30
  - Amazon emails a download link within a few hours to a few days
31
  2. Drop the resulting `Your Orders.zip` into `data/`. The app reads
32
  `Your Amazon Orders/Order History.csv` straight out of the zip, so the zip is
33
+ always the source of truth — replace it to refresh the data. (You can also
34
+ skip this step and upload the zip through the in-app uploader at runtime,
35
+ which is how the hosted Hugging Face Space works.)
36
  3. Install deps:
37
  ```
38
  python3 -m venv .venv
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- streamlit>=1.32
2
- pandas>=2.0
3
- plotly>=5.18
 
1
+ streamlit==1.56.0
2
+ pandas==3.0.2
3
+ plotly==6.7.0
src/data.py CHANGED
@@ -1,14 +1,10 @@
 
1
  import zipfile
2
 
3
  import pandas as pd
4
  import streamlit as st
5
 
6
- from src.constants import (
7
- EXCLUDED_WEBSITES,
8
- ORDERS_CSV_ENTRY,
9
- ORDERS_ZIP,
10
- REFUNDS_CSV_ENTRY,
11
- )
12
 
13
  # Tolerance (USD) for matching a refund row to a specific line item by amount.
14
  # Refund Details.csv has Order ID but no ASIN, so to flag the actual returned
@@ -40,17 +36,9 @@ def _match_refunds_to_lines(orders: pd.DataFrame, refunds: pd.DataFrame) -> set:
40
  return refunded_idx
41
 
42
 
43
- # `zip_mtime` is unused inside the function — it's an `@st.cache_data` key so
44
- # the cache invalidates when the zip is replaced (e.g. user uploads a fresh
45
- # Amazon export). Without it, the first load would be served forever.
46
  @st.cache_data
47
- def load_data(zip_mtime: float) -> tuple[pd.DataFrame, pd.DataFrame]:
48
- del zip_mtime # cache-key only; see comment above
49
- if not ORDERS_ZIP.exists():
50
- raise FileNotFoundError(
51
- f"Missing {ORDERS_ZIP}. Place the Amazon 'Your Orders.zip' export in data/."
52
- )
53
- with zipfile.ZipFile(ORDERS_ZIP) as z:
54
  with z.open(ORDERS_CSV_ENTRY) as f:
55
  orders = pd.read_csv(f)
56
  with z.open(REFUNDS_CSV_ENTRY) as f:
 
1
+ import io
2
  import zipfile
3
 
4
  import pandas as pd
5
  import streamlit as st
6
 
7
+ from src.constants import EXCLUDED_WEBSITES, ORDERS_CSV_ENTRY, REFUNDS_CSV_ENTRY
 
 
 
 
 
8
 
9
  # Tolerance (USD) for matching a refund row to a specific line item by amount.
10
  # Refund Details.csv has Order ID but no ASIN, so to flag the actual returned
 
36
  return refunded_idx
37
 
38
 
 
 
 
39
  @st.cache_data
40
+ def load_data(zip_bytes: bytes) -> tuple[pd.DataFrame, pd.DataFrame]:
41
+ with zipfile.ZipFile(io.BytesIO(zip_bytes)) as z:
 
 
 
 
 
42
  with z.open(ORDERS_CSV_ENTRY) as f:
43
  orders = pd.read_csv(f)
44
  with z.open(REFUNDS_CSV_ENTRY) as f:
src/main.py CHANGED
@@ -7,15 +7,25 @@ from src.data import load_data
7
  from src.plots import monthly_spend, top_products
8
 
9
 
 
 
 
 
 
 
 
 
 
10
  def run() -> None:
11
  st.set_page_config(page_title=APP_NAME, layout="wide")
12
  st.title(APP_NAME)
13
 
14
- if not ORDERS_ZIP.exists():
 
15
  onboarding.render()
16
  return
17
 
18
- orders, refunds = load_data(ORDERS_ZIP.stat().st_mtime)
19
 
20
  full_net = monthly_spend.compute_full_net(orders, refunds)
21
  sma = monthly_spend.compute_sma(full_net)
 
7
  from src.plots import monthly_spend, top_products
8
 
9
 
10
+ def _resolve_zip_bytes() -> bytes | None:
11
+ # Disk wins so local users keep the "drop once into data/" UX. Session-state
12
+ # is the upload path used on Hugging Face Spaces (and any other hosted
13
+ # deployment) where there's no persistent filesystem.
14
+ if ORDERS_ZIP.exists():
15
+ return ORDERS_ZIP.read_bytes()
16
+ return st.session_state.get("uploaded_zip")
17
+
18
+
19
  def run() -> None:
20
  st.set_page_config(page_title=APP_NAME, layout="wide")
21
  st.title(APP_NAME)
22
 
23
+ zip_bytes = _resolve_zip_bytes()
24
+ if zip_bytes is None:
25
  onboarding.render()
26
  return
27
 
28
+ orders, refunds = load_data(zip_bytes)
29
 
30
  full_net = monthly_spend.compute_full_net(orders, refunds)
31
  sma = monthly_spend.compute_sma(full_net)
src/onboarding.py CHANGED
@@ -1,18 +1,27 @@
1
  import streamlit as st
2
 
3
- from src.constants import DATA_DIR, ORDERS_ZIP
4
 
5
  AMAZON_REQUEST_URL = "https://amazon.com/gp/privacycentral/dsar/preview.html"
6
 
7
 
8
  def render() -> None:
9
- st.info(
10
- f"No Amazon export found yet. Drop `Your Orders.zip` into `{DATA_DIR}/` " "to get started."
11
- )
12
 
13
  st.subheader("Already have the export?")
14
- st.markdown("Place `Your Orders.zip` at this exact path, then click **Refresh** below:")
15
- st.code(str(ORDERS_ZIP), language=None)
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  st.subheader("Don't have it yet?")
18
  st.markdown(
@@ -23,9 +32,6 @@ request a data export:
23
  1. Open Amazon's data request page: [{AMAZON_REQUEST_URL}]({AMAZON_REQUEST_URL})
24
  2. Select **Your Orders** and submit the request
25
  3. Wait for Amazon to email a download link (typically a few hours to a few days)
26
- 4. Drop the resulting `Your Orders.zip` into `data/` and click **Refresh**
27
  """
28
  )
29
-
30
- if st.button("Refresh", type="primary"):
31
- st.rerun()
 
1
  import streamlit as st
2
 
3
+ from src.constants import ORDERS_ZIP
4
 
5
  AMAZON_REQUEST_URL = "https://amazon.com/gp/privacycentral/dsar/preview.html"
6
 
7
 
8
  def render() -> None:
9
+ st.info("No Amazon export loaded yet. Upload `Your Orders.zip` below to get started.")
 
 
10
 
11
  st.subheader("Already have the export?")
12
+ uploaded = st.file_uploader(
13
+ "Upload `Your Orders.zip`",
14
+ type=["zip"],
15
+ accept_multiple_files=False,
16
+ )
17
+ if uploaded is not None:
18
+ st.session_state["uploaded_zip"] = uploaded.getvalue()
19
+ st.rerun()
20
+
21
+ st.caption(
22
+ f"Running locally? You can also drop the zip at `{ORDERS_ZIP}` and reload "
23
+ "to skip the upload step on every session."
24
+ )
25
 
26
  st.subheader("Don't have it yet?")
27
  st.markdown(
 
32
  1. Open Amazon's data request page: [{AMAZON_REQUEST_URL}]({AMAZON_REQUEST_URL})
33
  2. Select **Your Orders** and submit the request
34
  3. Wait for Amazon to email a download link (typically a few hours to a few days)
35
+ 4. Upload the resulting `Your Orders.zip` above
36
  """
37
  )