alexoh2020 committed on
Commit
aa1d4c8
·
1 Parent(s): 18ae8e6

mnt bucket sync p1

Browse files
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  .DS_Store
 
2
  .venv/
3
  venv/
4
  __pycache__/
 
1
  .DS_Store
2
+ mnt/
3
  .venv/
4
  venv/
5
  __pycache__/
app.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  from __future__ import annotations
4
 
 
5
  import html
6
  import json
7
  import os
@@ -12,14 +13,56 @@ from pathlib import Path
12
 
13
  import gradio as gr
14
 
15
- IMAGE_CDN_BASE = (
16
- "https://huggingface.co/DTanzillo/one-haut-encoded/resolve/main/data/images"
17
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  MODEL_API = {"popularity": "popularity", "knn": "knn", "ncf": "ncf"}
20
  MODEL_STATIC_KEY = {"popularity": "popularity", "knn": "knn", "ncf": "ncf_meta"}
21
 
22
- _DATA_PATH = Path(__file__).resolve().parent / "data/processed/precomputed_recommendations.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  with open(_DATA_PATH, encoding="utf-8") as f:
24
  _RAW_USERS: list[dict] = json.load(f)
25
  _USER_BY_ID: dict[str, dict] = {u["customer_id"]: u for u in _RAW_USERS}
@@ -30,8 +73,21 @@ def get_api_base() -> str:
30
 
31
 
32
  def image_url(article_id: object) -> str:
 
 
 
 
33
  aid = str(article_id)
34
- return f"{IMAGE_CDN_BASE}/{aid[:3]}/{aid}.jpg"
 
 
 
 
 
 
 
 
 
35
 
36
 
37
  def normalize_rec_response(data: object) -> list[dict]:
@@ -209,4 +265,5 @@ def build_ui():
209
  demo = build_ui()
210
 
211
  if __name__ == "__main__":
212
- demo.queue().launch()
 
 
2
 
3
  from __future__ import annotations
4
 
5
+ import base64
6
  import html
7
  import json
8
  import os
 
13
 
14
  import gradio as gr
15
 
16
+ # Default: HF dataset bucket with resized demo images. Override for local testing:
17
+ # export ONEHAUT_IMAGE_CDN_BASE=https://huggingface.co/datasets/USER/REPO/resolve/main/images
18
def _image_cdn_base() -> str:
    """Return the base URL that article image paths are appended to.

    The ``ONEHAUT_IMAGE_CDN_BASE`` environment variable overrides the default
    Hugging Face dataset URL. Surrounding whitespace and trailing slashes are
    removed so callers can join path segments with ``/``.

    An override that is blank after normalisation (unset, empty, whitespace,
    or only slashes) is ignored and the default is returned; otherwise the
    resulting image URLs would be root-relative paths with no host.
    """
    default_base = (
        "https://huggingface.co/datasets/alexoh2020/onehautapp-storage/resolve/main/images"
    )
    override = os.environ.get("ONEHAUT_IMAGE_CDN_BASE", "").strip().rstrip("/")
    return override or default_base
23
+
24
+
25
def _mnt_data_roots() -> list[Path]:
    """Return the directories to search for local data, in priority order.

    Order:
      1. The directory named by ``ONEHAUT_MNT_DATA``, if that variable is set
         to a non-blank value. A leading ``~`` is expanded to the user's home
         directory, since the value may not have passed through a shell.
      2. ``/mnt/data`` (per the original note: the HF Space persistent volume).
      3. ``mnt/data`` next to this file (the repo-clone layout).

    The directories are not checked for existence here.
    """
    roots: list[Path] = []
    override = os.environ.get("ONEHAUT_MNT_DATA", "").strip()
    if override:
        override_path = Path(override)
        try:
            override_path = override_path.expanduser()
        except RuntimeError:
            # Home directory cannot be determined; keep the literal path.
            pass
        roots.append(override_path)
    roots.append(Path("/mnt/data"))
    roots.append(Path(__file__).resolve().parent / "mnt/data")
    return roots
34
+
35
+
36
def _local_image_file(article_id: str) -> Path | None:
    """Return the local JPEG for *article_id*, or ``None`` if there is none.

    The file is looked up as ``images/<first 3 chars of id>/<id>.jpg`` under
    each directory returned by ``_mnt_data_roots()``; the first existing
    regular file wins and is returned as a resolved path.

    Ids that are empty, equal to ``.`` or ``..``, or that contain a path
    separator or NUL byte are rejected up front. Such values could otherwise
    point outside the ``images`` tree or make the path lookup raise.
    """
    if not article_id or article_id in {".", ".."}:
        return None
    if any(ch in article_id for ch in ("/", "\\", "\x00")):
        return None

    rel = Path("images") / article_id[:3] / f"{article_id}.jpg"
    for root in _mnt_data_roots():
        candidate = (root / rel).resolve()
        if candidate.is_file():
            return candidate
    return None
43
+
44
+
45
+ # Max bytes to inline as data: URL in HTML (avoid huge payloads)
46
+ _MAX_INLINE_IMAGE_BYTES = 512 * 1024
47
 
48
  MODEL_API = {"popularity": "popularity", "knn": "knn", "ncf": "ncf"}
49
  MODEL_STATIC_KEY = {"popularity": "popularity", "knn": "knn", "ncf": "ncf_meta"}
50
 
51
+
52
def _precomputed_path() -> Path:
    """Pick the precomputed-recommendations JSON file to load.

    Candidates are tried in this order and the first existing file wins:
      1. ``/mnt/metadata/precomputed_recommendations.json``
      2. ``mnt/metadata/precomputed_recommendations.json`` next to this file
      3. ``data/processed/precomputed_recommendations.json`` next to this file

    If none of them exists, candidate 2 is returned even though it is
    missing, so the caller's ``open`` reports that path.
    """
    app_dir = Path(__file__).resolve().parent
    space_copy = Path("/mnt/metadata/precomputed_recommendations.json")
    bundled_copy = app_dir / "mnt/metadata/precomputed_recommendations.json"
    processed_copy = app_dir / "data/processed/precomputed_recommendations.json"

    search_order = (space_copy, bundled_copy, processed_copy)
    return next((path for path in search_order if path.is_file()), bundled_copy)
63
+
64
+
65
+ _DATA_PATH = _precomputed_path()
66
  with open(_DATA_PATH, encoding="utf-8") as f:
67
  _RAW_USERS: list[dict] = json.load(f)
68
  _USER_BY_ID: dict[str, dict] = {u["customer_id"]: u for u in _RAW_USERS}
 
73
 
74
 
75
def image_url(article_id: object) -> str:
    """Return an ``<img src>`` value for the given article.

    A local JPEG found by ``_local_image_file`` is preferred and is inlined
    as a ``data:image/jpeg;base64,...`` URL, so ``gr.HTML`` img tags work
    without extra routes. Files larger than ``_MAX_INLINE_IMAGE_BYTES`` are
    not inlined. In every other case (no local file, file too large, or the
    file cannot be read) the CDN URL
    ``<cdn base>/<first 3 chars of id>/<id>.jpg`` is returned.
    """
    aid = str(article_id)
    local = _local_image_file(aid)
    if local is not None:
        try:
            # Check the size on disk first so an oversized file is never
            # loaded into memory just to be discarded.
            if local.stat().st_size <= _MAX_INLINE_IMAGE_BYTES:
                raw = local.read_bytes()
                # Re-check: the file may have changed between stat and read.
                if len(raw) <= _MAX_INLINE_IMAGE_BYTES:
                    encoded = base64.b64encode(raw).decode("ascii")
                    return f"data:image/jpeg;base64,{encoded}"
        except OSError:
            # Deliberate best effort: an unreadable local file is not an
            # error, the CDN URL below is used instead.
            pass
    return f"{_image_cdn_base()}/{aid[:3]}/{aid}.jpg"
91
 
92
 
93
  def normalize_rec_response(data: object) -> list[dict]:
 
265
  demo = build_ui()
266
 
267
  if __name__ == "__main__":
268
+ # Allow Gradio to resolve file paths under /mnt if needed (e.g. future file= URLs)
269
+ demo.queue().launch(allowed_paths=["/mnt"])
data/processed/precomputed_recommendations.json DELETED
The diff for this file is too large to render. See raw diff