mnm-matin committed on
Commit
803add0
·
verified ·
1 Parent(s): b66d284

Deploy ABO catalog HyperView Space

Browse files
.dockerignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .DS_Store
4
+ demo_data/
.hyperview/extensions/abo-catalog-readout/extension.toml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ name = "abo-catalog-readout"
2
+ description = "ABO CLIP vs HyCoCLIP catalog comparison readout"
3
+
4
+ [[panels]]
5
+ id = "catalog-comparison"
6
+ title = "Hierarchy Retrieval Readout"
7
+ position = "right"
8
+ file = "panel.js"
.hyperview/extensions/abo-catalog-readout/panel.js ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// The panel relies on a HyperViewPanelSDK object exposed on the global scope;
// without it nothing below can render, so fail immediately.
const sdk = globalThis.HyperViewPanelSDK;
if (!sdk) {
  throw new Error("HyperViewPanelSDK is not available on window.");
}

// React itself plus the building blocks and hooks this panel uses all come
// from the SDK object rather than from separate imports.
const { React, components, hooks } = sdk;
const { Panel, PanelToolbar, PanelToolbarButton } = components;
const { usePanelSelection, usePanelSamples, usePanelCommands, usePanelProps } = hooks;

// Colour palette shared by every element rendered in this file.
const colors = {
  panelBg: "#111827",
  cardBg: "#161f2d",
  buttonBg: "#1f2937",
  border: "#334155",
  text: "#e5e7eb",
  strongText: "#f8fafc",
  mutedText: "#9ca3af",
  bodyText: "#cbd5e1",
  error: "#fca5a5",
};
24
+
25
/**
 * Turn a raw label such as "LIGHT_FIXTURE" into lower-case words.
 * Falsy labels are rendered as "unlabeled".
 */
function prettyLabel(label) {
  const text = String(label || "unlabeled");
  // Splitting on "_" and re-joining with spaces replaces every underscore.
  return text.split("_").join(" ").toLowerCase();
}
28
+
29
/**
 * Title-case variant of prettyLabel: the first character of every word
 * (each \b\w match) is upper-cased.
 */
function titleLabel(label) {
  const upperFirst = (char) => char.toUpperCase();
  const words = prettyLabel(label);
  return words.replace(/\b\w/g, upperFirst);
}
32
+
33
/**
 * Vertical section: an <h3> heading showing `title`, followed by `children`.
 */
function Section({ title, children }) {
  const wrapperStyle = { display: "flex", flexDirection: "column", gap: 8 };
  const headingStyle = {
    margin: 0,
    color: colors.strongText,
    fontSize: 13,
    fontWeight: 700,
  };

  const heading = React.createElement("h3", { style: headingStyle }, title);
  return React.createElement("section", { style: wrapperStyle }, heading, children);
}
52
+
53
/**
 * Plain <button type="button"> styled with the panel palette.
 * When `disabled` is truthy the cursor becomes "default" and the button is dimmed.
 */
function Button({ children, onClick, title, disabled }) {
  const style = {
    border: `1px solid ${colors.border}`,
    background: colors.buttonBg,
    color: colors.text,
    borderRadius: 4,
    padding: "7px 8px",
    fontSize: 11,
    lineHeight: 1.2,
    cursor: disabled ? "default" : "pointer",
    textAlign: "left",
    opacity: disabled ? 0.65 : 1,
  };
  const props = { type: "button", onClick, title, disabled, style };
  return React.createElement("button", props, children);
}
77
+
78
/**
 * Convert the `models` panel prop into a uniform list of
 * `{ key, displayName, buttonLabel, layoutKey }` objects.
 *
 * - Anything that is not an array yields `[]`.
 * - Entries that are not objects (including null/undefined) are skipped
 *   instead of throwing on property access.
 * - Both camelCase and snake_case spellings are accepted for displayName,
 *   buttonLabel and layoutKey.
 * - Entries without a layout key are dropped.
 *
 * Fallback names use the entry's position in the ORIGINAL array, so skipped
 * entries do not renumber the ones that follow.
 */
function normalizeModels(value) {
  if (!Array.isArray(value)) return [];

  const normalized = [];
  value.forEach((model, index) => {
    if (model === null || typeof model !== "object") return;

    const layoutKey = model.layoutKey || model.layout_key || null;
    if (!layoutKey) return;

    // Resolve the display name once so the button-label fallback honours the
    // snake_case spelling too (previously it only looked at `displayName`).
    const declaredName = model.displayName || model.display_name;

    normalized.push({
      key: String(model.key || `model-${index}`),
      displayName: String(declaredName || model.key || `Model ${index + 1}`),
      buttonLabel: String(
        model.buttonLabel || model.button_label || `${declaredName || model.key || "Model"} query`,
      ),
      layoutKey,
    });
  });
  return normalized;
}
91
+
92
/**
 * Look up the summary for `modelKey` on an example item.
 * `item.summaries` is consulted first, then `item.modelSummaries`;
 * an empty object is returned when neither has a truthy entry.
 */
function getSummary(item, modelKey) {
  const primary = item.summaries?.[modelKey];
  if (primary) return primary;

  const alternate = item.modelSummaries?.[modelKey];
  return alternate || {};
}
95
+
96
/**
 * Card for one fixed example query.
 *
 * Renders, top to bottom: the example title, a "<family> query: <label>" line,
 * one summary cell per model, a short hint, and one button per model that
 * calls `onSelectQuery(item, model)`.
 *
 * A button is disabled and shows "Loading..." while `loadingKey` equals
 * "<item.queryId>:<model.key>".
 */
function ExampleCard({ item, models, loadingKey, onSelectQuery }) {
  // Two columns as soon as there is more than one model, otherwise a single column.
  const gridColumns = models.length > 1 ? "repeat(2, minmax(0, 1fr))" : "1fr";

  const cardStyle = {
    border: `1px solid ${colors.border}`,
    borderRadius: 6,
    background: colors.cardBg,
    padding: 10,
    display: "flex",
    flexDirection: "column",
    gap: 8,
  };

  // One bordered cell per model with its hit count and descriptive text.
  const summaryCells = models.map((model) => {
    const summary = getSummary(item, model.key);
    const headline = `${model.displayName}: ${summary.hits ?? "-"} / 10 matching neighbors`;
    const detail = summary.text || "Open Samples to inspect this model's nearest products.";
    return React.createElement(
      "div",
      {
        key: model.key,
        style: {
          border: `1px solid ${colors.border}`,
          borderRadius: 4,
          padding: 8,
        },
      },
      React.createElement(
        "div",
        { style: { color: colors.strongText, fontSize: 12, fontWeight: 700, marginBottom: 4 } },
        headline,
      ),
      React.createElement(
        "div",
        { style: { color: colors.bodyText, fontSize: 11, lineHeight: 1.35 } },
        detail,
      ),
    );
  });

  // One button per model; each selects this card's query for that model.
  const queryButtons = models.map((model) => {
    const isLoading = loadingKey === `${item.queryId}:${model.key}`;
    return React.createElement(
      Button,
      {
        key: model.key,
        onClick: () => onSelectQuery(item, model),
        disabled: isLoading,
        title: `Select this query and show ${model.displayName} neighbors`,
      },
      isLoading ? "Loading..." : model.buttonLabel,
    );
  });

  const heading = React.createElement(
    "div",
    { style: { color: colors.strongText, fontSize: 13, fontWeight: 700 } },
    item.title,
  );
  const queryLine = React.createElement(
    "div",
    { style: { color: colors.mutedText, fontSize: 11 } },
    `${item.family} query: ${titleLabel(item.queryLabel)}`,
  );
  const summaryGrid = React.createElement(
    "div",
    { style: { display: "grid", gridTemplateColumns: gridColumns, gap: 8 } },
    summaryCells,
  );
  const hint = React.createElement(
    "div",
    { style: { color: colors.mutedText, fontSize: 11, lineHeight: 1.35 } },
    "Both buttons select this same query. The Samples tab shows that model's neighbors.",
  );
  const buttonGrid = React.createElement(
    "div",
    { style: { display: "grid", gridTemplateColumns: gridColumns, gap: 6 } },
    queryButtons,
  );

  return React.createElement(
    "div",
    { style: cardStyle },
    heading,
    queryLine,
    summaryGrid,
    hint,
    buttonGrid,
  );
}
179
+
180
/**
 * Right-hand readout panel comparing the configured models on fixed examples.
 *
 * Reads `models` and `examples` from the panel props, shows a toolbar with
 * dataset/item/selection counters and a "Reset" action, an explanatory
 * section, one ExampleCard per example, and any error message at the bottom.
 */
export default function CatalogComparisonPanel() {
  const selection = usePanelSelection();
  const samplesState = usePanelSamples();
  const commands = usePanelCommands();
  const panelProps = usePanelProps();
  const [panelError, setPanelError] = React.useState(null);
  // "<queryId>:<modelKey>" of the request in flight, or null when idle.
  const [loadingKey, setLoadingKey] = React.useState(null);

  const models = React.useMemo(() => normalizeModels(panelProps.models), [panelProps.models]);
  const examples = Array.isArray(panelProps.examples) ? panelProps.examples : [];
  const modelNames = React.useMemo(
    () => models.map((model) => model.displayName).join(" and "),
    [models],
  );

  // Clears the label filter, the selection and any previous error message.
  // Failures are shown in the panel, matching selectModelQuery, instead of
  // surfacing as an unhandled promise rejection from the toolbar click.
  const clearSelection = async () => {
    setPanelError(null);
    try {
      commands.setLabelFilter(null);
      await commands.setSelection([], { clearLasso: true });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      setPanelError(`Could not reset selection: ${message}`);
    }
  };

  // Asks the host to show the neighbours of `item.queryId` in the layout that
  // belongs to `model`, tracking the in-flight request in `loadingKey`.
  const selectModelQuery = async (item, model) => {
    const key = `${item.queryId}:${model.key}`;
    setPanelError(null);

    if (!model.layoutKey) {
      setPanelError(`${model.displayName} layout is not ready yet. Try again in a moment.`);
      return;
    }

    setLoadingKey(key);
    try {
      await commands.showSimilar({
        sampleId: item.queryId,
        layoutKey: model.layoutKey,
        k: 18,
        focus: "samples",
      });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      setPanelError(`Could not select query: ${message}`);
    } finally {
      // Always re-enable the button, whether the request succeeded or failed.
      setLoadingKey(null);
    }
  };

  return React.createElement(
    Panel,
    { className: "h-full" },
    React.createElement(PanelToolbar, {
      items: [
        { id: "dataset", label: "Data", value: "ABO" },
        { id: "samples", label: "Items", value: String(samplesState.totalSamples ?? "-") },
        { id: "selected", label: "Selected", value: String(selection.selectedIds?.length ?? 0) },
      ],
      actions: React.createElement(PanelToolbarButton, { onClick: clearSelection }, "Reset"),
    }),
    React.createElement(
      "div",
      {
        style: {
          height: "100%",
          overflow: "auto",
          padding: 12,
          display: "flex",
          flexDirection: "column",
          gap: 14,
          background: colors.panelBg,
        },
      },
      React.createElement(
        Section,
        { title: "What This Shows" },
        React.createElement(
          "div",
          { style: { color: colors.bodyText, fontSize: 12, lineHeight: 1.45 } },
          `Use the same query image under ${modelNames}. Samples opens to that model's nearest products.`,
        ),
      ),
      React.createElement(
        Section,
        { title: "Real Examples" },
        React.createElement(
          "div",
          { style: { display: "flex", flexDirection: "column", gap: 8 } },
          examples.map((item) =>
            React.createElement(ExampleCard, {
              key: item.id,
              item,
              models,
              loadingKey,
              onSelectQuery: selectModelQuery,
            }),
          ),
        ),
      ),
      panelError
        ? React.createElement(
            "div",
            { style: { color: colors.error, fontSize: 11, lineHeight: 1.35 } },
            panelError,
          )
        : null,
    ),
  );
}
Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ RUN apt-get update && apt-get install -y --no-install-recommends \
4
+ build-essential \
5
+ curl \
6
+ git \
7
+ libssl-dev \
8
+ pkg-config \
9
+ && rm -rf /var/lib/apt/lists/*
10
+
11
+ RUN useradd -m -u 1000 user
12
+ USER user
13
+
14
+ ENV HOME=/home/user \
15
+ PATH=/home/user/.local/bin:$PATH \
16
+ HF_HOME=/home/user/.cache/huggingface \
17
+ PYTHONUNBUFFERED=1 \
18
+ PIP_NO_CACHE_DIR=1
19
+
20
+ WORKDIR $HOME/app
21
+
22
+ RUN pip install --upgrade pip
23
+
24
+ ARG HYPERVIEW_PACKAGE=git+https://github.com/Hyper3Labs/HyperView.git@main
25
+ ARG HYPER_MODELS_VERSION=0.2.0
26
+
27
+ # Install CPU-only PyTorch first so the Space does not pull the default CUDA bundle.
28
+ RUN pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
29
+ RUN pip install "${HYPERVIEW_PACKAGE}" && python -c "import hyperview; print('hyperview', hyperview.__version__)"
30
+ RUN pip install "hyper-models==${HYPER_MODELS_VERSION}" && python -c "import hyper_models; print('hyper_models', hyper_models.__version__)"
31
+ RUN pip install "datasets>=4.5.0" "Pillow>=12.0.0"
32
+
33
+ COPY --chown=user demo.py ./demo.py
34
+ COPY --chown=user .hyperview ./.hyperview
35
+
36
+ ENV HYPERVIEW_HOST=0.0.0.0 \
37
+ HYPERVIEW_PORT=7860 \
38
+ HYPERVIEW_DATASETS_DIR=/home/user/app/demo_data/datasets \
39
+ HYPERVIEW_MEDIA_DIR=/home/user/app/demo_data/media
40
+
41
+ EXPOSE 7860
42
+
43
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=600s --retries=3 \
44
+ CMD curl -f http://localhost:7860/__hyperview__/health || exit 1
45
+
46
+ CMD ["python", "demo.py"]
README.md CHANGED
@@ -1,10 +1,67 @@
1
  ---
2
  title: HyperView ABO Catalog
3
- emoji: 🦀
4
- colorFrom: purple
5
- colorTo: pink
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: HyperView ABO Catalog
3
+ emoji: 🛒
4
+ colorFrom: gray
5
+ colorTo: blue
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ # HyperView - ABO Catalog Model Comparison
12
+
13
+ This demo builds a small Amazon Berkeley Objects product-catalog subset and opens
14
+ HyperView with two pinned scatter panels plus a comparison readout:
15
+
16
+ - CLIP ViT-B/32 in a Euclidean 2D layout
17
+ - HyCoCLIP `hycoclip-vit-s` in a Poincaré 2D layout
18
+
19
+ The right-side panel uses fixed product examples to compare nearest-neighbor
20
+ behavior for the same query under each model.
21
+
22
+ The demo loads the full ABO metadata mirror from
23
+ `hyper3labs/amazon-berkeley-objects`, then deterministically selects a balanced
24
+ subset for the live comparison. Local `demo_data` is only a runtime cache for
25
+ downloaded images, HyperView dataset state, embeddings, and layouts.
26
+
27
+ Run locally from the HyperView repo:
28
+
29
+ ```bash
30
+ uv run python hyperview-spaces/spaces/abo-catalog-clip-hycoclip/demo.py
31
+ ```
32
+
33
+ Useful overrides:
34
+
35
+ ```bash
36
+ HYPERVIEW_PORT=6263 ABO_MAX_PRODUCT_TYPES=12 ABO_SAMPLES_PER_PRODUCT_TYPE=20 \
37
+ uv run python hyperview-spaces/spaces/abo-catalog-clip-hycoclip/demo.py
38
+ ```
39
+
40
+ ## Swap the comparison model
41
+
42
+ The model choices live in the `MODEL_SPECS` block near the top of
43
+ [demo.py](demo.py). To swap the candidate model, update these environment
44
+ variables or edit the second entry in `MODEL_SPECS`:
45
+
46
+ ```bash
47
+ ABO_CANDIDATE_DISPLAY_NAME="New Model" \
48
+ ABO_CANDIDATE_PROVIDER="hyper-models" \
49
+ ABO_CANDIDATE_MODEL="new-model-id" \
50
+ ABO_CANDIDATE_LAYOUT="poincare:2d" \
51
+ ABO_CANDIDATE_GEOMETRY="poincare" \
52
+ python demo.py
53
+ ```
54
+
55
+ The panel reads model labels, layout keys, and fixed examples from props passed
56
+ by `demo.py`, so model swaps should not require editing the extension
57
+ JavaScript.
58
+
59
+ ## Deploy source
60
+
61
+ This folder is intended to deploy to `hyper3labs/HyperView-ABO-Catalog` from
62
+ the `hyperview-spaces` deployment repository.
63
+
64
+ The Dockerfile currently installs HyperView from GitHub `main` because the demo
65
+ uses custom panel and explicit similarity-query APIs that must be present in
66
+ the runtime. After the next HyperView release, switch `HYPERVIEW_PACKAGE` to a
67
+ released PyPI pin such as `hyperview==0.4.3`.
demo.py ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """ABO product-catalog comparison demo for CLIP vs HyCoCLIP in HyperView."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import os
7
+ import re
8
+ import urllib.request
9
+ from collections import Counter, defaultdict
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from datasets import load_dataset
14
+ from PIL import Image, ImageOps
15
+
16
+ import hyperview as hv
17
+
18
+
19
# --- Server / workspace settings (each overridable through the environment) ---
SPACE_DIR = Path(__file__).resolve().parent
SPACE_HOST = os.environ.get("HYPERVIEW_HOST", "127.0.0.1")
SPACE_PORT = int(os.environ.get("HYPERVIEW_PORT", "6262"))
WORKSPACE_ID = os.environ.get("HYPERVIEW_WORKSPACE_ID", "abo-catalog-clip-hycoclip")
DATASET_NAME = os.environ.get("HYPERVIEW_DATASET_NAME", "abo_catalog_clip_hycoclip_side_by_side")
EXTENSION_DIR = SPACE_DIR.joinpath(".hyperview", "extensions", "abo-catalog-readout")

# --- Source dataset on the Hugging Face Hub ---
HF_ABO_DATASET = os.environ.get("ABO_HF_DATASET", "hyper3labs/amazon-berkeley-objects")
HF_ABO_CONFIG = os.environ.get("ABO_HF_CONFIG", "listings")
HF_ABO_SPLIT = os.environ.get("ABO_HF_SPLIT", "train")

# --- Subset selection limits ---
MAX_PRODUCT_TYPES = int(os.environ.get("ABO_MAX_PRODUCT_TYPES", "20"))
SAMPLES_PER_PRODUCT_TYPE = int(os.environ.get("ABO_SAMPLES_PER_PRODUCT_TYPE", "25"))
MIN_PRODUCT_TYPE_COUNT = int(os.environ.get("ABO_MIN_PRODUCT_TYPE_COUNT", "10"))
# Bounding box passed to Image.thumbnail() when caching product images.
IMAGE_MAX_SIZE = (768, 768)
# Any of "1", "true", "yes" (case-insensitive) forces samples to be rebuilt.
_TRUTHY = {"1", "true", "yes"}
FORCE_SAMPLE_REFRESH = os.environ.get("HYPERVIEW_ABO_FORCE_REFRESH", "").lower() in _TRUTHY

# Comma-separated country codes; blank entries are ignored.
ALLOWED_COUNTRIES = {
    code.strip()
    for code in os.environ.get("ABO_ALLOWED_COUNTRIES", "US,GB,AU,CA,AE,SG,IN").split(",")
    if code.strip()
}
45
+
46
def _model_spec_from_env(
    key: str,
    env_name: str,
    *,
    display_name: str,
    button_label: str,
    provider: str,
    model: str,
    layout: str,
    geometry: str,
    panel_title: str,
) -> dict[str, Any]:
    """Build one model spec, letting ``ABO_<env_name>_*`` variables override defaults."""

    def setting(suffix: str, default: str) -> str:
        return os.environ.get(f"ABO_{env_name}_{suffix}", default)

    return {
        "key": key,
        "display_name": setting("DISPLAY_NAME", display_name),
        "button_label": setting("BUTTON_LABEL", button_label),
        "provider": setting("PROVIDER", provider),
        "model": setting("MODEL", model),
        "layout": setting("LAYOUT", layout),
        "geometry": setting("GEOMETRY", geometry),
        "layout_dimension": int(setting("LAYOUT_DIMENSION", "2")),
        "metric": setting("METRIC", "cosine"),
        "panel_title": setting("PANEL_TITLE", panel_title),
    }


# The two models compared side by side: the baseline first, the candidate second.
MODEL_SPECS = [
    _model_spec_from_env(
        "clip",
        "BASELINE",
        display_name="CLIP",
        button_label="CLIP query",
        provider="embed-anything",
        model="openai/clip-vit-base-patch32",
        layout="euclidean:2d",
        geometry="euclidean",
        panel_title="CLIP - Euclidean Catalog Map",
    ),
    _model_spec_from_env(
        "candidate",
        "CANDIDATE",
        display_name="HyCoCLIP",
        button_label="HyCoCLIP query",
        provider="hyper-models",
        model="hycoclip-vit-s",
        layout="poincare:2d",
        geometry="poincare",
        panel_title="HyCoCLIP - Poincare Catalog Map",
    ),
]
72
+
73
# Fixed example queries shown in the readout panel. Each table row is
# (id, title, family, queryId, queryLabel, clip hits, clip text,
#  candidate hits, candidate text) and is expanded into the nested dict
# the panel receives as a prop.
DEMO_EXAMPLES = [
    {
        "id": example_id,
        "title": title,
        "family": family,
        "queryId": query_id,
        "queryLabel": query_label,
        "summaries": {
            "clip": {"hits": clip_hits, "text": clip_text},
            "candidate": {"hits": candidate_hits, "text": candidate_text},
        },
    }
    for (
        example_id,
        title,
        family,
        query_id,
        query_label,
        clip_hits,
        clip_text,
        candidate_hits,
        candidate_text,
    ) in (
        (
            "lighting",
            "Lighting fixture",
            "Lighting",
            "B07HK5WXQP_510lSNJKiyL",
            "LIGHT_FIXTURE",
            2,
            "Also returns earrings, home decor, bedding, kitchen, sandals.",
            10,
            "Returns fixtures and lamps.",
        ),
        (
            "chandelier",
            "Chandelier-style fixture",
            "Lighting",
            "B07MF1RNWQ_51Vei4EHzBL",
            "LIGHT_FIXTURE",
            2,
            "Also returns earrings, necklace-like jewelry, table.",
            10,
            "Returns light fixtures first, then lamps.",
        ),
        (
            "footwear",
            "Sandal",
            "Footwear",
            "B07WHRRNQK_61_LTvw9qDL",
            "SANDAL",
            6,
            "Also returns accessories, handbags.",
            10,
            "Returns sandals with nearby shoes.",
        ),
    )
]
126
+
127
+
128
def media_root() -> Path:
    """Return the media directory for this dataset, creating it if needed.

    The base directory is ``HYPERVIEW_MEDIA_DIR`` when set, otherwise
    ``<SPACE_DIR>/demo_data/media``; the dataset name is appended to it.
    """
    fallback = SPACE_DIR / "demo_data" / "media"
    base_dir = Path(os.environ.get("HYPERVIEW_MEDIA_DIR", str(fallback)))
    dataset_dir = base_dir / DATASET_NAME
    dataset_dir.mkdir(parents=True, exist_ok=True)
    return dataset_dir
133
+
134
+
135
def readable_product_type(label: str | None) -> str:
    """Return *label* as lower-case words separated by single spaces.

    Underscores and hyphens become spaces; ``None`` or an empty label
    yields an empty string.
    """
    if label is None or label == "":
        return ""
    spaced = label.translate(str.maketrans("_-", "  ")).lower()
    collapsed = re.sub(r"\s+", " ", spaced)
    return collapsed.strip()
140
+
141
+
142
def safe_sample_id(item_id: str, image_id: str) -> str:
    """Combine the item and image ids into an identifier of at most 96 characters.

    Runs of characters outside ``A-Z a-z 0-9 _ . -`` are replaced by a single
    underscore, surrounding underscores are stripped, and the result is cut
    to 96 characters.
    """
    combined = "_".join((f"{item_id}", f"{image_id}"))
    sanitized = re.sub(r"[^A-Za-z0-9_.-]+", "_", combined)
    trimmed = sanitized.strip("_")
    return trimmed[:96]
145
+
146
+
147
def select_balanced(records: list[dict]) -> list[dict]:
    """Pick a capped number of records from the most populated product types.

    Records are grouped by ``product_type`` in input order. Groups with fewer
    than ``MIN_PRODUCT_TYPE_COUNT`` records are discarded; the remainder are
    ranked by size (largest first, ties broken by type name). The first
    ``MAX_PRODUCT_TYPES`` groups each contribute their first
    ``SAMPLES_PER_PRODUCT_TYPE`` records.
    """
    by_type: dict[str, list[dict]] = defaultdict(list)
    for entry in records:
        by_type[entry["product_type"]].append(entry)

    ranked = sorted(
        (
            (type_name, members)
            for type_name, members in by_type.items()
            if len(members) >= MIN_PRODUCT_TYPE_COUNT
        ),
        key=lambda pair: (-len(pair[1]), pair[0]),
    )

    return [
        entry
        for _type_name, members in ranked[:MAX_PRODUCT_TYPES]
        for entry in members[:SAMPLES_PER_PRODUCT_TYPE]
    ]
163
+
164
+
165
def download_product_image(record: dict, destination: Path) -> bool:
    """Ensure a resized JPEG of the record's image exists at *destination*.

    Args:
        record: Catalog record; its ``image_url`` entry is the download source.
        destination: Final path of the cached JPEG.

    Returns:
        ``True`` when *destination* already holds a non-empty file or was
        written successfully; ``False`` when the record has no URL or the
        download/conversion failed (the failure is printed, not raised).
    """
    if destination.exists() and destination.stat().st_size > 0:
        return True

    url = record.get("image_url")
    if not url:
        return False

    # Seconds to wait on the network; urlretrieve() offers no timeout, so a
    # single stalled connection used to block the whole preparation run.
    timeout_seconds = 30

    raw_path = destination.with_suffix(destination.suffix + ".download")
    tmp_path = destination.with_suffix(destination.suffix + ".tmp")
    try:
        with urllib.request.urlopen(url, timeout=timeout_seconds) as response:
            raw_path.write_bytes(response.read())

        # Close the source file handle before the raw download is deleted.
        with Image.open(raw_path) as source:
            image = ImageOps.exif_transpose(source).convert("RGB")
        image.thumbnail(IMAGE_MAX_SIZE, Image.Resampling.LANCZOS)
        image.save(tmp_path, format="JPEG", quality=90, optimize=True)
        # Write to a temp file first and rename, so a partially written image
        # never appears under the final name.
        tmp_path.replace(destination)
        return True
    except Exception as exc:
        # Best effort by design: one bad image must not abort the whole run.
        print(f"Skipping image {url}: {exc}", flush=True)
        return False
    finally:
        raw_path.unlink(missing_ok=True)
        tmp_path.unlink(missing_ok=True)
188
+
189
+
190
def hf_catalog_records() -> list[dict]:
    """Load the ABO listings and return the usable rows as plain dicts.

    A row is kept only if its country is in ``ALLOWED_COUNTRIES`` (when that
    set is non-empty) and it has a title, product type, main image id,
    department and main image URL.
    """
    print(f"Loading ABO listings from Hugging Face dataset {HF_ABO_DATASET}...", flush=True)
    listings = load_dataset(HF_ABO_DATASET, HF_ABO_CONFIG, split=HF_ABO_SPLIT)

    required_fields = ("title", "product_type", "main_image_id", "department", "main_image_url")

    catalog = []
    for listing in listings:
        country = listing.get("country")
        if ALLOWED_COUNTRIES and country not in ALLOWED_COUNTRIES:
            continue
        if not all(listing.get(field) for field in required_fields):
            continue

        product_type = listing.get("product_type")
        # Prefer the dataset's own readable name; derive one otherwise.
        readable = listing.get("product_type_readable") or readable_product_type(product_type)
        catalog.append(
            {
                "item_id": listing.get("item_id"),
                "title": listing.get("title"),
                "product_type": product_type,
                "product_type_readable": readable,
                "department": listing.get("department"),
                "country": country,
                "brand": listing.get("brand"),
                "color": listing.get("color"),
                "style": listing.get("style"),
                "image_id": listing.get("main_image_id"),
                "image_url": listing.get("main_image_url"),
                "source": HF_ABO_DATASET,
            }
        )
    return catalog
221
+
222
+
223
def prepare_catalog_records() -> list[dict]:
    """Load the catalog, reduce it to the balanced subset and report its size."""
    subset = select_balanced(hf_catalog_records())
    product_types = {entry["product_type"] for entry in subset}
    print(
        f"Selected {len(subset)} ABO products across "
        f"{len(product_types)} product types.",
        flush=True,
    )
    return subset
231
+
232
+
233
def add_abo_samples(dataset: hv.Dataset) -> None:
    """Make sure every selected ABO product is present in *dataset*.

    When every expected sample id is already in the dataset and every
    expected image file is cached (and ``FORCE_SAMPLE_REFRESH`` is off), the
    function only prints a summary. Otherwise each record's image is
    downloaded if needed and passed to ``dataset.add_image``; records whose
    image cannot be fetched are counted as skipped.
    """
    existing_ids = {sample.id for sample in dataset.samples}
    media_dir = media_root()
    added = 0
    updated = 0
    skipped = 0
    product_counts: Counter[str] = Counter()
    records = prepare_catalog_records()

    # Compute each sample id once and reuse it for both the fast-path check
    # and the import loop below.
    sample_ids = [
        safe_sample_id(str(record["item_id"]), str(record["image_id"])) for record in records
    ]
    expected_ids = set(sample_ids)
    missing_ids = expected_ids - existing_ids

    def has_cached_media(sample_id: str) -> bool:
        # Same validity rule as download_product_image(): an empty file counts
        # as missing, so it cannot make the fast path report "already prepared".
        path = media_dir / f"{sample_id}.jpg"
        return path.exists() and path.stat().st_size > 0

    missing_media = [sample_id for sample_id in expected_ids if not has_cached_media(sample_id)]

    if not FORCE_SAMPLE_REFRESH and not missing_ids and not missing_media:
        product_counts.update(record["product_type"] for record in records)
        print(
            f"ABO samples already prepared ({len(records)} products). "
            "Set HYPERVIEW_ABO_FORCE_REFRESH=1 to rebuild samples.",
            flush=True,
        )
        print(f"Product-type counts: {dict(product_counts)}", flush=True)
        return

    for index, (record, sample_id) in enumerate(zip(records, sample_ids), start=1):
        destination = media_dir / f"{sample_id}.jpg"
        if not download_product_image(record, destination):
            skipped += 1
            continue

        sample_exists = sample_id in existing_ids

        metadata = dict(record)
        metadata["hierarchy"] = f"{record['department']} -> {record['product_type_readable']}"

        dataset.add_image(
            str(destination),
            label=record["product_type"],
            metadata=metadata,
            sample_id=sample_id,
        )
        if sample_exists:
            updated += 1
        else:
            existing_ids.add(sample_id)
            added += 1
        product_counts[record["product_type"]] += 1

        # Progress line for the first record, every 50th, and the last one.
        if index == 1 or index % 50 == 0 or index == len(records):
            print(
                f"Prepared {index}/{len(records)} products "
                f"({added} added, {updated} updated, {skipped} skipped).",
                flush=True,
            )

    print(f"Product-type counts: {dict(product_counts)}", flush=True)
292
+
293
+
294
def ensure_layouts(dataset: hv.Dataset) -> dict[str, str]:
    """Compute embeddings and a 2D layout for every entry in ``MODEL_SPECS``.

    Returns a mapping from each spec's ``key`` to the value returned by
    ``dataset.compute_visualization`` for that model.
    """
    layout_keys: dict[str, str] = {}
    for spec in MODEL_SPECS:
        name = spec["display_name"]

        print(f"Ensuring {name} embeddings...", flush=True)
        embedding_space = dataset.compute_embeddings(
            model=spec["model"],
            provider=spec["provider"],
            batch_size=32,
            show_progress=True,
        )

        print(f"Ensuring {name} layout...", flush=True)
        layout_key = dataset.compute_visualization(
            space_key=embedding_space,
            layout=spec["layout"],
            n_neighbors=20,
            min_dist=0.08,
            metric=spec["metric"],
        )
        layout_keys[spec["key"]] = layout_key
    return layout_keys
313
+
314
+
315
def build_dataset() -> tuple[hv.Dataset, dict[str, str]]:
    """Create the HyperView dataset, fill it with ABO samples and lay it out.

    Returns the dataset together with the layout keys from ``ensure_layouts``.
    """
    catalog = hv.Dataset(DATASET_NAME)
    add_abo_samples(catalog)
    layout_keys = ensure_layouts(catalog)
    return catalog, layout_keys
320
+
321
+
322
def model_panel_props(layouts: dict[str, str]) -> list[dict[str, Any]]:
    """Describe each model for the readout panel.

    One dict per ``MODEL_SPECS`` entry with the keys ``key``, ``displayName``,
    ``buttonLabel`` and ``layoutKey`` (the latter looked up in *layouts*).
    """
    return [
        {
            "key": spec["key"],
            "displayName": spec["display_name"],
            "buttonLabel": spec["button_label"],
            "layoutKey": layouts[spec["key"]],
        }
        for spec in MODEL_SPECS
    ]
334
+
335
+
336
def build_demo_view(layouts: dict[str, str]) -> hv.ui.View:
    """Assemble the demo view: one scatter panel per model plus the readout panel.

    The scatter panels are placed in a horizontal group; the extension panel
    is positioned on the right and receives the model descriptions and the
    fixed examples as props.
    """
    scatter_panels = []
    for spec in MODEL_SPECS:
        scatter_panels.append(
            hv.ui.Scatter(
                id=f"{spec['key']}-catalog-map",
                title=spec["panel_title"],
                layout_key=layouts[spec["key"]],
                geometry=spec["geometry"],
                layout_dimension=spec["layout_dimension"],
            )
        )

    scatter_row = hv.ui.Horizontal(*scatter_panels)
    readout_panel = hv.ui.ExtensionPanel(
        id="catalog-hierarchy-readout",
        extension="abo-catalog-readout",
        panel="catalog-comparison",
        position="right",
        props={
            "models": model_panel_props(layouts),
            "examples": DEMO_EXAMPLES,
        },
    )
    return hv.ui.View(scatter_row, readout_panel)
360
+
361
+
362
def launch_demo(dataset: hv.Dataset, layouts: dict[str, str]) -> hv.Session:
    """Start a non-blocking HyperView session and configure its workspace.

    Registers the readout extension, applies the demo view, clears the active
    layout and the selection, prints a short banner, and returns the session.
    """
    session = hv.launch(
        dataset,
        host=SPACE_HOST,
        port=SPACE_PORT,
        open_browser=False,
        block=False,
        workspace_id=WORKSPACE_ID,
    )

    session.ui.add_extension(EXTENSION_DIR, workspace_id=WORKSPACE_ID)
    demo_view = build_demo_view(layouts)
    session.ui.apply_view(demo_view, workspace_id=WORKSPACE_ID)
    session.ui.set_active_layout(None, workspace_id=WORKSPACE_ID)
    session.ui.set_selection([], workspace_id=WORKSPACE_ID)

    print(f"\nHyperView ABO catalog demo is running at {session.url}", flush=True)
    model_names = " and ".join(spec["display_name"] for spec in MODEL_SPECS)
    for line in (
        f"  {model_names} pinned scatter panels are added side by side.",
        "  Press Ctrl+C to stop.\n",
    ):
        print(line, flush=True)
    return session
383
+
384
+
385
def main() -> None:
    """Build the dataset, print the layout key of each model, then serve until stopped."""
    dataset, layouts = build_dataset()

    print("Layouts:", flush=True)
    for spec in MODEL_SPECS:
        layout_key = layouts[spec["key"]]
        print(f"  {spec['display_name']}: {layout_key}", flush=True)

    launch_demo(dataset, layouts).wait()
392
+
393
+
394
+ if __name__ == "__main__":
395
+ main()