waltgrace committed on
Commit 4cda727 · verified · 1 Parent(s): 4406d00

Initial release: data-label-factory v0.1.0

.env.example ADDED
@@ -0,0 +1,13 @@
# data-label-factory environment configuration
#
# Copy to .env (or just `export` these in your shell) and edit URLs
# to point at the backend(s) you have running.

# ----- Qwen 2.5-VL backend (mlx_vlm.server) -----
# Default: localhost. Override if you run the server on another machine.
QWEN_URL=http://localhost:8291
QWEN_MODEL_PATH=mlx-community/Qwen2.5-VL-3B-Instruct-4bit

# ----- Gemma 4 + Falcon backend (mac_tensor) -----
# Default: localhost. Override if you run on a remote Mac (e.g. an M4 Mini).
GEMMA_URL=http://localhost:8500
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+docs/canvas-demo.gif filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,51 @@
# Python
__pycache__/
*.py[cod]
*.so
*.egg
*.egg-info/
dist/
build/
.eggs/
.pytest_cache/
.ruff_cache/
.mypy_cache/
.venv/
venv/
env/
.python-version

# Node / Next.js (web UI)
web/node_modules/
web/.next/
web/.turbo/
web/out/
web/*.log
web/canvas-shot.mjs

# Local data + experiment outputs
data/
experiments/
*.coco.json
*.verified.json
keep_list.json
manifest.json

# Local model caches
~/data-label-factory/
~/models/

# Secrets
.env
.env.local
*.pem
*.key

# OS
.DS_Store
Thumbs.db

# Editor
.vscode/
.idea/
*.swp
README.md ADDED
@@ -0,0 +1,303 @@
# data-label-factory

A generic auto-labeling pipeline for vision datasets. Pick any object class in
a YAML file, run one command, and end up with a clean COCO dataset reviewed in
a browser. Designed to run entirely on a 16 GB Apple Silicon Mac.

```
gather   →   filter    →   label    →   verify    →   review
(DDG/        (VLM YES/     (Falcon      (VLM per-     (canvas
 yt)          NO)           bbox)        bbox)         UI)
```

Two interchangeable VLM backends:

| Backend | Model | Server | Pick when |
|---|---|---|---|
| `qwen` | Qwen 2.5-VL-3B 4-bit | `mlx_vlm.server` | You want fast YES/NO classification (~3.5s/img on M4) |
| `gemma` | Gemma 4-26B-A4B 4-bit | `mac_tensor` (Expert Sniper) | You want richer reasoning + grounded segmentation in one server |

The `label` stage always uses **Falcon Perception** for bbox grounding, served
out of `mac_tensor` alongside Gemma. Falcon doesn't depend on the VLM choice —
it's a separate ~600 MB model.
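Falcon returns detections as normalized corner boxes (`bbox_norm` with `x1/y1/x2/y2` in 0-1), which the `label` stage converts to COCO-style pixel boxes. A minimal sketch of that conversion (field names mirror the `cli.py` code later in this commit):

```python
# Convert one Falcon `bbox_norm` detection to a COCO [x, y, w, h] pixel box,
# as the `label` stage does. Negative widths/heights are clamped to 0.
def bbox_norm_to_coco(bb: dict, img_w: int, img_h: int) -> list:
    x1 = bb.get("x1", 0) * img_w
    y1 = bb.get("y1", 0) * img_h
    x2 = bb.get("x2", 0) * img_w
    y2 = bb.get("y2", 0) * img_h
    return [x1, y1, max(0, x2 - x1), max(0, y2 - y1)]

print(bbox_norm_to_coco({"x1": 0.25, "y1": 0.5, "x2": 0.75, "y2": 1.0}, 640, 480))
# → [160.0, 240.0, 320.0, 240.0]
```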

---

## What you get when this finishes

For our reference run on a fiber-optic-drone detector:

- **1,421 source images** gathered from DuckDuckGo + Wikimedia + Openverse
- **15,355 Falcon Perception bboxes** generated by the `label` stage
- **11,928 / 15,355 (78%)** approved by Qwen 2.5-VL in the `verify` stage
- **Reviewed in a browser** via the canvas web UI (`web/`)

Per-query agreement between Falcon and Qwen on this dataset:
`cable spool` 88%, `quadcopter` 81%, `drone` 80%, `fiber optic spool` 57%.
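These agreement numbers are per-query approval rates over the verified boxes. A minimal sketch of the computation (the record shape here is illustrative, not the exact schema of the verify output JSON):

```python
from collections import defaultdict

def per_query_agreement(records):
    """Fraction of Falcon boxes the VLM answered YES to, per query."""
    yes = defaultdict(int)
    total = defaultdict(int)
    for r in records:
        total[r["query"]] += 1
        if r["verdict"] == "YES":
            yes[r["query"]] += 1
    return {q: yes[q] / total[q] for q in total}

# Illustrative records, not real verify output
sample = [
    {"query": "drone", "verdict": "YES"},
    {"query": "drone", "verdict": "YES"},
    {"query": "drone", "verdict": "NO"},
    {"query": "cable spool", "verdict": "YES"},
]
print(per_query_agreement(sample))
```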

You can reproduce all of this from this repo by following the steps below.

---

## 1. Install

```bash
# Clone
git clone https://github.com/walter-grace/data-label-factory.git
cd data-label-factory

# Install the CLI (registers `data_label_factory` on your $PATH)
pip install -e .

# (Optional) Add image-search dependencies for the `gather` stage
pip install -e ".[gather]"

# (Optional) Web UI deps — only if you want to review labels in a browser
cd web && npm install && cd ..
```

You can also install directly from HuggingFace:

```bash
pip install git+https://huggingface.co/waltgrace/data-label-factory
```

The factory CLI needs Python 3.10+. The backend servers (Qwen and/or Gemma)
are installed separately — you only need the one(s) you plan to use.

---

## 2. Pick a backend and start it

### Option A — Qwen 2.5-VL (recommended for filter/verify)

```bash
# Install mlx-vlm (Apple Silicon)
pip install mlx-vlm

# Start the OpenAI-compatible server
python3 -m mlx_vlm.server \
    --model mlx-community/Qwen2.5-VL-3B-Instruct-4bit \
    --port 8291
```

Verify it's alive:

```bash
QWEN_URL=http://localhost:8291 data_label_factory status
```

### Option B — Gemma 4 + Falcon (recommended for `label`)

This is the [MLX Expert Sniper](https://github.com/walter-grace/mac-code) deploy
package. It serves Gemma 4-26B-A4B (chat / `--vision`) **and** Falcon Perception
(`--falcon`) from the same process on port 8500. Total ~5 GB resident on a 16 GB
Mac via SSD-streamed experts.

```bash
# Install + download model (one-time, ~13 GB)
git clone https://github.com/walter-grace/mac-code
cd mac-code/research/expert-sniper/distributed
pip install -e . mlx mlx-vlm fastapi uvicorn pillow huggingface_hub python-multipart

huggingface-cli download mlx-community/gemma-4-26b-a4b-it-4bit \
    --local-dir ~/models/gemma4-source
python3 split_gemma4.py \
    --input ~/models/gemma4-source \
    --output ~/models/gemma4-stream

# Launch
python3 -m mac_tensor ui --vision --falcon \
    --stream-dir ~/models/gemma4-stream \
    --source-dir ~/models/gemma4-source \
    --port 8500
```

Verify:

```bash
GEMMA_URL=http://localhost:8500 data_label_factory status
```

You can run **both** servers at the same time. The factory CLI will use whichever
backend you select per command via `--backend qwen|gemma`.

---

## 3. Define a project

A project YAML is the *only* thing you need to write to onboard a new object
class. Two examples ship in `projects/`:

- [`projects/drones.yaml`](projects/drones.yaml) — fiber-optic drone detection (the original use case)
- [`projects/stop-signs.yaml`](projects/stop-signs.yaml) — minimal smoke test

Copy one and edit the four important fields:

```yaml
project_name: fire-hydrants
target_object: "fire hydrant"   # templated into all prompts as {target_object}
data_root: ~/data-label-factory/fire-hydrants

buckets:
  positive/clear_view:
    queries: ["red fire hydrant", "yellow fire hydrant", "fire hydrant on sidewalk"]
  negative/other_street_objects:
    queries: ["mailbox", "parking meter", "trash can"]
  background/empty_streets:
    queries: ["empty city street", "suburban sidewalk"]

falcon_queries:   # what Falcon will look for during `label`
  - "fire hydrant"
  - "red metal post"

backends:
  filter: qwen    # default per stage; CLI --backend overrides
  label: gemma
  verify: qwen
```
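The `{target_object}` templating noted above is, as far as the config comment suggests, plain string substitution into each stage's prompt. A minimal sketch under that assumption (the template text is illustrative; the real prompts ship with the project YAML / package):

```python
# Illustrative filter-prompt template — not the real shipped prompt.
# {target_object} comes from the project YAML.
TEMPLATE = "Does this image contain a {target_object}? Answer YES or NO."

def render_prompt(template: str, target_object: str) -> str:
    return template.format(target_object=target_object)

print(render_prompt(TEMPLATE, "fire hydrant"))
# → Does this image contain a fire hydrant? Answer YES or NO.
```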

Inspect a project before running anything:

```bash
data_label_factory project --project projects/fire-hydrants.yaml
```

---

## 4. Run the pipeline

The four stages can be run individually or chained:

```bash
PROJECT=projects/stop-signs.yaml

# 4a. Gather — image search across buckets
data_label_factory gather --project $PROJECT --max-per-query 30

# 4b. Filter — image-level YES/NO via your chosen VLM
data_label_factory filter --project $PROJECT --backend qwen

# 4c. Label — Falcon Perception bbox grounding (needs the Gemma server up)
data_label_factory label --project $PROJECT

# 4d. Verify — per-bbox YES/NO via your chosen VLM
#     (verify is a TODO in the generic CLI today; runpod_falcon/verify_vlm.py
#     is the original drone-specific impl that the generic version will wrap.)

# OR run gather → filter end-to-end:
data_label_factory pipeline --project $PROJECT --backend qwen
```

Every command writes a timestamped folder under `experiments/` (relative to
your current working directory) with the config, prompts, raw model answers,
and JSON outputs. List them with:

```bash
data_label_factory list
```
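Based on the stage descriptions above (and the `cli.py` internals later in this commit), an experiment folder looks roughly like this; exact names vary by run:

```
experiments/
  <timestamped-run>/
    README                   # run description + params
    config                   # snapshot of project YAML + CLI args
    filter_qwen/
      keep_list.json         # per-image YES/NO verdicts
    label_falcon/
      <project>.coco.json    # COCO images/annotations/categories
  latest -> <timestamped-run>/   # symlink to the most recent run
```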

---

## 5. Review the labels in a browser

The `web/` directory is a Next.js + HTML5 Canvas review tool. It reads your
labeled JSON straight from R2 (or local — see `web/app/api/labels/route.ts`)
and renders the bboxes over each image with hover, click-to-select, scroll-zoom,
and keyboard navigation.

```bash
cd web
PORT=3030 npm run dev
# open http://localhost:3030/canvas
```

Features:
- **Drag** to pan, **scroll** to zoom around the cursor, **double-click** to reset
- **←/→** to navigate images, **click** a bbox to select it
- **Color coding**: per-query color, dashed red for VLM rejections, white outline for the active bbox
- **Bucket tabs** to filter by source bucket
- **Per-image query summary** with YES/NO counts

The grid view at `http://localhost:3030/` is the older shadcn-based browser
with a thumbnail grid + per-bbox approve/reject buttons.

---

## Configuration reference

### Environment variables

| Var | Default | What |
|---|---|---|
| `QWEN_URL` | `http://localhost:8291` | Where the `mlx_vlm.server` lives |
| `QWEN_MODEL_PATH` | `mlx-community/Qwen2.5-VL-3B-Instruct-4bit` | Model id sent in the OpenAI request |
| `GEMMA_URL` | `http://localhost:8500` | Where `mac_tensor` lives (also serves Falcon) |

Set them inline for one command, or `export` them in your shell.
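For example, to point the CLI at a remote Mac for the whole shell session (the IP is illustrative; the ports are the defaults from the table above):

```shell
export QWEN_URL=http://10.0.0.5:8291
export GEMMA_URL=http://10.0.0.5:8500
```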

### CLI flags

```
data_label_factory <command> [flags]

Commands:
  status                 Check both backends are alive
  project  --project P   Print a project YAML for inspection
  gather   --project P   Search the web for images across buckets
  filter   --project P   Image-level YES/NO via Qwen or Gemma
  label    --project P   Falcon Perception bbox grounding
  pipeline --project P   gather → filter
  list                   Show experiments

Common flags:
  --backend qwen|gemma   Pick the VLM (filter, pipeline). Overrides project YAML.
  --limit N              Process at most N images (smoke testing)
  --experiment NAME      Reuse an existing experiment dir
```

### Project YAML reference

See [`projects/drones.yaml`](projects/drones.yaml) for the canonical, fully
commented example. Required fields: `project_name`, `target_object`, `buckets`,
`falcon_queries`. Everything else has defaults.

---

## How big is this thing?

| Component | Disk | RAM (resident) |
|---|---|---|
| Factory CLI + Python deps | < 50 MB | negligible |
| Qwen 2.5-VL-3B 4-bit | ~2.2 GB | ~2.5 GB |
| Gemma 4-26B-A4B (Expert Sniper streaming) | ~13 GB | ~3 GB |
| Falcon Perception 0.6B | ~1.5 GB | ~1.5 GB |
| Web UI dev server | ~300 MB node_modules | ~150 MB |
| **Total (Gemma + Falcon path)** | **~17 GB** | **~5 GB** |

Fits comfortably on a 16 GB Apple Silicon Mac.

---

## Known issues

1. **Gemma `/api/chat_vision` is unreliable for batch YES/NO prompts.** When the
   chained agent doesn't see a clear reason to call Falcon, it can stall. For the
   `filter` and `verify` stages, prefer `--backend qwen`. Gemma is rock solid for
   the `label` stage (which uses `/api/falcon` directly).
2. **The generic `verify` command is a TODO.** The original drone-specific
   `runpod_falcon/verify_vlm.py` works today; the generic wrapper is a small
   refactor still pending.
3. **Image search hits DDG rate limits** if you run with too high a `--max-per-query`.
   30-50 per query is comfortable; beyond ~100 you'll see throttling.

---

## Credits

- **Falcon Perception** by TII — Apache 2.0
- **Gemma 4** by Google DeepMind — Apache 2.0
- **Qwen 2.5-VL** by Alibaba — Apache 2.0
- **MLX** by Apple Machine Learning Research — MIT
- **mlx-vlm** by Prince Canuma — MIT
- **MLX Expert Sniper** streaming engine by [walter-grace](https://github.com/walter-grace/mac-code)
data_label_factory/__init__.py ADDED
@@ -0,0 +1,13 @@
"""data_label_factory — generic auto-labeling pipeline for vision datasets.

Public API:
    load_project(path) → ProjectConfig
    ProjectConfig      → loaded project YAML with helpers

CLI entry point: `data_label_factory` (defined in pyproject.toml).
"""

from .project import load_project, ProjectConfig

__version__ = "0.1.0"
__all__ = ["load_project", "ProjectConfig", "__version__"]
data_label_factory/cli.py ADDED
@@ -0,0 +1,629 @@
#!/usr/bin/env python3
"""
data_label_factory — generic data labeling pipeline driven by a project YAML.

Same architecture as drone_factory but TARGET-AGNOSTIC. Pick any object class,
write a project YAML, run the same pipeline. Drones, stop signs, fire hydrants,
manufacturing defects — same scripts, different config.

Subcommands:
    status             check M4 backends are alive
    gather             DDG image search → local cache (uses project bucket queries)
    filter             image-level YES/NO classification
    label              Falcon Perception bbox grounding (or Qwen if config says so)
    verify             per-bbox YES/NO classification
    pipeline           full chain: gather → filter → label → verify
    list               list experiments
    show <experiment>  show experiment details
    project            dump a project YAML for inspection

Usage:
    # Inspect a project config
    data_label_factory project --project projects/drones.yaml

    # Run the entire pipeline for a project
    data_label_factory pipeline --project projects/stop-signs.yaml --max-per-query 20

    # Just gather (no labeling)
    data_label_factory gather --project projects/drones.yaml --max-per-query 30

    # Filter a specific experiment
    data_label_factory filter --project projects/drones.yaml --experiment latest
"""

import argparse
import base64
import io
import json
import os
import subprocess
import sys
import time
import urllib.request
from collections import defaultdict
from datetime import datetime
from pathlib import Path

HERE = os.path.dirname(os.path.abspath(__file__))

from .project import load_project, ProjectConfig
from .experiments import (
    make_experiment_dir, write_readme, write_config,
    update_latest_symlink, list_experiments,
)


# ============================================================
# CONFIG — overridable via environment variables
# ============================================================
#
# Users pick a VLM backend at runtime via --backend qwen|gemma.
#
#   qwen  → Qwen 2.5-VL via mlx_vlm.server (default URL: http://localhost:8291)
#   gemma → Gemma 4 via mac_tensor (default URL: http://localhost:8500)
#
# Falcon Perception (bbox grounding for `label`) is bundled with mac_tensor and
# is always reached via the GEMMA_URL regardless of which VLM you picked for
# the chat-style YES/NO stages.
#
# Override URLs via env vars when running against a remote machine, e.g.:
#   QWEN_URL=http://10.0.0.5:8291 data_label_factory filter --project ...

QWEN_URL = os.environ.get("QWEN_URL", "http://localhost:8291")
QWEN_MODEL_PATH = os.environ.get(
    "QWEN_MODEL_PATH", "mlx-community/Qwen2.5-VL-3B-Instruct-4bit"
)
GEMMA_URL = os.environ.get("GEMMA_URL", "http://localhost:8500")

VALID_BACKENDS = ("qwen", "gemma")


# ============================================================
# BACKEND CLIENTS (reused)
# ============================================================


def call_qwen(image_path: str, prompt: str, timeout: int = 60) -> tuple:
    from PIL import Image
    img = Image.open(image_path).convert("RGB")
    if max(img.size) > 1024:
        ratio = 1024 / max(img.size)
        img = img.resize((int(img.size[0] * ratio), int(img.size[1] * ratio)), Image.LANCZOS)
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()
    payload = {
        "model": QWEN_MODEL_PATH,
        "messages": [{"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
            {"type": "text", "text": prompt},
        ]}],
        "max_tokens": 32, "temperature": 0,
    }
    req = urllib.request.Request(
        f"{QWEN_URL}/v1/chat/completions",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    t0 = time.time()
    with urllib.request.urlopen(req, timeout=timeout) as r:
        data = json.loads(r.read())
    return data["choices"][0]["message"]["content"].strip(), time.time() - t0


def call_gemma(image_path: str, prompt: str, timeout: int = 300, max_tokens: int = 64) -> tuple:
    """Hit mac_tensor /api/chat_vision with multipart + parse SSE.
    Returns (final_text, elapsed_seconds)."""
    boundary = f"----factory{int(time.time()*1000)}"
    body = io.BytesIO()

    def part(name, value, filename=None, content_type=None):
        body.write(f"--{boundary}\r\n".encode())
        if filename:
            body.write(f'Content-Disposition: form-data; name="{name}"; filename="{filename}"\r\n'.encode())
            body.write(f'Content-Type: {content_type or "application/octet-stream"}\r\n\r\n'.encode())
            body.write(value)
            body.write(b"\r\n")
        else:
            body.write(f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode())
            body.write(str(value).encode())
            body.write(b"\r\n")

    with open(image_path, "rb") as f:
        img_bytes = f.read()
    part("message", prompt)
    part("max_tokens", str(max_tokens))
    part("image", img_bytes, filename=os.path.basename(image_path), content_type="image/jpeg")
    body.write(f"--{boundary}--\r\n".encode())

    req = urllib.request.Request(
        f"{GEMMA_URL}/api/chat_vision",
        data=body.getvalue(),
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )
    t0 = time.time()
    chunks = []
    final_text = ""
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        for line in resp:
            line = line.rstrip(b"\r\n")
            if not line.startswith(b"data:"):
                continue
            try:
                event = json.loads(line[len(b"data:"):].strip())
            except Exception:
                continue
            etype = event.get("type")
            if etype == "token":
                chunks.append(event.get("text", ""))
            elif etype == "final":
                final_text = event.get("text", "")
                break
            elif etype == "done":
                break
    text = (final_text or "".join(chunks)).strip()
    return text, time.time() - t0


def call_vlm(backend: str, image_path: str, prompt: str, timeout: int = 120) -> tuple:
    """Backend-agnostic chat call. Returns (text, elapsed_seconds).
    Raises ValueError on unknown backend."""
    if backend == "qwen":
        return call_qwen(image_path, prompt, timeout=timeout)
    if backend == "gemma":
        return call_gemma(image_path, prompt, timeout=timeout)
    raise ValueError(f"unknown backend {backend!r}; valid: {VALID_BACKENDS}")


def resolve_backend(args, proj: ProjectConfig, stage: str) -> str:
    """CLI flag wins over project YAML; project YAML wins over default 'qwen'."""
    cli = getattr(args, "backend", None)
    if cli:
        if cli not in VALID_BACKENDS:
            raise SystemExit(f"--backend must be one of {VALID_BACKENDS}, got {cli!r}")
        return cli
    backend = proj.backend_for(stage)
    if backend not in VALID_BACKENDS:
        # project specifies "pod" or another legacy value — fall back to qwen
        return "qwen"
    return backend


def call_falcon_m4(image_path: str, query: str, timeout: int = 120) -> dict:
    """Hit mac_tensor /api/falcon (direct, no chained agent). Returns parsed JSON."""
    boundary = f"----factory{int(time.time()*1000)}"
    body = io.BytesIO()

    def part(name, value, filename=None, content_type=None):
        body.write(f"--{boundary}\r\n".encode())
        if filename:
            body.write(f'Content-Disposition: form-data; name="{name}"; filename="{filename}"\r\n'.encode())
            body.write(f'Content-Type: {content_type or "application/octet-stream"}\r\n\r\n'.encode())
            body.write(value)
            body.write(b"\r\n")
        else:
            body.write(f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode())
            body.write(str(value).encode())
            body.write(b"\r\n")

    with open(image_path, "rb") as f:
        img_bytes = f.read()
    part("query", query)
    part("image", img_bytes, filename=os.path.basename(image_path), content_type="image/jpeg")
    body.write(f"--{boundary}--\r\n".encode())

    req = urllib.request.Request(
        f"{GEMMA_URL}/api/falcon",
        data=body.getvalue(),
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )
    t0 = time.time()
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        data = json.loads(resp.read())
    data["_elapsed_seconds"] = time.time() - t0
    return data


def parse_yes_no(text: str) -> str:
    t = text.strip().upper()
    first = t.split()[0].rstrip(".,") if t else ""
    if "YES" in first: return "YES"
    if "NO" in first: return "NO"
    if "YES" in t: return "YES"
    if "NO" in t: return "NO"
    return "UNKNOWN"


# ============================================================
# COMMANDS
# ============================================================


def cmd_status(args):
    print("=" * 60)
    print("Backend status")
    print("=" * 60)
    print(f"  QWEN_URL  = {QWEN_URL}  (override with env QWEN_URL)")
    print(f"  GEMMA_URL = {GEMMA_URL}  (override with env GEMMA_URL)")
    for name, url, info_path in [
        ("Qwen2.5-VL (mlx_vlm.server)", QWEN_URL, "/v1/models"),
        ("Gemma 4 + Falcon (mac_tensor)", GEMMA_URL, "/api/info"),
    ]:
        print(f"\n  {name}")
        print(f"    {url}")
        try:
            with urllib.request.urlopen(f"{url}{info_path}", timeout=5) as r:
                data = json.loads(r.read())
            print(f"    ✓ alive: {json.dumps(data)[:200]}")
        except Exception as e:
            print(f"    ✗ DOWN: {e}")


def cmd_project(args):
    """Print a project config for inspection."""
    proj = load_project(args.project)
    print("=" * 60)
    print(f"Project: {proj.project_name}")
    print("=" * 60)
    print(f"  target_object: {proj.target_object!r}")
    print(f"  description:   {proj.description.strip()}")
    print(f"  data_root:     {proj.local_image_dir()}")
    print(f"  r2_bucket:     {proj.r2_bucket}")
    print(f"  r2 raw prefix: {proj.r2_raw_prefix}")
    print(f"  r2 labels:     {proj.r2_labels_prefix}")
    print(f"\n  buckets ({len(proj.bucket_queries)}):")
    for b, qs in proj.bucket_queries.items():
        print(f"    {b:40s} {len(qs)} queries")
    print(f"\n  falcon_queries: {proj.falcon_queries}")
    print(f"  backends:       {proj.backends}")
    print(f"  total_queries:  {proj.total_query_count()}")
    print(f"\n  Filter prompt preview:")
    for line in proj.prompt("filter").split("\n")[:6]:
        print(f"    {line}")


def resolve_experiment(name_or_latest: str) -> str:
    base = "experiments"
    if name_or_latest == "latest":
        link = os.path.join(base, "latest")
        if os.path.islink(link):
            return os.path.abspath(os.path.realpath(link))
        exps = list_experiments(base)
        if exps:
            return exps[0]["path"]
        raise FileNotFoundError("no experiments found")
    full = os.path.join(base, name_or_latest)
    if os.path.exists(full):
        return os.path.abspath(full)
    for e in list_experiments(base):
        if name_or_latest in e["name"]:
            return e["path"]
    raise FileNotFoundError(f"experiment '{name_or_latest}' not found")


def cmd_gather(args):
    """Run gather_v2 once per bucket from the project's bucket_queries."""
    proj = load_project(args.project)
    print(f"Gathering for project: {proj.project_name}")
    print(f"  target:    {proj.target_object}")
    print(f"  data_root: {proj.local_image_dir()}")
    print(f"  buckets:   {len(proj.bucket_queries)}")

    # Make an experiment dir if one wasn't given
    exp_name = args.experiment or f"gather-{proj.project_name}"
    exp_dir = make_experiment_dir(exp_name)
    write_readme(exp_dir, exp_name,
                 description=f"Gather for {proj.project_name} ({proj.target_object})",
                 params=vars(args))
    write_config(exp_dir, {"project": proj.raw, **vars(args)})
    update_latest_symlink(exp_dir)
    print(f"Experiment: {exp_dir}")

    env = os.environ.copy()
    env["EXPERIMENT_DIR"] = exp_dir

    summary = []
    for bucket, queries in proj.bucket_queries.items():
        print(f"\n[{bucket}] {len(queries)} queries")
        cmd = [
            sys.executable, os.path.join(HERE, "gather.py"),
            "--out", proj.local_image_dir(),
            "--bucket", bucket,
            "--max-per-query", str(args.max_per_query),
            "--workers", str(args.workers),
        ]
        for q in queries:
            cmd += ["--query", q]
        t0 = time.time()
        try:
            result = subprocess.run(cmd, env=env, capture_output=True, text=True, check=True)
            # Echo gather's summary line (second-to-last line of its stdout)
            print(result.stdout.strip().split("\n")[-2:][0] if result.stdout else "")
        except subprocess.CalledProcessError as e:
            print(f"  FAILED: {e.stderr[-300:]}")
        summary.append({"bucket": bucket, "elapsed": round(time.time() - t0, 1)})

    print(f"\nDONE — {sum(s['elapsed'] for s in summary):.0f}s total")


def cmd_filter(args):
    """Run image-level YES/NO classification on all images for a project.
    Backend chosen via --backend (qwen|gemma) or project YAML."""
    proj = load_project(args.project)
    backend = resolve_backend(args, proj, "filter")

    img_root = proj.local_image_dir()
    if not os.path.exists(img_root):
        print(f"  no images at {img_root}; run gather first")
        return

    images = []
    for root, _, names in os.walk(img_root):
        for n in names:
            if n.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
                full = os.path.join(root, n)
                rel = os.path.relpath(full, img_root)
                parts = rel.split("/")
                if len(parts) < 2:
                    continue
                images.append(("/".join(parts[:2]), rel, full))
    if args.limit > 0:
        images = images[:args.limit]

    prompt = proj.prompt("filter")
    backend_label = {"qwen": "Qwen 2.5-VL", "gemma": "Gemma 4"}[backend]
    print(f"Filtering {len(images)} images via {backend_label}...")
    print(f"  prompt: {prompt[:120]}...")

    results = []
    counts = {"YES": 0, "NO": 0, "UNKNOWN": 0, "ERROR": 0}
    t0 = time.time()
    for i, (bucket, rel, full) in enumerate(images, 1):
        try:
            answer, elapsed = call_vlm(backend, full, prompt)
            verdict = parse_yes_no(answer)
        except Exception as e:
            answer, elapsed, verdict = f"ERROR: {e}", 0, "ERROR"
        counts[verdict] += 1
        results.append({
            "image_path": rel, "bucket": bucket, "verdict": verdict,
            "raw_answer": answer[:120], "elapsed_seconds": round(elapsed, 3),
        })
        if i % 10 == 0 or i == len(images):
            elapsed_total = time.time() - t0
            rate = i / max(elapsed_total, 1)
            eta = (len(images) - i) / max(rate, 0.001) / 60
            print(f"  [{i:4d}/{len(images)}] YES={counts['YES']} NO={counts['NO']} ERR={counts['ERROR']} ETA {eta:.0f} min")

    # Save to a fresh experiment dir (or reuse the one named via --experiment)
    exp_name = args.experiment or f"filter-{proj.project_name}"
    exp_dir = resolve_experiment(args.experiment) if args.experiment else make_experiment_dir(exp_name)
    out_dir = os.path.join(exp_dir, f"filter_{backend}")
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, "keep_list.json")
    with open(out_path, "w") as f:
        json.dump({"backend": backend, "project": proj.project_name,
                   "counts": counts, "results": results}, f, indent=2)
    print(f"\nSaved {out_path}")
    print(f"  YES rate: {counts['YES']/max(1, len(images)):.0%}")
407
+
408
+
409
+ def cmd_label(args):
+     """Label all images via M4 /api/falcon (one POST per image per query).
+     Saves COCO-format annotations to <experiment>/label_falcon/<project>.coco.json.
+     """
+     proj = load_project(args.project)
+     img_root = proj.local_image_dir()
+     if not os.path.exists(img_root):
+         print(f" no images at {img_root}; run gather first")
+         return
+
+     images = []
+     for root, _, names in os.walk(img_root):
+         for n in names:
+             if n.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
+                 full = os.path.join(root, n)
+                 rel = os.path.relpath(full, img_root)
+                 if "/" not in rel:
+                     continue
+                 images.append((rel.split("/", 1)[0], rel, full))
+     if args.limit > 0:
+         images = images[:args.limit]
+     print(f"Labeling {len(images)} images x {len(proj.falcon_queries)} Falcon queries each")
+     print(f" queries: {proj.falcon_queries}")
+
+     # COCO accumulator
+     coco = {
+         "info": {
+             "description": f"data_label_factory run for {proj.project_name}",
+             "date_created": datetime.now().isoformat(timespec="seconds"),
+             "target_object": proj.target_object,
+         },
+         "images": [],
+         "annotations": [],
+         "categories": [
+             {"id": i + 1, "name": q, "supercategory": "object"}
+             for i, q in enumerate(proj.falcon_queries)
+         ],
+     }
+     cat_id = {q: i + 1 for i, q in enumerate(proj.falcon_queries)}
+     next_img_id, next_ann_id = 1, 1
+     n_with_dets = 0
+     n_total_dets = 0
+     t0 = time.time()
+
+     from PIL import Image  # lazy import: only this stage needs PIL
+     for i, (bucket, rel, full) in enumerate(images, 1):
+         try:
+             im = Image.open(full)
+             iw, ih = im.size
+         except Exception as e:
+             print(f" skip {rel}: load fail {e}")
+             continue
+         img_id = next_img_id
+         next_img_id += 1
+         coco["images"].append({"id": img_id, "file_name": rel, "width": iw, "height": ih, "bucket": bucket})
+
+         img_dets = 0
+         for q in proj.falcon_queries:
+             try:
+                 resp = call_falcon_m4(full, q, timeout=180)
+                 masks = resp.get("masks", [])
+             except Exception as e:
+                 masks = []
+                 print(f" {rel} [{q}]: error {str(e)[:80]}")
+             for m in masks:
+                 bb = m.get("bbox_norm") or {}
+                 if not bb:
+                     continue
+                 x1 = bb.get("x1", 0) * iw
+                 y1 = bb.get("y1", 0) * ih
+                 x2 = bb.get("x2", 0) * iw
+                 y2 = bb.get("y2", 0) * ih
+                 w = max(0, x2 - x1)
+                 h = max(0, y2 - y1)
+                 coco["annotations"].append({
+                     "id": next_ann_id, "image_id": img_id,
+                     "category_id": cat_id[q],
+                     "bbox": [round(x1, 2), round(y1, 2), round(w, 2), round(h, 2)],
+                     "area": round(w * h, 2), "iscrowd": 0,
+                     "score": float(m.get("area_fraction", 1.0)),
+                 })
+                 next_ann_id += 1
+                 img_dets += 1
+
+         if img_dets > 0:
+             n_with_dets += 1
+             n_total_dets += img_dets
+
+         if i % 5 == 0 or i == len(images):
+             elapsed = time.time() - t0
+             rate = i / max(elapsed, 1)
+             eta = (len(images) - i) / max(rate, 0.001) / 60
+             print(f" [{i:4d}/{len(images)}] hit={n_with_dets} dets={n_total_dets} ETA {eta:.0f} min")
+
+     # Save COCO
+     exp_dir = resolve_experiment(args.experiment) if args.experiment else make_experiment_dir(f"label-m4-{proj.project_name}")
+     out_dir = os.path.join(exp_dir, "label_falcon")
+     os.makedirs(out_dir, exist_ok=True)
+     out_path = os.path.join(out_dir, f"{proj.project_name}.coco.json")
+     with open(out_path, "w") as f:
+         json.dump(coco, f, indent=2)
+     print(f"\nSaved {out_path}")
+     print(f" {len(coco['images'])} images, {len(coco['annotations'])} bboxes")
+
+
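`cmd_label` converts Falcon's normalized corner boxes into COCO's absolute `[x, y, width, height]`. The conversion in isolation (`norm_bbox_to_coco` is a hypothetical helper mirroring the loop body, not a function the module exports):

```python
def norm_bbox_to_coco(bb: dict, iw: int, ih: int) -> list:
    """bbox_norm corners in [0, 1] -> COCO [x, y, w, h] in pixels."""
    x1, y1 = bb.get("x1", 0) * iw, bb.get("y1", 0) * ih
    x2, y2 = bb.get("x2", 0) * iw, bb.get("y2", 0) * ih
    w = max(0, x2 - x1)  # degenerate boxes clamp to zero width/height
    h = max(0, y2 - y1)
    return [round(x1, 2), round(y1, 2), round(w, 2), round(h, 2)]
```

For a 640x480 image, a normalized box spanning the lower-right quadrant from x=0.25 maps to origin (160, 240) with a 320x240 extent.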
+ def cmd_pipeline(args):
+     """Full pipeline: gather → filter for the project."""
+     proj = load_project(args.project)
+     print("=" * 70)
+     print(f"PIPELINE — {proj.project_name} ({proj.target_object})")
+     print("=" * 70)
+
+     exp = make_experiment_dir(f"pipeline-{proj.project_name}")
+     write_readme(exp, f"pipeline-{proj.project_name}",
+                  description=f"Full pipeline for {proj.target_object}",
+                  params=vars(args))
+     write_config(exp, {"project": proj.raw, **vars(args)})
+     update_latest_symlink(exp)
+     print(f"Experiment: {exp}\n")
+
+     # 1. Gather
+     print(">>> GATHER")
+     args.experiment = os.path.basename(exp).split("_", 2)[-1]
+     cmd_gather(args)
+
+     # 2. Filter
+     print("\n>>> FILTER")
+     args.experiment = os.path.basename(exp)
+     cmd_filter(args)
+
+     # Label + verify TBD via pod or qwen — skipping in this MVP
+     print("\n>>> LABEL + VERIFY: skipped in MVP — use drone_factory pod path or extend")
+     print(f"\nPIPELINE DONE — {exp}")
+
+
+ def cmd_list(args):
+     print("=" * 60)
+     print("Experiments")
+     print("=" * 60)
+     for e in list_experiments():
+         cfg = e.get("config", {})
+         proj = (cfg.get("project") or {}).get("project_name", cfg.get("backend", "?"))
+         print(f" {e['name']:50s} project={proj}")
+
+
+ # ============================================================
+ # MAIN
+ # ============================================================
+
+
+ def main():
+     p = argparse.ArgumentParser(
+         prog="data_label_factory",
+         description=(
+             "Generic data labeling pipeline. Pick any object class via a "
+             "project YAML, then run: gather → filter → label → verify. "
+             "Choose your VLM backend with --backend qwen|gemma."
+         ),
+     )
+     sub = p.add_subparsers(dest="command", required=True)
+
+     def add_backend_flag(parser):
+         parser.add_argument(
+             "--backend",
+             choices=VALID_BACKENDS,
+             default=None,
+             help=("VLM backend for chat-style stages (filter, verify). "
+                   "Overrides the project YAML. Defaults to project setting "
+                   "or 'qwen'."),
+         )
+
+     sub.add_parser("status", help="Check backends are alive")
+
+     sp = sub.add_parser("project", help="Show project YAML")
+     sp.add_argument("--project", required=True)
+
+     sg = sub.add_parser("gather", help="Gather images for a project")
+     sg.add_argument("--project", required=True)
+     sg.add_argument("--max-per-query", type=int, default=30)
+     sg.add_argument("--workers", type=int, default=50)
+     sg.add_argument("--experiment", default=None)
+
+     sf = sub.add_parser("filter", help="Image-level YES/NO classification (qwen or gemma)")
+     sf.add_argument("--project", required=True)
+     sf.add_argument("--experiment", default=None)
+     sf.add_argument("--limit", type=int, default=0)
+     add_backend_flag(sf)
+
+     sl = sub.add_parser("label", help="Falcon Perception bbox grounding via mac_tensor /api/falcon")
+     sl.add_argument("--project", required=True)
+     sl.add_argument("--experiment", default=None)
+     sl.add_argument("--limit", type=int, default=0)
+
+     spi = sub.add_parser("pipeline", help="Full chain: gather → filter (label/verify TBD)")
+     spi.add_argument("--project", required=True)
+     spi.add_argument("--max-per-query", type=int, default=20)
+     spi.add_argument("--workers", type=int, default=50)
+     spi.add_argument("--experiment", default=None)
+     spi.add_argument("--limit", type=int, default=0)
+     add_backend_flag(spi)
+
+     sub.add_parser("list", help="List experiments")
+
+     args = p.parse_args()
+     cmd_func = {
+         "status": cmd_status,
+         "project": cmd_project,
+         "gather": cmd_gather,
+         "filter": cmd_filter,
+         "label": cmd_label,
+         "pipeline": cmd_pipeline,
+         "list": cmd_list,
+     }.get(args.command)
+     if cmd_func is None:
+         p.print_help()
+         sys.exit(1)
+     cmd_func(args)
+
+
+ if __name__ == "__main__":
+     main()
data_label_factory/experiments.py ADDED
@@ -0,0 +1,161 @@
+ """
+ experiments.py — dated experiment folder convention.
+
+ Every pipeline run goes into experiments/<YYYY-MM-DD_HHMMSS>_<name>/
+ with a README + config.json so we can compare runs over time.
+
+ Layout:
+   experiments/
+   ├── 2026-04-07_193000_first-yt-batch/
+   │   ├── README.md        ← what this run was, parameters, observations
+   │   ├── config.json      ← exact CLI args
+   │   ├── gather/          ← gather_v2 outputs (images go to drone-dataset-v2/)
+   │   │   ├── manifest.json
+   │   │   └── stats.json
+   │   ├── filter_qwen/     ← run_qwen_filter outputs
+   │   │   ├── keep_list.json
+   │   │   └── stats.json
+   │   ├── label_falcon/    ← pod_label outputs (from RunPod)
+   │   │   ├── coco.json
+   │   │   └── stats.json
+   │   ├── verify_qwen/     ← verify_vlm outputs (from RunPod)
+   │   │   ├── verified.json
+   │   │   └── stats.json
+   │   └── reviews/         ← human verdicts from the web UI
+   │       └── reviews.json
+   └── latest -> 2026-04-07_193000_first-yt-batch/   ← symlink to most recent
+
+ The drone-dataset-v2/ images themselves are SHARED across experiments —
+ each experiment writes labels/filters/verifications referencing those images,
+ not copies of them.
+ """
+
+ import json
+ import os
+ from datetime import datetime
+
+
+ def make_experiment_dir(name: str = "", base: str = "experiments") -> str:
+     """Create a fresh experiment dir with a timestamp + optional name suffix.
+     Returns the absolute path."""
+     ts = datetime.now().strftime("%Y-%m-%d_%H%M%S")
+     safe_name = name.strip().replace(" ", "-").replace("/", "_") if name else ""
+     folder = f"{ts}_{safe_name}" if safe_name else ts
+     full = os.path.abspath(os.path.join(base, folder))
+     os.makedirs(full, exist_ok=True)
+
+     # Create the standard subdirs
+     for sub in ("gather", "filter_qwen", "label_falcon", "verify_qwen", "reviews"):
+         os.makedirs(os.path.join(full, sub), exist_ok=True)
+
+     return full
+
+
+ def write_readme(experiment_dir: str, name: str, description: str, params: dict):
+     """Write a small markdown README capturing what this experiment is."""
+     readme_path = os.path.join(experiment_dir, "README.md")
+     lines = [
+         f"# Experiment: {name or os.path.basename(experiment_dir)}",
+         "",
+         f"**Started:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+         "",
+         f"**Path:** `{experiment_dir}`",
+         "",
+         "## Description",
+         "",
+         description or "(no description)",
+         "",
+         "## Parameters",
+         "",
+         "```json",
+         json.dumps(params, indent=2),
+         "```",
+         "",
+         "## Pipeline stages",
+         "",
+         "1. **gather/** — image gathering manifest",
+         "2. **filter_qwen/** — image-level Qwen YES/NO filter results",
+         "3. **label_falcon/** — Falcon Perception bbox grounding (COCO format)",
+         "4. **verify_qwen/** — per-bbox Qwen verification",
+         "5. **reviews/** — human verdicts from the web UI",
+         "",
+     ]
+     with open(readme_path, "w") as f:
+         f.write("\n".join(lines))
+
+
+ def write_config(experiment_dir: str, config: dict):
+     """Write the exact config used for this experiment."""
+     with open(os.path.join(experiment_dir, "config.json"), "w") as f:
+         json.dump(config, f, indent=2)
+
+
+ def update_latest_symlink(experiment_dir: str, base: str = "experiments"):
+     """Update the experiments/latest symlink to point at this experiment."""
+     base_abs = os.path.abspath(base)
+     link = os.path.join(base_abs, "latest")
+     target = os.path.basename(experiment_dir)  # relative symlink
+     if os.path.islink(link):
+         os.unlink(link)
+     elif os.path.exists(link):
+         # Don't clobber a real directory
+         return
+     try:
+         os.symlink(target, link)
+     except OSError:
+         pass  # symlinks can fail on some filesystems
+
+
+ def list_experiments(base: str = "experiments") -> list:
+     """List all experiment directories, newest first."""
+     if not os.path.exists(base):
+         return []
+     out = []
+     for entry in sorted(os.listdir(base), reverse=True):
+         if entry == "latest":
+             continue
+         full = os.path.join(base, entry)
+         if not os.path.isdir(full):
+             continue
+         readme = os.path.join(full, "README.md")
+         config = os.path.join(full, "config.json")
+         cfg = {}
+         if os.path.exists(config):
+             try:
+                 with open(config) as fh:
+                     cfg = json.load(fh)
+             except Exception:
+                 pass
+         out.append({
+             "name": entry,
+             "path": full,
+             "config": cfg,
+             "has_readme": os.path.exists(readme),
+         })
+     return out
+
+
+ if __name__ == "__main__":
+     # CLI: list experiments or make one
+     import argparse
+     p = argparse.ArgumentParser()
+     sub = p.add_subparsers(dest="cmd")
+
+     p_new = sub.add_parser("new", help="Create a new dated experiment folder")
+     p_new.add_argument("--name", default="", help="Optional human-readable suffix")
+     p_new.add_argument("--description", default="")
+
+     sub.add_parser("list", help="List existing experiments")
+
+     args = p.parse_args()
+     if args.cmd == "new":
+         path = make_experiment_dir(args.name)
+         write_readme(path, args.name, args.description, {})
+         update_latest_symlink(path)
+         print(path)
+     elif args.cmd == "list":
+         for e in list_experiments():
+             print(f" {e['name']}")
+     else:
+         p.print_help()
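The folder-name convention experiments.py builds can be exercised on its own. `experiment_folder` below is an illustrative restatement of `make_experiment_dir`'s naming logic (timestamp plus sanitized suffix), not a function the module exports:

```python
import re
from datetime import datetime


def experiment_folder(name: str = "") -> str:
    """<YYYY-MM-DD_HHMMSS> plus an optional sanitized suffix."""
    ts = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    safe = name.strip().replace(" ", "-").replace("/", "_") if name else ""
    return f"{ts}_{safe}" if safe else ts
```

Spaces become hyphens and slashes become underscores, so `"first yt/batch"` yields a folder ending in `_first-yt_batch`; sorting these names lexicographically is the same as sorting by creation time, which is what `list_experiments` relies on.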
data_label_factory/gather.py ADDED
@@ -0,0 +1,554 @@
+ #!/usr/bin/env python3
+ """
+ gather.py — smarter, parallel image gatherer for the drone-falcon dataset.
+
+ Improvements over gather_images.py (v1):
+   - Parallel downloads (50 threads instead of sequential)
+   - YouTube frame extraction via yt-dlp + ffmpeg (the killer feature for combat footage)
+   - Optional inline Qwen filter — only saves images Qwen says YES to
+   - Perceptual-hash dedup across sources (catches the same image from different sites)
+   - Resumable via local manifest
+
+ Sources:
+   - DuckDuckGo image search (broad, noisy)
+   - Wikimedia Commons (CC, niche, slower)
+   - YouTube videos / playlists (gold for combat footage)
+
+ Outputs:
+   drone-dataset-v2/<bucket>/<file>.jpg   ← local mirror
+   drone-dataset-v2/manifest.json         ← every file with provenance
+
+ Usage:
+   # Web search only (DDG + Wikimedia)
+   python3 gather.py --bucket positive/fiber_spool_drone \\
+       --query "fiber optic drone Ukraine" --query "tethered fpv drone" \\
+       --max-per-query 100
+
+   # YouTube frame extraction
+   python3 gather.py --bucket positive/fiber_spool_drone \\
+       --youtube "https://youtube.com/playlist?list=ABC123" \\
+       --fps 1 --max-frames-per-video 200
+
+   # Inline Qwen filter (only saves YES images)
+   python3 gather.py --bucket positive/fiber_spool_drone \\
+       --query "fiber optic drone" --filter
+ """
+
+ import argparse
+ import base64
+ import hashlib
+ import io
+ import json
+ import os
+ import shutil
+ import subprocess
+ import time
+ import urllib.request
+ import urllib.parse
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from PIL import Image
+
+
+ # ============================================================
+ # CONFIG
+ # ============================================================
+
+ USER_AGENT = "data-label-factory-gather/0.1 (research project)"
+ # Override via env vars (same as the rest of the factory CLI)
+ M4_QWEN_URL = os.environ.get("QWEN_URL", "http://localhost:8291")
+ QWEN_MODEL_PATH = os.environ.get(
+     "QWEN_MODEL_PATH", "mlx-community/Qwen2.5-VL-3B-Instruct-4bit"
+ )
+ QWEN_FILTER_PROMPT = (
+     "Look at this image. Does it show a drone, a cable spool, or a wound fiber optic cable?\n"
+     "Answer with exactly one word: YES or NO.\n"
+     "YES if you see ANY of: a drone, a quadcopter, a cable reel, a fiber spool, a wound cable.\n"
+     "NO if the main subject is something else."
+ )
+
+ IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif"}
+
+
+ # ============================================================
+ # DUCKDUCKGO IMAGE SEARCH (no API key)
+ # ============================================================
+
+
+ def ddg_search(query: str, max_results: int = 50) -> list:
+     """Returns list of dicts: {url, source, title, page}."""
+     import re
+     results = []
+     headers = {"User-Agent": USER_AGENT}
+
+     # Step 1: get vqd token
+     try:
+         token_url = f"https://duckduckgo.com/?q={urllib.parse.quote(query)}&iax=images&ia=images"
+         req = urllib.request.Request(token_url, headers=headers)
+         with urllib.request.urlopen(req, timeout=15) as resp:
+             html = resp.read().decode("utf-8", errors="ignore")
+         m = re.search(r'vqd=["\']?([\d-]+)["\']?', html)
+         if not m:
+             return results
+         vqd = m.group(1)
+     except Exception as e:
+         print(f" ddg token err: {e}")
+         return results
+
+     # Step 2: paginate i.js
+     seen = set()
+     next_url = None
+     while len(results) < max_results:
+         if next_url is None:
+             params = {"l": "us-en", "o": "json", "q": query, "vqd": vqd, "f": ",,,,,", "p": "1"}
+             url = f"https://duckduckgo.com/i.js?{urllib.parse.urlencode(params)}"
+         else:
+             url = "https://duckduckgo.com" + next_url
+         try:
+             req = urllib.request.Request(url, headers=headers)
+             with urllib.request.urlopen(req, timeout=15) as resp:
+                 data = json.loads(resp.read())
+         except Exception as e:
+             print(f" ddg page err: {e}")
+             break
+         items = data.get("results", [])
+         if not items:
+             break
+         for it in items:
+             img_url = it.get("image")
+             if not img_url or img_url in seen:
+                 continue
+             seen.add(img_url)
+             results.append({
+                 "url": img_url,
+                 "source": "duckduckgo",
+                 "title": it.get("title", "")[:200],
+                 "page": it.get("url", ""),
+                 "license": "unknown",
+                 "query": query,
+             })
+             if len(results) >= max_results:
+                 break
+         next_url = data.get("next")
+         if not next_url:
+             break
+         time.sleep(0.3)
+     return results
+
+
+ # ============================================================
+ # WIKIMEDIA COMMONS (CC, free)
+ # ============================================================
+
+
+ def wikimedia_search(query: str, max_results: int = 50) -> list:
+     params = {
+         "action": "query", "format": "json",
+         "generator": "search", "gsrsearch": f"filetype:bitmap {query}",
+         "gsrnamespace": "6", "gsrlimit": str(min(50, max_results)),
+         "prop": "imageinfo", "iiprop": "url|extmetadata|size",
+     }
+     url = f"https://commons.wikimedia.org/w/api.php?{urllib.parse.urlencode(params)}"
+     results = []
+     try:
+         req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+         with urllib.request.urlopen(req, timeout=20) as resp:
+             data = json.loads(resp.read())
+     except Exception as e:
+         print(f" wikimedia err: {e}")
+         return results
+     pages = (data.get("query") or {}).get("pages") or {}
+     for _, p in pages.items():
+         ii = (p.get("imageinfo") or [{}])[0]
+         img_url = ii.get("url")
+         if not img_url:
+             continue
+         ext = (ii.get("extmetadata") or {})
+         license_name = (ext.get("LicenseShortName") or {}).get("value", "")
+         results.append({
+             "url": img_url,
+             "source": "wikimedia",
+             "title": p.get("title", ""),
+             "page": f"https://commons.wikimedia.org/wiki/{urllib.parse.quote(p.get('title', ''))}",
+             "license": license_name,
+             "query": query,
+         })
+         if len(results) >= max_results:
+             break
+     return results
+
+
+ # ============================================================
+ # YOUTUBE FRAME EXTRACTION (the killer feature)
+ # ============================================================
+
+
+ def youtube_extract_frames(
+     video_url: str,
+     out_dir: str,
+     fps: float = 1.0,
+     max_frames: int = 200,
+     cookies_from_browser: str = None,
+ ) -> list:
+     """Download a YouTube video, extract frames at given fps. Returns list of frame paths.
+     Uses yt-dlp + ffmpeg (via imageio_ffmpeg's bundled binary).
+     """
+     import yt_dlp
+     import imageio_ffmpeg
+
+     ffmpeg_bin = imageio_ffmpeg.get_ffmpeg_exe()
+     os.makedirs(out_dir, exist_ok=True)
+     work_dir = os.path.join(out_dir, "_video_tmp")
+     os.makedirs(work_dir, exist_ok=True)
+
+     # Download with yt-dlp — android+web player clients bypass most YT bot detection
+     print(f" yt-dlp downloading: {video_url}")
+     ydl_opts = {
+         "format": "worstvideo[height>=480]/worst",
+         "outtmpl": os.path.join(work_dir, "%(id)s.%(ext)s"),
+         "quiet": True,
+         "no_warnings": True,
+         "noplaylist": True,
+         "extractor_args": {"youtube": {"player_client": ["android", "web"]}},
+     }
+     if cookies_from_browser:
+         ydl_opts["cookiesfrombrowser"] = (cookies_from_browser,)
+     try:
+         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+             info = ydl.extract_info(video_url, download=True)
+             video_id = info.get("id", "video")
+             title = info.get("title", "")
+     except Exception as e:
+         print(f" yt-dlp failed: {e}")
+         return []
+
+     # Find downloaded file
+     video_files = [os.path.join(work_dir, f) for f in os.listdir(work_dir) if f.startswith(video_id)]
+     if not video_files:
+         print(f" no downloaded video found in {work_dir}")
+         return []
+     video_file = video_files[0]
+
+     # Extract frames via ffmpeg
+     print(f" ffmpeg extracting frames at {fps} fps from {video_file}")
+     frame_pattern = os.path.join(work_dir, f"{video_id}_%05d.jpg")
+     cmd = [
+         ffmpeg_bin, "-y", "-i", video_file,
+         "-vf", f"fps={fps}",
+         "-frames:v", str(max_frames),
+         "-q:v", "3",
+         frame_pattern,
+     ]
+     try:
+         subprocess.run(cmd, capture_output=True, check=True, timeout=600)
+     except Exception as e:
+         print(f" ffmpeg failed: {e}")
+         return []
+
+     frames = sorted(f for f in os.listdir(work_dir) if f.startswith(video_id + "_") and f.endswith(".jpg"))
+     out_frames = []
+     for i, fr in enumerate(frames):
+         src = os.path.join(work_dir, fr)
+         dest = os.path.join(out_dir, f"yt_{video_id}_{i:05d}.jpg")
+         shutil.move(src, dest)
+         out_frames.append({
+             "path": dest,
+             "source": "youtube",
+             "video_id": video_id,
+             "video_title": title,
+             "video_url": video_url,
+             "frame_index": i,
+             "license": "see source video",
+         })
+
+     # Clean up downloaded video
+     try:
+         os.unlink(video_file)
+     except Exception:
+         pass
+
+     print(f" → extracted {len(out_frames)} frames")
+     return out_frames
+
+
+ # ============================================================
+ # QWEN INLINE FILTER (optional)
+ # ============================================================
+
+
+ def qwen_yes_no(image_path: str, m4_url: str = M4_QWEN_URL, timeout: int = 30) -> tuple:
+     """Returns (verdict, raw_answer). verdict ∈ {YES, NO, UNKNOWN, ERROR}."""
+     try:
+         img = Image.open(image_path).convert("RGB")
+         max_dim = 1024
+         if max(img.size) > max_dim:
+             ratio = max_dim / max(img.size)
+             img = img.resize((int(img.size[0] * ratio), int(img.size[1] * ratio)), Image.LANCZOS)
+         buf = io.BytesIO()
+         img.save(buf, format="PNG")
+         b64 = base64.b64encode(buf.getvalue()).decode()
+         payload = {
+             "model": QWEN_MODEL_PATH,
+             "messages": [{
+                 "role": "user",
+                 "content": [
+                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
+                     {"type": "text", "text": QWEN_FILTER_PROMPT},
+                 ],
+             }],
+             "max_tokens": 12, "temperature": 0,
+         }
+         req = urllib.request.Request(
+             f"{m4_url}/v1/chat/completions",
+             data=json.dumps(payload).encode(),
+             headers={"Content-Type": "application/json"},
+             method="POST",
+         )
+         with urllib.request.urlopen(req, timeout=timeout) as r:
+             data = json.loads(r.read())
+         ans = data["choices"][0]["message"]["content"].strip().upper()
+         first = ans.split()[0].rstrip(".,") if ans else ""
+         verdict = "YES" if "YES" in first else ("NO" if "NO" in first else "UNKNOWN")
+         return verdict, ans
+     except Exception as e:
+         return "ERROR", str(e)
+
+
+ # ============================================================
+ # DOWNLOAD + DEDUP
+ # ============================================================
+
+
+ def url_filename(url: str, source: str) -> str:
+     h = hashlib.sha1(url.encode()).hexdigest()[:12]
+     ext = os.path.splitext(urllib.parse.urlparse(url).path)[1].lower()
+     if ext not in IMAGE_EXTS:
+         ext = ".jpg"
+     return f"{source}_{h}{ext}"
+
+
+ def download_one(url: str, dest: str, timeout: int = 30) -> tuple:
+     try:
+         req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+         with urllib.request.urlopen(req, timeout=timeout) as resp:
+             data = resp.read()
+         if len(data) < 1024:
+             return False, 0, "too small"
+         with open(dest, "wb") as f:
+             f.write(data)
+         return True, len(data), None
+     except Exception as e:
+         return False, 0, str(e)
+
+
+ def perceptual_hash(image_path: str) -> str:
+     """8x8 average-hash for fast cross-source dedup."""
+     try:
+         img = Image.open(image_path).convert("L").resize((8, 8), Image.LANCZOS)
+         pixels = list(img.getdata())
+         avg = sum(pixels) / len(pixels)
+         bits = "".join("1" if p > avg else "0" for p in pixels)
+         return hex(int(bits, 2))[2:].zfill(16)
+     except Exception:
+         return ""
+
+
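`perceptual_hash` is a plain average hash: threshold the 64 downsampled gray values at their mean, then pack the bits into hex. The bit-packing step without the PIL resize (`average_hash_bits` is an illustrative name; the real function does the 8x8 downsample itself):

```python
def average_hash_bits(gray64) -> str:
    """64 grayscale samples (the 8x8 downsample) -> 16-hex-char aHash."""
    avg = sum(gray64) / len(gray64)
    # one bit per pixel: brighter than average -> 1
    bits = "".join("1" if p > avg else "0" for p in gray64)
    return hex(int(bits, 2))[2:].zfill(16)
```

Near-duplicate images differ in only a few bits, so hashes can be compared by Hamming distance; gather.py uses the stricter exact-equality check, which still catches byte-identical and lightly re-encoded copies of the same frame.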
+ # ============================================================
+ # MAIN
+ # ============================================================
+
+
+ def main():
+     p = argparse.ArgumentParser()
+     p.add_argument("--out", default="drone-dataset-v2", help="Image output root (shared across experiments)")
+     p.add_argument("--bucket", required=True, help="Bucket subpath, e.g. positive/fiber_spool_drone")
+     p.add_argument("--experiment", default="",
+                    help="Optional experiment name; if set, creates experiments/<YYYY-MM-DD_HHMMSS>_<name>/")
+     p.add_argument("--query", action="append", default=[],
+                    help="Search query (repeatable). Hits DDG + Wikimedia.")
+     p.add_argument("--youtube", action="append", default=[],
+                    help="YouTube video URL or playlist URL (repeatable). Extracts frames.")
+     p.add_argument("--fps", type=float, default=1.0, help="Frames per second to extract from videos")
+     p.add_argument("--max-frames-per-video", type=int, default=200)
+     p.add_argument("--max-per-query", type=int, default=100)
+     p.add_argument("--workers", type=int, default=50, help="Parallel download threads")
+     p.add_argument("--filter", action="store_true",
+                    help="Run Qwen YES/NO filter on each downloaded image, skip NO")
+     p.add_argument("--cookies-from-browser", default=None,
+                    help="For YouTube: chrome|safari|firefox — use browser cookies for age-gated/login videos")
+     args = p.parse_args()
+
+     bucket_dir = os.path.join(args.out, args.bucket)
+     os.makedirs(bucket_dir, exist_ok=True)
+
+     # Set up the dated experiment dir if requested
+     experiment_dir = None
+     if args.experiment or "EXPERIMENT_DIR" in os.environ:
+         from experiments import make_experiment_dir, write_readme, write_config, update_latest_symlink
+         if "EXPERIMENT_DIR" in os.environ:
+             experiment_dir = os.environ["EXPERIMENT_DIR"]
+             os.makedirs(os.path.join(experiment_dir, "gather"), exist_ok=True)
+         else:
+             experiment_dir = make_experiment_dir(args.experiment)
+             write_readme(
+                 experiment_dir,
+                 name=args.experiment,
+                 description=f"gather_v2 run: bucket={args.bucket}, queries={args.query}, youtube={len(args.youtube)} videos",
+                 params=vars(args),
+             )
+             write_config(experiment_dir, vars(args))
+             update_latest_symlink(experiment_dir)
+         manifest_path = os.path.join(experiment_dir, "gather", "manifest.json")
+         print(f"Experiment dir: {experiment_dir}")
+     else:
+         manifest_path = os.path.join(args.out, "manifest.json")
+     manifest = []
+     if os.path.exists(manifest_path):
+         with open(manifest_path) as f:
+             manifest = json.load(f)
+         print(f"Resumed: {len(manifest)} files in manifest")
+
+     # Track URL + perceptual-hash dedup sets
+     seen_urls = {m["url"] for m in manifest if "url" in m}
+     seen_hashes = {m["phash"] for m in manifest if m.get("phash")}
+
+     # ===== Step 1: web search =====
+     web_hits = []
+     for q in args.query:
+         print(f"\n[search] {q!r}")
+         ddg_results = ddg_search(q, max_results=args.max_per_query)
+         wiki_results = wikimedia_search(q, max_results=args.max_per_query)
+         print(f" DDG: {len(ddg_results)} Wikimedia: {len(wiki_results)}")
+         web_hits.extend(ddg_results)
+         web_hits.extend(wiki_results)
+
+     # Filter out duplicates by URL
+     web_hits = [h for h in web_hits if h["url"] not in seen_urls]
+     print(f"\n {len(web_hits)} new web URLs to download (after dedup)")
+
+     # ===== Step 2: parallel download =====
+     downloaded = []
+     if web_hits:
+         print(f"\n[download] {len(web_hits)} files via {args.workers} threads...")
+         t0 = time.time()
+         n_ok, n_skip, n_err = 0, 0, 0
+         with ThreadPoolExecutor(max_workers=args.workers) as pool:
+             futures = {}
+             for hit in web_hits:
+                 fname = url_filename(hit["url"], hit["source"])
+                 dest = os.path.join(bucket_dir, fname)
+                 if os.path.exists(dest):
+                     n_skip += 1
+                     continue
+                 futures[pool.submit(download_one, hit["url"], dest)] = (hit, dest)
+             for fut in as_completed(futures):
+                 hit, dest = futures[fut]
+                 ok, nbytes, err = fut.result()
+                 if ok:
+                     n_ok += 1
+                     downloaded.append({**hit, "path": dest, "bytes": nbytes})
+                 else:
+                     n_err += 1
+         elapsed = time.time() - t0
+         print(f" downloaded: {n_ok} new, {n_skip} already present, {n_err} errors in {elapsed:.0f}s")
+
454
+ # ===== Step 3: YouTube frame extraction =====
455
+ youtube_hits = []
456
+ for video_url in args.youtube:
457
+ print(f"\n[youtube] {video_url}")
458
+ frames = youtube_extract_frames(
459
+ video_url, bucket_dir,
460
+ fps=args.fps, max_frames=args.max_frames_per_video,
461
+ cookies_from_browser=args.cookies_from_browser,
462
+ )
463
+ youtube_hits.extend(frames)
464
+
465
+ # ===== Step 4: dedup via perceptual hash =====
466
+ if downloaded or youtube_hits:
467
+ print(f"\n[dedup] computing perceptual hashes...")
468
+ for entry in downloaded + youtube_hits:
469
+ phash = perceptual_hash(entry["path"])
470
+ entry["phash"] = phash
471
+ if phash and phash in seen_hashes:
472
+ # duplicate — remove the file
473
+ try:
474
+ os.unlink(entry["path"])
475
+ except Exception:
476
+ pass
477
+ entry["dropped"] = "dup_phash"
478
+ else:
479
+ seen_hashes.add(phash)
480
+ n_dropped = sum(1 for e in downloaded + youtube_hits if e.get("dropped"))
481
+ print(f" dropped {n_dropped} duplicates")
482
+
483
+ # ===== Step 5: Optional Qwen filter =====
484
+ survivors = []
485
+ for entry in downloaded + youtube_hits:
486
+ if entry.get("dropped"):
487
+ continue
488
+ if not args.filter:
489
+ survivors.append(entry)
490
+ continue
491
+ verdict, raw = qwen_yes_no(entry["path"])
492
+ entry["qwen_verdict"] = verdict
493
+ entry["qwen_answer"] = raw
494
+ if verdict != "YES":
495
+ try:
496
+ os.unlink(entry["path"])
497
+ except Exception:
498
+ pass
499
+ entry["dropped"] = f"qwen_{verdict}"
500
+ else:
501
+ survivors.append(entry)
502
+
+ # ===== Save manifest + stats =====
+ for entry in downloaded + youtube_hits:
+ entry["bucket"] = args.bucket
+ manifest.append(entry)
+ os.makedirs(os.path.dirname(manifest_path), exist_ok=True)
+ with open(manifest_path, "w") as f:
+ json.dump(manifest, f, indent=2)
+
+ # If we're inside an experiment dir, write stats.json next to the manifest
+ if experiment_dir:
+ from datetime import datetime # stdlib; imported here so the rest of the script doesn't need it
+ stats = {
+ "bucket": args.bucket,
+ "queries": args.query,
+ "youtube_urls": args.youtube,
+ "web_hits_found": len(web_hits),
+ "downloaded": len(downloaded),
+ "youtube_frames": len(youtube_hits),
+ "dropped_dup": sum(1 for e in (downloaded + youtube_hits) if e.get("dropped") == "dup_phash"),
+ "dropped_qwen": sum(1 for e in (downloaded + youtube_hits) if e.get("dropped", "").startswith("qwen")),
+ "survivors": len(survivors),
+ "filter_enabled": args.filter,
+ "manifest_total": len(manifest),
+ "completed_at": datetime.now().isoformat(timespec="seconds"),
+ }
+ stats_path = os.path.join(experiment_dir, "gather", "stats.json")
+ with open(stats_path, "w") as f:
+ json.dump(stats, f, indent=2)
+ print(f" stats: {stats_path}")
+
+ # ===== Summary =====
+ print("\n" + "=" * 60)
+ print("DONE")
+ print("=" * 60)
+ print(f" bucket: {args.bucket}")
+ print(f" web hits found: {len(web_hits)}")
+ print(f" downloaded: {len(downloaded)}")
+ print(f" youtube frames: {len(youtube_hits)}")
+ if args.filter:
+ n_yes = sum(1 for e in downloaded + youtube_hits if e.get("qwen_verdict") == "YES")
+ n_no = sum(1 for e in downloaded + youtube_hits if e.get("qwen_verdict") == "NO")
+ print(f" qwen filter: YES={n_yes} NO={n_no}")
+ print(f" survivors: {len(survivors)}")
+ print(f" manifest: {manifest_path} ({len(manifest)} total)")
+
+
+ if __name__ == "__main__":
+ main()
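The dedup pass in Step 4 reduces to set membership on the perceptual hash. A standalone sketch of the same bookkeeping (the function name and the free-standing form are ours; the actual script inlines this logic and also unlinks the duplicate files):

```python
def mark_duplicates(entries: list[dict], seen_hashes: set) -> list[dict]:
    """Mark entries whose phash was already seen; mutates seen_hashes in place.

    Entries with no phash (None) are never treated as duplicates of each other.
    """
    for entry in entries:
        phash = entry.get("phash")
        if phash and phash in seen_hashes:
            entry["dropped"] = "dup_phash"
        elif phash:
            seen_hashes.add(phash)
    return entries
```

Because `seen_hashes` persists across gather runs (it is rebuilt from the manifest), re-running the same queries only adds genuinely new images.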
data_label_factory/project.py ADDED
@@ -0,0 +1,176 @@
+ """
+ project.py — load and validate a project YAML for the data labeling factory.
+
+ Usage:
+ from project import load_project
+ proj = load_project("projects/drones.yaml")
+ print(proj.target_object) # "fiber optic drone"
+ print(proj.bucket_queries["positive/fiber_spool_drone"]) # list of queries
+ print(proj.prompt("filter")) # templated string with {target_object} substituted
+ print(proj.r2_key("raw", "positive/fiber_spool_drone/foo.jpg")) # raw_v2/positive/...
+
+ The project loader is the SINGLE source of truth for paths, prompts, queries, and
+ backends. All scripts (gather, filter, label, verify) read from this object instead
+ of having hardcoded values.
+ """
+
+ from __future__ import annotations
+ import os
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ try:
+ import yaml
+ except ImportError:
+ raise SystemExit("PyYAML required: pip install pyyaml")
+
+
+ # Default prompt templates. Override in the project YAML's `prompts:` section.
+ DEFAULT_PROMPTS = {
+ "filter": (
+ "Look at this image. Does it show a {target_object} or a related object "
+ "(its components, parts, or accessories)?\n"
+ "Answer with exactly one word: YES or NO.\n"
+ "YES if the main subject is a {target_object} or directly relevant to it.\n"
+ "NO if the main subject is unrelated."
+ ),
+ "verify": (
+ "Look carefully at this image crop. "
+ "Question: Is the main object in this crop actually a {query}? "
+ "Answer first with one word: YES, NO, or UNSURE. "
+ "Then briefly say what the object actually is in 5-10 words."
+ ),
+ "label_describe": (
+ "Look at this image. If it shows a {target_object} or related object, "
+ "describe what you see in 1-2 sentences. "
+ "If it doesn't, say 'no {target_object}'."
+ ),
+ }
+
+
+ @dataclass
+ class ProjectConfig:
+ """Loaded project YAML, with helpers."""
+
+ project_name: str
+ target_object: str
+ description: str
+ data_root: str
+ r2_bucket: str
+ r2_raw_prefix: str
+ r2_labels_prefix: str
+ r2_reviews_prefix: str
+ bucket_queries: dict[str, list[str]]
+ falcon_queries: list[str]
+ prompts_raw: dict[str, str]
+ backends: dict[str, str]
+ pod_config: dict[str, Any]
+ raw: dict[str, Any] = field(default_factory=dict)
+
+ # ---------- helpers ----------
+
+ def prompt(self, name: str, **extra) -> str:
+ """Get a prompt template with {target_object} (and any extras) substituted."""
+ template = self.prompts_raw.get(name) or DEFAULT_PROMPTS.get(name)
+ if template is None:
+ raise KeyError(f"unknown prompt name: {name!r}")
+ ctx = {"target_object": self.target_object, **extra}
+ return template.format(**ctx)
+
+ def r2_key(self, kind: str, *parts: str) -> str:
+ """Build an R2 object key for a given stage.
+ kind ∈ {raw, labels, reviews}
+ """
+ if kind == "raw":
+ return self.r2_raw_prefix.rstrip("/") + "/" + "/".join(parts)
+ if kind == "labels":
+ return self.r2_labels_prefix.rstrip("/") + "/" + "/".join(parts)
+ if kind == "reviews":
+ return self.r2_reviews_prefix
+ raise KeyError(f"unknown r2 kind: {kind}")
+
+ def local_image_dir(self) -> str:
+ """Resolved local image cache directory."""
+ return os.path.expanduser(self.data_root)
+
+ def all_buckets(self) -> list[str]:
+ return list(self.bucket_queries.keys())
+
+ def total_query_count(self) -> int:
+ return sum(len(v) for v in self.bucket_queries.values())
+
+ def backend_for(self, stage: str) -> str:
+ return self.backends.get(stage, "qwen")
+
+
+ def load_project(path: str) -> ProjectConfig:
+ """Load + validate a project YAML."""
+ path = os.path.expanduser(path)
+ with open(path) as f:
+ data = yaml.safe_load(f)
+
+ if not isinstance(data, dict):
+ raise ValueError(f"project YAML must be a mapping, got {type(data).__name__}")
+
+ required = ["project_name", "target_object", "buckets", "falcon_queries"]
+ for k in required:
+ if k not in data:
+ raise ValueError(f"project YAML missing required field: {k}")
+
+ # Buckets normalization
+ bucket_queries = {}
+ for bucket, spec in data["buckets"].items():
+ if isinstance(spec, list):
+ bucket_queries[bucket] = spec
+ elif isinstance(spec, dict) and "queries" in spec:
+ bucket_queries[bucket] = spec["queries"]
+ else:
+ raise ValueError(f"bucket {bucket!r} must be a list or dict with 'queries'")
+
+ r2 = data.get("r2", {})
+ backends = data.get("backends", {})
+ backends.setdefault("filter", "qwen")
+ backends.setdefault("label", "pod")
+ backends.setdefault("verify", "pod")
+
+ return ProjectConfig(
+ project_name=data["project_name"],
+ target_object=data["target_object"],
+ description=data.get("description", ""),
+ data_root=data.get("data_root", "~/data-label-factory/" + data["project_name"]),
+ r2_bucket=r2.get("bucket", data["project_name"]),
+ r2_raw_prefix=r2.get("raw_prefix", "raw/"),
+ r2_labels_prefix=r2.get("labels_prefix", "labels/"),
+ r2_reviews_prefix=r2.get("reviews_prefix", "labels/reviews.json"),
+ bucket_queries=bucket_queries,
+ falcon_queries=list(data["falcon_queries"]),
+ prompts_raw=data.get("prompts") or {},
+ backends=backends,
+ pod_config=data.get("pod", {}),
+ raw=data,
+ )
+
+
+ # CLI: load + dump for inspection
+ if __name__ == "__main__":
+ import sys
+ if len(sys.argv) < 2:
+ print("usage: python3 project.py <project.yaml>")
+ sys.exit(1)
+ proj = load_project(sys.argv[1])
+ print("=" * 60)
+ print(f"Project: {proj.project_name}")
+ print("=" * 60)
+ print(f" target_object: {proj.target_object!r}")
+ print(f" data_root: {proj.local_image_dir()}")
+ print(f" r2_bucket: {proj.r2_bucket}")
+ print(f" r2 raw prefix: {proj.r2_raw_prefix}")
+ print(f" buckets ({len(proj.bucket_queries)}):")
+ for b, qs in proj.bucket_queries.items():
+ print(f" {b:40s} {len(qs)} queries")
+ print(f" falcon_queries: {proj.falcon_queries}")
+ print(f" backends: {proj.backends}")
+ print(f" total_queries: {proj.total_query_count()}")
+ print("\n Sample filter prompt:")
+ print(f" {proj.prompt('filter')[:250]}")
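The bucket normalization inside `load_project` accepts two shapes for each bucket: a plain list of queries, or a mapping with a `queries` key. Pulled out as a standalone sketch (the helper name is ours, not part of the module):

```python
def normalize_buckets(buckets: dict) -> dict:
    """Normalize the `buckets:` section of a project YAML.

    Each bucket value may be a plain list of query strings, or a dict with a
    'queries' key (the shape drones.yaml uses). Anything else is rejected.
    """
    out = {}
    for bucket, spec in buckets.items():
        if isinstance(spec, list):
            out[bucket] = spec
        elif isinstance(spec, dict) and "queries" in spec:
            out[bucket] = spec["queries"]
        else:
            raise ValueError(f"bucket {bucket!r} must be a list or dict with 'queries'")
    return out
```

Accepting both shapes means a quick project YAML can use bare lists, while richer configs can hang extra per-bucket settings next to `queries` later without breaking the loader.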
docs/canvas-demo.gif ADDED

Git LFS Details

  • SHA256: 2e74e91cd803ad39c4b5fce613c427c0cda1686f242cf5ac73406fed187d47d4
  • Pointer size: 131 Bytes
  • Size of remote file: 913 kB
docs/x-launch-thread.md ADDED
@@ -0,0 +1,113 @@
+ # X launch thread — data-label-factory
+
+ A 7-tweet thread. Target: ML-Twitter, Apple-Silicon devs, dataset-builders.
+ Hook: "labeled 1.8k drone images on a 16 GB MacBook." Asset: canvas-demo.gif.
+
+ ---
+
+ **1/ (the hook)**
+
+ I labeled 1,799 fiber-optic drone images on a 16 GB MacBook.
+
+ No GPU. No cloud. No labeling vendor.
+
+ One Python CLI + one YAML file + a 26-billion-parameter vision model streamed off the SSD.
+
+ Open-sourcing the whole pipeline today 🧵
+
+ [attach: canvas-demo.gif]
+
+ ---
+
+ **2/ (what's in the box)**
+
+ `data-label-factory` is a generic auto-labeling pipeline for vision datasets.
+
+ You write a project YAML — `target_object: "fire hydrant"`, a few search queries, done — and run:
+
+ ```
+ data_label_factory pipeline --project projects/fire-hydrants.yaml
+ ```
+
+ Out the other end: a clean COCO dataset, reviewed in a browser.
+
+ ---
+
+ **3/ (the pipeline)**
+
+ Four stages, all running locally on Apple Silicon:
+
+ ```
+ gather → filter → label → verify → review
+ (DDG) (VLM) (Falcon) (VLM) (Canvas)
+ ```
+
+ - **gather** — DuckDuckGo / Wikimedia / Openverse image search per bucket
+ - **filter** — image-level YES/NO classification (Qwen 2.5-VL or Gemma 4)
+ - **label** — bbox grounding via Falcon Perception (TII)
+ - **verify** — per-bbox YES/NO via the same VLM
+ - **review** — HTML5 Canvas web UI with hover/click/zoom/pan
+
+ ---
+
+ **4/ (how it fits in 16 GB RAM)**
+
+ The trick is MLX Expert Sniper.
+
+ Gemma 4-26B is a Mixture-of-Experts model — only ~3 GB of weights are active per token. So instead of loading all 13 GB into RAM, we **stream cold experts off the SSD on demand**.
+
+ Resident set: ~3 GB Gemma + 1.5 GB Falcon = ~5 GB total.
+
+ You get 26B-param vision quality on a base-model M-series Mac.
+
+ ---
+
+ **5/ (what we labeled)**
+
+ Reference run: detect fiber-optic-spool drones (the Ukraine-conflict kind).
+
+ - 1,421 images gathered from DDG + Wikimedia + Openverse
+ - 15,355 Falcon Perception bboxes generated
+ - 11,928 (78%) verified YES by Qwen 2.5-VL
+ - All reviewed in the canvas UI
+
+ Per-query Falcon↔Qwen agreement:
+ `cable spool` 88% · `quadcopter` 81% · `drone` 80%
+
+ ---
+
+ **6/ (the canvas UI)**
+
+ The review tool is **pure HTML5 Canvas** — no SVG, no React-DOM bbox elements, just `ctx.drawImage` + `ctx.strokeRect` rendered every frame.
+
+ Drag to pan, scroll to zoom around the cursor, click a bbox to inspect, ←→ to step through 1,799 images.
+
+ [attach: canvas-demo.gif]
+
+ ---
+
+ **7/ (the link)**
+
+ Repo: https://github.com/<USER>/data-label-factory
+ Reference dataset (1.8k drone images, COCO + verdicts): https://huggingface.co/datasets/<USER>/fiber-optic-drones
+
+ Reproduce in 5 commands:
+ ```
+ git clone <repo>
+ cd data-label-factory && pip install pyyaml pillow requests
+ python3 -m mlx_vlm.server --model mlx-community/Qwen2.5-VL-3B-Instruct-4bit --port 8291
+ data_label_factory pipeline --project projects/stop-signs.yaml
+ cd web && PORT=3030 npm run dev # http://localhost:3030/canvas
+ ```
+
+ Built on @MLX_apple, @PrinceCanuma's mlx-vlm, Falcon Perception by @TIIuae, and Gemma 4 by @GoogleDeepMind. Apache 2.0 all the way down.
+
+ ---
+
+ ## Notes for posting day
+
+ - Replace `<USER>` with the github org once chosen
+ - Confirm HF dataset card exists before posting tweet 7
+ - Pin tweet 1 to profile for the day
+ - Best post window: Tue/Wed 9-11am PT (ML-Twitter is most active)
+ - If engagement spikes, follow up with: a behind-the-scenes thread on the Expert Sniper streaming engine, OR a "label your own dataset in 10 minutes" tutorial
projects/drones.yaml ADDED
@@ -0,0 +1,133 @@
+ # =====================================================================
+ # drones.yaml — example project config for the data labeling factory
+ # =====================================================================
+ #
+ # This is the canonical example. It captures EXACTLY what we built tonight
+ # for the fiber-optic drone detector. To make a new project (e.g. stop signs,
+ # fire hydrants, manufacturing defects), copy this file, change `target_object`,
+ # adjust the queries, and run:
+ #
+ # data_label_factory pipeline --project projects/drones.yaml
+ #
+ # Generic shape:
+ # project_name → human-readable identifier (used in experiment dir names)
+ # target_object → the thing you're trying to detect (templated into prompts)
+ # data_root → where local images go
+ # r2: → cloud storage config (bucket, prefix per stage)
+ # buckets: → gather plan (5 buckets is conventional but any structure works)
+ # falcon_queries: → list of queries to run Falcon Perception with
+ # prompts: → optional overrides for templated prompts
+ # backends: → which model backend to use per stage
+ # =====================================================================
+
+ project_name: drones
+ target_object: "fiber optic drone"
+ description: |
+ Auto-labeling pipeline for fiber-optic drone detection. Falcon Perception
+ grounds bboxes for any drone, spool, or cable; Qwen2.5-VL verifies each.
+
+ # Where local images live (gitignored)
+ data_root: ~/drone-falcon-data/v2
+
+ # Cloudflare R2 storage
+ r2:
+ bucket: drone-falcon
+ raw_prefix: raw_v2/ # gathered images
+ labels_prefix: labels/ # COCO + verified JSONs
+ reviews_prefix: labels/reviews.json # human verdicts saved by web UI
+
+ # What to gather, organized by bucket. Each bucket is a folder under data_root
+ # and a corresponding R2 prefix. Multiple queries are OR'd via DDG/Wikimedia.
+ buckets:
+ positive/fiber_spool_drone:
+ queries:
+ - "fiber optic FPV drone"
+ - "tethered fiber optic drone"
+ - "Ukraine fiber optic drone war"
+ - "fiber optic kamikaze drone"
+ - "fiber optic drone payload"
+ - "wired FPV drone Ukraine"
+ - "fiber optic drone with spool"
+ - "Russian fiber optic drone"
+ - "fiber optic dispenser drone"
+ - "fiber optic combat drone"
+
+ positive/spool_only:
+ queries:
+ - "fiber optic cable spool"
+ - "optical fiber reel"
+ - "fiber optic winding machine"
+ - "spooled optical fiber cable"
+ - "fiber optic cable on reel"
+ - "optical fiber cable drum"
+
+ negative/drones_no_spool:
+ queries:
+ - "DJI Mavic 3 Pro photo"
+ - "FPV racing drone closeup"
+ - "consumer quadcopter flying"
+ - "agricultural spraying drone"
+ - "DJI Mini 4 Pro photo"
+ - "Autel Evo drone"
+ - "Skydio 2 drone"
+ - "racing drone build"
+
+ distractor/round_things:
+ queries:
+ - "garden hose reel"
+ - "cable drum reel industrial"
+ - "duct tape roll"
+ - "fire hose reel"
+ - "rope coil pile"
+ - "extension cord reel"
+ - "thread spool sewing"
+
+ background/empty:
+ queries:
+ - "blue sky clouds"
+ - "open field landscape"
+ - "industrial workshop interior"
+ - "outdoor military training"
+
+ # What Falcon Perception should look for in each image (the bbox grounding queries).
+ # These are the specific objects we want bounding boxes on.
+ falcon_queries:
+ - "fiber optic spool"
+ - "cable spool"
+ - "drone"
+ - "quadcopter"
+ - "fiber optic drone"
+
+ # Optional: prompt overrides. Default templates use {target_object} substitution.
+ # Delete this section to use the defaults from lib/project.py.
+ prompts:
+ filter: |
+ Look at this image. Does it show a {target_object}, a related component
+ (cable spool, fiber reel, wound cable), or any other relevant object?
+ Answer with exactly one word: YES or NO.
+ YES if you see ANY of: a {target_object}, a quadcopter, a cable reel, a fiber spool.
+ NO if the main subject is something else.
+
+ verify: |
+ Look carefully at this image crop.
+ Question: Is the main object in this crop actually a {query}?
+ Answer first with one word: YES, NO, or UNSURE.
+ Then briefly say what the object actually is in 5-10 words.
+
+ # Which model backend to use per stage.
+ # qwen = Qwen2.5-VL-3B via mlx-vlm server (M4 :8291) — fast, free
+ # gemma = Gemma 4 26B via mac_tensor (M4 :8500) — slow chained agent
+ # falcon = Falcon Perception via mac_tensor /api/falcon — bbox grounding only
+ # pod = remote RunPod GPU pod — fast, ~$0.15-1.65/run
+ backends:
+ filter: qwen
+ label: pod # Falcon on RunPod for production scale
+ verify: pod # Qwen on the same pod
+
+ # Pod settings (only used when a stage backend = pod)
+ pod:
+ gpu_types: ["NVIDIA L40S"]
+ data_centers: ["EU-RO-1", "EU-CZ-1", "EU-NL-1", "US-CA-2"]
+ image: "runpod/pytorch:1.0.3-cu1290-torch291-ubuntu2204"
+ container_disk_gb: 30
+ volume_gb: 30
projects/stop-signs.yaml ADDED
@@ -0,0 +1,72 @@
+ # =====================================================================
+ # stop-signs.yaml — proves the data labeling factory is GENERIC
+ # =====================================================================
+ #
+ # Same pipeline as drones.yaml but for stop signs. The only changes are
+ # the queries, the target_object, the data_root, and the falcon_queries.
+ # All scripts read this YAML and adapt automatically — zero code changes
+ # required to onboard a new object class.
+ #
+ # Run with:
+ # data_label_factory pipeline --project projects/stop-signs.yaml
+ # =====================================================================
+
+ project_name: stop-signs
+ target_object: "stop sign"
+ description: |
+ Smoke test project — train a stop sign detector for autonomous driving research.
+
+ data_root: ~/data-label-factory/stop-signs
+
+ r2:
+ bucket: drone-falcon # reuse the same R2 bucket — different prefixes
+ raw_prefix: stop-signs/raw/
+ labels_prefix: stop-signs/labels/
+ reviews_prefix: stop-signs/labels/reviews.json
+
+ buckets:
+ positive/clear_view:
+ queries:
+ - "stop sign closeup"
+ - "red stop sign octagon"
+ - "stop sign daytime"
+ - "stop sign intersection"
+
+ positive/partial_view:
+ queries:
+ - "stop sign occluded"
+ - "stop sign in distance"
+ - "stop sign at angle"
+
+ negative/other_signs:
+ queries:
+ - "yield sign"
+ - "speed limit sign"
+ - "do not enter sign"
+ - "one way sign"
+
+ distractor/red_signs:
+ queries:
+ - "red warning sign"
+ - "red circle traffic sign"
+ - "red parking sign"
+ - "no entry sign"
+
+ background/no_signs:
+ queries:
+ - "empty highway"
+ - "country road landscape"
+ - "city street empty"
+ - "parking lot empty"
+
+ falcon_queries:
+ - "stop sign"
+ - "traffic sign"
+ - "red octagonal sign"
+ - "road sign"
+
+ # Use only Qwen + local M4 for the smoke test (no RunPod)
+ backends:
+ filter: qwen
+ label: qwen # for smoke test, label with Qwen too — proves generic backend selection
+ verify: qwen
pyproject.toml ADDED
@@ -0,0 +1,72 @@
+ [build-system]
+ requires = ["setuptools>=64", "wheel"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "data-label-factory"
+ version = "0.1.0"
+ description = "Generic auto-labeling pipeline for vision datasets — runs on a 16 GB Apple Silicon Mac via SSD-streaming MoE."
+ readme = "README.md"
+ requires-python = ">=3.10"
+ license = { text = "Apache-2.0" }
+ authors = [
+ { name = "walter-grace" },
+ ]
+ keywords = [
+ "vision",
+ "dataset",
+ "labeling",
+ "annotation",
+ "object-detection",
+ "mlx",
+ "apple-silicon",
+ "qwen",
+ "gemma",
+ "falcon-perception",
+ ]
+ classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: MacOS",
+ "Programming Language :: Python :: 3 :: Only",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Topic :: Scientific/Engineering :: Image Recognition",
+ ]
+
+ dependencies = [
+ "pyyaml>=6.0",
+ "pillow>=9.0",
+ "requests>=2.28",
+ ]
+
+ [project.optional-dependencies]
+ gather = [
+ # Image-search gathering (DDG, Wikimedia, YouTube frame extraction)
+ "duckduckgo-search>=4.0",
+ "yt-dlp>=2024.0.0",
+ ]
+ dev = [
+ "pytest>=7.0",
+ "ruff>=0.5.0",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/walter-grace/data-label-factory"
+ Repository = "https://github.com/walter-grace/data-label-factory"
+ HuggingFace = "https://huggingface.co/waltgrace/data-label-factory"
+ Issues = "https://github.com/walter-grace/data-label-factory/issues"
+
+ [project.scripts]
+ data_label_factory = "data_label_factory.cli:main"
+ data-label-factory = "data_label_factory.cli:main"
+
+ [tool.setuptools]
+ packages = ["data_label_factory"]
+
+ [tool.setuptools.package-data]
+ data_label_factory = ["*.py"]
setup.py ADDED
@@ -0,0 +1,6 @@
+ """Shim so older pip versions can do editable installs.
+ Real metadata lives in pyproject.toml."""
+
+ from setuptools import setup
+
+ setup()
web/.env.example ADDED
@@ -0,0 +1,11 @@
+ # web/.env.local — Cloudflare R2 credentials for the review UI
+ #
+ # Copy this file to web/.env.local and fill in your own R2 bucket details.
+ # .env.local is gitignored. NEVER commit real credentials.
+ #
+ # Cloudflare R2 → Manage R2 API Tokens → Create API token (read+write on your bucket)
+
+ R2_ENDPOINT_URL=https://<your-account-id>.r2.cloudflarestorage.com
+ R2_ACCESS_KEY_ID=<your-access-key>
+ R2_SECRET_ACCESS_KEY=<your-secret-key>
+ R2_BUCKET=<your-bucket-name>
web/.gitignore ADDED
@@ -0,0 +1,41 @@
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+ # dependencies
+ /node_modules
+ /.pnp
+ .pnp.*
+ .yarn/*
+ !.yarn/patches
+ !.yarn/plugins
+ !.yarn/releases
+ !.yarn/versions
+
+ # testing
+ /coverage
+
+ # next.js
+ /.next/
+ /out/
+
+ # production
+ /build
+
+ # misc
+ .DS_Store
+ *.pem
+
+ # debug
+ npm-debug.log*
+ yarn-debug.log*
+ yarn-error.log*
+ .pnpm-debug.log*
+
+ # env files (can opt-in for committing if needed)
+ .env*
+
+ # vercel
+ .vercel
+
+ # typescript
+ *.tsbuildinfo
+ next-env.d.ts
web/README.md ADDED
@@ -0,0 +1,36 @@
+ This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
+
+ ## Getting Started
+
+ First, run the development server:
+
+ ```bash
+ npm run dev
+ # or
+ yarn dev
+ # or
+ pnpm dev
+ # or
+ bun dev
+ ```
+
+ Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
+
+ You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
+
+ This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
+
+ ## Learn More
+
+ To learn more about Next.js, take a look at the following resources:
+
+ - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
+ - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
+
+ You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
+
+ ## Deploy on Vercel
+
+ The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
+
+ Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
web/app/api/labels/route.ts ADDED
@@ -0,0 +1,131 @@
+ import { NextResponse } from "next/server";
+ import { getJson, putJson, presignGet } from "@/lib/r2";
+ import type { LabelPartial, ImageReview, VerifiedRun } from "@/lib/types";
+
+ /**
+ * GET /api/labels
+ * → returns the labeled dataset converted to ImageReview[] format,
+ * with one presigned image URL per entry, plus Qwen VLM verdicts,
+ * plus any saved human reviews.
+ *
+ * Reads from R2:
+ * labels/partial.json ← Falcon bboxes (live snapshot from pod)
+ * labels/run1.verified.json ← Qwen yes/no verdict per bbox
+ * labels/reviews.json ← Human verdicts (saved by this UI)
+ */
+ export async function GET() {
+ // Try the live partial first
+ let partial = await getJson<LabelPartial>("labels/partial.json");
+ if (!partial || !partial.results) {
+ const finalRun = await getJson<LabelPartial>("labels/run1_partial.json");
+ if (finalRun) partial = finalRun;
+ }
+
+ if (!partial || !partial.results) {
+ return NextResponse.json({ images: [], total: 0, error: "no labels found in r2 yet" });
+ }
+
+ // Load Qwen verdicts (if they exist) — try run2 first, fall back to run1
+ let verified = await getJson<VerifiedRun>("labels/run2.verified.json");
+ if (!verified?.annotations) {
+ verified = await getJson<VerifiedRun>("labels/run1.verified.json");
+ }
+ const verdictById = new Map<number, { verdict: "YES" | "NO" | "UNSURE"; reasoning: string }>();
+ if (verified?.annotations) {
+ for (const v of verified.annotations) {
+ verdictById.set(v.annotation_id, { verdict: v.verdict, reasoning: v.reasoning });
+ }
+ }
+
+ // Load any existing human reviews
+ const reviews = (await getJson<Record<string, ImageReview>>("labels/reviews.json")) ?? {};
+
+ const images: (ImageReview & { url: string })[] = [];
+ for (const [path, res] of Object.entries(partial.results)) {
+ if (res.error || !res.queries) continue;
+ // Convert pod-side path to R2 key, supporting both v1 and v2 layouts:
+ // /workspace/images/... → raw/... (v1)
+ // /workspace/images_v2/... → raw_v2/... (v2)
+ let r2Key: string;
+ const idx2 = path.indexOf("/images_v2/");
+ const idx1 = path.indexOf("/images/");
+ if (idx2 !== -1) {
+ r2Key = "raw_v2/" + path.slice(idx2 + "/images_v2/".length);
+ } else if (idx1 !== -1) {
+ r2Key = "raw/" + path.slice(idx1 + "/images/".length);
+ } else {
+ continue;
+ }
+ const stripPrefix = r2Key.startsWith("raw_v2/") ? "raw_v2/" : "raw/";
+ const bucket = r2Key.slice(stripPrefix.length).split("/").slice(0, 2).join("/");
+
+ // Flatten queries → bboxes with query labels + Qwen verdicts
+ const flatBboxes: ImageReview["bboxes"] = [];
+ for (const [query, qres] of Object.entries(res.queries)) {
+ if (qres.error) continue;
+ for (const b of qres.bboxes) {
+ const v = b.annotation_id != null ? verdictById.get(b.annotation_id) : undefined;
+ flatBboxes.push({
+ ...b,
+ query,
+ vlm_verdict: v?.verdict,
+ vlm_reasoning: v?.reasoning,
+ });
+ }
+ }
+ if (flatBboxes.length === 0) continue; // skip empty for now
+
+ const existing = reviews[r2Key];
+ // Merge existing verdicts onto fresh bboxes (match by index for now)
+ if (existing) {
+ for (let i = 0; i < flatBboxes.length && i < existing.bboxes.length; i++) {
+ flatBboxes[i].verdict = existing.bboxes[i].verdict;
+ flatBboxes[i].note = existing.bboxes[i].note;
+ }
+ }
+
+ const url = await presignGet(r2Key, 3600);
+
+ images.push({
+ image_path: r2Key,
+ bucket,
+ width: res.width,
+ height: res.height,
+ bboxes: flatBboxes,
+ image_verdict: existing?.image_verdict,
+ reviewed_at: existing?.reviewed_at,
+ url,
+ });
+ }
+
+ // Sort by bucket priority (positive first, then most detections)
+ const PRIORITY: Record<string, number> = {
+ "positive/fiber_spool_drone": 0,
+ "positive/spool_only": 1,
+ "distractor/round_things": 2,
+ "negative/drones_no_spool": 3,
+ "background/empty": 4,
+ };
+ images.sort((a, b) => {
+ const pa = PRIORITY[a.bucket] ?? 99;
+ const pb = PRIORITY[b.bucket] ?? 99;
+ if (pa !== pb) return pa - pb;
+ return b.bboxes.length - a.bboxes.length;
+ });
+
+ return NextResponse.json({ images, total: images.length });
+ }
+
+ /**
+ * POST /api/labels — save a single image's review back to R2.
+ */
+ export async function POST(req: Request) {
+ const body = (await req.json()) as ImageReview;
+ if (!body.image_path) {
+ return NextResponse.json({ error: "missing image_path" }, { status: 400 });
+ }
+ const reviews = (await getJson<Record<string, ImageReview>>("labels/reviews.json")) ?? {};
+ reviews[body.image_path] = { ...body, reviewed_at: new Date().toISOString() };
+ await putJson("labels/reviews.json", reviews);
+ return NextResponse.json({ ok: true, total_reviewed: Object.keys(reviews).length });
+ }
web/app/canvas/page.tsx ADDED
@@ -0,0 +1,332 @@
+ "use client";
+
+ import { useEffect, useState, useMemo } from "react";
+ import { BboxCanvas } from "@/components/BboxCanvas";
+ import { colorForQuery } from "@/components/BboxOverlay";
+ import type { ImageReview } from "@/lib/types";
+
+ type LoadedImage = ImageReview & { url: string };
+
+ export default function CanvasPage() {
+   const [images, setImages] = useState<LoadedImage[]>([]);
+   const [loading, setLoading] = useState(true);
+   const [error, setError] = useState<string | null>(null);
+   const [bucketFilter, setBucketFilter] = useState<string>("all");
+   const [selectedIdx, setSelectedIdx] = useState<number>(0);
+   const [activeBbox, setActiveBbox] = useState<number | null>(null);
+
+   useEffect(() => {
+     fetch("/api/labels")
+       .then((r) => r.json())
+       .then((data) => {
+         if (data.error) setError(data.error);
+         else setImages(data.images ?? []);
+       })
+       .catch((e) => setError(String(e)))
+       .finally(() => setLoading(false));
+   }, []);
+
+   const filtered = useMemo(() => {
+     if (bucketFilter === "all") return images;
+     return images.filter((i) => i.bucket === bucketFilter);
+   }, [images, bucketFilter]);
+
+   const current = filtered[selectedIdx];
+
+   const bucketCounts = useMemo(() => {
+     const m = new Map<string, number>();
+     for (const i of images) m.set(i.bucket, (m.get(i.bucket) ?? 0) + 1);
+     return m;
+   }, [images]);
+
+   const queryStats = useMemo(() => {
+     if (!current) return [] as { query: string; count: number; yes: number; no: number }[];
+     const m = new Map<string, { query: string; count: number; yes: number; no: number }>();
+     for (const b of current.bboxes) {
+       const e = m.get(b.query) ?? { query: b.query, count: 0, yes: 0, no: 0 };
+       e.count++;
+       if (b.vlm_verdict === "YES") e.yes++;
+       if (b.vlm_verdict === "NO") e.no++;
+       m.set(b.query, e);
+     }
+     return Array.from(m.values()).sort((a, b) => b.count - a.count);
+   }, [current]);
+
+   // Keyboard navigation
+   useEffect(() => {
+     const onKey = (e: KeyboardEvent) => {
+       if (e.target instanceof HTMLInputElement) return;
+       if (e.key === "ArrowRight" || e.key === "j") {
+         setSelectedIdx((i) => Math.min(i + 1, filtered.length - 1));
+         setActiveBbox(null);
+       } else if (e.key === "ArrowLeft" || e.key === "k") {
+         setSelectedIdx((i) => Math.max(i - 1, 0));
+         setActiveBbox(null);
+       }
+     };
+     window.addEventListener("keydown", onKey);
+     return () => window.removeEventListener("keydown", onKey);
+   }, [filtered.length]);
+
+   if (loading) {
+     return (
+       <main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
+         <div className="text-2xl">Loading…</div>
+       </main>
+     );
+   }
+
+   if (error) {
+     return (
+       <main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
+         <h1 className="text-3xl font-bold mb-4">drone-falcon · canvas review</h1>
+         <div className="rounded-lg border border-red-800 bg-red-950 p-6 text-red-200">
+           Error: {error}
+         </div>
+       </main>
+     );
+   }
+
+   const buckets = ["all", ...Array.from(bucketCounts.keys()).sort()];
+   const total = images.length;
+   const totalApproved = images.filter((i) => i.image_verdict === "approved").length;
+   const totalRejected = images.filter((i) => i.image_verdict === "rejected").length;
+
+   return (
+     <main className="min-h-screen bg-zinc-950 text-zinc-50 font-sans">
+       {/* Header */}
+       <header className="border-b border-zinc-800 bg-zinc-900/60 backdrop-blur px-6 py-4 flex items-center justify-between">
+         <div>
+           <h1 className="text-2xl font-semibold tracking-tight text-zinc-50">
+             drone-falcon <span className="text-zinc-500">/</span> canvas review
+           </h1>
+           <p className="text-sm text-zinc-300 mt-0.5">
+             HTML5 Canvas viewer — <span className="text-zinc-100">drag</span> to pan,{" "}
+             <span className="text-zinc-100">scroll</span> to zoom,{" "}
+             <span className="text-zinc-100">double-click</span> to reset
+           </p>
+         </div>
+         <div className="flex items-center gap-2 text-sm">
+           <a href="/" className="text-zinc-300 hover:text-zinc-50 underline-offset-4 hover:underline mr-2">
+             ← grid view
+           </a>
+           <span className="rounded-md border border-zinc-700 bg-zinc-900 px-2.5 py-1 text-zinc-100 font-medium">
+             {total.toLocaleString()} labeled
+           </span>
+           <span className="rounded-md border border-emerald-600/60 bg-emerald-500/10 px-2.5 py-1 text-emerald-300 font-medium">
+             {totalApproved} approved
+           </span>
+           <span className="rounded-md border border-red-600/60 bg-red-500/10 px-2.5 py-1 text-red-300 font-medium">
+             {totalRejected} rejected
+           </span>
+         </div>
+       </header>
+
+       {/* Bucket tabs */}
+       <div className="border-b border-zinc-800 bg-zinc-950 px-6 py-3">
+         <div className="flex flex-wrap items-center gap-2">
+           {buckets.map((b) => {
+             const active = bucketFilter === b;
+             const count = b === "all" ? total : (bucketCounts.get(b) ?? 0);
+             const label = b === "all" ? "All" : b;
+             return (
+               <button
+                 key={b}
+                 onClick={() => { setBucketFilter(b); setSelectedIdx(0); setActiveBbox(null); }}
+                 className={`px-3 py-1.5 rounded-md text-sm font-medium transition-colors border ${
+                   active
+                     ? "bg-zinc-50 text-zinc-950 border-zinc-50"
+                     : "bg-zinc-900 text-zinc-200 border-zinc-700 hover:bg-zinc-800 hover:text-zinc-50"
+                 }`}
+               >
+                 {label}
+                 <span className={`ml-1.5 text-xs ${active ? "text-zinc-500" : "text-zinc-400"}`}>
+                   {count}
+                 </span>
+               </button>
+             );
+           })}
+         </div>
+       </div>
+
+       {/* Main canvas + sidebars */}
+       <div className="grid grid-cols-12 gap-4 p-4">
+         {/* Thumbnail strip */}
+         <div className="col-span-2 min-w-0 max-h-[calc(100vh-180px)] overflow-y-auto pr-2 space-y-2">
+           <div className="text-xs font-semibold uppercase tracking-wider text-zinc-400 px-1 pb-1">
+             Images ({filtered.length})
+           </div>
+           {filtered.slice(0, 200).map((img, idx) => (
+             <button
+               key={img.image_path}
+               onClick={() => { setSelectedIdx(idx); setActiveBbox(null); }}
+               className={`block w-full overflow-hidden rounded-md border-2 transition-all ${
+                 idx === selectedIdx
+                   ? "border-blue-500 ring-2 ring-blue-500/30"
+                   : "border-zinc-800 hover:border-zinc-500"
+               }`}
+             >
+               {/* eslint-disable-next-line @next/next/no-img-element */}
+               <img src={img.url} alt="" className="w-full h-20 object-cover" />
+               <div className="bg-zinc-900 px-2 py-1 text-xs text-zinc-200 font-medium text-left">
+                 {img.bboxes.length} detections
+               </div>
+             </button>
+           ))}
+         </div>
+
+         {/* Canvas area */}
+         <div className="col-span-7 min-w-0">
+           {current ? (
+             <div className="rounded-lg border border-zinc-800 bg-zinc-900 p-4 min-w-0">
+               <div className="flex items-center justify-between mb-3">
+                 <div className="text-sm font-mono text-zinc-200 truncate">
+                   {current.image_path}
+                 </div>
+                 <div className="text-sm text-zinc-300 font-medium whitespace-nowrap ml-3">
+                   {current.width}×{current.height} · {current.bboxes.length} bboxes ·{" "}
+                   <span className="text-zinc-50">{selectedIdx + 1}</span>
+                   <span className="text-zinc-500">/{filtered.length}</span>
+                 </div>
+               </div>
+               <BboxCanvas
+                 src={current.url}
+                 width={current.width}
+                 height={current.height}
+                 bboxes={current.bboxes.map((b, idx) => ({ ...b, idx }))}
+                 activeIdx={activeBbox}
+                 onBboxClick={setActiveBbox}
+                 aspectRatio={16 / 10}
+               />
+               <div className="mt-3 flex flex-wrap items-center gap-x-4 gap-y-2 text-sm text-zinc-300">
+                 <span className="flex items-center gap-1.5">
+                   <kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">←</kbd>
+                   <kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">→</kbd>
+                   navigate
+                 </span>
+                 <span className="flex items-center gap-1.5">
+                   <kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">scroll</kbd>
+                   zoom
+                 </span>
+                 <span className="flex items-center gap-1.5">
+                   <kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">shift+drag</kbd>
+                   pan
+                 </span>
+                 <span className="flex items-center gap-1.5">
+                   <kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">dblclick</kbd>
+                   reset view
+                 </span>
+                 <span className="flex items-center gap-1.5">
+                   <kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">click</kbd>
+                   select bbox
+                 </span>
+               </div>
+             </div>
+           ) : (
+             <div className="rounded-lg border border-zinc-800 bg-zinc-900 p-8 text-zinc-300 text-center">
+               No images in this bucket
+             </div>
+           )}
+         </div>
+
+         {/* Sidebar: bbox details */}
+         <div className="col-span-3 min-w-0 max-h-[calc(100vh-180px)] overflow-y-auto space-y-4 pr-1">
+           {/* Selected bbox */}
+           {activeBbox !== null && current && (() => {
+             const b = current.bboxes[activeBbox];
+             return (
+               <div className="rounded-lg border-2 border-blue-500 bg-zinc-900 p-4">
+                 <div className="flex items-center justify-between mb-3">
+                   <div className="text-sm font-bold text-blue-300 uppercase tracking-wide">
+                     Bbox #{activeBbox + 1}
+                   </div>
+                   <button
+                     onClick={() => setActiveBbox(null)}
+                     className="text-zinc-400 hover:text-zinc-100 text-sm"
+                   >
+                     ×
+                   </button>
+                 </div>
+                 <div className="flex items-center gap-2 mb-3">
+                   <span
+                     className="inline-block w-3 h-3 rounded-sm border border-zinc-600"
+                     style={{ backgroundColor: colorForQuery(b.query) }}
+                   />
+                   <span className="text-sm text-zinc-100 font-medium">{b.query}</span>
+                 </div>
+                 <div className="text-xs text-zinc-400 mb-2">Detected by Falcon</div>
+                 {b.vlm_verdict && (
+                   <div className="mt-3 pt-3 border-t border-zinc-800">
+                     <div className="text-xs text-zinc-400 mb-1">Qwen verdict</div>
+                     <div className={`text-base font-bold ${
+                       b.vlm_verdict === "YES" ? "text-emerald-400" :
+                       b.vlm_verdict === "NO" ? "text-red-400" :
+                       "text-amber-400"
+                     }`}>
+                       {b.vlm_verdict}
+                     </div>
+                     {b.vlm_reasoning && (
+                       <div className="text-zinc-300 italic mt-2 text-sm leading-relaxed">
+                         &ldquo;{b.vlm_reasoning}&rdquo;
+                       </div>
+                     )}
+                   </div>
+                 )}
+                 <div className="text-xs text-zinc-400 font-mono mt-3 pt-3 border-t border-zinc-800">
+                   <div>x1: {Math.round(b.x1)} y1: {Math.round(b.y1)}</div>
+                   <div>x2: {Math.round(b.x2)} y2: {Math.round(b.y2)}</div>
+                 </div>
+               </div>
+             );
+           })()}
+
+           {/* Per-query summary */}
+           <div className="rounded-lg border border-zinc-800 bg-zinc-900 p-4">
+             <div className="text-xs font-bold uppercase tracking-wider text-zinc-400 mb-3">
+               Queries on this image
+             </div>
+             <div className="space-y-2">
+               {queryStats.map((qs) => (
+                 <div key={qs.query} className="flex items-center justify-between text-sm">
+                   <div className="flex items-center gap-2 min-w-0">
+                     <span
+                       className="h-3 w-3 rounded-sm border border-zinc-600 flex-shrink-0"
+                       style={{ backgroundColor: colorForQuery(qs.query) }}
+                     />
+                     <span className="text-zinc-100 truncate">{qs.query}</span>
+                   </div>
+                   <span className="text-zinc-300 font-medium whitespace-nowrap ml-2">
+                     {qs.count}
+                     {qs.yes > 0 && <span className="text-emerald-400 ml-1.5">✓{qs.yes}</span>}
+                     {qs.no > 0 && <span className="text-red-400 ml-1">✗{qs.no}</span>}
+                   </span>
+                 </div>
+               ))}
+             </div>
+           </div>
+
+           {/* Controls */}
+           <div className="rounded-lg border border-zinc-800 bg-zinc-900 p-4">
+             <div className="text-xs font-bold uppercase tracking-wider text-zinc-400 mb-3">
+               Navigate
+             </div>
+             <div className="flex gap-2">
+               <button
+                 onClick={() => { setSelectedIdx((i) => Math.max(i - 1, 0)); setActiveBbox(null); }}
+                 className="flex-1 px-3 py-2 rounded-md bg-zinc-800 hover:bg-zinc-700 text-zinc-100 text-sm font-medium border border-zinc-700 transition-colors"
+               >
+                 ← Previous
+               </button>
+               <button
+                 onClick={() => { setSelectedIdx((i) => Math.min(i + 1, filtered.length - 1)); setActiveBbox(null); }}
+                 className="flex-1 px-3 py-2 rounded-md bg-zinc-800 hover:bg-zinc-700 text-zinc-100 text-sm font-medium border border-zinc-700 transition-colors"
+               >
+                 Next →
+               </button>
+             </div>
+           </div>
+         </div>
+       </div>
+     </main>
+   );
+ }
web/app/favicon.ico ADDED
web/app/globals.css ADDED
@@ -0,0 +1,141 @@
+ @import "tailwindcss";
+ @import "tw-animate-css";
+ @import "shadcn/tailwind.css";
+
+ @custom-variant dark (&:is(.dark *));
+
+ @theme inline {
+   --color-background: var(--background);
+   --color-foreground: var(--foreground);
+   --font-sans: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
+   --font-mono: ui-monospace, "SF Mono", "Menlo", "Cascadia Mono", "Roboto Mono", monospace;
+   --font-heading: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
+   --color-sidebar-ring: var(--sidebar-ring);
+   --color-sidebar-border: var(--sidebar-border);
+   --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
+   --color-sidebar-accent: var(--sidebar-accent);
+   --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
+   --color-sidebar-primary: var(--sidebar-primary);
+   --color-sidebar-foreground: var(--sidebar-foreground);
+   --color-sidebar: var(--sidebar);
+   --color-chart-5: var(--chart-5);
+   --color-chart-4: var(--chart-4);
+   --color-chart-3: var(--chart-3);
+   --color-chart-2: var(--chart-2);
+   --color-chart-1: var(--chart-1);
+   --color-ring: var(--ring);
+   --color-input: var(--input);
+   --color-border: var(--border);
+   --color-destructive: var(--destructive);
+   --color-accent-foreground: var(--accent-foreground);
+   --color-accent: var(--accent);
+   --color-muted-foreground: var(--muted-foreground);
+   --color-muted: var(--muted);
+   --color-secondary-foreground: var(--secondary-foreground);
+   --color-secondary: var(--secondary);
+   --color-primary-foreground: var(--primary-foreground);
+   --color-primary: var(--primary);
+   --color-popover-foreground: var(--popover-foreground);
+   --color-popover: var(--popover);
+   --color-card-foreground: var(--card-foreground);
+   --color-card: var(--card);
+   --radius-sm: calc(var(--radius) * 0.6);
+   --radius-md: calc(var(--radius) * 0.8);
+   --radius-lg: var(--radius);
+   --radius-xl: calc(var(--radius) * 1.4);
+   --radius-2xl: calc(var(--radius) * 1.8);
+   --radius-3xl: calc(var(--radius) * 2.2);
+   --radius-4xl: calc(var(--radius) * 2.6);
+ }
+
+ :root {
+   --background: oklch(1 0 0);
+   --foreground: oklch(0.145 0 0);
+   --card: oklch(1 0 0);
+   --card-foreground: oklch(0.145 0 0);
+   --popover: oklch(1 0 0);
+   --popover-foreground: oklch(0.145 0 0);
+   --primary: oklch(0.205 0 0);
+   --primary-foreground: oklch(0.985 0 0);
+   --secondary: oklch(0.97 0 0);
+   --secondary-foreground: oklch(0.205 0 0);
+   --muted: oklch(0.97 0 0);
+   --muted-foreground: oklch(0.556 0 0);
+   --accent: oklch(0.97 0 0);
+   --accent-foreground: oklch(0.205 0 0);
+   --destructive: oklch(0.577 0.245 27.325);
+   --border: oklch(0.922 0 0);
+   --input: oklch(0.922 0 0);
+   --ring: oklch(0.708 0 0);
+   --chart-1: oklch(0.87 0 0);
+   --chart-2: oklch(0.556 0 0);
+   --chart-3: oklch(0.439 0 0);
+   --chart-4: oklch(0.371 0 0);
+   --chart-5: oklch(0.269 0 0);
+   --radius: 0.625rem;
+   --sidebar: oklch(0.985 0 0);
+   --sidebar-foreground: oklch(0.145 0 0);
+   --sidebar-primary: oklch(0.205 0 0);
+   --sidebar-primary-foreground: oklch(0.985 0 0);
+   --sidebar-accent: oklch(0.97 0 0);
+   --sidebar-accent-foreground: oklch(0.205 0 0);
+   --sidebar-border: oklch(0.922 0 0);
+   --sidebar-ring: oklch(0.708 0 0);
+ }
+
+ .dark {
+   --background: oklch(0.145 0 0);
+   --foreground: oklch(0.985 0 0);
+   --card: oklch(0.205 0 0);
+   --card-foreground: oklch(0.985 0 0);
+   --popover: oklch(0.205 0 0);
+   --popover-foreground: oklch(0.985 0 0);
+   --primary: oklch(0.922 0 0);
+   --primary-foreground: oklch(0.205 0 0);
+   --secondary: oklch(0.269 0 0);
+   --secondary-foreground: oklch(0.985 0 0);
+   --muted: oklch(0.269 0 0);
+   --muted-foreground: oklch(0.708 0 0);
+   --accent: oklch(0.269 0 0);
+   --accent-foreground: oklch(0.985 0 0);
+   --destructive: oklch(0.704 0.191 22.216);
+   --border: oklch(1 0 0 / 10%);
+   --input: oklch(1 0 0 / 15%);
+   --ring: oklch(0.556 0 0);
+   --chart-1: oklch(0.87 0 0);
+   --chart-2: oklch(0.556 0 0);
+   --chart-3: oklch(0.439 0 0);
+   --chart-4: oklch(0.371 0 0);
+   --chart-5: oklch(0.269 0 0);
+   --sidebar: oklch(0.205 0 0);
+   --sidebar-foreground: oklch(0.985 0 0);
+   --sidebar-primary: oklch(0.488 0.243 264.376);
+   --sidebar-primary-foreground: oklch(0.985 0 0);
+   --sidebar-accent: oklch(0.269 0 0);
+   --sidebar-accent-foreground: oklch(0.985 0 0);
+   --sidebar-border: oklch(1 0 0 / 10%);
+   --sidebar-ring: oklch(0.556 0 0);
+ }
+
+ @layer base {
+   * {
+     @apply border-border outline-ring/50;
+   }
+   body {
+     @apply bg-background text-foreground;
+     font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
+     font-feature-settings: "cv11", "ss01";
+     -webkit-font-smoothing: antialiased;
+     -moz-osx-font-smoothing: grayscale;
+   }
+   html {
+     @apply font-sans;
+   }
+   h1, h2, h3, h4, h5, h6 {
+     font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
+     letter-spacing: -0.01em;
+   }
+   code, kbd, pre, .font-mono {
+     font-family: ui-monospace, "SF Mono", "Menlo", "Cascadia Mono", "Roboto Mono", monospace;
+   }
+ }
web/app/layout.tsx ADDED
@@ -0,0 +1,33 @@
+ import type { Metadata } from "next";
+ import { Geist, Geist_Mono } from "next/font/google";
+ import "./globals.css";
+
+ const geistSans = Geist({
+   variable: "--font-geist-sans",
+   subsets: ["latin"],
+ });
+
+ const geistMono = Geist_Mono({
+   variable: "--font-geist-mono",
+   subsets: ["latin"],
+ });
+
+ export const metadata: Metadata = {
+   title: "Create Next App",
+   description: "Generated by create next app",
+ };
+
+ export default function RootLayout({
+   children,
+ }: Readonly<{
+   children: React.ReactNode;
+ }>) {
+   return (
+     <html
+       lang="en"
+       className={`${geistSans.variable} ${geistMono.variable} dark h-full antialiased`}
+     >
+       <body className="min-h-full flex flex-col bg-zinc-950 text-zinc-100 font-sans">{children}</body>
+     </html>
+   );
+ }
web/app/page.tsx ADDED
@@ -0,0 +1,311 @@
+ "use client";
+
+ import { useEffect, useState, useCallback, useMemo, useRef } from "react";
+ import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+ import { Button } from "@/components/ui/button";
+ import { Badge } from "@/components/ui/badge";
+ import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
+ import { Toaster } from "@/components/ui/sonner";
+ import { toast } from "sonner";
+ import { BboxOverlay, colorForQuery, type AnnotatedBbox } from "@/components/BboxOverlay";
+ import type { ImageReview } from "@/lib/types";
+
+ type LoadedImage = ImageReview & { url: string };
+
+ export default function Home() {
+   const [images, setImages] = useState<LoadedImage[]>([]);
+   const [loading, setLoading] = useState(true);
+   const [error, setError] = useState<string | null>(null);
+   const [bucketFilter, setBucketFilter] = useState<string>("all");
+   const [selectedIdx, setSelectedIdx] = useState<number>(0);
+   const [activeBbox, setActiveBbox] = useState<number | null>(null);
+   const cardRef = useRef<HTMLDivElement>(null);
+
+   useEffect(() => {
+     fetch("/api/labels")
+       .then((r) => r.json())
+       .then((data) => {
+         if (data.error) setError(data.error);
+         else setImages(data.images ?? []);
+       })
+       .catch((e) => setError(String(e)))
+       .finally(() => setLoading(false));
+   }, []);
+
+   const filtered = useMemo(() => {
+     if (bucketFilter === "all") return images;
+     return images.filter((i) => i.bucket === bucketFilter);
+   }, [images, bucketFilter]);
+
+   const current = filtered[selectedIdx];
+
+   const annotated: AnnotatedBbox[] = useMemo(() => {
+     if (!current) return [];
+     return current.bboxes.map((b, idx) => ({ ...b, idx }));
+   }, [current]);
+
+   const queryStats = useMemo(() => {
+     if (!current) return [] as { query: string; count: number; approved: number; rejected: number }[];
+     const m = new Map<string, { query: string; count: number; approved: number; rejected: number }>();
+     for (const b of current.bboxes) {
+       const e = m.get(b.query) ?? { query: b.query, count: 0, approved: 0, rejected: 0 };
+       e.count++;
+       if (b.verdict === "approved") e.approved++;
+       if (b.verdict === "rejected") e.rejected++;
+       m.set(b.query, e);
+     }
+     return Array.from(m.values()).sort((a, b) => b.count - a.count);
+   }, [current]);
+
+   const totalReviewed = images.filter((i) => i.image_verdict).length;
+   const totalApproved = images.filter((i) => i.image_verdict === "approved").length;
+   const totalRejected = images.filter((i) => i.image_verdict === "rejected").length;
+
+   const bucketCounts = useMemo(() => {
+     const m = new Map<string, number>();
+     for (const i of images) m.set(i.bucket, (m.get(i.bucket) ?? 0) + 1);
+     return m;
+   }, [images]);
+
+   const saveReview = useCallback(async (img: LoadedImage) => {
+     try {
+       const res = await fetch("/api/labels", {
+         method: "POST",
+         headers: { "Content-Type": "application/json" },
+         body: JSON.stringify(img),
+       });
+       const data = await res.json();
+       if (data.ok) toast.success(`Saved (${data.total_reviewed} reviewed)`);
+       else toast.error(data.error ?? "save failed");
+     } catch (e) {
+       toast.error(String(e));
+     }
+   }, []);
+
+   const setImageVerdict = useCallback(
+     (verdict: "approved" | "rejected" | "unsure") => {
+       if (!current) return;
+       const updated = { ...current, image_verdict: verdict };
+       setImages((prev) => prev.map((p) => (p.image_path === current.image_path ? updated : p)));
+       saveReview(updated);
+       setSelectedIdx((i) => Math.min(i + 1, filtered.length - 1));
+       setActiveBbox(null);
+     },
+     [current, filtered.length, saveReview],
+   );
+
+   const setBboxVerdict = useCallback(
+     (idx: number, verdict: "approved" | "rejected" | "unsure") => {
+       if (!current) return;
+       const newBboxes = current.bboxes.map((b, i) => (i === idx ? { ...b, verdict } : b));
+       const updated = { ...current, bboxes: newBboxes };
+       setImages((prev) => prev.map((p) => (p.image_path === current.image_path ? updated : p)));
+       saveReview(updated);
+     },
+     [current, saveReview],
+   );
+
+   useEffect(() => {
+     const onKey = (e: KeyboardEvent) => {
+       if (e.target instanceof HTMLInputElement) return;
+       if (e.key === "ArrowRight" || e.key === "j") {
+         setSelectedIdx((i) => Math.min(i + 1, filtered.length - 1));
+         setActiveBbox(null);
+       } else if (e.key === "ArrowLeft" || e.key === "k") {
+         setSelectedIdx((i) => Math.max(i - 1, 0));
+         setActiveBbox(null);
+       } else if (e.key === "y") {
+         setImageVerdict("approved");
+       } else if (e.key === "n") {
+         setImageVerdict("rejected");
+       } else if (e.key === "u") {
+         setImageVerdict("unsure");
+       }
+     };
+     window.addEventListener("keydown", onKey);
+     return () => window.removeEventListener("keydown", onKey);
+   }, [filtered.length, setImageVerdict]);
+
+   if (loading) {
+     return (
+       <main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
+         <div className="text-2xl">Loading labels from R2…</div>
+       </main>
+     );
+   }
+
+   if (error) {
+     return (
+       <main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
+         <h1 className="text-3xl font-bold mb-4">drone-falcon factory</h1>
+         <Card className="bg-red-950 border-red-800">
+           <CardContent className="pt-6">
+             <div className="text-red-300">Error: {error}</div>
+             <div className="text-zinc-400 mt-2 text-sm">
+               The labeling pod is probably still running. Sync labels/partial.json to R2 to see them here.
+             </div>
+           </CardContent>
+         </Card>
+       </main>
+     );
+   }
+
+   const buckets = ["all", ...Array.from(bucketCounts.keys()).sort()];
+
+   return (
+     <main className="min-h-screen bg-zinc-950 text-zinc-100">
+       <Toaster theme="dark" position="bottom-right" />
+
+       <header className="border-b border-zinc-800 px-6 py-4">
+         <div className="flex items-center justify-between">
+           <div>
+             <h1 className="text-2xl font-bold tracking-tight">drone-falcon · review factory</h1>
+             <p className="text-sm text-zinc-400">human verification of Falcon Perception bboxes</p>
+           </div>
+           <div className="flex items-center gap-3 text-sm">
+             <Badge variant="outline" className="border-zinc-700 text-zinc-300">{images.length} labeled</Badge>
+             <Badge variant="outline" className="border-emerald-700 text-emerald-400">{totalApproved} approved</Badge>
+             <Badge variant="outline" className="border-red-700 text-red-400">{totalRejected} rejected</Badge>
+             <Badge variant="outline" className="border-zinc-700 text-zinc-300">{totalReviewed}/{images.length} reviewed</Badge>
+           </div>
+         </div>
+       </header>
+
+       <div className="border-b border-zinc-800 px-6 py-2">
+         <Tabs value={bucketFilter} onValueChange={(v) => { setBucketFilter(v); setSelectedIdx(0); }}>
+           <TabsList className="bg-zinc-900">
+             {buckets.map((b) => (
+               <TabsTrigger key={b} value={b} className="data-[state=active]:bg-zinc-800">
+                 {b === "all" ? `All (${images.length})` : `${b} (${bucketCounts.get(b) ?? 0})`}
+               </TabsTrigger>
+             ))}
+           </TabsList>
+         </Tabs>
+       </div>
+
+       <div className="grid grid-cols-12 gap-4 p-4">
+         <div className="col-span-2 max-h-[calc(100vh-180px)] overflow-y-auto pr-2 space-y-2">
+           {filtered.map((img, idx) => (
+             <button
+               key={img.image_path}
+               onClick={() => { setSelectedIdx(idx); setActiveBbox(null); }}
+               className={`block w-full overflow-hidden rounded border-2 transition-all ${
+                 idx === selectedIdx ? "border-blue-500" : "border-zinc-800 hover:border-zinc-600"
+               }`}
+             >
+               {/* eslint-disable-next-line @next/next/no-img-element */}
+               <img src={img.url} alt="" className="w-full h-20 object-cover" />
+               <div className="bg-zinc-900 px-1 py-0.5 text-[10px] text-zinc-400 flex justify-between">
+                 <span>{img.bboxes.length} dets</span>
+                 <span>
+                   {img.image_verdict === "approved" && "✓"}
+                   {img.image_verdict === "rejected" && "✗"}
+                 </span>
+               </div>
+             </button>
+           ))}
+         </div>
+
+         <div className="col-span-7" ref={cardRef}>
+           {current ? (
+             <Card className="bg-zinc-900 border-zinc-800">
+               <CardHeader className="pb-2">
+                 <CardTitle className="text-base font-mono text-zinc-300 truncate">{current.image_path}</CardTitle>
+                 <div className="text-xs text-zinc-500">
+                   {current.width}×{current.height} · {current.bboxes.length} bboxes · {selectedIdx + 1}/{filtered.length}
+                 </div>
+               </CardHeader>
+               <CardContent>
+                 <BboxOverlay
+                   src={current.url}
+                   width={current.width}
+                   height={current.height}
+                   bboxes={annotated}
+                   activeIdx={activeBbox}
+                   onBboxClick={(idx) => setActiveBbox(idx)}
+                 />
+                 <div className="mt-4 flex gap-2">
+                   <Button onClick={() => setImageVerdict("approved")} className="bg-emerald-700 hover:bg-emerald-600">
+                     ✓ Approve image (Y)
+                   </Button>
+                   <Button onClick={() => setImageVerdict("rejected")} variant="destructive">
+                     ✗ Reject image (N)
+                   </Button>
+                   <Button onClick={() => setImageVerdict("unsure")} variant="outline" className="border-zinc-700">
+                     ? Unsure (U)
+                   </Button>
+                   <div className="flex-1" />
+                   <Button onClick={() => { setSelectedIdx((i) => Math.max(i - 1, 0)); setActiveBbox(null); }} variant="outline" className="border-zinc-700">←</Button>
+                   <Button onClick={() => { setSelectedIdx((i) => Math.min(i + 1, filtered.length - 1)); setActiveBbox(null); }} variant="outline" className="border-zinc-700">→</Button>
+                 </div>
+                 <div className="mt-2 text-xs text-zinc-500">
+                   Shortcuts: <kbd>Y</kbd> approve · <kbd>N</kbd> reject · <kbd>U</kbd> unsure · <kbd>←</kbd> <kbd>→</kbd> navigate · click a bbox to select
+                 </div>
+               </CardContent>
+             </Card>
+           ) : (
+             <Card className="bg-zinc-900 border-zinc-800">
+               <CardContent className="pt-6 text-zinc-400">No images in this bucket</CardContent>
+             </Card>
+           )}
+         </div>
+
+         <div className="col-span-3 max-h-[calc(100vh-180px)] overflow-y-auto">
+           <Card className="bg-zinc-900 border-zinc-800">
+             <CardHeader className="pb-2">
+               <CardTitle className="text-sm">Per-query bboxes</CardTitle>
+             </CardHeader>
+             <CardContent className="space-y-3">
+               {queryStats.map((qs) => (
+                 <div key={qs.query} className="space-y-1">
+                   <div className="flex items-center justify-between text-xs">
+                     <div className="flex items-center gap-2">
+                       <span className="h-3 w-3 rounded" style={{ backgroundColor: colorForQuery(qs.query) }} />
+                       <span className="font-medium">{qs.query}</span>
+                     </div>
+                     <span className="text-zinc-500">
+                       {qs.count}
+                       {qs.approved > 0 && <span className="text-emerald-500"> ✓{qs.approved}</span>}
+                       {qs.rejected > 0 && <span className="text-red-500"> ✗{qs.rejected}</span>}
+                     </span>
+                   </div>
+                 </div>
+               ))}
+               {activeBbox !== null && current && (() => {
+                 const bbox = current.bboxes[activeBbox];
+                 return (
+                   <div className="border-t border-zinc-800 pt-3 mt-3">
+                     <div className="text-xs font-bold mb-2">Selected bbox #{activeBbox + 1}</div>
+                     <div className="text-xs text-zinc-400 mb-1">Falcon: {bbox.query}</div>
+                     {bbox.vlm_verdict && (
+                       <div className="text-xs mb-2">
+                         <span className="text-zinc-500">Qwen: </span>
+                         <span className={
+                           bbox.vlm_verdict === "YES" ? "text-emerald-400" :
+                           bbox.vlm_verdict === "NO" ? "text-red-400" :
+                           "text-amber-400"
+                         }>
+                           {bbox.vlm_verdict}
289
+ </span>
290
+ {bbox.vlm_reasoning && (
291
+ <div className="text-zinc-400 italic mt-1 text-[11px]">
292
+ &ldquo;{bbox.vlm_reasoning}&rdquo;
293
+ </div>
294
+ )}
295
+ </div>
296
+ )}
297
+ <div className="flex gap-1 mt-2">
298
+ <Button size="sm" className="bg-emerald-700 hover:bg-emerald-600 text-xs h-7" onClick={() => setBboxVerdict(activeBbox, "approved")}>✓</Button>
299
+ <Button size="sm" variant="destructive" className="text-xs h-7" onClick={() => setBboxVerdict(activeBbox, "rejected")}>✗</Button>
300
+ <Button size="sm" variant="outline" className="text-xs h-7 border-zinc-700" onClick={() => setBboxVerdict(activeBbox, "unsure")}>?</Button>
301
+ </div>
302
+ </div>
303
+ );
304
+ })()}
305
+ </CardContent>
306
+ </Card>
307
+ </div>
308
+ </div>
309
+ </main>
310
+ );
311
+ }
web/components.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "$schema": "https://ui.shadcn.com/schema.json",
+ "style": "base-nova",
+ "rsc": true,
+ "tsx": true,
+ "tailwind": {
+ "config": "",
+ "css": "app/globals.css",
+ "baseColor": "neutral",
+ "cssVariables": true,
+ "prefix": ""
+ },
+ "iconLibrary": "lucide",
+ "rtl": false,
+ "aliases": {
+ "components": "@/components",
+ "utils": "@/lib/utils",
+ "ui": "@/components/ui",
+ "lib": "@/lib",
+ "hooks": "@/hooks"
+ },
+ "menuColor": "default",
+ "menuAccent": "subtle",
+ "registries": {}
+ }
web/components/BboxCanvas.tsx ADDED
@@ -0,0 +1,329 @@
+ "use client";
+
+ import { useEffect, useRef, useState, useCallback } from "react";
+ import type { Bbox } from "@/lib/types";
+ import {
+ fitViewport,
+ zoomAt,
+ imageToCanvas,
+ hitBbox,
+ type Viewport,
+ } from "@/lib/canvas-utils";
+ import { colorForQuery } from "@/components/BboxOverlay";
+
+ type AnnotatedBbox = Bbox & {
+ query: string;
+ vlm_verdict?: "YES" | "NO" | "UNSURE";
+ vlm_reasoning?: string;
+ verdict?: "approved" | "rejected" | "unsure";
+ };
+
+ type Props = {
+ src: string;
+ width: number; // image native width in pixels
+ height: number; // image native height in pixels
+ bboxes: AnnotatedBbox[];
+ activeIdx: number | null;
+ onBboxClick: (idx: number | null) => void;
+ onBboxHover?: (idx: number | null) => void;
+ showLabels?: boolean;
+ /** Optional fixed canvas display size. If omitted, the canvas fills its parent
+ * width and uses `aspectRatio` to derive the height. */
+ canvasWidth?: number;
+ canvasHeight?: number;
+ /** Aspect ratio (w/h) used when sizing responsively. Default 16/10. */
+ aspectRatio?: number;
+ };
+
+ /**
+ * Pure HTML5 Canvas bbox renderer + interaction.
+ * Single <canvas>, draws image + bboxes in one pass per frame.
+ * Mouse wheel = zoom around cursor. Drag = pan. Click = select. Hover = highlight.
+ */
+ export function BboxCanvas({
+ src,
+ width: imgW,
+ height: imgH,
+ bboxes,
+ activeIdx,
+ onBboxClick,
+ onBboxHover,
+ showLabels = true,
+ canvasWidth,
+ canvasHeight,
+ aspectRatio = 16 / 10,
+ }: Props) {
+ const containerRef = useRef<HTMLDivElement>(null);
+ const canvasRef = useRef<HTMLCanvasElement>(null);
+ const imgRef = useRef<HTMLImageElement | null>(null);
+ const vpRef = useRef<Viewport>({ scale: 1, offsetX: 0, offsetY: 0 });
+ const [hoverIdx, setHoverIdx] = useState<number | null>(null);
+ const [isLoaded, setIsLoaded] = useState(false);
+
+ // Responsive size: if canvasWidth/Height are not provided, measure container.
+ const [size, setSize] = useState<{ w: number; h: number }>(() => ({
+ w: canvasWidth ?? 800,
+ h: canvasHeight ?? Math.round((canvasWidth ?? 800) / aspectRatio),
+ }));
+
+ useEffect(() => {
+ if (canvasWidth && canvasHeight) {
+ setSize({ w: canvasWidth, h: canvasHeight });
+ return;
+ }
+ const el = containerRef.current;
+ if (!el) return;
+ const update = () => {
+ const w = Math.max(200, Math.floor(el.clientWidth));
+ const h = Math.max(150, Math.floor(w / aspectRatio));
+ setSize((prev) => (prev.w === w && prev.h === h ? prev : { w, h }));
+ };
+ update();
+ const ro = new ResizeObserver(update);
+ ro.observe(el);
+ return () => ro.disconnect();
+ }, [canvasWidth, canvasHeight, aspectRatio]);
+
+ // Refit viewport when canvas size changes (e.g., window resize)
+ useEffect(() => {
+ if (imgRef.current) {
+ vpRef.current = fitViewport(imgW, imgH, size.w, size.h);
+ requestAnimationFrame(draw);
+ }
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [size.w, size.h]);
+
+ const [pan, setPan] = useState<{ active: boolean; lastX: number; lastY: number }>({
+ active: false,
+ lastX: 0,
+ lastY: 0,
+ });
+
+ // Load the image element once.
+ // NOTE: we deliberately do NOT set crossOrigin — R2 presigned URLs don't send
+ // CORS headers, and setting crossOrigin would block the load. The canvas becomes
+ // "tainted" but drawImage still works; we just can't call toDataURL/getImageData
+ // (which we don't need for rendering).
+ useEffect(() => {
+ const img = new window.Image();
+ img.src = src;
+ img.onload = () => {
+ imgRef.current = img;
+ // Reset viewport to fit on image change
+ vpRef.current = fitViewport(imgW, imgH, size.w, size.h);
+ setIsLoaded(true);
+ requestAnimationFrame(draw);
+ };
+ img.onerror = () => {
+ setIsLoaded(false);
+ };
+ // Clear handlers on unmount / image change so a late load can't set state.
+ return () => { img.onload = null; img.onerror = null; };
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [src, imgW, imgH]);
+
+ // Redraw when bboxes / activeIdx / hover change
+ useEffect(() => {
+ if (isLoaded) requestAnimationFrame(draw);
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [bboxes, activeIdx, hoverIdx, isLoaded]);
+
+ const draw = useCallback(() => {
+ const canvas = canvasRef.current;
+ const img = imgRef.current;
+ if (!canvas || !img) return;
+ const ctx = canvas.getContext("2d");
+ if (!ctx) return;
+
+ const vp = vpRef.current;
+ const W = canvas.width;
+ const H = canvas.height;
+
+ // Background
+ ctx.fillStyle = "#0a0a0a";
+ ctx.fillRect(0, 0, W, H);
+
+ // Image
+ ctx.drawImage(img, vp.offsetX, vp.offsetY, imgW * vp.scale, imgH * vp.scale);
+
+ // Bboxes — sort so rejected draw first (under), then unverified, then
+ // accepted, then hover/active on top.
+ const draws = bboxes.map((b, idx) => ({ b, idx }));
+ draws.sort((a, b) => {
+ const sa = sortKey(a.b, a.idx === activeIdx, a.idx === hoverIdx);
+ const sb = sortKey(b.b, b.idx === activeIdx, b.idx === hoverIdx);
+ return sa - sb;
+ });
+
+ for (const { b, idx } of draws) {
+ const isHover = idx === hoverIdx;
+ const isActive = idx === activeIdx;
+ const color = colorForQuery(b.query);
+ const human = b.verdict;
+ const vlm = b.vlm_verdict;
+
+ // Rectangle in canvas coords
+ const { cx: x1, cy: y1 } = imageToCanvas(vp, b.x1, b.y1);
+ const { cx: x2, cy: y2 } = imageToCanvas(vp, b.x2, b.y2);
+ const rectW = x2 - x1;
+ const rectH = y2 - y1;
+
+ // Style
+ let strokeStyle = color;
+ let lineWidth = 2;
+ let fillStyle = `${color}1A`; // ~10% alpha
+ ctx.setLineDash([]);
+
+ if (human === "rejected") {
+ strokeStyle = "#ef4444";
+ lineWidth = 1.5;
+ fillStyle = "#00000020";
+ ctx.setLineDash([6, 4]);
+ } else if (human === "approved") {
+ strokeStyle = "#10b981";
+ lineWidth = 3;
+ fillStyle = `${color}26`; // ~15% alpha
+ } else if (vlm === "NO") {
+ strokeStyle = "#f87171";
+ lineWidth = 1.5;
+ fillStyle = "#00000018";
+ ctx.setLineDash([5, 3]);
+ } else if (vlm === "YES") {
+ strokeStyle = color;
+ lineWidth = 2;
+ fillStyle = `${color}1F`;
+ }
+
+ if (isHover && !isActive) {
+ lineWidth += 1;
+ fillStyle = `${color}33`;
+ }
+ if (isActive) {
+ lineWidth = 4;
+ strokeStyle = "#ffffff";
+ fillStyle = `${color}40`;
+ ctx.setLineDash([]);
+ }
+
+ ctx.fillStyle = fillStyle;
+ ctx.fillRect(x1, y1, rectW, rectH);
+ ctx.strokeStyle = strokeStyle;
+ ctx.lineWidth = lineWidth;
+ ctx.strokeRect(x1, y1, rectW, rectH);
+
+ // Label (only if showing labels AND bbox is large enough on screen)
+ if (showLabels && rectW > 30 && rectH > 14) {
+ const label = b.query + (vlm === "YES" ? " ✓" : vlm === "NO" ? " ✗" : "");
+ ctx.font = "bold 11px -apple-system, system-ui, sans-serif";
+ const metrics = ctx.measureText(label);
+ const labelW = metrics.width + 8;
+ const labelH = 16;
+ const labelY = y1 - labelH;
+ if (labelY > 0) {
+ ctx.fillStyle = strokeStyle;
+ ctx.fillRect(x1, labelY, labelW, labelH);
+ ctx.fillStyle = "#000000";
+ ctx.fillText(label, x1 + 4, labelY + 12);
+ }
+ }
+ }
+
+ // Bottom-left HUD: zoom % and bbox count
+ ctx.font = "11px monospace";
+ ctx.fillStyle = "#ffffff90";
+ const hud = `${(vp.scale * 100).toFixed(0)}% zoom · ${bboxes.length} bboxes${hoverIdx !== null ? ` · #${hoverIdx + 1}` : ""}`;
+ ctx.fillText(hud, 8, H - 8);
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [bboxes, activeIdx, hoverIdx, imgW, imgH, showLabels]);
+
+ // Mouse handlers
+ const onMouseMove = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
+ const canvas = canvasRef.current;
+ if (!canvas) return;
+ const rect = canvas.getBoundingClientRect();
+ const cx = e.clientX - rect.left;
+ const cy = e.clientY - rect.top;
+ if (pan.active) {
+ const dx = cx - pan.lastX;
+ const dy = cy - pan.lastY;
+ vpRef.current = {
+ ...vpRef.current,
+ offsetX: vpRef.current.offsetX + dx,
+ offsetY: vpRef.current.offsetY + dy,
+ };
+ setPan({ active: true, lastX: cx, lastY: cy });
+ requestAnimationFrame(draw);
+ return;
+ }
+ const idx = hitBbox(bboxes, cx, cy, vpRef.current);
+ if (idx !== hoverIdx) {
+ setHoverIdx(idx);
+ onBboxHover?.(idx);
+ }
+ }, [bboxes, hoverIdx, onBboxHover, pan, draw]);
+
+ const onMouseDown = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
+ const canvas = canvasRef.current;
+ if (!canvas) return;
+ const rect = canvas.getBoundingClientRect();
+ const cx = e.clientX - rect.left;
+ const cy = e.clientY - rect.top;
+ if (e.shiftKey) {
+ setPan({ active: true, lastX: cx, lastY: cy });
+ return;
+ }
+ const idx = hitBbox(bboxes, cx, cy, vpRef.current);
+ onBboxClick(idx);
+ }, [bboxes, onBboxClick]);
+
+ const onMouseUp = useCallback(() => {
+ setPan({ active: false, lastX: 0, lastY: 0 });
+ }, []);
+
+ // React registers wheel listeners as passive (since React 17), so calling
+ // e.preventDefault() in the synthetic onWheel is a no-op and the page scrolls
+ // alongside the zoom. Attach a native non-passive listener instead.
+ useEffect(() => {
+ const canvas = canvasRef.current;
+ if (!canvas) return;
+ const handler = (e: WheelEvent) => {
+ e.preventDefault();
+ const rect = canvas.getBoundingClientRect();
+ const cx = e.clientX - rect.left;
+ const cy = e.clientY - rect.top;
+ const factor = e.deltaY < 0 ? 1.1 : 1 / 1.1;
+ vpRef.current = zoomAt(vpRef.current, cx, cy, factor);
+ requestAnimationFrame(draw);
+ };
+ canvas.addEventListener("wheel", handler, { passive: false });
+ return () => canvas.removeEventListener("wheel", handler);
+ }, [draw]);
+
+ const onDoubleClick = useCallback(() => {
+ // Reset viewport
+ vpRef.current = fitViewport(imgW, imgH, size.w, size.h);
+ requestAnimationFrame(draw);
+ }, [imgW, imgH, size.w, size.h, draw]);
+
+ return (
+ <div ref={containerRef} className="w-full">
+ <canvas
+ ref={canvasRef}
+ width={size.w}
+ height={size.h}
+ className="block rounded border border-zinc-800 cursor-crosshair"
+ style={{ width: size.w, height: size.h, maxWidth: "100%" }}
+ onMouseMove={onMouseMove}
+ onMouseDown={onMouseDown}
+ onMouseUp={onMouseUp}
+ onMouseLeave={onMouseUp}
+ onDoubleClick={onDoubleClick}
+ />
+ </div>
+ );
+ }
+
+ function sortKey(b: AnnotatedBbox, isActive: boolean, isHover: boolean): number {
+ if (isActive) return 4;
+ if (isHover) return 3;
+ if (b.verdict === "approved") return 2;
+ if (b.vlm_verdict === "YES") return 1;
+ return 0;
+ }
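
The viewport helpers imported from `@/lib/canvas-utils` are not part of this diff. As a rough sketch of what they plausibly look like, inferred only from how `BboxCanvas` calls them (`fitViewport` letterboxes the image, `zoomAt` scales about the cursor, `imageToCanvas` maps image space to canvas space, `hitBbox` hit-tests topmost-first) — an illustration, not the committed implementation:

```typescript
export type Viewport = { scale: number; offsetX: number; offsetY: number };

// Fit an imgW×imgH image inside a canvasW×canvasH canvas, centered (letterboxed).
export function fitViewport(imgW: number, imgH: number, canvasW: number, canvasH: number): Viewport {
  const scale = Math.min(canvasW / imgW, canvasH / imgH);
  return {
    scale,
    offsetX: (canvasW - imgW * scale) / 2,
    offsetY: (canvasH - imgH * scale) / 2,
  };
}

// Map an image-space point to canvas space under a viewport.
export function imageToCanvas(vp: Viewport, ix: number, iy: number): { cx: number; cy: number } {
  return { cx: vp.offsetX + ix * vp.scale, cy: vp.offsetY + iy * vp.scale };
}

// Scale by `factor` while keeping the canvas point (cx, cy) fixed under the cursor.
export function zoomAt(vp: Viewport, cx: number, cy: number, factor: number): Viewport {
  return {
    scale: vp.scale * factor,
    offsetX: cx - (cx - vp.offsetX) * factor,
    offsetY: cy - (cy - vp.offsetY) * factor,
  };
}

// Index of the topmost bbox (last in draw order) containing canvas point (cx, cy), else null.
export function hitBbox(
  bboxes: { x1: number; y1: number; x2: number; y2: number }[],
  cx: number,
  cy: number,
  vp: Viewport,
): number | null {
  for (let i = bboxes.length - 1; i >= 0; i--) {
    const b = bboxes[i];
    const { cx: x1, cy: y1 } = imageToCanvas(vp, b.x1, b.y1);
    const { cx: x2, cy: y2 } = imageToCanvas(vp, b.x2, b.y2);
    if (cx >= x1 && cx <= x2 && cy >= y1 && cy <= y2) return i;
  }
  return null;
}
```

The key invariant is that `zoomAt` composes with `imageToCanvas`: the image point under the cursor before the zoom is still under the cursor after it, which is what makes wheel-zoom feel anchored.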
web/components/BboxOverlay.tsx ADDED
@@ -0,0 +1,130 @@
+ "use client";
+
+ import { cn } from "@/lib/utils";
+ import type { Bbox, BboxVerdict } from "@/lib/types";
+
+ export const QUERY_COLORS: Record<string, string> = {
+ "fiber optic spool": "#22c55e", // green
+ spool: "#10b981", // emerald
+ "cable spool": "#06b6d4", // cyan
+ drone: "#3b82f6", // blue
+ quadcopter: "#6366f1", // indigo
+ "fiber optic drone": "#84cc16", // lime
+ cable: "#f59e0b", // amber
+ cylinder: "#ec4899", // pink
+ objects: "#a3a3a3", // neutral
+ };
+
+ export function colorForQuery(q: string): string {
+ return QUERY_COLORS[q] ?? "#ef4444";
+ }
+
+ export type AnnotatedBbox = Bbox & {
+ query: string;
+ verdict?: BboxVerdict; // human verdict
+ vlm_verdict?: "YES" | "NO" | "UNSURE"; // Qwen VLM verdict
+ vlm_reasoning?: string;
+ idx: number;
+ };
+
+ type Props = {
+ src: string;
+ width: number;
+ height: number;
+ bboxes: AnnotatedBbox[];
+ activeIdx?: number | null;
+ onBboxClick?: (idx: number) => void;
+ showLabels?: boolean;
+ };
+
+ /**
+ * Image with overlaid bboxes. Bboxes are positioned with absolute % coords
+ * so they scale automatically with the image's display size.
+ */
+ export function BboxOverlay({
+ src,
+ width,
+ height,
+ bboxes,
+ activeIdx = null,
+ onBboxClick,
+ showLabels = true,
+ }: Props) {
+ return (
+ <div className="relative w-full" style={{ aspectRatio: `${width} / ${height}` }}>
+ {/* eslint-disable-next-line @next/next/no-img-element */}
+ <img
+ src={src}
+ alt=""
+ className="absolute inset-0 h-full w-full object-contain"
+ draggable={false}
+ />
+ {bboxes.map((b) => {
+ const queryColor = colorForQuery(b.query);
+ const isActive = activeIdx === b.idx;
+ const human = b.verdict;
+ const vlm = b.vlm_verdict;
+
+ // Border style: VLM=NO uses dashed red, VLM=YES uses solid query color,
+ // human verdict overrides VLM
+ let borderStyle = `2px solid ${queryColor}`;
+ let opacity = 1;
+ if (human === "rejected") {
+ borderStyle = `2px dashed #ef4444`;
+ opacity = 0.35;
+ } else if (human === "approved") {
+ borderStyle = `3px solid #10b981`;
+ } else if (vlm === "NO") {
+ borderStyle = `2px dashed #f87171`;
+ opacity = 0.5;
+ } else if (vlm === "YES") {
+ borderStyle = `2px solid ${queryColor}`;
+ }
+
+ const title = [
+ b.query,
+ vlm ? `Qwen: ${vlm}` : null,
+ b.vlm_reasoning ? `(${b.vlm_reasoning})` : null,
+ human ? `Human: ${human}` : null,
+ ].filter(Boolean).join(" — ");
+
+ return (
+ <div
+ key={b.idx}
+ className={cn(
+ "absolute cursor-pointer transition-all",
+ isActive && "ring-4 ring-white",
+ )}
+ style={{
+ left: `${b.x1_norm * 100}%`,
+ top: `${b.y1_norm * 100}%`,
+ width: `${(b.x2_norm - b.x1_norm) * 100}%`,
+ height: `${(b.y2_norm - b.y1_norm) * 100}%`,
+ border: borderStyle,
+ backgroundColor: human === "rejected" || vlm === "NO" ? "#00000010" : `${queryColor}10`,
+ opacity,
+ }}
+ onClick={(e) => {
+ e.stopPropagation();
+ onBboxClick?.(b.idx);
+ }}
+ title={title}
+ >
+ {showLabels && (
+ <span
+ className="absolute -top-5 left-0 whitespace-nowrap px-1 py-px text-[10px] font-bold text-black"
+ style={{ backgroundColor: queryColor }}
+ >
+ {b.query}
+ {vlm === "YES" && " ✓"}
+ {vlm === "NO" && " ✗"}
+ {human === "approved" && " 👤✓"}
+ {human === "rejected" && " 👤✗"}
+ </span>
+ )}
+ </div>
+ );
+ })}
+ </div>
+ );
+ }
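
The `Bbox` type from `@/lib/types` is not shown in this commit. Inferred from usage — `BboxCanvas` reads pixel corners (`x1`..`y2`) while `BboxOverlay` reads normalized corners (`x1_norm`..`y2_norm`) — it plausibly carries both; the shape and the `normalizeBbox` helper below are illustrative guesses, not the committed code:

```typescript
export type BboxVerdict = "approved" | "rejected" | "unsure";

// Hypothetical shape: pixel-space corners for the canvas renderer,
// normalized [0, 1] corners for the percentage-positioned overlay.
export type Bbox = {
  x1: number; y1: number; x2: number; y2: number;
  x1_norm: number; y1_norm: number; x2_norm: number; y2_norm: number;
};

// Illustrative helper: derive normalized corners from pixel corners
// given the image's native dimensions.
export function normalizeBbox(
  px: { x1: number; y1: number; x2: number; y2: number },
  imgW: number,
  imgH: number,
): Bbox {
  return {
    ...px,
    x1_norm: px.x1 / imgW,
    y1_norm: px.y1 / imgH,
    x2_norm: px.x2 / imgW,
    y2_norm: px.y2 / imgH,
  };
}
```

Keeping both representations on the record lets each component render without knowing the other's coordinate system.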
web/components/ui/badge.tsx ADDED
@@ -0,0 +1,52 @@
+ import { mergeProps } from "@base-ui/react/merge-props"
+ import { useRender } from "@base-ui/react/use-render"
+ import { cva, type VariantProps } from "class-variance-authority"
+
+ import { cn } from "@/lib/utils"
+
+ const badgeVariants = cva(
+ "group/badge inline-flex h-5 w-fit shrink-0 items-center justify-center gap-1 overflow-hidden rounded-4xl border border-transparent px-2 py-0.5 text-xs font-medium whitespace-nowrap transition-all focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 aria-invalid:border-destructive aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 [&>svg]:pointer-events-none [&>svg]:size-3!",
+ {
+ variants: {
+ variant: {
+ default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
+ secondary:
+ "bg-secondary text-secondary-foreground [a]:hover:bg-secondary/80",
+ destructive:
+ "bg-destructive/10 text-destructive focus-visible:ring-destructive/20 dark:bg-destructive/20 dark:focus-visible:ring-destructive/40 [a]:hover:bg-destructive/20",
+ outline:
+ "border-border text-foreground [a]:hover:bg-muted [a]:hover:text-muted-foreground",
+ ghost:
+ "hover:bg-muted hover:text-muted-foreground dark:hover:bg-muted/50",
+ link: "text-primary underline-offset-4 hover:underline",
+ },
+ },
+ defaultVariants: {
+ variant: "default",
+ },
+ }
+ )
+
+ function Badge({
+ className,
+ variant = "default",
+ render,
+ ...props
+ }: useRender.ComponentProps<"span"> & VariantProps<typeof badgeVariants>) {
+ return useRender({
+ defaultTagName: "span",
+ props: mergeProps<"span">(
+ {
+ className: cn(badgeVariants({ variant }), className),
+ },
+ props
+ ),
+ render,
+ state: {
+ slot: "badge",
+ variant,
+ },
+ })
+ }
+
+ export { Badge, badgeVariants }
web/components/ui/button.tsx ADDED
@@ -0,0 +1,58 @@
+ import { Button as ButtonPrimitive } from "@base-ui/react/button"
+ import { cva, type VariantProps } from "class-variance-authority"
+
+ import { cn } from "@/lib/utils"
+
+ const buttonVariants = cva(
+ "group/button inline-flex shrink-0 items-center justify-center rounded-lg border border-transparent bg-clip-padding text-sm font-medium whitespace-nowrap transition-all outline-none select-none focus-visible:border-ring focus-visible:ring-3 focus-visible:ring-ring/50 active:not-aria-[haspopup]:translate-y-px disabled:pointer-events-none disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20 dark:aria-invalid:border-destructive/50 dark:aria-invalid:ring-destructive/40 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
+ {
+ variants: {
+ variant: {
+ default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
+ outline:
+ "border-border bg-background hover:bg-muted hover:text-foreground aria-expanded:bg-muted aria-expanded:text-foreground dark:border-input dark:bg-input/30 dark:hover:bg-input/50",
+ secondary:
+ "bg-secondary text-secondary-foreground hover:bg-secondary/80 aria-expanded:bg-secondary aria-expanded:text-secondary-foreground",
+ ghost:
+ "hover:bg-muted hover:text-foreground aria-expanded:bg-muted aria-expanded:text-foreground dark:hover:bg-muted/50",
+ destructive:
+ "bg-destructive/10 text-destructive hover:bg-destructive/20 focus-visible:border-destructive/40 focus-visible:ring-destructive/20 dark:bg-destructive/20 dark:hover:bg-destructive/30 dark:focus-visible:ring-destructive/40",
+ link: "text-primary underline-offset-4 hover:underline",
+ },
+ size: {
+ default:
+ "h-8 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
+ xs: "h-6 gap-1 rounded-[min(var(--radius-md),10px)] px-2 text-xs in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3",
+ sm: "h-7 gap-1 rounded-[min(var(--radius-md),12px)] px-2.5 text-[0.8rem] in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3.5",
+ lg: "h-9 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
+ icon: "size-8",
+ "icon-xs":
+ "size-6 rounded-[min(var(--radius-md),10px)] in-data-[slot=button-group]:rounded-lg [&_svg:not([class*='size-'])]:size-3",
+ "icon-sm":
+ "size-7 rounded-[min(var(--radius-md),12px)] in-data-[slot=button-group]:rounded-lg",
+ "icon-lg": "size-9",
+ },
+ },
+ defaultVariants: {
+ variant: "default",
+ size: "default",
+ },
+ }
+ )
+
+ function Button({
+ className,
+ variant = "default",
+ size = "default",
+ ...props
+ }: ButtonPrimitive.Props & VariantProps<typeof buttonVariants>) {
+ return (
+ <ButtonPrimitive
+ data-slot="button"
+ className={cn(buttonVariants({ variant, size, className }))}
+ {...props}
+ />
+ )
+ }
+
+ export { Button, buttonVariants }
web/components/ui/card.tsx ADDED
@@ -0,0 +1,103 @@
+ import * as React from "react"
+
+ import { cn } from "@/lib/utils"
+
+ function Card({
+ className,
+ size = "default",
+ ...props
+ }: React.ComponentProps<"div"> & { size?: "default" | "sm" }) {
+ return (
+ <div
+ data-slot="card"
+ data-size={size}
+ className={cn(
+ "group/card flex flex-col gap-4 overflow-hidden rounded-xl bg-card py-4 text-sm text-card-foreground ring-1 ring-foreground/10 has-data-[slot=card-footer]:pb-0 has-[>img:first-child]:pt-0 data-[size=sm]:gap-3 data-[size=sm]:py-3 data-[size=sm]:has-data-[slot=card-footer]:pb-0 *:[img:first-child]:rounded-t-xl *:[img:last-child]:rounded-b-xl",
+ className
+ )}
+ {...props}
+ />
+ )
+ }
+
+ function CardHeader({ className, ...props }: React.ComponentProps<"div">) {
+ return (
+ <div
+ data-slot="card-header"
+ className={cn(
+ "group/card-header @container/card-header grid auto-rows-min items-start gap-1 rounded-t-xl px-4 group-data-[size=sm]/card:px-3 has-data-[slot=card-action]:grid-cols-[1fr_auto] has-data-[slot=card-description]:grid-rows-[auto_auto] [.border-b]:pb-4 group-data-[size=sm]/card:[.border-b]:pb-3",
+ className
+ )}
+ {...props}
+ />
+ )
+ }
+
+ function CardTitle({ className, ...props }: React.ComponentProps<"div">) {
+ return (
+ <div
+ data-slot="card-title"
+ className={cn(
+ "font-heading text-base leading-snug font-medium group-data-[size=sm]/card:text-sm",
+ className
+ )}
+ {...props}
+ />
+ )
+ }
+
+ function CardDescription({ className, ...props }: React.ComponentProps<"div">) {
+ return (
+ <div
+ data-slot="card-description"
+ className={cn("text-sm text-muted-foreground", className)}
+ {...props}
+ />
+ )
+ }
+
+ function CardAction({ className, ...props }: React.ComponentProps<"div">) {
+ return (
+ <div
+ data-slot="card-action"
+ className={cn(
+ "col-start-2 row-span-2 row-start-1 self-start justify-self-end",
+ className
+ )}
+ {...props}
+ />
+ )
+ }
+
+ function CardContent({ className, ...props }: React.ComponentProps<"div">) {
+ return (
+ <div
+ data-slot="card-content"
+ className={cn("px-4 group-data-[size=sm]/card:px-3", className)}
+ {...props}
+ />
+ )
+ }
+
+ function CardFooter({ className, ...props }: React.ComponentProps<"div">) {
+ return (
+ <div
+ data-slot="card-footer"
+ className={cn(
+ "flex items-center rounded-b-xl border-t bg-muted/50 p-4 group-data-[size=sm]/card:p-3",
+ className
+ )}
+ {...props}
+ />
+ )
+ }
+
+ export {
+ Card,
+ CardHeader,
+ CardFooter,
+ CardTitle,
+ CardAction,
+ CardDescription,
+ CardContent,
+ }
web/components/ui/separator.tsx ADDED
@@ -0,0 +1,25 @@
+ "use client"
+
+ import { Separator as SeparatorPrimitive } from "@base-ui/react/separator"
+
+ import { cn } from "@/lib/utils"
+
+ function Separator({
+ className,
+ orientation = "horizontal",
+ ...props
+ }: SeparatorPrimitive.Props) {
+ return (
+ <SeparatorPrimitive
+ data-slot="separator"
+ orientation={orientation}
+ className={cn(
+ "shrink-0 bg-border data-horizontal:h-px data-horizontal:w-full data-vertical:w-px data-vertical:self-stretch",
+ className
+ )}
+ {...props}
+ />
+ )
+ }
+
+ export { Separator }
web/components/ui/skeleton.tsx ADDED
@@ -0,0 +1,13 @@
+ import { cn } from "@/lib/utils"
+
+ function Skeleton({ className, ...props }: React.ComponentProps<"div">) {
+ return (
+ <div
+ data-slot="skeleton"
+ className={cn("animate-pulse rounded-md bg-muted", className)}
+ {...props}
+ />
+ )
+ }
+
+ export { Skeleton }
web/components/ui/sonner.tsx ADDED
@@ -0,0 +1,49 @@
+ "use client"
+
+ import { useTheme } from "next-themes"
+ import { Toaster as Sonner, type ToasterProps } from "sonner"
+ import { CircleCheckIcon, InfoIcon, TriangleAlertIcon, OctagonXIcon, Loader2Icon } from "lucide-react"
+
+ const Toaster = ({ ...props }: ToasterProps) => {
+ const { theme = "system" } = useTheme()
+
+ return (
+ <Sonner
+ theme={theme as ToasterProps["theme"]}
+ className="toaster group"
+ icons={{
+ success: (
+ <CircleCheckIcon className="size-4" />
+ ),
+ info: (
+ <InfoIcon className="size-4" />
+ ),
+ warning: (
+ <TriangleAlertIcon className="size-4" />
+ ),
+ error: (
+ <OctagonXIcon className="size-4" />
+ ),
+ loading: (
+ <Loader2Icon className="size-4 animate-spin" />
+ ),
+ }}
+ style={
+ {
+ "--normal-bg": "var(--popover)",
+ "--normal-text": "var(--popover-foreground)",
+ "--normal-border": "var(--border)",
+ "--border-radius": "var(--radius)",
+ } as React.CSSProperties
+ }
+ toastOptions={{
+ classNames: {
+ toast: "cn-toast",
+ },
+ }}
+ {...props}
+ />
+ )
+ }
+
+ export { Toaster }
web/components/ui/tabs.tsx ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client"
+
+ import { Tabs as TabsPrimitive } from "@base-ui/react/tabs"
+ import { cva, type VariantProps } from "class-variance-authority"
+
+ import { cn } from "@/lib/utils"
+
+ function Tabs({
+   className,
+   orientation = "horizontal",
+   ...props
+ }: TabsPrimitive.Root.Props) {
+   return (
+     <TabsPrimitive.Root
+       data-slot="tabs"
+       data-orientation={orientation}
+       orientation={orientation}
+       className={cn(
+         "group/tabs flex gap-2 data-horizontal:flex-col",
+         className
+       )}
+       {...props}
+     />
+   )
+ }
+
+ const tabsListVariants = cva(
+   "group/tabs-list inline-flex w-fit items-center justify-center rounded-lg p-[3px] text-muted-foreground group-data-horizontal/tabs:h-8 group-data-vertical/tabs:h-fit group-data-vertical/tabs:flex-col data-[variant=line]:rounded-none",
+   {
+     variants: {
+       variant: {
+         default: "bg-muted",
+         line: "gap-1 bg-transparent",
+       },
+     },
+     defaultVariants: {
+       variant: "default",
+     },
+   }
+ )
+
+ function TabsList({
+   className,
+   variant = "default",
+   ...props
+ }: TabsPrimitive.List.Props & VariantProps<typeof tabsListVariants>) {
+   return (
+     <TabsPrimitive.List
+       data-slot="tabs-list"
+       data-variant={variant}
+       className={cn(tabsListVariants({ variant }), className)}
+       {...props}
+     />
+   )
+ }
+
+ function TabsTrigger({ className, ...props }: TabsPrimitive.Tab.Props) {
+   return (
+     <TabsPrimitive.Tab
+       data-slot="tabs-trigger"
+       className={cn(
+         "relative inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center gap-1.5 rounded-md border border-transparent px-1.5 py-0.5 text-sm font-medium whitespace-nowrap text-foreground/60 transition-all group-data-vertical/tabs:w-full group-data-vertical/tabs:justify-start hover:text-foreground focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 focus-visible:outline-1 focus-visible:outline-ring disabled:pointer-events-none disabled:opacity-50 has-data-[icon=inline-end]:pr-1 has-data-[icon=inline-start]:pl-1 aria-disabled:pointer-events-none aria-disabled:opacity-50 dark:text-muted-foreground dark:hover:text-foreground group-data-[variant=default]/tabs-list:data-active:shadow-sm group-data-[variant=line]/tabs-list:data-active:shadow-none [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
+         "group-data-[variant=line]/tabs-list:bg-transparent group-data-[variant=line]/tabs-list:data-active:bg-transparent dark:group-data-[variant=line]/tabs-list:data-active:border-transparent dark:group-data-[variant=line]/tabs-list:data-active:bg-transparent",
+         "data-active:bg-background data-active:text-foreground dark:data-active:border-input dark:data-active:bg-input/30 dark:data-active:text-foreground",
+         "after:absolute after:bg-foreground after:opacity-0 after:transition-opacity group-data-horizontal/tabs:after:inset-x-0 group-data-horizontal/tabs:after:bottom-[-5px] group-data-horizontal/tabs:after:h-0.5 group-data-vertical/tabs:after:inset-y-0 group-data-vertical/tabs:after:-right-1 group-data-vertical/tabs:after:w-0.5 group-data-[variant=line]/tabs-list:data-active:after:opacity-100",
+         className
+       )}
+       {...props}
+     />
+   )
+ }
+
+ function TabsContent({ className, ...props }: TabsPrimitive.Panel.Props) {
+   return (
+     <TabsPrimitive.Panel
+       data-slot="tabs-content"
+       className={cn("flex-1 text-sm outline-none", className)}
+       {...props}
+     />
+   )
+ }
+
+ export { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants }
web/lib/canvas-utils.ts ADDED
@@ -0,0 +1,78 @@
+ // canvas-utils.ts — pure functions for canvas coord transforms + hit testing
+
+ import type { Bbox } from "./types";
+
+ export type Viewport = {
+   scale: number; // image pixels → canvas pixels multiplier
+   offsetX: number; // top-left of image in canvas coords
+   offsetY: number;
+ };
+
+ /** Compute the viewport that fits an image inside a canvas, preserving aspect ratio. */
+ export function fitViewport(
+   imgW: number,
+   imgH: number,
+   canvasW: number,
+   canvasH: number,
+   padding = 0.05,
+ ): Viewport {
+   const padPx = Math.min(canvasW, canvasH) * padding;
+   const availW = canvasW - padPx * 2;
+   const availH = canvasH - padPx * 2;
+   const scale = Math.min(availW / imgW, availH / imgH);
+   const offsetX = (canvasW - imgW * scale) / 2;
+   const offsetY = (canvasH - imgH * scale) / 2;
+   return { scale, offsetX, offsetY };
+ }
+
+ /** Apply zoom around a point in canvas coords (e.g. mouse cursor). */
+ export function zoomAt(
+   vp: Viewport,
+   canvasX: number,
+   canvasY: number,
+   factor: number,
+ ): Viewport {
+   // The image-space point under (canvasX, canvasY) before zoom:
+   const imgX = (canvasX - vp.offsetX) / vp.scale;
+   const imgY = (canvasY - vp.offsetY) / vp.scale;
+   const newScale = vp.scale * factor;
+   // Adjust offsets so that same image-space point lands under the same canvas point:
+   return {
+     scale: newScale,
+     offsetX: canvasX - imgX * newScale,
+     offsetY: canvasY - imgY * newScale,
+   };
+ }
+
+ /** Convert image-pixel coords → canvas coords. */
+ export function imageToCanvas(vp: Viewport, x: number, y: number) {
+   return { cx: vp.offsetX + x * vp.scale, cy: vp.offsetY + y * vp.scale };
+ }
+
+ /** Convert canvas coords → image-pixel coords. */
+ export function canvasToImage(vp: Viewport, cx: number, cy: number) {
+   return { x: (cx - vp.offsetX) / vp.scale, y: (cy - vp.offsetY) / vp.scale };
+ }
+
+ /** Hit test: which bbox (by index) contains the canvas point? Returns the SMALLEST. */
+ export function hitBbox(
+   bboxes: Bbox[],
+   canvasX: number,
+   canvasY: number,
+   vp: Viewport,
+ ): number | null {
+   const { x, y } = canvasToImage(vp, canvasX, canvasY);
+   let bestIdx: number | null = null;
+   let bestArea = Infinity;
+   for (let i = 0; i < bboxes.length; i++) {
+     const b = bboxes[i];
+     if (x >= b.x1 && x <= b.x2 && y >= b.y1 && y <= b.y2) {
+       const area = (b.x2 - b.x1) * (b.y2 - b.y1);
+       if (area < bestArea) {
+         bestArea = area;
+         bestIdx = i;
+       }
+     }
+   }
+   return bestIdx;
+ }
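A quick sanity check of the viewport math in canvas-utils.ts: the whole point of `zoomAt` is that the image point under the cursor stays fixed through a zoom. The sketch below is self-contained (the relevant functions are copied inline from the file above) and the specific image/canvas sizes are made up for illustration.

```typescript
// Copied from canvas-utils.ts so this sketch runs standalone.
type Viewport = { scale: number; offsetX: number; offsetY: number };

function fitViewport(imgW: number, imgH: number, canvasW: number, canvasH: number, padding = 0.05): Viewport {
  const padPx = Math.min(canvasW, canvasH) * padding;
  const scale = Math.min((canvasW - padPx * 2) / imgW, (canvasH - padPx * 2) / imgH);
  return { scale, offsetX: (canvasW - imgW * scale) / 2, offsetY: (canvasH - imgH * scale) / 2 };
}

function zoomAt(vp: Viewport, canvasX: number, canvasY: number, factor: number): Viewport {
  const imgX = (canvasX - vp.offsetX) / vp.scale;
  const imgY = (canvasY - vp.offsetY) / vp.scale;
  const newScale = vp.scale * factor;
  return { scale: newScale, offsetX: canvasX - imgX * newScale, offsetY: canvasY - imgY * newScale };
}

function canvasToImage(vp: Viewport, cx: number, cy: number) {
  return { x: (cx - vp.offsetX) / vp.scale, y: (cy - vp.offsetY) / vp.scale };
}

// Hypothetical sizes: fit a 4000x3000 image into an 800x600 canvas, then zoom 2x
// around the canvas center (400, 300).
const vp = fitViewport(4000, 3000, 800, 600);
const before = canvasToImage(vp, 400, 300);
const zoomed = zoomAt(vp, 400, 300, 2);
const after = canvasToImage(zoomed, 400, 300);
// The image point under the cursor is unchanged by the zoom:
console.log(Math.abs(before.x - after.x) < 1e-9 && Math.abs(before.y - after.y) < 1e-9); // true
```

With these numbers the fitted scale is 0.18 (limited by height: 540/3000), and the point under (400, 300) is image pixel (2000, 1500) both before and after the zoom.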
web/lib/r2.ts ADDED
@@ -0,0 +1,91 @@
+ import {
+   S3Client,
+   GetObjectCommand,
+   ListObjectsV2Command,
+   PutObjectCommand,
+   type ListObjectsV2CommandOutput,
+ } from "@aws-sdk/client-s3";
+ import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
+
+ // R2 credentials must be supplied via environment variables.
+ // Set these in web/.env.local (gitignored) — see web/.env.example.
+ //
+ // R2_ENDPOINT_URL=https://<account>.r2.cloudflarestorage.com
+ // R2_ACCESS_KEY_ID=...
+ // R2_SECRET_ACCESS_KEY=...
+ // R2_BUCKET=your-bucket-name
+ //
+ // These live server-side only and are never exposed to the browser.
+ const R2_ENDPOINT = process.env.R2_ENDPOINT_URL;
+ const R2_ACCESS_KEY = process.env.R2_ACCESS_KEY_ID;
+ const R2_SECRET_KEY = process.env.R2_SECRET_ACCESS_KEY;
+ export const R2_BUCKET = process.env.R2_BUCKET ?? "";
+
+ if (!R2_ENDPOINT || !R2_ACCESS_KEY || !R2_SECRET_KEY || !R2_BUCKET) {
+   throw new Error(
+     "R2 is not configured. Set R2_ENDPOINT_URL, R2_ACCESS_KEY_ID, " +
+       "R2_SECRET_ACCESS_KEY, and R2_BUCKET in web/.env.local. " +
+       "See web/.env.example for the full list."
+   );
+ }
+
+ export const r2 = new S3Client({
+   region: "auto",
+   endpoint: R2_ENDPOINT,
+   credentials: {
+     accessKeyId: R2_ACCESS_KEY,
+     secretAccessKey: R2_SECRET_KEY,
+   },
+ });
+
+ /**
+  * Generate a short-lived presigned GET URL for an R2 object so the browser
+  * can fetch it directly without exposing credentials.
+  */
+ export async function presignGet(key: string, expiresIn = 3600): Promise<string> {
+   const cmd = new GetObjectCommand({ Bucket: R2_BUCKET, Key: key });
+   return getSignedUrl(r2, cmd, { expiresIn });
+ }
+
+ /**
+  * List all object keys under a prefix in our bucket.
+  */
+ export async function listAll(prefix: string): Promise<string[]> {
+   const out: string[] = [];
+   let continuationToken: string | undefined = undefined;
+   do {
+     const resp: ListObjectsV2CommandOutput = await r2.send(
+       new ListObjectsV2Command({
+         Bucket: R2_BUCKET,
+         Prefix: prefix,
+         ContinuationToken: continuationToken,
+       }),
+     );
+     for (const obj of resp.Contents ?? []) {
+       if (obj.Key) out.push(obj.Key);
+     }
+     continuationToken = resp.IsTruncated ? resp.NextContinuationToken : undefined;
+   } while (continuationToken);
+   return out;
+ }
+
+ /**
+  * Read a JSON object from R2.
+  */
+ export async function getJson<T>(key: string): Promise<T | null> {
+   try {
+     const resp = await r2.send(new GetObjectCommand({ Bucket: R2_BUCKET, Key: key }));
+     const text = await resp.Body!.transformToString();
+     return JSON.parse(text) as T;
+   } catch {
+     return null;
+   }
+ }
+
+ /**
+  * Write a JSON object to R2.
+  */
+ export async function putJson(key: string, value: unknown): Promise<void> {
+   await r2.send(
+     new PutObjectCommand({
+       Bucket: R2_BUCKET,
+       Key: key,
+       Body: JSON.stringify(value, null, 2),
+       ContentType: "application/json",
+     }),
+   );
+ }
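The do/while in `listAll` is the standard ListObjectsV2 pagination pattern: keep requesting pages while `IsTruncated` is set, threading `NextContinuationToken` into the next request. A self-contained mock of that loop (no AWS SDK; the pages, keys, and `fakeSend` client below are all invented for illustration):

```typescript
// Shape of the fields the pagination loop actually reads from each response.
type Page = { Contents: { Key?: string }[]; IsTruncated: boolean; NextContinuationToken?: string };

// Two fake response pages keyed by continuation token ("start" = first request).
const pages: Record<string, Page> = {
  start: { Contents: [{ Key: "raw/a.jpg" }, { Key: "raw/b.jpg" }], IsTruncated: true, NextContinuationToken: "t1" },
  t1: { Contents: [{ Key: "raw/c.jpg" }], IsTruncated: false },
};

// Stand-in for r2.send(new ListObjectsV2Command({...})).
function fakeSend(token?: string): Page {
  return pages[token ?? "start"];
}

// Same loop structure as listAll above, minus the real client.
function listAllMock(): string[] {
  const out: string[] = [];
  let token: string | undefined;
  do {
    const resp = fakeSend(token);
    for (const obj of resp.Contents) if (obj.Key) out.push(obj.Key);
    token = resp.IsTruncated ? resp.NextContinuationToken : undefined;
  } while (token);
  return out;
}

console.log(listAllMock()); // ["raw/a.jpg", "raw/b.jpg", "raw/c.jpg"]
```

Gating the token on `IsTruncated` (rather than on `NextContinuationToken` alone) matches the loop in `listAll` and guarantees termination once the service reports the final page.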
web/lib/types.ts ADDED
@@ -0,0 +1,86 @@
+ // Types matching pod_label.py output format
+
+ export type Bbox = {
+   x1: number;
+   y1: number;
+   x2: number;
+   y2: number;
+   x1_norm: number;
+   y1_norm: number;
+   x2_norm: number;
+   y2_norm: number;
+   cx_norm: number;
+   cy_norm: number;
+   w_norm: number;
+   h_norm: number;
+   area_fraction: number;
+   annotation_id?: number; // links to verified.json verdict (when present)
+ };
+
+ // VLM verdict from verify_vlm.py output
+ export type VlmVerdict = {
+   annotation_id: number;
+   image_id: number;
+   image_file: string;
+   category_name: string;
+   bbox: number[]; // [x, y, w, h]
+   verdict: "YES" | "NO" | "UNSURE";
+   reasoning: string;
+   elapsed: number;
+ };
+
+ export type VerifiedRun = {
+   run_name: string;
+   model: string;
+   prompt_version: string;
+   crop_padding: number;
+   summary: {
+     completed: number;
+     total: number;
+     yes: number;
+     no: number;
+     unsure: number;
+     yes_rate: number;
+     elapsed_seconds: number;
+     avg_seconds_per_bbox: number;
+   };
+   annotations: VlmVerdict[];
+ };
+
+ export type QueryResult = {
+   bboxes: Bbox[];
+   count: number;
+   elapsed?: number;
+   error?: string;
+ };
+
+ export type ImageResult = {
+   width: number;
+   height: number;
+   queries: Record<string, QueryResult>;
+   error?: string;
+ };
+
+ export type LabelPartial = {
+   completed: number;
+   results: Record<string, ImageResult>;
+ };
+
+ // Verdicts: human review state stored alongside Falcon labels
+ export type BboxVerdict = "approved" | "rejected" | "unsure";
+
+ export type ImageReview = {
+   image_path: string; // R2 key (e.g. "raw/positive/fiber_spool_drone/foo.jpg")
+   bucket: string;
+   width: number;
+   height: number;
+   bboxes: Array<Bbox & {
+     query: string;
+     verdict?: BboxVerdict; // human verdict
+     vlm_verdict?: "YES" | "NO" | "UNSURE"; // Qwen verdict from verify_vlm.py
+     vlm_reasoning?: string;
+     note?: string;
+   }>;
+   image_verdict?: "approved" | "rejected" | "unsure"; // overall image-level call
+   reviewed_at?: string;
+ };
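Note that two bbox conventions coexist in these types: `Bbox` stores corner coordinates (`x1, y1, x2, y2`) while `VlmVerdict.bbox` is COCO-style `[x, y, w, h]`. A hypothetical helper (not part of this commit) showing the conversion between them:

```typescript
// Corner-coordinate subset of the Bbox type above.
type Corners = { x1: number; y1: number; x2: number; y2: number };

// Corner form -> COCO [x, y, w, h], the layout used by VlmVerdict.bbox.
function toCocoBbox(b: Corners): [number, number, number, number] {
  return [b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1];
}

// COCO [x, y, w, h] -> corner form.
function fromCocoBbox([x, y, w, h]: number[]): Corners {
  return { x1: x, y1: y, x2: x + w, y2: y + h };
}

console.log(toCocoBbox({ x1: 10, y1: 20, x2: 110, y2: 70 })); // [10, 20, 100, 50]
```

The two functions are inverses, which is handy when cross-referencing a `VlmVerdict` against the `Bbox` it was generated from.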
web/lib/utils.ts ADDED
@@ -0,0 +1,6 @@
+ import { clsx, type ClassValue } from "clsx"
+ import { twMerge } from "tailwind-merge"
+
+ export function cn(...inputs: ClassValue[]) {
+   return twMerge(clsx(inputs))
+ }
web/next-env.d.ts ADDED
@@ -0,0 +1,6 @@
+ /// <reference types="next" />
+ /// <reference types="next/image-types/global" />
+ import "./.next/dev/types/routes.d.ts";
+
+ // NOTE: This file should not be edited
+ // see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
web/next.config.ts ADDED
@@ -0,0 +1,7 @@
+ import type { NextConfig } from "next";
+
+ const nextConfig: NextConfig = {
+   /* config options here */
+ };
+
+ export default nextConfig;
web/package-lock.json ADDED
web/package.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "name": "web",
+   "version": "0.1.0",
+   "private": true,
+   "scripts": {
+     "dev": "next dev",
+     "build": "next build",
+     "start": "next start"
+   },
+   "dependencies": {
+     "@aws-sdk/client-s3": "^3.1026.0",
+     "@aws-sdk/s3-request-presigner": "^3.1026.0",
+     "@base-ui/react": "^1.3.0",
+     "class-variance-authority": "^0.7.1",
+     "clsx": "^2.1.1",
+     "lucide-react": "^1.7.0",
+     "next": "16.2.2",
+     "next-themes": "^0.4.6",
+     "react": "19.2.4",
+     "react-dom": "19.2.4",
+     "shadcn": "^4.2.0",
+     "sonner": "^2.0.7",
+     "tailwind-merge": "^3.5.0",
+     "tw-animate-css": "^1.4.0"
+   },
+   "devDependencies": {
+     "@tailwindcss/postcss": "^4",
+     "@types/node": "^20",
+     "@types/react": "^19",
+     "@types/react-dom": "^19",
+     "tailwindcss": "^4",
+     "typescript": "^5"
+   }
+ }
web/postcss.config.mjs ADDED
@@ -0,0 +1,7 @@
+ const config = {
+   plugins: {
+     "@tailwindcss/postcss": {},
+   },
+ };
+
+ export default config;
web/public/file.svg ADDED
web/public/globe.svg ADDED
web/public/next.svg ADDED
web/public/vercel.svg ADDED
web/public/window.svg ADDED
web/tsconfig.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "compilerOptions": {
+     "target": "ES2017",
+     "lib": ["dom", "dom.iterable", "esnext"],
+     "allowJs": true,
+     "skipLibCheck": true,
+     "strict": true,
+     "noEmit": true,
+     "esModuleInterop": true,
+     "module": "esnext",
+     "moduleResolution": "bundler",
+     "resolveJsonModule": true,
+     "isolatedModules": true,
+     "jsx": "react-jsx",
+     "incremental": true,
+     "plugins": [
+       {
+         "name": "next"
+       }
+     ],
+     "paths": {
+       "@/*": ["./*"]
+     }
+   },
+   "include": [
+     "next-env.d.ts",
+     "**/*.ts",
+     "**/*.tsx",
+     ".next/types/**/*.ts",
+     ".next/dev/types/**/*.ts",
+     "**/*.mts"
+   ],
+   "exclude": ["node_modules"]
+ }