Initial release: data-label-factory v0.1.0
Browse files- .env.example +13 -0
- .gitattributes +1 -0
- .gitignore +51 -0
- README.md +303 -0
- data_label_factory/__init__.py +13 -0
- data_label_factory/cli.py +629 -0
- data_label_factory/experiments.py +161 -0
- data_label_factory/gather.py +554 -0
- data_label_factory/project.py +176 -0
- docs/canvas-demo.gif +3 -0
- docs/x-launch-thread.md +113 -0
- projects/drones.yaml +133 -0
- projects/stop-signs.yaml +72 -0
- pyproject.toml +72 -0
- setup.py +6 -0
- web/.env.example +11 -0
- web/.gitignore +41 -0
- web/README.md +36 -0
- web/app/api/labels/route.ts +131 -0
- web/app/canvas/page.tsx +332 -0
- web/app/favicon.ico +0 -0
- web/app/globals.css +141 -0
- web/app/layout.tsx +33 -0
- web/app/page.tsx +311 -0
- web/components.json +25 -0
- web/components/BboxCanvas.tsx +329 -0
- web/components/BboxOverlay.tsx +130 -0
- web/components/ui/badge.tsx +52 -0
- web/components/ui/button.tsx +58 -0
- web/components/ui/card.tsx +103 -0
- web/components/ui/separator.tsx +25 -0
- web/components/ui/skeleton.tsx +13 -0
- web/components/ui/sonner.tsx +49 -0
- web/components/ui/tabs.tsx +82 -0
- web/lib/canvas-utils.ts +78 -0
- web/lib/r2.ts +91 -0
- web/lib/types.ts +86 -0
- web/lib/utils.ts +6 -0
- web/next-env.d.ts +6 -0
- web/next.config.ts +7 -0
- web/package-lock.json +0 -0
- web/package.json +34 -0
- web/postcss.config.mjs +7 -0
- web/public/file.svg +1 -0
- web/public/globe.svg +1 -0
- web/public/next.svg +1 -0
- web/public/vercel.svg +1 -0
- web/public/window.svg +1 -0
- web/tsconfig.json +34 -0
.env.example
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# data-label-factory environment configuration
|
| 2 |
+
#
|
| 3 |
+
# Copy to .env (or just `export` these in your shell) and edit URLs
|
| 4 |
+
# to point at the backend(s) you have running.
|
| 5 |
+
|
| 6 |
+
# ----- Qwen 2.5-VL backend (mlx_vlm.server) -----
|
| 7 |
+
# Default: localhost. Override if you run the server on another machine.
|
| 8 |
+
QWEN_URL=http://localhost:8291
|
| 9 |
+
QWEN_MODEL_PATH=mlx-community/Qwen2.5-VL-3B-Instruct-4bit
|
| 10 |
+
|
| 11 |
+
# ----- Gemma 4 + Falcon backend (mac_tensor) -----
|
| 12 |
+
# Default: localhost. Override if you run on a remote Mac (e.g. an M4 Mini).
|
| 13 |
+
GEMMA_URL=http://localhost:8500
|
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
docs/canvas-demo.gif filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.so
|
| 5 |
+
*.egg
|
| 6 |
+
*.egg-info/
|
| 7 |
+
dist/
|
| 8 |
+
build/
|
| 9 |
+
.eggs/
|
| 10 |
+
.pytest_cache/
|
| 11 |
+
.ruff_cache/
|
| 12 |
+
.mypy_cache/
|
| 13 |
+
.venv/
|
| 14 |
+
venv/
|
| 15 |
+
env/
|
| 16 |
+
.python-version
|
| 17 |
+
|
| 18 |
+
# Node / Next.js (web UI)
|
| 19 |
+
web/node_modules/
|
| 20 |
+
web/.next/
|
| 21 |
+
web/.turbo/
|
| 22 |
+
web/out/
|
| 23 |
+
web/*.log
|
| 24 |
+
web/canvas-shot.mjs
|
| 25 |
+
|
| 26 |
+
# Local data + experiment outputs
|
| 27 |
+
data/
|
| 28 |
+
experiments/
|
| 29 |
+
*.coco.json
|
| 30 |
+
*.verified.json
|
| 31 |
+
keep_list.json
|
| 32 |
+
manifest.json
|
| 33 |
+
|
| 34 |
+
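# Local model caches (NOTE: git does not expand `~`; the two patterns below only match a directory literally named `~` inside this repo)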
|
| 35 |
+
~/data-label-factory/
|
| 36 |
+
~/models/
|
| 37 |
+
|
| 38 |
+
# Secrets
|
| 39 |
+
.env
|
| 40 |
+
.env.local
|
| 41 |
+
*.pem
|
| 42 |
+
*.key
|
| 43 |
+
|
| 44 |
+
# OS
|
| 45 |
+
.DS_Store
|
| 46 |
+
Thumbs.db
|
| 47 |
+
|
| 48 |
+
# Editor
|
| 49 |
+
.vscode/
|
| 50 |
+
.idea/
|
| 51 |
+
*.swp
|
README.md
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# data-label-factory
|
| 2 |
+
|
| 3 |
+
A generic auto-labeling pipeline for vision datasets. Pick any object class in
|
| 4 |
+
a YAML file, run one command, and end up with a clean COCO dataset reviewed in
|
| 5 |
+
a browser. Designed to run entirely on a 16 GB Apple Silicon Mac.
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
gather → filter → label → verify → review
|
| 9 |
+
(DDG/ (VLM YES/ (Falcon (VLM per- (canvas
|
| 10 |
+
yt) NO) bbox) bbox) UI)
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
Two interchangeable VLM backends:
|
| 14 |
+
|
| 15 |
+
| Backend | Model | Server | Pick when |
|
| 16 |
+
|---|---|---|---|
|
| 17 |
+
| `qwen` | Qwen 2.5-VL-3B 4-bit | `mlx_vlm.server` | You want fast YES/NO classification (~3.5s/img on M4) |
|
| 18 |
+
| `gemma` | Gemma 4-26B-A4B 4-bit | `mac_tensor` (Expert Sniper) | You want richer reasoning + grounded segmentation in one server |
|
| 19 |
+
|
| 20 |
+
The `label` stage always uses **Falcon Perception** for bbox grounding, served
|
| 21 |
+
out of `mac_tensor` alongside Gemma. Falcon doesn't depend on the VLM choice —
|
| 22 |
+
it's a separate ~600 MB model.
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## What you get when this finishes
|
| 27 |
+
|
| 28 |
+
For our reference run on a fiber-optic-drone detector:
|
| 29 |
+
|
| 30 |
+
- **1421 source images** gathered from DuckDuckGo + Wikimedia + Openverse
|
| 31 |
+
- **15,355 Falcon Perception bboxes** generated via the `label` stage
|
| 32 |
+
- **11,928 / 15,355 (78%)** approved by Qwen 2.5-VL in the `verify` stage
|
| 33 |
+
- **Reviewed in a browser** via the canvas web UI (`web/`)
|
| 34 |
+
|
| 35 |
+
Per-query agreement between Falcon and Qwen on this dataset:
|
| 36 |
+
`cable spool` 88%, `quadcopter` 81%, `drone` 80%, `fiber optic spool` 57%.
|
| 37 |
+
|
| 38 |
+
You can reproduce all of this from this repo by following the steps below.
|
| 39 |
+
|
| 40 |
+
---
|
| 41 |
+
|
| 42 |
+
## 1. Install
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
# Clone
|
| 46 |
+
git clone https://github.com/walter-grace/data-label-factory.git
|
| 47 |
+
cd data-label-factory
|
| 48 |
+
|
| 49 |
+
# Install the CLI (registers `data_label_factory` on your $PATH)
|
| 50 |
+
pip install -e .
|
| 51 |
+
|
| 52 |
+
# (Optional) Add image-search dependencies for the `gather` stage
|
| 53 |
+
pip install -e ".[gather]"
|
| 54 |
+
|
| 55 |
+
# (Optional) Web UI deps — only if you want to review labels in a browser
|
| 56 |
+
cd web && npm install && cd ..
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
You can also install directly from HuggingFace:
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
pip install git+https://huggingface.co/waltgrace/data-label-factory
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
The factory CLI needs Python 3.10+. The backend servers (Qwen and/or Gemma)
|
| 66 |
+
are installed separately — you only need the one(s) you plan to use.
|
| 67 |
+
|
| 68 |
+
---
|
| 69 |
+
|
| 70 |
+
## 2. Pick a backend and start it
|
| 71 |
+
|
| 72 |
+
### Option A — Qwen 2.5-VL (recommended for filter/verify)
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
# Install mlx-vlm (Apple Silicon)
|
| 76 |
+
pip install mlx-vlm
|
| 77 |
+
|
| 78 |
+
# Start the OpenAI-compatible server
|
| 79 |
+
python3 -m mlx_vlm.server \
|
| 80 |
+
--model mlx-community/Qwen2.5-VL-3B-Instruct-4bit \
|
| 81 |
+
--port 8291
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
Verify it's alive:
|
| 85 |
+
|
| 86 |
+
```bash
|
| 87 |
+
QWEN_URL=http://localhost:8291 data_label_factory status
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
### Option B — Gemma 4 + Falcon (recommended for `label`)
|
| 91 |
+
|
| 92 |
+
This is the [MLX Expert Sniper](https://github.com/walter-grace/mac-code) deploy
|
| 93 |
+
package. It serves Gemma 4-26B-A4B (chat / `--vision`) **and** Falcon Perception
|
| 94 |
+
(`--falcon`) from the same process at port 8500. Total ~5 GB resident on a 16 GB
|
| 95 |
+
Mac via SSD-streamed experts.
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
# Install + download model (one-time, ~13 GB)
|
| 99 |
+
git clone https://github.com/walter-grace/mac-code
|
| 100 |
+
cd mac-code/research/expert-sniper/distributed
|
| 101 |
+
pip install -e . mlx mlx-vlm fastapi uvicorn pillow huggingface_hub python-multipart
|
| 102 |
+
|
| 103 |
+
huggingface-cli download mlx-community/gemma-4-26b-a4b-it-4bit \
|
| 104 |
+
--local-dir ~/models/gemma4-source
|
| 105 |
+
python3 split_gemma4.py \
|
| 106 |
+
--input ~/models/gemma4-source \
|
| 107 |
+
--output ~/models/gemma4-stream
|
| 108 |
+
|
| 109 |
+
# Launch
|
| 110 |
+
python3 -m mac_tensor ui --vision --falcon \
|
| 111 |
+
--stream-dir ~/models/gemma4-stream \
|
| 112 |
+
--source-dir ~/models/gemma4-source \
|
| 113 |
+
--port 8500
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Verify:
|
| 117 |
+
|
| 118 |
+
```bash
|
| 119 |
+
GEMMA_URL=http://localhost:8500 data_label_factory status
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
You can run **both** servers at the same time. The factory CLI will use whichever
|
| 123 |
+
backend you select per command via `--backend qwen|gemma`.
|
| 124 |
+
|
| 125 |
+
---
|
| 126 |
+
|
| 127 |
+
## 3. Define a project
|
| 128 |
+
|
| 129 |
+
A project YAML is the *only* thing you need to write to onboard a new object
|
| 130 |
+
class. Two examples ship in `projects/`:
|
| 131 |
+
|
| 132 |
+
- [`projects/drones.yaml`](projects/drones.yaml) — fiber-optic drone detection (the original use case)
|
| 133 |
+
- [`projects/stop-signs.yaml`](projects/stop-signs.yaml) — minimal smoke test
|
| 134 |
+
|
| 135 |
+
Copy one and edit the four important fields:
|
| 136 |
+
|
| 137 |
+
```yaml
|
| 138 |
+
project_name: fire-hydrants
|
| 139 |
+
target_object: "fire hydrant" # templated into all prompts as {target_object}
|
| 140 |
+
data_root: ~/data-label-factory/fire-hydrants
|
| 141 |
+
|
| 142 |
+
buckets:
|
| 143 |
+
positive/clear_view:
|
| 144 |
+
queries: ["red fire hydrant", "yellow fire hydrant", "fire hydrant on sidewalk"]
|
| 145 |
+
negative/other_street_objects:
|
| 146 |
+
queries: ["mailbox", "parking meter", "trash can"]
|
| 147 |
+
background/empty_streets:
|
| 148 |
+
queries: ["empty city street", "suburban sidewalk"]
|
| 149 |
+
|
| 150 |
+
falcon_queries: # what Falcon will look for during `label`
|
| 151 |
+
- "fire hydrant"
|
| 152 |
+
- "red metal post"
|
| 153 |
+
|
| 154 |
+
backends:
|
| 155 |
+
filter: qwen # default per stage; CLI --backend overrides
|
| 156 |
+
label: gemma
|
| 157 |
+
verify: qwen
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
Inspect a project before running anything:
|
| 161 |
+
|
| 162 |
+
```bash
|
| 163 |
+
data_label_factory project --project projects/fire-hydrants.yaml
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
---
|
| 167 |
+
|
| 168 |
+
## 4. Run the pipeline
|
| 169 |
+
|
| 170 |
+
The four stages can be run individually or chained:
|
| 171 |
+
|
| 172 |
+
```bash
|
| 173 |
+
PROJECT=projects/stop-signs.yaml
|
| 174 |
+
|
| 175 |
+
# 4a. Gather — image search across buckets
|
| 176 |
+
data_label_factory gather --project $PROJECT --max-per-query 30
|
| 177 |
+
|
| 178 |
+
# 4b. Filter — image-level YES/NO via your chosen VLM
|
| 179 |
+
data_label_factory filter --project $PROJECT --backend qwen
|
| 180 |
+
|
| 181 |
+
# 4c. Label — Falcon Perception bbox grounding (needs Gemma server up)
|
| 182 |
+
data_label_factory label --project $PROJECT
|
| 183 |
+
|
| 184 |
+
# 4d. Verify — per-bbox YES/NO via your chosen VLM
|
| 185 |
+
# (verify is a TODO in the generic CLI today; runpod_falcon/verify_vlm.py
|
| 186 |
+
# is the original drone-specific impl that the generic version will wrap.)
|
| 187 |
+
|
| 188 |
+
# OR run gather → filter end-to-end:
|
| 189 |
+
data_label_factory pipeline --project $PROJECT --backend qwen
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
Every command writes a timestamped folder under `experiments/` (relative to
|
| 193 |
+
your current working directory) with the config, prompts, raw model answers,
|
| 194 |
+
and JSON outputs. List them with:
|
| 195 |
+
|
| 196 |
+
```bash
|
| 197 |
+
data_label_factory list
|
| 198 |
+
```
|
| 199 |
+
|
| 200 |
+
---
|
| 201 |
+
|
| 202 |
+
## 5. Review the labels in a browser
|
| 203 |
+
|
| 204 |
+
The `web/` directory is a Next.js + HTML5 Canvas review tool. It reads your
|
| 205 |
+
labeled JSON straight from R2 (or local — see `web/app/api/labels/route.ts`)
|
| 206 |
+
and renders the bboxes over each image with hover, click-to-select, scroll-zoom,
|
| 207 |
+
and keyboard navigation.
|
| 208 |
+
|
| 209 |
+
```bash
|
| 210 |
+
cd web
|
| 211 |
+
PORT=3030 npm run dev
|
| 212 |
+
# open http://localhost:3030/canvas
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
Features:
|
| 216 |
+
- **Drag** to pan, **scroll** to zoom around the cursor, **double-click** to reset
|
| 217 |
+
- **←/→** to navigate images, **click** a bbox to select it
|
| 218 |
+
- **Color coding**: per-query color, dashed red for VLM rejections, white outline for active
|
| 219 |
+
- **Bucket tabs** to filter by source bucket
|
| 220 |
+
- **Per-image query summary** with YES/NO counts
|
| 221 |
+
|
| 222 |
+
The grid view at `http://localhost:3030/` is the older shadcn-based browser
|
| 223 |
+
with thumbnail-grid + per-bbox approve/reject buttons.
|
| 224 |
+
|
| 225 |
+
---
|
| 226 |
+
|
| 227 |
+
## Configuration reference
|
| 228 |
+
|
| 229 |
+
### Environment variables
|
| 230 |
+
|
| 231 |
+
| Var | Default | What |
|
| 232 |
+
|---|---|---|
|
| 233 |
+
| `QWEN_URL` | `http://localhost:8291` | Where the `mlx_vlm.server` lives |
|
| 234 |
+
| `QWEN_MODEL_PATH` | `mlx-community/Qwen2.5-VL-3B-Instruct-4bit` | Model id sent in the OpenAI request |
|
| 235 |
+
| `GEMMA_URL` | `http://localhost:8500` | Where `mac_tensor` lives (also serves Falcon) |
|
| 236 |
+
|
| 237 |
+
Set them inline for one command, or `export` them in your shell.
|
| 238 |
+
|
| 239 |
+
### CLI flags
|
| 240 |
+
|
| 241 |
+
```
|
| 242 |
+
data_label_factory <command> [flags]
|
| 243 |
+
|
| 244 |
+
Commands:
|
| 245 |
+
status Check both backends are alive
|
| 246 |
+
project --project P Print a project YAML for inspection
|
| 247 |
+
gather --project P Search the web for images across buckets
|
| 248 |
+
filter --project P Image-level YES/NO via Qwen or Gemma
|
| 249 |
+
label --project P Falcon Perception bbox grounding
|
| 250 |
+
pipeline --project P gather → filter
|
| 251 |
+
list Show experiments
|
| 252 |
+
|
| 253 |
+
Common flags:
|
| 254 |
+
--backend qwen|gemma Pick the VLM (filter, pipeline). Overrides project YAML.
|
| 255 |
+
--limit N Process at most N images (smoke testing)
|
| 256 |
+
--experiment NAME Reuse an existing experiment dir
|
| 257 |
+
```
|
| 258 |
+
|
| 259 |
+
### Project YAML reference
|
| 260 |
+
|
| 261 |
+
See [`projects/drones.yaml`](projects/drones.yaml) for the canonical, fully
|
| 262 |
+
commented example. Required fields: `project_name`, `target_object`, `buckets`,
|
| 263 |
+
`falcon_queries`. Everything else has defaults.
|
| 264 |
+
|
| 265 |
+
---
|
| 266 |
+
|
| 267 |
+
## How big is this thing?
|
| 268 |
+
|
| 269 |
+
| Component | Disk | RAM (resident) |
|
| 270 |
+
|---|---|---|
|
| 271 |
+
| Factory CLI + Python deps | < 50 MB | negligible |
|
| 272 |
+
| Qwen 2.5-VL-3B 4-bit | ~2.2 GB | ~2.5 GB |
|
| 273 |
+
| Gemma 4-26B-A4B (Expert Sniper streaming) | ~13 GB on disk | ~3 GB |
|
| 274 |
+
| Falcon Perception 0.6B | ~1.5 GB | ~1.5 GB |
|
| 275 |
+
| Web UI dev server | ~300 MB node_modules | ~150 MB |
|
| 276 |
+
| **Total (Gemma + Falcon path)** | **~17 GB** | **~5 GB** |
|
| 277 |
+
|
| 278 |
+
Fits comfortably on a 16 GB Apple Silicon Mac.
|
| 279 |
+
|
| 280 |
+
---
|
| 281 |
+
|
| 282 |
+
## Known issues
|
| 283 |
+
|
| 284 |
+
1. **Gemma `/api/chat_vision` is unreliable for batch YES/NO prompts.** When the
|
| 285 |
+
chained agent doesn't see a clear reason to call Falcon, it can stall. For the
|
| 286 |
+
`filter` and `verify` stages, prefer `--backend qwen`. Gemma is rock solid for
|
| 287 |
+
the `label` stage (which uses `/api/falcon` directly).
|
| 288 |
+
2. **The generic `verify` command is a TODO** — the original drone-specific
|
| 289 |
+
`runpod_falcon/verify_vlm.py` works today, the generic wrapper is a small
|
| 290 |
+
refactor still pending.
|
| 291 |
+
3. **Image search hits DDG rate limits** if you run with too high a `--max-per-query` value.
|
| 292 |
+
30-50 per query is comfortable; beyond ~100 you'll see throttling.
|
| 293 |
+
|
| 294 |
+
---
|
| 295 |
+
|
| 296 |
+
## Credits
|
| 297 |
+
|
| 298 |
+
- **Falcon Perception** by TII — Apache 2.0
|
| 299 |
+
- **Gemma 4** by Google DeepMind — Apache 2.0
|
| 300 |
+
- **Qwen 2.5-VL** by Alibaba — Apache 2.0
|
| 301 |
+
- **MLX** by Apple Machine Learning Research — MIT
|
| 302 |
+
- **mlx-vlm** by Prince Canuma — MIT
|
| 303 |
+
- **MLX Expert Sniper** streaming engine by [walter-grace](https://github.com/walter-grace/mac-code)
|
data_label_factory/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""data_label_factory — generic auto-labeling pipeline for vision datasets.
|
| 2 |
+
|
| 3 |
+
Public API:
|
| 4 |
+
load_project(path) → ProjectConfig
|
| 5 |
+
ProjectConfig → loaded project YAML with helpers
|
| 6 |
+
|
| 7 |
+
CLI entry point: `data_label_factory` (defined in pyproject.toml).
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from .project import load_project, ProjectConfig
|
| 11 |
+
|
| 12 |
+
__version__ = "0.1.0"
|
| 13 |
+
__all__ = ["load_project", "ProjectConfig", "__version__"]
|
data_label_factory/cli.py
ADDED
|
@@ -0,0 +1,629 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
data_label_factory — generic data labeling pipeline driven by a project YAML.
|
| 4 |
+
|
| 5 |
+
Same architecture as drone_factory but TARGET-AGNOSTIC. Pick any object class,
|
| 6 |
+
write a project YAML, run the same pipeline. Drones, stop signs, fire hydrants,
|
| 7 |
+
manufacturing defects — same scripts, different config.
|
| 8 |
+
|
| 9 |
+
Subcommands:
|
| 10 |
+
status check M4 backends are alive
|
| 11 |
+
gather DDG image search → local cache (uses project bucket queries)
|
| 12 |
+
filter image-level YES/NO classification
|
| 13 |
+
label Falcon Perception bbox grounding (or Qwen if config says so)
|
| 14 |
+
verify per-bbox YES/NO classification
|
| 15 |
+
pipeline full chain: gather → filter → label → verify
|
| 16 |
+
list list experiments
|
| 17 |
+
show <experiment> show experiment details
|
| 18 |
+
project dump a project YAML for inspection
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
# Inspect a project config
|
| 22 |
+
data_label_factory project --project projects/drones.yaml
|
| 23 |
+
|
| 24 |
+
# Run the entire pipeline for a project
|
| 25 |
+
data_label_factory pipeline --project projects/stop-signs.yaml --max-per-query 20
|
| 26 |
+
|
| 27 |
+
# Just gather (no labeling)
|
| 28 |
+
data_label_factory gather --project projects/drones.yaml --max-per-query 30
|
| 29 |
+
|
| 30 |
+
# Filter a specific experiment
|
| 31 |
+
data_label_factory filter --project projects/drones.yaml --experiment latest
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
import argparse
|
| 35 |
+
import base64
|
| 36 |
+
import io
|
| 37 |
+
import json
|
| 38 |
+
import os
|
| 39 |
+
import subprocess
|
| 40 |
+
import sys
|
| 41 |
+
import time
|
| 42 |
+
import urllib.request
|
| 43 |
+
from collections import defaultdict
|
| 44 |
+
from datetime import datetime
|
| 45 |
+
from pathlib import Path
|
| 46 |
+
|
| 47 |
+
HERE = os.path.dirname(os.path.abspath(__file__))
|
| 48 |
+
|
| 49 |
+
from .project import load_project, ProjectConfig
|
| 50 |
+
from .experiments import (
|
| 51 |
+
make_experiment_dir, write_readme, write_config,
|
| 52 |
+
update_latest_symlink, list_experiments,
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# ============================================================
|
| 57 |
+
# CONFIG — overridable via environment variables
|
| 58 |
+
# ============================================================
|
| 59 |
+
#
|
| 60 |
+
# Users pick a VLM backend at runtime via --backend qwen|gemma.
|
| 61 |
+
#
|
| 62 |
+
# qwen → Qwen 2.5-VL via mlx_vlm.server (default URL: http://localhost:8291)
|
| 63 |
+
# gemma → Gemma 4 via mac_tensor (default URL: http://localhost:8500)
|
| 64 |
+
#
|
| 65 |
+
# Falcon Perception (bbox grounding for `label`) is bundled with mac_tensor and
|
| 66 |
+
# is always reached via the GEMMA_URL regardless of which VLM you picked for
|
| 67 |
+
# the chat-style YES/NO stages.
|
| 68 |
+
#
|
| 69 |
+
# Override URLs via env vars when running against a remote machine, e.g.:
|
| 70 |
+
# QWEN_URL=http://10.0.0.5:8291 data_label_factory filter --project ...
|
| 71 |
+
|
| 72 |
+
QWEN_URL = os.environ.get("QWEN_URL", "http://localhost:8291")
|
| 73 |
+
QWEN_MODEL_PATH = os.environ.get(
|
| 74 |
+
"QWEN_MODEL_PATH", "mlx-community/Qwen2.5-VL-3B-Instruct-4bit"
|
| 75 |
+
)
|
| 76 |
+
GEMMA_URL = os.environ.get("GEMMA_URL", "http://localhost:8500")
|
| 77 |
+
|
| 78 |
+
VALID_BACKENDS = ("qwen", "gemma")
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# ============================================================
|
| 82 |
+
# BACKEND CLIENTS (reused)
|
| 83 |
+
# ============================================================
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def call_qwen(image_path: str, prompt: str, timeout: int = 60) -> tuple:
    """Ask the Qwen backend a question about a single image.

    The image is converted to RGB, downscaled so that its longest side is at
    most 1024 px, PNG-encoded and embedded as a base64 data URL in a
    chat-completions request POSTed to ``{QWEN_URL}/v1/chat/completions``.

    Args:
        image_path: Path of the image file to send.
        prompt: Text placed after the image in the single user message.
        timeout: Timeout in seconds handed to ``urllib.request.urlopen``.

    Returns:
        ``(text, elapsed_seconds)``: the stripped ``content`` of the first
        choice in the response, and the wall-clock duration of the HTTP call.

    Raises:
        Errors from PIL (unreadable image), ``urllib`` (connection / HTTP
        failures) and ``json`` / ``KeyError`` (unexpected response body) are
        propagated unchanged.
    """
    from PIL import Image

    max_side = 1024

    # Use the context manager so the underlying file handle is released as
    # soon as the pixel data has been copied out by convert().
    with Image.open(image_path) as src:
        img = src.convert("RGB")

    longest = max(img.size)
    if longest > max_side:
        ratio = max_side / longest
        # Clamp to 1 px: for extreme aspect ratios the short side would
        # otherwise truncate to 0, which resize() rejects.
        new_size = (
            max(1, int(img.size[0] * ratio)),
            max(1, int(img.size[1] * ratio)),
        )
        img = img.resize(new_size, Image.LANCZOS)

    buf = io.BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode()

    payload = {
        "model": QWEN_MODEL_PATH,
        "messages": [{"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
            {"type": "text", "text": prompt},
        ]}],
        # Short, deterministic completion.
        "max_tokens": 32, "temperature": 0,
    }
    req = urllib.request.Request(
        f"{QWEN_URL}/v1/chat/completions",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    # Only the HTTP round trip is timed, not the image preprocessing above.
    t0 = time.time()
    with urllib.request.urlopen(req, timeout=timeout) as r:
        data = json.loads(r.read())
    return data["choices"][0]["message"]["content"].strip(), time.time() - t0
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def call_gemma(image_path: str, prompt: str, timeout: int = 300, max_tokens: int = 64) -> tuple:
    """Send an image and a prompt to mac_tensor's ``/api/chat_vision`` endpoint.

    A ``multipart/form-data`` body is assembled by hand with three parts, in
    this order: ``message`` (the prompt), ``max_tokens`` and ``image`` (the raw
    file bytes, always labelled ``image/jpeg``). The response is read line by
    line as a server-sent-event stream.

    Args:
        image_path: Path of the image file to upload.
        prompt: Text sent as the ``message`` form field.
        timeout: Timeout in seconds handed to ``urllib.request.urlopen``.
        max_tokens: Value sent as the ``max_tokens`` form field.

    Returns:
        ``(final_text, elapsed_seconds)``. The text is the ``final`` event's
        text when one arrives and is non-empty, otherwise the concatenation of
        all ``token`` event texts; it is stripped in both cases.
    """
    boundary = f"----factory{int(time.time()*1000)}"
    opener = f"--{boundary}\r\n".encode()
    crlf = b"\r\n"

    with open(image_path, "rb") as fh:
        image_bytes = fh.read()

    # Plain text fields first, then the file part, then the closing delimiter.
    pieces = []
    for field, val in (("message", prompt), ("max_tokens", max_tokens)):
        pieces.append(opener)
        pieces.append(f'Content-Disposition: form-data; name="{field}"\r\n\r\n'.encode())
        pieces.append(str(val).encode())
        pieces.append(crlf)

    upload_name = os.path.basename(image_path)
    pieces.extend([
        opener,
        f'Content-Disposition: form-data; name="image"; filename="{upload_name}"\r\n'.encode(),
        b"Content-Type: image/jpeg\r\n\r\n",
        image_bytes,
        crlf,
        f"--{boundary}--\r\n".encode(),
    ])

    request = urllib.request.Request(
        f"{GEMMA_URL}/api/chat_vision",
        data=b"".join(pieces),
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )

    started = time.time()
    streamed = []
    final_text = ""
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        for raw in resp:
            stripped = raw.rstrip(b"\r\n")
            # Only "data:" lines carry events; everything else is skipped.
            if stripped[:5] != b"data:":
                continue
            try:
                event = json.loads(stripped[5:].strip())
            except Exception:
                # Best effort: an undecodable data line is ignored.
                continue
            kind = event.get("type")
            if kind == "done":
                break
            if kind == "final":
                final_text = event.get("text", "")
                break
            if kind == "token":
                streamed.append(event.get("text", ""))

    answer = final_text or "".join(streamed)
    return answer.strip(), time.time() - started
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def call_vlm(backend: str, image_path: str, prompt: str, timeout: int = 120) -> tuple:
    """Dispatch a chat-style image question to the selected VLM backend.

    Args:
        backend: ``"qwen"`` or ``"gemma"``.
        image_path: Path of the image file to send.
        prompt: Text prompt for the model.
        timeout: Timeout in seconds forwarded to the backend client.

    Returns:
        ``(text, elapsed_seconds)`` exactly as returned by ``call_qwen`` or
        ``call_gemma``.

    Raises:
        ValueError: if ``backend`` is neither ``"qwen"`` nor ``"gemma"``.
    """
    if backend not in ("qwen", "gemma"):
        raise ValueError(f"unknown backend {backend!r}; valid: {VALID_BACKENDS}")
    client = call_qwen if backend == "qwen" else call_gemma
    return client(image_path, prompt, timeout=timeout)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def resolve_backend(args, proj: ProjectConfig, stage: str) -> str:
    """Decide which VLM backend a pipeline stage should use.

    Precedence: a truthy ``args.backend`` (the CLI flag) wins; otherwise the
    value of ``proj.backend_for(stage)`` is used; if that value is not one of
    ``VALID_BACKENDS`` (e.g. a legacy value such as "pod"), ``"qwen"`` is used.

    Args:
        args: Parsed CLI namespace; a missing or falsy ``backend`` attribute
            means "not given on the command line".
        proj: Loaded project configuration.
        stage: Stage name passed through to ``proj.backend_for``.

    Returns:
        The backend name to use.

    Raises:
        SystemExit: if the CLI flag is set to a value outside ``VALID_BACKENDS``.
    """
    requested = getattr(args, "backend", None)
    if not requested:
        configured = proj.backend_for(stage)
        # Unrecognised project values silently fall back to qwen.
        return configured if configured in VALID_BACKENDS else "qwen"
    if requested in VALID_BACKENDS:
        return requested
    raise SystemExit(f"--backend must be one of {VALID_BACKENDS}, got {requested!r}")
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def call_falcon_m4(image_path: str, query: str, timeout: int = 120) -> dict:
    """Hit mac_tensor /api/falcon (direct, no chained agent). Returns parsed JSON.

    The request is a hand-built multipart/form-data POST with two parts:
    a text field "query" and a file field "image".

    Args:
        image_path: path of the image file to upload.
        query: text query sent in the "query" form field.
        timeout: socket timeout in seconds for the HTTP request.

    Returns:
        The decoded JSON response with an extra "_elapsed_seconds" key holding
        the wall-clock duration of the request.

    Raises:
        OSError: if the image cannot be read or the request fails
            (urllib errors are OSError subclasses).
        ValueError: if the response body is not valid JSON.
    """
    import mimetypes  # local import: only this function needs it

    boundary = f"----factory{int(time.time()*1000)}"
    body = io.BytesIO()

    def part(name, value, filename=None, content_type=None):
        """Append one form-data part; `value` must be bytes when `filename` is given."""
        body.write(f"--{boundary}\r\n".encode())
        if filename:
            # A double quote in the file name would terminate the header value early.
            safe_name = filename.replace('"', "%22")
            body.write(f'Content-Disposition: form-data; name="{name}"; filename="{safe_name}"\r\n'.encode())
            body.write(f'Content-Type: {content_type or "application/octet-stream"}\r\n\r\n'.encode())
            body.write(value)
            body.write(b"\r\n")
        else:
            body.write(f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode())
            body.write(str(value).encode())
            body.write(b"\r\n")

    with open(image_path, "rb") as f:
        img_bytes = f.read()

    # Callers also pass .png/.webp files, so derive the MIME type from the
    # extension; fall back to the previous hard-coded value when it is unknown.
    guessed_type = mimetypes.guess_type(image_path)[0]
    if not guessed_type or not guessed_type.startswith("image/"):
        guessed_type = "image/jpeg"

    part("query", query)
    part("image", img_bytes, filename=os.path.basename(image_path), content_type=guessed_type)
    body.write(f"--{boundary}--\r\n".encode())

    req = urllib.request.Request(
        f"{GEMMA_URL}/api/falcon",
        data=body.getvalue(),
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )
    t0 = time.time()
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        data = json.loads(resp.read())
    data["_elapsed_seconds"] = time.time() - t0
    return data
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def parse_yes_no(text: str) -> str:
    """Reduce a free-form model answer to "YES", "NO" or "UNKNOWN".

    The first whitespace-separated token is checked first, then the whole
    answer; within each, YES takes precedence over NO. Matching is
    case-insensitive and on whole words only, so tokens that merely contain
    the letters (e.g. "UNKNOWN", "NOT", "CANNOT", "EYES") do not count.

    Args:
        text: raw answer text; empty or None yields "UNKNOWN".

    Returns:
        One of "YES", "NO", "UNKNOWN".
    """
    import re  # local import keeps this block self-contained

    upper = (text or "").strip().upper()
    if not upper:
        return "UNKNOWN"

    first_token = upper.split()[0]
    for scope in (first_token, upper):
        if re.search(r"\bYES\b", scope):
            return "YES"
        if re.search(r"\bNO\b", scope):
            return "NO"
    return "UNKNOWN"
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# ============================================================
|
| 237 |
+
# COMMANDS
|
| 238 |
+
# ============================================================
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def cmd_status(args):
    """Print the configured backend URLs and probe each backend's info endpoint.

    `args` is accepted so every command handler has the same signature; it is
    not read here. Any failure while probing a backend is reported as DOWN.
    """
    rule = "=" * 60
    print(rule)
    print("Backend status")
    print(rule)
    print(f" QWEN_URL = {QWEN_URL} (override with env QWEN_URL)")
    print(f" GEMMA_URL = {GEMMA_URL} (override with env GEMMA_URL)")

    backends = (
        ("Qwen2.5-VL (mlx_vlm.server)", QWEN_URL, "/v1/models"),
        ("Gemma 4 + Falcon (mac_tensor)", GEMMA_URL, "/api/info"),
    )
    for label, base_url, probe_path in backends:
        print(f"\n {label}")
        print(f" {base_url}")
        try:
            with urllib.request.urlopen(f"{base_url}{probe_path}", timeout=5) as reply:
                payload = json.loads(reply.read())
            # Only the first 200 characters of the info payload are shown.
            print(f" ✓ alive: {json.dumps(payload)[:200]}")
        except Exception as err:
            print(f" ✗ DOWN: {err}")
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def cmd_project(args):
    """Load the project named by `args.project` and print its settings.

    Shows identity, storage locations, per-bucket query counts, Falcon
    queries, backends, and the first six lines of the filter prompt.
    """
    project = load_project(args.project)
    rule = "=" * 60

    print(rule)
    print(f"Project: {project.project_name}")
    print(rule)
    print(f" target_object: {project.target_object!r}")
    print(f" description: {project.description.strip()}")
    print(f" data_root: {project.local_image_dir()}")
    print(f" r2_bucket: {project.r2_bucket}")
    print(f" r2 raw prefix: {project.r2_raw_prefix}")
    print(f" r2 labels: {project.r2_labels_prefix}")

    print(f"\n buckets ({len(project.bucket_queries)}):")
    for bucket_name, bucket_qs in project.bucket_queries.items():
        print(f" {bucket_name:40s} {len(bucket_qs)} queries")

    print(f"\n falcon_queries: {project.falcon_queries}")
    print(f" backends: {project.backends}")
    print(f" total_queries: {project.total_query_count()}")

    print(f"\n Filter prompt preview:")
    preview = project.prompt("filter").split("\n")[:6]
    for prompt_line in preview:
        print(f" {prompt_line}")
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def resolve_experiment(name_or_latest: str) -> str:
    """Resolve an experiment name to the absolute path of its directory.

    Lookup order under the relative "experiments" directory:
      * "latest": the target of the experiments/latest symlink if it points
        at something that exists, otherwise the first entry returned by
        list_experiments().
      * an exact directory name;
      * the first experiment whose name contains `name_or_latest`.

    Every branch returns an absolute path.

    Raises:
        FileNotFoundError: if nothing matches.
    """
    base = "experiments"
    if name_or_latest == "latest":
        link = os.path.join(base, "latest")
        # os.path.exists follows the link, so a dangling symlink falls through
        # to the directory listing instead of yielding a nonexistent path.
        if os.path.islink(link) and os.path.exists(link):
            return os.path.abspath(os.path.realpath(link))
        exps = list_experiments(base)
        if exps:
            return os.path.abspath(exps[0]["path"])
        raise FileNotFoundError("no experiments found")

    full = os.path.join(base, name_or_latest)
    if os.path.exists(full):
        return os.path.abspath(full)
    for e in list_experiments(base):
        if name_or_latest in e["name"]:
            return os.path.abspath(e["path"])
    raise FileNotFoundError(f"experiment '{name_or_latest}' not found")
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def cmd_gather(args):
    """Run gather.py once per bucket from the project's bucket_queries.

    Always creates a new experiment directory (named after `args.experiment`
    when given, otherwise "gather-<project>"), records README + config there,
    then launches one gather.py subprocess per bucket with that bucket's
    queries. A failing bucket is reported and the loop continues.

    Reads from `args`: project, experiment, max_per_query, workers.
    """
    proj = load_project(args.project)
    print(f"Gathering for project: {proj.project_name}")
    print(f" target: {proj.target_object}")
    print(f" data_root: {proj.local_image_dir()}")
    print(f" buckets: {len(proj.bucket_queries)}")

    # Create the experiment dir, named after --experiment when one was given
    exp_name = args.experiment or f"gather-{proj.project_name}"
    exp_dir = make_experiment_dir(exp_name)
    write_readme(exp_dir, exp_name,
                 description=f"Gather for {proj.project_name} ({proj.target_object})",
                 params=vars(args))
    # vars(args) carries its own "project" key (the YAML path). Spread it first
    # so the parsed project dict is what ends up under "project"; the path is
    # kept separately under "project_path".
    write_config(exp_dir, {**vars(args), "project_path": args.project, "project": proj.raw})
    update_latest_symlink(exp_dir)
    print(f"Experiment: {exp_dir}")

    # The subprocess receives the experiment location via the environment.
    env = os.environ.copy()
    env["EXPERIMENT_DIR"] = exp_dir

    summary = []
    for bucket, queries in proj.bucket_queries.items():
        print(f"\n[{bucket}] {len(queries)} queries")
        cmd = [
            sys.executable, os.path.join(HERE, "gather.py"),
            "--out", proj.local_image_dir(),
            "--bucket", bucket,
            "--max-per-query", str(args.max_per_query),
            "--workers", str(args.workers),
        ]
        for q in queries:
            cmd += ["--query", q]
        t0 = time.time()
        try:
            result = subprocess.run(cmd, env=env, capture_output=True, text=True, check=True)
            # Echo the second-to-last stdout line (or the only line) of the child.
            print(result.stdout.strip().split("\n")[-2:][0] if result.stdout else "")
        except subprocess.CalledProcessError as e:
            print(f" FAILED: {(e.stderr or '')[-300:]}")
        summary.append({"bucket": bucket, "elapsed": round(time.time() - t0, 1)})

    print(f"\nDONE — {sum(s['elapsed'] for s in summary):.0f}s total")
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def cmd_filter(args):
    """Run image-level YES/NO classification on all images for a project.

    Backend chosen via --backend (qwen|gemma) or project YAML. Results are
    written to <experiment>/filter_<backend>/keep_list.json. When
    --experiment is given it is resolved before any image is processed, so a
    bad name fails immediately instead of after the whole run; otherwise a
    new "filter-<project>" experiment directory is created at save time.

    Reads from `args`: project, backend (optional), limit, experiment.
    """
    proj = load_project(args.project)
    backend = resolve_backend(args, proj, "filter")

    img_root = proj.local_image_dir()
    if not os.path.exists(img_root):
        print(f" no images at {img_root}; run gather first")
        return

    # Fail fast on an unknown --experiment rather than losing a finished run.
    exp_dir = resolve_experiment(args.experiment) if args.experiment else None

    images = []
    for root, _, names in os.walk(img_root):
        for n in names:
            if n.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
                full = os.path.join(root, n)
                rel = os.path.relpath(full, img_root)
                # relpath uses the OS separator, so split on that, not on "/".
                parts = rel.split(os.sep)
                if len(parts) < 2:
                    continue  # files directly under img_root have no bucket
                images.append(("/".join(parts[:2]), rel, full))
    if args.limit > 0:
        images = images[:args.limit]

    prompt = proj.prompt("filter")
    backend_label = {"qwen": "Qwen 2.5-VL", "gemma": "Gemma 4"}[backend]
    print(f"Filtering {len(images)} images via {backend_label}...")
    print(f" prompt: {prompt[:120]}...")

    results = []
    counts = {"YES": 0, "NO": 0, "UNKNOWN": 0, "ERROR": 0}
    t0 = time.time()
    for i, (bucket, rel, full) in enumerate(images, 1):
        try:
            answer, elapsed = call_vlm(backend, full, prompt)
            verdict = parse_yes_no(answer)
        except Exception as e:
            # One bad image/request must not abort the batch; record it as ERROR.
            answer, elapsed, verdict = f"ERROR: {e}", 0, "ERROR"
        counts[verdict] += 1
        results.append({
            "image_path": rel, "bucket": bucket, "verdict": verdict,
            "raw_answer": answer[:120], "elapsed_seconds": round(elapsed, 3),
        })
        if i % 10 == 0 or i == len(images):
            elapsed_total = time.time() - t0
            rate = i / max(elapsed_total, 1)
            eta = (len(images) - i) / max(rate, 0.001) / 60
            print(f" [{i:4d}/{len(images)}] YES={counts['YES']} NO={counts['NO']} ERR={counts['ERROR']} ETA {eta:.0f} min")

    # Save into the requested experiment, or a fresh one when none was named
    if exp_dir is None:
        exp_dir = make_experiment_dir(f"filter-{proj.project_name}")
    out_dir = os.path.join(exp_dir, f"filter_{backend}")
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, "keep_list.json")
    with open(out_path, "w") as f:
        json.dump({"backend": backend, "project": proj.project_name,
                   "counts": counts, "results": results}, f, indent=2)
    print(f"\nSaved {out_path}")
    print(f" YES rate: {counts['YES']/max(1,len(images)):.0%}")
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def cmd_label(args):
    """Label all images via M4 /api/falcon (one POST per image per query).

    Saves COCO-format annotations to <experiment>/label_falcon/<project>.coco.json.
    Normalised boxes ("bbox_norm" with x1/y1/x2/y2) returned by the backend are
    scaled to pixel [x, y, width, height]. When --experiment is given it is
    resolved before any image is processed so a bad name fails immediately;
    otherwise a new "label-m4-<project>" experiment directory is created.

    Reads from `args`: project, limit, experiment.
    """
    # Imported once here instead of on every loop iteration.
    from PIL import Image

    proj = load_project(args.project)
    img_root = proj.local_image_dir()
    if not os.path.exists(img_root):
        print(f" no images at {img_root}; run gather first")
        return

    # Fail fast on an unknown --experiment rather than losing a finished run.
    exp_dir = resolve_experiment(args.experiment) if args.experiment else None

    images = []
    for root, _, names in os.walk(img_root):
        for n in names:
            if n.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
                full = os.path.join(root, n)
                rel = os.path.relpath(full, img_root)
                # relpath uses the OS separator, so test/split on that, not on "/".
                if os.sep not in rel:
                    continue  # files directly under img_root have no bucket
                images.append((rel.split(os.sep, 1)[0], rel, full))
    if args.limit > 0:
        images = images[:args.limit]
    print(f"Labeling {len(images)} images x {len(proj.falcon_queries)} Falcon queries each")
    print(f" queries: {proj.falcon_queries}")

    # COCO accumulator
    coco = {
        "info": {
            "description": f"data_label_factory run for {proj.project_name}",
            "date_created": datetime.now().isoformat(timespec="seconds"),
            "target_object": proj.target_object,
        },
        "images": [],
        "annotations": [],
        "categories": [
            {"id": i+1, "name": q, "supercategory": "object"}
            for i, q in enumerate(proj.falcon_queries)
        ],
    }
    cat_id = {q: i+1 for i, q in enumerate(proj.falcon_queries)}
    next_img_id, next_ann_id = 1, 1
    n_with_dets = 0
    n_total_dets = 0
    t0 = time.time()

    for i, (bucket, rel, full) in enumerate(images, 1):
        try:
            # Context manager closes the file handle; only the size is needed.
            with Image.open(full) as im:
                iw, ih = im.size
        except Exception as e:
            print(f" skip {rel}: load fail {e}")
            continue
        img_id = next_img_id
        next_img_id += 1
        coco["images"].append({"id": img_id, "file_name": rel, "width": iw, "height": ih, "bucket": bucket})

        img_dets = 0
        for q in proj.falcon_queries:
            try:
                resp = call_falcon_m4(full, q, timeout=180)
                # `or []` also covers an explicit null "masks" value.
                masks = resp.get("masks") or []
            except Exception as e:
                masks = []
                print(f" {rel} [{q}]: error {str(e)[:80]}")
            for m in masks:
                bb = m.get("bbox_norm") or {}
                if not bb:
                    continue
                # bbox_norm is scaled by the image size to get pixel corners.
                x1 = bb.get("x1", 0) * iw
                y1 = bb.get("y1", 0) * ih
                x2 = bb.get("x2", 0) * iw
                y2 = bb.get("y2", 0) * ih
                w = max(0, x2 - x1)
                h = max(0, y2 - y1)
                coco["annotations"].append({
                    "id": next_ann_id, "image_id": img_id,
                    "category_id": cat_id[q],
                    "bbox": [round(x1, 2), round(y1, 2), round(w, 2), round(h, 2)],
                    "area": round(w * h, 2), "iscrowd": 0,
                    # NOTE(review): "score" is filled from area_fraction, not a
                    # model confidence — confirm this is intended.
                    "score": float(m.get("area_fraction", 1.0)),
                })
                next_ann_id += 1
                img_dets += 1

        if img_dets > 0:
            n_with_dets += 1
            n_total_dets += img_dets

        if i % 5 == 0 or i == len(images):
            elapsed = time.time() - t0
            rate = i / max(elapsed, 1)
            eta = (len(images) - i) / max(rate, 0.001) / 60
            print(f" [{i:4d}/{len(images)}] hit={n_with_dets} dets={n_total_dets} ETA {eta:.0f} min")

    # Save COCO
    if exp_dir is None:
        exp_dir = make_experiment_dir(f"label-m4-{proj.project_name}")
    out_dir = os.path.join(exp_dir, "label_falcon")
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"{proj.project_name}.coco.json")
    with open(out_path, "w") as f:
        json.dump(coco, f, indent=2)
    print(f"\nSaved {out_path}")
    print(f" {len(coco['images'])} images, {len(coco['annotations'])} bboxes")
|
| 512 |
+
|
| 513 |
+
|
| 514 |
+
def cmd_pipeline(args):
    """Full pipeline: gather → filter for the project.

    Creates a "pipeline-<project>" experiment directory, records README and
    config, then runs cmd_gather and cmd_filter with `args.experiment`
    rewritten for each stage. Any --experiment value passed on the command
    line is overwritten. Label/verify are not run.
    """
    proj = load_project(args.project)
    print("=" * 70)
    print(f"PIPELINE — {proj.project_name} ({proj.target_object})")
    print("=" * 70)

    run_name = f"pipeline-{proj.project_name}"
    exp = make_experiment_dir(run_name)
    write_readme(exp, run_name,
                 description=f"Full pipeline for {proj.target_object}",
                 params=vars(args))
    # vars(args) carries its own "project" key (the YAML path). Spread it first
    # so the parsed project dict is what ends up under "project"; the path is
    # kept separately under "project_path".
    write_config(exp, {**vars(args), "project_path": args.project, "project": proj.raw})
    update_latest_symlink(exp)
    print(f"Experiment: {exp}\n")

    # 1. Gather
    print(">>> GATHER")
    # Strip the "<date>_<time>_" prefix so gather gets the bare run name.
    # NOTE(review): cmd_gather passes this name to make_experiment_dir itself,
    # so gather only lands in `exp` if the timestamp has not ticked over —
    # confirm whether a shared directory is intended.
    args.experiment = os.path.basename(exp).split("_", 2)[-1]
    cmd_gather(args)

    # 2. Filter
    print("\n>>> FILTER")
    # The full folder name makes cmd_filter resolve to this exact directory.
    args.experiment = os.path.basename(exp)
    cmd_filter(args)

    # Label + verify TBD via pod or qwen — skipping in this MVP
    print("\n>>> LABEL + VERIFY: skipped in MVP — use drone_factory pod path or extend")
    print(f"\nPIPELINE DONE — {exp}")
|
| 542 |
+
|
| 543 |
+
|
| 544 |
+
def cmd_list(args):
    """Print every experiment with the project (or backend) it belongs to.

    The "project" entry of an experiment's config may be a dict (parsed
    project YAML, from which "project_name" is shown) or a plain string
    (e.g. the project path); both are handled. When neither is usable the
    config's "backend" value, or "?", is shown instead.

    `args` is accepted for CLI-dispatch uniformity and is not read.
    """
    print("=" * 60)
    print("Experiments")
    print("=" * 60)
    for e in list_experiments():
        cfg = e.get("config") or {}
        if not isinstance(cfg, dict):
            cfg = {}
        fallback = cfg.get("backend", "?")
        project = cfg.get("project")
        if isinstance(project, dict):
            proj = project.get("project_name", fallback)
        elif isinstance(project, str) and project:
            # A string here has no .get(); show it as-is instead of crashing.
            proj = project
        else:
            proj = fallback
        print(f" {e['name']:50s} project={proj}")
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
# ============================================================
|
| 555 |
+
# MAIN
|
| 556 |
+
# ============================================================
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
def main():
    """CLI entry point: build the argument parser, parse argv, run the chosen command.

    Sub-commands: status, project, gather, filter, label, pipeline, list.
    Exits with status 1 after printing help if no handler matches.
    """
    parser = argparse.ArgumentParser(
        prog="data_label_factory",
        description=(
            "Generic data labeling pipeline. Pick any object class via a "
            "project YAML, then run: gather → filter → label → verify. "
            "Choose your VLM backend with --backend qwen|gemma."
        ),
    )
    commands = parser.add_subparsers(dest="command", required=True)

    def add_backend_flag(target):
        """Attach the shared --backend option to a sub-parser."""
        target.add_argument(
            "--backend",
            choices=VALID_BACKENDS,
            default=None,
            help=("VLM backend for chat-style stages (filter, verify). "
                  "Overrides the project YAML. Defaults to project setting "
                  "or 'qwen'."),
        )

    # status
    commands.add_parser("status", help="Check backends are alive")

    # project
    project_cmd = commands.add_parser("project", help="Show project YAML")
    project_cmd.add_argument("--project", required=True)

    # gather
    gather_cmd = commands.add_parser("gather", help="Gather images for a project")
    gather_cmd.add_argument("--project", required=True)
    gather_cmd.add_argument("--max-per-query", type=int, default=30)
    gather_cmd.add_argument("--workers", type=int, default=50)
    gather_cmd.add_argument("--experiment", default=None)

    # filter
    filter_cmd = commands.add_parser("filter", help="Image-level YES/NO classification (qwen or gemma)")
    filter_cmd.add_argument("--project", required=True)
    filter_cmd.add_argument("--experiment", default=None)
    filter_cmd.add_argument("--limit", type=int, default=0)
    add_backend_flag(filter_cmd)

    # label
    label_cmd = commands.add_parser("label", help="Falcon Perception bbox grounding via mac_tensor /api/falcon")
    label_cmd.add_argument("--project", required=True)
    label_cmd.add_argument("--experiment", default=None)
    label_cmd.add_argument("--limit", type=int, default=0)

    # pipeline
    pipeline_cmd = commands.add_parser("pipeline", help="Full chain: gather → filter (label/verify TBD)")
    pipeline_cmd.add_argument("--project", required=True)
    pipeline_cmd.add_argument("--max-per-query", type=int, default=20)
    pipeline_cmd.add_argument("--workers", type=int, default=50)
    pipeline_cmd.add_argument("--experiment", default=None)
    pipeline_cmd.add_argument("--limit", type=int, default=0)
    add_backend_flag(pipeline_cmd)

    # list
    commands.add_parser("list", help="List experiments")

    args = parser.parse_args()
    handlers = {
        "status": cmd_status,
        "project": cmd_project,
        "gather": cmd_gather,
        "filter": cmd_filter,
        "label": cmd_label,
        "pipeline": cmd_pipeline,
        "list": cmd_list,
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        sys.exit(1)
    handler(args)
|
| 626 |
+
|
| 627 |
+
|
| 628 |
+
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
data_label_factory/experiments.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
experiments.py — dated experiment folder convention.
|
| 3 |
+
|
| 4 |
+
Every pipeline run goes into experiments/<YYYY-MM-DD_HHMMSS>_<name>/
|
| 5 |
+
with a README + config.json so we can compare runs over time.
|
| 6 |
+
|
| 7 |
+
Layout:
|
| 8 |
+
experiments/
|
| 9 |
+
├── 2026-04-07_193000_first-yt-batch/
|
| 10 |
+
│ ├── README.md ← what this run was, parameters, observations
|
| 11 |
+
│ ├── config.json ← exact CLI args
|
| 12 |
+
│ ├── gather/ ← gather_v2 outputs (images go to drone-dataset-v2/)
|
| 13 |
+
│ │ ├── manifest.json
|
| 14 |
+
│ │ └── stats.json
|
| 15 |
+
│ ├── filter_qwen/ ← run_qwen_filter outputs
|
| 16 |
+
│ │ ├── keep_list.json
|
| 17 |
+
│ │ └── stats.json
|
| 18 |
+
│ ├── label_falcon/ ← pod_label outputs (from RunPod)
|
| 19 |
+
│ │ ├── coco.json
|
| 20 |
+
│ │ └── stats.json
|
| 21 |
+
│ ├── verify_qwen/ ← verify_vlm outputs (from RunPod)
|
| 22 |
+
│ │ ├── verified.json
|
| 23 |
+
│ │ └── stats.json
|
| 24 |
+
│ └── reviews/ ← human verdicts from the web UI
|
| 25 |
+
│ └── reviews.json
|
| 26 |
+
└── latest -> 2026-04-07_193000_first-yt-batch/ ← symlink to most recent
|
| 27 |
+
|
| 28 |
+
The drone-dataset-v2/ images themselves are SHARED across experiments —
|
| 29 |
+
each experiment writes labels/filters/verifications referencing those images,
|
| 30 |
+
not copies of them.
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
import json
|
| 34 |
+
import os
|
| 35 |
+
import sys
|
| 36 |
+
import time
|
| 37 |
+
from datetime import datetime
|
| 38 |
+
from pathlib import Path
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def make_experiment_dir(name: str = "", base: str = "experiments") -> str:
    """Create (or reuse) a timestamped experiment directory under `base`.

    The folder is named "<YYYY-MM-DD_HHMMSS>_<name>", or just the timestamp
    when no usable name is given. In the name, spaces become "-" and "/"
    becomes "_". The standard stage sub-directories are created as well.

    Returns:
        The absolute path of the experiment directory.
    """
    stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")

    suffix = ""
    if name:
        suffix = name.strip().replace(" ", "-").replace("/", "_")
    folder_name = f"{stamp}_{suffix}" if suffix else stamp

    root = os.path.abspath(os.path.join(base, folder_name))
    os.makedirs(root, exist_ok=True)

    # Create the standard subdirs
    stage_dirs = ("gather", "filter_qwen", "label_falcon", "verify_qwen", "reviews")
    for stage_dir in stage_dirs:
        os.makedirs(os.path.join(root, stage_dir), exist_ok=True)

    return root
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def write_readme(experiment_dir: str, name: str, description: str, params: dict):
    """Write a small markdown README capturing what this experiment is.

    Args:
        experiment_dir: directory that receives README.md (must exist).
        name: experiment title; the directory's base name is used when empty.
        description: free text; "(no description)" is written when empty.
        params: parameters rendered as a JSON code block. Values that are not
            JSON-serialisable are rendered with str().

    The file is always written as UTF-8.
    """
    readme_path = os.path.join(experiment_dir, "README.md")
    lines = [
        f"# Experiment: {name or os.path.basename(experiment_dir)}",
        "",
        f"**Started:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Path:** `{experiment_dir}`",
        "",
        "## Description",
        "",
        description or "(no description)",
        "",
        "## Parameters",
        "",
        "```json",
        # default=str keeps a non-JSON value (e.g. a Path) from aborting the write.
        json.dumps(params, indent=2, default=str),
        "```",
        "",
        "## Pipeline stages",
        "",
        "1. **gather/** — image gathering manifest",
        "2. **filter_qwen/** — image-level Qwen YES/NO filter results",
        "3. **label_falcon/** — Falcon Perception bbox grounding (COCO format)",
        "4. **verify_qwen/** — per-bbox Qwen verification",
        "5. **reviews/** — human verdicts from the web UI",
        "",
    ]
    # The stage list contains non-ASCII dashes; pin UTF-8 so the write does
    # not depend on the platform's locale encoding.
    with open(readme_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def write_config(experiment_dir: str, config: dict):
    """Dump `config` as indented JSON to <experiment_dir>/config.json.

    Any existing config.json in that directory is overwritten.
    """
    target = os.path.join(experiment_dir, "config.json")
    with open(target, "w") as handle:
        json.dump(config, handle, indent=2)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def update_latest_symlink(experiment_dir: str, base: str = "experiments"):
    """Update the experiments/latest symlink to point at this experiment.

    The link target is the experiment's folder name (a relative symlink), so
    `experiment_dir` is expected to be a direct child of `base`. A trailing
    path separator on `experiment_dir` is tolerated.

    Nothing is changed when `latest` exists and is not a symlink, and a
    failure to create the link is ignored (best effort).
    """
    base_abs = os.path.abspath(base)
    link = os.path.join(base_abs, "latest")
    # normpath drops a trailing separator; without it basename() would be ""
    # and the symlink call below would fail silently.
    target = os.path.basename(os.path.normpath(experiment_dir))  # relative symlink
    if os.path.islink(link):
        os.unlink(link)
    elif os.path.exists(link):
        # Don't clobber a real directory
        return
    try:
        os.symlink(target, link)
    except OSError:
        pass  # symlinks can fail on some filesystems
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def list_experiments(base: str = "experiments") -> list:
    """List experiment directories under `base`, sorted by name descending.

    Because folder names start with a timestamp, descending name order is
    newest first. The `latest` entry and non-directories are skipped.

    Returns:
        A list of dicts with keys:
          "name"       - folder name,
          "path"       - os.path.join(base, name),
          "config"     - parsed config.json, or {} when it is missing,
                         unreadable, or not a JSON object,
          "has_readme" - whether README.md exists.
        An empty list when `base` does not exist.
    """
    if not os.path.exists(base):
        return []
    out = []
    for entry in sorted(os.listdir(base), reverse=True):
        if entry == "latest":
            continue
        full = os.path.join(base, entry)
        if not os.path.isdir(full):
            continue
        readme = os.path.join(full, "README.md")
        config = os.path.join(full, "config.json")
        cfg = {}
        if os.path.exists(config):
            try:
                # Context manager closes the handle instead of leaking it.
                with open(config) as fh:
                    cfg = json.load(fh)
            except (OSError, ValueError):
                # Unreadable or malformed config: list the experiment anyway.
                cfg = {}
            if not isinstance(cfg, dict):
                cfg = {}  # callers use dict methods on "config"
        out.append({
            "name": entry,
            "path": full,
            "config": cfg,
            "has_readme": os.path.exists(readme),
        })
    return out
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
if __name__ == "__main__":
    # CLI: list experiments or make one
    import argparse

    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest="cmd")

    new_cmd = commands.add_parser("new", help="Create a new dated experiment folder")
    new_cmd.add_argument("--name", default="", help="Optional human-readable suffix")
    new_cmd.add_argument("--description", default="")

    commands.add_parser("list", help="List existing experiments")

    cli_args = parser.parse_args()
    if cli_args.cmd == "list":
        for exp in list_experiments():
            print(f" {exp['name']}")
    elif cli_args.cmd == "new":
        # Create the folder, describe it, repoint `latest`, then echo the path.
        created = make_experiment_dir(cli_args.name)
        write_readme(created, cli_args.name, cli_args.description, {})
        update_latest_symlink(created)
        print(created)
    else:
        # No sub-command given.
        parser.print_help()
|
data_label_factory/gather.py
ADDED
|
@@ -0,0 +1,554 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
gather_v2.py — smarter, parallel image gatherer for the drone-falcon dataset.
|
| 4 |
+
|
| 5 |
+
Improvements over gather_images.py (v1):
|
| 6 |
+
- Parallel downloads (50 threads instead of sequential)
|
| 7 |
+
- YouTube frame extraction via yt-dlp + ffmpeg (the killer feature for combat footage)
|
| 8 |
+
- Optional inline Qwen filter — only saves images Qwen says YES to
|
| 9 |
+
- Perceptual-hash dedup across sources (catches the same image from different sites)
|
| 10 |
+
- Resumable via local manifest
|
| 11 |
+
|
| 12 |
+
Sources:
|
| 13 |
+
- DuckDuckGo image search (broad, noisy)
|
| 14 |
+
- Wikimedia Commons (CC, niche, slower)
|
| 15 |
+
- YouTube videos / playlists (gold for combat footage)
|
| 16 |
+
|
| 17 |
+
Outputs:
|
| 18 |
+
drone-dataset-v2/<bucket>/<file>.jpg ← local mirror
|
| 19 |
+
drone-dataset-v2/manifest.json ← every file with provenance
|
| 20 |
+
|
| 21 |
+
Usage:
|
| 22 |
+
# Web search only (DDG + Wikimedia)
|
| 23 |
+
python3 gather_v2.py --bucket positive/fiber_spool_drone \\
|
| 24 |
+
--query "fiber optic drone Ukraine" --query "tethered fpv drone" \\
|
| 25 |
+
--max-per-query 100
|
| 26 |
+
|
| 27 |
+
# YouTube frame extraction
|
| 28 |
+
python3 gather_v2.py --bucket positive/fiber_spool_drone \\
|
| 29 |
+
--youtube "https://youtube.com/playlist?list=ABC123" \\
|
| 30 |
+
--fps 1 --max-frames-per-video 200
|
| 31 |
+
|
| 32 |
+
# Inline Qwen filter (only saves YES images)
|
| 33 |
+
python3 gather_v2.py --bucket positive/fiber_spool_drone \\
|
| 34 |
+
--query "fiber optic drone" --filter
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
import argparse
|
| 38 |
+
import base64
|
| 39 |
+
import hashlib
|
| 40 |
+
import io
|
| 41 |
+
import json
|
| 42 |
+
import os
|
| 43 |
+
import shutil
|
| 44 |
+
import subprocess
|
| 45 |
+
import time
|
| 46 |
+
import urllib.request
|
| 47 |
+
import urllib.parse
|
| 48 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 49 |
+
from pathlib import Path
|
| 50 |
+
from PIL import Image
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# ============================================================
|
| 54 |
+
# CONFIG
|
| 55 |
+
# ============================================================
|
| 56 |
+
|
| 57 |
+
# Sent as the User-Agent header on every outgoing HTTP request made by this module.
USER_AGENT = "data-label-factory-gather/0.1 (research project)"

# Qwen endpoint and model; both may be overridden through environment
# variables (same as the rest of the factory CLI).
M4_QWEN_URL = os.getenv("QWEN_URL", "http://localhost:8291")
QWEN_MODEL_PATH = os.getenv("QWEN_MODEL_PATH", "mlx-community/Qwen2.5-VL-3B-Instruct-4bit")

# Prompt used by the inline YES/NO filter; one instruction per line.
QWEN_FILTER_PROMPT = "\n".join([
    "Look at this image. Does it show a drone, a cable spool, or a wound fiber optic cable?",
    "Answer with exactly one word: YES or NO.",
    "YES if you see ANY of: a drone, a quadcopter, a cable reel, a fiber spool, a wound cable.",
    "NO if the main subject is something else.",
])

# File extensions kept as-is when naming downloads; anything else becomes ".jpg".
IMAGE_EXTS = set(".jpg .jpeg .png .webp .bmp .gif".split())
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# ============================================================
|
| 74 |
+
# DUCKDUCKGO IMAGE SEARCH (no API key)
|
| 75 |
+
# ============================================================
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def ddg_search(query: str, max_results: int = 50) -> list:
    """Search DuckDuckGo Images for ``query`` without an API key.

    The search is a two-step scrape: fetch the HTML results page to obtain the
    ``vqd`` token, then page through the ``i.js`` JSON endpoint until
    ``max_results`` unique image URLs have been collected or the pages run out.

    Args:
        query: Free-text search query.
        max_results: Upper bound on the number of hits returned. Values <= 0
            return an empty list without touching the network.

    Returns:
        A list of dicts with the keys ``url``, ``source`` (always
        ``"duckduckgo"``), ``title`` (truncated to 200 characters), ``page``,
        ``license`` (always ``"unknown"``) and ``query``. Network and parse
        failures are printed and end the search early; whatever was collected
        up to that point is returned.
    """
    import re

    results = []
    # Nothing was asked for: skip the token round-trip entirely.
    if max_results <= 0:
        return results
    headers = {"User-Agent": USER_AGENT}

    # Step 1: get vqd token
    try:
        token_url = f"https://duckduckgo.com/?q={urllib.parse.quote(query)}&iax=images&ia=images"
        req = urllib.request.Request(token_url, headers=headers)
        with urllib.request.urlopen(req, timeout=15) as resp:
            html = resp.read().decode("utf-8", errors="ignore")
        m = re.search(r'vqd=["\']?([\d-]+)["\']?', html)
        if not m:
            return results
        vqd = m.group(1)
    except Exception as e:
        print(f" ddg token err: {e}")
        return results

    # Step 2: paginate i.js
    seen = set()
    next_url = None
    while len(results) < max_results:
        if next_url is None:
            params = {"l": "us-en", "o": "json", "q": query, "vqd": vqd, "f": ",,,,,", "p": "1"}
            url = f"https://duckduckgo.com/i.js?{urllib.parse.urlencode(params)}"
        else:
            # The endpoint hands back a site-relative link to the next page.
            url = "https://duckduckgo.com" + next_url
        try:
            req = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(req, timeout=15) as resp:
                data = json.loads(resp.read())
        except Exception as e:
            print(f" ddg page err: {e}")
            break
        # An unexpected payload shape ends pagination instead of crashing.
        if not isinstance(data, dict):
            break
        items = data.get("results") or []
        if not items:
            break
        for it in items:
            img_url = it.get("image")
            if not img_url or img_url in seen:
                continue
            seen.add(img_url)
            results.append({
                "url": img_url,
                "source": "duckduckgo",
                # ``title`` may be present but null; slicing None would raise.
                "title": (it.get("title") or "")[:200],
                "page": it.get("url", ""),
                "license": "unknown",
                "query": query,
            })
            if len(results) >= max_results:
                break
        next_url = data.get("next")
        if not next_url:
            break
        # Small pause between pages to stay polite to the endpoint.
        time.sleep(0.3)
    return results
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# ============================================================
|
| 140 |
+
# WIKIMEDIA COMMONS (CC, free)
|
| 141 |
+
# ============================================================
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def wikimedia_search(query: str, max_results: int = 50) -> list:
    """Search Wikimedia Commons for bitmap files matching ``query``.

    Issues a single MediaWiki API request (``generator=search`` restricted to
    the File namespace) and asks for at most ``min(50, max_results)`` pages.

    Args:
        query: Free-text search query.
        max_results: Upper bound on the number of hits returned. Values <= 0
            return an empty list without touching the network.

    Returns:
        A list of dicts with the keys ``url``, ``source`` (always
        ``"wikimedia"``), ``title``, ``page``, ``license`` (the file's
        ``LicenseShortName`` metadata, or ``""`` when absent) and ``query``.
        A request or parse failure is printed and yields an empty list.
    """
    results = []
    # A zero/negative limit is not a meaningful API request; answer locally.
    if max_results <= 0:
        return results
    params = {
        "action": "query", "format": "json",
        "generator": "search", "gsrsearch": f"filetype:bitmap {query}",
        "gsrnamespace": "6", "gsrlimit": str(min(50, max_results)),
        "prop": "imageinfo", "iiprop": "url|extmetadata|size",
    }
    url = f"https://commons.wikimedia.org/w/api.php?{urllib.parse.urlencode(params)}"
    try:
        req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
        with urllib.request.urlopen(req, timeout=20) as resp:
            data = json.loads(resp.read())
    except Exception as e:
        print(f" wikimedia err: {e}")
        return results
    pages = (data.get("query") or {}).get("pages") or {}
    # ``pages`` is keyed by page id, which says nothing about relevance. The
    # search generator tags each page with its rank in ``index``; order by it
    # so the best matches come first (stable no-op when the field is absent).
    ranked = sorted(pages.values(), key=lambda page: page.get("index", 0))
    for p in ranked:
        ii = (p.get("imageinfo") or [{}])[0]
        img_url = ii.get("url")
        if not img_url:
            continue
        ext = (ii.get("extmetadata") or {})
        license_name = (ext.get("LicenseShortName") or {}).get("value", "")
        results.append({
            "url": img_url,
            "source": "wikimedia",
            "title": p.get("title", ""),
            "page": f"https://commons.wikimedia.org/wiki/{urllib.parse.quote(p.get('title', ''))}",
            "license": license_name,
            "query": query,
        })
        if len(results) >= max_results:
            break
    return results
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
# ============================================================
|
| 182 |
+
# YOUTUBE FRAME EXTRACTION (the killer feature)
|
| 183 |
+
# ============================================================
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def youtube_extract_frames(
    video_url: str,
    out_dir: str,
    fps: float = 1.0,
    max_frames: int = 200,
    cookies_from_browser: str = None,
) -> list:
    """Download a YouTube video and extract frames at the given fps.

    Uses yt-dlp for the download and ffmpeg (via imageio_ffmpeg's bundled
    binary) for the extraction. All intermediate files live in
    ``<out_dir>/_video_tmp``, which is removed again on every exit path, so a
    failed run leaves neither the downloaded video nor stray frames behind.

    Args:
        video_url: URL handed to yt-dlp (``noplaylist`` is set).
        out_dir: Directory that receives the final ``yt_<id>_<index>.jpg`` frames.
        fps: Frames per second to sample. Values <= 0 yield no frames.
        max_frames: Maximum number of frames to extract. Values <= 0 yield no frames.
        cookies_from_browser: Optional browser name whose cookies yt-dlp should use.

    Returns:
        A list of dicts, one per extracted frame, with the keys ``path``,
        ``source`` (always ``"youtube"``), ``video_id``, ``video_title``,
        ``video_url``, ``frame_index`` and ``license``. Download or extraction
        failures are printed and yield an empty list.
    """
    # Settings that cannot produce a frame: bail out before any disk or
    # network work (and before importing the heavy optional dependencies).
    if fps <= 0 or max_frames <= 0:
        return []

    import yt_dlp
    import imageio_ffmpeg

    ffmpeg_bin = imageio_ffmpeg.get_ffmpeg_exe()
    os.makedirs(out_dir, exist_ok=True)
    work_dir = os.path.join(out_dir, "_video_tmp")
    os.makedirs(work_dir, exist_ok=True)

    try:
        # Download with yt-dlp — android+web player clients bypass most YT bot detection
        print(f" yt-dlp downloading: {video_url}")
        ydl_opts = {
            "format": "worstvideo[height>=480]/worst",
            "outtmpl": os.path.join(work_dir, "%(id)s.%(ext)s"),
            "quiet": True,
            "no_warnings": True,
            "noplaylist": True,
            "extractor_args": {"youtube": {"player_client": ["android", "web"]}},
        }
        if cookies_from_browser:
            ydl_opts["cookiesfrombrowser"] = (cookies_from_browser,)
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(video_url, download=True)
            video_id = info.get("id", "video")
            title = info.get("title", "")
        except Exception as e:
            print(f" yt-dlp failed: {e}")
            return []

        # Find downloaded file. Frame JPEGs share the video-id prefix, so they
        # are excluded to avoid treating a leftover frame as the video.
        video_files = [
            os.path.join(work_dir, f)
            for f in os.listdir(work_dir)
            if f.startswith(video_id) and not f.endswith(".jpg")
        ]
        if not video_files:
            print(f" no downloaded video found in {work_dir}")
            return []
        video_file = video_files[0]

        # Extract frames via ffmpeg
        print(f" ffmpeg extracting frames at {fps} fps from {video_file}")
        frame_pattern = os.path.join(work_dir, f"{video_id}_%05d.jpg")
        cmd = [
            ffmpeg_bin, "-y", "-i", video_file,
            "-vf", f"fps={fps}",
            "-frames:v", str(max_frames),
            "-q:v", "3",
            frame_pattern,
        ]
        try:
            subprocess.run(cmd, capture_output=True, check=True, timeout=600)
        except Exception as e:
            print(f" ffmpeg failed: {e}")
            return []

        frames = sorted(f for f in os.listdir(work_dir) if f.startswith(video_id + "_") and f.endswith(".jpg"))
        out_frames = []
        for i, fr in enumerate(frames):
            src = os.path.join(work_dir, fr)
            dest = os.path.join(out_dir, f"yt_{video_id}_{i:05d}.jpg")
            shutil.move(src, dest)
            out_frames.append({
                "path": dest,
                "source": "youtube",
                "video_id": video_id,
                "video_title": title,
                "video_url": video_url,
                "frame_index": i,
                "license": "see source video",
            })

        print(f" → extracted {len(out_frames)} frames")
        return out_frames
    finally:
        # Clean up the downloaded video and any frames that were not moved,
        # whether we succeeded, failed, or were interrupted.
        shutil.rmtree(work_dir, ignore_errors=True)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
# ============================================================
|
| 275 |
+
# QWEN INLINE FILTER (optional)
|
| 276 |
+
# ============================================================
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def qwen_yes_no(image_path: str, m4_url: str = M4_QWEN_URL, timeout: int = 30) -> tuple:
    """Ask the Qwen server whether an image passes the filter prompt.

    The image is converted to RGB, downscaled so its longest side is at most
    1024 px, PNG-encoded and posted as a data URL to
    ``<m4_url>/v1/chat/completions`` together with ``QWEN_FILTER_PROMPT``.

    Args:
        image_path: Path of the image file to classify.
        m4_url: Base URL of the chat-completions server.
        timeout: HTTP timeout in seconds.

    Returns:
        ``(verdict, raw_answer)`` where verdict ∈ {YES, NO, UNKNOWN, ERROR}.
        The verdict is taken from the first alphabetic word of the upper-cased
        answer and must be exactly ``YES`` or ``NO``; anything else is
        ``UNKNOWN``. Any exception is reported as ``("ERROR", str(exc))``.
    """
    import re

    try:
        # The context manager releases the file handle right away; convert()
        # returns an independent in-memory copy.
        with Image.open(image_path) as src:
            img = src.convert("RGB")
        max_dim = 1024
        if max(img.size) > max_dim:
            ratio = max_dim / max(img.size)
            # Clamp to 1 px so extreme aspect ratios cannot produce a zero dimension.
            new_size = (
                max(1, int(img.size[0] * ratio)),
                max(1, int(img.size[1] * ratio)),
            )
            img = img.resize(new_size, Image.LANCZOS)
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        b64 = base64.b64encode(buf.getvalue()).decode()
        payload = {
            "model": QWEN_MODEL_PATH,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
                    {"type": "text", "text": QWEN_FILTER_PROMPT},
                ],
            }],
            "max_tokens": 12, "temperature": 0,
        }
        req = urllib.request.Request(
            f"{m4_url}/v1/chat/completions",
            data=json.dumps(payload).encode(),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=timeout) as r:
            data = json.loads(r.read())
        ans = data["choices"][0]["message"]["content"].strip().upper()
        # Compare whole words: a substring test would read "UNKNOWN", "NOT"
        # or "NONE" as NO and "YESTERDAY" as YES.
        words = re.findall(r"[A-Z]+", ans)
        first = words[0] if words else ""
        verdict = first if first in ("YES", "NO") else "UNKNOWN"
        return verdict, ans
    except Exception as e:
        return "ERROR", str(e)
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
# ============================================================
|
| 318 |
+
# DOWNLOAD + DEDUP
|
| 319 |
+
# ============================================================
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def url_filename(url: str, source: str) -> str:
    """Derive a stable local file name for ``url``.

    The name is ``<source>_<first 12 hex chars of sha1(url)><ext>``, where
    ``ext`` is the lower-cased extension of the URL path when it is listed in
    ``IMAGE_EXTS`` and ``.jpg`` otherwise.
    """
    digest = hashlib.sha1(url.encode()).hexdigest()[:12]
    url_path = urllib.parse.urlparse(url).path
    _, suffix = os.path.splitext(url_path)
    suffix = suffix.lower()
    if suffix in IMAGE_EXTS:
        return f"{source}_{digest}{suffix}"
    return f"{source}_{digest}.jpg"
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def download_one(url: str, dest: str, timeout: int = 30) -> tuple:
    """Fetch ``url`` and write the response body to ``dest``.

    Returns ``(ok, byte_count, error)``: ``(True, n, None)`` on success,
    ``(False, 0, "too small")`` when the body is under 1024 bytes (nothing is
    written), and ``(False, 0, str(exc))`` when any exception occurs.
    """
    try:
        request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
        with urllib.request.urlopen(request, timeout=timeout) as response:
            body = response.read()
        size = len(body)
        # Tiny responses are treated as failures rather than saved.
        if size < 1024:
            return False, 0, "too small"
        with open(dest, "wb") as out:
            out.write(body)
    except Exception as exc:
        return False, 0, str(exc)
    return True, size, None
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def perceptual_hash(image_path: str) -> str:
    """8x8 average-hash for fast cross-source dedup.

    The image is reduced to an 8x8 grayscale thumbnail; each pixel contributes
    one bit (1 when brighter than the mean). The 64 bits are returned as a
    16-character, zero-padded lowercase hex string.

    Returns:
        The hex digest, or ``""`` when the file cannot be opened or decoded.
    """
    try:
        # Close the file handle promptly: this runs once per gathered image,
        # and convert()/resize() already return an independent in-memory copy.
        with Image.open(image_path) as src:
            img = src.convert("L").resize((8, 8), Image.LANCZOS)
        pixels = list(img.getdata())
        avg = sum(pixels) / len(pixels)
        bits = "".join("1" if p > avg else "0" for p in pixels)
        return f"{int(bits, 2):016x}"
    except Exception:
        return ""
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
# ============================================================
|
| 357 |
+
# MAIN
|
| 358 |
+
# ============================================================
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def main():
    """CLI entry point: search, download, extract, dedup, filter, record.

    Steps, in order:
      1. Query DuckDuckGo and Wikimedia for every ``--query``.
      2. Download the new URLs in parallel into ``<out>/<bucket>/``.
      3. Extract frames from every ``--youtube`` URL.
      4. Drop files whose perceptual hash was already seen (this run or the manifest).
      5. Optionally (``--filter``) keep only images Qwen answers YES to.
      6. Append every entry (kept or dropped) to the manifest and, when running
         inside an experiment directory, write ``gather/stats.json``.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--out", default="drone-dataset-v2", help="Image output root (shared across experiments)")
    p.add_argument("--bucket", required=True, help="Bucket subpath, e.g. positive/fiber_spool_drone")
    p.add_argument("--experiment", default="",
                   help="Optional experiment name; if set, creates experiments/<YYYY-MM-DD_HHMMSS>_<name>/")
    p.add_argument("--query", action="append", default=[],
                   help="Search query (repeatable). Hits DDG + Wikimedia.")
    p.add_argument("--youtube", action="append", default=[],
                   help="YouTube video URL or playlist URL (repeatable). Extracts frames.")
    p.add_argument("--fps", type=float, default=1.0, help="Frames per second to extract from videos")
    p.add_argument("--max-frames-per-video", type=int, default=200)
    p.add_argument("--max-per-query", type=int, default=100)
    p.add_argument("--workers", type=int, default=50, help="Parallel download threads")
    p.add_argument("--filter", action="store_true",
                   help="Run Qwen YES/NO filter on each downloaded image, skip NO")
    p.add_argument("--cookies-from-browser", default=None,
                   help="For YouTube: chrome|safari|firefox — use browser cookies for age-gated/login videos")
    args = p.parse_args()

    bucket_dir = os.path.join(args.out, args.bucket)
    os.makedirs(bucket_dir, exist_ok=True)

    # Set up the dated experiment dir if requested
    experiment_dir = None
    if args.experiment or "EXPERIMENT_DIR" in os.environ:
        from experiments import make_experiment_dir, write_readme, write_config, update_latest_symlink
        if "EXPERIMENT_DIR" in os.environ:
            # A directory supplied through the environment is reused as-is.
            experiment_dir = os.environ["EXPERIMENT_DIR"]
            os.makedirs(os.path.join(experiment_dir, "gather"), exist_ok=True)
        else:
            experiment_dir = make_experiment_dir(args.experiment)
            write_readme(
                experiment_dir,
                name=args.experiment,
                description=f"gather_v2 run: bucket={args.bucket}, queries={args.query}, youtube={len(args.youtube)} videos",
                params=vars(args),
            )
            write_config(experiment_dir, vars(args))
            update_latest_symlink(experiment_dir)
        manifest_path = os.path.join(experiment_dir, "gather", "manifest.json")
        print(f"Experiment dir: {experiment_dir}")
    else:
        manifest_path = os.path.join(args.out, "manifest.json")
    manifest = []
    if os.path.exists(manifest_path):
        with open(manifest_path) as f:
            manifest = json.load(f)
        print(f"Resumed: {len(manifest)} files in manifest")

    # Track URL + perceptual-hash dedup sets
    seen_urls = {m["url"] for m in manifest if "url" in m}
    seen_hashes = {m["phash"] for m in manifest if m.get("phash")}

    # ===== Step 1: web search =====
    web_hits = []
    for q in args.query:
        print(f"\n[search] {q!r}")
        ddg_results = ddg_search(q, max_results=args.max_per_query)
        wiki_results = wikimedia_search(q, max_results=args.max_per_query)
        print(f" DDG: {len(ddg_results)} Wikimedia: {len(wiki_results)}")
        web_hits.extend(ddg_results)
        web_hits.extend(wiki_results)

    # Filter out duplicates by URL — against the manifest AND within this
    # batch. Two queries often return the same URL; both copies map to the
    # same destination path, so keeping both would download the file twice
    # and the phash dedup below would then delete the file that the first
    # (kept) entry points to.
    unique_hits = []
    for h in web_hits:
        if h["url"] in seen_urls:
            continue
        seen_urls.add(h["url"])
        unique_hits.append(h)
    web_hits = unique_hits
    print(f"\n {len(web_hits)} new web URLs to download (after dedup)")

    # ===== Step 2: parallel download =====
    downloaded = []
    if web_hits:
        print(f"\n[download] {len(web_hits)} files via {args.workers} threads...")
        t0 = time.time()
        # ThreadPoolExecutor rejects max_workers <= 0; fall back to one thread.
        with ThreadPoolExecutor(max_workers=max(1, args.workers)) as pool:
            futures = {}
            for hit in web_hits:
                fname = url_filename(hit["url"], hit["source"])
                dest = os.path.join(bucket_dir, fname)
                # Already on disk from an earlier run: leave it untouched.
                if os.path.exists(dest):
                    continue
                futures[pool.submit(download_one, hit["url"], dest)] = (hit, dest)
            n_ok, n_err = 0, 0
            for fut in as_completed(futures):
                hit, dest = futures[fut]
                ok, nbytes, err = fut.result()
                if ok:
                    n_ok += 1
                    downloaded.append({**hit, "path": dest, "bytes": nbytes})
                else:
                    n_err += 1
        elapsed = time.time() - t0
        print(f" downloaded: {n_ok} new, {n_err} errors in {elapsed:.0f}s")

    # ===== Step 3: YouTube frame extraction =====
    youtube_hits = []
    for video_url in args.youtube:
        print(f"\n[youtube] {video_url}")
        frames = youtube_extract_frames(
            video_url, bucket_dir,
            fps=args.fps, max_frames=args.max_frames_per_video,
            cookies_from_browser=args.cookies_from_browser,
        )
        youtube_hits.extend(frames)

    all_entries = downloaded + youtube_hits

    # ===== Step 4: dedup via perceptual hash =====
    if all_entries:
        print(f"\n[dedup] computing perceptual hashes...")
        for entry in all_entries:
            phash = perceptual_hash(entry["path"])
            entry["phash"] = phash
            if phash and phash in seen_hashes:
                # duplicate — remove the file
                try:
                    os.unlink(entry["path"])
                except Exception:
                    pass
                entry["dropped"] = "dup_phash"
            elif phash:
                # An empty hash means "could not be hashed"; it is never
                # compared, so it is not recorded either.
                seen_hashes.add(phash)
        n_dropped = sum(1 for e in all_entries if e.get("dropped"))
        print(f" dropped {n_dropped} duplicates")

    # ===== Step 5: Optional Qwen filter =====
    survivors = []
    for entry in all_entries:
        if entry.get("dropped"):
            continue
        if not args.filter:
            survivors.append(entry)
            continue
        verdict, raw = qwen_yes_no(entry["path"])
        entry["qwen_verdict"] = verdict
        entry["qwen_answer"] = raw
        if verdict != "YES":
            # NO, UNKNOWN and ERROR are all rejected; the file is removed.
            try:
                os.unlink(entry["path"])
            except Exception:
                pass
            entry["dropped"] = f"qwen_{verdict}"
        else:
            survivors.append(entry)

    # ===== Save manifest + stats =====
    # Dropped entries are recorded too, so their URLs are skipped on resume.
    for entry in all_entries:
        entry["bucket"] = args.bucket
        manifest.append(entry)
    os.makedirs(os.path.dirname(manifest_path), exist_ok=True)
    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)

    # If we're inside an experiment dir, write stats.json next to the manifest
    if experiment_dir:
        from datetime import datetime as _dt

        stats = {
            "bucket": args.bucket,
            "queries": args.query,
            "youtube_urls": args.youtube,
            "web_hits_found": len(web_hits),
            "downloaded": len(downloaded),
            "youtube_frames": len(youtube_hits),
            "dropped_dup": sum(1 for e in all_entries if e.get("dropped") == "dup_phash"),
            "dropped_qwen": sum(1 for e in all_entries if e.get("dropped", "").startswith("qwen")),
            "survivors": len(survivors),
            "filter_enabled": args.filter,
            "manifest_total": len(manifest),
            "completed_at": _dt.now().isoformat(timespec="seconds"),
        }
        stats_path = os.path.join(experiment_dir, "gather", "stats.json")
        with open(stats_path, "w") as f:
            json.dump(stats, f, indent=2)
        print(f" stats: {stats_path}")

    # ===== Summary =====
    print("\n" + "=" * 60)
    print("DONE")
    print("=" * 60)
    print(f" bucket: {args.bucket}")
    print(f" web hits found: {len(web_hits)}")
    print(f" downloaded: {len(downloaded)}")
    print(f" youtube frames: {len(youtube_hits)}")
    if args.filter:
        n_yes = sum(1 for e in all_entries if e.get("qwen_verdict") == "YES")
        n_no = sum(1 for e in all_entries if e.get("qwen_verdict") == "NO")
        print(f" qwen filter: YES={n_yes} NO={n_no}")
    print(f" survivors: {len(survivors)}")
    print(f" manifest: {manifest_path} ({len(manifest)} total)")


if __name__ == "__main__":
    main()
|
data_label_factory/project.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
project.py — load and validate a project YAML for the data labeling factory.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
from project import load_project
|
| 6 |
+
proj = load_project("projects/drones.yaml")
|
| 7 |
+
print(proj.target_object) # "fiber optic drone"
|
| 8 |
+
print(proj.bucket_queries["positive/fiber_spool_drone"]) # list of queries
|
| 9 |
+
print(proj.prompt("filter")) # templated string with {target_object} substituted
|
| 10 |
+
print(proj.r2_key("raw", "positive/fiber_spool_drone/foo.jpg")) # raw_v2/positive/...
|
| 11 |
+
|
| 12 |
+
The project loader is the SINGLE source of truth for paths, prompts, queries, and
|
| 13 |
+
backends. All scripts (gather, filter, label, verify) read from this object instead
|
| 14 |
+
of having hardcoded values.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
import os
|
| 19 |
+
from dataclasses import dataclass, field
|
| 20 |
+
from typing import Any
|
| 21 |
+
|
| 22 |
+
try:
|
| 23 |
+
import yaml
|
| 24 |
+
except ImportError:
|
| 25 |
+
raise SystemExit("PyYAML required: pip install pyyaml")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Default prompt templates. Override in the project YAML's `prompts:` section.
|
| 29 |
+
DEFAULT_PROMPTS = dict(
    # Image-level YES/NO relevance check; one instruction per line.
    filter="\n".join([
        "Look at this image. Does it show a {target_object} or a related object "
        "(its components, parts, or accessories)?",
        "Answer with exactly one word: YES or NO.",
        "YES if the main subject is a {target_object} or directly relevant to it.",
        "NO if the main subject is unrelated.",
    ]),
    # Per-crop check; expects a ``query`` substitution in addition to the defaults.
    verify=" ".join([
        "Look carefully at this image crop.",
        "Question: Is the main object in this crop actually a {query}?",
        "Answer first with one word: YES, NO, or UNSURE.",
        "Then briefly say what the object actually is in 5-10 words.",
    ]),
    # Short free-text description of the image.
    label_describe=" ".join([
        "Look at this image. If it shows a {target_object} or related object,",
        "describe what you see in 1-2 sentences.",
        "If it doesn't, say 'no {target_object}'.",
    ]),
)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@dataclass
class ProjectConfig:
    """In-memory form of a project YAML plus a few convenience accessors."""

    project_name: str
    target_object: str
    description: str
    data_root: str
    r2_bucket: str
    r2_raw_prefix: str
    r2_labels_prefix: str
    r2_reviews_prefix: str
    bucket_queries: dict[str, list[str]]
    falcon_queries: list[str]
    prompts_raw: dict[str, str]
    backends: dict[str, str]
    pod_config: dict[str, Any]
    raw: dict[str, Any] = field(default_factory=dict)

    # ---------- helpers ----------

    def prompt(self, name: str, **extra) -> str:
        """Render the prompt called ``name``.

        A non-empty entry in ``prompts_raw`` wins over ``DEFAULT_PROMPTS``.
        The template is formatted with ``target_object`` plus any keyword
        arguments passed in ``extra`` (which may override ``target_object``).
        Raises ``KeyError`` when no template exists under that name.
        """
        template = self.prompts_raw.get(name)
        if not template:
            template = DEFAULT_PROMPTS.get(name)
        if template is None:
            raise KeyError(f"unknown prompt name: {name!r}")
        substitutions = {"target_object": self.target_object}
        substitutions.update(extra)
        return template.format(**substitutions)

    def r2_key(self, kind: str, *parts: str) -> str:
        """Build an R2 object key for a pipeline stage.

        ``raw`` and ``labels`` join the configured prefix (trailing slashes
        trimmed) with ``parts`` using ``/``. ``reviews`` returns the configured
        reviews key unchanged and ignores ``parts``. Any other ``kind`` raises
        ``KeyError``.
        """
        if kind == "reviews":
            return self.r2_reviews_prefix
        prefixes = {"raw": self.r2_raw_prefix, "labels": self.r2_labels_prefix}
        if kind not in prefixes:
            raise KeyError(f"unknown r2 kind: {kind}")
        return prefixes[kind].rstrip("/") + "/" + "/".join(parts)

    def local_image_dir(self) -> str:
        """Return ``data_root`` with a leading ``~`` expanded."""
        return os.path.expanduser(self.data_root)

    def all_buckets(self) -> list[str]:
        """Return the bucket names in their stored order."""
        return [bucket for bucket in self.bucket_queries]

    def total_query_count(self) -> int:
        """Return the number of queries summed over all buckets."""
        total = 0
        for queries in self.bucket_queries.values():
            total += len(queries)
        return total

    def backend_for(self, stage: str) -> str:
        """Return the backend configured for ``stage``, or ``"qwen"`` if unset."""
        if stage in self.backends:
            return self.backends[stage]
        return "qwen"
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def load_project(path: str) -> ProjectConfig:
    """Load + validate a project YAML.

    Args:
        path: Path to the YAML file; a leading ``~`` is expanded.

    Returns:
        A ``ProjectConfig`` with defaults filled in for every optional section.
        Optional sections that are present but empty (e.g. a bare ``r2:`` key,
        which YAML parses as null) are treated the same as missing ones.

    Raises:
        ValueError: if the document is not a mapping, a required field is
            missing, ``buckets`` is not a mapping, or a bucket is neither a
            list nor a dict with a ``queries`` key.
    """
    path = os.path.expanduser(path)
    with open(path) as f:
        data = yaml.safe_load(f)

    if not isinstance(data, dict):
        raise ValueError(f"project YAML must be a mapping, got {type(data).__name__}")

    required = ["project_name", "target_object", "buckets", "falcon_queries"]
    for k in required:
        if k not in data:
            raise ValueError(f"project YAML missing required field: {k}")

    # Buckets normalization
    buckets = data["buckets"]
    if not isinstance(buckets, dict):
        raise ValueError(f"'buckets' must be a mapping, got {type(buckets).__name__}")
    bucket_queries = {}
    for bucket, spec in buckets.items():
        if isinstance(spec, list):
            bucket_queries[bucket] = spec
        elif isinstance(spec, dict) and "queries" in spec:
            bucket_queries[bucket] = spec["queries"]
        else:
            raise ValueError(f"bucket {bucket!r} must be a list or dict with 'queries'")

    # A single query written as a plain string would otherwise be exploded
    # into one "query" per character by list().
    falcon_queries = data["falcon_queries"]
    if isinstance(falcon_queries, str):
        falcon_queries = [falcon_queries]

    # ``or {}`` (rather than a .get default) also covers keys that are present
    # but null, matching how ``prompts`` is handled below.
    r2 = data.get("r2") or {}
    # Copy so the defaults added here do not leak into ``raw``.
    backends = dict(data.get("backends") or {})
    backends.setdefault("filter", "qwen")
    backends.setdefault("label", "pod")
    backends.setdefault("verify", "pod")

    return ProjectConfig(
        project_name=data["project_name"],
        target_object=data["target_object"],
        description=data.get("description") or "",
        data_root=data.get("data_root", "~/data-label-factory/" + data["project_name"]),
        r2_bucket=r2.get("bucket", data["project_name"]),
        r2_raw_prefix=r2.get("raw_prefix", "raw/"),
        r2_labels_prefix=r2.get("labels_prefix", "labels/"),
        r2_reviews_prefix=r2.get("reviews_prefix", "labels/reviews.json"),
        bucket_queries=bucket_queries,
        falcon_queries=list(falcon_queries or []),
        prompts_raw=data.get("prompts") or {},
        backends=backends,
        pod_config=data.get("pod") or {},
        raw=data,
    )
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# CLI: load + dump for inspection
|
| 155 |
+
if __name__ == "__main__":
    # Inspection CLI: load the YAML named on the command line and print a summary.
    import json
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("usage: python3 project.py <project.yaml>")
        sys.exit(1)

    proj = load_project(cli_args[0])
    rule = "=" * 60

    print(rule)
    print(f"Project: {proj.project_name}")
    print(rule)
    print(f" target_object: {proj.target_object!r}")
    print(f" data_root: {proj.local_image_dir()}")
    print(f" r2_bucket: {proj.r2_bucket}")
    print(f" r2 raw prefix: {proj.r2_raw_prefix}")
    print(f" buckets ({len(proj.bucket_queries)}):")
    for bucket_name in proj.bucket_queries:
        bucket_size = len(proj.bucket_queries[bucket_name])
        print(f" {bucket_name:40s} {bucket_size} queries")
    print(f" falcon_queries: {proj.falcon_queries}")
    print(f" backends: {proj.backends}")
    print(f" total_queries: {proj.total_query_count()}")
    print("\n Sample filter prompt:")
    # Only the first 250 characters of the rendered prompt are shown.
    sample = proj.prompt('filter')
    print(f" {sample[:250]}")
|
docs/canvas-demo.gif
ADDED
|
Git LFS Details
|
docs/x-launch-thread.md
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# X launch thread — data-label-factory
|
| 2 |
+
|
| 3 |
+
A 7-tweet thread. Target: ML-Twitter, Apple-Silicon devs, dataset-builders.
|
| 4 |
+
Hook: "labeled 1.8k drone images on a 16 GB MacBook." Asset: canvas-demo.gif.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
**1/ (the hook)**
|
| 9 |
+
|
| 10 |
+
I labeled 1,799 fiber-optic drone images on a 16 GB MacBook.
|
| 11 |
+
|
| 12 |
+
No GPU. No cloud. No labeling vendor.
|
| 13 |
+
|
| 14 |
+
One Python CLI + one YAML file + a 26-billion-parameter vision model streamed off the SSD.
|
| 15 |
+
|
| 16 |
+
Open-sourcing the whole pipeline today 🧵
|
| 17 |
+
|
| 18 |
+
[attach: canvas-demo.gif]
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
**2/ (what's in the box)**
|
| 23 |
+
|
| 24 |
+
`data-label-factory` is a generic auto-labeling pipeline for vision datasets.
|
| 25 |
+
|
| 26 |
+
You write a project YAML — `target_object: "fire hydrant"`, a few search queries, done — and run:
|
| 27 |
+
|
| 28 |
+
```
|
| 29 |
+
data_label_factory pipeline --project projects/fire-hydrants.yaml
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
Out the other end: a clean COCO dataset, reviewed in a browser.
|
| 33 |
+
|
| 34 |
+
---
|
| 35 |
+
|
| 36 |
+
**3/ (the pipeline)**
|
| 37 |
+
|
| 38 |
+
Five stages, all running locally on Apple Silicon:
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
gather → filter → label → verify → review
|
| 42 |
+
(DDG) (VLM) (Falcon) (VLM) (Canvas)
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
- **gather** — DuckDuckGo / Wikimedia / Openverse image search per bucket
|
| 46 |
+
- **filter** — image-level YES/NO classification (Qwen 2.5-VL or Gemma 4)
|
| 47 |
+
- **label** — bbox grounding via Falcon Perception (TII)
|
| 48 |
+
- **verify** — per-bbox YES/NO via the same VLM
|
| 49 |
+
- **review** — HTML5 Canvas web UI with hover/click/zoom/pan
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
**4/ (how it fits in 16 GB RAM)**
|
| 54 |
+
|
| 55 |
+
The trick is MLX Expert Sniper.
|
| 56 |
+
|
| 57 |
+
Gemma 4-26B is a Mixture-of-Experts model — only ~3 GB of weights are active per token. So instead of loading all 13 GB into RAM, we **stream cold experts off the SSD on demand**.
|
| 58 |
+
|
| 59 |
+
Resident set: ~3 GB Gemma + 1.5 GB Falcon = ~5 GB total.
|
| 60 |
+
|
| 61 |
+
You get 26B-param vision quality on a base-model M-series Mac.
|
| 62 |
+
|
| 63 |
+
---
|
| 64 |
+
|
| 65 |
+
**5/ (what we labeled)**
|
| 66 |
+
|
| 67 |
+
Reference run: detect fiber-optic-spool drones (the Ukraine-conflict kind).
|
| 68 |
+
|
| 69 |
+
- 1,421 images gathered from DDG + Wikimedia + Openverse
|
| 70 |
+
- 15,355 Falcon Perception bboxes generated
|
| 71 |
+
- 11,928 (78%) verified YES by Qwen 2.5-VL
|
| 72 |
+
- All reviewed in the canvas UI
|
| 73 |
+
|
| 74 |
+
Per-query Falcon↔Qwen agreement:
|
| 75 |
+
`cable spool` 88% · `quadcopter` 81% · `drone` 80%
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
**6/ (the canvas UI)**
|
| 80 |
+
|
| 81 |
+
The review tool is **pure HTML5 Canvas** — no SVG, no React-DOM bbox elements, just `ctx.drawImage` + `ctx.strokeRect` rendered every frame.
|
| 82 |
+
|
| 83 |
+
Drag to pan, scroll to zoom around the cursor, click a bbox to inspect, ←→ to step through 1,799 images.
|
| 84 |
+
|
| 85 |
+
[attach: canvas-demo.gif]
|
| 86 |
+
|
| 87 |
+
---
|
| 88 |
+
|
| 89 |
+
**7/ (the link)**
|
| 90 |
+
|
| 91 |
+
Repo: https://github.com/<USER>/data-label-factory
|
| 92 |
+
Reference dataset (1.8k drone images, COCO + verdicts): https://huggingface.co/datasets/<USER>/fiber-optic-drones
|
| 93 |
+
|
| 94 |
+
Reproduce in 5 commands:
|
| 95 |
+
```
|
| 96 |
+
git clone <repo>
|
| 97 |
+
cd data-label-factory && pip install pyyaml pillow requests
|
| 98 |
+
python3 -m mlx_vlm.server --model mlx-community/Qwen2.5-VL-3B-Instruct-4bit --port 8291
|
| 99 |
+
data_label_factory pipeline --project projects/stop-signs.yaml
|
| 100 |
+
cd web && PORT=3030 npm run dev # http://localhost:3030/canvas
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
Built on @MLX_apple, @PrinceCanuma's mlx-vlm, Falcon Perception by @TIIuae, and Gemma 4 by @GoogleDeepMind. Apache 2.0 all the way down.
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
## Notes for posting day
|
| 108 |
+
|
| 109 |
+
- Replace `<USER>` with the github org once chosen
|
| 110 |
+
- Confirm HF dataset card exists before posting tweet 7
|
| 111 |
+
- Pin tweet 1 to profile for the day
|
| 112 |
+
- Best post window: Tue/Wed 9-11am PT (ML-Twitter is most active)
|
| 113 |
+
- If engagement spikes, follow up with: a behind-the-scenes thread on the Expert Sniper streaming engine, OR a "label your own dataset in 10 minutes" tutorial
|
projects/drones.yaml
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =====================================================================
|
| 2 |
+
# drones.yaml — example project config for the data labeling factory
|
| 3 |
+
# =====================================================================
|
| 4 |
+
#
|
| 5 |
+
# This is the canonical example. It captures exactly the setup we used
|
| 6 |
+
# for the fiber-optic drone detector. To make a new project (e.g. stop signs,
|
| 7 |
+
# fire hydrants, manufacturing defects), copy this file, change `target_object`,
|
| 8 |
+
# adjust the queries, and run:
|
| 9 |
+
#
|
| 10 |
+
# data_label_factory pipeline --project projects/drones.yaml
|
| 11 |
+
#
|
| 12 |
+
# Generic shape:
|
| 13 |
+
# project_name → human-readable identifier (used in experiment dir names)
|
| 14 |
+
# target_object → the thing you're trying to detect (templated into prompts)
|
| 15 |
+
# data_root → where local images go
|
| 16 |
+
# r2: → cloud storage config (bucket, prefix per stage)
|
| 17 |
+
# buckets: → gather plan (5 buckets is conventional but any structure works)
|
| 18 |
+
# falcon_queries: → list of queries to run Falcon Perception with
|
| 19 |
+
# prompts: → optional overrides for templated prompts
|
| 20 |
+
# backends: → which model backend to use per stage
|
| 21 |
+
# =====================================================================
|
| 22 |
+
|
| 23 |
+
project_name: drones
|
| 24 |
+
target_object: "fiber optic drone"
|
| 25 |
+
description: |
|
| 26 |
+
Auto-labeling pipeline for fiber-optic drone detection. Falcon Perception
|
| 27 |
+
grounds bboxes for any drone, spool, or cable; Qwen2.5-VL verifies each.
|
| 28 |
+
|
| 29 |
+
# Where local images live (gitignored)
|
| 30 |
+
data_root: ~/drone-falcon-data/v2
|
| 31 |
+
|
| 32 |
+
# Cloudflare R2 storage
|
| 33 |
+
r2:
|
| 34 |
+
bucket: drone-falcon
|
| 35 |
+
raw_prefix: raw_v2/ # gathered images
|
| 36 |
+
labels_prefix: labels/ # COCO + verified JSONs
|
| 37 |
+
reviews_prefix: labels/reviews.json # human verdicts saved by web UI
|
| 38 |
+
|
| 39 |
+
# What to gather, organized by bucket. Each bucket is a folder under data_root
|
| 40 |
+
# and a corresponding R2 prefix. Multiple queries are OR'd via DDG/Wikimedia.
|
| 41 |
+
buckets:
|
| 42 |
+
positive/fiber_spool_drone:
|
| 43 |
+
queries:
|
| 44 |
+
- "fiber optic FPV drone"
|
| 45 |
+
- "tethered fiber optic drone"
|
| 46 |
+
- "Ukraine fiber optic drone war"
|
| 47 |
+
- "fiber optic kamikaze drone"
|
| 48 |
+
- "fiber optic drone payload"
|
| 49 |
+
- "wired FPV drone Ukraine"
|
| 50 |
+
- "fiber optic drone with spool"
|
| 51 |
+
- "Russian fiber optic drone"
|
| 52 |
+
- "fiber optic dispenser drone"
|
| 53 |
+
- "fiber optic combat drone"
|
| 54 |
+
|
| 55 |
+
positive/spool_only:
|
| 56 |
+
queries:
|
| 57 |
+
- "fiber optic cable spool"
|
| 58 |
+
- "optical fiber reel"
|
| 59 |
+
- "fiber optic winding machine"
|
| 60 |
+
- "spooled optical fiber cable"
|
| 61 |
+
- "fiber optic cable on reel"
|
| 62 |
+
- "optical fiber cable drum"
|
| 63 |
+
|
| 64 |
+
negative/drones_no_spool:
|
| 65 |
+
queries:
|
| 66 |
+
- "DJI Mavic 3 Pro photo"
|
| 67 |
+
- "FPV racing drone closeup"
|
| 68 |
+
- "consumer quadcopter flying"
|
| 69 |
+
- "agricultural spraying drone"
|
| 70 |
+
- "DJI Mini 4 Pro photo"
|
| 71 |
+
- "Autel Evo drone"
|
| 72 |
+
- "Skydio 2 drone"
|
| 73 |
+
- "racing drone build"
|
| 74 |
+
|
| 75 |
+
distractor/round_things:
|
| 76 |
+
queries:
|
| 77 |
+
- "garden hose reel"
|
| 78 |
+
- "cable drum reel industrial"
|
| 79 |
+
- "duct tape roll"
|
| 80 |
+
- "fire hose reel"
|
| 81 |
+
- "rope coil pile"
|
| 82 |
+
- "extension cord reel"
|
| 83 |
+
- "thread spool sewing"
|
| 84 |
+
|
| 85 |
+
background/empty:
|
| 86 |
+
queries:
|
| 87 |
+
- "blue sky clouds"
|
| 88 |
+
- "open field landscape"
|
| 89 |
+
- "industrial workshop interior"
|
| 90 |
+
- "outdoor military training"
|
| 91 |
+
|
| 92 |
+
# What Falcon Perception should look for in each image (the bbox grounding queries).
|
| 93 |
+
# These are the specific objects we want bounding boxes on.
|
| 94 |
+
falcon_queries:
|
| 95 |
+
- "fiber optic spool"
|
| 96 |
+
- "cable spool"
|
| 97 |
+
- "drone"
|
| 98 |
+
- "quadcopter"
|
| 99 |
+
- "fiber optic drone"
|
| 100 |
+
|
| 101 |
+
# Optional: prompt overrides. Default templates use {target_object} substitution.
|
| 102 |
+
# Comment out this block to use the defaults from data_label_factory/project.py.
|
| 103 |
+
prompts:
|
| 104 |
+
filter: |
|
| 105 |
+
Look at this image. Does it show a {target_object}, a related component
|
| 106 |
+
(cable spool, fiber reel, wound cable), or any other relevant object?
|
| 107 |
+
Answer with exactly one word: YES or NO.
|
| 108 |
+
YES if you see ANY of: a {target_object}, a quadcopter, a cable reel, a fiber spool.
|
| 109 |
+
NO if the main subject is something else.
|
| 110 |
+
|
| 111 |
+
verify: |
|
| 112 |
+
Look carefully at this image crop.
|
| 113 |
+
Question: Is the main object in this crop actually a {query}?
|
| 114 |
+
Answer first with one word: YES, NO, or UNSURE.
|
| 115 |
+
Then briefly say what the object actually is in 5-10 words.
|
| 116 |
+
|
| 117 |
+
# Which model backend to use per stage.
|
| 118 |
+
# qwen = Qwen2.5-VL-3B via mlx-vlm server (M4 :8291) — fast, free
|
| 119 |
+
# gemma = Gemma 4 26B via mac_tensor (M4 :8500) — slow chained agent
|
| 120 |
+
# falcon = Falcon Perception via mac_tensor /api/falcon — bbox grounding only
|
| 121 |
+
# pod = remote RunPod GPU pod — fast, ~$0.15-1.65/run
|
| 122 |
+
backends:
|
| 123 |
+
filter: qwen
|
| 124 |
+
label: pod # Falcon on RunPod for production scale
|
| 125 |
+
verify: pod # Qwen on the same pod
|
| 126 |
+
|
| 127 |
+
# Pod settings (only used when a stage backend = pod)
|
| 128 |
+
pod:
|
| 129 |
+
gpu_types: ["NVIDIA L40S"]
|
| 130 |
+
data_centers: ["EU-RO-1", "EU-CZ-1", "EU-NL-1", "US-CA-2"]
|
| 131 |
+
image: "runpod/pytorch:1.0.3-cu1290-torch291-ubuntu2204"
|
| 132 |
+
container_disk_gb: 30
|
| 133 |
+
volume_gb: 30
|
projects/stop-signs.yaml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =====================================================================
|
| 2 |
+
# stop-signs.yaml — proves the data labeling factory is GENERIC
|
| 3 |
+
# =====================================================================
|
| 4 |
+
#
|
| 5 |
+
# Same pipeline as drones.yaml but for stop signs. The only changes are
|
| 6 |
+
# the queries, the target_object, the data_root, and the falcon_queries.
|
| 7 |
+
# All scripts read this YAML and adapt automatically — zero code changes
|
| 8 |
+
# required to onboard a new object class.
|
| 9 |
+
#
|
| 10 |
+
# Run with:
|
| 11 |
+
# data_label_factory pipeline --project projects/stop-signs.yaml
|
| 12 |
+
# =====================================================================
|
| 13 |
+
|
| 14 |
+
project_name: stop-signs
|
| 15 |
+
target_object: "stop sign"
|
| 16 |
+
description: |
|
| 17 |
+
Smoke test project — train a stop sign detector for autonomous driving research.
|
| 18 |
+
|
| 19 |
+
data_root: ~/data-label-factory/stop-signs
|
| 20 |
+
|
| 21 |
+
r2:
|
| 22 |
+
bucket: drone-falcon # reuse the same R2 bucket — different prefixes
|
| 23 |
+
raw_prefix: stop-signs/raw/
|
| 24 |
+
labels_prefix: stop-signs/labels/
|
| 25 |
+
reviews_prefix: stop-signs/labels/reviews.json
|
| 26 |
+
|
| 27 |
+
buckets:
|
| 28 |
+
positive/clear_view:
|
| 29 |
+
queries:
|
| 30 |
+
- "stop sign closeup"
|
| 31 |
+
- "red stop sign octagon"
|
| 32 |
+
- "stop sign daytime"
|
| 33 |
+
- "stop sign intersection"
|
| 34 |
+
|
| 35 |
+
positive/partial_view:
|
| 36 |
+
queries:
|
| 37 |
+
- "stop sign occluded"
|
| 38 |
+
- "stop sign in distance"
|
| 39 |
+
- "stop sign at angle"
|
| 40 |
+
|
| 41 |
+
negative/other_signs:
|
| 42 |
+
queries:
|
| 43 |
+
- "yield sign"
|
| 44 |
+
- "speed limit sign"
|
| 45 |
+
- "do not enter sign"
|
| 46 |
+
- "one way sign"
|
| 47 |
+
|
| 48 |
+
distractor/red_signs:
|
| 49 |
+
queries:
|
| 50 |
+
- "red warning sign"
|
| 51 |
+
- "red circle traffic sign"
|
| 52 |
+
- "red parking sign"
|
| 53 |
+
- "no entry sign"
|
| 54 |
+
|
| 55 |
+
background/no_signs:
|
| 56 |
+
queries:
|
| 57 |
+
- "empty highway"
|
| 58 |
+
- "country road landscape"
|
| 59 |
+
- "city street empty"
|
| 60 |
+
- "parking lot empty"
|
| 61 |
+
|
| 62 |
+
falcon_queries:
|
| 63 |
+
- "stop sign"
|
| 64 |
+
- "traffic sign"
|
| 65 |
+
- "red octagonal sign"
|
| 66 |
+
- "road sign"
|
| 67 |
+
|
| 68 |
+
# Use only Qwen + local M4 for the smoke test (no RunPod)
|
| 69 |
+
backends:
|
| 70 |
+
filter: qwen
|
| 71 |
+
label: qwen # for smoke test, label with Qwen too — proves generic backend selection
|
| 72 |
+
verify: qwen
|
pyproject.toml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=64", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "data-label-factory"
|
| 7 |
+
version = "0.1.0"
|
| 8 |
+
description = "Generic auto-labeling pipeline for vision datasets — runs on a 16 GB Apple Silicon Mac via SSD-streaming MoE."
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
requires-python = ">=3.10"
|
| 11 |
+
license = { text = "Apache-2.0" }
|
| 12 |
+
authors = [
|
| 13 |
+
{ name = "walter-grace" },
|
| 14 |
+
]
|
| 15 |
+
keywords = [
|
| 16 |
+
"vision",
|
| 17 |
+
"dataset",
|
| 18 |
+
"labeling",
|
| 19 |
+
"annotation",
|
| 20 |
+
"object-detection",
|
| 21 |
+
"mlx",
|
| 22 |
+
"apple-silicon",
|
| 23 |
+
"qwen",
|
| 24 |
+
"gemma",
|
| 25 |
+
"falcon-perception",
|
| 26 |
+
]
|
| 27 |
+
classifiers = [
|
| 28 |
+
"Development Status :: 4 - Beta",
|
| 29 |
+
"Intended Audience :: Developers",
|
| 30 |
+
"Intended Audience :: Science/Research",
|
| 31 |
+
"License :: OSI Approved :: Apache Software License",
|
| 32 |
+
"Operating System :: MacOS",
|
| 33 |
+
"Programming Language :: Python :: 3 :: Only",
|
| 34 |
+
"Programming Language :: Python :: 3.10",
|
| 35 |
+
"Programming Language :: Python :: 3.11",
|
| 36 |
+
"Programming Language :: Python :: 3.12",
|
| 37 |
+
"Programming Language :: Python :: 3.13",
|
| 38 |
+
"Topic :: Scientific/Engineering :: Image Recognition",
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
dependencies = [
|
| 42 |
+
"pyyaml>=6.0",
|
| 43 |
+
"pillow>=9.0",
|
| 44 |
+
"requests>=2.28",
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
[project.optional-dependencies]
|
| 48 |
+
gather = [
|
| 49 |
+
# Image-search gathering (DDG, Wikimedia, YouTube frame extraction)
|
| 50 |
+
"duckduckgo-search>=4.0",
|
| 51 |
+
"yt-dlp>=2024.0.0",
|
| 52 |
+
]
|
| 53 |
+
dev = [
|
| 54 |
+
"pytest>=7.0",
|
| 55 |
+
"ruff>=0.5.0",
|
| 56 |
+
]
|
| 57 |
+
|
| 58 |
+
[project.urls]
|
| 59 |
+
Homepage = "https://github.com/walter-grace/data-label-factory"
|
| 60 |
+
Repository = "https://github.com/walter-grace/data-label-factory"
|
| 61 |
+
HuggingFace = "https://huggingface.co/waltgrace/data-label-factory"
|
| 62 |
+
Issues = "https://github.com/walter-grace/data-label-factory/issues"
|
| 63 |
+
|
| 64 |
+
[project.scripts]
|
| 65 |
+
data_label_factory = "data_label_factory.cli:main"
|
| 66 |
+
data-label-factory = "data_label_factory.cli:main"
|
| 67 |
+
|
| 68 |
+
[tool.setuptools]
|
| 69 |
+
packages = ["data_label_factory"]
|
| 70 |
+
|
| 71 |
+
[tool.setuptools.package-data]
|
| 72 |
+
data_label_factory = ["*.py"]
|
setup.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shim so older pip versions can do editable installs.
|
| 2 |
+
Real metadata lives in pyproject.toml."""
|
| 3 |
+
|
| 4 |
+
from setuptools import setup
|
| 5 |
+
|
| 6 |
+
setup()
|
web/.env.example
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# web/.env.example — template for web/.env.local (Cloudflare R2 credentials for the review UI)
|
| 2 |
+
#
|
| 3 |
+
# Copy this file to web/.env.local and fill in your own R2 bucket details.
|
| 4 |
+
# .env.local is gitignored. NEVER commit real credentials.
|
| 5 |
+
#
|
| 6 |
+
# Cloudflare R2 → Manage R2 API Tokens → Create API token (read+write on your bucket)
|
| 7 |
+
|
| 8 |
+
R2_ENDPOINT_URL=https://<your-account-id>.r2.cloudflarestorage.com
|
| 9 |
+
R2_ACCESS_KEY_ID=<your-access-key>
|
| 10 |
+
R2_SECRET_ACCESS_KEY=<your-secret-key>
|
| 11 |
+
R2_BUCKET=<your-bucket-name>
|
web/.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
| 2 |
+
|
| 3 |
+
# dependencies
|
| 4 |
+
/node_modules
|
| 5 |
+
/.pnp
|
| 6 |
+
.pnp.*
|
| 7 |
+
.yarn/*
|
| 8 |
+
!.yarn/patches
|
| 9 |
+
!.yarn/plugins
|
| 10 |
+
!.yarn/releases
|
| 11 |
+
!.yarn/versions
|
| 12 |
+
|
| 13 |
+
# testing
|
| 14 |
+
/coverage
|
| 15 |
+
|
| 16 |
+
# next.js
|
| 17 |
+
/.next/
|
| 18 |
+
/out/
|
| 19 |
+
|
| 20 |
+
# production
|
| 21 |
+
/build
|
| 22 |
+
|
| 23 |
+
# misc
|
| 24 |
+
.DS_Store
|
| 25 |
+
*.pem
|
| 26 |
+
|
| 27 |
+
# debug
|
| 28 |
+
npm-debug.log*
|
| 29 |
+
yarn-debug.log*
|
| 30 |
+
yarn-error.log*
|
| 31 |
+
.pnpm-debug.log*
|
| 32 |
+
|
| 33 |
+
# env files (can opt-in for committing if needed)
|
| 34 |
+
.env*
|
| 35 |
+
|
| 36 |
+
# vercel
|
| 37 |
+
.vercel
|
| 38 |
+
|
| 39 |
+
# typescript
|
| 40 |
+
*.tsbuildinfo
|
| 41 |
+
next-env.d.ts
|
web/README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
| 2 |
+
|
| 3 |
+
## Getting Started
|
| 4 |
+
|
| 5 |
+
First, run the development server:
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
npm run dev
|
| 9 |
+
# or
|
| 10 |
+
yarn dev
|
| 11 |
+
# or
|
| 12 |
+
pnpm dev
|
| 13 |
+
# or
|
| 14 |
+
bun dev
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
| 18 |
+
|
| 19 |
+
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
| 20 |
+
|
| 21 |
+
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
| 22 |
+
|
| 23 |
+
## Learn More
|
| 24 |
+
|
| 25 |
+
To learn more about Next.js, take a look at the following resources:
|
| 26 |
+
|
| 27 |
+
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
| 28 |
+
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
| 29 |
+
|
| 30 |
+
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
| 31 |
+
|
| 32 |
+
## Deploy on Vercel
|
| 33 |
+
|
| 34 |
+
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
| 35 |
+
|
| 36 |
+
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
web/app/api/labels/route.ts
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { NextResponse } from "next/server";
|
| 2 |
+
import { getJson, putJson, presignGet } from "@/lib/r2";
|
| 3 |
+
import type { LabelPartial, ImageReview, VerifiedRun } from "@/lib/types";
|
| 4 |
+
|
| 5 |
+
/**
|
| 6 |
+
* GET /api/labels
|
| 7 |
+
* → returns the labeled dataset converted to ImageReview[] format,
|
| 8 |
+
* with one presigned image URL per entry, plus Qwen VLM verdicts,
|
| 9 |
+
* plus any saved human reviews.
|
| 10 |
+
*
|
| 11 |
+
* Reads from R2:
|
| 12 |
+
* labels/partial.json ← Falcon bboxes (live snapshot from pod; falls back to labels/run1_partial.json)
|
| 13 |
+
* labels/run2.verified.json ← Qwen YES/NO/UNSURE verdict per bbox (falls back to labels/run1.verified.json)
|
| 14 |
+
* labels/reviews.json ← Human verdicts (saved by this UI)
|
| 15 |
+
*/
|
| 16 |
+
export async function GET() {
|
| 17 |
+
// Try the live partial first
|
| 18 |
+
let partial = await getJson<LabelPartial>("labels/partial.json");
|
| 19 |
+
if (!partial || !partial.results) {
|
| 20 |
+
const finalRun = await getJson<LabelPartial>("labels/run1_partial.json");
|
| 21 |
+
if (finalRun) partial = finalRun;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
if (!partial || !partial.results) {
|
| 25 |
+
return NextResponse.json({ images: [], total: 0, error: "no labels found in r2 yet" });
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
// Load Qwen verdicts (if they exist) — try run2 first, fall back to run1
|
| 29 |
+
let verified = await getJson<VerifiedRun>("labels/run2.verified.json");
|
| 30 |
+
if (!verified?.annotations) {
|
| 31 |
+
verified = await getJson<VerifiedRun>("labels/run1.verified.json");
|
| 32 |
+
}
|
| 33 |
+
const verdictById = new Map<number, { verdict: "YES" | "NO" | "UNSURE"; reasoning: string }>();
|
| 34 |
+
if (verified?.annotations) {
|
| 35 |
+
for (const v of verified.annotations) {
|
| 36 |
+
verdictById.set(v.annotation_id, { verdict: v.verdict, reasoning: v.reasoning });
|
| 37 |
+
}
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
// Load any existing human reviews
|
| 41 |
+
const reviews = (await getJson<Record<string, ImageReview>>("labels/reviews.json")) ?? {};
|
| 42 |
+
|
| 43 |
+
const images: (ImageReview & { url: string })[] = [];
|
| 44 |
+
for (const [path, res] of Object.entries(partial.results)) {
|
| 45 |
+
if (res.error || !res.queries) continue;
|
| 46 |
+
// Convert pod-side path to R2 key, supporting both v1 and v2 layouts:
|
| 47 |
+
// /workspace/images/... → raw/... (v1)
|
| 48 |
+
// /workspace/images_v2/... → raw_v2/... (v2)
|
| 49 |
+
let r2Key: string;
|
| 50 |
+
const idx2 = path.indexOf("/images_v2/");
|
| 51 |
+
const idx1 = path.indexOf("/images/");
|
| 52 |
+
if (idx2 !== -1) {
|
| 53 |
+
r2Key = "raw_v2/" + path.slice(idx2 + "/images_v2/".length);
|
| 54 |
+
} else if (idx1 !== -1) {
|
| 55 |
+
r2Key = "raw/" + path.slice(idx1 + "/images/".length);
|
| 56 |
+
} else {
|
| 57 |
+
continue;
|
| 58 |
+
}
|
| 59 |
+
const stripPrefix = r2Key.startsWith("raw_v2/") ? "raw_v2/" : "raw/";
|
| 60 |
+
const bucket = r2Key.slice(stripPrefix.length).split("/").slice(0, 2).join("/");
|
| 61 |
+
|
| 62 |
+
// Flatten queries → bboxes with query labels + Qwen verdicts
|
| 63 |
+
const flatBboxes: ImageReview["bboxes"] = [];
|
| 64 |
+
for (const [query, qres] of Object.entries(res.queries)) {
|
| 65 |
+
if (qres.error) continue;
|
| 66 |
+
for (const b of qres.bboxes) {
|
| 67 |
+
const v = b.annotation_id != null ? verdictById.get(b.annotation_id) : undefined;
|
| 68 |
+
flatBboxes.push({
|
| 69 |
+
...b,
|
| 70 |
+
query,
|
| 71 |
+
vlm_verdict: v?.verdict,
|
| 72 |
+
vlm_reasoning: v?.reasoning,
|
| 73 |
+
});
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
if (flatBboxes.length === 0) continue; // skip empty for now
|
| 77 |
+
|
| 78 |
+
const existing = reviews[r2Key];
|
| 79 |
+
// Merge existing verdicts onto fresh bboxes (match by index for now)
|
| 80 |
+
if (existing) {
|
| 81 |
+
for (let i = 0; i < flatBboxes.length && i < existing.bboxes.length; i++) {
|
| 82 |
+
flatBboxes[i].verdict = existing.bboxes[i].verdict;
|
| 83 |
+
flatBboxes[i].note = existing.bboxes[i].note;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
const url = await presignGet(r2Key, 3600);
|
| 88 |
+
|
| 89 |
+
images.push({
|
| 90 |
+
image_path: r2Key,
|
| 91 |
+
bucket,
|
| 92 |
+
width: res.width,
|
| 93 |
+
height: res.height,
|
| 94 |
+
bboxes: flatBboxes,
|
| 95 |
+
image_verdict: existing?.image_verdict,
|
| 96 |
+
reviewed_at: existing?.reviewed_at,
|
| 97 |
+
url,
|
| 98 |
+
});
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
// Sort by bucket priority (positive first, then most detections)
|
| 102 |
+
const PRIORITY: Record<string, number> = {
|
| 103 |
+
"positive/fiber_spool_drone": 0,
|
| 104 |
+
"positive/spool_only": 1,
|
| 105 |
+
"distractor/round_things": 2,
|
| 106 |
+
"negative/drones_no_spool": 3,
|
| 107 |
+
"background/empty": 4,
|
| 108 |
+
};
|
| 109 |
+
images.sort((a, b) => {
|
| 110 |
+
const pa = PRIORITY[a.bucket] ?? 99;
|
| 111 |
+
const pb = PRIORITY[b.bucket] ?? 99;
|
| 112 |
+
if (pa !== pb) return pa - pb;
|
| 113 |
+
return b.bboxes.length - a.bboxes.length;
|
| 114 |
+
});
|
| 115 |
+
|
| 116 |
+
return NextResponse.json({ images, total: images.length });
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
/**
|
| 120 |
+
* POST /api/labels — save a single image's review back to R2.
|
| 121 |
+
*/
|
| 122 |
+
export async function POST(req: Request) {
|
| 123 |
+
const body = (await req.json()) as ImageReview;
|
| 124 |
+
if (!body.image_path) {
|
| 125 |
+
return NextResponse.json({ error: "missing image_path" }, { status: 400 });
|
| 126 |
+
}
|
| 127 |
+
const reviews = (await getJson<Record<string, ImageReview>>("labels/reviews.json")) ?? {};
|
| 128 |
+
reviews[body.image_path] = { ...body, reviewed_at: new Date().toISOString() };
|
| 129 |
+
await putJson("labels/reviews.json", reviews);
|
| 130 |
+
return NextResponse.json({ ok: true, total_reviewed: Object.keys(reviews).length });
|
| 131 |
+
}
|
web/app/canvas/page.tsx
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useEffect, useState, useMemo } from "react";
|
| 4 |
+
import { BboxCanvas } from "@/components/BboxCanvas";
|
| 5 |
+
import { colorForQuery } from "@/components/BboxOverlay";
|
| 6 |
+
import type { ImageReview } from "@/lib/types";
|
| 7 |
+
|
| 8 |
+
type LoadedImage = ImageReview & { url: string };
|
| 9 |
+
|
| 10 |
+
export default function CanvasPage() {
|
| 11 |
+
const [images, setImages] = useState<LoadedImage[]>([]);
|
| 12 |
+
const [loading, setLoading] = useState(true);
|
| 13 |
+
const [error, setError] = useState<string | null>(null);
|
| 14 |
+
const [bucketFilter, setBucketFilter] = useState<string>("all");
|
| 15 |
+
const [selectedIdx, setSelectedIdx] = useState<number>(0);
|
| 16 |
+
const [activeBbox, setActiveBbox] = useState<number | null>(null);
|
| 17 |
+
|
| 18 |
+
useEffect(() => {
|
| 19 |
+
fetch("/api/labels")
|
| 20 |
+
.then((r) => r.json())
|
| 21 |
+
.then((data) => {
|
| 22 |
+
if (data.error) setError(data.error);
|
| 23 |
+
else setImages(data.images ?? []);
|
| 24 |
+
})
|
| 25 |
+
.catch((e) => setError(String(e)))
|
| 26 |
+
.finally(() => setLoading(false));
|
| 27 |
+
}, []);
|
| 28 |
+
|
| 29 |
+
const filtered = useMemo(() => {
|
| 30 |
+
if (bucketFilter === "all") return images;
|
| 31 |
+
return images.filter((i) => i.bucket === bucketFilter);
|
| 32 |
+
}, [images, bucketFilter]);
|
| 33 |
+
|
| 34 |
+
const current = filtered[selectedIdx];
|
| 35 |
+
|
| 36 |
+
const bucketCounts = useMemo(() => {
|
| 37 |
+
const m = new Map<string, number>();
|
| 38 |
+
for (const i of images) m.set(i.bucket, (m.get(i.bucket) ?? 0) + 1);
|
| 39 |
+
return m;
|
| 40 |
+
}, [images]);
|
| 41 |
+
|
| 42 |
+
const queryStats = useMemo(() => {
|
| 43 |
+
if (!current) return [] as { query: string; count: number; yes: number; no: number }[];
|
| 44 |
+
const m = new Map<string, { query: string; count: number; yes: number; no: number }>();
|
| 45 |
+
for (const b of current.bboxes) {
|
| 46 |
+
const e = m.get(b.query) ?? { query: b.query, count: 0, yes: 0, no: 0 };
|
| 47 |
+
e.count++;
|
| 48 |
+
if (b.vlm_verdict === "YES") e.yes++;
|
| 49 |
+
if (b.vlm_verdict === "NO") e.no++;
|
| 50 |
+
m.set(b.query, e);
|
| 51 |
+
}
|
| 52 |
+
return Array.from(m.values()).sort((a, b) => b.count - a.count);
|
| 53 |
+
}, [current]);
|
| 54 |
+
|
| 55 |
+
// Keyboard navigation
|
| 56 |
+
useEffect(() => {
|
| 57 |
+
const onKey = (e: KeyboardEvent) => {
|
| 58 |
+
if (e.target instanceof HTMLInputElement) return;
|
| 59 |
+
if (e.key === "ArrowRight" || e.key === "j") {
|
| 60 |
+
setSelectedIdx((i) => Math.min(i + 1, filtered.length - 1));
|
| 61 |
+
setActiveBbox(null);
|
| 62 |
+
} else if (e.key === "ArrowLeft" || e.key === "k") {
|
| 63 |
+
setSelectedIdx((i) => Math.max(i - 1, 0));
|
| 64 |
+
setActiveBbox(null);
|
| 65 |
+
}
|
| 66 |
+
};
|
| 67 |
+
window.addEventListener("keydown", onKey);
|
| 68 |
+
return () => window.removeEventListener("keydown", onKey);
|
| 69 |
+
}, [filtered.length]);
|
| 70 |
+
|
| 71 |
+
if (loading) {
|
| 72 |
+
return (
|
| 73 |
+
<main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
|
| 74 |
+
<div className="text-2xl">Loading…</div>
|
| 75 |
+
</main>
|
| 76 |
+
);
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
if (error) {
|
| 80 |
+
return (
|
| 81 |
+
<main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
|
| 82 |
+
<h1 className="text-3xl font-bold mb-4">drone-falcon · canvas review</h1>
|
| 83 |
+
<div className="rounded-lg border border-red-800 bg-red-950 p-6 text-red-200">
|
| 84 |
+
Error: {error}
|
| 85 |
+
</div>
|
| 86 |
+
</main>
|
| 87 |
+
);
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
const buckets = ["all", ...Array.from(bucketCounts.keys()).sort()];
|
| 91 |
+
const total = images.length;
|
| 92 |
+
const totalApproved = images.filter((i) => i.image_verdict === "approved").length;
|
| 93 |
+
const totalRejected = images.filter((i) => i.image_verdict === "rejected").length;
|
| 94 |
+
|
| 95 |
+
return (
|
| 96 |
+
<main className="min-h-screen bg-zinc-950 text-zinc-50 font-sans">
|
| 97 |
+
{/* Header */}
|
| 98 |
+
<header className="border-b border-zinc-800 bg-zinc-900/60 backdrop-blur px-6 py-4 flex items-center justify-between">
|
| 99 |
+
<div>
|
| 100 |
+
<h1 className="text-2xl font-semibold tracking-tight text-zinc-50">
|
| 101 |
+
drone-falcon <span className="text-zinc-500">/</span> canvas review
|
| 102 |
+
</h1>
|
| 103 |
+
<p className="text-sm text-zinc-300 mt-0.5">
|
| 104 |
+
HTML5 Canvas viewer — <span className="text-zinc-100">drag</span> to pan,{" "}
|
| 105 |
+
<span className="text-zinc-100">scroll</span> to zoom,{" "}
|
| 106 |
+
<span className="text-zinc-100">double-click</span> to reset
|
| 107 |
+
</p>
|
| 108 |
+
</div>
|
| 109 |
+
<div className="flex items-center gap-2 text-sm">
|
| 110 |
+
<a href="/" className="text-zinc-300 hover:text-zinc-50 underline-offset-4 hover:underline mr-2">
|
| 111 |
+
← grid view
|
| 112 |
+
</a>
|
| 113 |
+
<span className="rounded-md border border-zinc-700 bg-zinc-900 px-2.5 py-1 text-zinc-100 font-medium">
|
| 114 |
+
{total.toLocaleString()} labeled
|
| 115 |
+
</span>
|
| 116 |
+
<span className="rounded-md border border-emerald-600/60 bg-emerald-500/10 px-2.5 py-1 text-emerald-300 font-medium">
|
| 117 |
+
{totalApproved} approved
|
| 118 |
+
</span>
|
| 119 |
+
<span className="rounded-md border border-red-600/60 bg-red-500/10 px-2.5 py-1 text-red-300 font-medium">
|
| 120 |
+
{totalRejected} rejected
|
| 121 |
+
</span>
|
| 122 |
+
</div>
|
| 123 |
+
</header>
|
| 124 |
+
|
| 125 |
+
{/* Bucket tabs */}
|
| 126 |
+
<div className="border-b border-zinc-800 bg-zinc-950 px-6 py-3">
|
| 127 |
+
<div className="flex flex-wrap items-center gap-2">
|
| 128 |
+
{buckets.map((b) => {
|
| 129 |
+
const active = bucketFilter === b;
|
| 130 |
+
const count = b === "all" ? total : (bucketCounts.get(b) ?? 0);
|
| 131 |
+
const label = b === "all" ? "All" : b;
|
| 132 |
+
return (
|
| 133 |
+
<button
|
| 134 |
+
key={b}
|
| 135 |
+
onClick={() => { setBucketFilter(b); setSelectedIdx(0); setActiveBbox(null); }}
|
| 136 |
+
className={`px-3 py-1.5 rounded-md text-sm font-medium transition-colors border ${
|
| 137 |
+
active
|
| 138 |
+
? "bg-zinc-50 text-zinc-950 border-zinc-50"
|
| 139 |
+
: "bg-zinc-900 text-zinc-200 border-zinc-700 hover:bg-zinc-800 hover:text-zinc-50"
|
| 140 |
+
}`}
|
| 141 |
+
>
|
| 142 |
+
{label}
|
| 143 |
+
<span className={`ml-1.5 text-xs ${active ? "text-zinc-500" : "text-zinc-400"}`}>
|
| 144 |
+
{count}
|
| 145 |
+
</span>
|
| 146 |
+
</button>
|
| 147 |
+
);
|
| 148 |
+
})}
|
| 149 |
+
</div>
|
| 150 |
+
</div>
|
| 151 |
+
|
| 152 |
+
{/* Main canvas + sidebars */}
|
| 153 |
+
<div className="grid grid-cols-12 gap-4 p-4">
|
| 154 |
+
{/* Thumbnail strip */}
|
| 155 |
+
<div className="col-span-2 min-w-0 max-h-[calc(100vh-180px)] overflow-y-auto pr-2 space-y-2">
|
| 156 |
+
<div className="text-xs font-semibold uppercase tracking-wider text-zinc-400 px-1 pb-1">
|
| 157 |
+
Images ({filtered.length})
|
| 158 |
+
</div>
|
| 159 |
+
{filtered.slice(0, 200).map((img, idx) => (
|
| 160 |
+
<button
|
| 161 |
+
key={img.image_path}
|
| 162 |
+
onClick={() => { setSelectedIdx(idx); setActiveBbox(null); }}
|
| 163 |
+
className={`block w-full overflow-hidden rounded-md border-2 transition-all ${
|
| 164 |
+
idx === selectedIdx
|
| 165 |
+
? "border-blue-500 ring-2 ring-blue-500/30"
|
| 166 |
+
: "border-zinc-800 hover:border-zinc-500"
|
| 167 |
+
}`}
|
| 168 |
+
>
|
| 169 |
+
{/* eslint-disable-next-line @next/next/no-img-element */}
|
| 170 |
+
<img src={img.url} alt="" className="w-full h-20 object-cover" />
|
| 171 |
+
<div className="bg-zinc-900 px-2 py-1 text-xs text-zinc-200 font-medium text-left">
|
| 172 |
+
{img.bboxes.length} detections
|
| 173 |
+
</div>
|
| 174 |
+
</button>
|
| 175 |
+
))}
|
| 176 |
+
</div>
|
| 177 |
+
|
| 178 |
+
{/* Canvas area */}
|
| 179 |
+
<div className="col-span-7 min-w-0">
|
| 180 |
+
{current ? (
|
| 181 |
+
<div className="rounded-lg border border-zinc-800 bg-zinc-900 p-4 min-w-0">
|
| 182 |
+
<div className="flex items-center justify-between mb-3">
|
| 183 |
+
<div className="text-sm font-mono text-zinc-200 truncate">
|
| 184 |
+
{current.image_path}
|
| 185 |
+
</div>
|
| 186 |
+
<div className="text-sm text-zinc-300 font-medium whitespace-nowrap ml-3">
|
| 187 |
+
{current.width}×{current.height} · {current.bboxes.length} bboxes ·{" "}
|
| 188 |
+
<span className="text-zinc-50">{selectedIdx + 1}</span>
|
| 189 |
+
<span className="text-zinc-500">/{filtered.length}</span>
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
<BboxCanvas
|
| 193 |
+
src={current.url}
|
| 194 |
+
width={current.width}
|
| 195 |
+
height={current.height}
|
| 196 |
+
bboxes={current.bboxes.map((b, idx) => ({ ...b, idx }))}
|
| 197 |
+
activeIdx={activeBbox}
|
| 198 |
+
onBboxClick={setActiveBbox}
|
| 199 |
+
aspectRatio={16 / 10}
|
| 200 |
+
/>
|
| 201 |
+
<div className="mt-3 flex flex-wrap items-center gap-x-4 gap-y-2 text-sm text-zinc-300">
|
| 202 |
+
<span className="flex items-center gap-1.5">
|
| 203 |
+
<kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">←</kbd>
|
| 204 |
+
<kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">→</kbd>
|
| 205 |
+
navigate
|
| 206 |
+
</span>
|
| 207 |
+
<span className="flex items-center gap-1.5">
|
| 208 |
+
<kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">scroll</kbd>
|
| 209 |
+
zoom
|
| 210 |
+
</span>
|
| 211 |
+
<span className="flex items-center gap-1.5">
|
| 212 |
+
<kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">shift+drag</kbd>
|
| 213 |
+
pan
|
| 214 |
+
</span>
|
| 215 |
+
<span className="flex items-center gap-1.5">
|
| 216 |
+
<kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">dblclick</kbd>
|
| 217 |
+
reset view
|
| 218 |
+
</span>
|
| 219 |
+
<span className="flex items-center gap-1.5">
|
| 220 |
+
<kbd className="bg-zinc-800 border border-zinc-700 text-zinc-100 px-1.5 py-0.5 rounded text-xs font-mono">click</kbd>
|
| 221 |
+
select bbox
|
| 222 |
+
</span>
|
| 223 |
+
</div>
|
| 224 |
+
</div>
|
| 225 |
+
) : (
|
| 226 |
+
<div className="rounded-lg border border-zinc-800 bg-zinc-900 p-8 text-zinc-300 text-center">
|
| 227 |
+
No images in this bucket
|
| 228 |
+
</div>
|
| 229 |
+
)}
|
| 230 |
+
</div>
|
| 231 |
+
|
| 232 |
+
{/* Sidebar: bbox details */}
|
| 233 |
+
<div className="col-span-3 min-w-0 max-h-[calc(100vh-180px)] overflow-y-auto space-y-4 pr-1">
|
| 234 |
+
{/* Selected bbox */}
|
| 235 |
+
{activeBbox !== null && current && (() => {
|
| 236 |
+
const b = current.bboxes[activeBbox];
|
| 237 |
+
return (
|
| 238 |
+
<div className="rounded-lg border-2 border-blue-500 bg-zinc-900 p-4">
|
| 239 |
+
<div className="flex items-center justify-between mb-3">
|
| 240 |
+
<div className="text-sm font-bold text-blue-300 uppercase tracking-wide">
|
| 241 |
+
Bbox #{activeBbox + 1}
|
| 242 |
+
</div>
|
| 243 |
+
<button
|
| 244 |
+
onClick={() => setActiveBbox(null)}
|
| 245 |
+
className="text-zinc-400 hover:text-zinc-100 text-sm"
|
| 246 |
+
>
|
| 247 |
+
✕
|
| 248 |
+
</button>
|
| 249 |
+
</div>
|
| 250 |
+
<div className="flex items-center gap-2 mb-3">
|
| 251 |
+
<span
|
| 252 |
+
className="inline-block w-3 h-3 rounded-sm border border-zinc-600"
|
| 253 |
+
style={{ backgroundColor: colorForQuery(b.query) }}
|
| 254 |
+
/>
|
| 255 |
+
<span className="text-sm text-zinc-100 font-medium">{b.query}</span>
|
| 256 |
+
</div>
|
| 257 |
+
<div className="text-xs text-zinc-400 mb-2">Detected by Falcon</div>
|
| 258 |
+
{b.vlm_verdict && (
|
| 259 |
+
<div className="mt-3 pt-3 border-t border-zinc-800">
|
| 260 |
+
<div className="text-xs text-zinc-400 mb-1">Qwen verdict</div>
|
| 261 |
+
<div className={`text-base font-bold ${
|
| 262 |
+
b.vlm_verdict === "YES" ? "text-emerald-400" :
|
| 263 |
+
b.vlm_verdict === "NO" ? "text-red-400" :
|
| 264 |
+
"text-amber-400"
|
| 265 |
+
}`}>
|
| 266 |
+
{b.vlm_verdict}
|
| 267 |
+
</div>
|
| 268 |
+
{b.vlm_reasoning && (
|
| 269 |
+
<div className="text-zinc-300 italic mt-2 text-sm leading-relaxed">
|
| 270 |
+
“{b.vlm_reasoning}”
|
| 271 |
+
</div>
|
| 272 |
+
)}
|
| 273 |
+
</div>
|
| 274 |
+
)}
|
| 275 |
+
<div className="text-xs text-zinc-400 font-mono mt-3 pt-3 border-t border-zinc-800">
|
| 276 |
+
<div>x1: {Math.round(b.x1)} y1: {Math.round(b.y1)}</div>
|
| 277 |
+
<div>x2: {Math.round(b.x2)} y2: {Math.round(b.y2)}</div>
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
);
|
| 281 |
+
})()}
|
| 282 |
+
|
| 283 |
+
{/* Per-query summary */}
|
| 284 |
+
<div className="rounded-lg border border-zinc-800 bg-zinc-900 p-4">
|
| 285 |
+
<div className="text-xs font-bold uppercase tracking-wider text-zinc-400 mb-3">
|
| 286 |
+
Queries on this image
|
| 287 |
+
</div>
|
| 288 |
+
<div className="space-y-2">
|
| 289 |
+
{queryStats.map((qs) => (
|
| 290 |
+
<div key={qs.query} className="flex items-center justify-between text-sm">
|
| 291 |
+
<div className="flex items-center gap-2 min-w-0">
|
| 292 |
+
<span
|
| 293 |
+
className="h-3 w-3 rounded-sm border border-zinc-600 flex-shrink-0"
|
| 294 |
+
style={{ backgroundColor: colorForQuery(qs.query) }}
|
| 295 |
+
/>
|
| 296 |
+
<span className="text-zinc-100 truncate">{qs.query}</span>
|
| 297 |
+
</div>
|
| 298 |
+
<span className="text-zinc-300 font-medium whitespace-nowrap ml-2">
|
| 299 |
+
{qs.count}
|
| 300 |
+
{qs.yes > 0 && <span className="text-emerald-400 ml-1.5">✓{qs.yes}</span>}
|
| 301 |
+
{qs.no > 0 && <span className="text-red-400 ml-1">✗{qs.no}</span>}
|
| 302 |
+
</span>
|
| 303 |
+
</div>
|
| 304 |
+
))}
|
| 305 |
+
</div>
|
| 306 |
+
</div>
|
| 307 |
+
|
| 308 |
+
{/* Controls */}
|
| 309 |
+
<div className="rounded-lg border border-zinc-800 bg-zinc-900 p-4">
|
| 310 |
+
<div className="text-xs font-bold uppercase tracking-wider text-zinc-400 mb-3">
|
| 311 |
+
Navigate
|
| 312 |
+
</div>
|
| 313 |
+
<div className="flex gap-2">
|
| 314 |
+
<button
|
| 315 |
+
onClick={() => { setSelectedIdx((i) => Math.max(i - 1, 0)); setActiveBbox(null); }}
|
| 316 |
+
className="flex-1 px-3 py-2 rounded-md bg-zinc-800 hover:bg-zinc-700 text-zinc-100 text-sm font-medium border border-zinc-700 transition-colors"
|
| 317 |
+
>
|
| 318 |
+
← Previous
|
| 319 |
+
</button>
|
| 320 |
+
<button
|
| 321 |
+
onClick={() => { setSelectedIdx((i) => Math.min(i + 1, filtered.length - 1)); setActiveBbox(null); }}
|
| 322 |
+
className="flex-1 px-3 py-2 rounded-md bg-zinc-800 hover:bg-zinc-700 text-zinc-100 text-sm font-medium border border-zinc-700 transition-colors"
|
| 323 |
+
>
|
| 324 |
+
Next →
|
| 325 |
+
</button>
|
| 326 |
+
</div>
|
| 327 |
+
</div>
|
| 328 |
+
</div>
|
| 329 |
+
</div>
|
| 330 |
+
</main>
|
| 331 |
+
);
|
| 332 |
+
}
|
web/app/favicon.ico
ADDED
|
|
web/app/globals.css
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@import "tailwindcss";
|
| 2 |
+
@import "tw-animate-css";
|
| 3 |
+
@import "shadcn/tailwind.css";
|
| 4 |
+
|
| 5 |
+
@custom-variant dark (&:is(.dark *));
|
| 6 |
+
|
| 7 |
+
/*
 * Tailwind theme tokens.
 *
 * Every --color-* and --radius-* entry forwards to a raw custom property that
 * is declared in the :root and .dark rules of this stylesheet; the three font
 * stacks are spelled out literally.
 */
@theme inline {
  /* Page surface */
  --color-background: var(--background);
  --color-foreground: var(--foreground);

  /* Font stacks (system fonts only) */
  --font-sans: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
  --font-mono: ui-monospace, "SF Mono", "Menlo", "Cascadia Mono", "Roboto Mono", monospace;
  --font-heading: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;

  /* Sidebar palette */
  --color-sidebar-ring: var(--sidebar-ring);
  --color-sidebar-border: var(--sidebar-border);
  --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
  --color-sidebar-accent: var(--sidebar-accent);
  --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
  --color-sidebar-primary: var(--sidebar-primary);
  --color-sidebar-foreground: var(--sidebar-foreground);
  --color-sidebar: var(--sidebar);

  /* Chart palette */
  --color-chart-5: var(--chart-5);
  --color-chart-4: var(--chart-4);
  --color-chart-3: var(--chart-3);
  --color-chart-2: var(--chart-2);
  --color-chart-1: var(--chart-1);

  /* Form chrome */
  --color-ring: var(--ring);
  --color-input: var(--input);
  --color-border: var(--border);

  /* Semantic colors */
  --color-destructive: var(--destructive);
  --color-accent-foreground: var(--accent-foreground);
  --color-accent: var(--accent);
  --color-muted-foreground: var(--muted-foreground);
  --color-muted: var(--muted);
  --color-secondary-foreground: var(--secondary-foreground);
  --color-secondary: var(--secondary);
  --color-primary-foreground: var(--primary-foreground);
  --color-primary: var(--primary);
  --color-popover-foreground: var(--popover-foreground);
  --color-popover: var(--popover);
  --color-card-foreground: var(--card-foreground);
  --color-card: var(--card);

  /* Radius scale: multiples of the single --radius base value */
  --radius-sm: calc(var(--radius) * 0.6);
  --radius-md: calc(var(--radius) * 0.8);
  --radius-lg: var(--radius);
  --radius-xl: calc(var(--radius) * 1.4);
  --radius-2xl: calc(var(--radius) * 1.8);
  --radius-3xl: calc(var(--radius) * 2.2);
  --radius-4xl: calc(var(--radius) * 2.6);
}
|
| 50 |
+
|
| 51 |
+
/*
 * Light-theme values for the raw design tokens. All colors are achromatic
 * oklch() greys except --destructive. --radius is declared only here.
 */
:root {
  /* Geometry */
  --radius: 0.625rem;

  /* Surfaces */
  --background: oklch(1 0 0);
  --foreground: oklch(0.145 0 0);
  --card: oklch(1 0 0);
  --card-foreground: oklch(0.145 0 0);
  --popover: oklch(1 0 0);
  --popover-foreground: oklch(0.145 0 0);

  /* Semantic colors */
  --primary: oklch(0.205 0 0);
  --primary-foreground: oklch(0.985 0 0);
  --secondary: oklch(0.97 0 0);
  --secondary-foreground: oklch(0.205 0 0);
  --muted: oklch(0.97 0 0);
  --muted-foreground: oklch(0.556 0 0);
  --accent: oklch(0.97 0 0);
  --accent-foreground: oklch(0.205 0 0);
  --destructive: oklch(0.577 0.245 27.325);

  /* Form chrome */
  --border: oklch(0.922 0 0);
  --input: oklch(0.922 0 0);
  --ring: oklch(0.708 0 0);

  /* Chart palette */
  --chart-1: oklch(0.87 0 0);
  --chart-2: oklch(0.556 0 0);
  --chart-3: oklch(0.439 0 0);
  --chart-4: oklch(0.371 0 0);
  --chart-5: oklch(0.269 0 0);

  /* Sidebar palette */
  --sidebar: oklch(0.985 0 0);
  --sidebar-foreground: oklch(0.145 0 0);
  --sidebar-primary: oklch(0.205 0 0);
  --sidebar-primary-foreground: oklch(0.985 0 0);
  --sidebar-accent: oklch(0.97 0 0);
  --sidebar-accent-foreground: oklch(0.205 0 0);
  --sidebar-border: oklch(0.922 0 0);
  --sidebar-ring: oklch(0.708 0 0);
}
|
| 85 |
+
|
| 86 |
+
/*
 * Dark-theme overrides for the raw design tokens, active under a `.dark`
 * ancestor class. --radius is not redeclared here, so the :root value applies.
 * Borders and inputs use translucent white rather than a solid grey.
 */
.dark {
  /* Surfaces */
  --background: oklch(0.145 0 0);
  --foreground: oklch(0.985 0 0);
  --card: oklch(0.205 0 0);
  --card-foreground: oklch(0.985 0 0);
  --popover: oklch(0.205 0 0);
  --popover-foreground: oklch(0.985 0 0);

  /* Semantic colors */
  --primary: oklch(0.922 0 0);
  --primary-foreground: oklch(0.205 0 0);
  --secondary: oklch(0.269 0 0);
  --secondary-foreground: oklch(0.985 0 0);
  --muted: oklch(0.269 0 0);
  --muted-foreground: oklch(0.708 0 0);
  --accent: oklch(0.269 0 0);
  --accent-foreground: oklch(0.985 0 0);
  --destructive: oklch(0.704 0.191 22.216);

  /* Form chrome */
  --border: oklch(1 0 0 / 10%);
  --input: oklch(1 0 0 / 15%);
  --ring: oklch(0.556 0 0);

  /* Chart palette (same values as the light theme) */
  --chart-1: oklch(0.87 0 0);
  --chart-2: oklch(0.556 0 0);
  --chart-3: oklch(0.439 0 0);
  --chart-4: oklch(0.371 0 0);
  --chart-5: oklch(0.269 0 0);

  /* Sidebar palette; --sidebar-primary is a saturated color, not a grey */
  --sidebar: oklch(0.205 0 0);
  --sidebar-foreground: oklch(0.985 0 0);
  --sidebar-primary: oklch(0.488 0.243 264.376);
  --sidebar-primary-foreground: oklch(0.985 0 0);
  --sidebar-accent: oklch(0.269 0 0);
  --sidebar-accent-foreground: oklch(0.985 0 0);
  --sidebar-border: oklch(1 0 0 / 10%);
  --sidebar-ring: oklch(0.556 0 0);
}
|
| 119 |
+
|
| 120 |
+
/*
 * Base-layer element defaults: themed border/outline colors on every element,
 * page background and text color, and explicit system font stacks for body
 * text, headings, and monospace elements.
 */
@layer base {
  html {
    @apply font-sans;
  }

  body {
    @apply bg-background text-foreground;
    font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
    /* OpenType feature tags; they only have an effect if the resolved font defines them. */
    font-feature-settings: "cv11", "ss01";
    -webkit-font-smoothing: antialiased;
    -moz-osx-font-smoothing: grayscale;
  }

  /* Default border and outline colors for every element. */
  * {
    @apply border-border outline-ring/50;
  }

  /* Headings: same sans stack as body, with slightly tightened tracking. */
  h1, h2, h3, h4, h5, h6 {
    font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
    letter-spacing: -0.01em;
  }

  /* Monospace elements and the .font-mono class share one stack. */
  code, kbd, pre, .font-mono {
    font-family: ui-monospace, "SF Mono", "Menlo", "Cascadia Mono", "Roboto Mono", monospace;
  }
}
|
web/app/layout.tsx
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { Metadata } from "next";
|
| 2 |
+
import { Geist, Geist_Mono } from "next/font/google";
|
| 3 |
+
import "./globals.css";
|
| 4 |
+
|
| 5 |
+
// Font loaders. Each call exposes the loaded font through a CSS custom
// property (`--font-geist-sans` / `--font-geist-mono`); the generated
// `.variable` class names are attached to <html> by RootLayout.
// The arguments are kept as inline literals rather than shared constants.
const geistSans = Geist({
  variable: "--font-geist-sans",
  subsets: ["latin"],
});

const geistMono = Geist_Mono({
  variable: "--font-geist-mono",
  subsets: ["latin"],
});

// Document metadata for the whole app. Replaces the create-next-app
// placeholder title/description with the wording used in the review page
// header, so browser tabs and link previews identify the tool.
export const metadata: Metadata = {
  title: "drone-falcon · review factory",
  description: "Human verification of Falcon Perception bboxes",
};
|
| 19 |
+
|
| 20 |
+
/**
 * Root layout shared by every route.
 *
 * Renders the <html>/<body> shell: attaches the two font-variable classes,
 * forces the `dark` theme class, and applies the dark zinc page colors before
 * rendering the route content passed in as `children`.
 */
export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  // Font CSS-variable classes first, then the fixed theme/layout classes.
  const htmlClassName = `${geistSans.variable} ${geistMono.variable} dark h-full antialiased`;
  const bodyClassName = "min-h-full flex flex-col bg-zinc-950 text-zinc-100 font-sans";

  return (
    <html lang="en" className={htmlClassName}>
      <body className={bodyClassName}>{children}</body>
    </html>
  );
}
|
web/app/page.tsx
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useEffect, useState, useCallback, useMemo, useRef } from "react";
|
| 4 |
+
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
| 5 |
+
import { Button } from "@/components/ui/button";
|
| 6 |
+
import { Badge } from "@/components/ui/badge";
|
| 7 |
+
import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
|
| 8 |
+
import { Toaster } from "@/components/ui/sonner";
|
| 9 |
+
import { toast } from "sonner";
|
| 10 |
+
import { BboxOverlay, colorForQuery, type AnnotatedBbox } from "@/components/BboxOverlay";
|
| 11 |
+
import type { ImageReview } from "@/lib/types";
|
| 12 |
+
|
| 13 |
+
type LoadedImage = ImageReview & { url: string };
|
| 14 |
+
|
| 15 |
+
/**
 * Grid review page for human verification of detector bounding boxes.
 *
 * Loads the labeled images from `GET /api/labels`, lets the reviewer filter by
 * bucket, step through images, select a bbox, and record an approved /
 * rejected / unsure verdict per image or per bbox. Each verdict is applied to
 * local state immediately and then POSTed to `/api/labels`; the result of the
 * save is reported through a toast.
 *
 * Keyboard shortcuts (ignored while an <input> has focus or a Ctrl/Cmd/Alt
 * modifier is held): ArrowRight / j = next, ArrowLeft / k = previous,
 * y = approve, n = reject, u = unsure.
 */
export default function Home() {
  const [images, setImages] = useState<LoadedImage[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [bucketFilter, setBucketFilter] = useState<string>("all");
  const [selectedIdx, setSelectedIdx] = useState<number>(0);
  const [activeBbox, setActiveBbox] = useState<number | null>(null);
  const cardRef = useRef<HTMLDivElement>(null);

  // Load the label set once on mount. A payload carrying `error` is surfaced
  // as an error screen, as is any network / JSON failure.
  useEffect(() => {
    fetch("/api/labels")
      .then((r) => r.json())
      .then((data) => {
        if (data.error) setError(data.error);
        else setImages(data.images ?? []);
      })
      .catch((e) => setError(String(e)))
      .finally(() => setLoading(false));
  }, []);

  // Images visible under the current bucket tab.
  const filtered = useMemo(() => {
    if (bucketFilter === "all") return images;
    return images.filter((i) => i.bucket === bucketFilter);
  }, [images, bucketFilter]);

  const current = filtered[selectedIdx];

  // Highest valid index into `filtered`. Clamped to 0 so that navigating
  // inside an empty bucket can never push `selectedIdx` to -1.
  const lastIdx = Math.max(filtered.length - 1, 0);

  // The selected bbox, or undefined when nothing is selected or the stored
  // index does not exist on the current image.
  const selectedBbox = current && activeBbox !== null ? current.bboxes[activeBbox] : undefined;

  // Bboxes of the current image tagged with their position, as the overlay expects.
  const annotated: AnnotatedBbox[] = useMemo(() => {
    if (!current) return [];
    return current.bboxes.map((b, idx) => ({ ...b, idx }));
  }, [current]);

  // Per-query totals for the current image, most frequent query first.
  const queryStats = useMemo(() => {
    if (!current) return [] as { query: string; count: number; approved: number; rejected: number }[];
    const m = new Map<string, { query: string; count: number; approved: number; rejected: number }>();
    for (const b of current.bboxes) {
      const e = m.get(b.query) ?? { query: b.query, count: 0, approved: 0, rejected: 0 };
      e.count++;
      if (b.verdict === "approved") e.approved++;
      if (b.verdict === "rejected") e.rejected++;
      m.set(b.query, e);
    }
    return Array.from(m.values()).sort((a, b) => b.count - a.count);
  }, [current]);

  const totalReviewed = images.filter((i) => i.image_verdict).length;
  const totalApproved = images.filter((i) => i.image_verdict === "approved").length;
  const totalRejected = images.filter((i) => i.image_verdict === "rejected").length;

  // Number of images per bucket, across the whole (unfiltered) set.
  const bucketCounts = useMemo(() => {
    const m = new Map<string, number>();
    for (const i of images) m.set(i.bucket, (m.get(i.bucket) ?? 0) + 1);
    return m;
  }, [images]);

  // Move the selection one image forward / backward and drop the bbox
  // selection, since bbox indices are only meaningful within one image.
  const goNext = useCallback(() => {
    setSelectedIdx((i) => Math.min(i + 1, lastIdx));
    setActiveBbox(null);
  }, [lastIdx]);

  const goPrev = useCallback(() => {
    setSelectedIdx((i) => Math.max(i - 1, 0));
    setActiveBbox(null);
  }, []);

  // Persist one image record and report the outcome via toast.
  const saveReview = useCallback(async (img: LoadedImage) => {
    try {
      const res = await fetch("/api/labels", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(img),
      });
      const data = await res.json();
      if (data.ok) toast.success(`Saved (${data.total_reviewed} reviewed)`);
      else toast.error(data.error ?? "save failed");
    } catch (e) {
      toast.error(String(e));
    }
  }, []);

  // Record an image-level verdict, save it, and advance to the next image.
  const setImageVerdict = useCallback(
    (verdict: "approved" | "rejected" | "unsure") => {
      if (!current) return;
      const updated = { ...current, image_verdict: verdict };
      setImages((prev) => prev.map((p) => (p.image_path === current.image_path ? updated : p)));
      saveReview(updated);
      goNext();
    },
    [current, goNext, saveReview],
  );

  // Record a verdict on one bbox of the current image and save it.
  const setBboxVerdict = useCallback(
    (idx: number, verdict: "approved" | "rejected" | "unsure") => {
      if (!current) return;
      const newBboxes = current.bboxes.map((b, i) => (i === idx ? { ...b, verdict } : b));
      const updated = { ...current, bboxes: newBboxes };
      setImages((prev) => prev.map((p) => (p.image_path === current.image_path ? updated : p)));
      saveReview(updated);
    },
    [current, saveReview],
  );

  // Global keyboard shortcuts.
  useEffect(() => {
    const onKey = (e: KeyboardEvent) => {
      if (e.target instanceof HTMLInputElement) return;
      // Leave browser/OS chords such as Cmd+N or Ctrl+Y alone; without this
      // guard they would also record a verdict.
      if (e.metaKey || e.ctrlKey || e.altKey) return;
      if (e.key === "ArrowRight" || e.key === "j") {
        goNext();
      } else if (e.key === "ArrowLeft" || e.key === "k") {
        goPrev();
      } else if (e.key === "y") {
        setImageVerdict("approved");
      } else if (e.key === "n") {
        setImageVerdict("rejected");
      } else if (e.key === "u") {
        setImageVerdict("unsure");
      }
    };
    window.addEventListener("keydown", onKey);
    return () => window.removeEventListener("keydown", onKey);
  }, [goNext, goPrev, setImageVerdict]);

  if (loading) {
    return (
      <main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
        <div className="text-2xl">Loading labels from R2…</div>
      </main>
    );
  }

  if (error) {
    return (
      <main className="min-h-screen bg-zinc-950 text-zinc-100 p-8">
        <h1 className="text-3xl font-bold mb-4">drone-falcon factory</h1>
        <Card className="bg-red-950 border-red-800">
          <CardContent className="pt-6">
            <div className="text-red-300">Error: {error}</div>
            <div className="text-zinc-400 mt-2 text-sm">
              The labeling pod is probably still running. Sync labels/partial.json to R2 to see them here.
            </div>
          </CardContent>
        </Card>
      </main>
    );
  }

  const buckets = ["all", ...Array.from(bucketCounts.keys()).sort()];

  return (
    <main className="min-h-screen bg-zinc-950 text-zinc-100">
      <Toaster theme="dark" position="bottom-right" />

      <header className="border-b border-zinc-800 px-6 py-4">
        <div className="flex items-center justify-between">
          <div>
            <h1 className="text-2xl font-bold tracking-tight">drone-falcon · review factory</h1>
            <p className="text-sm text-zinc-400">human verification of Falcon Perception bboxes</p>
          </div>
          <div className="flex items-center gap-3 text-sm">
            <Badge variant="outline" className="border-zinc-700 text-zinc-300">{images.length} labeled</Badge>
            <Badge variant="outline" className="border-emerald-700 text-emerald-400">{totalApproved} approved</Badge>
            <Badge variant="outline" className="border-red-700 text-red-400">{totalRejected} rejected</Badge>
            <Badge variant="outline" className="border-zinc-700 text-zinc-300">{totalReviewed}/{images.length} reviewed</Badge>
          </div>
        </div>
      </header>

      <div className="border-b border-zinc-800 px-6 py-2">
        {/* Switching bucket changes the current image, so the bbox selection
            must be cleared along with the image index. */}
        <Tabs value={bucketFilter} onValueChange={(v) => { setBucketFilter(v); setSelectedIdx(0); setActiveBbox(null); }}>
          <TabsList className="bg-zinc-900">
            {buckets.map((b) => (
              <TabsTrigger key={b} value={b} className="data-[state=active]:bg-zinc-800">
                {b === "all" ? `All (${images.length})` : `${b} (${bucketCounts.get(b) ?? 0})`}
              </TabsTrigger>
            ))}
          </TabsList>
        </Tabs>
      </div>

      <div className="grid grid-cols-12 gap-4 p-4">
        {/* Thumbnail strip */}
        <div className="col-span-2 max-h-[calc(100vh-180px)] overflow-y-auto pr-2 space-y-2">
          {filtered.map((img, idx) => (
            <button
              key={img.image_path}
              onClick={() => { setSelectedIdx(idx); setActiveBbox(null); }}
              className={`block w-full overflow-hidden rounded border-2 transition-all ${
                idx === selectedIdx ? "border-blue-500" : "border-zinc-800 hover:border-zinc-600"
              }`}
            >
              {/* eslint-disable-next-line @next/next/no-img-element */}
              <img src={img.url} alt="" className="w-full h-20 object-cover" />
              <div className="bg-zinc-900 px-1 py-0.5 text-[10px] text-zinc-400 flex justify-between">
                <span>{img.bboxes.length} dets</span>
                <span>
                  {img.image_verdict === "approved" && "✓"}
                  {img.image_verdict === "rejected" && "✗"}
                </span>
              </div>
            </button>
          ))}
        </div>

        {/* Current image with bbox overlay and verdict controls */}
        <div className="col-span-7" ref={cardRef}>
          {current ? (
            <Card className="bg-zinc-900 border-zinc-800">
              <CardHeader className="pb-2">
                <CardTitle className="text-base font-mono text-zinc-300 truncate">{current.image_path}</CardTitle>
                <div className="text-xs text-zinc-500">
                  {current.width}×{current.height} · {current.bboxes.length} bboxes · {selectedIdx + 1}/{filtered.length}
                </div>
              </CardHeader>
              <CardContent>
                <BboxOverlay
                  src={current.url}
                  width={current.width}
                  height={current.height}
                  bboxes={annotated}
                  activeIdx={activeBbox}
                  onBboxClick={(idx) => setActiveBbox(idx)}
                />
                <div className="mt-4 flex gap-2">
                  <Button onClick={() => setImageVerdict("approved")} className="bg-emerald-700 hover:bg-emerald-600">
                    ✓ Approve image (Y)
                  </Button>
                  <Button onClick={() => setImageVerdict("rejected")} variant="destructive">
                    ✗ Reject image (N)
                  </Button>
                  <Button onClick={() => setImageVerdict("unsure")} variant="outline" className="border-zinc-700">
                    ? Unsure (U)
                  </Button>
                  <div className="flex-1" />
                  <Button onClick={goPrev} variant="outline" className="border-zinc-700">←</Button>
                  <Button onClick={goNext} variant="outline" className="border-zinc-700">→</Button>
                </div>
                <div className="mt-2 text-xs text-zinc-500">
                  Shortcuts: <kbd>Y</kbd> approve · <kbd>N</kbd> reject · <kbd>U</kbd> unsure · <kbd>←</kbd> <kbd>→</kbd> navigate · click a bbox to select
                </div>
              </CardContent>
            </Card>
          ) : (
            <Card className="bg-zinc-900 border-zinc-800">
              <CardContent className="pt-6 text-zinc-400">No images in this bucket</CardContent>
            </Card>
          )}
        </div>

        {/* Sidebar: per-query totals and the selected bbox */}
        <div className="col-span-3 max-h-[calc(100vh-180px)] overflow-y-auto">
          <Card className="bg-zinc-900 border-zinc-800">
            <CardHeader className="pb-2">
              <CardTitle className="text-sm">Per-query bboxes</CardTitle>
            </CardHeader>
            <CardContent className="space-y-3">
              {queryStats.map((qs) => (
                <div key={qs.query} className="space-y-1">
                  <div className="flex items-center justify-between text-xs">
                    <div className="flex items-center gap-2">
                      <span className="h-3 w-3 rounded" style={{ backgroundColor: colorForQuery(qs.query) }} />
                      <span className="font-medium">{qs.query}</span>
                    </div>
                    <span className="text-zinc-500">
                      {qs.count}
                      {qs.approved > 0 && <span className="text-emerald-500"> ✓{qs.approved}</span>}
                      {qs.rejected > 0 && <span className="text-red-500"> ✗{qs.rejected}</span>}
                    </span>
                  </div>
                </div>
              ))}
              {/* Rendered only when the selected index resolves to a real bbox. */}
              {selectedBbox && activeBbox !== null && (
                <div className="border-t border-zinc-800 pt-3 mt-3">
                  <div className="text-xs font-bold mb-2">Selected bbox #{activeBbox + 1}</div>
                  <div className="text-xs text-zinc-400 mb-1">Falcon: {selectedBbox.query}</div>
                  {selectedBbox.vlm_verdict && (
                    <div className="text-xs mb-2">
                      <span className="text-zinc-500">Qwen: </span>
                      <span className={
                        selectedBbox.vlm_verdict === "YES" ? "text-emerald-400" :
                        selectedBbox.vlm_verdict === "NO" ? "text-red-400" :
                        "text-amber-400"
                      }>
                        {selectedBbox.vlm_verdict}
                      </span>
                      {selectedBbox.vlm_reasoning && (
                        <div className="text-zinc-400 italic mt-1 text-[11px]">
                          “{selectedBbox.vlm_reasoning}”
                        </div>
                      )}
                    </div>
                  )}
                  <div className="flex gap-1 mt-2">
                    <Button size="sm" className="bg-emerald-700 hover:bg-emerald-600 text-xs h-7" onClick={() => setBboxVerdict(activeBbox, "approved")}>✓</Button>
                    <Button size="sm" variant="destructive" className="text-xs h-7" onClick={() => setBboxVerdict(activeBbox, "rejected")}>✗</Button>
                    <Button size="sm" variant="outline" className="text-xs h-7 border-zinc-700" onClick={() => setBboxVerdict(activeBbox, "unsure")}>?</Button>
                  </div>
                </div>
              )}
            </CardContent>
          </Card>
        </div>
      </div>
    </main>
  );
}
|
web/components.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"$schema": "https://ui.shadcn.com/schema.json",
|
| 3 |
+
"style": "base-nova",
|
| 4 |
+
"rsc": true,
|
| 5 |
+
"tsx": true,
|
| 6 |
+
"tailwind": {
|
| 7 |
+
"config": "",
|
| 8 |
+
"css": "app/globals.css",
|
| 9 |
+
"baseColor": "neutral",
|
| 10 |
+
"cssVariables": true,
|
| 11 |
+
"prefix": ""
|
| 12 |
+
},
|
| 13 |
+
"iconLibrary": "lucide",
|
| 14 |
+
"rtl": false,
|
| 15 |
+
"aliases": {
|
| 16 |
+
"components": "@/components",
|
| 17 |
+
"utils": "@/lib/utils",
|
| 18 |
+
"ui": "@/components/ui",
|
| 19 |
+
"lib": "@/lib",
|
| 20 |
+
"hooks": "@/hooks"
|
| 21 |
+
},
|
| 22 |
+
"menuColor": "default",
|
| 23 |
+
"menuAccent": "subtle",
|
| 24 |
+
"registries": {}
|
| 25 |
+
}
|
web/components/BboxCanvas.tsx
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useEffect, useRef, useState, useCallback } from "react";
|
| 4 |
+
import type { Bbox } from "@/lib/types";
|
| 5 |
+
import {
|
| 6 |
+
fitViewport,
|
| 7 |
+
zoomAt,
|
| 8 |
+
imageToCanvas,
|
| 9 |
+
hitBbox,
|
| 10 |
+
type Viewport,
|
| 11 |
+
} from "@/lib/canvas-utils";
|
| 12 |
+
import { colorForQuery } from "@/components/BboxOverlay";
|
| 13 |
+
|
| 14 |
+
type AnnotatedBbox = Bbox & {
|
| 15 |
+
query: string;
|
| 16 |
+
vlm_verdict?: "YES" | "NO" | "UNSURE";
|
| 17 |
+
vlm_reasoning?: string;
|
| 18 |
+
verdict?: "approved" | "rejected" | "unsure";
|
| 19 |
+
};
|
| 20 |
+
|
| 21 |
+
type Props = {
|
| 22 |
+
src: string;
|
| 23 |
+
width: number; // image native width in pixels
|
| 24 |
+
height: number; // image native height in pixels
|
| 25 |
+
bboxes: AnnotatedBbox[];
|
| 26 |
+
activeIdx: number | null;
|
| 27 |
+
onBboxClick: (idx: number | null) => void;
|
| 28 |
+
onBboxHover?: (idx: number | null) => void;
|
| 29 |
+
showLabels?: boolean;
|
| 30 |
+
/** Optional fixed canvas display size. If omitted, the canvas fills its parent
|
| 31 |
+
* width and uses `aspectRatio` to derive the height. */
|
| 32 |
+
canvasWidth?: number;
|
| 33 |
+
canvasHeight?: number;
|
| 34 |
+
/** Aspect ratio (w/h) used when sizing responsively. Default 16/10. */
|
| 35 |
+
aspectRatio?: number;
|
| 36 |
+
};
|
| 37 |
+
|
| 38 |
+
/**
 * Pure HTML5 Canvas bbox renderer + interaction.
 * Single <canvas>, draws image + bboxes in one pass per frame.
 * Mouse wheel = zoom around cursor. Shift+drag = pan. Click = select.
 * Hover = highlight. Double-click = reset the viewport to fit the image.
 *
 * The viewport (scale + offsets) is kept in a ref rather than in state, so
 * zooming and panning repaint through requestAnimationFrame without forcing
 * a React re-render.
 */
export function BboxCanvas({
  src,
  width: imgW,
  height: imgH,
  bboxes,
  activeIdx,
  onBboxClick,
  onBboxHover,
  showLabels = true,
  canvasWidth,
  canvasHeight,
  aspectRatio = 16 / 10,
}: Props) {
  const containerRef = useRef<HTMLDivElement>(null);
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const imgRef = useRef<HTMLImageElement | null>(null);
  const vpRef = useRef<Viewport>({ scale: 1, offsetX: 0, offsetY: 0 });
  const [hoverIdx, setHoverIdx] = useState<number | null>(null);
  const [isLoaded, setIsLoaded] = useState(false);

  // Responsive size: if canvasWidth/Height are not provided, measure container.
  const [size, setSize] = useState<{ w: number; h: number }>(() => ({
    w: canvasWidth ?? 800,
    h: canvasHeight ?? Math.round((canvasWidth ?? 800) / aspectRatio),
  }));

  useEffect(() => {
    // A fixed size was requested: use it verbatim and skip measuring.
    if (canvasWidth && canvasHeight) {
      setSize({ w: canvasWidth, h: canvasHeight });
      return;
    }
    const el = containerRef.current;
    if (!el) return;
    const update = () => {
      // Clamp to a minimum so the canvas never collapses to an unusable size.
      const w = Math.max(200, Math.floor(el.clientWidth));
      const h = Math.max(150, Math.floor(w / aspectRatio));
      // Return the previous object when nothing changed to avoid a re-render.
      setSize((prev) => (prev.w === w && prev.h === h ? prev : { w, h }));
    };
    update();
    const ro = new ResizeObserver(update);
    ro.observe(el);
    return () => ro.disconnect();
  }, [canvasWidth, canvasHeight, aspectRatio]);

  // Refit viewport when canvas size changes (e.g., window resize)
  useEffect(() => {
    if (imgRef.current) {
      vpRef.current = fitViewport(imgW, imgH, size.w, size.h);
      requestAnimationFrame(draw);
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [size.w, size.h]);

  const [pan, setPan] = useState<{ active: boolean; lastX: number; lastY: number }>({
    active: false,
    lastX: 0,
    lastY: 0,
  });

  // Load the image element whenever src (or the declared native size) changes.
  // NOTE: we deliberately do NOT set crossOrigin — R2 presigned URLs don't send
  // CORS headers, and setting crossOrigin would block the load. The canvas becomes
  // "tainted" but drawImage still works; we just can't call toDataURL/getImageData
  // (which we don't need for rendering).
  useEffect(() => {
    // Set by the cleanup below so a slow, superseded load cannot overwrite the
    // image of a newer `src` when its onload finally fires.
    let cancelled = false;

    // Drop the previous image right away; otherwise the redraw effect would
    // paint the OLD image underneath the NEW image's bboxes until the load ends.
    imgRef.current = null;
    setIsLoaded(false);

    const img = new window.Image();
    img.onload = () => {
      if (cancelled) return;
      imgRef.current = img;
      // Fit against the canvas' current backing size: `size` captured by this
      // closure may be stale if the container was resized during the load.
      const canvas = canvasRef.current;
      vpRef.current = fitViewport(
        imgW,
        imgH,
        canvas?.width ?? size.w,
        canvas?.height ?? size.h,
      );
      setIsLoaded(true);
      requestAnimationFrame(draw);
    };
    img.onerror = () => {
      if (cancelled) return;
      // Make sure a failed load leaves nothing drawable behind.
      imgRef.current = null;
      setIsLoaded(false);
    };
    // Assign src only after the handlers are attached.
    img.src = src;

    return () => {
      cancelled = true;
      img.onload = null;
      img.onerror = null;
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [src, imgW, imgH]);

  // Redraw when bboxes / activeIdx / hover change
  useEffect(() => {
    if (isLoaded) requestAnimationFrame(draw);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [bboxes, activeIdx, hoverIdx, isLoaded]);

  // Paints one full frame: background, image, every bbox, then the HUD text.
  const draw = useCallback(() => {
    const canvas = canvasRef.current;
    const img = imgRef.current;
    if (!canvas || !img) return;
    const ctx = canvas.getContext("2d");
    if (!ctx) return;

    const vp = vpRef.current;
    const W = canvas.width;
    const H = canvas.height;

    // Background
    ctx.fillStyle = "#0a0a0a";
    ctx.fillRect(0, 0, W, H);

    // Image
    ctx.drawImage(img, vp.offsetX, vp.offsetY, imgW * vp.scale, imgH * vp.scale);

    // Bboxes — painted in ascending sortKey order, so boxes with a higher key
    // (hovered, then active) are drawn last and end up on top.
    const draws = bboxes.map((b, idx) => ({ b, idx }));
    draws.sort((a, b) => {
      const sa = sortKey(a.b, a.idx === activeIdx, a.idx === hoverIdx);
      const sb = sortKey(b.b, b.idx === activeIdx, b.idx === hoverIdx);
      return sa - sb;
    });

    for (const { b, idx } of draws) {
      const isHover = idx === hoverIdx;
      const isActive = idx === activeIdx;
      const color = colorForQuery(b.query);
      const human = b.verdict;
      const vlm = b.vlm_verdict;

      // Rectangle in canvas coords
      const { cx: x1, cy: y1 } = imageToCanvas(vp, b.x1, b.y1);
      const { cx: x2, cy: y2 } = imageToCanvas(vp, b.x2, b.y2);
      const rectW = x2 - x1;
      const rectH = y2 - y1;

      // Style. The human verdict takes precedence over the VLM verdict.
      let strokeStyle = color;
      let lineWidth = 2;
      let fillStyle = `${color}1A`; // ~10% alpha
      ctx.setLineDash([]);

      if (human === "rejected") {
        strokeStyle = "#ef4444";
        lineWidth = 1.5;
        fillStyle = "#00000020";
        ctx.setLineDash([6, 4]);
      } else if (human === "approved") {
        strokeStyle = "#10b981";
        lineWidth = 3;
        fillStyle = `${color}26`; // ~15% alpha
      } else if (vlm === "NO") {
        strokeStyle = "#f87171";
        lineWidth = 1.5;
        fillStyle = "#00000018";
        ctx.setLineDash([5, 3]);
      } else if (vlm === "YES") {
        strokeStyle = color;
        lineWidth = 2;
        fillStyle = `${color}1F`;
      }

      if (isHover && !isActive) {
        lineWidth += 1;
        fillStyle = `${color}33`;
      }
      if (isActive) {
        // The active box always gets a solid white outline, whatever its verdict.
        lineWidth = 4;
        strokeStyle = "#ffffff";
        fillStyle = `${color}40`;
        ctx.setLineDash([]);
      }

      ctx.fillStyle = fillStyle;
      ctx.fillRect(x1, y1, rectW, rectH);
      ctx.strokeStyle = strokeStyle;
      ctx.lineWidth = lineWidth;
      ctx.strokeRect(x1, y1, rectW, rectH);

      // Label (only if showing labels AND bbox is large enough on screen)
      if (showLabels && rectW > 30 && rectH > 14) {
        const label = b.query + (vlm === "YES" ? " ✓" : vlm === "NO" ? " ✗" : "");
        ctx.font = "bold 11px -apple-system, system-ui, sans-serif";
        const metrics = ctx.measureText(label);
        const labelW = metrics.width + 8;
        const labelH = 16;
        const labelY = y1 - labelH;
        // The label sits above the box; skip it when it would leave the canvas top.
        if (labelY > 0) {
          ctx.fillStyle = strokeStyle;
          ctx.fillRect(x1, labelY, labelW, labelH);
          ctx.fillStyle = "#000000";
          ctx.fillText(label, x1 + 4, labelY + 12);
        }
      }
    }

    // Bottom-left HUD: zoom %, bbox count and (when hovering) the hovered bbox number
    ctx.font = "11px monospace";
    ctx.fillStyle = "#ffffff90";
    const hud = `${(vp.scale * 100).toFixed(0)}% zoom · ${bboxes.length} bboxes${hoverIdx !== null ? ` · #${hoverIdx + 1}` : ""}`;
    ctx.fillText(hud, 8, H - 8);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [bboxes, activeIdx, hoverIdx, imgW, imgH, showLabels]);

  // Mouse handlers
  const onMouseMove = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const canvas = canvasRef.current;
    if (!canvas) return;
    const rect = canvas.getBoundingClientRect();
    const cx = e.clientX - rect.left;
    const cy = e.clientY - rect.top;
    if (pan.active) {
      // Panning: shift the viewport by the pointer delta since the last event.
      const dx = cx - pan.lastX;
      const dy = cy - pan.lastY;
      vpRef.current = {
        ...vpRef.current,
        offsetX: vpRef.current.offsetX + dx,
        offsetY: vpRef.current.offsetY + dy,
      };
      setPan({ active: true, lastX: cx, lastY: cy });
      requestAnimationFrame(draw);
      return;
    }
    // Not panning: update the hover highlight only when the hit target changes.
    const idx = hitBbox(bboxes, cx, cy, vpRef.current);
    if (idx !== hoverIdx) {
      setHoverIdx(idx);
      onBboxHover?.(idx);
    }
  }, [bboxes, hoverIdx, onBboxHover, pan, draw]);

  const onMouseDown = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const canvas = canvasRef.current;
    if (!canvas) return;
    const rect = canvas.getBoundingClientRect();
    const cx = e.clientX - rect.left;
    const cy = e.clientY - rect.top;
    // Shift+press starts a pan instead of selecting.
    if (e.shiftKey) {
      setPan({ active: true, lastX: cx, lastY: cy });
      return;
    }
    // Plain press selects whatever hitBbox reports for this point.
    const idx = hitBbox(bboxes, cx, cy, vpRef.current);
    onBboxClick(idx);
  }, [bboxes, onBboxClick]);

  // Also wired to onMouseLeave so a pan ends when the pointer leaves the canvas.
  const onMouseUp = useCallback(() => {
    setPan({ active: false, lastX: 0, lastY: 0 });
  }, []);

  // React 19's synthetic onWheel is passive, so e.preventDefault() is a no-op
  // and the page scrolls alongside the zoom. Attach a native non-passive listener.
  useEffect(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;
    const handler = (e: WheelEvent) => {
      e.preventDefault();
      const rect = canvas.getBoundingClientRect();
      const cx = e.clientX - rect.left;
      const cy = e.clientY - rect.top;
      // Wheel up zooms in by 10%, wheel down zooms out by the inverse factor.
      const factor = e.deltaY < 0 ? 1.1 : 1 / 1.1;
      vpRef.current = zoomAt(vpRef.current, cx, cy, factor);
      requestAnimationFrame(draw);
    };
    canvas.addEventListener("wheel", handler, { passive: false });
    return () => canvas.removeEventListener("wheel", handler);
  }, [draw]);

  const onDoubleClick = useCallback(() => {
    // Reset viewport
    vpRef.current = fitViewport(imgW, imgH, size.w, size.h);
    requestAnimationFrame(draw);
  }, [imgW, imgH, size.w, size.h, draw]);

  return (
    <div ref={containerRef} className="w-full">
      <canvas
        ref={canvasRef}
        width={size.w}
        height={size.h}
        className="block rounded border border-zinc-800 cursor-crosshair"
        style={{ width: size.w, height: size.h, maxWidth: "100%" }}
        onMouseMove={onMouseMove}
        onMouseDown={onMouseDown}
        onMouseUp={onMouseUp}
        onMouseLeave={onMouseUp}
        onDoubleClick={onDoubleClick}
      />
    </div>
  );
}
|
| 322 |
+
|
| 323 |
+
/**
 * Paint-order key for a bbox: boxes are drawn in ascending key order, so a
 * larger key ends up on top of a smaller one.
 *
 * Order, bottom to top: human-rejected, unverified, VLM "YES",
 * human-approved, hovered, active.
 *
 * @param b        The bbox being ranked.
 * @param isActive Whether this bbox is the currently selected one.
 * @param isHover  Whether the pointer is currently over this bbox.
 * @returns A number that is only meaningful relative to other sortKey results.
 */
function sortKey(b: AnnotatedBbox, isActive: boolean, isHover: boolean): number {
  if (isActive) return 4;
  if (isHover) return 3;
  // A human rejection outranks any VLM verdict and pushes the box to the very
  // bottom; without this a rejected box with vlm_verdict "YES" would be painted
  // over unverified boxes.
  if (b.verdict === "rejected") return -1;
  if (b.verdict === "approved") return 2;
  if (b.vlm_verdict === "YES") return 1;
  return 0;
}
|
web/components/BboxOverlay.tsx
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { cn } from "@/lib/utils";
|
| 4 |
+
import type { Bbox, BboxVerdict } from "@/lib/types";
|
| 5 |
+
|
| 6 |
+
/** Fixed stroke/label color (hex) for each known detection query string. */
export const QUERY_COLORS: Record<string, string> = {
  "fiber optic spool": "#22c55e", // green
  spool: "#10b981", // emerald
  "cable spool": "#06b6d4", // cyan
  drone: "#3b82f6", // blue
  quadcopter: "#6366f1", // indigo
  "fiber optic drone": "#84cc16", // lime
  cable: "#f59e0b", // amber
  cylinder: "#ec4899", // pink
  objects: "#a3a3a3", // neutral
};

/**
 * Returns the display color for a query, or red ("#ef4444") for queries that
 * have no entry in QUERY_COLORS.
 *
 * Only own properties of QUERY_COLORS are consulted: a bare `QUERY_COLORS[q]`
 * would resolve names such as "constructor" or "toString" through
 * Object.prototype and return a function instead of a color string.
 *
 * @param q The query string attached to a bbox.
 * @returns A "#rrggbb" color string.
 */
export function colorForQuery(q: string): string {
  return Object.prototype.hasOwnProperty.call(QUERY_COLORS, q)
    ? QUERY_COLORS[q]
    : "#ef4444";
}
|
| 21 |
+
|
| 22 |
+
export type AnnotatedBbox = Bbox & {
|
| 23 |
+
query: string;
|
| 24 |
+
verdict?: BboxVerdict; // human verdict
|
| 25 |
+
vlm_verdict?: "YES" | "NO" | "UNSURE"; // Qwen VLM verdict
|
| 26 |
+
vlm_reasoning?: string;
|
| 27 |
+
idx: number;
|
| 28 |
+
};
|
| 29 |
+
|
| 30 |
+
type Props = {
|
| 31 |
+
src: string;
|
| 32 |
+
width: number;
|
| 33 |
+
height: number;
|
| 34 |
+
bboxes: AnnotatedBbox[];
|
| 35 |
+
activeIdx?: number | null;
|
| 36 |
+
onBboxClick?: (idx: number) => void;
|
| 37 |
+
showLabels?: boolean;
|
| 38 |
+
};
|
| 39 |
+
|
| 40 |
+
/**
 * Renders an image with one absolutely positioned <div> per bbox on top of it.
 * Each box is placed with percentages derived from its normalized (`*_norm`)
 * coordinates, so the boxes scale together with the image's display size.
 */
export function BboxOverlay(props: Props) {
  const {
    src,
    width,
    height,
    bboxes,
    activeIdx = null,
    onBboxClick,
    showLabels = true,
  } = props;

  const boxes = bboxes.map((box) => {
    const { verdict: human, vlm_verdict: vlm } = box;
    const queryColor = colorForQuery(box.query);
    const isActive = activeIdx === box.idx;

    // Frame style. The human verdict is checked first, so it overrides the VLM
    // verdict; VLM "YES" and "no verdict" both use a solid query-colored border.
    const frame = (() => {
      if (human === "rejected") return { border: `2px dashed #ef4444`, opacity: 0.35 };
      if (human === "approved") return { border: `3px solid #10b981`, opacity: 1 };
      if (vlm === "NO") return { border: `2px dashed #f87171`, opacity: 0.5 };
      return { border: `2px solid ${queryColor}`, opacity: 1 };
    })();

    // Tooltip: every piece that is present, joined by an em dash.
    const titleParts: string[] = [];
    if (box.query) titleParts.push(box.query);
    if (vlm) titleParts.push(`Qwen: ${vlm}`);
    if (box.vlm_reasoning) titleParts.push(`(${box.vlm_reasoning})`);
    if (human) titleParts.push(`Human: ${human}`);
    const title = titleParts.join(" — ");

    // Rejected boxes (by the human or by the VLM) get a neutral tint instead
    // of the query color.
    const isDimmed = human === "rejected" || vlm === "NO";

    return (
      <div
        key={box.idx}
        className={cn(
          "absolute cursor-pointer transition-all",
          isActive && "ring-4 ring-white",
        )}
        style={{
          left: `${box.x1_norm * 100}%`,
          top: `${box.y1_norm * 100}%`,
          width: `${(box.x2_norm - box.x1_norm) * 100}%`,
          height: `${(box.y2_norm - box.y1_norm) * 100}%`,
          border: frame.border,
          backgroundColor: isDimmed ? "#00000010" : `${queryColor}10`,
          opacity: frame.opacity,
        }}
        onClick={(event) => {
          // Keep the click from reaching handlers on enclosing elements.
          event.stopPropagation();
          onBboxClick?.(box.idx);
        }}
        title={title}
      >
        {showLabels && (
          <span
            className="absolute -top-5 left-0 whitespace-nowrap px-1 py-px text-[10px] font-bold text-black"
            style={{ backgroundColor: queryColor }}
          >
            {box.query}
            {vlm === "YES" && " ✓"}
            {vlm === "NO" && " ✗"}
            {human === "approved" && " 👤✓"}
            {human === "rejected" && " 👤✗"}
          </span>
        )}
      </div>
    );
  });

  return (
    <div className="relative w-full" style={{ aspectRatio: `${width} / ${height}` }}>
      {/* eslint-disable-next-line @next/next/no-img-element */}
      <img
        src={src}
        alt=""
        className="absolute inset-0 h-full w-full object-contain"
        draggable={false}
      />
      {boxes}
    </div>
  );
}
|
web/components/ui/badge.tsx
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { mergeProps } from "@base-ui/react/merge-props"
|
| 2 |
+
import { useRender } from "@base-ui/react/use-render"
|
| 3 |
+
import { cva, type VariantProps } from "class-variance-authority"
|
| 4 |
+
|
| 5 |
+
import { cn } from "@/lib/utils"
|
| 6 |
+
|
| 7 |
+
const badgeVariants = cva(
|
| 8 |
+
"group/badge inline-flex h-5 w-fit shrink-0 items-center justify-center gap-1 overflow-hidden rounded-4xl border border-transparent px-2 py-0.5 text-xs font-medium whitespace-nowrap transition-all focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 aria-invalid:border-destructive aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 [&>svg]:pointer-events-none [&>svg]:size-3!",
|
| 9 |
+
{
|
| 10 |
+
variants: {
|
| 11 |
+
variant: {
|
| 12 |
+
default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
|
| 13 |
+
secondary:
|
| 14 |
+
"bg-secondary text-secondary-foreground [a]:hover:bg-secondary/80",
|
| 15 |
+
destructive:
|
| 16 |
+
"bg-destructive/10 text-destructive focus-visible:ring-destructive/20 dark:bg-destructive/20 dark:focus-visible:ring-destructive/40 [a]:hover:bg-destructive/20",
|
| 17 |
+
outline:
|
| 18 |
+
"border-border text-foreground [a]:hover:bg-muted [a]:hover:text-muted-foreground",
|
| 19 |
+
ghost:
|
| 20 |
+
"hover:bg-muted hover:text-muted-foreground dark:hover:bg-muted/50",
|
| 21 |
+
link: "text-primary underline-offset-4 hover:underline",
|
| 22 |
+
},
|
| 23 |
+
},
|
| 24 |
+
defaultVariants: {
|
| 25 |
+
variant: "default",
|
| 26 |
+
},
|
| 27 |
+
}
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
/**
 * Badge element. Renders a <span> by default; the `render` prop is handed to
 * useRender unchanged. The variant classes are combined with the caller's
 * `className`, and `slot`/`variant` are passed to useRender as state.
 */
function Badge(
  allProps: useRender.ComponentProps<"span"> & VariantProps<typeof badgeVariants>
) {
  const { className, variant = "default", render, ...props } = allProps

  // Variant classes first, caller-supplied className after them.
  const ownProps = {
    className: cn(badgeVariants({ variant }), className),
  }
  const state = {
    slot: "badge",
    variant,
  }

  return useRender({
    defaultTagName: "span",
    props: mergeProps<"span">(ownProps, props),
    render,
    state,
  })
}
|
| 51 |
+
|
| 52 |
+
export { Badge, badgeVariants }
|
web/components/ui/button.tsx
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Button as ButtonPrimitive } from "@base-ui/react/button"
|
| 2 |
+
import { cva, type VariantProps } from "class-variance-authority"
|
| 3 |
+
|
| 4 |
+
import { cn } from "@/lib/utils"
|
| 5 |
+
|
| 6 |
+
const buttonVariants = cva(
|
| 7 |
+
"group/button inline-flex shrink-0 items-center justify-center rounded-lg border border-transparent bg-clip-padding text-sm font-medium whitespace-nowrap transition-all outline-none select-none focus-visible:border-ring focus-visible:ring-3 focus-visible:ring-ring/50 active:not-aria-[haspopup]:translate-y-px disabled:pointer-events-none disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20 dark:aria-invalid:border-destructive/50 dark:aria-invalid:ring-destructive/40 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
|
| 8 |
+
{
|
| 9 |
+
variants: {
|
| 10 |
+
variant: {
|
| 11 |
+
default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
|
| 12 |
+
outline:
|
| 13 |
+
"border-border bg-background hover:bg-muted hover:text-foreground aria-expanded:bg-muted aria-expanded:text-foreground dark:border-input dark:bg-input/30 dark:hover:bg-input/50",
|
| 14 |
+
secondary:
|
| 15 |
+
"bg-secondary text-secondary-foreground hover:bg-secondary/80 aria-expanded:bg-secondary aria-expanded:text-secondary-foreground",
|
| 16 |
+
ghost:
|
| 17 |
+
"hover:bg-muted hover:text-foreground aria-expanded:bg-muted aria-expanded:text-foreground dark:hover:bg-muted/50",
|
| 18 |
+
destructive:
|
| 19 |
+
"bg-destructive/10 text-destructive hover:bg-destructive/20 focus-visible:border-destructive/40 focus-visible:ring-destructive/20 dark:bg-destructive/20 dark:hover:bg-destructive/30 dark:focus-visible:ring-destructive/40",
|
| 20 |
+
link: "text-primary underline-offset-4 hover:underline",
|
| 21 |
+
},
|
| 22 |
+
size: {
|
| 23 |
+
default:
|
| 24 |
+
"h-8 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
|
| 25 |
+
xs: "h-6 gap-1 rounded-[min(var(--radius-md),10px)] px-2 text-xs in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3",
|
| 26 |
+
sm: "h-7 gap-1 rounded-[min(var(--radius-md),12px)] px-2.5 text-[0.8rem] in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3.5",
|
| 27 |
+
lg: "h-9 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
|
| 28 |
+
icon: "size-8",
|
| 29 |
+
"icon-xs":
|
| 30 |
+
"size-6 rounded-[min(var(--radius-md),10px)] in-data-[slot=button-group]:rounded-lg [&_svg:not([class*='size-'])]:size-3",
|
| 31 |
+
"icon-sm":
|
| 32 |
+
"size-7 rounded-[min(var(--radius-md),12px)] in-data-[slot=button-group]:rounded-lg",
|
| 33 |
+
"icon-lg": "size-9",
|
| 34 |
+
},
|
| 35 |
+
},
|
| 36 |
+
defaultVariants: {
|
| 37 |
+
variant: "default",
|
| 38 |
+
size: "default",
|
| 39 |
+
},
|
| 40 |
+
}
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
/**
 * Button built on the Base UI button primitive. `variant` and `size` select
 * the classes from buttonVariants; all remaining props are forwarded to the
 * primitive.
 */
function Button(
  allProps: ButtonPrimitive.Props & VariantProps<typeof buttonVariants>
) {
  const { className, variant = "default", size = "default", ...rest } = allProps

  // className is passed into buttonVariants together with variant and size.
  const classes = cn(buttonVariants({ variant, size, className }))

  return <ButtonPrimitive data-slot="button" className={classes} {...rest} />
}
|
| 57 |
+
|
| 58 |
+
export { Button, buttonVariants }
|
web/components/ui/card.tsx
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import * as React from "react"
|
| 2 |
+
|
| 3 |
+
import { cn } from "@/lib/utils"
|
| 4 |
+
|
| 5 |
+
/**
 * Card container <div>. `size` ("default" or "sm") is written to the
 * `data-size` attribute; every other prop is spread onto the <div>.
 */
function Card(
  allProps: React.ComponentProps<"div"> & { size?: "default" | "sm" }
) {
  const { className, size = "default", ...rest } = allProps

  const classes = cn(
    "group/card flex flex-col gap-4 overflow-hidden rounded-xl bg-card py-4 text-sm text-card-foreground ring-1 ring-foreground/10 has-data-[slot=card-footer]:pb-0 has-[>img:first-child]:pt-0 data-[size=sm]:gap-3 data-[size=sm]:py-3 data-[size=sm]:has-data-[slot=card-footer]:pb-0 *:[img:first-child]:rounded-t-xl *:[img:last-child]:rounded-b-xl",
    className
  )

  return <div data-slot="card" data-size={size} className={classes} {...rest} />
}
|
| 22 |
+
|
| 23 |
+
/** Header <div> of a card (`data-slot="card-header"`); extra props are spread onto it. */
function CardHeader(allProps: React.ComponentProps<"div">) {
  const { className, ...rest } = allProps

  const classes = cn(
    "group/card-header @container/card-header grid auto-rows-min items-start gap-1 rounded-t-xl px-4 group-data-[size=sm]/card:px-3 has-data-[slot=card-action]:grid-cols-[1fr_auto] has-data-[slot=card-description]:grid-rows-[auto_auto] [.border-b]:pb-4 group-data-[size=sm]/card:[.border-b]:pb-3",
    className
  )

  return <div data-slot="card-header" className={classes} {...rest} />
}
|
| 35 |
+
|
| 36 |
+
/** Title text of a card (`data-slot="card-title"`); shrinks to `text-sm` inside a small card. */
function CardTitle({ className, ...props }: React.ComponentProps<"div">) {
  const titleClasses = cn(
    "font-heading text-base leading-snug font-medium group-data-[size=sm]/card:text-sm",
    className
  )

  return <div data-slot="card-title" className={titleClasses} {...props} />
}
|
| 48 |
+
|
| 49 |
+
/** Muted secondary text of a card (`data-slot="card-description"`). */
function CardDescription({ className, ...props }: React.ComponentProps<"div">) {
  const descriptionClasses = cn("text-sm text-muted-foreground", className)

  return <div data-slot="card-description" className={descriptionClasses} {...props} />
}
|
| 58 |
+
|
| 59 |
+
/**
 * Action slot of a card header (`data-slot="card-action"`).
 *
 * Pinned to the second grid column and spanning two rows, aligned to the
 * top-right of the header grid.
 */
function CardAction({ className, ...props }: React.ComponentProps<"div">) {
  const actionClasses = cn(
    "col-start-2 row-span-2 row-start-1 self-start justify-self-end",
    className
  )

  return <div data-slot="card-action" className={actionClasses} {...props} />
}
|
| 71 |
+
|
| 72 |
+
/** Body region of a card (`data-slot="card-content"`); horizontal padding tightens in a small card. */
function CardContent({ className, ...props }: React.ComponentProps<"div">) {
  const contentClasses = cn("px-4 group-data-[size=sm]/card:px-3", className)

  return <div data-slot="card-content" className={contentClasses} {...props} />
}
|
| 81 |
+
|
| 82 |
+
/**
 * Footer region of a card (`data-slot="card-footer"`).
 *
 * The root card detects this slot (`has-data-[slot=card-footer]`) and drops
 * its own bottom padding so the footer sits flush with the card edge.
 */
function CardFooter({ className, ...props }: React.ComponentProps<"div">) {
  const footerClasses = cn(
    "flex items-center rounded-b-xl border-t bg-muted/50 p-4 group-data-[size=sm]/card:p-3",
    className
  )

  return <div data-slot="card-footer" className={footerClasses} {...props} />
}
|
| 94 |
+
|
| 95 |
+
export {
|
| 96 |
+
Card,
|
| 97 |
+
CardHeader,
|
| 98 |
+
CardFooter,
|
| 99 |
+
CardTitle,
|
| 100 |
+
CardAction,
|
| 101 |
+
CardDescription,
|
| 102 |
+
CardContent,
|
| 103 |
+
}
|
web/components/ui/separator.tsx
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client"
|
| 2 |
+
|
| 3 |
+
import { Separator as SeparatorPrimitive } from "@base-ui/react/separator"
|
| 4 |
+
|
| 5 |
+
import { cn } from "@/lib/utils"
|
| 6 |
+
|
| 7 |
+
/**
 * Thin divider line built on the Base UI separator primitive.
 *
 * `orientation` defaults to "horizontal" and is forwarded to the primitive;
 * the `data-horizontal:*` / `data-vertical:*` classes size the line accordingly.
 */
function Separator({
  className,
  orientation = "horizontal",
  ...props
}: SeparatorPrimitive.Props) {
  const separatorClasses = cn(
    "shrink-0 bg-border data-horizontal:h-px data-horizontal:w-full data-vertical:w-px data-vertical:self-stretch",
    className
  )

  return (
    <SeparatorPrimitive
      data-slot="separator"
      orientation={orientation}
      className={separatorClasses}
      {...props}
    />
  )
}
|
| 24 |
+
|
| 25 |
+
export { Separator }
|
web/components/ui/skeleton.tsx
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { cn } from "@/lib/utils"
|
| 2 |
+
|
| 3 |
+
/** Pulsing placeholder block (`data-slot="skeleton"`) shown while content loads. */
function Skeleton({ className, ...props }: React.ComponentProps<"div">) {
  const skeletonClasses = cn("animate-pulse rounded-md bg-muted", className)

  return <div data-slot="skeleton" className={skeletonClasses} {...props} />
}
|
| 12 |
+
|
| 13 |
+
export { Skeleton }
|
web/components/ui/sonner.tsx
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client"
|
| 2 |
+
|
| 3 |
+
import { useTheme } from "next-themes"
|
| 4 |
+
import { Toaster as Sonner, type ToasterProps } from "sonner"
|
| 5 |
+
import { CircleCheckIcon, InfoIcon, TriangleAlertIcon, OctagonXIcon, Loader2Icon } from "lucide-react"
|
| 6 |
+
|
| 7 |
+
/**
 * App-wide toast host: a themed wrapper around sonner's `Toaster`.
 *
 * Follows the active next-themes theme (falling back to "system" when it is
 * undefined), swaps in lucide icons per toast type, and maps sonner's colour
 * variables onto the design-system CSS variables. Caller props are spread
 * last, so they override any of the defaults set here.
 */
const Toaster = (props: ToasterProps) => {
  const { theme: activeTheme = "system" } = useTheme()

  // One icon per toast type.
  const statusIcons = {
    success: <CircleCheckIcon className="size-4" />,
    info: <InfoIcon className="size-4" />,
    warning: <TriangleAlertIcon className="size-4" />,
    error: <OctagonXIcon className="size-4" />,
    loading: <Loader2Icon className="size-4 animate-spin" />,
  }

  // Custom properties are not part of CSSProperties, hence the cast.
  const themeVars = {
    "--normal-bg": "var(--popover)",
    "--normal-text": "var(--popover-foreground)",
    "--normal-border": "var(--border)",
    "--border-radius": "var(--radius)",
  } as React.CSSProperties

  return (
    <Sonner
      theme={activeTheme as ToasterProps["theme"]}
      className="toaster group"
      icons={statusIcons}
      style={themeVars}
      toastOptions={{ classNames: { toast: "cn-toast" } }}
      {...props}
    />
  )
}
|
| 48 |
+
|
| 49 |
+
export { Toaster }
|
web/components/ui/tabs.tsx
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client"
|
| 2 |
+
|
| 3 |
+
import { Tabs as TabsPrimitive } from "@base-ui/react/tabs"
|
| 4 |
+
import { cva, type VariantProps } from "class-variance-authority"
|
| 5 |
+
|
| 6 |
+
import { cn } from "@/lib/utils"
|
| 7 |
+
|
| 8 |
+
/**
 * Root of the tabs family, built on the Base UI `Tabs.Root` primitive.
 *
 * `orientation` defaults to "horizontal". It is forwarded to the primitive
 * and also mirrored as `data-orientation` on the element. Previously it was
 * destructured out of `props` and only used for the data attribute, so the
 * primitive never received a caller's `orientation="vertical"`.
 * Remaining props are spread last.
 */
function Tabs({
  className,
  orientation = "horizontal",
  ...props
}: TabsPrimitive.Root.Props) {
  return (
    <TabsPrimitive.Root
      data-slot="tabs"
      data-orientation={orientation}
      orientation={orientation}
      className={cn(
        "group/tabs flex gap-2 data-horizontal:flex-col",
        className
      )}
      {...props}
    />
  )
}
|
| 25 |
+
|
| 26 |
+
// Class recipe for the tab strip: "default" is a filled pill, "line" is a
// transparent strip (the active-tab underline itself is drawn by TabsTrigger).
const tabsListVariants = cva(
  "group/tabs-list inline-flex w-fit items-center justify-center rounded-lg p-[3px] text-muted-foreground group-data-horizontal/tabs:h-8 group-data-vertical/tabs:h-fit group-data-vertical/tabs:flex-col data-[variant=line]:rounded-none",
  {
    variants: {
      variant: {
        default: "bg-muted",
        line: "gap-1 bg-transparent",
      },
    },
    defaultVariants: { variant: "default" },
  }
)

/**
 * Container for the tab triggers (`data-slot="tabs-list"`).
 *
 * `variant` (default "default") selects the recipe above and is also exposed
 * as `data-variant` so the triggers can style themselves via
 * `group-data-[variant=…]/tabs-list:*`.
 */
function TabsList({
  className,
  variant = "default",
  ...props
}: TabsPrimitive.List.Props & VariantProps<typeof tabsListVariants>) {
  const listClasses = cn(tabsListVariants({ variant }), className)

  return (
    <TabsPrimitive.List
      data-slot="tabs-list"
      data-variant={variant}
      className={listClasses}
      {...props}
    />
  )
}
|
| 55 |
+
|
| 56 |
+
/**
 * A single tab button (`data-slot="tabs-trigger"`).
 *
 * The class list is split into four groups, merged in this order and then
 * followed by the caller's `className`: base layout/focus/disabled states,
 * "line"-variant background resets, active-state colours, and the `::after`
 * underline that becomes visible for the active tab in the "line" variant.
 */
function TabsTrigger({ className, ...props }: TabsPrimitive.Tab.Props) {
  const baseClasses =
    "relative inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center gap-1.5 rounded-md border border-transparent px-1.5 py-0.5 text-sm font-medium whitespace-nowrap text-foreground/60 transition-all group-data-vertical/tabs:w-full group-data-vertical/tabs:justify-start hover:text-foreground focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 focus-visible:outline-1 focus-visible:outline-ring disabled:pointer-events-none disabled:opacity-50 has-data-[icon=inline-end]:pr-1 has-data-[icon=inline-start]:pl-1 aria-disabled:pointer-events-none aria-disabled:opacity-50 dark:text-muted-foreground dark:hover:text-foreground group-data-[variant=default]/tabs-list:data-active:shadow-sm group-data-[variant=line]/tabs-list:data-active:shadow-none [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4"
  const lineVariantClasses =
    "group-data-[variant=line]/tabs-list:bg-transparent group-data-[variant=line]/tabs-list:data-active:bg-transparent dark:group-data-[variant=line]/tabs-list:data-active:border-transparent dark:group-data-[variant=line]/tabs-list:data-active:bg-transparent"
  const activeClasses =
    "data-active:bg-background data-active:text-foreground dark:data-active:border-input dark:data-active:bg-input/30 dark:data-active:text-foreground"
  const underlineClasses =
    "after:absolute after:bg-foreground after:opacity-0 after:transition-opacity group-data-horizontal/tabs:after:inset-x-0 group-data-horizontal/tabs:after:bottom-[-5px] group-data-horizontal/tabs:after:h-0.5 group-data-vertical/tabs:after:inset-y-0 group-data-vertical/tabs:after:-right-1 group-data-vertical/tabs:after:w-0.5 group-data-[variant=line]/tabs-list:data-active:after:opacity-100"

  const triggerClasses = cn(
    baseClasses,
    lineVariantClasses,
    activeClasses,
    underlineClasses,
    className
  )

  return <TabsPrimitive.Tab data-slot="tabs-trigger" className={triggerClasses} {...props} />
}
|
| 71 |
+
|
| 72 |
+
/** Panel holding the content of one tab (`data-slot="tabs-content"`). */
function TabsContent({ className, ...props }: TabsPrimitive.Panel.Props) {
  const panelClasses = cn("flex-1 text-sm outline-none", className)

  return <TabsPrimitive.Panel data-slot="tabs-content" className={panelClasses} {...props} />
}
|
| 81 |
+
|
| 82 |
+
export { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants }
|
web/lib/canvas-utils.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// canvas-utils.ts — pure functions for canvas coord transforms + hit testing
|
| 2 |
+
|
| 3 |
+
import type { Bbox } from "./types";
|
| 4 |
+
|
| 5 |
+
export type Viewport = {
|
| 6 |
+
scale: number; // image pixels → canvas pixels multiplier
|
| 7 |
+
offsetX: number; // top-left of image in canvas coords
|
| 8 |
+
offsetY: number;
|
| 9 |
+
};
|
| 10 |
+
|
| 11 |
+
/**
 * Compute the viewport that fits an image inside a canvas, preserving aspect ratio.
 *
 * @param imgW    image width in image pixels
 * @param imgH    image height in image pixels
 * @param canvasW canvas width in canvas pixels
 * @param canvasH canvas height in canvas pixels
 * @param padding margin on every side, as a fraction of the canvas' shorter edge
 * @returns a viewport whose scale makes the image fill the padded area along
 *          its limiting axis, with offsets that centre the image in the canvas
 *
 * Degenerate inputs (a zero-sized image that has not loaded yet, or a canvas
 * that has not been laid out) would otherwise produce an Infinity, NaN or
 * zero scale, which turns into NaN offsets and divide-by-zero in the inverse
 * transforms. In that case the scale falls back to 1 so every field stays finite.
 */
export function fitViewport(
  imgW: number,
  imgH: number,
  canvasW: number,
  canvasH: number,
  padding = 0.05,
): Viewport {
  const padPx = Math.min(canvasW, canvasH) * padding;
  const availW = canvasW - padPx * 2;
  const availH = canvasH - padPx * 2;
  const fitScale = Math.min(availW / imgW, availH / imgH);
  // Guard against Infinity (imgW/imgH of 0), NaN (0/0) and non-positive scales.
  const scale = Number.isFinite(fitScale) && fitScale > 0 ? fitScale : 1;
  const offsetX = (canvasW - imgW * scale) / 2;
  const offsetY = (canvasH - imgH * scale) / 2;
  return { scale, offsetX, offsetY };
}
|
| 27 |
+
|
| 28 |
+
/**
 * Zoom the viewport by `factor` while keeping one canvas point fixed
 * (typically the mouse cursor).
 *
 * The image-space point currently under (canvasX, canvasY) is computed with
 * the old scale, then the offsets are re-derived so that the same image point
 * lands under the same canvas point at the new scale.
 */
export function zoomAt(
  vp: Viewport,
  canvasX: number,
  canvasY: number,
  factor: number,
): Viewport {
  const { scale: oldScale, offsetX: oldOffsetX, offsetY: oldOffsetY } = vp;

  // Anchor = image-space coordinates under the cursor before zooming.
  const anchorX = (canvasX - oldOffsetX) / oldScale;
  const anchorY = (canvasY - oldOffsetY) / oldScale;

  const scale = oldScale * factor;
  const offsetX = canvasX - anchorX * scale;
  const offsetY = canvasY - anchorY * scale;
  return { scale, offsetX, offsetY };
}
|
| 46 |
+
|
| 47 |
+
/** Map a point from image-pixel space into canvas space (scale, then translate). */
export function imageToCanvas(vp: Viewport, x: number, y: number) {
  const { scale, offsetX, offsetY } = vp;
  const cx = offsetX + x * scale;
  const cy = offsetY + y * scale;
  return { cx, cy };
}
|
| 51 |
+
|
| 52 |
+
/** Map a point from canvas space back into image-pixel space (inverse of imageToCanvas). */
export function canvasToImage(vp: Viewport, cx: number, cy: number) {
  const { scale, offsetX, offsetY } = vp;
  const x = (cx - offsetX) / scale;
  const y = (cy - offsetY) / scale;
  return { x, y };
}
|
| 56 |
+
|
| 57 |
+
/**
 * Hit test: find the bbox under a canvas point.
 *
 * The point is converted to image space once, then every box is checked with
 * inclusive edges. When several boxes contain the point, the one with the
 * SMALLEST area wins (the first such box on a tie), so nested boxes stay
 * selectable. Returns that box's index, or null when nothing is hit.
 */
export function hitBbox(
  bboxes: Bbox[],
  canvasX: number,
  canvasY: number,
  vp: Viewport,
): number | null {
  const point = canvasToImage(vp, canvasX, canvasY);
  let winner: number | null = null;
  let smallestArea = Infinity;

  for (const [idx, box] of bboxes.entries()) {
    const inside =
      point.x >= box.x1 && point.x <= box.x2 && point.y >= box.y1 && point.y <= box.y2;
    if (!inside) continue;

    const area = (box.x2 - box.x1) * (box.y2 - box.y1);
    // Strict "<" keeps the earliest box when areas are equal.
    if (area < smallestArea) {
      smallestArea = area;
      winner = idx;
    }
  }

  return winner;
}
|
web/lib/r2.ts
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { S3Client, GetObjectCommand, ListObjectsV2Command, PutObjectCommand } from "@aws-sdk/client-s3";
|
| 2 |
+
import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
|
| 3 |
+
|
| 4 |
+
// R2 credentials must be supplied via environment variables.
|
| 5 |
+
// Set these in web/.env.local (gitignored) — see web/.env.example.
|
| 6 |
+
//
|
| 7 |
+
// R2_ENDPOINT_URL=https://<account>.r2.cloudflarestorage.com
|
| 8 |
+
// R2_ACCESS_KEY_ID=...
|
| 9 |
+
// R2_SECRET_ACCESS_KEY=...
|
| 10 |
+
// R2_BUCKET=your-bucket-name
|
| 11 |
+
//
|
| 12 |
+
// These live server-side only and are never exposed to the browser.
|
| 13 |
+
const R2_ENDPOINT = process.env.R2_ENDPOINT_URL;
const R2_ACCESS_KEY = process.env.R2_ACCESS_KEY_ID;
const R2_SECRET_KEY = process.env.R2_SECRET_ACCESS_KEY;
export const R2_BUCKET = process.env.R2_BUCKET ?? "";

// Fail fast at module load: every setting must be present and non-empty.
// The check stays inline so TypeScript narrows the values to `string` below.
if (!(R2_ENDPOINT && R2_ACCESS_KEY && R2_SECRET_KEY && R2_BUCKET)) {
  const message = [
    "R2 is not configured. Set R2_ENDPOINT_URL, R2_ACCESS_KEY_ID, ",
    "R2_SECRET_ACCESS_KEY, and R2_BUCKET in web/.env.local. ",
    "See web/.env.example for the full list.",
  ].join("");
  throw new Error(message);
}

const r2Credentials = {
  accessKeyId: R2_ACCESS_KEY,
  secretAccessKey: R2_SECRET_KEY,
};

// Shared S3-compatible client pointed at the R2 endpoint.
export const r2 = new S3Client({
  region: "auto",
  endpoint: R2_ENDPOINT,
  credentials: r2Credentials,
});
|
| 34 |
+
|
| 35 |
+
/**
 * Build a presigned GET URL for an object in the configured bucket, so the
 * browser can download it directly without ever seeing the credentials.
 *
 * @param key       object key inside R2_BUCKET
 * @param expiresIn lifetime handed to `getSignedUrl` (defaults to 3600)
 * @returns the signed URL
 */
export async function presignGet(key: string, expiresIn = 3600): Promise<string> {
  const request = new GetObjectCommand({ Bucket: R2_BUCKET, Key: key });
  const signedUrl = await getSignedUrl(r2, request, { expiresIn });
  return signedUrl;
}
|
| 43 |
+
|
| 44 |
+
/**
 * Collect every object key under `prefix` in the configured bucket.
 *
 * Pages through ListObjectsV2, following `NextContinuationToken` for as long
 * as the response is marked truncated, and returns the keys in listing order.
 */
export async function listAll(prefix: string): Promise<string[]> {
  const keys: string[] = [];
  let pageToken: string | undefined = undefined;

  while (true) {
    const page: Awaited<ReturnType<typeof r2.send<ListObjectsV2Command>>> = await r2.send(
      new ListObjectsV2Command({
        Bucket: R2_BUCKET,
        Prefix: prefix,
        ContinuationToken: pageToken,
      }),
    );

    for (const { Key } of page.Contents ?? []) {
      if (Key) keys.push(Key);
    }

    // Stop on the last page, or if a truncated page carries no usable token.
    if (!page.IsTruncated || !page.NextContinuationToken) break;
    pageToken = page.NextContinuationToken;
  }

  return keys;
}
|
| 65 |
+
|
| 66 |
+
/**
 * Read and parse a JSON object from R2.
 *
 * @param key object key inside R2_BUCKET
 * @returns the parsed value, or `null` when the object cannot be read or parsed
 *
 * The null-on-failure contract is kept, so callers can still treat "no data"
 * uniformly. What changes is visibility: a missing object (NoSuchKey / HTTP
 * 404) stays silent, while any other failure (bad credentials, network error,
 * malformed JSON) is logged before returning null instead of being swallowed.
 * An empty response body also returns null explicitly, rather than relying on
 * a TypeError from a non-null assertion.
 */
export async function getJson<T>(key: string): Promise<T | null> {
  try {
    const resp = await r2.send(new GetObjectCommand({ Bucket: R2_BUCKET, Key: key }));
    if (!resp.Body) return null;
    const text = await resp.Body.transformToString();
    return JSON.parse(text) as T;
  } catch (err) {
    const details = err as { name?: string; $metadata?: { httpStatusCode?: number } } | null;
    const isMissing =
      details?.name === "NoSuchKey" || details?.$metadata?.httpStatusCode === 404;
    if (!isMissing) {
      console.error(`getJson: failed to read "${key}" from R2`, err);
    }
    return null;
  }
}
|
| 78 |
+
|
| 79 |
+
/**
 * Serialize `value` as pretty-printed (2-space) JSON and store it at `key`
 * in the configured bucket with an `application/json` content type.
 */
export async function putJson(key: string, value: unknown): Promise<void> {
  const payload = JSON.stringify(value, null, 2);
  const upload = new PutObjectCommand({
    Bucket: R2_BUCKET,
    Key: key,
    Body: payload,
    ContentType: "application/json",
  });
  await r2.send(upload);
}
|
web/lib/types.ts
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Types matching pod_label.py output format
|
| 2 |
+
|
| 3 |
+
export type Bbox = {
|
| 4 |
+
x1: number;
|
| 5 |
+
y1: number;
|
| 6 |
+
x2: number;
|
| 7 |
+
y2: number;
|
| 8 |
+
x1_norm: number;
|
| 9 |
+
y1_norm: number;
|
| 10 |
+
x2_norm: number;
|
| 11 |
+
y2_norm: number;
|
| 12 |
+
cx_norm: number;
|
| 13 |
+
cy_norm: number;
|
| 14 |
+
w_norm: number;
|
| 15 |
+
h_norm: number;
|
| 16 |
+
area_fraction: number;
|
| 17 |
+
annotation_id?: number; // links to verified.json verdict (when present)
|
| 18 |
+
};
|
| 19 |
+
|
| 20 |
+
// VLM verdict from verify_vlm.py output
|
| 21 |
+
export type VlmVerdict = {
|
| 22 |
+
annotation_id: number;
|
| 23 |
+
image_id: number;
|
| 24 |
+
image_file: string;
|
| 25 |
+
category_name: string;
|
| 26 |
+
bbox: number[]; // [x, y, w, h]
|
| 27 |
+
verdict: "YES" | "NO" | "UNSURE";
|
| 28 |
+
reasoning: string;
|
| 29 |
+
elapsed: number;
|
| 30 |
+
};
|
| 31 |
+
|
| 32 |
+
export type VerifiedRun = {
|
| 33 |
+
run_name: string;
|
| 34 |
+
model: string;
|
| 35 |
+
prompt_version: string;
|
| 36 |
+
crop_padding: number;
|
| 37 |
+
summary: {
|
| 38 |
+
completed: number;
|
| 39 |
+
total: number;
|
| 40 |
+
yes: number;
|
| 41 |
+
no: number;
|
| 42 |
+
unsure: number;
|
| 43 |
+
yes_rate: number;
|
| 44 |
+
elapsed_seconds: number;
|
| 45 |
+
avg_seconds_per_bbox: number;
|
| 46 |
+
};
|
| 47 |
+
annotations: VlmVerdict[];
|
| 48 |
+
};
|
| 49 |
+
|
| 50 |
+
export type QueryResult = {
|
| 51 |
+
bboxes: Bbox[];
|
| 52 |
+
count: number;
|
| 53 |
+
elapsed?: number;
|
| 54 |
+
error?: string;
|
| 55 |
+
};
|
| 56 |
+
|
| 57 |
+
export type ImageResult = {
|
| 58 |
+
width: number;
|
| 59 |
+
height: number;
|
| 60 |
+
queries: Record<string, QueryResult>;
|
| 61 |
+
error?: string;
|
| 62 |
+
};
|
| 63 |
+
|
| 64 |
+
export type LabelPartial = {
|
| 65 |
+
completed: number;
|
| 66 |
+
results: Record<string, ImageResult>;
|
| 67 |
+
};
|
| 68 |
+
|
| 69 |
+
// Verdicts: human review state stored alongside Falcon labels
|
| 70 |
+
export type BboxVerdict = "approved" | "rejected" | "unsure";
|
| 71 |
+
|
| 72 |
+
// Review record for one image: its boxes plus the human and VLM verdicts on them.
export type ImageReview = {
  image_path: string; // R2 key (e.g. "raw/positive/fiber_spool_drone/foo.jpg")
  bucket: string;
  width: number;
  height: number;
  bboxes: Array<
    Bbox & {
      query: string;
      verdict?: BboxVerdict; // human verdict
      vlm_verdict?: VlmVerdict["verdict"]; // Qwen verdict from verify_vlm.py
      vlm_reasoning?: string;
      note?: string;
    }
  >;
  image_verdict?: BboxVerdict; // overall image-level call
  reviewed_at?: string;
};
|
web/lib/utils.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { clsx, type ClassValue } from "clsx"
|
| 2 |
+
import { twMerge } from "tailwind-merge"
|
| 3 |
+
|
| 4 |
+
/**
 * Compose class names: `clsx` flattens the inputs into one string, then
 * `twMerge` post-processes that string and its result is returned.
 */
export function cn(...inputs: ClassValue[]) {
  const joined = clsx(inputs)
  return twMerge(joined)
}
|
web/next-env.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/// <reference types="next" />
|
| 2 |
+
/// <reference types="next/image-types/global" />
|
| 3 |
+
import "./.next/dev/types/routes.d.ts";
|
| 4 |
+
|
| 5 |
+
// NOTE: This file should not be edited
|
| 6 |
+
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
web/next.config.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { NextConfig } from "next";
|
| 2 |
+
|
| 3 |
+
const nextConfig: NextConfig = {
|
| 4 |
+
/* config options here */
|
| 5 |
+
};
|
| 6 |
+
|
| 7 |
+
export default nextConfig;
|
web/package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
web/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "web",
|
| 3 |
+
"version": "0.1.0",
|
| 4 |
+
"private": true,
|
| 5 |
+
"scripts": {
|
| 6 |
+
"dev": "next dev",
|
| 7 |
+
"build": "next build",
|
| 8 |
+
"start": "next start"
|
| 9 |
+
},
|
| 10 |
+
"dependencies": {
|
| 11 |
+
"@aws-sdk/client-s3": "^3.1026.0",
|
| 12 |
+
"@aws-sdk/s3-request-presigner": "^3.1026.0",
|
| 13 |
+
"@base-ui/react": "^1.3.0",
|
| 14 |
+
"class-variance-authority": "^0.7.1",
|
| 15 |
+
"clsx": "^2.1.1",
|
| 16 |
+
"lucide-react": "^1.7.0",
|
| 17 |
+
"next": "16.2.2",
|
| 18 |
+
"next-themes": "^0.4.6",
|
| 19 |
+
"react": "19.2.4",
|
| 20 |
+
"react-dom": "19.2.4",
|
| 21 |
+
"shadcn": "^4.2.0",
|
| 22 |
+
"sonner": "^2.0.7",
|
| 23 |
+
"tailwind-merge": "^3.5.0",
|
| 24 |
+
"tw-animate-css": "^1.4.0"
|
| 25 |
+
},
|
| 26 |
+
"devDependencies": {
|
| 27 |
+
"@tailwindcss/postcss": "^4",
|
| 28 |
+
"@types/node": "^20",
|
| 29 |
+
"@types/react": "^19",
|
| 30 |
+
"@types/react-dom": "^19",
|
| 31 |
+
"tailwindcss": "^4",
|
| 32 |
+
"typescript": "^5"
|
| 33 |
+
}
|
| 34 |
+
}
|
web/postcss.config.mjs
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const config = {
|
| 2 |
+
plugins: {
|
| 3 |
+
"@tailwindcss/postcss": {},
|
| 4 |
+
},
|
| 5 |
+
};
|
| 6 |
+
|
| 7 |
+
export default config;
|
web/public/file.svg
ADDED
|
|
web/public/globe.svg
ADDED
|
|
web/public/next.svg
ADDED
|
|
web/public/vercel.svg
ADDED
|
|
web/public/window.svg
ADDED
|
|
web/tsconfig.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"target": "ES2017",
|
| 4 |
+
"lib": ["dom", "dom.iterable", "esnext"],
|
| 5 |
+
"allowJs": true,
|
| 6 |
+
"skipLibCheck": true,
|
| 7 |
+
"strict": true,
|
| 8 |
+
"noEmit": true,
|
| 9 |
+
"esModuleInterop": true,
|
| 10 |
+
"module": "esnext",
|
| 11 |
+
"moduleResolution": "bundler",
|
| 12 |
+
"resolveJsonModule": true,
|
| 13 |
+
"isolatedModules": true,
|
| 14 |
+
"jsx": "react-jsx",
|
| 15 |
+
"incremental": true,
|
| 16 |
+
"plugins": [
|
| 17 |
+
{
|
| 18 |
+
"name": "next"
|
| 19 |
+
}
|
| 20 |
+
],
|
| 21 |
+
"paths": {
|
| 22 |
+
"@/*": ["./*"]
|
| 23 |
+
}
|
| 24 |
+
},
|
| 25 |
+
"include": [
|
| 26 |
+
"next-env.d.ts",
|
| 27 |
+
"**/*.ts",
|
| 28 |
+
"**/*.tsx",
|
| 29 |
+
".next/types/**/*.ts",
|
| 30 |
+
".next/dev/types/**/*.ts",
|
| 31 |
+
"**/*.mts"
|
| 32 |
+
],
|
| 33 |
+
"exclude": ["node_modules"]
|
| 34 |
+
}
|