Image Feature Extraction
MLX
English
data-label-factory
vision
dataset-labeling
object-detection
apple-silicon
gemma
falcon-perception
openrouter
yolo
Instructions to use waltgrace/data-label-factory with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use waltgrace/data-label-factory with MLX:
# Download the model from the Hub
pip install huggingface_hub[hf_xet]
huggingface-cli download --local-dir data-label-factory waltgrace/data-label-factory
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- LM Studio
feat: full pipeline end-to-end, OpenRouter labeling, verify stage
Browse files
- data_label_factory/cli.py +361 -14
- data_label_factory/providers/openrouter.py +61 -0
- data_label_factory/serve.py +51 -0
- web/app/api/dlf/route.ts +7 -2
- web/app/label/page.tsx +67 -2
data_label_factory/cli.py
CHANGED
|
@@ -645,34 +645,368 @@ def cmd_label(args):
|
|
| 645 |
print(f" {len(coco['images'])} images, {len(coco['annotations'])} bboxes")
|
| 646 |
|
| 647 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 648 |
def cmd_pipeline(args):
|
| 649 |
-
"""Full pipeline: gather → filter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
proj = load_project(args.project)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
print("=" * 70)
|
| 652 |
print(f"PIPELINE — {proj.project_name} ({proj.target_object})")
|
|
|
|
|
|
|
|
|
|
| 653 |
print("=" * 70)
|
| 654 |
|
| 655 |
exp = make_experiment_dir(f"pipeline-{proj.project_name}")
|
| 656 |
write_readme(exp, f"pipeline-{proj.project_name}",
|
| 657 |
description=f"Full pipeline for {proj.target_object}",
|
| 658 |
params=vars(args))
|
| 659 |
-
write_config(exp, {"project": proj.raw, **vars(args)
|
|
|
|
|
|
|
|
|
|
| 660 |
update_latest_symlink(exp)
|
| 661 |
print(f"Experiment: {exp}\n")
|
| 662 |
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 672 |
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
|
| 678 |
def cmd_list(args):
|
|
@@ -818,14 +1152,26 @@ def main():
|
|
| 818 |
sl.add_argument("--experiment", default=None)
|
| 819 |
sl.add_argument("--limit", type=int, default=0)
|
| 820 |
|
| 821 |
-
spi = sub.add_parser("pipeline", help="Full chain: gather → filter
|
| 822 |
spi.add_argument("--project", required=True)
|
| 823 |
spi.add_argument("--max-per-query", type=int, default=20)
|
| 824 |
spi.add_argument("--workers", type=int, default=50)
|
| 825 |
spi.add_argument("--experiment", default=None)
|
| 826 |
spi.add_argument("--limit", type=int, default=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 827 |
add_backend_flag(spi)
|
| 828 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
sl2 = sub.add_parser("label-v2", help="Label via provider registry (falcon, wilddet3d, chandra)")
|
| 830 |
sl2.add_argument("--project", required=True)
|
| 831 |
sl2.add_argument("--backend", default=None,
|
|
@@ -885,6 +1231,7 @@ def main():
|
|
| 885 |
"filter": cmd_filter,
|
| 886 |
"label": cmd_label,
|
| 887 |
"label-v2": cmd_label_v2,
|
|
|
|
| 888 |
"pipeline": cmd_pipeline,
|
| 889 |
"list": cmd_list,
|
| 890 |
"providers": cmd_providers,
|
|
|
|
| 645 |
print(f" {len(coco['images'])} images, {len(coco['annotations'])} bboxes")
|
| 646 |
|
| 647 |
|
| 648 |
+
def cmd_verify_v2(args):
    """Verify bboxes from a COCO file using a VLM provider (per-bbox YES/NO).

    Looks for a ``*.coco.json`` file under the resolved experiment directory,
    asks the provider for a verdict on every annotation (optionally capped by
    ``args.limit``) and writes ``verify_<backend>/verified.json`` next to it.

    Args:
        args: Parsed CLI namespace. Reads ``project``, ``backend``,
            ``experiment`` and ``limit``.

    Returns:
        None. Results are printed and written to disk.
    """
    from .providers import create_provider

    proj = load_project(args.project)
    backend = args.backend or proj.backend_for("verify") or "openrouter"

    # Find COCO file
    exp_dir = resolve_experiment(args.experiment) if args.experiment else resolve_experiment("latest")
    # os.walk yields entries in arbitrary order; sort so the file we pick is
    # the same on every run when an experiment holds more than one COCO file.
    coco_files = sorted(
        os.path.join(dirpath, fn)
        for dirpath, _, filenames in os.walk(exp_dir)
        for fn in filenames
        if fn.endswith(".coco.json")
    )
    if not coco_files:
        print(f" No COCO files in {exp_dir}")
        return
    coco_path = coco_files[0]
    print(f"Verifying bboxes in {coco_path} via {backend}")

    with open(coco_path, encoding="utf-8") as f:
        coco = json.load(f)

    img_root = proj.local_image_dir()
    images_by_id = {img["id"]: img for img in coco.get("images", [])}
    categories = {cat["id"]: cat["name"] for cat in coco.get("categories", [])}
    annotations = coco.get("annotations", [])

    if args.limit > 0:
        annotations = annotations[:args.limit]

    try:
        provider = create_provider(backend)
    except Exception as e:
        # Best-effort CLI: report the provider problem and stop quietly.
        print(f" {e}")
        return

    total = len(annotations)
    print(f" {total} bboxes to verify")
    results = []
    counts = {"YES": 0, "NO": 0, "UNSURE": 0, "ERROR": 0}
    t0 = time.time()

    for i, ann in enumerate(annotations, 1):
        img = images_by_id.get(ann["image_id"], {})
        img_path = os.path.join(img_root, img.get("file_name", ""))
        cat_name = categories.get(ann.get("category_id"), "object")
        bbox = ann["bbox"]

        # isfile, not exists: an annotation with an unknown image_id joins to
        # img_root itself, which exists but is a directory.
        if not os.path.isfile(img_path):
            results.append({"ann_id": ann["id"], "verdict": "ERROR", "detail": "image not found"})
            counts["ERROR"] += 1
        else:
            try:
                vr = provider.verify_bbox(img_path, bbox, cat_name)
                verdict, raw_answer, elapsed = vr.verdict, vr.raw_answer or "", vr.elapsed
            except Exception as e:
                # One failing bbox must not abort the run; keep the error text.
                verdict, raw_answer, elapsed = "ERROR", str(e), 0

            counts[verdict] = counts.get(verdict, 0) + 1
            results.append({
                "ann_id": ann["id"], "image": img.get("file_name", ""),
                "category": cat_name, "bbox": bbox,
                "verdict": verdict, "raw_answer": raw_answer[:120],
                "elapsed": round(elapsed, 2),
            })

        # Progress is reported for every annotation, including skipped ones,
        # so the final "[N/N]" line is never lost.
        if i % 10 == 0 or i == total:
            elapsed_total = time.time() - t0
            rate = i / max(elapsed_total, 1)
            eta = (total - i) / max(rate, 0.001) / 60
            print(f" [{i:4d}/{total}] YES={counts.get('YES',0)} NO={counts.get('NO',0)} "
                  f"ERR={counts.get('ERROR',0)} ETA {eta:.1f} min")

    # Save
    out_dir = os.path.join(exp_dir, f"verify_{backend}")
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, "verified.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump({"backend": backend, "project": proj.project_name,
                   "counts": counts, "results": results}, f, indent=2)
    print(f"\nSaved {out_path}")
    approve_rate = counts.get("YES", 0) / max(1, len(results))
    print(f" Approval rate: {approve_rate:.0%} ({counts.get('YES',0)}/{len(results)})")
|
| 733 |
+
|
| 734 |
+
|
| 735 |
def cmd_pipeline(args):
    """Full pipeline: gather → filter → label → verify → score.

    Runs the complete data labeling factory end-to-end.
    Uses the v2 provider registry for all stages.

    Stages:
        1. GATHER - delegates to ``cmd_gather`` unless ``skip_gather`` is set.
        2. FILTER - one verdict per image; written to
           ``filter_<backend>/keep_list.json``.
        3. LABEL  - bbox detection on images whose verdict is YES; written as
           ``label_<backend>/<project>.coco.json``.
        4. VERIFY - one verdict per bbox; written to
           ``verify_<backend>/verified.json``.
        Finally the COCO dict is scored with ``score_coco`` and summarized.

    Args:
        args: Parsed CLI namespace. Reads ``project`` and ``limit`` plus the
            optional ``skip_gather``, ``label_backend`` and ``verify_backend``.
            ``args.experiment`` is overwritten before the gather stage runs.

    Returns:
        None. Progress is printed and artifacts are written under the new
        experiment directory.
    """
    from .providers import create_provider
    from .metrics import score_coco

    proj = load_project(args.project)
    filter_backend = resolve_backend(args, proj, "filter")
    label_backend = getattr(args, "label_backend", None) or proj.backend_for("label") or "falcon"
    verify_backend = getattr(args, "verify_backend", None) or proj.backend_for("verify") or filter_backend

    print("=" * 70)
    print(f"PIPELINE — {proj.project_name} ({proj.target_object})")
    print(f" filter: {filter_backend}")
    print(f" label: {label_backend}")
    print(f" verify: {verify_backend}")
    print("=" * 70)

    exp = make_experiment_dir(f"pipeline-{proj.project_name}")
    write_readme(exp, f"pipeline-{proj.project_name}",
                 description=f"Full pipeline for {proj.target_object}",
                 params=vars(args))
    write_config(exp, {"project": proj.raw, **vars(args),
                       "filter_backend": filter_backend,
                       "label_backend": label_backend,
                       "verify_backend": verify_backend})
    update_latest_symlink(exp)
    print(f"Experiment: {exp}\n")

    skip_gather = getattr(args, "skip_gather", False)
    img_root = proj.local_image_dir()

    # ── 1. GATHER ──
    if not skip_gather:
        print("=" * 50)
        print(">>> [1/4] GATHER")
        print("=" * 50)
        # Point the gather command at the experiment directory created above.
        args.experiment = os.path.basename(exp).split("_", 2)[-1]
        cmd_gather(args)
    else:
        print(">>> [1/4] GATHER — skipped (--skip-gather)")

    # Collect images
    images = []
    if os.path.exists(img_root):
        for root, _, names in os.walk(img_root):
            for n in names:
                if not n.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
                    continue
                full = os.path.join(root, n)
                rel = os.path.relpath(full, img_root)
                # relpath uses the platform separator; splitting on a literal
                # "/" would reject every image where os.sep is not "/".
                parts = rel.split(os.sep)
                if len(parts) < 2:
                    continue
                images.append(("/".join(parts[:2]), rel, full))
    if args.limit > 0:
        images = images[:args.limit]
    print(f"\n {len(images)} images found in {img_root}")

    if not images:
        print(" No images — pipeline stopped. Run gather first.")
        return

    # ── 2. FILTER ──
    print("\n" + "=" * 50)
    print(f">>> [2/4] FILTER via {filter_backend}")
    print("=" * 50)

    try:
        filter_prov = create_provider(filter_backend)
    except Exception as e:
        print(f" Filter provider error: {e}")
        print(" Falling back to all-YES (no filter)")
        filter_prov = None

    prompt = proj.prompt("filter")
    filter_results = []
    counts = {"YES": 0, "NO": 0, "UNKNOWN": 0, "ERROR": 0}
    t0 = time.time()

    for i, (bucket, rel, full) in enumerate(images, 1):
        if filter_prov:
            try:
                fr = filter_prov.filter_image(full, prompt)
                verdict, raw, elapsed_img = fr.verdict, fr.raw_answer or "", fr.elapsed
            except Exception as e:
                # A single bad image or request must not abort the run;
                # record it under the ERROR verdict instead.
                verdict, raw, elapsed_img = "ERROR", str(e), 0
        else:
            verdict, raw, elapsed_img = "YES", "no filter", 0

        counts[verdict] = counts.get(verdict, 0) + 1
        filter_results.append({
            "image_path": rel, "bucket": bucket, "verdict": verdict,
            "raw_answer": raw[:120], "elapsed_seconds": round(elapsed_img, 3),
        })
        if i % 10 == 0 or i == len(images):
            elapsed_total = time.time() - t0
            rate = i / max(elapsed_total, 1)
            eta = (len(images) - i) / max(rate, 0.001) / 60
            print(f" [{i:4d}/{len(images)}] YES={counts['YES']} NO={counts['NO']} ETA {eta:.0f} min")

    out_dir = os.path.join(exp, f"filter_{filter_backend}")
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, "keep_list.json"), "w", encoding="utf-8") as f:
        json.dump({"backend": filter_backend, "project": proj.project_name,
                   "counts": counts, "results": filter_results}, f, indent=2)
    print(f" YES rate: {counts['YES']}/{len(images)} ({counts['YES']/max(1,len(images)):.0%})")

    # Keep only YES images for labeling (filter_results is parallel to images)
    yes_images = [entry for entry, fr in zip(images, filter_results)
                  if fr["verdict"] == "YES"]
    print(f" {len(yes_images)} images pass filter → label stage")

    if not yes_images:
        print(" No images passed filter — pipeline stopped.")
        print(f"\nPIPELINE DONE — {exp}")
        return

    # ── 3. LABEL ──
    print("\n" + "=" * 50)
    print(f">>> [3/4] LABEL via {label_backend}")
    print("=" * 50)

    try:
        label_prov = create_provider(label_backend)
    except Exception as e:
        print(f" Label provider error: {e}")
        print(f"\nPIPELINE STOPPED at label stage — {exp}")
        return

    from PIL import Image
    coco = {
        "info": {
            "description": f"data_label_factory pipeline for {proj.project_name}",
            "date_created": datetime.now().isoformat(timespec="seconds"),
            "target_object": proj.target_object,
            "filter_backend": filter_backend,
            "label_backend": label_backend,
        },
        "images": [],
        "annotations": [],
        "categories": [
            {"id": i + 1, "name": q, "supercategory": "object"}
            for i, q in enumerate(proj.falcon_queries)
        ],
    }
    cat_id = {q: i + 1 for i, q in enumerate(proj.falcon_queries)}
    next_img_id, next_ann_id = 1, 1
    n_total_dets = 0
    t0 = time.time()

    for i, (bucket, rel, full) in enumerate(yes_images, 1):
        try:
            # Context manager releases the file handle once the size is read.
            with Image.open(full) as im:
                iw, ih = im.size
        except Exception as e:
            print(f" skipping {rel}: cannot open image ({e})")
            continue

        img_id = next_img_id
        next_img_id += 1
        coco["images"].append({
            "id": img_id, "file_name": rel, "width": iw, "height": ih, "bucket": bucket
        })

        try:
            result = label_prov.label_image(full, proj.falcon_queries, image_wh=(iw, ih))
            detections = result.annotations
        except Exception as e:
            # Keep the image entry but carry on with the remaining images.
            print(f" label failed for {rel}: {e}")
            detections = []

        for ann in detections:
            cat_name = ann.get("category", proj.falcon_queries[0])
            cid = cat_id.get(cat_name)
            if cid is None:
                # Provider returned a category outside the project queries.
                cid = len(coco["categories"]) + 1
                coco["categories"].append({"id": cid, "name": cat_name, "supercategory": "object"})
                cat_id[cat_name] = cid

            coco["annotations"].append({
                "id": next_ann_id, "image_id": img_id,
                "category_id": cid,
                "bbox": ann["bbox"],
                "area": round(ann["bbox"][2] * ann["bbox"][3], 2),
                "iscrowd": 0,
                "score": ann.get("score", 1.0),
            })
            next_ann_id += 1
            n_total_dets += 1

        if i % 5 == 0 or i == len(yes_images):
            elapsed = time.time() - t0
            rate = i / max(elapsed, 1)
            eta = (len(yes_images) - i) / max(rate, 0.001) / 60
            print(f" [{i:4d}/{len(yes_images)}] dets={n_total_dets} ETA {eta:.0f} min")

    out_dir = os.path.join(exp, f"label_{label_backend}")
    os.makedirs(out_dir, exist_ok=True)
    coco_path = os.path.join(out_dir, f"{proj.project_name}.coco.json")
    with open(coco_path, "w", encoding="utf-8") as f:
        json.dump(coco, f, indent=2)
    print(f" {len(coco['images'])} images, {n_total_dets} bboxes → {coco_path}")

    # ── 4. VERIFY ──
    print("\n" + "=" * 50)
    print(f">>> [4/4] VERIFY via {verify_backend}")
    print("=" * 50)

    try:
        verify_prov = create_provider(verify_backend)
    except Exception as e:
        print(f" Verify provider error: {e} — skipping verify")
        verify_prov = None

    verify_results = []
    v_counts = {"YES": 0, "NO": 0, "UNSURE": 0, "ERROR": 0}

    if verify_prov and n_total_dets > 0:
        # NOTE(review): args.limit is the image cap and is reused here as the
        # annotation cap — confirm this is intended.
        verify_limit = args.limit if args.limit > 0 else len(coco["annotations"])
        anns_to_verify = coco["annotations"][:verify_limit]
        # Build the id lookups once instead of once per annotation.
        images_by_id = {im["id"]: im for im in coco["images"]}
        names_by_cat = {c["id"]: c["name"] for c in coco["categories"]}
        t0 = time.time()

        for i, ann in enumerate(anns_to_verify, 1):
            img = images_by_id.get(ann["image_id"], {})
            img_path = os.path.join(img_root, img.get("file_name", ""))
            cat_name = names_by_cat.get(ann["category_id"], "object")

            # isfile, not exists: an empty file_name joins to img_root itself.
            if not os.path.isfile(img_path):
                verify_results.append({"ann_id": ann["id"], "verdict": "ERROR"})
                v_counts["ERROR"] += 1
                continue

            try:
                vr = verify_prov.verify_bbox(img_path, ann["bbox"], cat_name)
                verdict = vr.verdict
            except Exception:
                verdict = "ERROR"

            v_counts[verdict] = v_counts.get(verdict, 0) + 1
            verify_results.append({
                "ann_id": ann["id"], "category": cat_name,
                "verdict": verdict,
            })

            if i % 10 == 0 or i == len(anns_to_verify):
                elapsed_total = time.time() - t0
                rate = i / max(elapsed_total, 1)
                eta = (len(anns_to_verify) - i) / max(rate, 0.001) / 60
                print(f" [{i:4d}/{len(anns_to_verify)}] YES={v_counts['YES']} NO={v_counts['NO']} ETA {eta:.1f} min")

        out_dir = os.path.join(exp, f"verify_{verify_backend}")
        os.makedirs(out_dir, exist_ok=True)
        with open(os.path.join(out_dir, "verified.json"), "w", encoding="utf-8") as f:
            json.dump({"backend": verify_backend, "counts": v_counts,
                       "results": verify_results}, f, indent=2)
        approve = v_counts.get("YES", 0) / max(1, len(verify_results))
        print(f" Approval: {v_counts.get('YES',0)}/{len(verify_results)} ({approve:.0%})")
    else:
        print(" Skipped (no provider or no detections)")

    # ── SCORE ──
    print("\n" + "=" * 50)
    print(">>> QUALITY SCORE")
    print("=" * 50)
    score = score_coco(coco)
    print(f" Images: {score.total_images}")
    print(f" Annotations: {score.total_annotations}")
    print(f" Pass rate: {score.pass_rate:.0%}")
    print(f" Mean score: {score.mean_score:.3f}")
    for rule, rate in sorted(score.rule_breakdown.items()):
        flag = "ok" if rate >= 0.95 else "WARN"
        print(f" {rule:20s} {rate:6.1%} {flag}")

    # v_counts["YES"] is pre-seeded with 0, so a skipped verify stage has to
    # be detected from the empty result list to print "?/?".
    if verify_results:
        verify_summary = f"{v_counts['YES']}/{len(verify_results)}"
    else:
        verify_summary = "?/?"

    print(f"\n{'=' * 70}")
    print(f"PIPELINE DONE — {exp}")
    print(f" COCO: {coco_path}")
    print(f" {len(coco['images'])} images, {n_total_dets} bboxes, "
          f"filter={counts['YES']}/{len(images)} YES, "
          f"verify={verify_summary} approved")
    print(f"{'=' * 70}")
|
| 1010 |
|
| 1011 |
|
| 1012 |
def cmd_list(args):
|
|
|
|
| 1152 |
sl.add_argument("--experiment", default=None)
|
| 1153 |
sl.add_argument("--limit", type=int, default=0)
|
| 1154 |
|
| 1155 |
+
spi = sub.add_parser("pipeline", help="Full chain: gather → filter → label → verify → score")
|
| 1156 |
spi.add_argument("--project", required=True)
|
| 1157 |
spi.add_argument("--max-per-query", type=int, default=20)
|
| 1158 |
spi.add_argument("--workers", type=int, default=50)
|
| 1159 |
spi.add_argument("--experiment", default=None)
|
| 1160 |
spi.add_argument("--limit", type=int, default=0)
|
| 1161 |
+
spi.add_argument("--skip-gather", action="store_true",
|
| 1162 |
+
help="Skip image gathering (use existing images)")
|
| 1163 |
+
spi.add_argument("--label-backend", default=None,
|
| 1164 |
+
help="Backend for bbox labeling (falcon, openrouter, etc.)")
|
| 1165 |
+
spi.add_argument("--verify-backend", default=None,
|
| 1166 |
+
help="Backend for per-bbox verification")
|
| 1167 |
add_backend_flag(spi)
|
| 1168 |
|
| 1169 |
+
sv = sub.add_parser("verify", help="Verify bboxes in a COCO file via VLM")
|
| 1170 |
+
sv.add_argument("--project", required=True)
|
| 1171 |
+
sv.add_argument("--experiment", default=None)
|
| 1172 |
+
sv.add_argument("--limit", type=int, default=0)
|
| 1173 |
+
add_backend_flag(sv)
|
| 1174 |
+
|
| 1175 |
sl2 = sub.add_parser("label-v2", help="Label via provider registry (falcon, wilddet3d, chandra)")
|
| 1176 |
sl2.add_argument("--project", required=True)
|
| 1177 |
sl2.add_argument("--backend", default=None,
|
|
|
|
| 1231 |
"filter": cmd_filter,
|
| 1232 |
"label": cmd_label,
|
| 1233 |
"label-v2": cmd_label_v2,
|
| 1234 |
+
"verify": cmd_verify_v2,
|
| 1235 |
"pipeline": cmd_pipeline,
|
| 1236 |
"list": cmd_list,
|
| 1237 |
"providers": cmd_providers,
|
data_label_factory/providers/openrouter.py
CHANGED
|
@@ -38,6 +38,8 @@ import time
|
|
| 38 |
import urllib.request
|
| 39 |
from typing import Any
|
| 40 |
|
|
|
|
|
|
|
| 41 |
from . import Provider, FilterResult, VerifyResult, LabelResult, register_provider
|
| 42 |
|
| 43 |
|
|
@@ -242,3 +244,62 @@ class OpenRouterProvider(Provider):
|
|
| 242 |
elapsed=elapsed,
|
| 243 |
confidence=conf,
|
| 244 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
import urllib.request
|
| 39 |
from typing import Any
|
| 40 |
|
| 41 |
+
import re as _re
|
| 42 |
+
|
| 43 |
from . import Provider, FilterResult, VerifyResult, LabelResult, register_provider
|
| 44 |
|
| 45 |
|
|
|
|
| 244 |
elapsed=elapsed,
|
| 245 |
confidence=conf,
|
| 246 |
)
|
| 247 |
+
|
| 248 |
+
def label_image(self, image_path: str, queries: list[str],
                image_wh: tuple[int, int] | None = None) -> LabelResult:
    """Bbox detection via Gemma 4 vision grounding.

    Prompts the model to return bounding box coordinates for each query.
    Gemma 4 supports grounded detection — it returns [y1, x1, y2, x2]
    normalized to 0-1000 when prompted correctly.

    Args:
        image_path: Path of the image sent to the model.
        queries: Object descriptions; one model call is made per query.
        image_wh: Optional ``(width, height)`` in pixels. When omitted the
            size is read from the image file.

    Returns:
        LabelResult whose annotations carry pixel-space ``[x, y, w, h]``
        boxes, the originating query as ``category``, a fixed ``score`` of
        0.8 and ``source`` ``"openrouter"``. ``metadata`` holds the model
        name, plus an ``errors`` mapping of query to error text when any
        model call failed.
    """
    if image_wh is None:
        from PIL import Image
        # Context manager closes the file handle once the size is known.
        with Image.open(image_path) as im:
            image_wh = im.size

    iw, ih = image_wh
    # Compiled once here rather than on every line of every answer.
    bbox_pattern = _re.compile(r'\[(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\]')
    all_annotations = []
    failed_queries = {}
    total_elapsed = 0.0

    for query in queries:
        prompt = (
            f"Detect all instances of \"{query}\" in this image. "
            f"For each instance, return a bounding box as [ymin, xmin, ymax, xmax] "
            f"with coordinates normalized from 0 to 1000. "
            f"Format each detection on its own line as: "
            f"[ymin, xmin, ymax, xmax] label\n"
            f"If none found, say NONE."
        )

        try:
            answer, elapsed, _ = self._call(image_path, prompt, max_tokens=512, timeout=30)
            answer = _strip_thinking(answer)
            total_elapsed += elapsed
        except Exception as e:
            # Best-effort per query: remember the failure and keep going.
            failed_queries[query] = str(e)
            continue

        # Parse bbox lines: [y1, x1, y2, x2] label
        for line in answer.split("\n"):
            # finditer so several boxes on one line are all picked up.
            for match in bbox_pattern.finditer(line.strip()):
                y1 = int(match.group(1)) / 1000.0 * ih
                x1 = int(match.group(2)) / 1000.0 * iw
                y2 = int(match.group(3)) / 1000.0 * ih
                x2 = int(match.group(4)) / 1000.0 * iw
                w = max(0, x2 - x1)
                h = max(0, y2 - y1)
                # Drop degenerate or inverted boxes.
                if w > 0 and h > 0:
                    all_annotations.append({
                        "bbox": [round(x1, 2), round(y1, 2), round(w, 2), round(h, 2)],
                        "category": query,
                        "score": 0.8,
                        "source": "openrouter",
                    })

    metadata = {"model": self._model()}
    if failed_queries:
        metadata["errors"] = failed_queries

    return LabelResult(
        annotations=all_annotations,
        elapsed=total_elapsed,
        metadata=metadata,
    )
|
data_label_factory/serve.py
CHANGED
|
@@ -197,6 +197,57 @@ async def label_image(
|
|
| 197 |
os.unlink(tmp_path)
|
| 198 |
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
# ─── Verify ────────────────────────────────────────────────
|
| 201 |
|
| 202 |
@app.post("/api/verify")
|
|
|
|
| 197 |
os.unlink(tmp_path)
|
| 198 |
|
| 199 |
|
| 200 |
+
# ─── Ask (free-form VLM question) ─────────────────────────
|
| 201 |
+
|
| 202 |
+
@app.post("/api/ask")
async def ask_image(
    image: UploadFile = File(...),
    question: str = Form(default="What do you see in this image?"),
    backend: str = Form(default="gemma"),
):
    """Ask a free-form question about an image via any VLM backend.

    The upload is written to a temporary file under ``UPLOAD_DIR``, passed to
    the selected provider and removed again before the response is returned.

    Args:
        image: Uploaded image file.
        question: Free-form question sent to the model.
        backend: Provider name handed to ``create_provider``.

    Returns:
        Dict with ``answer``, ``elapsed`` (rounded to 2 decimals), ``backend``
        and ``question``.

    Raises:
        HTTPException: Status 500 carrying the error text when anything fails.
    """
    from .providers import create_provider

    # The filename may be absent on an upload; fall back to ".jpg".
    suffix = Path(image.filename or "").suffix or ".jpg"
    tmp_path = None

    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False, dir=str(UPLOAD_DIR)) as f:
            # Record the path before writing so the finally block can remove
            # the file even when reading or writing the upload fails.
            tmp_path = f.name
            f.write(await image.read())

        provider = create_provider(backend)
        # Use _call for richer answers (more tokens than filter's 32)
        if hasattr(provider, '_call'):
            call_result = provider._call(tmp_path, question, max_tokens=256)
            # Some providers return (text, elapsed), others (text, elapsed, usage)
            if len(call_result) == 3:
                answer, elapsed, _ = call_result
            else:
                answer, elapsed = call_result
        else:
            result = provider.filter_image(tmp_path, question)
            answer = result.raw_answer
            elapsed = result.elapsed

        # Strip thinking tokens
        # NOTE(review): the check looks for a provider attribute, but the
        # function used is the module-level one from providers.gemma — confirm
        # which providers this branch is meant to cover.
        if hasattr(provider, '_strip_thinking'):
            from .providers.gemma import _strip_thinking
            answer = _strip_thinking(answer)
        elif 'thought' in answer.lower()[:20]:
            import re
            answer = re.sub(r'^(?:thought\s*\n?\s*)+', '', answer, flags=re.IGNORECASE).strip()

        return {
            "answer": answer,
            "elapsed": round(elapsed, 2),
            "backend": backend,
            "question": question,
        }
    except Exception as e:
        raise HTTPException(500, str(e)) from e
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
| 249 |
+
|
| 250 |
+
|
| 251 |
# ─── Verify ────────────────────────────────────────────────
|
| 252 |
|
| 253 |
@app.post("/api/verify")
|
web/app/api/dlf/route.ts
CHANGED
|
@@ -16,11 +16,16 @@ export async function GET(req: NextRequest) {
|
|
| 16 |
export async function POST(req: NextRequest) {
|
| 17 |
const path = req.nextUrl.searchParams.get("path") || "/api/filter";
|
| 18 |
try {
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
| 20 |
const res = await fetch(`${DLF_API}${path}`, {
|
| 21 |
method: "POST",
|
| 22 |
-
|
|
|
|
| 23 |
});
|
|
|
|
| 24 |
const data = await res.json();
|
| 25 |
return NextResponse.json(data);
|
| 26 |
} catch (e: any) {
|
|
|
|
| 16 |
export async function POST(req: NextRequest) {
|
| 17 |
const path = req.nextUrl.searchParams.get("path") || "/api/filter";
|
| 18 |
try {
|
| 19 |
+
// Forward the raw request body + content-type header to preserve multipart boundaries
|
| 20 |
+
const contentType = req.headers.get("content-type") || "";
|
| 21 |
+
const body = await req.arrayBuffer();
|
| 22 |
+
|
| 23 |
const res = await fetch(`${DLF_API}${path}`, {
|
| 24 |
method: "POST",
|
| 25 |
+
headers: { "Content-Type": contentType },
|
| 26 |
+
body: body,
|
| 27 |
});
|
| 28 |
+
|
| 29 |
const data = await res.json();
|
| 30 |
return NextResponse.json(data);
|
| 31 |
} catch (e: any) {
|
web/app/label/page.tsx
CHANGED
|
@@ -62,6 +62,8 @@ export default function LabelPage() {
|
|
| 62 |
const [loading, setLoading] = useState(false);
|
| 63 |
const [loadingMsg, setLoadingMsg] = useState("");
|
| 64 |
const [apiStatus, setApiStatus] = useState<"checking" | "up" | "down">("checking");
|
|
|
|
|
|
|
| 65 |
const canvasRef = useRef<HTMLCanvasElement>(null);
|
| 66 |
const fileInputRef = useRef<HTMLInputElement>(null);
|
| 67 |
|
|
@@ -172,6 +174,51 @@ export default function LabelPage() {
|
|
| 172 |
setLoadingMsg("");
|
| 173 |
};
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
// Draw bboxes on canvas
|
| 176 |
const drawAnnotations = (idx: number, result: LabelResult) => {
|
| 177 |
const canvas = canvasRef.current;
|
|
@@ -298,13 +345,13 @@ export default function LabelPage() {
|
|
| 298 |
{/* Description */}
|
| 299 |
<div>
|
| 300 |
<label className="block text-sm font-medium text-zinc-400 mb-1">
|
| 301 |
-
|
| 302 |
</label>
|
| 303 |
<input
|
| 304 |
type="text"
|
| 305 |
value={description}
|
| 306 |
onChange={(e) => setDescription(e.target.value)}
|
| 307 |
-
placeholder="e.g. stop signs, fire hydrants,
|
| 308 |
className="w-full px-4 py-3 rounded-lg bg-zinc-900 border border-zinc-700 text-zinc-100 placeholder:text-zinc-600 focus:border-blue-500 focus:outline-none"
|
| 309 |
/>
|
| 310 |
</div>
|
|
@@ -394,8 +441,26 @@ export default function LabelPage() {
|
|
| 394 |
>
|
| 395 |
{loading ? loadingMsg : "Filter All"}
|
| 396 |
</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
</div>
|
| 398 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
{/* Filter summary */}
|
| 400 |
{filterResults.length > 0 && (
|
| 401 |
<div className="bg-zinc-900 rounded-lg p-4 border border-zinc-800">
|
|
|
|
| 62 |
const [loading, setLoading] = useState(false);
|
| 63 |
const [loadingMsg, setLoadingMsg] = useState("");
|
| 64 |
const [apiStatus, setApiStatus] = useState<"checking" | "up" | "down">("checking");
|
| 65 |
+
const [askAnswer, setAskAnswer] = useState<string | null>(null);
|
| 66 |
+
const [askElapsed, setAskElapsed] = useState(0);
|
| 67 |
const canvasRef = useRef<HTMLCanvasElement>(null);
|
| 68 |
const fileInputRef = useRef<HTMLInputElement>(null);
|
| 69 |
|
|
|
|
| 174 |
setLoadingMsg("");
|
| 175 |
};
|
| 176 |
|
| 177 |
+
// Ask AI — sends the selected image to two endpoints concurrently:
//   * /api/ask   — free-form question answering (the answer is stored via setAskAnswer)
//   * /api/label — bbox detection (the result is stored in labelResults and drawn on the canvas)
// Both requests go through the DLF_API proxy URL. Promise.allSettled is used so a
// failure of one request does not prevent the other result from being processed.
|
| 178 |
+
const askAI = async () => {
|
| 179 |
+
// Guard: nothing to do without a selected image, its file entry, and a non-empty description.
if (selectedImage === null || !files[selectedImage] || !description) return;
|
| 180 |
+
setLoading(true);
|
| 181 |
+
setLoadingMsg("Asking AI + detecting objects...");
|
| 182 |
+
// Clear the previous answer so it is not shown while the new request is in flight.
setAskAnswer(null);
|
| 183 |
+
|
| 184 |
+
const file = files[selectedImage];
|
| 185 |
+
|
| 186 |
+
// Build one multipart form per endpoint; the two requests are started together below.
|
| 187 |
+
const askForm = new FormData();
|
| 188 |
+
askForm.append("image", file);
|
| 189 |
+
// The description text is sent verbatim as the question.
askForm.append("question", description);
|
| 190 |
+
askForm.append("backend", filterBackend);
|
| 191 |
+
|
| 192 |
+
const labelForm = new FormData();
|
| 193 |
+
labelForm.append("image", file);
|
| 194 |
+
// Turn the question into a detection query: strip every "?" and every
// case-insensitive occurrence of "how many " (e.g. "how many birds?" -> "birds").
labelForm.append("queries", description.replace(/\?/g, "").replace(/how many /gi, ""));
|
| 195 |
+
labelForm.append("backend", labelBackend);
|
| 196 |
+
|
| 197 |
+
// allSettled never rejects: each entry reports "fulfilled" (with the parsed JSON)
// or "rejected" (network error or a body that failed to parse as JSON).
// NOTE(review): the HTTP status (r.ok) is not checked, so an error response with a
// JSON body is treated as fulfilled — confirm the API always returns an `error` field.
const [askRes, labelRes] = await Promise.allSettled([
|
| 198 |
+
fetch(`${DLF_API}?path=/api/ask`, { method: "POST", body: askForm }).then(r => r.json()),
|
| 199 |
+
fetch(`${DLF_API}?path=/api/label`, { method: "POST", body: labelForm }).then(r => r.json()),
|
| 200 |
+
]);
|
| 201 |
+
|
| 202 |
+
// Process ask result: prefer `answer`, then `error`, then a fixed fallback string.
|
| 203 |
+
if (askRes.status === "fulfilled") {
|
| 204 |
+
// NOTE(review): if the endpoint ever returns JSON `null`, the property reads below
// would throw — presumably the API always returns an object; verify.
const data = askRes.value;
|
| 205 |
+
setAskAnswer(data.answer || data.error || "No response");
|
| 206 |
+
setAskElapsed(data.elapsed || 0);
|
| 207 |
+
} else {
|
| 208 |
+
// askElapsed is not updated on this path, so it keeps its previous value.
setAskAnswer(`Error: ${askRes.reason}`);
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
// Process label result — store it under the file name and draw bboxes.
// A rejected request or a response without `annotations` is silently ignored.
|
| 212 |
+
if (labelRes.status === "fulfilled" && labelRes.value.annotations) {
|
| 213 |
+
const data = labelRes.value as LabelResult;
|
| 214 |
+
// Copy the Map so React sees a new reference and re-renders.
setLabelResults((prev) => new Map(prev).set(file.name, data));
|
| 215 |
+
drawAnnotations(selectedImage, data);
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
setLoading(false);
|
| 219 |
+
setLoadingMsg("");
|
| 220 |
+
};
|
| 221 |
+
|
| 222 |
// Draw bboxes on canvas
|
| 223 |
const drawAnnotations = (idx: number, result: LabelResult) => {
|
| 224 |
const canvas = canvasRef.current;
|
|
|
|
| 345 |
{/* Description */}
|
| 346 |
<div>
|
| 347 |
<label className="block text-sm font-medium text-zinc-400 mb-1">
|
| 348 |
+
Target object or question
|
| 349 |
</label>
|
| 350 |
<input
|
| 351 |
type="text"
|
| 352 |
value={description}
|
| 353 |
onChange={(e) => setDescription(e.target.value)}
|
| 354 |
+
placeholder="e.g. stop signs, fire hydrants, or ask: how many birds?"
|
| 355 |
className="w-full px-4 py-3 rounded-lg bg-zinc-900 border border-zinc-700 text-zinc-100 placeholder:text-zinc-600 focus:border-blue-500 focus:outline-none"
|
| 356 |
/>
|
| 357 |
</div>
|
|
|
|
| 441 |
>
|
| 442 |
{loading ? loadingMsg : "Filter All"}
|
| 443 |
</button>
|
| 444 |
+
<button
|
| 445 |
+
onClick={askAI}
|
| 446 |
+
disabled={loading || !files.length || !description || selectedImage === null}
|
| 447 |
+
className="flex-1 px-4 py-3 bg-purple-600 hover:bg-purple-500 disabled:bg-zinc-800 disabled:text-zinc-600 rounded-lg font-medium transition-colors"
|
| 448 |
+
>
|
| 449 |
+
Ask AI
|
| 450 |
+
</button>
|
| 451 |
</div>
|
| 452 |
|
| 453 |
+
{/* Ask AI answer */}
|
| 454 |
+
{askAnswer && (
|
| 455 |
+
<div className="bg-zinc-900 rounded-lg p-4 border border-purple-500/30">
|
| 456 |
+
<div className="flex justify-between text-sm mb-2">
|
| 457 |
+
<span className="text-purple-400 font-medium">AI Answer</span>
|
| 458 |
+
<span className="text-zinc-500">{askElapsed}s</span>
|
| 459 |
+
</div>
|
| 460 |
+
<p className="text-zinc-200 text-sm whitespace-pre-wrap">{askAnswer}</p>
|
| 461 |
+
</div>
|
| 462 |
+
)}
|
| 463 |
+
|
| 464 |
{/* Filter summary */}
|
| 465 |
{filterResults.length > 0 && (
|
| 466 |
<div className="bg-zinc-900 rounded-lg p-4 border border-zinc-800">
|