ui: drop head selector, switch to universal mode + domain badges
Browse files
The head dropdown is gone — Gemma 4 now auto-detects the domain per
detection via universal mode. Drops a UI element AND lifts the
single-domain-per-photo restriction.
- run() signature: (image, jurisdiction) — no more head argument
- Routes through engine.infer_universal_with_trace
- Each action card gets a small domain pill ('Domestic', 'EV battery',
'Medical', etc.) next to the class name, color-matched to the theme
- KPI strip's 4th tile is dynamic:
* single-domain scene → 'Domain · Domestic'
* mixed-domain scene → '🌐 Domains involved · Domestic, Medical, ...'
- Friendly note above the jurisdiction box explains auto-routing
- Examples gallery pre-fills only (image, jurisdiction)
Killer demo: upload a photo with a bottle, a battery, and a syringe →
three Passports, three domains, three correct routings, in one inference.
- app.py +88 -27
- matter/engine.py +84 -1
- matter/heads.py +64 -0
|
@@ -48,8 +48,6 @@ DEFAULT_BBOX_COLOR = "#00d97e"
|
|
| 48 |
ROOT = Path(__file__).parent
|
| 49 |
EXAMPLES_DIR = ROOT / "examples"
|
| 50 |
|
| 51 |
-
HEAD_NAMES = list(HEADS.keys()) # domestic, ewaste, ev, medical, cd, textile
|
| 52 |
-
|
| 53 |
SAMPLE_IMAGES: dict[str, str] = {
|
| 54 |
"domestic": "domestic_pet_bottle.jpg",
|
| 55 |
"ewaste": "ewaste_dead_laptop.jpg",
|
|
@@ -310,6 +308,23 @@ def render_kpi_strip(passports: list, scene_trace: dict) -> str:
|
|
| 310 |
juris = scene_trace.get("metadata", {}).get("jurisdiction", "")
|
| 311 |
juris_short = juris.split(" (")[0].strip() or "—"
|
| 312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
co2_class = "kpi-num"
|
| 314 |
co2_color = "" if total_co2 >= 0 else 'style="color:#ffb547;"'
|
| 315 |
hazard_class = "kpi-card kpi-card-alert" if hazards_caught else "kpi-card"
|
|
@@ -326,9 +341,9 @@ def render_kpi_strip(passports: list, scene_trace: dict) -> str:
|
|
| 326 |
+ f'<div class="{hazard_class}"><div class="kpi-emoji">{hazard_emoji}</div>'
|
| 327 |
f'<div class="kpi-num">{hazards_caught}</div>'
|
| 328 |
f'<div class="kpi-label">{"hazard caught" if hazards_caught == 1 else "hazards caught"}</div></div>'
|
| 329 |
-
+ f'<div class="kpi-card"><div class="kpi-emoji">
|
| 330 |
-
f'<div class="kpi-num kpi-num-small">{safe(
|
| 331 |
-
f'<div class="kpi-label">
|
| 332 |
+ '</div>'
|
| 333 |
)
|
| 334 |
|
|
@@ -356,12 +371,24 @@ def render_action_cards(passports: list, scene_trace: dict) -> str:
|
|
| 356 |
return "\n".join(cards)
|
| 357 |
|
| 358 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
def _render_action_card(idx: int, p, det: dict | None) -> str:
|
| 360 |
cls = p.identity.class_
|
| 361 |
emoji, display_name = _class_look(cls)
|
| 362 |
primary = p.next_best_action.primary
|
| 363 |
verb, bin_label, accent = _action_label(primary)
|
| 364 |
confidence_pct = int(round(p.identity.confidence * 100))
|
|
|
|
|
|
|
| 365 |
# Guardrail accent colors are constants we control; render via the `safe`
|
| 366 |
# template values where the source is the model.
|
| 367 |
|
|
@@ -403,7 +430,8 @@ def _render_action_card(idx: int, p, det: dict | None) -> str:
|
|
| 403 |
f' <span class="card-num">{int(idx)}</span>'
|
| 404 |
f' <span class="card-emoji">{safe(emoji)}</span>'
|
| 405 |
f' <span class="card-name">{safe(display_name)}</span>'
|
| 406 |
-
f'
|
|
|
|
| 407 |
f' <div class="card-badge badge-hazard">⚠️ Hazard · {safe(sev_label)}</div>'
|
| 408 |
f' </div>'
|
| 409 |
f' <div class="card-body">'
|
|
@@ -429,10 +457,11 @@ def _render_action_card(idx: int, p, det: dict | None) -> str:
|
|
| 429 |
f' <span class="card-num">{int(idx)}</span>'
|
| 430 |
f' <span class="card-emoji">{safe(emoji)}</span>'
|
| 431 |
f' <span class="card-name">{safe(display_name)}</span>'
|
| 432 |
-
f'
|
| 433 |
-
f' <div
|
| 434 |
-
f'
|
| 435 |
-
|
|
|
|
| 436 |
f' <div class="card-body">'
|
| 437 |
f' <div class="card-action">→ {safe(verb)}</div>'
|
| 438 |
+ (f' <div class="card-reason">{safe(reason)}</div>' if reason else '')
|
|
@@ -528,9 +557,11 @@ def _head_from_taxonomy(uri: str | None) -> str | None:
|
|
| 528 |
# Run handlers
|
| 529 |
# =====================================================================
|
| 530 |
|
| 531 |
-
def run(image_path: str | None,
|
| 532 |
-
"""
|
| 533 |
-
|
|
|
|
|
|
|
| 534 |
if image_path is None:
|
| 535 |
return (
|
| 536 |
None,
|
|
@@ -565,7 +596,7 @@ def run(image_path: str | None, head: str, jurisdiction: str) -> tuple:
|
|
| 565 |
image_path=Path(safe_image_path),
|
| 566 |
jurisdiction=jurisdiction.strip() or None,
|
| 567 |
)
|
| 568 |
-
passports, scene_trace = engine.
|
| 569 |
|
| 570 |
annotated = render_bbox_overlay(safe_image_path, passports) if passports else Image.open(safe_image_path)
|
| 571 |
passports_json = [p.to_dict() for p in passports]
|
|
@@ -1190,6 +1221,35 @@ html, body, gradio-app, .gradio-container {
|
|
| 1190 |
border: 1px solid rgba(0, 217, 126, 0.22);
|
| 1191 |
}
|
| 1192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1193 |
/* ===== Empty state ===== */
|
| 1194 |
.empty-state {
|
| 1195 |
text-align: center;
|
|
@@ -1231,10 +1291,10 @@ HERO_HTML = """
|
|
| 1231 |
|
| 1232 |
def build_examples() -> list[list]:
|
| 1233 |
rows = []
|
| 1234 |
-
for
|
| 1235 |
p = EXAMPLES_DIR / fname
|
| 1236 |
if p.exists():
|
| 1237 |
-
rows.append([str(p),
|
| 1238 |
return rows
|
| 1239 |
|
| 1240 |
|
|
@@ -1245,29 +1305,30 @@ with gr.Blocks(title="Matter — Material Intelligence") as demo:
|
|
| 1245 |
with gr.Column(scale=5):
|
| 1246 |
gr.Markdown("### Capture")
|
| 1247 |
image_in = gr.Image(
|
| 1248 |
-
label="
|
| 1249 |
type="filepath",
|
| 1250 |
height=320,
|
| 1251 |
sources=["upload", "webcam", "clipboard"],
|
| 1252 |
)
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
|
|
|
| 1258 |
)
|
| 1259 |
juris_in = gr.Textbox(
|
| 1260 |
-
label="Jurisdiction (optional
|
| 1261 |
-
placeholder="
|
| 1262 |
value="",
|
| 1263 |
)
|
| 1264 |
-
run_btn = gr.Button("Generate
|
| 1265 |
|
| 1266 |
ex = build_examples()
|
| 1267 |
if ex:
|
| 1268 |
gr.Examples(
|
| 1269 |
examples=ex,
|
| 1270 |
-
inputs=[image_in,
|
| 1271 |
label="Sample materials",
|
| 1272 |
examples_per_page=6,
|
| 1273 |
)
|
|
@@ -1312,7 +1373,7 @@ with gr.Blocks(title="Matter — Material Intelligence") as demo:
|
|
| 1312 |
|
| 1313 |
run_btn.click(
|
| 1314 |
run,
|
| 1315 |
-
inputs=[image_in,
|
| 1316 |
outputs=[annotated_out, kpi_out, cards_out, technical_out, json_out],
|
| 1317 |
)
|
| 1318 |
|
|
|
|
| 48 |
ROOT = Path(__file__).parent
|
| 49 |
EXAMPLES_DIR = ROOT / "examples"
|
| 50 |
|
|
|
|
|
|
|
| 51 |
SAMPLE_IMAGES: dict[str, str] = {
|
| 52 |
"domestic": "domestic_pet_bottle.jpg",
|
| 53 |
"ewaste": "ewaste_dead_laptop.jpg",
|
|
|
|
| 308 |
juris = scene_trace.get("metadata", {}).get("jurisdiction", "")
|
| 309 |
juris_short = juris.split(" (")[0].strip() or "—"
|
| 310 |
|
| 311 |
+
# 4th tile: in universal mode, surface the heads detected; otherwise the
|
| 312 |
+
# legacy single-head jurisdiction.
|
| 313 |
+
heads_seen = scene_trace.get("metadata", {}).get("heads_seen") or []
|
| 314 |
+
if heads_seen:
|
| 315 |
+
if len(heads_seen) == 1:
|
| 316 |
+
tile_emoji = "📋"
|
| 317 |
+
tile_value = DOMAIN_LABELS.get(heads_seen[0], heads_seen[0].title())
|
| 318 |
+
tile_label = "domain"
|
| 319 |
+
else:
|
| 320 |
+
tile_emoji = "🌐"
|
| 321 |
+
tile_value = ", ".join(DOMAIN_LABELS.get(h, h.title()) for h in heads_seen)
|
| 322 |
+
tile_label = "domains involved"
|
| 323 |
+
else:
|
| 324 |
+
tile_emoji = "📋"
|
| 325 |
+
tile_value = juris_short
|
| 326 |
+
tile_label = "jurisdiction"
|
| 327 |
+
|
| 328 |
co2_class = "kpi-num"
|
| 329 |
co2_color = "" if total_co2 >= 0 else 'style="color:#ffb547;"'
|
| 330 |
hazard_class = "kpi-card kpi-card-alert" if hazards_caught else "kpi-card"
|
|
|
|
| 341 |
+ f'<div class="{hazard_class}"><div class="kpi-emoji">{hazard_emoji}</div>'
|
| 342 |
f'<div class="kpi-num">{hazards_caught}</div>'
|
| 343 |
f'<div class="kpi-label">{"hazard caught" if hazards_caught == 1 else "hazards caught"}</div></div>'
|
| 344 |
+
+ f'<div class="kpi-card"><div class="kpi-emoji">{tile_emoji}</div>'
|
| 345 |
+
f'<div class="kpi-num kpi-num-small">{safe(tile_value)}</div>'
|
| 346 |
+
f'<div class="kpi-label">{safe(tile_label)}</div></div>'
|
| 347 |
+ '</div>'
|
| 348 |
)
|
| 349 |
|
|
|
|
| 371 |
return "\n".join(cards)
|
| 372 |
|
| 373 |
|
| 374 |
+
# Display label for each head key. Looked up when rendering the per-card
# domain pill and the domain tile of the KPI strip; callers fall back to
# `head.title()` for any key that is missing here.
DOMAIN_LABELS: dict[str, str] = {
    "domestic": "Domestic",
    "ewaste": "E-waste",
    "ev": "EV battery",
    "medical": "Medical",
    "cd": "C&D",
    "textile": "Textile",
}
|
| 382 |
+
|
| 383 |
+
|
| 384 |
def _render_action_card(idx: int, p, det: dict | None) -> str:
|
| 385 |
cls = p.identity.class_
|
| 386 |
emoji, display_name = _class_look(cls)
|
| 387 |
primary = p.next_best_action.primary
|
| 388 |
verb, bin_label, accent = _action_label(primary)
|
| 389 |
confidence_pct = int(round(p.identity.confidence * 100))
|
| 390 |
+
head = (det or {}).get("head") or _head_from_taxonomy(p.identity.taxonomy) or ""
|
| 391 |
+
domain_label = DOMAIN_LABELS.get(head, head.title() if head else "")
|
| 392 |
# Guardrail accent colors are constants we control; render via the `safe`
|
| 393 |
# template values where the source is the model.
|
| 394 |
|
|
|
|
| 430 |
f' <span class="card-num">{int(idx)}</span>'
|
| 431 |
f' <span class="card-emoji">{safe(emoji)}</span>'
|
| 432 |
f' <span class="card-name">{safe(display_name)}</span>'
|
| 433 |
+
+ (f' <span class="domain-pill">{safe(domain_label)}</span>' if domain_label else '')
|
| 434 |
+
+ f' </div>'
|
| 435 |
f' <div class="card-badge badge-hazard">⚠️ Hazard · {safe(sev_label)}</div>'
|
| 436 |
f' </div>'
|
| 437 |
f' <div class="card-body">'
|
|
|
|
| 457 |
f' <span class="card-num">{int(idx)}</span>'
|
| 458 |
f' <span class="card-emoji">{safe(emoji)}</span>'
|
| 459 |
f' <span class="card-name">{safe(display_name)}</span>'
|
| 460 |
+
+ (f' <span class="domain-pill">{safe(domain_label)}</span>' if domain_label else '')
|
| 461 |
+
+ f' </div>'
|
| 462 |
+
+ f' <div class="card-badge" style="background:linear-gradient(135deg,{accent}33,{accent}11);'
|
| 463 |
+
f'border:1px solid {accent}55;color:{accent};">{safe(bin_label)}</div>'
|
| 464 |
+
f' </div>'
|
| 465 |
f' <div class="card-body">'
|
| 466 |
f' <div class="card-action">→ {safe(verb)}</div>'
|
| 467 |
+ (f' <div class="card-reason">{safe(reason)}</div>' if reason else '')
|
|
|
|
| 557 |
# Run handlers
|
| 558 |
# =====================================================================
|
| 559 |
|
| 560 |
+
def run(image_path: str | None, jurisdiction: str = "") -> tuple:
|
| 561 |
+
"""Universal-mode inference (head auto-detected per detection).
|
| 562 |
+
|
| 563 |
+
Returns (annotated_image, kpi_strip, action_cards, technical_details,
|
| 564 |
+
scene_json)."""
|
| 565 |
if image_path is None:
|
| 566 |
return (
|
| 567 |
None,
|
|
|
|
| 596 |
image_path=Path(safe_image_path),
|
| 597 |
jurisdiction=jurisdiction.strip() or None,
|
| 598 |
)
|
| 599 |
+
passports, scene_trace = engine.infer_universal_with_trace(capture)
|
| 600 |
|
| 601 |
annotated = render_bbox_overlay(safe_image_path, passports) if passports else Image.open(safe_image_path)
|
| 602 |
passports_json = [p.to_dict() for p in passports]
|
|
|
|
| 1221 |
border: 1px solid rgba(0, 217, 126, 0.22);
|
| 1222 |
}
|
| 1223 |
|
| 1224 |
+
/* ===== Domain pill (per-detection) + auto-route note ===== */
|
| 1225 |
+
.domain-pill {
|
| 1226 |
+
display: inline-flex;
|
| 1227 |
+
align-items: center;
|
| 1228 |
+
padding: 3px 10px;
|
| 1229 |
+
margin-left: 6px;
|
| 1230 |
+
border-radius: 999px;
|
| 1231 |
+
font-size: 0.66rem;
|
| 1232 |
+
letter-spacing: 0.10em;
|
| 1233 |
+
text-transform: uppercase;
|
| 1234 |
+
font-weight: 700;
|
| 1235 |
+
font-family: "JetBrains Mono", ui-monospace, monospace;
|
| 1236 |
+
color: #c4d8cd;
|
| 1237 |
+
background: rgba(125, 211, 168, 0.10);
|
| 1238 |
+
border: 1px solid rgba(125, 211, 168, 0.28);
|
| 1239 |
+
white-space: nowrap;
|
| 1240 |
+
}
|
| 1241 |
+
.auto-route-note {
|
| 1242 |
+
margin: 8px 0 14px;
|
| 1243 |
+
padding: 12px 14px;
|
| 1244 |
+
border-radius: 12px;
|
| 1245 |
+
background: linear-gradient(135deg, rgba(0, 217, 126, 0.06), rgba(0, 229, 255, 0.03));
|
| 1246 |
+
border: 1px solid rgba(125, 211, 168, 0.24);
|
| 1247 |
+
color: #c4d8cd;
|
| 1248 |
+
font-size: 0.86rem;
|
| 1249 |
+
line-height: 1.45;
|
| 1250 |
+
}
|
| 1251 |
+
.auto-route-note strong { color: #f1faf4; }
|
| 1252 |
+
|
| 1253 |
/* ===== Empty state ===== */
|
| 1254 |
.empty-state {
|
| 1255 |
text-align: center;
|
|
|
|
| 1291 |
|
| 1292 |
def build_examples() -> list[list]:
    """Build the rows for the examples gallery.

    Returns:
        One ``[image_path, jurisdiction]`` row per entry of ``SAMPLE_IMAGES``
        whose file exists under ``EXAMPLES_DIR``; missing files are skipped.
        The jurisdiction cell is always the empty string, so the run handler
        passes no jurisdiction and each detection uses its head's default.
    """
    # Only the file names are needed: the head keys of SAMPLE_IMAGES are not
    # used here because the head is auto-detected per detection.
    candidates = (EXAMPLES_DIR / fname for fname in SAMPLE_IMAGES.values())
    return [[str(path), ""] for path in candidates if path.exists()]
|
| 1299 |
|
| 1300 |
|
|
|
|
| 1305 |
with gr.Column(scale=5):
|
| 1306 |
gr.Markdown("### Capture")
|
| 1307 |
image_in = gr.Image(
|
| 1308 |
+
label="Upload an image",
|
| 1309 |
type="filepath",
|
| 1310 |
height=320,
|
| 1311 |
sources=["upload", "webcam", "clipboard"],
|
| 1312 |
)
|
| 1313 |
+
gr.Markdown(
|
| 1314 |
+
"<div class=\"auto-route-note\">"
|
| 1315 |
+
"🤖 <strong>Domain auto-detected</strong> — Gemma 4 routes each item "
|
| 1316 |
+
"to the right material domain on its own. Mix bottles, batteries, "
|
| 1317 |
+
"and medical waste in one photo if you like."
|
| 1318 |
+
"</div>"
|
| 1319 |
)
|
| 1320 |
juris_in = gr.Textbox(
|
| 1321 |
+
label="Jurisdiction (optional)",
|
| 1322 |
+
placeholder="defaults to the per-domain jurisdiction",
|
| 1323 |
value="",
|
| 1324 |
)
|
| 1325 |
+
run_btn = gr.Button("Generate Passports", variant="primary", size="lg")
|
| 1326 |
|
| 1327 |
ex = build_examples()
|
| 1328 |
if ex:
|
| 1329 |
gr.Examples(
|
| 1330 |
examples=ex,
|
| 1331 |
+
inputs=[image_in, juris_in],
|
| 1332 |
label="Sample materials",
|
| 1333 |
examples_per_page=6,
|
| 1334 |
)
|
|
|
|
| 1373 |
|
| 1374 |
run_btn.click(
|
| 1375 |
run,
|
| 1376 |
+
inputs=[image_in, juris_in],
|
| 1377 |
outputs=[annotated_out, kpi_out, cards_out, technical_out, json_out],
|
| 1378 |
)
|
| 1379 |
|
|
@@ -25,7 +25,7 @@ from typing import Protocol, runtime_checkable
|
|
| 25 |
from matter import calibration as cal
|
| 26 |
from matter.guardrail import apply_guardrail, load_rules as load_safety_rules
|
| 27 |
from matter.hazard_flagger import apply_hazard_flagger, load_hazard_rules
|
| 28 |
-
from matter.heads import HEADS, build_prompt, build_scene_prompt
|
| 29 |
from matter.impact import Co2eEntry, enrich_with_co2e, load_factors as load_co2e_factors
|
| 30 |
from matter.passport import (
|
| 31 |
Capture,
|
|
@@ -513,6 +513,89 @@ class MIE:
|
|
| 513 |
}
|
| 514 |
return passports, scene_trace
|
| 515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
# ------------------------ shared per-detection pipeline --------------
|
| 517 |
|
| 518 |
def _run_detection_pipeline(
|
|
|
|
| 25 |
from matter import calibration as cal
|
| 26 |
from matter.guardrail import apply_guardrail, load_rules as load_safety_rules
|
| 27 |
from matter.hazard_flagger import apply_hazard_flagger, load_hazard_rules
|
| 28 |
+
from matter.heads import HEADS, build_prompt, build_scene_prompt, build_universal_prompt
|
| 29 |
from matter.impact import Co2eEntry, enrich_with_co2e, load_factors as load_co2e_factors
|
| 30 |
from matter.passport import (
|
| 31 |
Capture,
|
|
|
|
| 513 |
}
|
| 514 |
return passports, scene_trace
|
| 515 |
|
| 516 |
+
# --------------------- universal mode (auto-head) --------------------
|
| 517 |
+
|
| 518 |
+
def infer_universal(self, capture: CaptureInput) -> list[Passport]:
    """Run cross-head inference without a caller-selected domain.

    The model is shown every head's taxonomy and tags each detection with a
    `head` field, so a mixed-domain scene (a syringe + a bottle + a battery
    in one photo) gets correct per-item routing.

    Convenience wrapper around `infer_universal_with_trace` that drops the
    scene trace and returns only the passports.
    """
    return self.infer_universal_with_trace(capture)[0]
|
| 529 |
+
|
| 530 |
+
def infer_universal_with_trace(
    self, capture: CaptureInput
) -> tuple[list[Passport], dict]:
    """Run universal (auto-head) inference and return passports plus a trace.

    A single model call is made with the cross-head prompt. Every returned
    object names its own `head` and is pushed through the shared
    per-detection pipeline for that head.

    Args:
        capture: The capture to analyse. `capture.jurisdiction`, when set,
            is used for every detection instead of the head's default.

    Returns:
        `(passports, scene_trace)`. `scene_trace["detections"]` holds one
        entry per object the model returned: the pipeline trace (plus `head`
        and `passport_id`) on success, or an `error` record when the object
        was rejected. `scene_trace["metadata"]["heads_seen"]` is the sorted
        list of heads that produced at least one passport.
    """
    # NOTE(review): the prompt always lists each head's default jurisdiction,
    # even when capture.jurisdiction is set — confirm this is intended.
    prompt = build_universal_prompt()
    raw = self.runtime.infer(prompt, capture.image_path)
    parsed_scene = _parse_json_block(raw)
    objects = parsed_scene.get("objects")
    if objects is None and (
        parsed_scene.get("identity") is not None
        # The universal schema has no top-level `identity` key, so also
        # accept a bare single record identified by its `head` field.
        or parsed_scene.get("head") is not None
    ):
        objects = [parsed_scene]
    objects = objects or []

    modality, content_hash = _content_hash(capture)
    passports: list[Passport] = []
    detection_traces: list[dict] = []
    heads_seen: set[str] = set()

    for obj in objects:
        # Model output is untrusted: a malformed entry must become an error
        # trace for that entry, not an exception that aborts the whole scene.
        if not isinstance(obj, dict):
            detection_traces.append({
                "error": f"detection is not a JSON object: {type(obj).__name__}",
                "raw_object": obj,
            })
            continue
        head_name = obj.get("head")
        # The isinstance check keeps an unhashable value (list/dict) from
        # raising TypeError in the `in HEADS` lookup.
        if not isinstance(head_name, str) or head_name not in HEADS:
            detection_traces.append({
                "error": f"head {head_name!r} not in {list(HEADS)}",
                "raw_object": obj,
            })
            continue
        head = HEADS[head_name]
        jurisdiction = capture.jurisdiction or head.default_jurisdiction
        normalized = _normalize_detection(obj)
        try:
            passport, det_trace = self._run_detection_pipeline(
                normalized=normalized,
                head=head,
                head_name=head_name,
                jurisdiction=jurisdiction,
                modality=modality,
                content_hash=content_hash,
                geohash=capture.geohash_coarse,
            )
        except MIEError as e:
            detection_traces.append({
                "error": str(e),
                "raw_object": obj,
                "head": head_name,
            })
            continue
        passports.append(passport)
        heads_seen.add(head_name)
        detection_traces.append({
            **det_trace,
            "head": head_name,
            "passport_id": passport.passport_id,
        })

    scene_trace = {
        "raw_output": raw,
        "parsed_scene": parsed_scene,
        "detections": detection_traces,
        "metadata": {
            "mode": "universal",
            "heads_seen": sorted(heads_seen),
            "runtime": self.runtime.name,
            "model_id": self.runtime.model_id,
            "n_objects_detected": len(objects),
            "n_passports_produced": len(passports),
        },
    }
    return passports, scene_trace
|
| 598 |
+
|
| 599 |
# ------------------------ shared per-detection pipeline --------------
|
| 600 |
|
| 601 |
def _run_detection_pipeline(
|
|
@@ -160,3 +160,67 @@ def build_scene_prompt(
|
|
| 160 |
nba_classes=", ".join(head.nba_classes),
|
| 161 |
max_objects=max_objects,
|
| 162 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
nba_classes=", ".join(head.nba_classes),
|
| 161 |
max_objects=max_objects,
|
| 162 |
)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
# ============================ universal prompt ============================
|
| 166 |
+
# Lists every head's classes + NBAs with the head name attached, so the model
|
| 167 |
+
# routes each detection to the right domain in one call. Mixed-domain scenes
|
| 168 |
+
# (a syringe + a bottle + a battery in the same photo) get correct per-item
|
| 169 |
+
# routing without the user picking a head upfront.
|
| 170 |
+
|
| 171 |
+
UNIVERSAL_PROMPT_TEMPLATE = """You are a Material Intelligence model. Identify the most prominent distinct material objects in the image. RETURN AT MOST {max_objects} OBJECTS — pick the most important. Skip background, hands, faces, text, and items that don't fit any allowed class.
|
| 172 |
+
|
| 173 |
+
We support six material domains. Each detection MUST declare which domain it belongs to via the "head" field. Use the (head, class) pairing below — if a class is listed under one head, that's the only head it can be paired with.
|
| 174 |
+
|
| 175 |
+
{taxonomy_block}
|
| 176 |
+
|
| 177 |
+
For EACH object, return a compact JSON record:
|
| 178 |
+
- "head": one of the six domain names above ("domestic", "ewaste", "ev", "medical", "cd", "textile")
|
| 179 |
+
- "class": one of the identity classes for the chosen head
|
| 180 |
+
- "subclass": 2-3 word descriptor (e.g. "PET water bottle")
|
| 181 |
+
- "bbox": [x1, y1, x2, y2] normalized 0-1 (top-left → bottom-right)
|
| 182 |
+
- "bbox_label": ≤4 words (e.g. "bottle on left")
|
| 183 |
+
- "confidence": float 0-1
|
| 184 |
+
- "reason": ONE short clause, ≤12 words
|
| 185 |
+
- "state": {{"condition": "good|degraded|contaminated|unknown", "hazard_flags": [...], "confidence": <0-1>}}
|
| 186 |
+
- "next_best_action": {{"primary": "<NBA from the chosen head>", "secondary": null, "do_not": [], "confidence": <0-1>}}
|
| 187 |
+
|
| 188 |
+
CONFIDENCE RUBRIC:
|
| 189 |
+
- 0.95-1.00 unambiguous
|
| 190 |
+
- 0.80-0.94 likely correct, minor ambiguity
|
| 191 |
+
- 0.60-0.79 best guess, weak/occluded evidence
|
| 192 |
+
- 0.40-0.59 coin-flip
|
| 193 |
+
- below 0.40 uncertain — prefer the "other" class within the most likely head
|
| 194 |
+
|
| 195 |
+
SAFETY: For sharps, diagnostics, batteries, or pharmaceuticals, route to the safest action.
|
| 196 |
+
|
| 197 |
+
Respond ONLY with valid JSON, NO markdown fences, in this shape:
|
| 198 |
+
{{"objects": [{{"head": "...", "class": "...", "subclass": "...", "bbox": [0.0, 0.0, 1.0, 1.0], "bbox_label": "...", "confidence": 0.0, "reason": "...", "state": {{...}}, "next_best_action": {{...}}}}]}}
|
| 199 |
+
|
| 200 |
+
If nothing in the image fits any of the allowed classes, return {{"objects": []}}.
|
| 201 |
+
"""
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def build_universal_prompt(
|
| 205 |
+
jurisdiction_per_head: dict[str, str] | None = None,
|
| 206 |
+
max_objects: int = 6,
|
| 207 |
+
) -> str:
|
| 208 |
+
"""Cross-head prompt that lets Gemma route each detection to its own domain.
|
| 209 |
+
|
| 210 |
+
`jurisdiction_per_head` lets callers override the default jurisdiction for
|
| 211 |
+
specific heads (e.g. running the Space in a different region). Defaults to
|
| 212 |
+
each head's `default_jurisdiction`.
|
| 213 |
+
"""
|
| 214 |
+
blocks: list[str] = []
|
| 215 |
+
for head_name, head in HEADS.items():
|
| 216 |
+
juris = (jurisdiction_per_head or {}).get(head_name, head.default_jurisdiction)
|
| 217 |
+
blocks.append(
|
| 218 |
+
f"== {head_name} ==\n"
|
| 219 |
+
f" jurisdiction: {juris}\n"
|
| 220 |
+
f" classes: {', '.join(head.identity_classes)}\n"
|
| 221 |
+
f" next_best_actions: {', '.join(head.nba_classes)}"
|
| 222 |
+
)
|
| 223 |
+
return UNIVERSAL_PROMPT_TEMPLATE.format(
|
| 224 |
+
taxonomy_block="\n\n".join(blocks),
|
| 225 |
+
max_objects=max_objects,
|
| 226 |
+
)
|