Spaces:

ashu-1069
/

matter

Sleeping

ashu1069 committed on May 2

Commit

f1493d4

1 Parent(s): e8956ff

ui: drop head selector, switch to universal mode + domain badges

The head dropdown is gone — Gemma 4 now auto-detects the domain per
detection via universal mode. Drops a UI element AND lifts the
single-domain-per-photo restriction.

- run() signature: (image, jurisdiction) — no more head argument
- Routes through engine.infer_universal_with_trace
- Each action card gets a small domain pill ('Domestic', 'EV battery',
'Medical', etc.) next to the class name, color-matched to the theme
- KPI strip's 4th tile is dynamic:
* single-domain scene → 'Domain · Domestic'
* mixed-domain scene → '🌐 Domains involved · Domestic, Medical, ...'
- Friendly note above the jurisdiction box explains auto-routing
- Examples gallery pre-fills only (image, jurisdiction)

Killer demo: upload a photo with a bottle, a battery, and a syringe →
three Passports, three domains, three correct routings, in one inference.

Files changed (3) hide show

app.py +88 -27
matter/engine.py +84 -1
matter/heads.py +64 -0

app.py CHANGED Viewed

@@ -48,8 +48,6 @@ DEFAULT_BBOX_COLOR = "#00d97e"
 ROOT = Path(__file__).parent
 EXAMPLES_DIR = ROOT / "examples"
-HEAD_NAMES = list(HEADS.keys())  # domestic, ewaste, ev, medical, cd, textile
 SAMPLE_IMAGES: dict[str, str] = {
     "domestic": "domestic_pet_bottle.jpg",
     "ewaste":   "ewaste_dead_laptop.jpg",
@@ -310,6 +308,23 @@ def render_kpi_strip(passports: list, scene_trace: dict) -> str:
     juris = scene_trace.get("metadata", {}).get("jurisdiction", "")
     juris_short = juris.split(" (")[0].strip() or "—"
     co2_class = "kpi-num"
     co2_color = "" if total_co2 >= 0 else 'style="color:#ffb547;"'
     hazard_class = "kpi-card kpi-card-alert" if hazards_caught else "kpi-card"
@@ -326,9 +341,9 @@ def render_kpi_strip(passports: list, scene_trace: dict) -> str:
         + f'<div class="{hazard_class}"><div class="kpi-emoji">{hazard_emoji}</div>'
           f'<div class="kpi-num">{hazards_caught}</div>'
           f'<div class="kpi-label">{"hazard caught" if hazards_caught == 1 else "hazards caught"}</div></div>'
-        + f'<div class="kpi-card"><div class="kpi-emoji">📋</div>'
-          f'<div class="kpi-num kpi-num-small">{safe(juris_short)}</div>'
-          f'<div class="kpi-label">jurisdiction</div></div>'
         + '</div>'
     )
@@ -356,12 +371,24 @@ def render_action_cards(passports: list, scene_trace: dict) -> str:
     return "\n".join(cards)
 def _render_action_card(idx: int, p, det: dict | None) -> str:
     cls = p.identity.class_
     emoji, display_name = _class_look(cls)
     primary = p.next_best_action.primary
     verb, bin_label, accent = _action_label(primary)
     confidence_pct = int(round(p.identity.confidence * 100))
     # Guardrail accent colors are constants we control; render via the `safe`
     # template values where the source is the model.
@@ -403,7 +430,8 @@ def _render_action_card(idx: int, p, det: dict | None) -> str:
             f'      <span class="card-num">{int(idx)}</span>'
             f'      <span class="card-emoji">{safe(emoji)}</span>'
             f'      <span class="card-name">{safe(display_name)}</span>'
-            f'    </div>'
             f'    <div class="card-badge badge-hazard">⚠️ Hazard · {safe(sev_label)}</div>'
             f'  </div>'
             f'  <div class="card-body">'
@@ -429,10 +457,11 @@ def _render_action_card(idx: int, p, det: dict | None) -> str:
         f'      <span class="card-num">{int(idx)}</span>'
         f'      <span class="card-emoji">{safe(emoji)}</span>'
         f'      <span class="card-name">{safe(display_name)}</span>'
-        f'    </div>'
-        f'    <div class="card-badge" style="background:linear-gradient(135deg,{accent}33,{accent}11);'
-        f'border:1px solid {accent}55;color:{accent};">{safe(bin_label)}</div>'
-        f'  </div>'
         f'  <div class="card-body">'
         f'    <div class="card-action">→ {safe(verb)}</div>'
         + (f'    <div class="card-reason">{safe(reason)}</div>' if reason else '')
@@ -528,9 +557,11 @@ def _head_from_taxonomy(uri: str | None) -> str | None:
 # Run handlers
 # =====================================================================
-def run(image_path: str | None, head: str, jurisdiction: str) -> tuple:
-    """Scene-mode inference. Returns (annotated_image, kpi_strip,
-    action_cards, technical_details, scene_json)."""
     if image_path is None:
         return (
             None,
@@ -565,7 +596,7 @@ def run(image_path: str | None, head: str, jurisdiction: str) -> tuple:
             image_path=Path(safe_image_path),
             jurisdiction=jurisdiction.strip() or None,
         )
-        passports, scene_trace = engine.infer_scene_with_trace(capture, head)
         annotated = render_bbox_overlay(safe_image_path, passports) if passports else Image.open(safe_image_path)
         passports_json = [p.to_dict() for p in passports]
@@ -1190,6 +1221,35 @@ html, body, gradio-app, .gradio-container {
   border: 1px solid rgba(0, 217, 126, 0.22);
 }
 /* ===== Empty state ===== */
 .empty-state {
   text-align: center;
@@ -1231,10 +1291,10 @@ HERO_HTML = """
 def build_examples() -> list[list]:
     rows = []
-    for head, fname in SAMPLE_IMAGES.items():
         p = EXAMPLES_DIR / fname
         if p.exists():
-            rows.append([str(p), head, HEADS[head].default_jurisdiction])
     return rows
@@ -1245,29 +1305,30 @@ with gr.Blocks(title="Matter — Material Intelligence") as demo:
         with gr.Column(scale=5):
             gr.Markdown("### Capture")
             image_in = gr.Image(
-                label="Material image",
                 type="filepath",
                 height=320,
                 sources=["upload", "webcam", "clipboard"],
             )
-            head_in = gr.Dropdown(
-                label="Material head",
-                choices=HEAD_NAMES,
-                value="domestic",
-                info="Which taxonomy and prompt to use.",
             )
             juris_in = gr.Textbox(
-                label="Jurisdiction (optional override)",
-                placeholder="leave blank to use the head default",
                 value="",
             )
-            run_btn = gr.Button("Generate Passport", variant="primary", size="lg")
             ex = build_examples()
             if ex:
                 gr.Examples(
                     examples=ex,
-                    inputs=[image_in, head_in, juris_in],
                     label="Sample materials",
                     examples_per_page=6,
                 )
@@ -1312,7 +1373,7 @@ with gr.Blocks(title="Matter — Material Intelligence") as demo:
     run_btn.click(
         run,
-        inputs=[image_in, head_in, juris_in],
         outputs=[annotated_out, kpi_out, cards_out, technical_out, json_out],
     )

 ROOT = Path(__file__).parent
 EXAMPLES_DIR = ROOT / "examples"
 SAMPLE_IMAGES: dict[str, str] = {
     "domestic": "domestic_pet_bottle.jpg",
     "ewaste":   "ewaste_dead_laptop.jpg",
     juris = scene_trace.get("metadata", {}).get("jurisdiction", "")
     juris_short = juris.split(" (")[0].strip() or "—"
+    # 4th tile: in universal mode, surface the heads detected; otherwise the
+    # legacy single-head jurisdiction.
+    heads_seen = scene_trace.get("metadata", {}).get("heads_seen") or []
+    if heads_seen:
+        if len(heads_seen) == 1:
+            tile_emoji = "📋"
+            tile_value = DOMAIN_LABELS.get(heads_seen[0], heads_seen[0].title())
+            tile_label = "domain"
+        else:
+            tile_emoji = "🌐"
+            tile_value = ", ".join(DOMAIN_LABELS.get(h, h.title()) for h in heads_seen)
+            tile_label = "domains involved"
+    else:
+        tile_emoji = "📋"
+        tile_value = juris_short
+        tile_label = "jurisdiction"
     co2_class = "kpi-num"
     co2_color = "" if total_co2 >= 0 else 'style="color:#ffb547;"'
     hazard_class = "kpi-card kpi-card-alert" if hazards_caught else "kpi-card"
         + f'<div class="{hazard_class}"><div class="kpi-emoji">{hazard_emoji}</div>'
           f'<div class="kpi-num">{hazards_caught}</div>'
           f'<div class="kpi-label">{"hazard caught" if hazards_caught == 1 else "hazards caught"}</div></div>'
+        + f'<div class="kpi-card"><div class="kpi-emoji">{tile_emoji}</div>'
+          f'<div class="kpi-num kpi-num-small">{safe(tile_value)}</div>'
+          f'<div class="kpi-label">{safe(tile_label)}</div></div>'
         + '</div>'
     )
     return "\n".join(cards)
+# Human-readable display names for each material head, keyed by the head
+# identifier. Lookups in this file fall back to `head.title()` for any key
+# that is missing here.
+DOMAIN_LABELS: dict[str, str] = {
+    "domestic": "Domestic",
+    "ewaste":   "E-waste",
+    "ev":       "EV battery",
+    "medical":  "Medical",
+    "cd":       "C&D",
+    "textile":  "Textile",
+}
 def _render_action_card(idx: int, p, det: dict | None) -> str:
     cls = p.identity.class_
     emoji, display_name = _class_look(cls)
     primary = p.next_best_action.primary
     verb, bin_label, accent = _action_label(primary)
     confidence_pct = int(round(p.identity.confidence * 100))
+    head = (det or {}).get("head") or _head_from_taxonomy(p.identity.taxonomy) or ""
+    domain_label = DOMAIN_LABELS.get(head, head.title() if head else "")
     # Guardrail accent colors are constants we control; render via the `safe`
     # template values where the source is the model.
             f'      <span class="card-num">{int(idx)}</span>'
             f'      <span class="card-emoji">{safe(emoji)}</span>'
             f'      <span class="card-name">{safe(display_name)}</span>'
+            + (f'      <span class="domain-pill">{safe(domain_label)}</span>' if domain_label else '')
+            + f'    </div>'
             f'    <div class="card-badge badge-hazard">⚠️ Hazard · {safe(sev_label)}</div>'
             f'  </div>'
             f'  <div class="card-body">'
         f'      <span class="card-num">{int(idx)}</span>'
         f'      <span class="card-emoji">{safe(emoji)}</span>'
         f'      <span class="card-name">{safe(display_name)}</span>'
+        + (f'      <span class="domain-pill">{safe(domain_label)}</span>' if domain_label else '')
+        + f'    </div>'
+        + f'    <div class="card-badge" style="background:linear-gradient(135deg,{accent}33,{accent}11);'
+          f'border:1px solid {accent}55;color:{accent};">{safe(bin_label)}</div>'
+          f'  </div>'
         f'  <div class="card-body">'
         f'    <div class="card-action">→ {safe(verb)}</div>'
         + (f'    <div class="card-reason">{safe(reason)}</div>' if reason else '')
 # Run handlers
 # =====================================================================
+def run(image_path: str | None, jurisdiction: str = "") -> tuple:
+    """Universal-mode inference (head auto-detected per detection).
+    Returns (annotated_image, kpi_strip, action_cards, technical_details,
+    scene_json)."""
     if image_path is None:
         return (
             None,
             image_path=Path(safe_image_path),
             jurisdiction=jurisdiction.strip() or None,
         )
+        passports, scene_trace = engine.infer_universal_with_trace(capture)
         annotated = render_bbox_overlay(safe_image_path, passports) if passports else Image.open(safe_image_path)
         passports_json = [p.to_dict() for p in passports]
   border: 1px solid rgba(0, 217, 126, 0.22);
 }
+/* ===== Domain pill (per-detection) + auto-route note ===== */
+.domain-pill {
+  display: inline-flex;
+  align-items: center;
+  padding: 3px 10px;
+  margin-left: 6px;
+  border-radius: 999px;
+  font-size: 0.66rem;
+  letter-spacing: 0.10em;
+  text-transform: uppercase;
+  font-weight: 700;
+  font-family: "JetBrains Mono", ui-monospace, monospace;
+  color: #c4d8cd;
+  background: rgba(125, 211, 168, 0.10);
+  border: 1px solid rgba(125, 211, 168, 0.28);
+  white-space: nowrap;
+}
+.auto-route-note {
+  margin: 8px 0 14px;
+  padding: 12px 14px;
+  border-radius: 12px;
+  background: linear-gradient(135deg, rgba(0, 217, 126, 0.06), rgba(0, 229, 255, 0.03));
+  border: 1px solid rgba(125, 211, 168, 0.24);
+  color: #c4d8cd;
+  font-size: 0.86rem;
+  line-height: 1.45;
+}
+.auto-route-note strong { color: #f1faf4; }
 /* ===== Empty state ===== */
 .empty-state {
   text-align: center;
 def build_examples() -> list[list]:
     rows = []
+    for _head, fname in SAMPLE_IMAGES.items():
         p = EXAMPLES_DIR / fname
         if p.exists():
+            rows.append([str(p), ""])  # jurisdiction blank — let auto-routing pick the head's default
     return rows
         with gr.Column(scale=5):
             gr.Markdown("### Capture")
             image_in = gr.Image(
+                label="Upload an image",
                 type="filepath",
                 height=320,
                 sources=["upload", "webcam", "clipboard"],
             )
+            gr.Markdown(
+                "<div class=\"auto-route-note\">"
+                "🤖 <strong>Domain auto-detected</strong> — Gemma 4 routes each item "
+                "to the right material domain on its own. Mix bottles, batteries, "
+                "and medical waste in one photo if you like."
+                "</div>"
             )
             juris_in = gr.Textbox(
+                label="Jurisdiction (optional)",
+                placeholder="defaults to the per-domain jurisdiction",
                 value="",
             )
+            run_btn = gr.Button("Generate Passports", variant="primary", size="lg")
             ex = build_examples()
             if ex:
                 gr.Examples(
                     examples=ex,
+                    inputs=[image_in, juris_in],
                     label="Sample materials",
                     examples_per_page=6,
                 )
     run_btn.click(
         run,
+        inputs=[image_in, juris_in],
         outputs=[annotated_out, kpi_out, cards_out, technical_out, json_out],
     )

matter/engine.py CHANGED Viewed

@@ -25,7 +25,7 @@ from typing import Protocol, runtime_checkable
 from matter import calibration as cal
 from matter.guardrail import apply_guardrail, load_rules as load_safety_rules
 from matter.hazard_flagger import apply_hazard_flagger, load_hazard_rules
-from matter.heads import HEADS, build_prompt, build_scene_prompt
 from matter.impact import Co2eEntry, enrich_with_co2e, load_factors as load_co2e_factors
 from matter.passport import (
     Capture,
@@ -513,6 +513,89 @@ class MIE:
         }
         return passports, scene_trace
     # ------------------------ shared per-detection pipeline --------------
     def _run_detection_pipeline(

 from matter import calibration as cal
 from matter.guardrail import apply_guardrail, load_rules as load_safety_rules
 from matter.hazard_flagger import apply_hazard_flagger, load_hazard_rules
+from matter.heads import HEADS, build_prompt, build_scene_prompt, build_universal_prompt
 from matter.impact import Co2eEntry, enrich_with_co2e, load_factors as load_co2e_factors
 from matter.passport import (
     Capture,
         }
         return passports, scene_trace
+    # --------------------- universal mode (auto-head) --------------------
+    def infer_universal(
+        self, capture: CaptureInput
+    ) -> list[Passport]:
+        """Cross-head inference: caller doesn't pre-pick a domain.
+        Gemma sees ALL six taxonomies and routes each detection to the right
+        one via a `head` field. Mixed-domain scenes (a syringe + a bottle +
+        a battery in one photo) get correctly per-item routing.
+        """
+        passports, _ = self.infer_universal_with_trace(capture)
+        return passports
+    def infer_universal_with_trace(
+        self, capture: CaptureInput
+    ) -> tuple[list[Passport], dict]:
+        """Universal inference with full per-detection trace. The scene trace's
+        `metadata.heads_seen` lists the unique heads detected in this image."""
+        prompt = build_universal_prompt()
+        raw = self.runtime.infer(prompt, capture.image_path)
+        parsed_scene = _parse_json_block(raw)
+        objects = parsed_scene.get("objects")
+        if objects is None and parsed_scene.get("identity") is not None:
+            objects = [parsed_scene]
+        objects = objects or []
+        modality, content_hash = _content_hash(capture)
+        passports: list[Passport] = []
+        detection_traces: list[dict] = []
+        heads_seen: set[str] = set()
+        for obj in objects:
+            head_name = obj.get("head")
+            if head_name not in HEADS:
+                detection_traces.append({
+                    "error": f"head {head_name!r} not in {list(HEADS)}",
+                    "raw_object": obj,
+                })
+                continue
+            head = HEADS[head_name]
+            jurisdiction = capture.jurisdiction or head.default_jurisdiction
+            normalized = _normalize_detection(obj)
+            try:
+                passport, det_trace = self._run_detection_pipeline(
+                    normalized=normalized,
+                    head=head,
+                    head_name=head_name,
+                    jurisdiction=jurisdiction,
+                    modality=modality,
+                    content_hash=content_hash,
+                    geohash=capture.geohash_coarse,
+                )
+            except MIEError as e:
+                detection_traces.append({
+                    "error": str(e),
+                    "raw_object": obj,
+                    "head": head_name,
+                })
+                continue
+            passports.append(passport)
+            heads_seen.add(head_name)
+            detection_traces.append({
+                **det_trace,
+                "head": head_name,
+                "passport_id": passport.passport_id,
+            })
+        scene_trace = {
+            "raw_output": raw,
+            "parsed_scene": parsed_scene,
+            "detections": detection_traces,
+            "metadata": {
+                "mode": "universal",
+                "heads_seen": sorted(heads_seen),
+                "runtime": self.runtime.name,
+                "model_id": self.runtime.model_id,
+                "n_objects_detected": len(objects),
+                "n_passports_produced": len(passports),
+            },
+        }
+        return passports, scene_trace
     # ------------------------ shared per-detection pipeline --------------
     def _run_detection_pipeline(

matter/heads.py CHANGED Viewed

@@ -160,3 +160,67 @@ def build_scene_prompt(
         nba_classes=", ".join(head.nba_classes),
         max_objects=max_objects,
     )

         nba_classes=", ".join(head.nba_classes),
         max_objects=max_objects,
     )
+# ============================ universal prompt ============================
+# Lists every head's classes + NBAs with the head name attached, so the model
+# routes each detection to the right domain in one call. Mixed-domain scenes
+# (a syringe + a bottle + a battery in the same photo) get correct per-item
+# routing without the user picking a head upfront.
+#
+# Rendered with `str.format`: `{max_objects}` and `{taxonomy_block}` are the
+# only placeholders; doubled braces (`{{` / `}}`) are literal JSON braces.
+UNIVERSAL_PROMPT_TEMPLATE = """You are a Material Intelligence model. Identify the most prominent distinct material objects in the image. RETURN AT MOST {max_objects} OBJECTS — pick the most important. Skip background, hands, faces, text, and items that don't fit any allowed class.
+We support six material domains. Each detection MUST declare which domain it belongs to via the "head" field. Use the (head, class) pairing below — if a class is listed under one head, that's the only head it can be paired with.
+{taxonomy_block}
+For EACH object, return a compact JSON record:
+- "head": one of the six domain names above ("domestic", "ewaste", "ev", "medical", "cd", "textile")
+- "class": one of the identity classes for the chosen head
+- "subclass": 2-3 word descriptor (e.g. "PET water bottle")
+- "bbox": [x1, y1, x2, y2] normalized 0-1 (top-left → bottom-right)
+- "bbox_label": ≤4 words (e.g. "bottle on left")
+- "confidence": float 0-1
+- "reason": ONE short clause, ≤12 words
+- "state": {{"condition": "good|degraded|contaminated|unknown", "hazard_flags": [...], "confidence": <0-1>}}
+- "next_best_action": {{"primary": "<NBA from the chosen head>", "secondary": null, "do_not": [], "confidence": <0-1>}}
+CONFIDENCE RUBRIC:
+- 0.95-1.00  unambiguous
+- 0.80-0.94  likely correct, minor ambiguity
+- 0.60-0.79  best guess, weak/occluded evidence
+- 0.40-0.59  coin-flip
+- below 0.40 uncertain — prefer the "other" class within the most likely head
+SAFETY: For sharps, diagnostics, batteries, or pharmaceuticals, route to the safest action.
+Respond ONLY with valid JSON, NO markdown fences, in this shape:
+{{"objects": [{{"head": "...", "class": "...", "subclass": "...", "bbox": [0.0, 0.0, 1.0, 1.0], "bbox_label": "...", "confidence": 0.0, "reason": "...", "state": {{...}}, "next_best_action": {{...}}}}]}}
+If nothing in the image fits any of the allowed classes, return {{"objects": []}}.
+"""
+def build_universal_prompt(
+    jurisdiction_per_head: dict[str, str] | None = None,
+    max_objects: int = 6,
+) -> str:
+    """Cross-head prompt that lets Gemma route each detection to its own domain.
+    `jurisdiction_per_head` lets callers override the default jurisdiction for
+    specific heads (e.g. running the Space in a different region). Defaults to
+    each head's `default_jurisdiction`.
+    """
+    blocks: list[str] = []
+    for head_name, head in HEADS.items():
+        juris = (jurisdiction_per_head or {}).get(head_name, head.default_jurisdiction)
+        blocks.append(
+            f"== {head_name} ==\n"
+            f"  jurisdiction: {juris}\n"
+            f"  classes: {', '.join(head.identity_classes)}\n"
+            f"  next_best_actions: {', '.join(head.nba_classes)}"
+        )
+    return UNIVERSAL_PROMPT_TEMPLATE.format(
+        taxonomy_block="\n\n".join(blocks),
+        max_objects=max_objects,
+    )