Spaces:

ashu-1069
/

matter

Sleeping

ashu1069 committed on May 2

Commit

cc060d7

1 Parent(s): dabb502

ui: pipeline trace UI + verifier panel + insights banner

The Space now surfaces every MIE layer's intermediate state:
- Insights banner: punchy headline (CO2e avoided, guardrail fires, hazards)
- Layer A validator (D012): JSON shape + enum
- Layer B calibration (D015): raw vs calibrated table
- Layer C hazard auto-flagger (D019): before/added/after diff
- Layer D do_not guardrail (D018): proposed vs safe-default
- Verifier panel: independent attestation via matter.verifier
- Passport JSON

Backed by engine.infer_with_trace() which captures per-layer state
without changing infer()'s API. Demo mode shows passport + verifier
(no trace, since model didn't run); Live mode shows the full trace.

Files changed (3) hide show

app.py +267 -41
matter/engine.py +138 -0
matter/verifier.py +341 -0

app.py CHANGED Viewed

@@ -18,8 +18,11 @@ import gradio as gr
 from matter.engine import MIE, CaptureInput, MIEError
 from matter.heads import HEADS
 from transformers_runtime import TransformersRuntime
 ROOT = Path(__file__).parent
 EXAMPLES_DIR = ROOT / "examples"
 SPEC_EXAMPLES = ROOT / "spec" / "examples"
@@ -55,27 +58,95 @@ def get_engine() -> MIE:
     return MIE(runtime=_runtime, on_device=True)
 def render_summary(p: dict) -> str:
     ident = p.get("identity", {})
     state = p.get("state", {})
     nba = p.get("next_best_action", {})
     routing = p.get("routing", {})
     prov = p.get("provenance", {})
-    val = (p.get("value") or {}).get("environmental") or {}
     hazards = state.get("hazard_flags") or []
     do_not = nba.get("do_not") or []
     badge = "🟢 clear"
     if nba.get("fallback_used"):
-        badge = "🟡 guardrail fired — safe default applied"
     if any(h in {"biohazard", "sharps_injury_risk", "thermal_runaway_risk"} for h in hazards):
         badge = "🔴 hazard"
-    lines = [
-        f"### {ident.get('class', '?')} · _{ident.get('subclass', '')}_",
         "",
-        f"**Status** · {badge}",
         "",
         "| | |",
         "|---|---|",
@@ -84,51 +155,152 @@ def render_summary(p: dict) -> str:
         f"| **Do not** | "
         + (", ".join(f"`{x}`" for x in do_not) if do_not else "_none_") + " |",
         f"| **Confidence** | `{ident.get('confidence', 0):.3f}` "
-        + ("(calibrated)" if prov.get("confidence_calibrated") else "(raw)") + " |",
         f"| **Hazards** | "
         + (", ".join(f"`{h}`" for h in hazards) if hazards else "_none_") + " |",
         f"| **Condition** | `{state.get('condition', '?')}` |",
         f"| **Jurisdiction** | {routing.get('jurisdiction', '?')} |",
     ]
-    if val.get("co2e_avoided_kg") is not None:
-        lines.append(f"| **CO₂e avoided** | `{val['co2e_avoided_kg']} kg` |")
-    lines += [
-        f"| **Model** | `{prov.get('model', '?')}` ({prov.get('runtime', '?')}) |",
-        f"| **On-device** | {'✅' if prov.get('on_device') else '—'} |",
-    ]
-    return "\n".join(lines)
-def render_pipeline(p: dict) -> str:
-    nba = p.get("next_best_action", {})
-    state = p.get("state", {})
-    fallback = nba.get("fallback_used", False)
-    hazards = state.get("hazard_flags") or []
     return "\n".join([
-        "**MIE pipeline**",
         "",
-        "| Step | Status |",
         "|---|---|",
-        "| 01 · Validator | ✅ JSON shape + taxonomy enum |",
-        "| 02 · Calibration | ✅ histogram-calibrated |",
-        "| 03 · Hazard auto-flagger | "
-        + (f"⚠️ flagged: {', '.join(hazards)}" if hazards else "✅ no class-implied hazard") + " |",
-        "| 04 · Guardrail | "
-        + ("⚠️ fired — unsafe action overridden" if fallback else "✅ action passed `do_not` rules") + " |",
     ])
-def run_demo(head: str) -> tuple[str, str, str]:
     fname = DEMO_PASSPORTS.get(head, DEMO_PASSPORTS["domestic"])
     p = json.loads((SPEC_EXAMPLES / fname).read_text())
-    return render_summary(p), render_pipeline(p), json.dumps(p, indent=2)
-def run_live(image_path: str | None, head: str, jurisdiction: str) -> tuple[str, str, str]:
     if image_path is None:
         return (
-            "⚠️ Upload an image first, or switch to **Demo** mode for the canonical example.",
-            "", "",
         )
     try:
         engine = get_engine()
@@ -136,21 +308,30 @@ def run_live(image_path: str | None, head: str, jurisdiction: str) -> tuple[str,
             image_path=Path(image_path),
             jurisdiction=jurisdiction.strip() or None,
         )
-        passport = engine.infer(capture, head)
         p = passport.to_dict()
-        return render_summary(p), render_pipeline(p), json.dumps(p, indent=2)
     except MIEError as e:
         return (
-            f"### ❌ MIE pipeline rejected the model output\n\n```\n{e}\n```\n\n"
-            "_The model returned malformed or out-of-taxonomy JSON. Try a clearer image or switch to Demo mode._",
-            "", "",
         )
     except Exception as e:
         return (
             f"### ❌ Runtime error\n\n```\n{e.__class__.__name__}: {e}\n```\n\n"
-            "_If this is the first call after a cold start, the GPU worker is still loading Gemma 4 (≈30s). Try again in a moment, or use Demo mode._",
             f"<details><summary>traceback</summary>\n\n```\n{traceback.format_exc()}\n```\n</details>",
-            "",
         )
@@ -519,6 +700,32 @@ html, body, gradio-app, .gradio-container {
 /* ===== Selection ===== */
 ::selection { background: rgba(0, 217, 126, 0.35); color: white; }
 /* ===== Catch-all readability ===== */
 .gradio-container { color: #f1faf4; }
 .gradio-container input::placeholder,
@@ -587,10 +794,28 @@ with gr.Blocks(title="Matter — Material Intelligence") as demo:
                 )
         with gr.Column(scale=7):
             gr.Markdown("### Passport")
             summary_out = gr.Markdown(value="_Pick a mode and press_ **Generate Passport**.")
-            pipeline_out = gr.Markdown()
-            with gr.Accordion("Passport JSON", open=True):
                 json_out = gr.Code(language="json", label=None, lines=22)
     gr.Markdown(
@@ -603,7 +828,8 @@ with gr.Blocks(title="Matter — Material Intelligence") as demo:
     run_btn.click(
         dispatch,
         inputs=[mode_in, image_in, head_in, juris_in],
-        outputs=[summary_out, pipeline_out, json_out],
     )

 from matter.engine import MIE, CaptureInput, MIEError
 from matter.heads import HEADS
+from matter.verifier import Verifier
 from transformers_runtime import TransformersRuntime
+verifier = Verifier()
 ROOT = Path(__file__).parent
 EXAMPLES_DIR = ROOT / "examples"
 SPEC_EXAMPLES = ROOT / "spec" / "examples"
     return MIE(runtime=_runtime, on_device=True)
+# =====================================================================
+# Rendering helpers — every section returns a Markdown string.
+# Outputs in order:
+#   insights, summary, raw, validator, calibration, hazards, guardrail,
+#   verifier, json
+# =====================================================================
+EMPTY_OUTPUT = ("",) * 9
def render_insights(p: dict, trace: dict | None) -> str:
    """Build the top banner: a few high-impact statements about the Passport.

    Statements are emitted in a fixed priority order (guardrail save, hazards,
    auto-flagged hazards, CO2e avoided, jurisdiction, calibration shift) and
    joined with blank lines so each renders as its own Markdown paragraph.

    Args:
        p: Passport as a plain dict.
        trace: per-layer trace dict, or None when no model run happened.
            Any section missing from the trace is skipped rather than
            raising, so a partial trace still renders.

    Returns:
        Markdown text, or "" when there is nothing to report.
    """
    trace = trace or {}
    nba = p.get("next_best_action", {})
    state = p.get("state", {})
    routing = p.get("routing", {})
    val = (p.get("value") or {}).get("environmental") or {}
    hazards = state.get("hazard_flags") or []
    lines: list[str] = []

    # Guardrail save (highest priority — safety win). Requires both the
    # Passport flag and the trace to agree that the guardrail fired.
    g = trace.get("guardrail") or {}
    if nba.get("fallback_used") and g.get("fired"):
        sev = (g.get("severity") or "").upper()
        sev_emoji = "🚨" if sev == "CRITICAL" else "🛡️"
        proposed = g.get("proposed_action", "?")
        safe_default = g.get("safe_default", "?")
        lines.append(
            f"{sev_emoji} **Guardrail fired ({sev or 'high severity'})** — "
            f"unsafe action `{proposed}` overridden to `{safe_default}`"
        )

    # Hazards detected: only the headline-worthy flags are surfaced here.
    canonical = {"biohazard", "sharps_injury_risk", "thermal_runaway_risk",
                 "lead_toxicity", "acid_corrosion"}
    flagged = ", ".join(f"`{h}`" for h in hazards if h in canonical)
    if flagged:
        lines.append(f"⚠️ **Hazards detected**: {flagged}")

    # Hazards the auto-flagger added, i.e. the ones the model itself missed.
    added = (trace.get("hazards") or {}).get("added")
    if added:
        added_md = ", ".join(f"`{h}`" for h in added)
        lines.append(f"🔍 **Auto-flagger added** missing hazards: {added_md}")

    # Environmental impact
    if val.get("co2e_avoided_kg") is not None:
        co2 = val["co2e_avoided_kg"]
        lines.append(f"🌱 **{co2} kg CO₂e avoided** by routing to `{nba.get('primary', '?')}`")

    # Routing / jurisdiction
    if routing.get("jurisdiction"):
        lines.append(f"📋 **Jurisdiction**: {routing['jurisdiction']}")

    # Calibration shift: only worth a headline when identity confidence
    # moved by more than 0.05.
    calibration = trace.get("calibration") or {}
    cr = (calibration.get("raw") or {}).get("identity")
    cc = (calibration.get("calibrated") or {}).get("identity")
    if cr is not None and cc is not None and abs(cr - cc) > 0.05:
        arrow = "↓" if cc < cr else "↑"
        lines.append(f"📊 **Confidence calibrated**: {cr:.2f} {arrow} {cc:.2f} (histogram-corrected)")

    return "\n\n".join(lines)
 def render_summary(p: dict) -> str:
+    """The Passport headline card — always visible."""
     ident = p.get("identity", {})
     state = p.get("state", {})
     nba = p.get("next_best_action", {})
     routing = p.get("routing", {})
     prov = p.get("provenance", {})
     hazards = state.get("hazard_flags") or []
     do_not = nba.get("do_not") or []
     badge = "🟢 clear"
     if nba.get("fallback_used"):
+        badge = "🟡 guardrail fired"
     if any(h in {"biohazard", "sharps_injury_risk", "thermal_runaway_risk"} for h in hazards):
         badge = "🔴 hazard"
+    rows = [
+        f"### {ident.get('class', '?')} · _{ident.get('subclass') or ''}_",
         "",
+        f"**Status** — {badge}",
         "",
         "| | |",
         "|---|---|",
         f"| **Do not** | "
         + (", ".join(f"`{x}`" for x in do_not) if do_not else "_none_") + " |",
         f"| **Confidence** | `{ident.get('confidence', 0):.3f}` "
+        + ("calibrated" if prov.get("confidence_calibrated") else "raw") + " |",
         f"| **Hazards** | "
         + (", ".join(f"`{h}`" for h in hazards) if hazards else "_none_") + " |",
         f"| **Condition** | `{state.get('condition', '?')}` |",
         f"| **Jurisdiction** | {routing.get('jurisdiction', '?')} |",
+        f"| **Model** | `{prov.get('model', '?')}` |",
     ]
+    return "\n".join(rows)
def render_raw(trace: dict | None) -> str:
    """Step 1 — show what the model emitted and how it parsed.

    Args:
        trace: per-layer trace dict with "raw_output" (text) and "parsed"
            (JSON-serializable object), or None/empty when no model ran.

    Returns:
        Markdown with the first 600 characters of the raw output in a
        fenced block, followed by the parsed object pretty-printed as JSON.
        When there is no trace, a one-line note explaining why.
    """
    if not trace:
        return "_Live mode only — Demo mode shows a pre-computed Passport, no model output._"
    # Slicing already clamps to the string length, so no length check is needed.
    excerpt = trace["raw_output"][:600]
    pretty = json.dumps(trace["parsed"], indent=2)
    return (
        "**Gemma 4's raw output (post `parse_response`):**\n\n"
        f"```\n{excerpt}\n```\n\n"
        "**Parsed by the validator (D012):**\n\n"
        f"```json\n{pretty}\n```"
    )
def render_validator(trace: dict | None) -> str:
    """Render the Layer A (JSON validator) result table.

    Args:
        trace: per-layer trace dict with a "validators" entry holding
            "json_ok" and "enum_ok", or None/empty when no model ran.
            If the trace carries metadata["head"], that head name is shown
            in the taxonomy row instead of a fixed label.

    Returns:
        A Markdown table, or "" when there is no trace.
    """
    if not trace:
        return ""
    checks = trace["validators"]
    # Name the head whose taxonomy was actually checked; the label used to be
    # hard-coded to "domestic" regardless of which head ran.
    head = (trace.get("metadata") or {}).get("head")
    enum_pass = f"✅ in {head}" if head else "✅ in taxonomy"
    return "\n".join([
        "**Layer A — JSON validator (D012)**",
        "",
        "| Check | Result |",
        "|---|---|",
        f"| JSON shape | {'✅ valid' if checks['json_ok'] else '❌ malformed'} |",
        f"| Taxonomy enum | {enum_pass if checks['enum_ok'] else '❌ out of taxonomy'} |",
    ])
def render_calibration(trace: dict | None) -> str:
    """Render the Layer B (confidence calibration) raw-vs-calibrated table.

    Args:
        trace: per-layer trace dict whose "calibration" entry holds
            "method" plus "raw" and "calibrated" mappings keyed by
            "identity", "state" and "nba"; None/empty when no model ran.

    Returns:
        A Markdown table with one row per block, or "" when there is no trace.
    """
    if not trace:
        return ""
    c = trace["calibration"]
    raw, cal = c["raw"], c["calibrated"]

    def _row(block: str) -> str:
        """Format one table row for a confidence block."""
        before, after = raw[block], cal[block]
        # "→" marks a value the calibrator moved; "=" marks an unchanged one.
        marker = "→" if abs(before - after) > 0.001 else "="
        # Block names are left-padded to 8 chars so the Markdown source aligns.
        return (
            f"| {block:<8} | `{before:.3f}` | {marker} | "
            f"`{after:.3f}` | `{after - before:+.3f}` |"
        )

    return "\n".join([
        f"**Layer B — Confidence calibration (D015 · `{c['method']}`)**",
        "",
        "| Block | Raw | | Calibrated | Δ |",
        "|---|---:|:---:|---:|---:|",
        *(_row(block) for block in ("identity", "state", "nba")),
    ])
def render_hazards(trace: dict | None) -> str:
    """Render the Layer C (hazard auto-flagger) before/added/after table.

    Args:
        trace: per-layer trace dict whose "hazards" entry holds the
            "before", "added" and "after" flag lists; None/empty when no
            model ran.

    Returns:
        A Markdown table, or "" when there is no trace.
    """
    if not trace:
        return ""
    h = trace["hazards"]

    def _cell(flags) -> str:
        """Format a flag list as inline code, or `_none_` when empty."""
        return ", ".join(f"`{flag}`" for flag in flags) if flags else "_none_"

    return "\n".join([
        "**Layer C — Hazard auto-flagger (D019)**",
        "",
        "| | |",
        "|---|---|",
        f"| Model said | {_cell(h['before'])} |",
        f"| Auto-flagger added | {_cell(h['added'])} |",
        f"| Final hazard set | {_cell(h['after'])} |",
    ])
def render_guardrail(trace: dict | None) -> str:
    """Render the Layer D (`do_not` guardrail) decision.

    Args:
        trace: per-layer trace dict whose "guardrail" entry holds "fired",
            "proposed_action", "severity", "triggered_class" and
            "safe_default"; None/empty when no model ran.

    Returns:
        Markdown: a one-line pass message when the guardrail did not fire,
        a details table when it did, or "" when there is no trace.
    """
    if not trace:
        return ""
    g = trace["guardrail"]
    heading = ["**Layer D — `do_not` guardrail (D018)**", ""]
    if not g["fired"]:
        return "\n".join(heading + [
            f"✅ Proposed action `{g['proposed_action']}` passed all `do_not` rules.",
        ])
    # The trace may carry None for these when no rule object was attached;
    # show a readable placeholder instead of a literal "None".
    severity = g.get("severity") or "unknown"
    safe_default = g.get("safe_default") or "unknown"
    return "\n".join(heading + [
        f"⚠️ **Guardrail fired** — severity: `{severity}`",
        "",
        "| | |",
        "|---|---|",
        f"| Triggered class | `{g['triggered_class']}` |",
        f"| Model proposed | `{g['proposed_action']}` |",
        f"| Safe default applied | `{safe_default}` |",
    ])
def render_verifier(p: dict, head: str) -> str:
    """Score the finished Passport with the module-level verifier.

    Only the identity, state and next_best_action sections are serialized
    and handed to `verifier.score`; no ground truth is supplied, so only
    the structural components are reported.

    Args:
        p: Passport as a plain dict.
        head: head name passed through to the verifier.

    Returns:
        A Markdown table with one row per structural component plus the
        structural total.
    """
    sections = ("identity", "state", "next_best_action")
    payload = json.dumps({name: p.get(name, {}) for name in sections})
    score = verifier.score(payload, head, ground_truth=None)

    table = [
        "**Verifier scoring** (`matter.verifier.Verifier`)",
        "",
        "| Component | Score | Status |",
        "|---|---:|:---:|",
    ]
    for component in ("json_valid", "enum_valid", "do_not_compliance", "hazard_completeness"):
        value = getattr(score, component)
        status = "✅" if value else "❌"
        table.append(f"| `{component}` | `{value:.0f}` | {status} |")

    # The total is shown as passing only when it is effectively 1.0.
    total_status = "✅" if score.structural >= 0.99 else "⚠️"
    table.append(f"| **Structural total** | **`{score.structural:.3f}`** | {total_status} |")
    return "\n".join(table)
+# =====================================================================
+# Run handlers
+# =====================================================================
def run_demo(head: str) -> tuple:
    """Render every output panel from a pre-computed example Passport.

    No model runs in this mode, so every trace-backed renderer is called
    with `trace=None`; the verifier panel is still computed from the
    Passport itself.

    Args:
        head: head name; selects the example file from DEMO_PASSPORTS,
            falling back to the "domestic" example for unknown heads.

    Returns:
        A 9-tuple of strings in the order: insights, summary, raw,
        validator, calibration, hazards, guardrail, verifier, json.
    """
    fname = DEMO_PASSPORTS.get(head, DEMO_PASSPORTS["domestic"])
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    p = json.loads((SPEC_EXAMPLES / fname).read_text(encoding="utf-8"))
    return (
        render_insights(p, trace=None),
        render_summary(p),
        render_raw(trace=None),
        render_validator(trace=None),
        render_calibration(trace=None),
        render_hazards(trace=None),
        render_guardrail(trace=None),
        render_verifier(p, head),
        json.dumps(p, indent=2),
    )
+def run_live(image_path: str | None, head: str, jurisdiction: str) -> tuple:
     if image_path is None:
         return (
+            "⚠️ Upload an image first, or switch to **Demo** mode.",
+            "_no Passport yet_", "", "", "", "", "", "", "",
         )
     try:
         engine = get_engine()
             image_path=Path(image_path),
             jurisdiction=jurisdiction.strip() or None,
         )
+        passport, trace = engine.infer_with_trace(capture, head)
         p = passport.to_dict()
+        return (
+            render_insights(p, trace),
+            render_summary(p),
+            render_raw(trace),
+            render_validator(trace),
+            render_calibration(trace),
+            render_hazards(trace),
+            render_guardrail(trace),
+            render_verifier(p, head),
+            json.dumps(p, indent=2),
+        )
     except MIEError as e:
         return (
+            f"### ❌ MIE rejected the model output\n\n```\n{e}\n```",
+            "_pipeline halted_", "", "", "", "", "", "", "",
         )
     except Exception as e:
         return (
             f"### ❌ Runtime error\n\n```\n{e.__class__.__name__}: {e}\n```\n\n"
+            "_If this is the first call after a cold start, the GPU worker is still loading Gemma 4 (≈30s). Try again or use Demo mode._",
             f"<details><summary>traceback</summary>\n\n```\n{traceback.format_exc()}\n```\n</details>",
+            "", "", "", "", "", "", "",
         )
 /* ===== Selection ===== */
 ::selection { background: rgba(0, 217, 126, 0.35); color: white; }
+/* ===== Insights banner — value-generated headline ===== */
+#insights-banner {
+  background: linear-gradient(135deg, rgba(0, 217, 126, 0.10), rgba(0, 229, 255, 0.05)) !important;
+  border: 1px solid rgba(0, 217, 126, 0.32) !important;
+  border-radius: 14px !important;
+  padding: 18px 22px !important;
+  margin-bottom: 18px !important;
+  box-shadow: 0 0 24px rgba(0, 217, 126, 0.10), inset 0 1px 0 rgba(255, 255, 255, 0.04);
+}
+#insights-banner p, #insights-banner strong {
+  color: #f1faf4 !important;
+  font-size: 0.96rem !important;
+  line-height: 1.55 !important;
+  margin: 4px 0 !important;
+}
+#insights-banner code {
+  background: rgba(0, 217, 126, 0.12) !important;
+  color: #00ff8c !important;
+  border: 1px solid rgba(0, 217, 126, 0.28) !important;
+  font-weight: 500;
+}
+#insights-banner:has(p:empty),
+#insights-banner:not(:has(*)) {
+  display: none;
+}
 /* ===== Catch-all readability ===== */
 .gradio-container { color: #f1faf4; }
 .gradio-container input::placeholder,
                 )
         with gr.Column(scale=7):
+            # Insights banner — punchy 1-3 line summary of the value generated
+            insights_out = gr.Markdown(
+                value="",
+                elem_id="insights-banner",
+            )
             gr.Markdown("### Passport")
             summary_out = gr.Markdown(value="_Pick a mode and press_ **Generate Passport**.")
+            # Step-by-step trace — one accordion per pipeline layer
+            with gr.Accordion("🔍 Gemma 4's raw output (pre-pipeline)", open=False):
+                raw_out = gr.Markdown()
+            with gr.Accordion("✅ Layer A — JSON validator (D012)", open=False):
+                validator_out = gr.Markdown()
+            with gr.Accordion("📐 Layer B — Confidence calibration (D015)", open=False):
+                calibration_out = gr.Markdown()
+            with gr.Accordion("⚠️ Layer C — Hazard auto-flagger (D019)", open=False):
+                hazards_out = gr.Markdown()
+            with gr.Accordion("🛡️ Layer D — `do_not` guardrail (D018)", open=False):
+                guardrail_out = gr.Markdown()
+            with gr.Accordion("🧪 Verifier — independent attestation", open=False):
+                verifier_out = gr.Markdown()
+            with gr.Accordion("📋 Passport JSON", open=False):
                 json_out = gr.Code(language="json", label=None, lines=22)
     gr.Markdown(
     run_btn.click(
         dispatch,
         inputs=[mode_in, image_in, head_in, juris_in],
+        outputs=[insights_out, summary_out, raw_out, validator_out, calibration_out,
+                 hazards_out, guardrail_out, verifier_out, json_out],
     )

matter/engine.py CHANGED Viewed

@@ -201,6 +201,144 @@ class MIE:
         # 7. Final Pydantic validation against the v0.1 schema
         return Passport.model_validate(draft)
 __all__ = ["MIE", "MIEError", "Runtime", "CaptureInput", "Capture", "Identity",
            "State", "NextBestAction", "Provenance", "Routing", "Passport"]

         # 7. Final Pydantic validation against the v0.1 schema
         return Passport.model_validate(draft)
+    def infer_with_trace(
+        self, capture: CaptureInput, head_name: str
+    ) -> tuple[Passport, dict]:
+        """Run the pipeline and capture every layer's intermediate state.
+        Returns (passport, trace) where trace is a JSON-serializable dict with:
+          - raw_output:        Gemma's raw response text
+          - parsed:            JSON-parsed model output (pre-pipeline)
+          - validators:        {json_ok, enum_ok}
+          - calibration:       {raw, calibrated, method}  (per-block confidences)
+          - hazards:           {before, after, added}
+          - guardrail:         {proposed_action, fired, safe_default,
+                                triggered_class, severity}
+          - metadata:          {head, jurisdiction, runtime, model_id}
+        Used by the demo UI to render a step-by-step pipeline view. The Passport
+        return value is identical to what `infer()` would have produced.
+        """
+        if head_name not in HEADS:
+            raise MIEError(f"unknown head: {head_name}. Heads: {list(HEADS)}")
+        head = HEADS[head_name]
+        jurisdiction = capture.jurisdiction or head.default_jurisdiction
+        prompt = build_prompt(head_name, jurisdiction)
+        # 1. Runtime
+        raw = self.runtime.infer(prompt, capture.image_path)
+        # 2. Validator
+        parsed = _parse_json_block(raw)
+        _validate_enum(parsed, head_name)
+        ident = parsed["identity"]
+        st = parsed.get("state", {})
+        nba = parsed["next_best_action"]
+        confidences_raw = {
+            "identity": float(ident.get("confidence", 0.0)),
+            "state": float(st.get("confidence", 0.0)),
+            "nba": float(nba.get("confidence", 0.0)),
+        }
+        # 3. Calibration
+        ident_conf_cal = _calibrate(confidences_raw["identity"], self.calib)
+        state_conf_cal = _calibrate(confidences_raw["state"], self.calib)
+        nba_conf_cal = _calibrate(confidences_raw["nba"], self.calib)
+        confidences_cal = {
+            "identity": ident_conf_cal,
+            "state": state_conf_cal,
+            "nba": nba_conf_cal,
+        }
+        # 4. Build draft
+        modality, content_hash = _content_hash(capture)
+        ts = now_utc()
+        passport_id = make_passport_id(content_hash, ident["class"], ts)
+        draft: dict = {
+            "schema": "matter-passport/v0.1",
+            "passport_id": passport_id,
+            "prev": None,
+            "timestamp": ts,
+            "capture": {
+                "modality": modality,
+                "content_hash": content_hash,
+                **({"geohash_coarse": capture.geohash_coarse} if capture.geohash_coarse else {}),
+            },
+            "identity": {
+                "class": ident["class"],
+                "subclass": ident.get("subclass"),
+                "taxonomy": head.taxonomy_uri,
+                "confidence": ident_conf_cal,
+            },
+            "state": {
+                "condition": st.get("condition", "unknown"),
+                "hazard_flags": list(st.get("hazard_flags") or []),
+                "confidence": state_conf_cal,
+            },
+            "next_best_action": {
+                "primary": nba["primary"],
+                "secondary": nba.get("secondary"),
+                "do_not": list(nba.get("do_not") or []),
+                "confidence": nba_conf_cal,
+                "fallback_used": False,
+            },
+            "routing": {"jurisdiction": jurisdiction, "regulation_refs": []},
+            "provenance": {
+                "model": self.runtime.model_id,
+                "runtime": self.runtime.name,
+                "on_device": self.on_device,
+                "confidence_calibrated": True,
+                "calibration_ref": self._calibration_ref,
+            },
+        }
+        # 5. Hazard auto-flagger — capture before/after
+        hazards_before = list(draft["state"].get("hazard_flags") or [])
+        apply_hazard_flagger(draft, self.hazard_rules)
+        hazards_after = list(draft["state"].get("hazard_flags") or [])
+        hazards_added = [h for h in hazards_after if h not in hazards_before]
+        # 6. Guardrail — capture decision via GuardrailResult
+        proposed_action = draft["next_best_action"]["primary"]
+        g_result = apply_guardrail(draft, self.safety_rules)
+        # 7. Final Pydantic validation
+        passport = Passport.model_validate(draft)
+        trace = {
+            "raw_output": raw,
+            "parsed": parsed,
+            "validators": {"json_ok": True, "enum_ok": True},
+            "calibration": {
+                "raw": confidences_raw,
+                "calibrated": confidences_cal,
+                "method": self.calib.method,
+                "ref": self._calibration_ref,
+            },
+            "hazards": {
+                "before": hazards_before,
+                "after": hazards_after,
+                "added": hazards_added,
+            },
+            "guardrail": {
+                "proposed_action": proposed_action,
+                "fired": g_result.fallback_used,
+                "safe_default": (g_result.triggered_rule.safe_default
+                                 if g_result.fallback_used and g_result.triggered_rule else None),
+                "triggered_class": ident["class"] if g_result.fallback_used else None,
+                "severity": (g_result.triggered_rule.severity
+                             if g_result.triggered_rule else None),
+            },
+            "metadata": {
+                "head": head_name,
+                "jurisdiction": jurisdiction,
+                "runtime": self.runtime.name,
+                "model_id": self.runtime.model_id,
+            },
+        }
+        return passport, trace
 __all__ = ["MIE", "MIEError", "Runtime", "CaptureInput", "Capture", "Identity",
            "State", "NextBestAction", "Provenance", "Routing", "Passport"]

matter/verifier.py ADDED Viewed

	@@ -0,0 +1,341 @@

+"""Matter Verifier — programmatic reward signal for Passport generation.
+The MIE pipeline already encodes "what makes a Passport good": JSON shape,
+taxonomy enums, hazard flags, do_not guardrails, calibrated confidence.
+The Verifier turns that pipeline into a deterministic, per-example score
+suitable for:
+  - Offline eval (compare model outputs head-to-head on a fixed test set)
+  - DPO/KTO data generation (rank N samples per prompt, build preference pairs)
+  - GRPO / RL with verifiable rewards (used directly as the reward function)
+  - Test-time scaling (best-of-N selection)
+The verifier is split into two layers:
+  Structural (no ground truth needed) — usable for unsupervised RL:
+    - json_valid           Did the output parse as a JSON object in the expected shape?
+    - enum_valid           Are class and next_best_action.primary in the head's enums?
+    - do_not_compliance    Is the proposed primary action NOT in the class's do_not set?
+    - hazard_completeness  Are all class-required hazard flags present?
+  Semantic (needs a ground-truth Passport / dict) — used for eval and offline DPO:
+    - class_correct        Does identity.class match GT?
+    - subclass_match       Is subclass a string-match (exact or substring) of GT?
+    - nba_correct          Does next_best_action.primary match GT?
+    - confidence_brier     1 - (predicted_confidence - is_correct)^2 — per-example
+                           Brier-like calibration signal in [0, 1].
+Aggregation:
+    total = (sum of w_i * score_i) / (sum of w_i scored)
+    Hard gate: if json_valid == 0, total = 0 (model emitted garbage; nothing else matters).
+For RL, prefer `reward()` which returns a single scalar with sensible shaping
+(garbage → low, structurally valid but wrong → mid, fully correct → high).
+"""
+from __future__ import annotations
+import json
+import re
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any
+from matter.guardrail import Rule, load_rules as load_safety_rules
+from matter.hazard_flagger import HazardRule, load_hazard_rules
+from matter.heads import HEADS
+_SPEC_DIR = Path(__file__).resolve().parent.parent / "spec"
+_SAFETY_PATH = _SPEC_DIR / "safety_rules_v1.json"
+_HAZARD_PATH = _SPEC_DIR / "hazard_flags_v1.json"
+_JSON_RE = re.compile(r"\{.*\}", re.DOTALL)
+# Default reward weights. Tunable per experiment.
+DEFAULT_WEIGHTS: dict[str, float] = {
+    # Structural — gates and shape (cheap, always scored)
+    "json_valid": 1.0,
+    "enum_valid": 1.0,
+    "do_not_compliance": 3.0,
+    "hazard_completeness": 2.0,
+    # Semantic — needs ground truth
+    "class_correct": 4.0,
+    "subclass_match": 1.0,
+    "nba_correct": 2.0,
+    "confidence_brier": 1.0,
+}
@dataclass
class VerifierScore:
    """Result of scoring a single model output.

    Structural components default to 0.0; semantic components and the
    semantic aggregate default to None, meaning "not scored" (no ground
    truth was available). `parsed` holds the parsed dict when parsing
    succeeded and `parse_error` the failure message when it did not.
    `weights` records the weight table the score was computed with.
    """

    # --- structural components ---
    json_valid: float = 0.0
    enum_valid: float = 0.0
    do_not_compliance: float = 0.0
    hazard_completeness: float = 0.0

    # --- semantic components (None when there is no ground truth) ---
    class_correct: float | None = None
    subclass_match: float | None = None
    nba_correct: float | None = None
    confidence_brier: float | None = None

    # --- aggregates ---
    structural: float = 0.0
    semantic: float | None = None
    total: float = 0.0

    # --- diagnostics ---
    parsed: dict | None = None
    parse_error: str | None = None
    weights: dict[str, float] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return all fields as a plain dict (recursively, via `asdict`)."""
        snapshot = asdict(self)
        return snapshot
class Verifier:
    """Score raw model outputs against the MIE specification.

    Stateless across calls except for the loaded rule tables; safe to share
    one instance across an entire training/eval run.

    Model output is untrusted: any field may be missing or carry the wrong
    JSON type. Malformed fields earn no credit for the component that needs
    them; they never raise.
    """

    def __init__(
        self,
        weights: dict[str, float] | None = None,
        safety_rules_path: Path = _SAFETY_PATH,
        hazard_rules_path: Path = _HAZARD_PATH,
    ):
        """Resolve component weights and load the rule tables.

        Args:
            weights: optional per-component weight overrides. Components that
                are left out fall back to ``DEFAULT_WEIGHTS``, so a partial
                override cannot surface later as a ``KeyError`` while
                aggregating.
            safety_rules_path: path handed to ``load_safety_rules``.
            hazard_rules_path: path handed to ``load_hazard_rules``.
        """
        self.weights = {**DEFAULT_WEIGHTS, **(weights or {})}
        self.safety_rules: dict[str, Rule] = load_safety_rules(safety_rules_path)
        self.hazard_rules: dict[str, HazardRule] = load_hazard_rules(hazard_rules_path)

    # ----------------------------- public API -----------------------------

    def score(
        self,
        raw: str,
        head_name: str,
        ground_truth: dict | None = None,
    ) -> VerifierScore:
        """Score a raw model output for a given head, optionally against GT.

        Args:
            raw: the model's untrimmed output text. May contain prose around
                the JSON; extraction is delegated to ``_parse_json``.
            head_name: which head's enums apply (a key of ``HEADS``).
            ground_truth: optional dict shaped like a model output OR a full
                Passport. Only identity.class / identity.subclass and
                next_best_action.primary (or their flat equivalents) are read.

        Returns:
            VerifierScore with per-component scores, the structural and
            semantic aggregates, and the weighted total.

        Raises:
            ValueError: if ``head_name`` is not a key of ``HEADS``.
        """
        if head_name not in HEADS:
            raise ValueError(f"Unknown head: {head_name!r}. Known: {list(HEADS)}")
        head = HEADS[head_name]
        has_gt = ground_truth is not None
        s = VerifierScore(weights=dict(self.weights))

        # 1. JSON parse. Unparseable output keeps every component at its
        #    default and is hard-gated to total == 0 by _aggregate.
        parsed, err = _parse_json(raw)
        if parsed is None:
            s.parse_error = err
            self._aggregate(s, has_gt=has_gt)
            return s
        s.json_valid = 1.0
        s.parsed = parsed

        # 2. Enum validity. The class must be a known label; the primary
        #    action may be absent, but if present it must be a known label.
        cls = _get(parsed, "identity", "class") or parsed.get("class")
        nba_primary = _get(parsed, "next_best_action", "primary")
        s.enum_valid = float(
            self._is_known_label(cls, head.identity_classes)
            and (
                nba_primary is None
                or self._is_known_label(nba_primary, head.nba_classes)
            )
        )

        # 3. do_not compliance — given the predicted class, is the proposed
        #    primary action allowed by the safety rules? If the class isn't in
        #    the rule table, it has no constraints and trivially complies.
        s.do_not_compliance = self._score_do_not(cls, nba_primary)

        # 4. Hazard completeness — predicted hazard_flags include all the
        #    canonical class-implied hazards. If the class has no required
        #    hazards, this trivially passes (1.0).
        predicted_hazards = (
            _get(parsed, "state", "hazard_flags") or parsed.get("hazard_flags") or []
        )
        s.hazard_completeness = self._score_hazards(cls, predicted_hazards)

        # 5. Semantic — only if GT supplied
        if has_gt:
            gt_cls = _get(ground_truth, "identity", "class") or ground_truth.get("class")
            gt_subclass = _get(ground_truth, "identity", "subclass") or ground_truth.get("subclass")
            gt_nba = _get(ground_truth, "next_best_action", "primary") or ground_truth.get("primary")
            pred_subclass = _get(parsed, "identity", "subclass") or parsed.get("subclass")
            pred_conf = _confidence(parsed)

            s.class_correct = float(cls is not None and cls == gt_cls)
            # A subclass that is not a string is treated like a missing one
            # (component skipped) rather than being passed on to string ops.
            s.subclass_match = _string_match(
                pred_subclass if isinstance(pred_subclass, str) else None,
                gt_subclass if isinstance(gt_subclass, str) else None,
            )
            s.nba_correct = (
                float(nba_primary == gt_nba)
                if gt_nba is not None and nba_primary is not None
                else None
            )
            s.confidence_brier = (
                _brier(pred_conf, s.class_correct) if pred_conf is not None else None
            )

        self._aggregate(s, has_gt=has_gt)
        return s

    def reward(
        self,
        raw: str,
        head_name: str,
        ground_truth: dict | None = None,
    ) -> float:
        """Single-scalar reward suitable for RL.

        Shaping:
          - garbage (json_valid == 0): -1.0   (strong signal: don't emit non-JSON)
          - parsed but otherwise zero: 0.0
          - perfect: 1.0

        For unsupervised RL (no GT), structural score alone determines reward.

        Raises:
            ValueError: propagated from ``score`` for an unknown ``head_name``.
        """
        s = self.score(raw, head_name, ground_truth)
        if s.json_valid == 0:
            return -1.0
        return s.total

    # --------------------------- internals --------------------------------

    @staticmethod
    def _is_known_label(value: Any, allowed: Any) -> bool:
        """Return True if ``value`` is a string contained in ``allowed``.

        The ``isinstance`` guard runs first so an unhashable value (a JSON
        list or object) is never looked up in a hash-based container.
        """
        return isinstance(value, str) and value in allowed

    def _score_do_not(self, cls: str | None, nba_primary: str | None) -> float:
        """Return 1.0 if the proposed action is not forbidden for ``cls``.

        A class or action that is missing or not a string scores 0.0. A class
        without an entry in the safety rules is unconstrained and scores 1.0.
        """
        if not isinstance(cls, str) or not isinstance(nba_primary, str):
            return 0.0
        rule = self.safety_rules.get(cls)
        if rule is None:
            return 1.0  # class unconstrained, trivially compliant
        return 0.0 if nba_primary in rule.do_not else 1.0

    def _score_hazards(self, cls: str | None, predicted: list[str]) -> float:
        """Return 1.0 if ``predicted`` contains every hazard required for ``cls``.

        A class that is missing or not a string scores 0.0. A class with no
        hazard rule, or with an empty ``required`` list, scores 1.0. Entries
        of ``predicted`` that are not strings are ignored, and a value that
        cannot be iterated at all counts as "no hazards predicted".
        """
        if not isinstance(cls, str):
            return 0.0
        rule = self.hazard_rules.get(cls)
        if rule is None or not rule.required:
            return 1.0  # no required hazards for this class
        try:
            present = {h for h in predicted if isinstance(h, str)}
        except TypeError:
            # hazard_flags was a scalar (number / bool), not a collection.
            return 0.0
        return float(all(h in present for h in rule.required))

    def _aggregate(self, s: VerifierScore, *, has_gt: bool) -> None:
        """Fill ``s.structural``, ``s.semantic`` and ``s.total`` in place.

        ``structural`` is the weighted mean of the four structural components.
        ``semantic`` is the weighted mean of the semantic components that are
        not None (only when ``has_gt``). ``total`` is the weighted mean over
        both groups, except that unparseable output is forced to 0.
        """
        # Structural always contributes
        struct_keys = ("json_valid", "enum_valid", "do_not_compliance", "hazard_completeness")
        struct_w = sum(self.weights[k] for k in struct_keys)
        struct_v = sum(self.weights[k] * getattr(s, k) for k in struct_keys)
        s.structural = struct_v / struct_w if struct_w > 0 else 0.0

        # Hard gate: garbage → 0
        if s.json_valid == 0.0:
            s.semantic = None
            s.total = 0.0
            return

        if has_gt:
            sem_pairs = [
                ("class_correct", s.class_correct),
                ("subclass_match", s.subclass_match),
                ("nba_correct", s.nba_correct),
                ("confidence_brier", s.confidence_brier),
            ]
            # Components left at None could not be judged; they drop out of
            # both the numerator and the denominator.
            sem_w = sum(self.weights[k] for k, v in sem_pairs if v is not None)
            sem_v = sum(self.weights[k] * v for k, v in sem_pairs if v is not None)
            s.semantic = (sem_v / sem_w) if sem_w > 0 else None
            total_w = struct_w + sem_w
            total_v = struct_v + sem_v
        else:
            s.semantic = None
            total_w = struct_w
            total_v = struct_v

        s.total = total_v / total_w if total_w > 0 else 0.0
+# ----------------------------- helpers ------------------------------------
+def _parse_json(raw: str) -> tuple[dict | None, str | None]:
+    """Extract and parse the first JSON object in raw. Returns (parsed, error)."""
+    if not isinstance(raw, str) or not raw.strip():
+        return None, "empty input"
+    m = _JSON_RE.search(raw)
+    if m is None:
+        return None, "no JSON object found"
+    try:
+        obj = json.loads(m.group(0))
+    except json.JSONDecodeError as e:
+        return None, f"JSONDecodeError: {e}"
+    if not isinstance(obj, dict):
+        return None, "top-level JSON is not an object"
+    return obj, None
+def _get(d: dict | None, *keys: str) -> Any:
+    """Nested dict lookup, returning None if any step is missing."""
+    cur: Any = d
+    for k in keys:
+        if not isinstance(cur, dict):
+            return None
+        cur = cur.get(k)
+    return cur
def _confidence(parsed: dict) -> float | None:
    """Pull a numeric confidence value from common output shapes.

    Looks at ``identity.confidence`` first, then top-level ``confidence``,
    and returns the first one that is a real number as a float. Returns None
    when neither location holds a number.
    """
    for path in (("identity", "confidence"), ("confidence",)):
        v = _get(parsed, *path)
        # bool is a subclass of int, so JSON true/false would otherwise be
        # read as a confidence of 1.0 / 0.0.
        if isinstance(v, (int, float)) and not isinstance(v, bool):
            return float(v)
    return None
+def _string_match(pred: str | None, gt: str | None) -> float | None:
+    """Soft string match: 1.0 exact (case-insensitive), 0.5 substring, 0 else.
+    Returns None if either side is missing — caller decides whether to score it.
+    """
+    if not pred or not gt:
+        return None
+    p = pred.strip().lower()
+    g = gt.strip().lower()
+    if p == g:
+        return 1.0
+    if p in g or g in p:
+        return 0.5
+    return 0.0
+def _brier(confidence: float, is_correct: float | None) -> float | None:
+    """Per-example Brier-like calibration signal in [0, 1].
+    Returns 1 - (confidence - is_correct)^2:
+      - confident & correct (1.0, 1) -> 1.0
+      - confident & wrong   (1.0, 0) -> 0.0
+      - unsure & correct    (0.5, 1) -> 0.75
+      - unsure & wrong      (0.5, 0) -> 0.75
+      - underconfident & correct (0.0, 1) -> 0.0
+    Returns None if is_correct is None.
+    """
+    if is_correct is None:
+        return None
+    c = max(0.0, min(1.0, confidence))
+    return 1.0 - (c - is_correct) ** 2
# Public API of this module; everything else is an implementation detail.
__all__ = ["Verifier", "VerifierScore", "DEFAULT_WEIGHTS"]