Spaces:
Runtime error
Runtime error
"""The QA Inspector — a SEPARATE reviewer persona (the hybrid evaluator).

Integrity, restated: the Chief Engineer never grades its own work. The
deterministic simulated world (`sim/outcome.py`) produces the ground-truth
pass/fail. The Inspector is a *distinct* voice — skeptical, conservative —
that reads what the Engineer claimed and what actually happened and writes a
verdict. The grade is therefore "hybrid": deterministic physics + a second LLM
opinion, never the proposer marking its own homework.

One persona, three sets of rules depending on where it stands in the workflow:
  • second_opinion(...) — BUILD: critique the PLAN before any print runs.
  • grade_outcome(...) — PRINT: grade one finished (simulated) print vs the
    Engineer's prediction — did the called risk hold?
  • summarize_run(...) — REVIEW: one verdict across a whole iteration run.

LLM-backed via `llm.chat_json` with a distinct system prompt; each mode has a
deterministic fallback so the verdict is always present offline.
"""
| from __future__ import annotations | |
| from dataclasses import dataclass | |
| from . import llm | |
| from .models import Advice, Environment, Job, PrintSettings | |
| from sim.outcome import SimResult | |
# System prompt shared by every LLM-backed Inspector mode; each caller appends
# its own JSON response contract. The trailing backslashes join the source
# lines, so the prompt is one paragraph with no embedded newlines.
PERSONA = """You are La Forge, the QA Inspector: a skeptical, conservative print-shop \
inspector. You did NOT propose these settings — Chief Engineer O'Brien did, and \
O'Brien is an optimist. Your job is to second-guess, not to please. You are \
terse and physical. You never flatter. You call out optimism, thin margins, and \
unflagged risks, and you give credit only when the evidence earns it."""
| # predicted-risk vocabulary β simulated failure_mode it corresponds to | |
| _RISK_TO_MODE = { | |
| "sag": "sag", "stringing": "stringing", "adhesion": "adhesion", | |
| "warping": "warp", "warp": "warp", "delamination": "under_extrusion", | |
| } | |
| _MODE_HUMAN = { | |
| "sag": "sagging", "stringing": "stringing", "adhesion": "first-layer adhesion", | |
| "warp": "warping", "under_extrusion": "under-extrusion", "none": "no failure", | |
| } | |
@dataclass
class InspectorVerdict:
    """One Inspector verdict: a stance label plus a headline and rationale.

    Declared as a dataclass so it can be built positionally, e.g.
    ``InspectorVerdict("concur", "headline", "detail")``, which is how every
    caller in this module constructs it.
    """

    stance: str  # short label, e.g. "concur" / "caution" / "held" / "missed"
    headline: str  # one-line verdict
    detail: str  # 1-2 lines of rationale
    agreement: bool | None = None  # outcome modes: did the Engineer's prediction match reality?

    def color(self) -> str:
        """Return the CSS colour expression that matches this verdict's stance.

        The stance is compared case-insensitively and with surrounding
        whitespace ignored, since it may come straight from an LLM reply.
        Negative stances map to red, cautionary ones to amber, and anything
        else (including unknown labels) to green.
        """
        label = self.stance.strip().lower()
        if label in ("dispute", "missed", "fail"):
            return "var(--ao-red, #d9534f)"
        if label in ("caution", "overcautious", "watch"):
            return "var(--ao-amber, #e0a458)"
        return "var(--ao-green)"
def _predicted_modes(advice: Advice) -> set[str]:
    """Translate the Engineer's flagged risks into simulated failure modes.

    Each risk label is trimmed and lower-cased, then mapped through
    ``_RISK_TO_MODE``; labels with no mapping are kept as-is so an unknown
    risk can still be compared against the simulated mode.

    Blank or missing labels are skipped. Otherwise an empty string would be
    counted as a predicted mode, making an unflagged plan look flagged.
    """
    modes: set[str] = set()
    for entry in advice.risks:
        label = (entry.risk or "").strip().lower()
        if not label:
            continue
        modes.add(_RISK_TO_MODE.get(label, label))
    return modes
| def _settings_line(s: PrintSettings) -> str: | |
| return (f"nozzle {s.nozzle_temp:.0f}Β°C, bed {s.bed_temp:.0f}Β°C, fan {s.fan_pct:.0f}%, " | |
| f"first-layer fan {s.first_layer_fan_pct:.0f}%, retraction {s.retraction_mm:.1f}mm") | |
# ── BUILD: a second opinion on the plan, before anything prints ───────────────
def second_opinion(job: Job, env: Environment, settings: PrintSettings, advice: Advice) -> InspectorVerdict:
    """BUILD mode: critique the Engineer's plan before anything prints.

    Asks the LLM (with the Inspector persona) for a JSON verdict carrying
    ``stance``, ``headline`` and ``detail``. If the reply is empty, is not a
    JSON object, or lacks any of those keys, the deterministic rule-based
    fallback is used instead, so a verdict is always returned.
    """
    system_prompt = (
        PERSONA + "\n\nRespond ONLY with JSON: "
        '{"stance":"concur|caution|dispute","headline":"<one line>","detail":"<1-2 lines>"}'
    )
    user_prompt = (
        "Review this PLAN before it prints — do not re-propose, just critique.\n"
        f"JOB: {job.material}/{job.geometry_type}, bed position {job.bed_position}, "
        f"room {env.temp:.0f}°C/{env.humidity:.0f}%RH on a {env.printer}.\n"
        f"ENGINEER PROPOSED: {_settings_line(settings)}.\n"
        f"ENGINEER REASONING: {advice.reasoning}\n"
        f"ENGINEER FLAGGED RISKS: {[r.risk for r in advice.risks] or 'none'}.\n"
        "Where is the Engineer being optimistic? What would you watch?"
    )
    raw = llm.chat_json(system_prompt, user_prompt)
    # Only a JSON object with all three fields is usable; a list or string
    # reply would otherwise fail on subscripting below.
    if isinstance(raw, dict) and {"stance", "headline", "detail"} <= set(raw):
        return InspectorVerdict(str(raw["stance"]), str(raw["headline"]), str(raw["detail"]))
    return _second_opinion_fallback(job, env, settings, advice)
def _second_opinion_fallback(job: Job, env: Environment, settings: PrintSettings, advice: Advice) -> InspectorVerdict:
    """Rule-based plan critique used when no usable LLM verdict is available.

    Collects a flag for each heuristic the plan trips:
      * low fan (< 60%) on an overhang or bridge;
      * ABS placed at the bed edge or corner;
      * fan above 40% on ABS;
      * humidity above 55%RH with PETG/TPU/ABS and under 3 mm retraction;
      * the Engineer flagged no risks at all.

    No flags yields "concur", one flag "caution", two or more "dispute". The
    first flag becomes the headline and the rest are joined into the detail.
    """
    geo, mat = job.geometry_type, job.material.upper()
    flags: list[str] = []
    if geo in ("overhang", "bridge") and settings.fan_pct < 60:
        flags.append(f"fan {settings.fan_pct:.0f}% is thin for a {geo} — sagging risk the Engineer may be underweighting")
    if mat == "ABS" and job.bed_position in ("edge", "corner"):
        flags.append(f"ABS off-center ({job.bed_position}) will pull at the edges — I'd second a warp watch and a brim")
    if mat == "ABS" and settings.fan_pct > 40:
        flags.append(f"fan {settings.fan_pct:.0f}% on ABS invites cracking/warp")
    if env.humidity > 55 and mat in ("PETG", "TPU", "ABS") and settings.retraction_mm < 3:
        flags.append(f"humid air ({env.humidity:.0f}%RH) + {settings.retraction_mm:.1f}mm retraction — expect stringing")
    if not advice.risks:
        flags.append("Engineer flagged NO failure regions — verify that's confidence, not optimism")
    if not flags:
        return InspectorVerdict("concur", "No red flags from a second look.",
                                "Plan sits inside sane bounds for this material and room. Cleared to print.")
    stance = "dispute" if len(flags) >= 2 else "caution"
    # With a single flag there is nothing left for the detail line, so a
    # generic "watch it" note is used instead.
    return InspectorVerdict(stance, f"Second opinion: {flags[0]}.",
                            " · ".join(flags[1:]) or "Print it, but watch that region.")
# ── PRINT: grade one finished (simulated) print against the prediction ────────
def grade_outcome(job: Job, env: Environment, settings: PrintSettings,
                  advice: Advice, result: SimResult) -> InspectorVerdict:
    """PRINT mode: grade one finished (simulated) print against the Engineer's call.

    The LLM supplies the wording (stance, headline, detail), but the
    ``agreement`` flag is always computed deterministically from the predicted
    risk modes and the simulated failure mode. If the reply is empty, is not
    a JSON object, or lacks a required key, the deterministic fallback verdict
    is returned instead.

    ``settings`` is accepted but not read by this function.
    """
    predicted = _predicted_modes(advice)
    system_prompt = (
        PERSONA + "\n\nRespond ONLY with JSON: "
        '{"stance":"held|missed|overcautious|confirmed","headline":"<one line>","detail":"<1-2 lines>"}'
    )
    user_prompt = (
        "Grade this finished print. The outcome below came from the deterministic "
        "world, not from the Engineer — you are checking the Engineer's CALL against it.\n"
        f"JOB: {job.material}/{job.geometry_type} @ {env.temp:.0f}°C/{env.humidity:.0f}%RH.\n"
        f"ENGINEER PREDICTED RISKS: {[r.risk for r in advice.risks] or 'none'}.\n"
        f"ACTUAL OUTCOME: {result.outcome} — {result.detail} "
        f"(failure mode: {result.failure_mode}).\n"
        "Did the Engineer's prediction hold? Be blunt."
    )
    raw = llm.chat_json(system_prompt, user_prompt)
    agreement = _agreement(predicted, result)
    # Only a JSON object with all three fields is usable; a list or string
    # reply would otherwise fail on subscripting below.
    if isinstance(raw, dict) and {"stance", "headline", "detail"} <= set(raw):
        return InspectorVerdict(str(raw["stance"]), str(raw["headline"]), str(raw["detail"]), agreement)
    return _grade_fallback(predicted, result, agreement)
| def _agreement(predicted: set[str], result: SimResult) -> bool: | |
| if result.failure_mode == "none": | |
| return True # clean print β nothing to have missed | |
| return result.failure_mode in predicted | |
def _grade_fallback(predicted: set[str], result: SimResult, agreement: bool) -> InspectorVerdict:
    """Deterministic verdict for one print, used when no LLM wording is available.

    Stances produced:
      * "overcautious": clean print although risks were flagged;
      * "held": clean print and nothing was flagged;
      * "confirmed": the print failed on a mode that was predicted;
      * "missed": the print failed on a mode that was not predicted.

    ``agreement`` is the precomputed result of ``_agreement(predicted, result)``.
    It only selects between "confirmed" and "missed" when the print failed.
    """
    # Fall back to the raw mode name when no human wording is registered.
    mode = _MODE_HUMAN.get(result.failure_mode, result.failure_mode)
    if result.failure_mode == "none":
        if predicted:
            return InspectorVerdict("overcautious", f"Print held (q={result.quality:.2f}).",
                                    f"Engineer flagged {', '.join(sorted(predicted))}; the settings covered it. "
                                    "Credit the call — or it was conservative.", True)
        return InspectorVerdict("held", f"Clean print (q={result.quality:.2f}).",
                                "No failure flagged, none occurred. Plan and reality agree.", True)
    if agreement:
        return InspectorVerdict("confirmed", f"Failed on {mode} — exactly as called.",
                                f"Quality {result.quality:.2f}. The Engineer's risk flag was right; "
                                "the loop now has the lesson.", True)
    return InspectorVerdict("missed", f"Failed on {mode} — and it wasn't flagged.",
                            f"Quality {result.quality:.2f}. The Engineer didn't predict this mode. "
                            "That gap is what the next iteration has to close.", False)
| # canonical failure mode each geometry is expected to risk (the loop's implicit | |
| # prediction β the deterministic policy loop carries no LLM Advice per iteration) | |
| _GEO_EXPECT = {"overhang": "sag", "bridge": "sag", "stringing": "stringing", | |
| "adhesion": "adhesion", "vase": "warp"} | |
def grade_iteration(geometry_type: str, result: SimResult) -> InspectorVerdict:
    """Grade a single loop iteration deterministically, with no LLM call.

    The loop runs many fast, reproducible iterations, so the verdict comes
    only from comparing the simulated outcome with the one failure mode this
    geometry is expected to risk. Geometries without an entry in
    ``_GEO_EXPECT`` are treated as sag-prone.
    """
    canonical_mode = _GEO_EXPECT.get(geometry_type, "sag")
    expected = {canonical_mode}
    agreed = _agreement(expected, result)
    return _grade_fallback(expected, result, agreed)
# ── REVIEW: one verdict across a whole iteration run ──────────────────────────
def summarize_run(records: list, *, material: str, geometry: str) -> InspectorVerdict:
    """REVIEW mode: one verdict across a whole iteration run.

    Args:
        records: iteration records in run order. Each is read for ``.n`` (the
            iteration number), ``.result.quality`` and ``.result.outcome``.
        material: material name, used only in the verdict text.
        geometry: geometry name, used only in the verdict text.

    Returns:
        "watch" when there are no records; "concur" when any iteration
        succeeded; "caution" when quality rose by more than 0.05 from first to
        last iteration without a success; otherwise "dispute".
    """
    if not records:
        return InspectorVerdict("watch", "No run to review yet.", "Run the Print loop first.")
    qualities = [r.result.quality for r in records]
    first_clean = next((r.n for r in records if r.result.outcome == "success"), None)
    start, end = qualities[0], qualities[-1]
    climbed = end - start
    # Compare against None explicitly: an iteration numbered 0 is falsy but is
    # still a clean run and must count as convergence.
    if first_clean is not None:
        stance, head = "concur", f"Converged to clean by iteration {first_clean}."
        detail = (f"{material}/{geometry} climbed {start:.2f} → {end:.2f}. The compounding is real: "
                  "each simulated outcome tightened the policy and the next run was better-informed.")
    elif climbed > 0.05:
        stance, head = "caution", f"Improving but not yet clean (best {max(qualities):.2f})."
        detail = (f"Quality rose {start:.2f} → {end:.2f} over {len(records)} runs but never crossed the "
                  "bar. More iterations or a different lever needed — the loop is learning, slowly.")
    else:
        stance, head = "dispute", "No real progress this run."
        detail = (f"Quality stuck around {start:.2f}. Either the job is mis-specified or the policy is "
                  "saturated for these conditions — worth a human look before trusting the trend.")
    return InspectorVerdict(stance, head, detail)