"""Learned policy — the parametric layer that actually *improves*.

This is what makes the Chief Engineer more than a lookup. Each
(material, geometry, environment-BUCKET) cell holds learned offsets to the
baseline settings, updated from observed outcomes. Because cells are bucketed
(not exact env points), a lesson from one humid PETG bridge transfers to the
*next* humid PETG bridge — it generalizes, rather than recalling a single
past job.

Two knowledge sources feed a recommendation, exactly as intended:
  • RAG    — retrieved prior jobs reasoned over by the LLM (chief_engineer.py)
  • policy — these learned offsets, applied deterministically + shown to the LLM

Persisted to data/policy.json. Pure-Python, deterministic, no network.
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path

from core.chief_engineer import _FALLBACK_SETTINGS
from core.models import Environment, Job, PrintSettings
from sim.outcome import SimResult

# Default persistence target: data/policy.json under the parent of this
# file's directory.
POLICY_PATH = Path(__file__).resolve().parent.parent.joinpath("data", "policy.json")

# Corrective step per observed failure mode: which offsets to nudge, and by how
# much. Each move reduces the matching penalty in sim.outcome — so cells climb.
_CORRECTIONS = {
    "sag": {
        "fan_pct": +12,
        "nozzle_temp": -3,
    },
    "stringing": {
        "retraction_mm": +0.5,
        "nozzle_temp": -4,
    },
    "adhesion": {
        "bed_temp": +6,
        "first_layer_fan_pct": -10,
    },
    "under_extrusion": {
        "nozzle_temp": +5,
    },
    "warp": {
        "fan_pct": -10,
    },
}

# Keep learned offsets sane; the Spine still clamps the final settings.
# Values are magnitudes: LearnedPolicy.update() bounds each accumulated offset
# to the symmetric range [-limit, +limit].
_OFFSET_CLAMP = {
    "nozzle_temp": 30,
    "bed_temp": 25,
    "retraction_mm": 3,
    "fan_pct": 80,
    "first_layer_fan_pct": 60,
}


def env_bucket(temp: float, humidity: float) -> tuple[str, str]:
    """Map an exact (temp, humidity) reading to coarse bucket labels.

    Temperature below 20 is "cool", above 26 is "warm", otherwise "mid".
    Humidity below 35 is "dry", above 55 is "humid", otherwise "mid".
    The threshold values themselves land in "mid" (comparisons are strict).

    Returns:
        ``(temperature_bucket, humidity_bucket)``.
    """
    tb = "cool" if temp < 20 else ("warm" if temp > 26 else "mid")
    hb = "dry" if humidity < 35 else ("humid" if humidity > 55 else "mid")
    return tb, hb


def cell_key(material: str, geometry: str, env: Environment) -> str:
    """Build the policy-cell key ``material/geometry/temp_bucket/humidity_bucket``.

    NOTE(review): ``material`` is used verbatim here, whereas
    ``LearnedPolicy._baseline`` upper-cases it, so "petg" and "PETG" share a
    baseline but learn in separate cells. Confirm callers normalise case.
    """
    tb, hb = env_bucket(env.temp, env.humidity)
    return f"{material}/{geometry}/{tb}/{hb}"


@dataclass
class Cell:
    """Learned state for one (material, geometry, env-bucket) cell."""

    # Setting name -> accumulated delta applied on top of the material baseline.
    offsets: dict[str, float]
    # Number of outcomes folded into this cell.
    trials: int = 0
    # How many of those outcomes were "success".
    successes: int = 0
    # Rounded quality score per trial, oldest first; None is treated as empty.
    quality_history: list[float] | None = None

    @property
    def success_rate(self) -> float:
        """Fraction of trials that succeeded; 0.0 when there are no trials."""
        return self.successes / self.trials if self.trials else 0.0


class LearnedPolicy:
    """Per-cell learned offsets, persisted as JSON at ``path``.

    Cells are loaded from disk on construction and written back after every
    ``update``.
    """

    def __init__(self, path: Path = POLICY_PATH) -> None:
        self.path = path
        self.cells: dict[str, Cell] = {}
        self._load()

    @staticmethod
    def _cell_from_json(entry: object) -> Cell | None:
        """Return a Cell for a well-formed JSON entry, or None if malformed.

        Well-formed means: the entry is an object, ``offsets`` is an object
        with numeric values, ``trials``/``successes`` are integers, and
        ``quality_history`` is a list or null. Values are passed through
        unchanged so a valid file round-trips exactly.
        """
        if not isinstance(entry, dict):
            return None
        offsets = entry.get("offsets", {})
        trials = entry.get("trials", 0)
        successes = entry.get("successes", 0)
        history = entry.get("quality_history", [])
        well_formed = (
            isinstance(offsets, dict)
            and all(isinstance(v, (int, float)) for v in offsets.values())
            and isinstance(trials, int)
            and isinstance(successes, int)
            and (history is None or isinstance(history, list))
        )
        if not well_formed:
            return None
        return Cell(offsets=offsets, trials=trials, successes=successes,
                    quality_history=history)

    def _load(self) -> None:
        """Populate ``self.cells`` from ``self.path``, best-effort.

        A missing, unreadable, or unparsable file leaves the policy empty.
        Individual entries with the wrong shape are skipped rather than
        crashing the constructor or poisoning later reads.
        """
        try:
            raw = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: invalid JSON or
            # invalid UTF-8 (JSONDecodeError and UnicodeDecodeError subclass it).
            return
        if not isinstance(raw, dict):
            return
        for key, entry in raw.items():
            cell = self._cell_from_json(entry)
            if cell is not None:
                self.cells[key] = cell

    def save(self) -> None:
        """Write all cells to ``self.path`` as indented JSON.

        The parent directory is created if needed. The data is written to a
        sibling ``<name>.tmp`` file and then renamed over the target, so an
        interrupted write cannot leave a truncated policy file (which
        ``_load`` would silently discard, losing everything learned).
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        out = {
            key: {
                "offsets": cell.offsets,
                "trials": cell.trials,
                "successes": cell.successes,
                "quality_history": cell.quality_history or [],
            }
            for key, cell in self.cells.items()
        }
        tmp_path = self.path.with_name(self.path.name + ".tmp")
        tmp_path.write_text(json.dumps(out, indent=2), encoding="utf-8")
        tmp_path.replace(self.path)

    def reset(self) -> None:
        """Clear all learned cells back to baseline (and remove the saved file).

        Powers the UI 'reset' button alongside the ledger reset.
        """
        self.cells = {}
        try:
            self.path.unlink(missing_ok=True)
        except OSError:
            # Best-effort: the in-memory reset already happened; a file that
            # cannot be removed (permissions, etc.) must not break the caller.
            pass

    # --- read side ---------------------------------------------------------

    def _baseline(self, material: str) -> dict[str, float]:
        """Return a fresh copy of the fallback settings for ``material``.

        Lookup is by upper-cased material name; unknown materials fall back
        to the "PLA" entry.
        """
        return dict(_FALLBACK_SETTINGS.get(material.upper(), _FALLBACK_SETTINGS["PLA"]))

    def offsets_for(self, material: str, geometry: str, env: Environment) -> dict[str, float]:
        """Return a copy of the learned offsets for the matching cell.

        Returns an empty dict when the cell does not exist.
        """
        cell = self.cells.get(cell_key(material, geometry, env))
        return dict(cell.offsets) if cell else {}

    def propose(self, material: str, geometry: str, env: Environment) -> PrintSettings:
        """Deterministic proposal = material baseline + learned offsets (clamped).

        After the offsets are added, both fan percentages are clamped to
        [0, 100] and the retraction distance is floored at 0.
        """
        base = self._baseline(material)
        for name, delta in self.offsets_for(material, geometry, env).items():
            base[name] = base.get(name, 0.0) + delta
        base["fan_pct"] = max(0.0, min(100.0, base["fan_pct"]))
        base["first_layer_fan_pct"] = max(0.0, min(100.0, base["first_layer_fan_pct"]))
        base["retraction_mm"] = max(0.0, base["retraction_mm"])
        return PrintSettings(**base)

    def cell_stats(self, material: str, geometry: str, env: Environment) -> Cell | None:
        """Return the live Cell for these conditions, or None if never seen."""
        return self.cells.get(cell_key(material, geometry, env))

    def policy_note(self, material: str, geometry: str, env: Environment) -> str:
        """One line for the system prompt — steers the LLM with what's been learned.

        Returns an empty string when the cell is missing or has no offsets.
        """
        c = self.cell_stats(material, geometry, env)
        if not c or not c.offsets:
            return ""
        tb, hb = env_bucket(env.temp, env.humidity)
        deltas = ", ".join(f"{k} {v:+g}" for k, v in c.offsets.items())
        return (f"LEARNED POLICY for {material}/{geometry} in {tb}/{hb} conditions "
                f"(earned over {c.trials} runs, {c.success_rate*100:.0f}% clean): adjust baseline by {deltas}. "
                f"Weigh this against the precedent above.")

    # --- write side (learning) --------------------------------------------

    def update(self, material: str, geometry: str, env: Environment, result: SimResult) -> str:
        """Fold one observed outcome into the cell. Returns a human log line.

        Every call increments the trial count, records the rounded quality,
        and persists the policy. A "success" outcome leaves the offsets
        untouched; any other outcome applies the corrective steps registered
        for ``result.failure_mode`` (none if the mode is unknown), each bounded
        to ±its ``_OFFSET_CLAMP`` limit.
        """
        key = cell_key(material, geometry, env)
        c = self.cells.setdefault(key, Cell(offsets={}))
        c.trials += 1
        # Rebind (rather than append in place) so a history list handed out
        # earlier is not mutated behind the holder's back.
        c.quality_history = (c.quality_history or []) + [round(result.quality, 3)]

        if result.outcome == "success":
            c.successes += 1
            self.save()
            return f"{key}: success (q={result.quality:.2f}) — holding policy"

        moved = []
        for name, step in _CORRECTIONS.get(result.failure_mode, {}).items():
            cur = c.offsets.get(name, 0.0) + step
            lim = _OFFSET_CLAMP.get(name, 1e9)  # effectively unbounded if unlisted
            c.offsets[name] = max(-lim, min(lim, cur))
            # NOTE(review): the log reports the nominal step even when the
            # clamp absorbed part or all of it.
            moved.append(f"{name} {step:+g}")
        self.save()
        return f"{key}: {result.failure_mode} (q={result.quality:.2f}) — learned: {', '.join(moved) or 'no-op'}"