microfactory-lab / learn /policy.py
kylebrodeur's picture
Upload folder using huggingface_hub
e9c4780 verified
Raw
History Blame Contribute Delete
6.6 kB
"""Learned policy — the parametric layer that actually *improves*.
This is what makes the Chief Engineer more than a lookup. Each (material,
geometry, environment-BUCKET) cell holds learned offsets to the baseline
settings, updated from observed outcomes. Because cells are bucketed (not
exact env points), a lesson from one humid PETG bridge transfers to the *next*
humid PETG bridge — it generalizes, rather than recalling a single past job.
Two knowledge sources feed a recommendation, exactly as intended:
• RAG — retrieved prior jobs reasoned over by the LLM (chief_engineer.py)
• policy — these learned offsets, applied deterministically + shown to the LLM
Persisted to data/policy.json. Pure-Python, deterministic, no network.
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from core.chief_engineer import _FALLBACK_SETTINGS
from core.models import Environment, Job, PrintSettings
from sim.outcome import SimResult
# Where the learned cells are persisted: the "data/policy.json" file located
# two directory levels above this module.
POLICY_PATH = Path(__file__).resolve().parent.parent.joinpath("data", "policy.json")

# Corrective nudges keyed by observed failure mode. Each entry maps a setting
# name to the signed step added to that setting's learned offset when the
# failure is seen. Per the original author's note, each move is meant to reduce
# the matching penalty in sim.outcome, so cells improve over time.
_CORRECTIONS = {
    # More part cooling, slightly cooler nozzle.
    "sag": {
        "fan_pct": 12,
        "nozzle_temp": -3,
    },
    # Longer retraction, cooler nozzle.
    "stringing": {
        "retraction_mm": 0.5,
        "nozzle_temp": -4,
    },
    # Hotter bed, less fan on the first layer.
    "adhesion": {
        "bed_temp": 6,
        "first_layer_fan_pct": -10,
    },
    # Hotter nozzle.
    "under_extrusion": {
        "nozzle_temp": 5,
    },
    # Less part cooling.
    "warp": {
        "fan_pct": -10,
    },
}

# Symmetric bound (+/- value) on each learned offset so it stays sane. The
# original note states that the Spine still clamps the final settings.
_OFFSET_CLAMP = {
    "nozzle_temp": 30,
    "bed_temp": 25,
    "retraction_mm": 3,
    "fan_pct": 80,
    "first_layer_fan_pct": 60,
}
def env_bucket(temp: float, humidity: float) -> tuple[str, str]:
    """Collapse raw ambient readings into coarse (temperature, humidity) labels.

    Temperature: below 20 is "cool", above 26 is "warm", anything else "mid".
    Humidity: below 35 is "dry", above 55 is "humid", anything else "mid".
    The boundary values themselves (20, 26, 35, 55) fall in "mid".
    """
    if temp < 20:
        temp_label = "cool"
    elif temp > 26:
        temp_label = "warm"
    else:
        temp_label = "mid"

    if humidity < 35:
        humidity_label = "dry"
    elif humidity > 55:
        humidity_label = "humid"
    else:
        humidity_label = "mid"

    return temp_label, humidity_label
def cell_key(material: str, geometry: str, env: Environment) -> str:
    """Build the policy-cell key "material/geometry/temp-bucket/humidity-bucket".

    The two bucket labels come from ``env_bucket(env.temp, env.humidity)``;
    ``material`` and ``geometry`` are used exactly as given.
    """
    temp_label, humidity_label = env_bucket(env.temp, env.humidity)
    return "{}/{}/{}/{}".format(material, geometry, temp_label, humidity_label)
@dataclass
class Cell:
    """Learned state for one policy cell."""

    # Setting name -> learned offset for that setting.
    offsets: dict[str, float]
    # Number of outcomes folded into this cell.
    trials: int = 0
    # How many of those outcomes were successes.
    successes: int = 0
    # Recorded quality values; None until the first one is stored.
    quality_history: list[float] | None = None

    @property
    def success_rate(self) -> float:
        """Fraction of trials that succeeded, or 0.0 when there are no trials."""
        if not self.trials:
            return 0.0
        return self.successes / self.trials
class LearnedPolicy:
    """Learned per-cell offsets layered on top of the material baseline.

    Cells are keyed by ``cell_key(material, geometry, env)`` and persisted as
    JSON at ``path``. ``update`` folds one observed result into a cell and
    saves; ``propose`` applies a cell's offsets to the baseline settings.
    """

    def __init__(self, path: Path = POLICY_PATH) -> None:
        """Bind the policy to ``path`` and load any cells already saved there."""
        self.path = path
        self.cells: dict[str, Cell] = {}
        self._load()

    def _load(self) -> None:
        """Populate ``self.cells`` from ``self.path`` on a best-effort basis.

        A missing, unreadable, or non-JSON file leaves the policy empty instead
        of raising. A top-level value that is not a JSON object is ignored, and
        individual entries that are not JSON objects are skipped, so a
        structurally damaged file cannot crash the constructor.
        """
        if not self.path.exists():
            return
        try:
            raw = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            return
        if not isinstance(raw, dict):
            return
        for key, entry in raw.items():
            if not isinstance(entry, dict):
                continue
            offsets = entry.get("offsets")
            self.cells[key] = Cell(
                # A null / non-object "offsets" would break later .get() calls.
                offsets=offsets if isinstance(offsets, dict) else {},
                trials=entry.get("trials", 0),
                successes=entry.get("successes", 0),
                quality_history=entry.get("quality_history", []),
            )

    def save(self) -> None:
        """Write every cell to ``self.path`` as indented JSON.

        The parent directory is created if needed. The JSON is first written
        to a sibling ``<name>.tmp`` file and then moved over the target, so an
        interrupted write does not leave a truncated policy file (which
        ``_load`` would discard, losing everything learned so far).
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        out = {
            key: {
                "offsets": cell.offsets,
                "trials": cell.trials,
                "successes": cell.successes,
                "quality_history": cell.quality_history or [],
            }
            for key, cell in self.cells.items()
        }
        tmp_path = self.path.with_name(self.path.name + ".tmp")
        tmp_path.write_text(json.dumps(out, indent=2), encoding="utf-8")
        tmp_path.replace(self.path)

    def reset(self) -> None:
        """Clear all learned cells back to baseline (and remove the saved file).

        Powers the UI 'reset' button alongside the ledger reset.
        """
        self.cells = {}
        try:
            self.path.unlink(missing_ok=True)
        except OSError:
            # Best-effort: the in-memory policy is already cleared; a file that
            # cannot be removed must not turn a reset into a crash.
            pass

    # --- read side ---------------------------------------------------------
    def _baseline(self, material: str) -> dict[str, float]:
        """Return a copy of the fallback settings for ``material``.

        The lookup upper-cases ``material``; unknown materials fall back to
        the "PLA" entry.
        """
        # NOTE(review): the baseline lookup is case-insensitive, but the cell
        # key built by cell_key() uses ``material`` as given — confirm callers
        # pass a consistent case, or "petg" and "PETG" learn in separate cells.
        return dict(_FALLBACK_SETTINGS.get(material.upper(), _FALLBACK_SETTINGS["PLA"]))

    def offsets_for(self, material: str, geometry: str, env: Environment) -> dict[str, float]:
        """Return a copy of the matching cell's offsets, or ``{}`` if none exists."""
        c = self.cells.get(cell_key(material, geometry, env))
        return dict(c.offsets) if c else {}

    def propose(self, material: str, geometry: str, env: Environment) -> PrintSettings:
        """Deterministic proposal = material baseline + learned offsets (clamped).

        Both fan percentages are clamped to [0, 100] and the retraction length
        is floored at 0 before the settings object is built.
        """
        base = self._baseline(material)
        for k, dv in self.offsets_for(material, geometry, env).items():
            base[k] = base.get(k, 0.0) + dv
        base["fan_pct"] = max(0.0, min(100.0, base["fan_pct"]))
        base["first_layer_fan_pct"] = max(0.0, min(100.0, base["first_layer_fan_pct"]))
        base["retraction_mm"] = max(0.0, base["retraction_mm"])
        return PrintSettings(**base)

    def cell_stats(self, material: str, geometry: str, env: Environment) -> Cell | None:
        """Return the live ``Cell`` for this combination, or ``None`` if absent."""
        return self.cells.get(cell_key(material, geometry, env))

    def policy_note(self, material: str, geometry: str, env: Environment) -> str:
        """One line for the system prompt — steers the LLM with what's been learned.

        Returns ``""`` when the cell does not exist or has no offsets yet.
        """
        c = self.cell_stats(material, geometry, env)
        if not c or not c.offsets:
            return ""
        tb, hb = env_bucket(env.temp, env.humidity)
        deltas = ", ".join(f"{k} {v:+g}" for k, v in c.offsets.items())
        return (f"LEARNED POLICY for {material}/{geometry} in {tb}/{hb} conditions "
                f"(earned over {c.trials} runs, {c.success_rate*100:.0f}% clean): adjust baseline by {deltas}. "
                f"Weigh this against the precedent above.")

    # --- write side (learning) --------------------------------------------
    def update(self, material: str, geometry: str, env: Environment, result: SimResult) -> str:
        """Fold one observed outcome into the cell. Returns a human log line.

        Every call increments ``trials``, appends the quality (rounded to three
        decimals) to the cell's history, and saves. A success additionally
        increments ``successes`` and leaves the offsets alone. Any other
        outcome applies the ``_CORRECTIONS`` steps for ``result.failure_mode``,
        each bounded to +/- its ``_OFFSET_CLAMP`` limit. The log line reports
        the change actually applied to each offset, so a step that the clamp
        fully absorbed is not reported as learned.
        """
        key = cell_key(material, geometry, env)
        c = self.cells.setdefault(key, Cell(offsets={}))
        c.trials += 1
        c.quality_history = (c.quality_history or []) + [round(result.quality, 3)]
        if result.outcome == "success":
            c.successes += 1
            self.save()
            return f"{key}: success (q={result.quality:.2f}) — holding policy"
        moved = []
        for field, step in _CORRECTIONS.get(result.failure_mode, {}).items():
            before = c.offsets.get(field, 0.0)
            lim = _OFFSET_CLAMP.get(field, 1e9)
            after = max(-lim, min(lim, before + step))
            c.offsets[field] = after
            applied = after - before
            if applied:
                # Report what really changed, not the nominal step.
                moved.append(f"{field} {applied:+g}")
        self.save()
        return f"{key}: {result.failure_mode} (q={result.quality:.2f}) — learned: {', '.join(moved) or 'no-op'}"