Spaces:

build-small-hackathon
/

microfactory-lab

Runtime error

App Files Files Community

microfactory-lab / learn /policy.py

kylebrodeur

Upload folder using huggingface_hub

e9c4780 verified 22 days ago

Raw

History Blame Contribute Delete

6.6 kB

	"""Learned policy — the parametric layer that actually improves.

	This is what makes the Chief Engineer more than a lookup. Each (material,
	geometry, environment-BUCKET) cell holds learned offsets to the baseline
	settings, updated from observed outcomes. Because cells are bucketed (not
	exact env points), a lesson from one humid PETG bridge transfers to the next
	humid PETG bridge — it generalizes, rather than recalling a single past job.

	Two knowledge sources feed a recommendation, exactly as intended:
	• RAG — retrieved prior jobs reasoned over by the LLM (chief_engineer.py)
	• policy — these learned offsets, applied deterministically + shown to the LLM

	Persisted to data/policy.json. Pure-Python, deterministic, no network.
	"""

	from __future__ import annotations

	import json
	from dataclasses import dataclass
	from pathlib import Path

	from core.chief_engineer import _FALLBACK_SETTINGS
	from core.models import Environment, Job, PrintSettings
	from sim.outcome import SimResult

	# Default on-disk location of the learned policy: two directories above this
	# file, then data/policy.json.
	POLICY_PATH = Path(__file__).resolve().parent.parent.joinpath("data", "policy.json")

	# Failure mode -> {setting name: signed nudge}. When a run fails with a given
	# mode, each listed offset is moved by the given amount. Each move reduces the
	# matching penalty in sim.outcome — so cells climb.
	_CORRECTIONS = {
	    "sag": {
	        "fan_pct": +12,
	        "nozzle_temp": -3,
	    },
	    "stringing": {
	        "retraction_mm": +0.5,
	        "nozzle_temp": -4,
	    },
	    "adhesion": {
	        "bed_temp": +6,
	        "first_layer_fan_pct": -10,
	    },
	    "under_extrusion": {
	        "nozzle_temp": +5,
	    },
	    "warp": {
	        "fan_pct": -10,
	    },
	}

	# Symmetric bound (+/- value) on each accumulated offset, so learned offsets
	# stay sane; the Spine still clamps the final settings.
	_OFFSET_CLAMP = {
	    "nozzle_temp": 30,
	    "bed_temp": 25,
	    "retraction_mm": 3,
	    "fan_pct": 80,
	    "first_layer_fan_pct": 60,
	}


	def env_bucket(temp: float, humidity: float) -> tuple[str, str]:
	    """Map an exact (temperature, humidity) reading to a coarse bucket pair.

	    Args:
	        temp: Ambient temperature; below 20 is "cool", above 26 is "warm",
	            anything else (both bounds inclusive) is "mid".
	        humidity: Relative humidity; below 35 is "dry", above 55 is "humid",
	            anything else (both bounds inclusive) is "mid".

	    Returns:
	        ``(temp_bucket, humidity_bucket)`` as two short labels.
	    """
	    if temp < 20:
	        temp_bucket = "cool"
	    elif temp > 26:
	        temp_bucket = "warm"
	    else:
	        temp_bucket = "mid"

	    if humidity < 35:
	        humidity_bucket = "dry"
	    elif humidity > 55:
	        humidity_bucket = "humid"
	    else:
	        humidity_bucket = "mid"

	    return temp_bucket, humidity_bucket


	def cell_key(material: str, geometry: str, env: Environment) -> str:
	    """Build the policy-cell identifier for a material/geometry/environment.

	    The environment is reduced to its bucket labels via ``env_bucket`` (reading
	    ``env.temp`` and ``env.humidity``), so every reading that falls in the same
	    buckets shares one key.

	    Returns:
	        A string of the form ``"<material>/<geometry>/<temp_bucket>/<humidity_bucket>"``.
	    """
	    temp_bucket, humidity_bucket = env_bucket(env.temp, env.humidity)
	    return f"{material}/{geometry}/{temp_bucket}/{humidity_bucket}"


	@dataclass
	class Cell:
	    """Learned state for one (material, geometry, environment-bucket) cell."""

	    # Setting name -> accumulated signed offset applied on top of the baseline.
	    offsets: dict[str, float]
	    # Number of outcomes folded into this cell.
	    trials: int = 0
	    # How many of those outcomes were successes.
	    successes: int = 0
	    # Per-trial quality scores in arrival order; None means "nothing recorded yet".
	    quality_history: list[float] | None = None

	    @property
	    def success_rate(self) -> float:
	        """Fraction of trials that succeeded; 0.0 when there are no trials."""
	        if not self.trials:
	            return 0.0
	        return self.successes / self.trials


	class LearnedPolicy:
	    """Per-cell learned offsets on top of material baselines, persisted as JSON.

	    Cells are keyed by ``cell_key(material, geometry, env)``. ``propose`` reads
	    them, ``update`` folds one observed outcome into a cell and saves the
	    whole policy back to ``path``.
	    """

	    def __init__(self, path: Path = POLICY_PATH) -> None:
	        """Create the policy and load any previously saved cells from ``path``."""
	        self.path = path
	        self.cells: dict[str, Cell] = {}
	        self._load()

	    def _load(self) -> None:
	        """Populate ``self.cells`` from ``self.path``, best-effort.

	        A missing, unreadable, or malformed file leaves the policy empty (or
	        partially loaded) instead of raising, so a bad file never blocks
	        start-up.
	        """
	        if not self.path.exists():
	            return
	        try:
	            raw = json.loads(self.path.read_text(encoding="utf-8"))
	        except (OSError, ValueError):
	            # OSError: unreadable file. ValueError: covers json.JSONDecodeError
	            # and UnicodeDecodeError. Either way start from an empty policy.
	            return
	        if not isinstance(raw, dict):
	            # Valid JSON but not the {key: cell} mapping that save() writes.
	            return
	        for key, entry in raw.items():
	            if not isinstance(entry, dict):
	                continue  # skip entries that are not cell records
	            self.cells[key] = Cell(
	                # "or" also covers an explicit null stored in the file.
	                offsets=entry.get("offsets") or {},
	                trials=entry.get("trials", 0),
	                successes=entry.get("successes", 0),
	                quality_history=entry.get("quality_history") or [],
	            )

	    def save(self) -> None:
	        """Write every cell to ``self.path`` as indented JSON, creating parent dirs."""
	        self.path.parent.mkdir(parents=True, exist_ok=True)
	        out = {
	            key: {
	                "offsets": cell.offsets,
	                "trials": cell.trials,
	                "successes": cell.successes,
	                "quality_history": cell.quality_history or [],
	            }
	            for key, cell in self.cells.items()
	        }
	        self.path.write_text(json.dumps(out, indent=2), encoding="utf-8")

	    def reset(self) -> None:
	        """Clear all learned cells back to baseline (and remove the saved file).
	        Powers the UI 'reset' button alongside the ledger reset."""
	        self.cells = {}
	        try:
	            self.path.unlink(missing_ok=True)
	        except OSError:
	            # Best-effort: the in-memory policy is already cleared; a file that
	            # cannot be removed (permissions, path is a directory) is tolerated.
	            pass

	    # --- read side ---------------------------------------------------------
	    def _baseline(self, material: str) -> dict[str, float]:
	        """Return a copy of the fallback settings for ``material`` (PLA if unknown)."""
	        return dict(_FALLBACK_SETTINGS.get(material.upper(), _FALLBACK_SETTINGS["PLA"]))

	    def offsets_for(self, material: str, geometry: str, env: Environment) -> dict[str, float]:
	        """Return a copy of the learned offsets for this cell, or ``{}`` if none."""
	        cell = self.cells.get(cell_key(material, geometry, env))
	        return dict(cell.offsets) if cell else {}

	    def propose(self, material: str, geometry: str, env: Environment) -> PrintSettings:
	        """Deterministic proposal = material baseline + learned offsets (clamped)."""
	        base = self._baseline(material)
	        for name, delta in self.offsets_for(material, geometry, env).items():
	            base[name] = base.get(name, 0.0) + delta
	        # Fan values are percentages; retraction cannot be negative.
	        base["fan_pct"] = max(0.0, min(100.0, base["fan_pct"]))
	        base["first_layer_fan_pct"] = max(0.0, min(100.0, base["first_layer_fan_pct"]))
	        base["retraction_mm"] = max(0.0, base["retraction_mm"])
	        return PrintSettings(**base)

	    def cell_stats(self, material: str, geometry: str, env: Environment) -> Cell | None:
	        """Return the live ``Cell`` for this key (not a copy), or ``None``."""
	        return self.cells.get(cell_key(material, geometry, env))

	    def policy_note(self, material: str, geometry: str, env: Environment) -> str:
	        """One line for the system prompt — steers the LLM with what's been learned.

	        Returns an empty string when the cell is missing or has no offsets yet.
	        """
	        c = self.cell_stats(material, geometry, env)
	        if not c or not c.offsets:
	            return ""
	        tb, hb = env_bucket(env.temp, env.humidity)
	        deltas = ", ".join(f"{k} {v:+g}" for k, v in c.offsets.items())
	        return (f"LEARNED POLICY for {material}/{geometry} in {tb}/{hb} conditions "
	                f"(earned over {c.trials} runs, {c.success_rate*100:.0f}% clean): adjust baseline by {deltas}. "
	                f"Weigh this against the precedent above.")

	    # --- write side (learning) --------------------------------------------
	    def update(self, material: str, geometry: str, env: Environment, result: SimResult) -> str:
	        """Fold one observed outcome into the cell. Returns a human log line.

	        Every call increments the trial count, records the rounded quality and
	        saves the policy. A ``"success"`` outcome leaves offsets untouched; any
	        other outcome applies the ``_CORRECTIONS`` entry for
	        ``result.failure_mode`` (if one exists), bounded by ``_OFFSET_CLAMP``.
	        """
	        key = cell_key(material, geometry, env)
	        c = self.cells.setdefault(key, Cell(offsets={}))
	        c.trials += 1
	        if c.quality_history is None:
	            c.quality_history = []
	        c.quality_history.append(round(result.quality, 3))
	        if result.outcome == "success":
	            c.successes += 1
	            self.save()
	            return f"{key}: success (q={result.quality:.2f}) — holding policy"
	        moved = []
	        for field, step in _CORRECTIONS.get(result.failure_mode, {}).items():
	            cur = c.offsets.get(field, 0.0) + step
	            lim = _OFFSET_CLAMP.get(field, 1e9)
	            c.offsets[field] = max(-lim, min(lim, cur))
	            # The log reports the nominal step, even if the clamp absorbed part of it.
	            moved.append(f"{field} {step:+g}")
	        self.save()
	        return f"{key}: {result.failure_mode} (q={result.quality:.2f}) — learned: {', '.join(moved) or 'no-op'}"