Spaces:

build-small-hackathon
/

microfactory-lab

Runtime error

App Files Files Community

microfactory-lab / core /ledger.py

kylebrodeur

Upload folder using huggingface_hub

e9c4780 verified 20 days ago

Raw

History Blame Contribute Delete

3.75 kB

	"""Append-only lesson ledger + environment-keyed retrieval.

	Modeled on pi-qmd-ledger's `append_ledger` (pattern-review.md §3) but with the
	qmd embeddings / vector search deliberately left behind. Retrieval is the
	locked design: exact match on material AND geometry_type, then rank by
	Euclidean distance on NORMALIZED [temp, humidity], top 2-3. No vector DB.
	"""

	from __future__ import annotations

	import json
	import math
	from pathlib import Path

	from .models import LessonEntry

	# The ledger lives in a "data" directory two levels above this file.
	DATA_DIR = Path(__file__).resolve().parent.parent.joinpath("data")
	LEDGER_PATH = DATA_DIR.joinpath("lessons.jsonl")

	# Normalization ranges (02-ARCHITECTURE.md / 04-BUILD-PROMPT.md). Both env
	# axes are rescaled to 0-1 so that humidity's wider numeric span does not
	# dominate temperature when distances are computed.
	TEMP_MIN = 15.0
	TEMP_MAX = 35.0
	HUM_MIN = 20.0
	HUM_MAX = 80.0


	def _norm(temp: float, humidity: float) -> tuple[float, float]:
	    """Rescale a (temp, humidity) reading using the module's normalization ranges.

	    A value equal to the range minimum maps to 0 and one equal to the range
	    maximum maps to 1; readings outside the ranges are not clamped.
	    """
	    temp_span = TEMP_MAX - TEMP_MIN
	    hum_span = HUM_MAX - HUM_MIN
	    return (temp - TEMP_MIN) / temp_span, (humidity - HUM_MIN) / hum_span


	class LedgerManager:
	    """JSONL-backed lesson ledger with environment-keyed retrieval.

	    Each non-blank line of ``path`` holds one JSON-serialized ``LessonEntry``.
	    Lessons are appended one line at a time, and the file is re-read from
	    disk on every query.
	    """

	    def __init__(self, path: Path = LEDGER_PATH) -> None:
	        """Bind to ``path``, creating its parent directory and an empty file if absent."""
	        self.path = path
	        self.path.parent.mkdir(parents=True, exist_ok=True)
	        self.path.touch(exist_ok=True)

	    # --- storage -----------------------------------------------------------
	    def all(self) -> list[LessonEntry]:
	        """Return every parseable lesson, in file order.

	        Blank lines are skipped, as is any line that fails JSON decoding or
	        ``LessonEntry`` construction.
	        """
	        out: list[LessonEntry] = []
	        # Iterate the file object instead of calling ``str.splitlines()``:
	        # the latter also breaks on U+0085 / U+2028 / U+2029, which can appear
	        # unescaped inside a JSON string and would split one record in two
	        # (both halves then fail to parse and the lesson is silently lost).
	        with self.path.open(encoding="utf-8") as f:
	            for raw in f:
	                line = raw.strip()
	                if not line:
	                    continue
	                try:
	                    out.append(LessonEntry(**json.loads(line)))
	                except Exception:
	                    continue  # never let one bad line crash the demo
	        return out

	    def _lacks_trailing_newline(self) -> bool:
	        """Return True when the ledger file is non-empty and does not end in a newline."""
	        try:
	            with self.path.open("rb") as f:
	                f.seek(0, 2)  # whence=2: position relative to end of file
	                if f.tell() == 0:
	                    return False
	                f.seek(-1, 2)
	                return f.read(1) != b"\n"
	        except FileNotFoundError:
	            return False

	    def append(self, entry: LessonEntry) -> None:
	        """Append ``entry`` to the ledger as a single JSON line.

	        If the existing file does not end with a newline (for example a
	        hand-edited file), a newline is written first so the new record is
	        not glued onto the previous one.
	        """
	        record = entry.model_dump_json() + "\n"
	        if self._lacks_trailing_newline():
	            record = "\n" + record
	        with self.path.open("a", encoding="utf-8") as f:
	            f.write(record)

	    def count(self) -> dict[str, int]:
	        """Return lesson counts per source plus a ``"total"`` key.

	        Lessons whose ``source`` is not one of the known buckets are counted
	        under ``"earned"``.
	        """
	        buckets = {"seed": 0, "earned": 0, "ingested": 0, "sim": 0}
	        for e in self.all():
	            buckets[e.source if e.source in buckets else "earned"] += 1
	        # Computed before "total" is inserted, so it sums only the source buckets.
	        buckets["total"] = sum(buckets.values())
	        return buckets

	    def reset_to_baseline(self, keep_sources: tuple[str, ...] = ("seed", "ingested")) -> int:
	        """Drop runtime-accumulated lessons (earned/sim), keeping only the curated
	        baseline. Returns the count removed. Powers the UI 'reset' button — works on
	        the Space's ephemeral filesystem where `git checkout` isn't available.

	        Only lessons whose ``source`` is in ``keep_sources`` are written back.
	        Lines that ``all()`` could not parse are not preserved by the rewrite.
	        """
	        entries = self.all()
	        kept = [e for e in entries if e.source in keep_sources]
	        # Serialize everything before opening for write, so a serialization
	        # error cannot leave the ledger truncated or half-written.
	        payload = "".join(e.model_dump_json() + "\n" for e in kept)
	        with self.path.open("w", encoding="utf-8") as f:
	            f.write(payload)
	        return len(entries) - len(kept)

	    # --- retrieval (the thesis-critical query) -----------------------------
	    def retrieve(
	        self, material: str, geometry_type: str, temp: float, humidity: float, k: int = 3
	    ) -> list[tuple[LessonEntry, float]]:
	        """Return up to k (lesson, env_distance) sorted nearest-first.

	        Exact match on material AND geometry_type; ranked by normalized
	        Euclidean env-distance. Returns [] when no precedent matches — which
	        is a valid, strong outcome (the model reasons from material
	        properties instead). Also returns [] when ``k`` is zero or negative.
	        """
	        # A negative k would otherwise slice from the end (``scored[:-1]``)
	        # and return nearly every match instead of none.
	        if k <= 0:
	            return []
	        tn, hn = _norm(temp, humidity)
	        scored: list[tuple[LessonEntry, float]] = []
	        for e in self.all():
	            if e.material != material or e.geometry_type != geometry_type:
	                continue
	            etn, ehn = _norm(e.env_temp, e.env_humidity)
	            dist = math.hypot(tn - etn, hn - ehn)
	            scored.append((e, dist))
	        # Stable sort: lessons at equal distance keep their ledger order.
	        scored.sort(key=lambda x: x[1])
	        return scored[:k]