microfactory-lab / core /ledger.py
kylebrodeur's picture
Upload folder using huggingface_hub
e9c4780 verified
Raw
History Blame Contribute Delete
3.75 kB
"""Append-only lesson ledger + environment-keyed retrieval.
Modeled on pi-qmd-ledger's `append_ledger` (pattern-review.md §3) but with the
qmd embeddings / vector search deliberately left behind. Retrieval is the
locked design: exact match on material AND geometry_type, then rank by
Euclidean distance on NORMALIZED [temp, humidity], top 2-3. No vector DB.
"""
from __future__ import annotations
import json
import math
from pathlib import Path
from .models import LessonEntry
# The ledger lives in the `data` directory that sits next to the directory
# containing this module; one JSON lesson per line.
DATA_DIR = Path(__file__).resolve().parent.parent.joinpath("data")
LEDGER_PATH = DATA_DIR.joinpath("lessons.jsonl")

# Normalization ranges (02-ARCHITECTURE.md / 04-BUILD-PROMPT.md). Both env axes
# are rescaled against these bounds so that humidity's wider numeric span does
# not swamp temperature in the distance metric.
TEMP_MIN = 15.0
TEMP_MAX = 35.0
HUM_MIN = 20.0
HUM_MAX = 80.0
def _norm(temp: float, humidity: float) -> tuple[float, float]:
    """Min-max scale a raw (temp, humidity) reading onto the normalized env axes.

    A value equal to the axis minimum maps to 0.0 and one equal to the axis
    maximum maps to 1.0. Readings outside the configured range are NOT clamped,
    so they land below 0 or above 1.

    Returns:
        ``(normalized_temp, normalized_humidity)``.
    """
    temp_span = TEMP_MAX - TEMP_MIN
    hum_span = HUM_MAX - HUM_MIN
    return (temp - TEMP_MIN) / temp_span, (humidity - HUM_MIN) / hum_span
class LedgerManager:
    """Append-only JSONL store of ``LessonEntry`` records with env-keyed retrieval.

    Every lesson is serialized as one JSON object per line in ``path``.
    Constructing a manager creates the parent directory and an empty ledger
    file when they do not exist yet.
    """

    def __init__(self, path: Path = LEDGER_PATH) -> None:
        """Bind the manager to ``path``, creating the file (and parents) if missing."""
        self.path = path
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.touch(exist_ok=True)

    # --- storage -----------------------------------------------------------
    def all(self) -> list[LessonEntry]:
        """Return every parseable lesson, in file order.

        Blank lines are ignored. A line that fails to decode as JSON or fails
        to construct a ``LessonEntry`` is skipped silently; this is a
        deliberate best-effort so one corrupt record cannot take the demo down.
        """
        out: list[LessonEntry] = []
        # Iterate the file object instead of calling str.splitlines():
        # splitlines() also breaks on U+0085 / U+2028 / U+2029, which JSON
        # allows unescaped inside string values. A lesson containing one of
        # them would be cut into two unparseable fragments and silently lost.
        # File iteration splits on real newlines only.
        with self.path.open("r", encoding="utf-8") as f:
            for raw in f:
                line = raw.strip()
                if not line:
                    continue
                try:
                    out.append(LessonEntry(**json.loads(line)))
                except Exception:
                    continue  # never let one bad line crash the demo
        return out

    def append(self, entry: LessonEntry) -> None:
        """Append ``entry`` to the end of the ledger as a single JSON line."""
        with self.path.open("a", encoding="utf-8") as f:
            f.write(entry.model_dump_json() + "\n")

    def count(self) -> dict[str, int]:
        """Return lesson counts per source plus a ``"total"`` key.

        The buckets are ``seed``, ``earned``, ``ingested`` and ``sim``; a
        lesson whose ``source`` is none of these is counted under ``earned``.
        """
        buckets = {"seed": 0, "earned": 0, "ingested": 0, "sim": 0}
        for e in self.all():
            buckets[e.source if e.source in buckets else "earned"] += 1
        # Computed before "total" is inserted, so it sums the four buckets only.
        buckets["total"] = sum(buckets.values())
        return buckets

    def reset_to_baseline(self, keep_sources: tuple[str, ...] = ("seed", "ingested")) -> int:
        """Drop runtime-accumulated lessons (earned/sim), keeping only the curated
        baseline. Returns the count removed. Powers the UI 'reset' button — works on
        the Space's ephemeral filesystem where `git checkout` isn't available.

        The file is rewritten from the lessons returned by ``all()``, so lines
        that ``all()`` could not parse are also dropped from the file; they are
        not included in the returned count.
        """
        entries = self.all()
        kept = [e for e in entries if e.source in keep_sources]
        with self.path.open("w", encoding="utf-8") as f:
            for e in kept:
                f.write(e.model_dump_json() + "\n")
        return len(entries) - len(kept)

    # --- retrieval (the thesis-critical query) -----------------------------
    def retrieve(
        self, material: str, geometry_type: str, temp: float, humidity: float, k: int = 3
    ) -> list[tuple[LessonEntry, float]]:
        """Return up to k (lesson, env_distance) sorted nearest-first.

        Exact match on material AND geometry_type; ranked by normalized
        Euclidean env-distance. Returns [] when no precedent matches — which
        is a *valid, strong* outcome (the model reasons from material
        properties instead).

        Args:
            material: Material to match exactly against each lesson.
            geometry_type: Geometry type to match exactly against each lesson.
            temp: Query temperature, in the same units as ``TEMP_MIN``/``TEMP_MAX``.
            humidity: Query humidity, in the same units as ``HUM_MIN``/``HUM_MAX``.
            k: Maximum number of results; ``k <= 0`` yields ``[]``.

        Returns:
            At most ``k`` ``(lesson, distance)`` pairs, ascending by distance.
            Equidistant lessons keep their ledger (file) order because the
            sort is stable.
        """
        # A negative k would otherwise slice from the END of the ranking
        # (scored[:-1] == "all but the farthest"), contradicting "up to k".
        if k <= 0:
            return []
        tn, hn = _norm(temp, humidity)
        scored: list[tuple[LessonEntry, float]] = []
        for e in self.all():
            if e.material != material or e.geometry_type != geometry_type:
                continue
            etn, ehn = _norm(e.env_temp, e.env_humidity)
            dist = math.hypot(tn - etn, hn - ehn)
            scored.append((e, dist))
        scored.sort(key=lambda x: x[1])
        return scored[:k]