Spaces:
Runtime error
Runtime error
File size: 9,342 Bytes
a2fa1e1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 | """Export the Chief Engineer's multi-persona DELIBERATION as a HF-ready trace.
The lesson ledger (scripts/export_trace.py) shares *what the agent learned*. This
shares *how the agent thinks*: the turn-by-turn argument between the personas on
each job — O'Brien proposes, the Spine vetoes unsafe values, La Forge gives a
skeptical second opinion (and can dispute — the operator overrides), the
deterministic world prints, La Forge grades each run, then delivers a run verdict.
Our own schema (one row per turn): session_id, track, turn, agent, role, act, stance,
content, + the job context (material/geometry/bed/env) so each row is self-describing.
Side-effect-free: runs against a throwaway ledger + policy in a temp dir, so the
shipped state is never touched. Offline-safe: with no LLM the personas fall back to
their deterministic voices, so the trace is fully reproducible.
Run: `make deliberation` (or `uv run python -m scripts.export_deliberation`) → dist/deliberation/
"""
from __future__ import annotations
import sys
import tempfile
from datetime import datetime, timedelta, timezone
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) # repo root on path
from core import inspector, seed_lessons
from core.chief_engineer import advise
from core.ledger import LedgerManager
from core.models import Advice, Environment, Job, PrintSettings
from core.spine import SpineValidator
from learn.loop import run_iteration
from learn.policy import LearnedPolicy
try:  # ingestion is optional / removable (mirrors app.py)
    from ingest.distill import reference_block
except Exception:
    # Fallback used when importing ingest.distill raises for any reason: it
    # accepts the same single argument but always returns an empty list, so
    # callers need no special-casing.
    # NOTE(review): the broad `except Exception` also hides real errors raised
    # while importing ingest.distill; kept as-is because it mirrors app.py.
    def reference_block(_material):  # type: ignore
        return []
# Where the exported trace and dataset card are written: two directory levels
# above this file, then dist/deliberation.
DIST = Path(__file__).resolve().parent.parent / "dist" / "deliberation"

# Hugging Face dataset repo named in the publish hint that export() prints.
HF_REPO = "kylebrodeur/chief-engineer-deliberation"

# Representative jobs, chosen to exercise the full range of La Forge's stances
# (concur / caution / dispute→override) and the print-loop's climb to clean.
# Columns: material, geometry, bed position, env temp, env humidity, iterations.
JOBS = [
    # off-center ABS + thin fan → dispute → override
    ("ABS", "overhang", "edge", 26.0, 60.0, 4),
    # thin fan for an overhang → caution
    ("PETG", "overhang", "center", 24.0, 55.0, 3),
    # inside sane bounds → concur
    ("PLA", "adhesion", "center", 21.0, 45.0, 3),
    # humid + short retraction → caution
    ("TPU", "stringing", "edge", 23.0, 65.0, 3),
]

# Persona name → the role label written into each row's "role" field.
ROLE = {
    "O'Brien": "Chief Engineer",
    "La Forge": "QA Inspector",
    "Spine": "Safety Spine",
    "World": "Outcome Simulator",
    "Operator": "Operator",
}
# Dataset card written verbatim to DIST/README.md by export(): YAML front matter
# followed by Markdown. Blank lines separate the paragraphs, the bullet list and
# the headings so the card renders as distinct blocks.
CARD = """---
license: mit
task_categories: [text-generation]
language: [en]
tags: [3d-printing, additive-manufacturing, agent-trace, multi-agent, deliberation, build-small-hackathon]
pretty_name: Chief Engineer — Deliberation Traces
---

# Chief Engineer — Deliberation Traces

Turn-by-turn **multi-persona deliberation** from **The Chief Engineer**, a small local
Gemma agent built for the HF Build Small hackathon (Backyard AI). Where the
[lesson ledger](https://huggingface.co/datasets/kylebrodeur/chief-engineer-ledger)
records *what the agent learned*, this records *how it reasons*: the argument between
the personas on each job. It grows two ways: a reproducible static export
(`make deliberation`) and **live turns logged on every run of the Space** (gated on
`HF_TOKEN`; config + agent reasoning only, never PII or uploaded files).

Each row is one **turn**:

- **O'Brien** (Chief Engineer) — proposes settings + reasoning over precedent.
- **Spine** (Safety Spine) — deterministically vetoes/clamps unsafe values.
- **La Forge** (QA Inspector) — a separate, skeptical voice: second opinion before the
  print (`concur` / `caution` / `dispute`), a grade on each run, and a run verdict.
- **Operator** — the human, who can override a `dispute` and proceed.
- **World** (Outcome Simulator) — the deterministic physics-lite world that reports the
  actual print outcome (the agent never grades its own work).

The integrity rule made literal: the proposer never marks its own homework.

## Schema

`session_id, track, turn, agent, role, act, stance, content, material, geometry,
bed_position, env_temp, env_humidity, ts`

`track` is the phase — `preflight` (propose → veto → second opinion → override),
`print-loop` (simulate → grade, per iteration), `review` (run verdict).
"""
def _settings_line(s: PrintSettings) -> str:
return (f"nozzle {s.nozzle_temp:.0f}Β°C, bed {s.bed_temp:.0f}Β°C, fan {s.fan_pct:.0f}%, "
f"first-layer fan {s.first_layer_fan_pct:.0f}%, retraction {s.retraction_mm:.1f}mm")
def export() -> Path:
    """Replay every job in ``JOBS`` and write the deliberation trace under ``DIST``.

    One row is emitted per turn. Within a job the turn counter runs
    continuously across three tracks:

    * ``preflight``: O'Brien's proposal, the Spine check, La Forge's second
      opinion and, only when that stance is ``dispute``, an Operator override.
    * ``print-loop``: per iteration, a World ``simulate`` turn followed by a
      La Forge ``grade`` turn.
    * ``review``: a single La Forge ``verdict`` summarising a second pass of
      ``run_iteration`` made with a fresh policy.

    Row timestamps come from a fixed start time advanced by 7 seconds per row,
    not from the wall clock.

    The ledger and policy files are created in a temporary directory, which is
    removed before the function returns (also on error), so repeated runs do
    not accumulate ``ce-delib-*`` directories.

    Side effects: creates ``DIST`` if needed, writes ``deliberations.jsonl``
    and ``README.md`` there, and prints a short summary plus a publish hint.

    Returns:
        Path of the written ``deliberations.jsonl`` file.
    """
    import json
    import shutil

    DIST.mkdir(parents=True, exist_ok=True)
    out = DIST / "deliberations.jsonl"

    # throwaway state so the shipped ledger/policy are never mutated
    tmp = Path(tempfile.mkdtemp(prefix="ce-delib-"))
    try:
        ledger = LedgerManager(path=tmp / "ledger.jsonl")
        seed_lessons.ensure_seeded(ledger)
        spine = SpineValidator()

        # Synthetic clock: fixed origin, advanced inside emit().
        clock = datetime(2026, 6, 14, 12, 0, 0, tzinfo=timezone.utc)
        rows: list[dict] = []

        def emit(job_id, track, turn, agent, act, content, *, ctx, stance=""):
            """Append one trace row, stamping it with the next synthetic time."""
            nonlocal clock
            clock += timedelta(seconds=7)
            rows.append({
                "session_id": job_id, "track": track, "turn": turn,
                "agent": agent, "role": ROLE[agent], "act": act, "stance": stance,
                "content": content.strip(),
                "material": ctx["material"], "geometry": ctx["geometry"],
                "bed_position": ctx["bed_position"],
                "env_temp": ctx["env_temp"], "env_humidity": ctx["env_humidity"],
                "ts": clock.isoformat(),
            })

        for material, geometry, bed, temp, hum, iters in JOBS:
            job_id = f"{material}-{geometry}-{bed}".lower()
            job = Job(geometry_type=geometry, material=material, bed_position=bed)
            env = Environment(temp=temp, humidity=hum)
            ctx = {"material": material, "geometry": geometry, "bed_position": bed,
                   "env_temp": temp, "env_humidity": hum}
            # fresh policy per job so the loop's climb starts from baseline each time
            policy = LearnedPolicy(path=tmp / f"policy-{job_id}.json")

            # ── preflight: propose → veto → second opinion → (override) ──
            retrieved = ledger.retrieve(material, geometry, env.temp, env.humidity)
            rec = advise(job, env, retrieved, reference_block(material),
                         policy.policy_note(material, geometry, env))
            checked = spine.check(rec.advice.settings, material)

            t = 1
            # The "Proposed" line shows the settings returned by the Spine check,
            # not rec.advice.settings.
            emit(job_id, "preflight", t, "O'Brien", "propose",
                 f"{rec.advice.reasoning}\nProposed: {_settings_line(checked.settings)}.",
                 ctx=ctx)

            t += 1
            spine_note = (
                "Clamped: " + " · ".join(checked.vetoes)
                if checked.vetoes
                else "Within the safe envelope for this material — no clamp."
            )
            # NOTE(review): the text keys off `vetoes` while the stance keys off
            # `requires_approval`; confirm the two always agree.
            emit(job_id, "preflight", t, "Spine", "veto", spine_note, ctx=ctx,
                 stance="clamped" if checked.requires_approval else "clear")

            t += 1
            verdict = inspector.second_opinion(job, env, checked.settings, rec.advice)
            emit(job_id, "preflight", t, "La Forge", "second_opinion",
                 f"{verdict.headline} — {verdict.detail}", ctx=ctx, stance=verdict.stance)

            if verdict.stance.lower() == "dispute":
                t += 1
                emit(job_id, "preflight", t, "Operator", "override",
                     "Acknowledged La Forge's objection. Proceeding to print on the operator's call.",
                     ctx=ctx, stance="override")

            # ── print-loop: simulate → grade, per iteration ──
            for n in range(1, iters + 1):
                t += 1
                r = run_iteration(job, env, policy, ledger, n, record=False)
                clamp = " (Spine clamped a setting)" if r.clamped else ""
                emit(job_id, "print-loop", t, "World", "simulate",
                     f"Iteration {n}: {r.result.detail}.{clamp} Policy: {r.learned}.", ctx=ctx,
                     stance=r.result.outcome)

                t += 1
                g = inspector.grade_iteration(geometry, r.result)
                emit(job_id, "print-loop", t, "La Forge", "grade",
                     f"{g.headline} — {g.detail}", ctx=ctx, stance=g.stance)

            # ── review: one verdict across the run ──
            # rebuild records for the summary from a fresh deterministic pass
            rpolicy = LearnedPolicy(path=tmp / f"policy-rev-{job_id}.json")
            sess_records = [
                run_iteration(job, env, rpolicy, ledger, n, record=False)
                for n in range(1, iters + 1)
            ]
            summary = inspector.summarize_run(sess_records, material=material, geometry=geometry)
            t += 1
            emit(job_id, "review", t, "La Forge", "verdict",
                 f"{summary.headline} — {summary.detail}", ctx=ctx, stance=summary.stance)
    finally:
        # mkdtemp leaves deletion to the caller; best-effort so a cleanup
        # failure never masks the real outcome of the export.
        shutil.rmtree(tmp, ignore_errors=True)

    with out.open("w", encoding="utf-8") as f:
        for row in rows:
            f.write(json.dumps(row, ensure_ascii=False) + "\n")
    (DIST / "README.md").write_text(CARD, encoding="utf-8")

    jobs = len({row["session_id"] for row in rows})
    print(f"exported {len(rows)} turns across {jobs} jobs → {out}")
    print(f"dataset card → {DIST / 'README.md'}")
    print(f"publish: hf upload {HF_REPO} {DIST} . --repo-type dataset")
    return out
# Script entry point: run the export only when this module is executed directly
# (e.g. `python -m scripts.export_deliberation`), not when it is imported.
if __name__ == "__main__":
    export()
|