JacobLinCool Codex committed on
Commit
2b2e65d
·
verified ·
1 Parent(s): ba32aed

feat: export lora training dataset

Browse files

Co-authored-by: Codex <noreply@openai.com>

README.md CHANGED
@@ -75,6 +75,13 @@ The `chapter` Gradio API endpoint and `Chapter` button export the public-facing
75
  one fate page per idea, each with verdict, score, targets, and closest cited pages. It is the shareable companion to
76
  the private Field Notes artifact.
77
 
 
 
 
 
 
 
 
78
  ## Prize Ledger
79
 
80
  `/api/prize-ledger` and the in-app Prize Ledger panel expose submission evidence: the documented model stack, total
 
75
  one fate page per idea, each with verdict, score, targets, and closest cited pages. It is the shareable companion to
76
  the private Field Notes artifact.
77
 
78
+ ## LoRA Dataset Artifact
79
+
80
+ The `lora_dataset` Gradio API endpoint and `LoRA` button export a compact chat JSONL dataset from successful session
81
+ turns. Each included turn yields a tool-call example and an advisor-response example for `openbmb/MiniCPM5-1B`, with the
82
+ selected targets, parsed XML tool call, tool observations, and score context preserved. This prepares the Well-Tuned
83
+ path without claiming that the adapter has already been trained or published.
84
+
85
  ## Prize Ledger
86
 
87
  `/api/prize-ledger` and the in-app Prize Ledger panel expose submission evidence: the documented model stack, total
app.py CHANGED
@@ -12,6 +12,7 @@ from hackathon_advisor.agent import AdvisorEngine
12
  from hackathon_advisor.chapter import build_chapter_markdown
13
  from hackathon_advisor.data import ProjectIndex
14
  from hackathon_advisor.field_notes import build_field_notes_markdown
 
15
  from hackathon_advisor.prize_ledger import prize_ledger
16
  from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
17
  from hackathon_advisor.tools import TARGETS
@@ -134,6 +135,21 @@ def chapter_artifact(session_json: str = "{}") -> str:
134
  )
135
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  @app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
138
  def agent_turn(message: str, session_json: str = "{}") -> Iterator[str]:
139
  try:
 
12
  from hackathon_advisor.chapter import build_chapter_markdown
13
  from hackathon_advisor.data import ProjectIndex
14
  from hackathon_advisor.field_notes import build_field_notes_markdown
15
+ from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
16
  from hackathon_advisor.prize_ledger import prize_ledger
17
  from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
18
  from hackathon_advisor.tools import TARGETS
 
135
  )
136
 
137
 
138
@app.api(name="lora_dataset", concurrency_limit=8)
def lora_dataset_artifact(session_json: str = "{}") -> str:
    """Export the LoRA SFT dataset for a serialized session as JSONL text.

    Args:
        session_json: JSON-encoded session state. An empty string, malformed
            JSON, or JSON that does not decode to an object is treated as an
            empty session.

    Returns:
        The JSONL text produced by ``build_lora_dataset_jsonl``.
    """
    try:
        session = json.loads(session_json or "{}")
    except json.JSONDecodeError:
        session = {}
    # Valid JSON may still decode to a list, string, number or None (for
    # example "null" or "[]"); the dataset builder reads the session with
    # ``.get`` and would raise AttributeError on those values.
    if not isinstance(session, dict):
        session = {}
    return build_lora_dataset_jsonl(
        session,
        {
            **trace_metadata(index),
            "project_count": len(index.projects),
        },
    )
151
+
152
+
153
  @app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
154
  def agent_turn(message: str, session_json: str = "{}") -> Iterator[str]:
155
  try:
hackathon_advisor/lora_dataset.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timezone
4
+ import json
5
+ from typing import Any
6
+
7
+
8
# Version number stamped on the manifest and on every example record.
LORA_DATASET_SCHEMA_VERSION = 1
# Model identifier recorded as the base model of the exported dataset.
BASE_MODEL = "openbmb/MiniCPM5-1B"
# Task label recorded alongside the base model in every record.
ADAPTER_TASK = "hackathon_advisor_tool_call_and_voice"

# System message used by the "tool_call" examples.
TOOL_CALL_SYSTEM_PROMPT = (
    "You are Mothback, the Build Small Hackathon advisor. "
    "Choose exactly one validated tool call for the user's project-advice request. "
    "Return only the XML function call."
)

# System message used by the "advisor_response" examples.
RESPONSE_SYSTEM_PROMPT = (
    "You are Mothback, the Build Small Hackathon advisor. "
    "Write concise, evidence-grounded advice from the tool observations, cited pages, score, "
    "and selected prize targets."
)
21
+
22
+
23
def build_lora_dataset_jsonl(session: dict[str, Any], metadata: dict[str, Any]) -> str:
    """Serialize a session into a chat-JSONL LoRA SFT dataset.

    The first line is a manifest record; every following line is one training
    example derived from the session trace.

    Args:
        session: Decoded session state. The ``trace``, ``ideas`` and
            ``targets`` entries are read; anything that is not a dict is
            treated as an empty session.
        metadata: Index metadata forwarded to ``_index_metadata``.

    Returns:
        Newline-terminated JSONL text with one JSON object per line.
    """
    # The session originates from client-supplied JSON, so it may decode to a
    # list, string, number or None; reading it with ``.get`` would raise.
    if not isinstance(session, dict):
        session = {}
    trace = _list_of_dicts(session.get("trace"))
    ideas = _list_of_dicts(session.get("ideas"))
    raw_targets = session.get("targets")
    # Only a real sequence counts as a target list: iterating a bare string
    # would yield single characters, and a number would raise TypeError.
    targets = [str(target) for target in raw_targets] if isinstance(raw_targets, (list, tuple)) else []
    examples = _examples(trace, targets)
    manifest = {
        "type": "lora_sft_manifest",
        "schema_version": LORA_DATASET_SCHEMA_VERSION,
        "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "app": "hackathon-advisor",
        "base_model": BASE_MODEL,
        "adapter_task": ADAPTER_TASK,
        "format": "chat-jsonl",
        "record_kinds": ["tool_call", "advisor_response"],
        "source": "exact_session_trace",
        "idea_count": len(ideas),
        "turn_count": len(trace),
        # Each included turn contributes two examples, so count distinct turns.
        "included_turn_count": len({example["turn_index"] for example in examples}),
        "example_count": len(examples),
        "index": _index_metadata(metadata),
    }
    records = [manifest, *examples]
    return "\n".join(json.dumps(record, ensure_ascii=False, sort_keys=True) for record in records) + "\n"
48
+
49
+
50
def _examples(trace: list[dict[str, Any]], targets: list[str]) -> list[dict[str, Any]]:
    """Build the training examples for every usable turn of the trace.

    A turn is used when its tool resolution is valid, its cleaned input and
    response are both non-empty, and its tool call has a name. Each used turn
    yields a ``tool_call`` example followed by an ``advisor_response`` example.
    ``turn_index`` is the 1-based position in ``trace``; ``example_index`` is
    the 1-based position in the returned list.
    """
    collected: list[dict[str, Any]] = []
    for position, turn in enumerate(trace, start=1):
        if not _is_successful_turn(turn):
            continue
        prompt = _clean(turn.get("input"))
        reply = _clean(turn.get("response"))
        if not (prompt and reply):
            continue
        call = _tool_call(turn)
        if not call["name"]:
            continue

        # Fields common to both examples generated from this turn.
        common = {
            "type": "lora_sft_example",
            "schema_version": LORA_DATASET_SCHEMA_VERSION,
            "base_model": BASE_MODEL,
            "adapter_task": ADAPTER_TASK,
            "turn_index": position,
            "targets": targets,
            "score": _score(turn),
            "tool_call": call,
            "tool_observations": _tool_observations(turn),
        }

        tool_call_messages = [
            {"role": "system", "content": TOOL_CALL_SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": _tool_call_xml(call)},
        ]
        collected.append(
            {
                **common,
                "example_index": len(collected) + 1,
                "example_kind": "tool_call",
                "messages": tool_call_messages,
            }
        )

        response_messages = [
            {"role": "system", "content": RESPONSE_SYSTEM_PROMPT},
            {"role": "user", "content": _response_context(prompt, turn, call)},
            {"role": "assistant", "content": reply},
        ]
        collected.append(
            {
                **common,
                "example_index": len(collected) + 1,
                "example_kind": "advisor_response",
                "messages": response_messages,
            }
        )
    return collected
98
+
99
+
100
+ def _is_successful_turn(event: dict[str, Any]) -> bool:
101
+ resolution = event.get("tool_resolution") if isinstance(event.get("tool_resolution"), dict) else {}
102
+ return str(resolution.get("status") or "") == "valid"
103
+
104
+
105
def _tool_call(event: dict[str, Any]) -> dict[str, Any]:
    """Extract the resolved tool call of an event as ``{"name", "arguments"}``.

    Any level that is missing or is not a dict is treated as empty, so the
    result always has a string ``name`` (possibly ``""``) and a dict
    ``arguments``.
    """
    resolution = event.get("tool_resolution")
    call = resolution.get("call") if isinstance(resolution, dict) else None
    if not isinstance(call, dict):
        call = {}
    arguments = call.get("arguments")
    if not isinstance(arguments, dict):
        arguments = {}
    return {"name": _clean(call.get("name")), "arguments": arguments}
113
+
114
+
115
+ def _tool_call_xml(tool_call: dict[str, Any]) -> str:
116
+ arguments = json.dumps(tool_call["arguments"], ensure_ascii=False, sort_keys=True, separators=(",", ":"))
117
+ return f'<function name="{tool_call["name"]}">{arguments}</function>'
118
+
119
+
120
def _response_context(input_text: str, event: dict[str, Any], tool_call: dict[str, Any]) -> str:
    """Compose the user message for an advisor-response example.

    The text is the user's input, a blank line, the rendered tool call, one
    bullet per tool observation (or ``- none``), and finally the verdict,
    overall score and plan-step count, with ``n/a`` standing in for a missing
    verdict or overall score.
    """
    bullets = [f"- {item['name']}: {item['summary']}" for item in _tool_observations(event)]
    if not bullets:
        bullets = ["- none"]
    rendered_call = _tool_call_xml(tool_call)

    score = _score(event)
    verdict = score["verdict"] if score["verdict"] else "n/a"
    overall = "n/a" if score["overall"] is None else score["overall"]

    return "\n".join(
        [
            input_text,
            "",
            f"Tool call: {rendered_call}",
            "Tool observations:",
            *bullets,
            f"Verdict: {verdict}",
            f"Overall: {overall}",
            f"Plan steps: {score['plan_steps']}",
        ]
    )
145
+
146
+
147
def _tool_observations(event: dict[str, Any]) -> list[dict[str, str]]:
    """List the cleaned ``name``/``summary`` pairs of the event's tools.

    Entries whose name and summary are both empty after cleaning are dropped.
    """
    cleaned_pairs = (
        (_clean(tool.get("name")), _clean(tool.get("summary")))
        for tool in _list_of_dicts(event.get("tools"))
    )
    return [{"name": name, "summary": summary} for name, summary in cleaned_pairs if name or summary]
155
+
156
+
157
def _score(event: dict[str, Any]) -> dict[str, Any]:
    """Collect the score context of an event.

    Returns:
        A dict with the cleaned ``verdict`` text, the ``overall`` value exactly
        as stored on the event, and ``plan_steps`` as an int. A missing,
        falsy or non-numeric ``plan_steps`` becomes ``0``.
    """
    # The event comes from client-supplied session JSON, so ``plan_steps`` may
    # be a non-numeric string, a container, NaN or Infinity; none of these
    # should abort the whole export.
    try:
        plan_steps = int(event.get("plan_steps") or 0)
    except (TypeError, ValueError, OverflowError):
        plan_steps = 0
    return {
        "verdict": _clean(event.get("verdict")),
        "overall": event.get("overall"),
        "plan_steps": plan_steps,
    }
163
+
164
+
165
def _index_metadata(metadata: dict[str, Any]) -> dict[str, str]:
    """Pick the index-related metadata fields and normalise each to clean text."""
    # (output key, metadata key) pairs, in output order.
    field_sources = (
        ("algorithm", "index_algorithm"),
        ("snapshot_generated_at", "snapshot_generated_at"),
        ("index_generated_at", "index_generated_at"),
        ("snapshot_digest", "snapshot_digest"),
    )
    return {field: _clean(metadata.get(source)) for field, source in field_sources}
172
+
173
+
174
+ def _list_of_dicts(value: Any) -> list[dict[str, Any]]:
175
+ if not isinstance(value, list):
176
+ return []
177
+ return [item for item in value if isinstance(item, dict)]
178
+
179
+
180
+ def _clean(value: Any) -> str:
181
+ if value is None:
182
+ return ""
183
+ return " ".join(str(value).split())
hackathon_advisor/prize_ledger.py CHANGED
@@ -63,8 +63,8 @@ BADGE_LEDGER = [
63
  },
64
  {
65
  "name": "Well-Tuned",
66
- "status": "planned",
67
- "evidence": "Plan includes a MiniCPM5 LoRA path; adapter publication remains a separate build milestone.",
68
  },
69
  {
70
  "name": "Llama Champion",
@@ -74,6 +74,17 @@ BADGE_LEDGER = [
74
  ]
75
 
76
 
 
 
 
 
 
 
 
 
 
 
 
77
  def prize_ledger(runtime: dict[str, Any]) -> dict[str, Any]:
78
  total_params = round(sum(float(item["params_b"]) for item in MODEL_STACK), 2)
79
  largest = max(MODEL_STACK, key=lambda item: float(item["params_b"]))
@@ -88,4 +99,5 @@ def prize_ledger(runtime: dict[str, Any]) -> dict[str, Any]:
88
  "tiny_titan_limit_b": 4.0,
89
  "tiny_titan_eligible": total_params <= 4.0 and float(largest["params_b"]) <= 4.0,
90
  "badges": BADGE_LEDGER,
 
91
  }
 
63
  },
64
  {
65
  "name": "Well-Tuned",
66
+ "status": "dataset-ready",
67
+ "evidence": "LoRA SFT dataset export is generated from exact session traces; adapter publication remains a separate build milestone.",
68
  },
69
  {
70
  "name": "Llama Champion",
 
74
  ]
75
 
76
 
77
+ TRAINING_ARTIFACTS = [
78
+ {
79
+ "name": "MiniCPM5 LoRA SFT dataset",
80
+ "status": "export-ready",
81
+ "endpoint": "lora_dataset",
82
+ "format": "chat-jsonl",
83
+ "base_model": "openbmb/MiniCPM5-1B",
84
+ }
85
+ ]
86
+
87
+
88
  def prize_ledger(runtime: dict[str, Any]) -> dict[str, Any]:
89
  total_params = round(sum(float(item["params_b"]) for item in MODEL_STACK), 2)
90
  largest = max(MODEL_STACK, key=lambda item: float(item["params_b"]))
 
99
  "tiny_titan_limit_b": 4.0,
100
  "tiny_titan_eligible": total_params <= 4.0 and float(largest["params_b"]) <= 4.0,
101
  "badges": BADGE_LEDGER,
102
+ "training_artifacts": TRAINING_ARTIFACTS,
103
  }
static/app.js CHANGED
@@ -22,6 +22,7 @@ const exportButton = document.querySelector("#export-artifact");
22
  const exportTraceButton = document.querySelector("#export-trace");
23
  const exportNotesButton = document.querySelector("#export-notes");
24
  const exportChapterButton = document.querySelector("#export-chapter");
 
25
  const resetButton = document.querySelector("#reset-session");
26
 
27
  const SESSION_STORAGE_KEY = "hackathon-advisor-session-v1";
@@ -66,6 +67,10 @@ exportChapterButton.addEventListener("click", async () => {
66
  await exportChapter();
67
  });
68
 
 
 
 
 
69
  resetButton.addEventListener("click", () => {
70
  clearSavedSession();
71
  window.location.reload();
@@ -183,6 +188,7 @@ function renderRestoredSession(data) {
183
  exportTraceButton.disabled = !(session.trace?.length);
184
  exportNotesButton.disabled = !(session.trace?.length);
185
  exportChapterButton.disabled = !(session.ideas?.length);
 
186
  }
187
 
188
  function readSavedSession() {
@@ -297,6 +303,21 @@ function renderPrizeLedger(ledger) {
297
  badges.append(item);
298
  }
299
  prizeLedgerEl.append(header, badges);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  }
301
 
302
  function handleEvent(event) {
@@ -350,6 +371,7 @@ function handleEvent(event) {
350
  exportTraceButton.disabled = !(session.trace?.length);
351
  exportNotesButton.disabled = !(session.trace?.length);
352
  exportChapterButton.disabled = !(session.ideas?.length);
 
353
  saveSession();
354
  }
355
  }
@@ -525,12 +547,14 @@ function setCommandDisabled(disabled) {
525
  const isTrace = button.id === "export-trace";
526
  const isNotes = button.id === "export-notes";
527
  const isChapter = button.id === "export-chapter";
 
528
  button.disabled =
529
  disabled ||
530
  (isArtifact && !currentArtifact) ||
531
  (isTrace && !session.trace?.length) ||
532
  (isNotes && !session.trace?.length) ||
533
- (isChapter && !session.ideas?.length);
 
534
  });
535
  }
536
 
@@ -594,6 +618,15 @@ async function exportChapter() {
594
  downloadText("hackathon-advisor-chapter.md", String(data || ""), "text/markdown;charset=utf-8");
595
  }
596
 
 
 
 
 
 
 
 
 
 
597
  function exportArtifact(artifact) {
598
  const canvas = document.createElement("canvas");
599
  canvas.width = 1200;
 
22
  const exportTraceButton = document.querySelector("#export-trace");
23
  const exportNotesButton = document.querySelector("#export-notes");
24
  const exportChapterButton = document.querySelector("#export-chapter");
25
+ const exportLoraButton = document.querySelector("#export-lora");
26
  const resetButton = document.querySelector("#reset-session");
27
 
28
  const SESSION_STORAGE_KEY = "hackathon-advisor-session-v1";
 
67
  await exportChapter();
68
  });
69
 
70
+ exportLoraButton.addEventListener("click", async () => {
71
+ await exportLoraDataset();
72
+ });
73
+
74
  resetButton.addEventListener("click", () => {
75
  clearSavedSession();
76
  window.location.reload();
 
188
  exportTraceButton.disabled = !(session.trace?.length);
189
  exportNotesButton.disabled = !(session.trace?.length);
190
  exportChapterButton.disabled = !(session.ideas?.length);
191
+ exportLoraButton.disabled = !(session.trace?.length);
192
  }
193
 
194
  function readSavedSession() {
 
303
  badges.append(item);
304
  }
305
  prizeLedgerEl.append(header, badges);
306
+ if (ledger.training_artifacts?.length) {
307
+ const artifacts = document.createElement("div");
308
+ artifacts.className = "training-artifact-list";
309
+ for (const artifact of ledger.training_artifacts.slice(0, 3)) {
310
+ const item = document.createElement("div");
311
+ item.className = "training-artifact";
312
+ item.title = artifact.endpoint || artifact.name;
313
+ item.innerHTML = `
314
+ <strong>${escapeHtml(artifact.name)}</strong>
315
+ <span>${escapeHtml(artifact.status)} · ${escapeHtml(artifact.format || "jsonl")}</span>
316
+ `;
317
+ artifacts.append(item);
318
+ }
319
+ prizeLedgerEl.append(artifacts);
320
+ }
321
  }
322
 
323
  function handleEvent(event) {
 
371
  exportTraceButton.disabled = !(session.trace?.length);
372
  exportNotesButton.disabled = !(session.trace?.length);
373
  exportChapterButton.disabled = !(session.ideas?.length);
374
+ exportLoraButton.disabled = !(session.trace?.length);
375
  saveSession();
376
  }
377
  }
 
547
  const isTrace = button.id === "export-trace";
548
  const isNotes = button.id === "export-notes";
549
  const isChapter = button.id === "export-chapter";
550
+ const isLora = button.id === "export-lora";
551
  button.disabled =
552
  disabled ||
553
  (isArtifact && !currentArtifact) ||
554
  (isTrace && !session.trace?.length) ||
555
  (isNotes && !session.trace?.length) ||
556
+ (isChapter && !session.ideas?.length) ||
557
+ (isLora && !session.trace?.length);
558
  });
559
  }
560
 
 
618
  downloadText("hackathon-advisor-chapter.md", String(data || ""), "text/markdown;charset=utf-8");
619
  }
620
 
621
// Requests the "/lora_dataset" endpoint with the serialized session and passes
// the returned text to downloadText as "hackathon-advisor-lora-sft.jsonl".
async function exportLoraDataset() {
  const client = await clientPromise;
  const payload = { session_json: JSON.stringify(session) };
  const { data } = await client.predict("/lora_dataset", payload);
  // The result data may be an array; in that case the first element is used.
  const jsonl = Array.isArray(data) ? data[0] : data;
  downloadText("hackathon-advisor-lora-sft.jsonl", String(jsonl || ""));
}
629
+
630
  function exportArtifact(artifact) {
631
  const canvas = document.createElement("canvas");
632
  canvas.width = 1200;
static/index.html CHANGED
@@ -35,6 +35,7 @@
35
  <button type="button" id="export-trace" title="Export the tool trace" disabled>JSONL</button>
36
  <button type="button" id="export-notes" title="Export Field Notes" disabled>Notes</button>
37
  <button type="button" id="export-chapter" title="Export the Almanac chapter" disabled>Chapter</button>
 
38
  <button type="button" id="export-artifact" title="Export the current fate page" disabled>PNG</button>
39
  <button type="button" id="reset-session" title="Clear the saved session">Reset</button>
40
  </div>
 
35
  <button type="button" id="export-trace" title="Export the tool trace" disabled>JSONL</button>
36
  <button type="button" id="export-notes" title="Export Field Notes" disabled>Notes</button>
37
  <button type="button" id="export-chapter" title="Export the Almanac chapter" disabled>Chapter</button>
38
+ <button type="button" id="export-lora" title="Export the LoRA SFT dataset" disabled>LoRA</button>
39
  <button type="button" id="export-artifact" title="Export the current fate page" disabled>PNG</button>
40
  <button type="button" id="reset-session" title="Clear the saved session">Reset</button>
41
  </div>
static/styles.css CHANGED
@@ -313,7 +313,8 @@ button:disabled {
313
  .idea,
314
  .trace,
315
  .target-toggle,
316
- .profile-field {
 
317
  border-left: 3px solid rgba(80, 47, 22, 0.48);
318
  padding: 8px 10px;
319
  background: rgba(255, 241, 196, 0.34);
@@ -468,6 +469,10 @@ button:disabled {
468
  border-left-color: var(--gold);
469
  }
470
 
 
 
 
 
471
  .badge-item.planned {
472
  border-left-color: var(--muted-ink);
473
  }
@@ -478,6 +483,31 @@ button:disabled {
478
  text-transform: uppercase;
479
  }
480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  .wood-map-field {
482
  position: relative;
483
  min-height: 138px;
 
313
  .idea,
314
  .trace,
315
  .target-toggle,
316
+ .profile-field,
317
+ .training-artifact {
318
  border-left: 3px solid rgba(80, 47, 22, 0.48);
319
  padding: 8px 10px;
320
  background: rgba(255, 241, 196, 0.34);
 
469
  border-left-color: var(--gold);
470
  }
471
 
472
+ .badge-item.dataset-ready {
473
+ border-left-color: #5f6d38;
474
+ }
475
+
476
  .badge-item.planned {
477
  border-left-color: var(--muted-ink);
478
  }
 
483
  text-transform: uppercase;
484
  }
485
 
486
+ .training-artifact-list {
487
+ display: grid;
488
+ gap: 7px;
489
+ }
490
+
491
+ .training-artifact {
492
+ display: grid;
493
+ gap: 4px;
494
+ min-width: 0;
495
+ }
496
+
497
+ .training-artifact strong {
498
+ color: #2a170d;
499
+ font-size: 0.82rem;
500
+ line-height: 1.25;
501
+ }
502
+
503
+ .training-artifact span {
504
+ color: var(--muted-ink);
505
+ font-size: 0.72rem;
506
+ line-height: 1.25;
507
+ font-weight: 900;
508
+ overflow-wrap: anywhere;
509
+ }
510
+
511
  .wood-map-field {
512
  position: relative;
513
  min-height: 138px;
tests/test_app.py CHANGED
@@ -7,6 +7,7 @@ from app import (
7
  field_notes_artifact,
8
  health,
9
  index,
 
10
  prize_ledger_endpoint,
11
  runtime,
12
  tool_contract_check,
@@ -76,6 +77,23 @@ def test_chapter_endpoint_exports_markdown() -> None:
76
  assert "Closest inked pages:" in payload
77
 
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def test_tool_contracts_endpoint_exposes_schemas() -> None:
80
  payload = tool_contracts()
81
 
@@ -104,3 +122,4 @@ def test_prize_ledger_endpoint_reports_submission_evidence() -> None:
104
  assert payload["runtime"]["backend"] == "rules"
105
  assert payload["tiny_titan_eligible"] is True
106
  assert any(badge["name"] == "Sharing is Caring" for badge in payload["badges"])
 
 
7
  field_notes_artifact,
8
  health,
9
  index,
10
+ lora_dataset_artifact,
11
  prize_ledger_endpoint,
12
  runtime,
13
  tool_contract_check,
 
77
  assert "Closest inked pages:" in payload
78
 
79
 
80
def test_lora_dataset_endpoint_exports_sft_jsonl() -> None:
    """The endpoint returns a manifest line followed by paired SFT examples."""
    state = {"targets": ["Well-Tuned"]}
    state = engine.turn("A local-first archive cartographer for family photos", state).state
    state = engine.turn("make a build plan", state).state

    payload = lora_dataset_artifact(json.dumps(state))
    records = [json.loads(row) for row in payload.splitlines()]
    manifest = records[0]

    assert manifest["type"] == "lora_sft_manifest"
    # Every line after the manifest is one example.
    assert manifest["example_count"] == len(records) - 1
    assert records[1]["example_kind"] == "tool_call"
    assert records[1]["base_model"] == "openbmb/MiniCPM5-1B"
    assert records[2]["example_kind"] == "advisor_response"
95
+
96
+
97
  def test_tool_contracts_endpoint_exposes_schemas() -> None:
98
  payload = tool_contracts()
99
 
 
122
  assert payload["runtime"]["backend"] == "rules"
123
  assert payload["tiny_titan_eligible"] is True
124
  assert any(badge["name"] == "Sharing is Caring" for badge in payload["badges"])
125
+ assert payload["training_artifacts"][0]["endpoint"] == "lora_dataset"
tests/test_lora_dataset.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from hackathon_advisor.agent import AdvisorEngine
5
+ from hackathon_advisor.data import ProjectIndex
6
+ from hackathon_advisor.lora_dataset import BASE_MODEL, build_lora_dataset_jsonl
7
+ from hackathon_advisor.trace_export import trace_metadata
8
+
9
+
10
def test_lora_dataset_exports_tool_call_and_response_examples() -> None:
    """Two advisor turns export a manifest plus tool-call and response examples."""
    index = ProjectIndex.from_files(Path("data/projects.json"), Path("data/project_index.json"))
    engine = AdvisorEngine(index)
    state = {"targets": ["Well-Tuned", "Field Notes"]}
    for message in ("A local-first archive cartographer for family photos", "make a build plan"):
        state = engine.turn(message, state).state

    payload = build_lora_dataset_jsonl(state, trace_metadata(index))
    records = [json.loads(row) for row in payload.splitlines()]
    manifest, examples = records[0], records[1:]

    # Manifest describes the export as a whole.
    assert manifest["type"] == "lora_sft_manifest"
    assert manifest["base_model"] == BASE_MODEL
    assert manifest["record_kinds"] == ["tool_call", "advisor_response"]
    assert manifest["example_count"] == len(examples)
    assert manifest["included_turn_count"] == 2
    assert manifest["index"]["algorithm"] == "tfidf-sparse-v1"

    # The first turn yields a tool-call example, then a response example.
    assert {example["example_kind"] for example in examples} == {"tool_call", "advisor_response"}
    tool_call_example, response_example = examples[0], examples[1]
    assert tool_call_example["messages"][2]["content"].startswith('<function name="save_idea">')
    assert tool_call_example["targets"] == ["Well-Tuned", "Field Notes"]
    assert response_example["messages"][1]["content"].startswith("A local-first archive")
    assert "Tool observations:" in response_example["messages"][1]["content"]
    assert response_example["messages"][2]["content"]
33
+
34
+
35
def test_empty_lora_dataset_only_exports_manifest() -> None:
    """An empty session exports exactly one record: the manifest."""
    metadata = {
        "index_algorithm": "tfidf-sparse-v1",
        "snapshot_generated_at": "2026-06-06T00:00:00+00:00",
        "index_generated_at": "2026-06-06T01:00:00+00:00",
        "snapshot_digest": "abc",
    }
    payload = build_lora_dataset_jsonl({}, metadata)
    records = [json.loads(row) for row in payload.splitlines()]

    assert len(records) == 1
    manifest = records[0]
    assert manifest["example_count"] == 0
    assert manifest["turn_count"] == 0
tests/test_prize_ledger.py CHANGED
@@ -10,4 +10,5 @@ def test_prize_ledger_tracks_param_budget_and_badges() -> None:
10
  assert payload["largest_model"]["model"] == "openbmb/MiniCPM5-1B"
11
  badges = {badge["name"]: badge["status"] for badge in payload["badges"]}
12
  assert badges["Off the Grid"] == "ready"
13
- assert badges["Well-Tuned"] == "planned"
 
 
10
  assert payload["largest_model"]["model"] == "openbmb/MiniCPM5-1B"
11
  badges = {badge["name"]: badge["status"] for badge in payload["badges"]}
12
  assert badges["Off the Grid"] == "ready"
13
+ assert badges["Well-Tuned"] == "dataset-ready"
14
+ assert payload["training_artifacts"][0]["base_model"] == "openbmb/MiniCPM5-1B"