Spaces:
Running on Zero
Running on Zero
feat: export lora training dataset
Browse files
Co-authored-by: Codex <noreply@openai.com>
- README.md +7 -0
- app.py +16 -0
- hackathon_advisor/lora_dataset.py +183 -0
- hackathon_advisor/prize_ledger.py +14 -2
- static/app.js +34 -1
- static/index.html +1 -0
- static/styles.css +31 -1
- tests/test_app.py +19 -0
- tests/test_lora_dataset.py +49 -0
- tests/test_prize_ledger.py +2 -1
README.md
CHANGED
|
@@ -75,6 +75,13 @@ The `chapter` Gradio API endpoint and `Chapter` button export the public-facing
|
|
| 75 |
one fate page per idea, each with verdict, score, targets, and closest cited pages. It is the shareable companion to
|
| 76 |
the private Field Notes artifact.
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
## Prize Ledger
|
| 79 |
|
| 80 |
`/api/prize-ledger` and the in-app Prize Ledger panel expose submission evidence: the documented model stack, total
|
|
|
|
| 75 |
one fate page per idea, each with verdict, score, targets, and closest cited pages. It is the shareable companion to
|
| 76 |
the private Field Notes artifact.
|
| 77 |
|
| 78 |
+
## LoRA Dataset Artifact
|
| 79 |
+
|
| 80 |
+
The `lora_dataset` Gradio API endpoint and `LoRA` button export a compact chat JSONL dataset from successful session
|
| 81 |
+
turns. Each included turn yields a tool-call example and an advisor-response example for `openbmb/MiniCPM5-1B`, with the
|
| 82 |
+
selected targets, parsed XML tool call, tool observations, and score context preserved. This prepares the Well-Tuned
|
| 83 |
+
path without claiming that the adapter has already been trained or published.
|
| 84 |
+
|
| 85 |
## Prize Ledger
|
| 86 |
|
| 87 |
`/api/prize-ledger` and the in-app Prize Ledger panel expose submission evidence: the documented model stack, total
|
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from hackathon_advisor.agent import AdvisorEngine
|
|
| 12 |
from hackathon_advisor.chapter import build_chapter_markdown
|
| 13 |
from hackathon_advisor.data import ProjectIndex
|
| 14 |
from hackathon_advisor.field_notes import build_field_notes_markdown
|
|
|
|
| 15 |
from hackathon_advisor.prize_ledger import prize_ledger
|
| 16 |
from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
|
| 17 |
from hackathon_advisor.tools import TARGETS
|
|
@@ -134,6 +135,21 @@ def chapter_artifact(session_json: str = "{}") -> str:
|
|
| 134 |
)
|
| 135 |
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
@app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
|
| 138 |
def agent_turn(message: str, session_json: str = "{}") -> Iterator[str]:
|
| 139 |
try:
|
|
|
|
| 12 |
from hackathon_advisor.chapter import build_chapter_markdown
|
| 13 |
from hackathon_advisor.data import ProjectIndex
|
| 14 |
from hackathon_advisor.field_notes import build_field_notes_markdown
|
| 15 |
+
from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
|
| 16 |
from hackathon_advisor.prize_ledger import prize_ledger
|
| 17 |
from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
|
| 18 |
from hackathon_advisor.tools import TARGETS
|
|
|
|
| 135 |
)
|
| 136 |
|
| 137 |
|
| 138 |
+
@app.api(name="lora_dataset", concurrency_limit=8)
def lora_dataset_artifact(session_json: str = "{}") -> str:
    """Export the client session as a LoRA SFT chat-JSONL dataset.

    Args:
        session_json: JSON-encoded session state sent by the client. An empty,
            malformed, non-string, or non-object payload is treated as an
            empty session instead of failing the request.

    Returns:
        The JSONL text produced by ``build_lora_dataset_jsonl`` for the
        session, using the index trace metadata plus the project count.
    """
    try:
        session = json.loads(session_json or "{}")
    except (json.JSONDecodeError, TypeError):
        # TypeError: the payload was not str/bytes at all (e.g. a raw number).
        session = {}
    if not isinstance(session, dict):
        # Valid JSON that is not an object ("[]", "null", "3") would otherwise
        # blow up later when the builder calls ``session.get``.
        session = {}
    return build_lora_dataset_jsonl(
        session,
        {
            **trace_metadata(index),
            "project_count": len(index.projects),
        },
    )
|
| 151 |
+
|
| 152 |
+
|
| 153 |
@app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
|
| 154 |
def agent_turn(message: str, session_json: str = "{}") -> Iterator[str]:
|
| 155 |
try:
|
hackathon_advisor/lora_dataset.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import datetime, timezone
|
| 4 |
+
import json
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
LORA_DATASET_SCHEMA_VERSION = 1
|
| 9 |
+
BASE_MODEL = "openbmb/MiniCPM5-1B"
|
| 10 |
+
ADAPTER_TASK = "hackathon_advisor_tool_call_and_voice"
|
| 11 |
+
|
| 12 |
+
TOOL_CALL_SYSTEM_PROMPT = (
|
| 13 |
+
"You are Mothback, the Build Small Hackathon advisor. Choose exactly one validated tool call for the user's "
|
| 14 |
+
"project-advice request. Return only the XML function call."
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
RESPONSE_SYSTEM_PROMPT = (
|
| 18 |
+
"You are Mothback, the Build Small Hackathon advisor. Write concise, evidence-grounded advice from the tool "
|
| 19 |
+
"observations, cited pages, score, and selected prize targets."
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def build_lora_dataset_jsonl(session: dict[str, Any], metadata: dict[str, Any]) -> str:
    """Serialize a session into a LoRA SFT dataset as JSON Lines text.

    The first line is a manifest record describing the export; every following
    line is one training example derived from the session trace.

    Args:
        session: Session state; the ``trace``, ``ideas`` and ``targets`` keys
            are read. A non-dict value is treated as an empty session.
        metadata: Index metadata; only the keys consumed by
            ``_index_metadata`` end up in the manifest.

    Returns:
        Newline-terminated JSONL text with keys sorted inside each record.
    """
    if not isinstance(session, dict):
        # The session originates from client JSON and may be any JSON value.
        session = {}
    trace = _list_of_dicts(session.get("trace"))
    ideas = _list_of_dicts(session.get("ideas"))
    raw_targets = session.get("targets")
    # Only accept real sequences: iterating a stray string would emit one
    # "target" per character, and a dict would emit its keys.
    targets = [str(target) for target in raw_targets] if isinstance(raw_targets, (list, tuple)) else []
    examples = _examples(trace, targets)
    manifest = {
        "type": "lora_sft_manifest",
        "schema_version": LORA_DATASET_SCHEMA_VERSION,
        "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "app": "hackathon-advisor",
        "base_model": BASE_MODEL,
        "adapter_task": ADAPTER_TASK,
        "format": "chat-jsonl",
        "record_kinds": ["tool_call", "advisor_response"],
        "source": "exact_session_trace",
        "idea_count": len(ideas),
        "turn_count": len(trace),
        # Each included turn contributes two examples, so count distinct turns.
        "included_turn_count": len({example["turn_index"] for example in examples}),
        "example_count": len(examples),
        "index": _index_metadata(metadata),
    }
    records = [manifest, *examples]
    return "\n".join(json.dumps(record, ensure_ascii=False, sort_keys=True) for record in records) + "\n"
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _examples(trace: list[dict[str, Any]], targets: list[str]) -> list[dict[str, Any]]:
    """Build the training examples for every usable turn of the trace.

    A turn is used only when its tool resolution is valid and it has a
    non-empty input, a non-empty response and a named tool call. Each used
    turn yields a ``tool_call`` example followed by an ``advisor_response``
    example; ``turn_index`` is the 1-based position in the trace and
    ``example_index`` is the 1-based position in the returned list.
    """
    collected: list[dict[str, Any]] = []
    for position, turn in enumerate(trace, start=1):
        if not _is_successful_turn(turn):
            continue
        prompt = _clean(turn.get("input"))
        reply = _clean(turn.get("response"))
        if not (prompt and reply):
            continue
        call = _tool_call(turn)
        if not call["name"]:
            continue

        # Fields repeated verbatim on both examples generated from this turn.
        common = {
            "type": "lora_sft_example",
            "schema_version": LORA_DATASET_SCHEMA_VERSION,
            "base_model": BASE_MODEL,
            "adapter_task": ADAPTER_TASK,
            "turn_index": position,
            "targets": targets,
            "score": _score(turn),
            "tool_call": call,
            "tool_observations": _tool_observations(turn),
        }
        tool_call_messages = [
            {"role": "system", "content": TOOL_CALL_SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": _tool_call_xml(call)},
        ]
        collected.append(
            {
                **common,
                "example_index": len(collected) + 1,
                "example_kind": "tool_call",
                "messages": tool_call_messages,
            }
        )
        response_messages = [
            {"role": "system", "content": RESPONSE_SYSTEM_PROMPT},
            {"role": "user", "content": _response_context(prompt, turn, call)},
            {"role": "assistant", "content": reply},
        ]
        collected.append(
            {
                **common,
                "example_index": len(collected) + 1,
                "example_kind": "advisor_response",
                "messages": response_messages,
            }
        )
    return collected
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _is_successful_turn(event: dict[str, Any]) -> bool:
|
| 101 |
+
resolution = event.get("tool_resolution") if isinstance(event.get("tool_resolution"), dict) else {}
|
| 102 |
+
return str(resolution.get("status") or "") == "valid"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _tool_call(event: dict[str, Any]) -> dict[str, Any]:
    """Extract the resolved tool call from an event as ``{"name", "arguments"}``.

    Any level that is missing or not a dict (``tool_resolution``, ``call``,
    ``arguments``) degrades to an empty dict, so the result always has a
    string ``name`` (possibly empty) and a dict ``arguments``.
    """
    resolution = event.get("tool_resolution")
    call = resolution.get("call") if isinstance(resolution, dict) else None
    if not isinstance(call, dict):
        call = {}
    arguments = call.get("arguments")
    if not isinstance(arguments, dict):
        arguments = {}
    return {"name": _clean(call.get("name")), "arguments": arguments}
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def _tool_call_xml(tool_call: dict[str, Any]) -> str:
|
| 116 |
+
arguments = json.dumps(tool_call["arguments"], ensure_ascii=False, sort_keys=True, separators=(",", ":"))
|
| 117 |
+
return f'<function name="{tool_call["name"]}">{arguments}</function>'
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def _response_context(input_text: str, event: dict[str, Any], tool_call: dict[str, Any]) -> str:
    """Compose the user-side prompt for an ``advisor_response`` example.

    The text is the user's input, a blank line, the rendered tool call, one
    bullet per tool observation (or ``- none``), and finally the verdict,
    overall score and plan-step count, joined with newlines.
    """
    bullets = [f"- {item['name']}: {item['summary']}" for item in _tool_observations(event)]
    header = [
        input_text,
        "",
        f"Tool call: {_tool_call_xml(tool_call)}",
        "Tool observations:",
    ]

    score = _score(event)
    # An empty verdict or a missing overall score is shown as "n/a".
    verdict = score["verdict"] if score["verdict"] else "n/a"
    overall = "n/a" if score["overall"] is None else score["overall"]
    footer = [
        f"Verdict: {verdict}",
        f"Overall: {overall}",
        f"Plan steps: {score['plan_steps']}",
    ]
    return "\n".join([*header, *(bullets or ["- none"]), *footer])
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def _tool_observations(event: dict[str, Any]) -> list[dict[str, str]]:
    """Return cleaned ``{"name", "summary"}`` pairs for the event's ``tools``.

    Non-dict entries are ignored, and entries whose name and summary are both
    empty after cleaning are dropped.
    """
    cleaned = (
        {"name": _clean(tool.get("name")), "summary": _clean(tool.get("summary"))}
        for tool in _list_of_dicts(event.get("tools"))
    )
    return [entry for entry in cleaned if entry["name"] or entry["summary"]]
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def _score(event: dict[str, Any]) -> dict[str, Any]:
    """Collect the score context (verdict, overall, plan steps) of an event.

    Returns:
        A dict with ``verdict`` (whitespace-normalized string), ``overall``
        (passed through unchanged, may be ``None``) and ``plan_steps`` (int).
        A missing, falsy, or non-convertible ``plan_steps`` becomes ``0``.
    """
    try:
        plan_steps = int(event.get("plan_steps") or 0)
    except (TypeError, ValueError, OverflowError):
        # Session data comes from the client: a non-numeric string, a
        # container, NaN or infinity must not abort the whole export.
        plan_steps = 0
    return {
        "verdict": _clean(event.get("verdict")),
        "overall": event.get("overall"),
        "plan_steps": plan_steps,
    }
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def _index_metadata(metadata: dict[str, Any]) -> dict[str, str]:
    """Pick the index provenance fields for the manifest as cleaned strings.

    ``index_algorithm`` is exposed as ``algorithm``; the other three fields
    keep their metadata key names. Missing values become empty strings.
    """
    # (manifest field, metadata key) pairs, in output order.
    field_sources = (
        ("algorithm", "index_algorithm"),
        ("snapshot_generated_at", "snapshot_generated_at"),
        ("index_generated_at", "index_generated_at"),
        ("snapshot_digest", "snapshot_digest"),
    )
    return {field: _clean(metadata.get(source)) for field, source in field_sources}
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def _list_of_dicts(value: Any) -> list[dict[str, Any]]:
|
| 175 |
+
if not isinstance(value, list):
|
| 176 |
+
return []
|
| 177 |
+
return [item for item in value if isinstance(item, dict)]
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def _clean(value: Any) -> str:
|
| 181 |
+
if value is None:
|
| 182 |
+
return ""
|
| 183 |
+
return " ".join(str(value).split())
|
hackathon_advisor/prize_ledger.py
CHANGED
|
@@ -63,8 +63,8 @@ BADGE_LEDGER = [
|
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"name": "Well-Tuned",
|
| 66 |
-
"status": "
|
| 67 |
-
"evidence": "
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"name": "Llama Champion",
|
|
@@ -74,6 +74,17 @@ BADGE_LEDGER = [
|
|
| 74 |
]
|
| 75 |
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
def prize_ledger(runtime: dict[str, Any]) -> dict[str, Any]:
|
| 78 |
total_params = round(sum(float(item["params_b"]) for item in MODEL_STACK), 2)
|
| 79 |
largest = max(MODEL_STACK, key=lambda item: float(item["params_b"]))
|
|
@@ -88,4 +99,5 @@ def prize_ledger(runtime: dict[str, Any]) -> dict[str, Any]:
|
|
| 88 |
"tiny_titan_limit_b": 4.0,
|
| 89 |
"tiny_titan_eligible": total_params <= 4.0 and float(largest["params_b"]) <= 4.0,
|
| 90 |
"badges": BADGE_LEDGER,
|
|
|
|
| 91 |
}
|
|
|
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"name": "Well-Tuned",
|
| 66 |
+
"status": "dataset-ready",
|
| 67 |
+
"evidence": "LoRA SFT dataset export is generated from exact session traces; adapter publication remains a separate build milestone.",
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"name": "Llama Champion",
|
|
|
|
| 74 |
]
|
| 75 |
|
| 76 |
|
| 77 |
+
TRAINING_ARTIFACTS = [
|
| 78 |
+
{
|
| 79 |
+
"name": "MiniCPM5 LoRA SFT dataset",
|
| 80 |
+
"status": "export-ready",
|
| 81 |
+
"endpoint": "lora_dataset",
|
| 82 |
+
"format": "chat-jsonl",
|
| 83 |
+
"base_model": "openbmb/MiniCPM5-1B",
|
| 84 |
+
}
|
| 85 |
+
]
|
| 86 |
+
|
| 87 |
+
|
| 88 |
def prize_ledger(runtime: dict[str, Any]) -> dict[str, Any]:
|
| 89 |
total_params = round(sum(float(item["params_b"]) for item in MODEL_STACK), 2)
|
| 90 |
largest = max(MODEL_STACK, key=lambda item: float(item["params_b"]))
|
|
|
|
| 99 |
"tiny_titan_limit_b": 4.0,
|
| 100 |
"tiny_titan_eligible": total_params <= 4.0 and float(largest["params_b"]) <= 4.0,
|
| 101 |
"badges": BADGE_LEDGER,
|
| 102 |
+
"training_artifacts": TRAINING_ARTIFACTS,
|
| 103 |
}
|
static/app.js
CHANGED
|
@@ -22,6 +22,7 @@ const exportButton = document.querySelector("#export-artifact");
|
|
| 22 |
const exportTraceButton = document.querySelector("#export-trace");
|
| 23 |
const exportNotesButton = document.querySelector("#export-notes");
|
| 24 |
const exportChapterButton = document.querySelector("#export-chapter");
|
|
|
|
| 25 |
const resetButton = document.querySelector("#reset-session");
|
| 26 |
|
| 27 |
const SESSION_STORAGE_KEY = "hackathon-advisor-session-v1";
|
|
@@ -66,6 +67,10 @@ exportChapterButton.addEventListener("click", async () => {
|
|
| 66 |
await exportChapter();
|
| 67 |
});
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
resetButton.addEventListener("click", () => {
|
| 70 |
clearSavedSession();
|
| 71 |
window.location.reload();
|
|
@@ -183,6 +188,7 @@ function renderRestoredSession(data) {
|
|
| 183 |
exportTraceButton.disabled = !(session.trace?.length);
|
| 184 |
exportNotesButton.disabled = !(session.trace?.length);
|
| 185 |
exportChapterButton.disabled = !(session.ideas?.length);
|
|
|
|
| 186 |
}
|
| 187 |
|
| 188 |
function readSavedSession() {
|
|
@@ -297,6 +303,21 @@ function renderPrizeLedger(ledger) {
|
|
| 297 |
badges.append(item);
|
| 298 |
}
|
| 299 |
prizeLedgerEl.append(header, badges);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
}
|
| 301 |
|
| 302 |
function handleEvent(event) {
|
|
@@ -350,6 +371,7 @@ function handleEvent(event) {
|
|
| 350 |
exportTraceButton.disabled = !(session.trace?.length);
|
| 351 |
exportNotesButton.disabled = !(session.trace?.length);
|
| 352 |
exportChapterButton.disabled = !(session.ideas?.length);
|
|
|
|
| 353 |
saveSession();
|
| 354 |
}
|
| 355 |
}
|
|
@@ -525,12 +547,14 @@ function setCommandDisabled(disabled) {
|
|
| 525 |
const isTrace = button.id === "export-trace";
|
| 526 |
const isNotes = button.id === "export-notes";
|
| 527 |
const isChapter = button.id === "export-chapter";
|
|
|
|
| 528 |
button.disabled =
|
| 529 |
disabled ||
|
| 530 |
(isArtifact && !currentArtifact) ||
|
| 531 |
(isTrace && !session.trace?.length) ||
|
| 532 |
(isNotes && !session.trace?.length) ||
|
| 533 |
-
(isChapter && !session.ideas?.length)
|
|
|
|
| 534 |
});
|
| 535 |
}
|
| 536 |
|
|
@@ -594,6 +618,15 @@ async function exportChapter() {
|
|
| 594 |
downloadText("hackathon-advisor-chapter.md", String(data || ""), "text/markdown;charset=utf-8");
|
| 595 |
}
|
| 596 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 597 |
function exportArtifact(artifact) {
|
| 598 |
const canvas = document.createElement("canvas");
|
| 599 |
canvas.width = 1200;
|
|
|
|
| 22 |
const exportTraceButton = document.querySelector("#export-trace");
|
| 23 |
const exportNotesButton = document.querySelector("#export-notes");
|
| 24 |
const exportChapterButton = document.querySelector("#export-chapter");
|
| 25 |
+
const exportLoraButton = document.querySelector("#export-lora");
|
| 26 |
const resetButton = document.querySelector("#reset-session");
|
| 27 |
|
| 28 |
const SESSION_STORAGE_KEY = "hackathon-advisor-session-v1";
|
|
|
|
| 67 |
await exportChapter();
|
| 68 |
});
|
| 69 |
|
| 70 |
+
exportLoraButton.addEventListener("click", async () => {
|
| 71 |
+
await exportLoraDataset();
|
| 72 |
+
});
|
| 73 |
+
|
| 74 |
resetButton.addEventListener("click", () => {
|
| 75 |
clearSavedSession();
|
| 76 |
window.location.reload();
|
|
|
|
| 188 |
exportTraceButton.disabled = !(session.trace?.length);
|
| 189 |
exportNotesButton.disabled = !(session.trace?.length);
|
| 190 |
exportChapterButton.disabled = !(session.ideas?.length);
|
| 191 |
+
exportLoraButton.disabled = !(session.trace?.length);
|
| 192 |
}
|
| 193 |
|
| 194 |
function readSavedSession() {
|
|
|
|
| 303 |
badges.append(item);
|
| 304 |
}
|
| 305 |
prizeLedgerEl.append(header, badges);
|
| 306 |
+
if (ledger.training_artifacts?.length) {
|
| 307 |
+
const artifacts = document.createElement("div");
|
| 308 |
+
artifacts.className = "training-artifact-list";
|
| 309 |
+
for (const artifact of ledger.training_artifacts.slice(0, 3)) {
|
| 310 |
+
const item = document.createElement("div");
|
| 311 |
+
item.className = "training-artifact";
|
| 312 |
+
item.title = artifact.endpoint || artifact.name;
|
| 313 |
+
item.innerHTML = `
|
| 314 |
+
<strong>${escapeHtml(artifact.name)}</strong>
|
| 315 |
+
<span>${escapeHtml(artifact.status)} · ${escapeHtml(artifact.format || "jsonl")}</span>
|
| 316 |
+
`;
|
| 317 |
+
artifacts.append(item);
|
| 318 |
+
}
|
| 319 |
+
prizeLedgerEl.append(artifacts);
|
| 320 |
+
}
|
| 321 |
}
|
| 322 |
|
| 323 |
function handleEvent(event) {
|
|
|
|
| 371 |
exportTraceButton.disabled = !(session.trace?.length);
|
| 372 |
exportNotesButton.disabled = !(session.trace?.length);
|
| 373 |
exportChapterButton.disabled = !(session.ideas?.length);
|
| 374 |
+
exportLoraButton.disabled = !(session.trace?.length);
|
| 375 |
saveSession();
|
| 376 |
}
|
| 377 |
}
|
|
|
|
| 547 |
const isTrace = button.id === "export-trace";
|
| 548 |
const isNotes = button.id === "export-notes";
|
| 549 |
const isChapter = button.id === "export-chapter";
|
| 550 |
+
const isLora = button.id === "export-lora";
|
| 551 |
button.disabled =
|
| 552 |
disabled ||
|
| 553 |
(isArtifact && !currentArtifact) ||
|
| 554 |
(isTrace && !session.trace?.length) ||
|
| 555 |
(isNotes && !session.trace?.length) ||
|
| 556 |
+
(isChapter && !session.ideas?.length) ||
|
| 557 |
+
(isLora && !session.trace?.length);
|
| 558 |
});
|
| 559 |
}
|
| 560 |
|
|
|
|
| 618 |
downloadText("hackathon-advisor-chapter.md", String(data || ""), "text/markdown;charset=utf-8");
|
| 619 |
}
|
| 620 |
|
| 621 |
+
// Requests the LoRA SFT dataset for the current session from the
// "/lora_dataset" endpoint and hands the returned text to downloadText.
async function exportLoraDataset() {
  const client = await clientPromise;
  const payload = { session_json: JSON.stringify(session) };
  const response = await client.predict("/lora_dataset", payload);
  // The endpoint result may arrive wrapped in an array; use its first item.
  const raw = response.data;
  const text = Array.isArray(raw) ? raw[0] : raw;
  downloadText("hackathon-advisor-lora-sft.jsonl", String(text || ""));
}
|
| 629 |
+
|
| 630 |
function exportArtifact(artifact) {
|
| 631 |
const canvas = document.createElement("canvas");
|
| 632 |
canvas.width = 1200;
|
static/index.html
CHANGED
|
@@ -35,6 +35,7 @@
|
|
| 35 |
<button type="button" id="export-trace" title="Export the tool trace" disabled>JSONL</button>
|
| 36 |
<button type="button" id="export-notes" title="Export Field Notes" disabled>Notes</button>
|
| 37 |
<button type="button" id="export-chapter" title="Export the Almanac chapter" disabled>Chapter</button>
|
|
|
|
| 38 |
<button type="button" id="export-artifact" title="Export the current fate page" disabled>PNG</button>
|
| 39 |
<button type="button" id="reset-session" title="Clear the saved session">Reset</button>
|
| 40 |
</div>
|
|
|
|
| 35 |
<button type="button" id="export-trace" title="Export the tool trace" disabled>JSONL</button>
|
| 36 |
<button type="button" id="export-notes" title="Export Field Notes" disabled>Notes</button>
|
| 37 |
<button type="button" id="export-chapter" title="Export the Almanac chapter" disabled>Chapter</button>
|
| 38 |
+
<button type="button" id="export-lora" title="Export the LoRA SFT dataset" disabled>LoRA</button>
|
| 39 |
<button type="button" id="export-artifact" title="Export the current fate page" disabled>PNG</button>
|
| 40 |
<button type="button" id="reset-session" title="Clear the saved session">Reset</button>
|
| 41 |
</div>
|
static/styles.css
CHANGED
|
@@ -313,7 +313,8 @@ button:disabled {
|
|
| 313 |
.idea,
|
| 314 |
.trace,
|
| 315 |
.target-toggle,
|
| 316 |
-
.profile-field
|
|
|
|
| 317 |
border-left: 3px solid rgba(80, 47, 22, 0.48);
|
| 318 |
padding: 8px 10px;
|
| 319 |
background: rgba(255, 241, 196, 0.34);
|
|
@@ -468,6 +469,10 @@ button:disabled {
|
|
| 468 |
border-left-color: var(--gold);
|
| 469 |
}
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
.badge-item.planned {
|
| 472 |
border-left-color: var(--muted-ink);
|
| 473 |
}
|
|
@@ -478,6 +483,31 @@ button:disabled {
|
|
| 478 |
text-transform: uppercase;
|
| 479 |
}
|
| 480 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
.wood-map-field {
|
| 482 |
position: relative;
|
| 483 |
min-height: 138px;
|
|
|
|
| 313 |
.idea,
|
| 314 |
.trace,
|
| 315 |
.target-toggle,
|
| 316 |
+
.profile-field,
|
| 317 |
+
.training-artifact {
|
| 318 |
border-left: 3px solid rgba(80, 47, 22, 0.48);
|
| 319 |
padding: 8px 10px;
|
| 320 |
background: rgba(255, 241, 196, 0.34);
|
|
|
|
| 469 |
border-left-color: var(--gold);
|
| 470 |
}
|
| 471 |
|
| 472 |
+
.badge-item.dataset-ready {
|
| 473 |
+
border-left-color: #5f6d38;
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
.badge-item.planned {
|
| 477 |
border-left-color: var(--muted-ink);
|
| 478 |
}
|
|
|
|
| 483 |
text-transform: uppercase;
|
| 484 |
}
|
| 485 |
|
| 486 |
+
.training-artifact-list {
|
| 487 |
+
display: grid;
|
| 488 |
+
gap: 7px;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
.training-artifact {
|
| 492 |
+
display: grid;
|
| 493 |
+
gap: 4px;
|
| 494 |
+
min-width: 0;
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
.training-artifact strong {
|
| 498 |
+
color: #2a170d;
|
| 499 |
+
font-size: 0.82rem;
|
| 500 |
+
line-height: 1.25;
|
| 501 |
+
}
|
| 502 |
+
|
| 503 |
+
.training-artifact span {
|
| 504 |
+
color: var(--muted-ink);
|
| 505 |
+
font-size: 0.72rem;
|
| 506 |
+
line-height: 1.25;
|
| 507 |
+
font-weight: 900;
|
| 508 |
+
overflow-wrap: anywhere;
|
| 509 |
+
}
|
| 510 |
+
|
| 511 |
.wood-map-field {
|
| 512 |
position: relative;
|
| 513 |
min-height: 138px;
|
tests/test_app.py
CHANGED
|
@@ -7,6 +7,7 @@ from app import (
|
|
| 7 |
field_notes_artifact,
|
| 8 |
health,
|
| 9 |
index,
|
|
|
|
| 10 |
prize_ledger_endpoint,
|
| 11 |
runtime,
|
| 12 |
tool_contract_check,
|
|
@@ -76,6 +77,23 @@ def test_chapter_endpoint_exports_markdown() -> None:
|
|
| 76 |
assert "Closest inked pages:" in payload
|
| 77 |
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def test_tool_contracts_endpoint_exposes_schemas() -> None:
|
| 80 |
payload = tool_contracts()
|
| 81 |
|
|
@@ -104,3 +122,4 @@ def test_prize_ledger_endpoint_reports_submission_evidence() -> None:
|
|
| 104 |
assert payload["runtime"]["backend"] == "rules"
|
| 105 |
assert payload["tiny_titan_eligible"] is True
|
| 106 |
assert any(badge["name"] == "Sharing is Caring" for badge in payload["badges"])
|
|
|
|
|
|
| 7 |
field_notes_artifact,
|
| 8 |
health,
|
| 9 |
index,
|
| 10 |
+
lora_dataset_artifact,
|
| 11 |
prize_ledger_endpoint,
|
| 12 |
runtime,
|
| 13 |
tool_contract_check,
|
|
|
|
| 77 |
assert "Closest inked pages:" in payload
|
| 78 |
|
| 79 |
|
| 80 |
+
def test_lora_dataset_endpoint_exports_sft_jsonl() -> None:
    """The endpoint emits a manifest line followed by paired SFT examples."""
    first_turn = engine.turn(
        "A local-first archive cartographer for family photos",
        {"targets": ["Well-Tuned"]},
    )
    second_turn = engine.turn("make a build plan", first_turn.state)

    payload = lora_dataset_artifact(json.dumps(second_turn.state))
    records = [json.loads(row) for row in payload.splitlines()]
    manifest = records[0]

    assert manifest["type"] == "lora_sft_manifest"
    # Every line after the manifest is one example.
    assert manifest["example_count"] == len(records) - 1
    assert records[1]["example_kind"] == "tool_call"
    assert records[1]["base_model"] == "openbmb/MiniCPM5-1B"
    assert records[2]["example_kind"] == "advisor_response"
|
| 95 |
+
|
| 96 |
+
|
| 97 |
def test_tool_contracts_endpoint_exposes_schemas() -> None:
|
| 98 |
payload = tool_contracts()
|
| 99 |
|
|
|
|
| 122 |
assert payload["runtime"]["backend"] == "rules"
|
| 123 |
assert payload["tiny_titan_eligible"] is True
|
| 124 |
assert any(badge["name"] == "Sharing is Caring" for badge in payload["badges"])
|
| 125 |
+
assert payload["training_artifacts"][0]["endpoint"] == "lora_dataset"
|
tests/test_lora_dataset.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
from hackathon_advisor.agent import AdvisorEngine
|
| 5 |
+
from hackathon_advisor.data import ProjectIndex
|
| 6 |
+
from hackathon_advisor.lora_dataset import BASE_MODEL, build_lora_dataset_jsonl
|
| 7 |
+
from hackathon_advisor.trace_export import trace_metadata
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def test_lora_dataset_exports_tool_call_and_response_examples() -> None:
    """Two engine turns export a manifest plus tool-call and response examples."""
    index = ProjectIndex.from_files(Path("data/projects.json"), Path("data/project_index.json"))
    engine = AdvisorEngine(index)
    state = {"targets": ["Well-Tuned", "Field Notes"]}
    for message in ("A local-first archive cartographer for family photos", "make a build plan"):
        state = engine.turn(message, state).state

    payload = build_lora_dataset_jsonl(state, trace_metadata(index))
    records = [json.loads(row) for row in payload.splitlines()]
    manifest, examples = records[0], records[1:]

    assert manifest["type"] == "lora_sft_manifest"
    assert manifest["base_model"] == BASE_MODEL
    assert manifest["record_kinds"] == ["tool_call", "advisor_response"]
    assert manifest["example_count"] == len(examples)
    assert manifest["included_turn_count"] == 2
    assert manifest["index"]["algorithm"] == "tfidf-sparse-v1"
    assert {example["example_kind"] for example in examples} == {"tool_call", "advisor_response"}

    # The first turn yields the tool-call example, then the response example.
    tool_call_example = examples[0]
    response_example = examples[1]
    assert tool_call_example["messages"][2]["content"].startswith('<function name="save_idea">')
    assert tool_call_example["targets"] == ["Well-Tuned", "Field Notes"]
    response_prompt = response_example["messages"][1]["content"]
    assert response_prompt.startswith("A local-first archive")
    assert "Tool observations:" in response_prompt
    assert response_example["messages"][2]["content"]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_empty_lora_dataset_only_exports_manifest() -> None:
    """An empty session still exports exactly one manifest line with zero counts."""
    metadata = {
        "index_algorithm": "tfidf-sparse-v1",
        "snapshot_generated_at": "2026-06-06T00:00:00+00:00",
        "index_generated_at": "2026-06-06T01:00:00+00:00",
        "snapshot_digest": "abc",
    }
    payload = build_lora_dataset_jsonl({}, metadata)
    records = [json.loads(row) for row in payload.splitlines()]

    assert len(records) == 1
    manifest = records[0]
    assert manifest["example_count"] == 0
    assert manifest["turn_count"] == 0
|
tests/test_prize_ledger.py
CHANGED
|
@@ -10,4 +10,5 @@ def test_prize_ledger_tracks_param_budget_and_badges() -> None:
|
|
| 10 |
assert payload["largest_model"]["model"] == "openbmb/MiniCPM5-1B"
|
| 11 |
badges = {badge["name"]: badge["status"] for badge in payload["badges"]}
|
| 12 |
assert badges["Off the Grid"] == "ready"
|
| 13 |
-
assert badges["Well-Tuned"] == "
|
|
|
|
|
|
| 10 |
assert payload["largest_model"]["model"] == "openbmb/MiniCPM5-1B"
|
| 11 |
badges = {badge["name"]: badge["status"] for badge in payload["badges"]}
|
| 12 |
assert badges["Off the Grid"] == "ready"
|
| 13 |
+
assert badges["Well-Tuned"] == "dataset-ready"
|
| 14 |
+
assert payload["training_artifacts"][0]["base_model"] == "openbmb/MiniCPM5-1B"
|