Spaces:
Running on Zero
Running on Zero
feat: add model runtime adapter
Browse files
Co-authored-by: Codex <noreply@openai.com>
- README.md +6 -0
- app.py +7 -0
- data/sample_trace.jsonl +4 -4
- hackathon_advisor/agent.py +9 -1
- hackathon_advisor/model_runtime.py +177 -0
- hackathon_advisor/trace_export.py +14 -0
- pyproject.toml +5 -0
- tests/test_agent.py +2 -0
- tests/test_app.py +11 -1
- tests/test_model_runtime.py +75 -0
- tests/test_trace_export.py +2 -0
README.md
CHANGED
|
@@ -69,6 +69,12 @@ a manifest row followed by one row per agent turn. `data/sample_trace.jsonl` is
|
|
| 69 |
MiniCPM XML call such as `<function name="search_projects">{"query":"lullaby audio"}</function>`, validates it against
|
| 70 |
the schemas, and returns either the valid call or a safe default call for the UI watchdog path.
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
## Test
|
| 73 |
|
| 74 |
```bash
|
|
|
|
| 69 |
MiniCPM XML call such as `<function name="search_projects">{"query":"lullaby audio"}</function>`, validates it against
|
| 70 |
the schemas, and returns either the valid call or a safe default call for the UI watchdog path.
|
| 71 |
|
| 72 |
+
## Runtime Backend
|
| 73 |
+
|
| 74 |
+
The deployed Space defaults to `ADVISOR_MODEL_BACKEND=rules`, a deterministic planner that emits the same validated XML
|
| 75 |
+
tool calls as the MiniCPM path. To enable the optional MiniCPM adapter in a GPU environment, install the `model` extra
|
| 76 |
+
and set `ADVISOR_MODEL_BACKEND=minicpm-transformers` plus `ADVISOR_MODEL_ID=openbmb/MiniCPM5-1B`.
|
| 77 |
+
|
| 78 |
## Test
|
| 79 |
|
| 80 |
```bash
|
app.py
CHANGED
|
@@ -46,6 +46,7 @@ def health() -> dict:
|
|
| 46 |
return {
|
| 47 |
"ok": True,
|
| 48 |
"projects": len(index.projects),
|
|
|
|
| 49 |
**trace_metadata(index),
|
| 50 |
}
|
| 51 |
|
|
@@ -54,12 +55,18 @@ def health() -> dict:
|
|
| 54 |
def bootstrap() -> dict:
|
| 55 |
return {
|
| 56 |
"project_count": len(index.projects),
|
|
|
|
| 57 |
**trace_metadata(index),
|
| 58 |
"top_projects": [project.to_public_dict() for project in index.top_projects(limit=8)],
|
| 59 |
"whitespace": [item.to_dict() for item in index.find_whitespace(limit=5)],
|
| 60 |
}
|
| 61 |
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
@app.get("/api/tool-contracts")
|
| 64 |
def tool_contracts() -> dict:
|
| 65 |
return {
|
|
|
|
| 46 |
return {
|
| 47 |
"ok": True,
|
| 48 |
"projects": len(index.projects),
|
| 49 |
+
"runtime": engine.runtime_status(),
|
| 50 |
**trace_metadata(index),
|
| 51 |
}
|
| 52 |
|
|
|
|
| 55 |
def bootstrap() -> dict:
|
| 56 |
return {
|
| 57 |
"project_count": len(index.projects),
|
| 58 |
+
"runtime": engine.runtime_status(),
|
| 59 |
**trace_metadata(index),
|
| 60 |
"top_projects": [project.to_public_dict() for project in index.top_projects(limit=8)],
|
| 61 |
"whitespace": [item.to_dict() for item in index.find_whitespace(limit=5)],
|
| 62 |
}
|
| 63 |
|
| 64 |
|
| 65 |
+
@app.get("/api/runtime")
def runtime() -> dict:
    # Expose the planner status reported by the module-level advisor engine.
    status = engine.runtime_status()
    return status
|
| 68 |
+
|
| 69 |
+
|
| 70 |
@app.get("/api/tool-contracts")
|
| 71 |
def tool_contracts() -> dict:
|
| 72 |
return {
|
data/sample_trace.jsonl
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
{"app": "hackathon-advisor", "generated_at": "2026-06-
|
| 2 |
-
{"artifact_title": "A local-first archive cartographer for family photos", "input": "A local-first archive cartographer for family photos", "overall": 4.9, "plan_steps": 0, "response": "The ink bleeds around A local-first archive cartographer for family photos. Closest echoes: page 1: Family Bill Assistant; page 2: Kasualdad LFED; page 3: Mycelium. The seal reads ECHO x4 at 4.9/10. Keep the audience, but change the mechanism or artifact so the demo proves a gap instead of joining a cluster.", "schema_version": 1, "tools": [{"name": "save_idea", "summary": "Wrote idea page 'A local-first archive cartographer for family photos'."}, {"name": "search_projects", "summary": "Checked 5 closest project echoes."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.9/10."}], "turn_index": 1, "type": "agent_turn", "verdict": "ECHO x4"}
|
| 3 |
-
{"artifact_title": "Hands-on science coach", "input": "write bolder and find whitespace", "overall": 6.0, "plan_steps": 0, "response": "Gold gathers on Hands-on science coach. A lab-notebook companion that designs safe experiments from household materials. No close project echoes in the current snapshot. The seal reads UNWRITTEN at 6.0/10. The next move is to make one concrete before/after scene and cite the two weakest nearby echoes in the margin.", "schema_version": 1, "tools": [{"name": "find_whitespace", "summary": "Ranked 4 under-explored regions."}, {"name": "save_idea", "summary": "Wrote idea page 'Hands-on science coach'."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}], "turn_index": 2, "type": "agent_turn", "verdict": "UNWRITTEN"}
|
| 4 |
-
{"artifact_title": "Hands-on science coach", "input": "make a build plan", "overall": 6.0, "plan_steps": 6, "response": "Mothback presses the wax for Hands-on science coach: 6.0/10, UNWRITTEN. The build path is: 1. Lock a one-sentence promise and one demo input that proves originality. 2. Refresh the Space snapshot, then tune the bleed threshold against the closest echoes. 3. Build the smallest happy path: input, citations, score seal, and shareable artifact. 4. Add one prize ", "schema_version": 1, "tools": [{"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}, {"name": "make_plan", "summary": "Drafted 6 build steps."}], "turn_index": 3, "type": "agent_turn", "verdict": "UNWRITTEN"}
|
|
|
|
| 1 |
+
{"app": "hackathon-advisor", "generated_at": "2026-06-06T20:04:46+00:00", "idea_count": 1, "index": {"algorithm": "tfidf-sparse-v1", "index_generated_at": "2026-06-06T19:44:42+00:00", "snapshot_digest": "1d59c168b446bc1268ef648897fac523e85b258fd6048a37409b9cfbc5c28329", "snapshot_generated_at": "2026-06-06T19:20:47+00:00"}, "schema_version": 1, "turn_count": 3, "type": "trace_manifest"}
|
| 2 |
+
{"artifact_title": "A local-first archive cartographer for family photos", "input": "A local-first archive cartographer for family photos", "overall": 4.9, "plan_steps": 0, "response": "The ink bleeds around A local-first archive cartographer for family photos. Closest echoes: page 1: Family Bill Assistant; page 2: Kasualdad LFED; page 3: Mycelium. The seal reads ECHO x4 at 4.9/10. Keep the audience, but change the mechanism or artifact so the demo proves a gap instead of joining a cluster.", "schema_version": 1, "tool_resolution": {"call": {"arguments": {"pitch": "A local-first archive cartographer for family photos", "title": "A local-first archive cartographer for family photos"}, "name": "save_idea"}, "errors": [], "status": "valid"}, "tools": [{"name": "save_idea", "summary": "Wrote idea page 'A local-first archive cartographer for family photos'."}, {"name": "search_projects", "summary": "Checked 5 closest project echoes."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.9/10."}], "turn_index": 1, "type": "agent_turn", "verdict": "ECHO x4"}
|
| 3 |
+
{"artifact_title": "Hands-on science coach", "input": "write bolder and find whitespace", "overall": 6.0, "plan_steps": 0, "response": "Gold gathers on Hands-on science coach. A lab-notebook companion that designs safe experiments from household materials. No close project echoes in the current snapshot. The seal reads UNWRITTEN at 6.0/10. The next move is to make one concrete before/after scene and cite the two weakest nearby echoes in the margin.", "schema_version": 1, "tool_resolution": {"call": {"arguments": {}, "name": "find_whitespace"}, "errors": [], "status": "valid"}, "tools": [{"name": "find_whitespace", "summary": "Ranked 4 under-explored regions."}, {"name": "save_idea", "summary": "Wrote idea page 'Hands-on science coach'."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}], "turn_index": 2, "type": "agent_turn", "verdict": "UNWRITTEN"}
|
| 4 |
+
{"artifact_title": "Hands-on science coach", "input": "make a build plan", "overall": 6.0, "plan_steps": 6, "response": "Mothback presses the wax for Hands-on science coach: 6.0/10, UNWRITTEN. The build path is: 1. Lock a one-sentence promise and one demo input that proves originality. 2. Refresh the Space snapshot, then tune the bleed threshold against the closest echoes. 3. Build the smallest happy path: input, citations, score seal, and shareable artifact. 4. Add one prize ", "schema_version": 1, "tool_resolution": {"call": {"arguments": {}, "name": "make_plan"}, "errors": [], "status": "valid"}, "tools": [{"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}, {"name": "make_plan", "summary": "Drafted 6 build steps."}], "turn_index": 3, "type": "agent_turn", "verdict": "UNWRITTEN"}
|
hackathon_advisor/agent.py
CHANGED
|
@@ -7,6 +7,7 @@ import re
|
|
| 7 |
|
| 8 |
from hackathon_advisor.aliases import Correction, normalize_text
|
| 9 |
from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
|
|
|
|
| 10 |
from hackathon_advisor.scoring import ScoreCard
|
| 11 |
from hackathon_advisor.tools import AdvisorTools, Idea, ToolEvent, idea_from_text
|
| 12 |
|
|
@@ -45,14 +46,20 @@ class TurnResult:
|
|
| 45 |
|
| 46 |
|
| 47 |
class AdvisorEngine:
|
| 48 |
-
def __init__(self, index: ProjectIndex) -> None:
|
| 49 |
self.index = index
|
| 50 |
self.tools = AdvisorTools(index)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def turn(self, message: str, state: dict[str, Any] | None = None) -> TurnResult:
|
| 53 |
state = dict(state or {})
|
| 54 |
state.setdefault("ideas", [])
|
| 55 |
normalized, corrections = normalize_text(message)
|
|
|
|
|
|
|
| 56 |
tool_events: list[ToolEvent] = []
|
| 57 |
projects: list[Project] = []
|
| 58 |
whitespace: list[WhitespaceItem] = []
|
|
@@ -230,6 +237,7 @@ class AdvisorEngine:
|
|
| 230 |
"plan_steps": len(plan),
|
| 231 |
"artifact_title": artifact.get("title", ""),
|
| 232 |
"response": response[:360],
|
|
|
|
| 233 |
}
|
| 234 |
)
|
| 235 |
state["trace"] = trace[-12:]
|
|
|
|
| 7 |
|
| 8 |
from hackathon_advisor.aliases import Correction, normalize_text
|
| 9 |
from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
|
| 10 |
+
from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
|
| 11 |
from hackathon_advisor.scoring import ScoreCard
|
| 12 |
from hackathon_advisor.tools import AdvisorTools, Idea, ToolEvent, idea_from_text
|
| 13 |
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
class AdvisorEngine:
|
| 49 |
+
def __init__(self, index: ProjectIndex, planner: ToolPlanner | None = None) -> None:
|
| 50 |
self.index = index
|
| 51 |
self.tools = AdvisorTools(index)
|
| 52 |
+
self.planner = planner or create_tool_planner()
|
| 53 |
+
|
| 54 |
+
def runtime_status(self) -> dict[str, Any]:
|
| 55 |
+
return runtime_status(self.planner).to_dict()
|
| 56 |
|
| 57 |
def turn(self, message: str, state: dict[str, Any] | None = None) -> TurnResult:
|
| 58 |
state = dict(state or {})
|
| 59 |
state.setdefault("ideas", [])
|
| 60 |
normalized, corrections = normalize_text(message)
|
| 61 |
+
resolution = self.planner.plan(normalized, state)
|
| 62 |
+
state["last_tool_resolution"] = resolution.to_dict()
|
| 63 |
tool_events: list[ToolEvent] = []
|
| 64 |
projects: list[Project] = []
|
| 65 |
whitespace: list[WhitespaceItem] = []
|
|
|
|
| 237 |
"plan_steps": len(plan),
|
| 238 |
"artifact_title": artifact.get("title", ""),
|
| 239 |
"response": response[:360],
|
| 240 |
+
"tool_resolution": state.get("last_tool_resolution") or {},
|
| 241 |
}
|
| 242 |
)
|
| 243 |
state["trace"] = trace[-12:]
|
hackathon_advisor/model_runtime.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
import os
|
| 5 |
+
from typing import Any, Protocol
|
| 6 |
+
|
| 7 |
+
from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
|
| 11 |
+
DEFAULT_BACKEND = "rules"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ToolPlanner(Protocol):
    """Structural interface implemented by every tool-planning backend.

    Any object exposing ``backend``, ``model_id`` and a compatible ``plan``
    method satisfies this protocol; no inheritance is required.
    """

    # Short name of the backend implementation.
    backend: str
    # Identifier of the model (or router) behind the backend.
    model_id: str

    def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
        """Return the resolved tool call for ``message`` given ``state``."""
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@dataclass(frozen=True)
class RuntimeStatus:
    """Immutable snapshot describing a planner backend."""

    backend: str
    model_id: str
    loaded: bool
    tool_count: int

    def to_dict(self) -> dict[str, Any]:
        """Return the four fields as a plain dictionary, in declaration order."""
        payload: dict[str, Any] = {}
        for field_name in ("backend", "model_id", "loaded", "tool_count"):
            payload[field_name] = getattr(self, field_name)
        return payload
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class RuleBasedPlanner:
    """Deterministic planner that picks a tool call from keywords in the message.

    The selected call is rendered as a ``<function name="...">{json}</function>``
    string and passed through ``resolve_tool_call`` before being returned.
    """

    backend = "rules"
    model_id = "deterministic-tool-router"

    def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
        """Resolve one tool call for ``message``.

        Args:
            message: Raw user text; runs of whitespace are collapsed first.
            state: Session state; only the ``"ideas"`` entry is consulted.

        Returns:
            Whatever ``resolve_tool_call`` produces for the rendered call, with
            the collapsed message supplied as the fallback query.
        """
        collapsed = " ".join(message.split())
        rendered = self._route(collapsed, state)
        return resolve_tool_call(rendered, fallback_query=collapsed)

    @staticmethod
    def _route(text: str, state: dict[str, Any]) -> str:
        """Render the XML tool call for already-collapsed ``text``."""
        if not text:
            return '<function name="list_projects">{"sort":"likes"}</function>'

        lowered = text.lower()

        def mentions(*terms: str) -> bool:
            # NOTE: plain substring test, so e.g. "plan" also matches "planet".
            return any(term in lowered for term in terms)

        # Rule order matters: the first matching rule wins.
        if mentions("compare", "choose", "rank") and state.get("ideas"):
            return '<function name="compare_ideas">{}</function>'
        if mentions("plan", "roadmap", "next step", "milestone") and state.get("ideas"):
            return '<function name="make_plan">{}</function>'
        if mentions("whitespace", "original", "new", "bolder", "unwritten", "gap"):
            return '<function name="find_whitespace">{}</function>'
        if mentions("search", "similar", "already", "existing", "overlap", "echo"):
            return '<function name="search_projects">{"query":' + _json_string(text) + "}</function>"

        # Anything else is treated as a new idea pitch.
        return (
            '<function name="save_idea">{"title":'
            + _json_string(_title(text))
            + ',"pitch":'
            + _json_string(text)
            + "}</function>"
        )
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class MiniCPMTransformersPlanner:
    """Planner that asks a causal LM loaded through ``transformers`` for a tool call.

    The tokenizer and model are loaded lazily on first use, so constructing the
    planner does not import ``torch`` or ``transformers``.
    """

    backend = "minicpm-transformers"

    def __init__(self, model_id: str = DEFAULT_MODEL_ID) -> None:
        """Remember ``model_id``; nothing is loaded yet."""
        self.model_id = model_id
        self._tokenizer = None
        self._model = None
        # Declared up front so the attribute exists before the first load.
        self._inference_mode = None

    def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
        """Generate one tool call for ``message`` and resolve it.

        Raises:
            RuntimeError: If the optional model dependencies are not installed.
        """
        self._ensure_loaded()
        prompt = render_context(message, state)
        output = self._generate_tool_call(prompt)
        return resolve_tool_call(output, fallback_query=message)

    def _ensure_loaded(self) -> None:
        """Load tokenizer and model once; later calls return immediately.

        Raises:
            RuntimeError: If ``torch`` or ``transformers`` cannot be imported.
        """
        if self._model is not None and self._tokenizer is not None:
            return
        try:
            import torch
            from transformers import AutoModelForCausalLM, AutoTokenizer
        except ImportError as error:
            raise RuntimeError(
                "ADVISOR_MODEL_BACKEND=minicpm-transformers requires optional model dependencies. "
                "Install the model extra before enabling it."
            ) from error
        # NOTE(review): trust_remote_code=True runs Python code shipped with the
        # model repository; only point model_id at repositories you trust.
        tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            torch_dtype="auto",
            device_map="auto",
            trust_remote_code=True,
        )
        # Commit only after both loads succeeded so a failure never leaves the
        # planner half-initialized.
        self._tokenizer = tokenizer
        self._model = model
        self._inference_mode = getattr(torch, "inference_mode", None)

    def _generate_tool_call(self, prompt: str) -> str:
        """Run greedy generation for ``prompt`` and return the decoded new text."""
        # Explicit load instead of ``assert``: assertions vanish under ``python -O``.
        self._ensure_loaded()
        tokenizer = self._tokenizer
        model = self._model
        messages = [
            {"role": "system", "content": system_prompt()},
            {"role": "user", "content": prompt},
        ]
        inputs = tokenizer.apply_chat_template(
            messages,
            tools=tool_schemas(),
            add_generation_prompt=True,
            enable_thinking=False,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)
        generated = model.generate(
            **inputs,
            max_new_tokens=180,
            do_sample=False,
        )
        # Drop the prompt tokens; only the continuation is decoded.
        new_tokens = generated[:, inputs["input_ids"].shape[-1] :]
        return tokenizer.decode(new_tokens[0], skip_special_tokens=True).strip()
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def create_tool_planner() -> ToolPlanner:
    """Build the planner selected by the ``ADVISOR_MODEL_BACKEND`` variable.

    An unset or blank backend selects the rule-based planner. For the MiniCPM
    backend the model id comes from ``ADVISOR_MODEL_ID``; an unset or blank
    value falls back to ``DEFAULT_MODEL_ID``.

    Raises:
        RuntimeError: If the backend name is not recognized.
    """
    backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
    if backend in ("", "rules"):
        return RuleBasedPlanner()
    if backend in ("minicpm", "minicpm-transformers"):
        # Treat a blank ADVISOR_MODEL_ID like an unset one, mirroring how a
        # blank backend is tolerated above.
        model_id = os.environ.get("ADVISOR_MODEL_ID", "").strip() or DEFAULT_MODEL_ID
        return MiniCPMTransformersPlanner(model_id)
    raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
    """Summarize ``planner`` as a ``RuntimeStatus``.

    A ``MiniCPMTransformersPlanner`` counts as loaded only once its model has
    been set; every other planner is reported as loaded.
    """
    if isinstance(planner, MiniCPMTransformersPlanner):
        is_loaded = planner._model is not None
    else:
        is_loaded = True
    return RuntimeStatus(
        backend=planner.backend,
        model_id=planner.model_id,
        loaded=is_loaded,
        tool_count=len(tool_schemas()),
    )
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def render_context(message: str, state: dict[str, Any]) -> str:
    """Render the user prompt: instructions, the message, and recent state.

    Args:
        message: The user message to plan for.
        state: Session state; the ``"ideas"`` and ``"trace"`` entries are read.

    Returns:
        A newline-joined prompt listing up to three recent ideas and up to
        three recent trace events, or ``- empty`` for a section with none.
    """

    def recent(key: str) -> list[dict[str, Any]]:
        # State may come from outside this module; ignore anything that is not
        # a list of dicts instead of failing on ``.get``.
        entries = state.get(key) or []
        if not isinstance(entries, (list, tuple)):
            return []
        return [entry for entry in entries if isinstance(entry, dict)][-3:]

    idea_lines = [
        f"- {idea.get('title', 'Untitled')}: {idea.get('pitch', '')}"
        for idea in recent("ideas")
    ]
    trace_lines = [
        f"- {event.get('input', '')} -> {event.get('verdict', '')} {event.get('overall', '')}"
        for event in recent("trace")
    ]
    return "\n".join(
        [
            "Choose exactly one tool call for the next advisor action.",
            "Return only <function name=\"tool_name\">{...json...}</function>.",
            f"User message: {message}",
            "Idea board:",
            *(idea_lines or ["- empty"]),
            "Recent trace:",
            *(trace_lines or ["- empty"]),
        ]
    )
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def system_prompt() -> str:
    """Return the fixed system prompt used for the model-backed planner."""
    sentences = [
        "You are Mothback, a dry but benevolent Build Small Hackathon advisor.",
        "Use tools to inspect existing projects, find whitespace, save ideas, score ideas, and make plans.",
        "Emit exactly one XML tool call.",
    ]
    return " ".join(sentences)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _json_string(value: str) -> str:
|
| 171 |
+
import json
|
| 172 |
+
|
| 173 |
+
return json.dumps(value, ensure_ascii=False)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def _title(text: str) -> str:
|
| 177 |
+
return (text[:64].strip(" .") or "Unwritten Page").capitalize()
|
hackathon_advisor/trace_export.py
CHANGED
|
@@ -40,6 +40,7 @@ def build_trace_jsonl(session: dict[str, Any], metadata: dict[str, Any]) -> str:
|
|
| 40 |
"plan_steps": int(event.get("plan_steps") or 0),
|
| 41 |
"artifact_title": str(event.get("artifact_title") or ""),
|
| 42 |
"response": str(event.get("response") or ""),
|
|
|
|
| 43 |
}
|
| 44 |
)
|
| 45 |
return "\n".join(json.dumps(record, ensure_ascii=False, sort_keys=True) for record in records) + "\n"
|
|
@@ -64,3 +65,16 @@ def _tools(event: dict[str, Any]) -> list[dict[str, str]]:
|
|
| 64 |
for tool in tools
|
| 65 |
if isinstance(tool, dict)
|
| 66 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
"plan_steps": int(event.get("plan_steps") or 0),
|
| 41 |
"artifact_title": str(event.get("artifact_title") or ""),
|
| 42 |
"response": str(event.get("response") or ""),
|
| 43 |
+
"tool_resolution": _tool_resolution(event),
|
| 44 |
}
|
| 45 |
)
|
| 46 |
return "\n".join(json.dumps(record, ensure_ascii=False, sort_keys=True) for record in records) + "\n"
|
|
|
|
| 65 |
for tool in tools
|
| 66 |
if isinstance(tool, dict)
|
| 67 |
]
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _tool_resolution(event: dict[str, Any]) -> dict[str, Any]:
|
| 71 |
+
resolution = event.get("tool_resolution") or {}
|
| 72 |
+
call = resolution.get("call") if isinstance(resolution, dict) else {}
|
| 73 |
+
return {
|
| 74 |
+
"status": str(resolution.get("status") or "") if isinstance(resolution, dict) else "",
|
| 75 |
+
"call": {
|
| 76 |
+
"name": str(call.get("name") or "") if isinstance(call, dict) else "",
|
| 77 |
+
"arguments": call.get("arguments") if isinstance(call, dict) else {},
|
| 78 |
+
},
|
| 79 |
+
"errors": list(resolution.get("errors") or []) if isinstance(resolution, dict) else [],
|
| 80 |
+
}
|
pyproject.toml
CHANGED
|
@@ -14,6 +14,11 @@ dependencies = [
|
|
| 14 |
dev = [
|
| 15 |
"pytest>=8.0,<9",
|
| 16 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
[tool.pytest.ini_options]
|
| 19 |
testpaths = ["tests"]
|
|
|
|
| 14 |
dev = [
|
| 15 |
"pytest>=8.0,<9",
|
| 16 |
]
|
| 17 |
+
model = [
|
| 18 |
+
"accelerate>=1.0,<2",
|
| 19 |
+
"torch>=2.8,<3",
|
| 20 |
+
"transformers>=4.55,<5",
|
| 21 |
+
]
|
| 22 |
|
| 23 |
[tool.pytest.ini_options]
|
| 24 |
testpaths = ["tests"]
|
tests/test_agent.py
CHANGED
|
@@ -14,6 +14,8 @@ def test_agent_scores_and_persists_idea() -> None:
|
|
| 14 |
assert result.state["ideas"]
|
| 15 |
assert result.state["ideas"][0]["score"] is not None
|
| 16 |
assert result.state["trace"]
|
|
|
|
|
|
|
| 17 |
assert result.state["last_artifact"]["title"] == result.artifact["title"]
|
| 18 |
assert result.response
|
| 19 |
|
|
|
|
| 14 |
assert result.state["ideas"]
|
| 15 |
assert result.state["ideas"][0]["score"] is not None
|
| 16 |
assert result.state["trace"]
|
| 17 |
+
assert result.state["last_tool_resolution"]["call"]["name"] == "save_idea"
|
| 18 |
+
assert result.state["trace"][0]["tool_resolution"]["call"]["name"] == "save_idea"
|
| 19 |
assert result.state["last_artifact"]["title"] == result.artifact["title"]
|
| 20 |
assert result.response
|
| 21 |
|
tests/test_app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import json
|
| 2 |
|
| 3 |
-
from app import bootstrap, engine, health, index, tool_contract_check, tool_contracts, trace_artifact
|
| 4 |
|
| 5 |
|
| 6 |
def test_health_exposes_index_metadata() -> None:
|
|
@@ -9,6 +9,7 @@ def test_health_exposes_index_metadata() -> None:
|
|
| 9 |
assert payload["ok"] is True
|
| 10 |
assert payload["projects"] == len(index.projects)
|
| 11 |
assert payload["index_algorithm"] == "tfidf-sparse-v1"
|
|
|
|
| 12 |
assert len(payload["snapshot_digest"]) == 64
|
| 13 |
|
| 14 |
|
|
@@ -18,6 +19,7 @@ def test_bootstrap_exposes_index_metadata() -> None:
|
|
| 18 |
assert payload["index_algorithm"] == "tfidf-sparse-v1"
|
| 19 |
assert payload["index_generated_at"]
|
| 20 |
assert payload["snapshot_digest"]
|
|
|
|
| 21 |
assert payload["top_projects"]
|
| 22 |
|
| 23 |
|
|
@@ -43,3 +45,11 @@ def test_tool_contract_check_endpoint_defaults_safely() -> None:
|
|
| 43 |
|
| 44 |
assert payload["status"] == "defaulted"
|
| 45 |
assert payload["call"]["name"] == "search_projects"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
|
| 3 |
+
from app import bootstrap, engine, health, index, runtime, tool_contract_check, tool_contracts, trace_artifact
|
| 4 |
|
| 5 |
|
| 6 |
def test_health_exposes_index_metadata() -> None:
|
|
|
|
| 9 |
assert payload["ok"] is True
|
| 10 |
assert payload["projects"] == len(index.projects)
|
| 11 |
assert payload["index_algorithm"] == "tfidf-sparse-v1"
|
| 12 |
+
assert payload["runtime"]["backend"] == "rules"
|
| 13 |
assert len(payload["snapshot_digest"]) == 64
|
| 14 |
|
| 15 |
|
|
|
|
| 19 |
assert payload["index_algorithm"] == "tfidf-sparse-v1"
|
| 20 |
assert payload["index_generated_at"]
|
| 21 |
assert payload["snapshot_digest"]
|
| 22 |
+
assert payload["runtime"]["tool_count"] >= 8
|
| 23 |
assert payload["top_projects"]
|
| 24 |
|
| 25 |
|
|
|
|
| 45 |
|
| 46 |
assert payload["status"] == "defaulted"
|
| 47 |
assert payload["call"]["name"] == "search_projects"
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def test_runtime_endpoint_reports_planner() -> None:
    # The endpoint should describe the default rule-based planner.
    body = runtime()

    assert body["backend"] == "rules"
    assert body["model_id"] == "deterministic-tool-router"
    assert body["loaded"] is True
|
tests/test_model_runtime.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from hackathon_advisor.model_runtime import (
|
| 4 |
+
MiniCPMTransformersPlanner,
|
| 5 |
+
RuleBasedPlanner,
|
| 6 |
+
create_tool_planner,
|
| 7 |
+
render_context,
|
| 8 |
+
runtime_status,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_rule_planner_emits_valid_search_call() -> None:
    # A search-style message routes to search_projects with the full text as query.
    message = "search similar lullaby audio projects"

    outcome = RuleBasedPlanner().plan(message, {})

    assert outcome.status == "valid"
    assert outcome.call.name == "search_projects"
    assert outcome.call.arguments["query"] == message
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_rule_planner_uses_plan_when_idea_exists() -> None:
    # "plan" only routes to make_plan when the idea board is non-empty.
    board = {"ideas": [{"title": "A", "pitch": "B"}]}

    outcome = RuleBasedPlanner().plan("make a build plan", board)

    assert outcome.status == "valid"
    assert outcome.call.name == "make_plan"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_rule_planner_defaults_blank_to_list_projects() -> None:
    # An empty message falls back to listing projects.
    outcome = RuleBasedPlanner().plan("", {})

    assert outcome.status == "valid"
    assert outcome.call.name == "list_projects"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def test_render_context_includes_state() -> None:
    # The prompt must surface the idea board, the trace, and the format hint.
    state = {
        "ideas": [{"title": "Archive Cartographer", "pitch": "Map family memories."}],
        "trace": [{"input": "first", "verdict": "ECHO x2", "overall": 5.1}],
    }

    rendered = render_context("make a plan", state)

    assert "Archive Cartographer" in rendered
    assert "ECHO x2" in rendered
    assert '<function name="tool_name">' in rendered
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def test_create_tool_planner_defaults_to_rules(monkeypatch: pytest.MonkeyPatch) -> None:
    # With no backend configured the factory returns the rule-based planner.
    monkeypatch.delenv("ADVISOR_MODEL_BACKEND", raising=False)

    built = create_tool_planner()

    assert isinstance(built, RuleBasedPlanner)
    status = runtime_status(built).to_dict()
    assert status["loaded"] is True
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def test_create_tool_planner_rejects_unknown_backend(monkeypatch: pytest.MonkeyPatch) -> None:
    # An unrecognized backend name must fail loudly.
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "bogus")

    expect_unsupported = pytest.raises(RuntimeError, match="Unsupported")
    with expect_unsupported:
        create_tool_planner()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def test_minicpm_status_is_lazy() -> None:
    # Constructing the planner must not load the model.
    report = runtime_status(MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B")).to_dict()

    assert report["backend"] == "minicpm-transformers"
    assert report["loaded"] is False
|
tests/test_trace_export.py
CHANGED
|
@@ -19,6 +19,7 @@ def test_trace_jsonl_contains_manifest_and_turns() -> None:
|
|
| 19 |
assert lines[0]["index"]["algorithm"] == "tfidf-sparse-v1"
|
| 20 |
assert lines[1]["type"] == "agent_turn"
|
| 21 |
assert lines[1]["tools"]
|
|
|
|
| 22 |
assert lines[2]["plan_steps"] > 0
|
| 23 |
|
| 24 |
|
|
@@ -31,3 +32,4 @@ def test_checked_in_sample_trace_matches_schema() -> None:
|
|
| 31 |
assert lines[0]["type"] == "trace_manifest"
|
| 32 |
assert lines[0]["turn_count"] >= 3
|
| 33 |
assert all(line["schema_version"] == 1 for line in lines)
|
|
|
|
|
|
| 19 |
assert lines[0]["index"]["algorithm"] == "tfidf-sparse-v1"
|
| 20 |
assert lines[1]["type"] == "agent_turn"
|
| 21 |
assert lines[1]["tools"]
|
| 22 |
+
assert lines[1]["tool_resolution"]["call"]["name"] == "save_idea"
|
| 23 |
assert lines[2]["plan_steps"] > 0
|
| 24 |
|
| 25 |
|
|
|
|
| 32 |
assert lines[0]["type"] == "trace_manifest"
|
| 33 |
assert lines[0]["turn_count"] >= 3
|
| 34 |
assert all(line["schema_version"] == 1 for line in lines)
|
| 35 |
+
assert lines[1]["tool_resolution"]["status"] == "valid"
|