Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

App Files Files Community

JacobLinCool Codex committed 3 days ago

Commit

fbdb1e5

verified ·

1 Parent(s): 73b4c3f

feat: add model runtime adapter

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (11) hide show

README.md +6 -0
app.py +7 -0
data/sample_trace.jsonl +4 -4
hackathon_advisor/agent.py +9 -1
hackathon_advisor/model_runtime.py +177 -0
hackathon_advisor/trace_export.py +14 -0
pyproject.toml +5 -0
tests/test_agent.py +2 -0
tests/test_app.py +11 -1
tests/test_model_runtime.py +75 -0
tests/test_trace_export.py +2 -0

README.md CHANGED Viewed

@@ -69,6 +69,12 @@ a manifest row followed by one row per agent turn. `data/sample_trace.jsonl` is
 MiniCPM XML call such as `<function name="search_projects">{"query":"lullaby audio"}</function>`, validates it against
 the schemas, and returns either the valid call or a safe default call for the UI watchdog path.
 ## Test
 ```bash

 MiniCPM XML call such as `<function name="search_projects">{"query":"lullaby audio"}</function>`, validates it against
 the schemas, and returns either the valid call or a safe default call for the UI watchdog path.
+## Runtime Backend
+The deployed Space defaults to `ADVISOR_MODEL_BACKEND=rules`, a deterministic planner that emits the same validated XML
+tool calls as the MiniCPM path. To enable the optional MiniCPM adapter in a GPU environment, install the `model` extra
+and set `ADVISOR_MODEL_BACKEND=minicpm-transformers` plus `ADVISOR_MODEL_ID=openbmb/MiniCPM5-1B`.
 ## Test
 ```bash

app.py CHANGED Viewed

@@ -46,6 +46,7 @@ def health() -> dict:
     return {
         "ok": True,
         "projects": len(index.projects),
         **trace_metadata(index),
     }
@@ -54,12 +55,18 @@ def health() -> dict:
 def bootstrap() -> dict:
     return {
         "project_count": len(index.projects),
         **trace_metadata(index),
         "top_projects": [project.to_public_dict() for project in index.top_projects(limit=8)],
         "whitespace": [item.to_dict() for item in index.find_whitespace(limit=5)],
     }
 @app.get("/api/tool-contracts")
 def tool_contracts() -> dict:
     return {

     return {
         "ok": True,
         "projects": len(index.projects),
+        "runtime": engine.runtime_status(),
         **trace_metadata(index),
     }
 def bootstrap() -> dict:
     return {
         "project_count": len(index.projects),
+        "runtime": engine.runtime_status(),
         **trace_metadata(index),
         "top_projects": [project.to_public_dict() for project in index.top_projects(limit=8)],
         "whitespace": [item.to_dict() for item in index.find_whitespace(limit=5)],
     }
+@app.get("/api/runtime")
+def runtime() -> dict:
+    # Report the engine's planner status; the same payload is embedded under "runtime" in health() and bootstrap().
+    return engine.runtime_status()
 @app.get("/api/tool-contracts")
 def tool_contracts() -> dict:
     return {

data/sample_trace.jsonl CHANGED Viewed

@@ -1,4 +1,4 @@
-{"app": "hackathon-advisor", "generated_at": "2026-06-06T19:52:06+00:00", "idea_count": 1, "index": {"algorithm": "tfidf-sparse-v1", "index_generated_at": "2026-06-06T19:44:42+00:00", "snapshot_digest": "1d59c168b446bc1268ef648897fac523e85b258fd6048a37409b9cfbc5c28329", "snapshot_generated_at": "2026-06-06T19:20:47+00:00"}, "schema_version": 1, "turn_count": 3, "type": "trace_manifest"}
-{"artifact_title": "A local-first archive cartographer for family photos", "input": "A local-first archive cartographer for family photos", "overall": 4.9, "plan_steps": 0, "response": "The ink bleeds around A local-first archive cartographer for family photos. Closest echoes: page 1: Family Bill Assistant; page 2: Kasualdad LFED; page 3: Mycelium. The seal reads ECHO x4 at 4.9/10. Keep the audience, but change the mechanism or artifact so the demo proves a gap instead of joining a cluster.", "schema_version": 1, "tools": [{"name": "save_idea", "summary": "Wrote idea page 'A local-first archive cartographer for family photos'."}, {"name": "search_projects", "summary": "Checked 5 closest project echoes."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.9/10."}], "turn_index": 1, "type": "agent_turn", "verdict": "ECHO x4"}
-{"artifact_title": "Hands-on science coach", "input": "write bolder and find whitespace", "overall": 6.0, "plan_steps": 0, "response": "Gold gathers on Hands-on science coach. A lab-notebook companion that designs safe experiments from household materials. No close project echoes in the current snapshot. The seal reads UNWRITTEN at 6.0/10. The next move is to make one concrete before/after scene and cite the two weakest nearby echoes in the margin.", "schema_version": 1, "tools": [{"name": "find_whitespace", "summary": "Ranked 4 under-explored regions."}, {"name": "save_idea", "summary": "Wrote idea page 'Hands-on science coach'."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}], "turn_index": 2, "type": "agent_turn", "verdict": "UNWRITTEN"}
-{"artifact_title": "Hands-on science coach", "input": "make a build plan", "overall": 6.0, "plan_steps": 6, "response": "Mothback presses the wax for Hands-on science coach: 6.0/10, UNWRITTEN. The build path is: 1. Lock a one-sentence promise and one demo input that proves originality. 2. Refresh the Space snapshot, then tune the bleed threshold against the closest echoes. 3. Build the smallest happy path: input, citations, score seal, and shareable artifact. 4. Add one prize ", "schema_version": 1, "tools": [{"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}, {"name": "make_plan", "summary": "Drafted 6 build steps."}], "turn_index": 3, "type": "agent_turn", "verdict": "UNWRITTEN"}

+{"app": "hackathon-advisor", "generated_at": "2026-06-06T20:04:46+00:00", "idea_count": 1, "index": {"algorithm": "tfidf-sparse-v1", "index_generated_at": "2026-06-06T19:44:42+00:00", "snapshot_digest": "1d59c168b446bc1268ef648897fac523e85b258fd6048a37409b9cfbc5c28329", "snapshot_generated_at": "2026-06-06T19:20:47+00:00"}, "schema_version": 1, "turn_count": 3, "type": "trace_manifest"}
+{"artifact_title": "A local-first archive cartographer for family photos", "input": "A local-first archive cartographer for family photos", "overall": 4.9, "plan_steps": 0, "response": "The ink bleeds around A local-first archive cartographer for family photos. Closest echoes: page 1: Family Bill Assistant; page 2: Kasualdad LFED; page 3: Mycelium. The seal reads ECHO x4 at 4.9/10. Keep the audience, but change the mechanism or artifact so the demo proves a gap instead of joining a cluster.", "schema_version": 1, "tool_resolution": {"call": {"arguments": {"pitch": "A local-first archive cartographer for family photos", "title": "A local-first archive cartographer for family photos"}, "name": "save_idea"}, "errors": [], "status": "valid"}, "tools": [{"name": "save_idea", "summary": "Wrote idea page 'A local-first archive cartographer for family photos'."}, {"name": "search_projects", "summary": "Checked 5 closest project echoes."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.9/10."}], "turn_index": 1, "type": "agent_turn", "verdict": "ECHO x4"}
+{"artifact_title": "Hands-on science coach", "input": "write bolder and find whitespace", "overall": 6.0, "plan_steps": 0, "response": "Gold gathers on Hands-on science coach. A lab-notebook companion that designs safe experiments from household materials. No close project echoes in the current snapshot. The seal reads UNWRITTEN at 6.0/10. The next move is to make one concrete before/after scene and cite the two weakest nearby echoes in the margin.", "schema_version": 1, "tool_resolution": {"call": {"arguments": {}, "name": "find_whitespace"}, "errors": [], "status": "valid"}, "tools": [{"name": "find_whitespace", "summary": "Ranked 4 under-explored regions."}, {"name": "save_idea", "summary": "Wrote idea page 'Hands-on science coach'."}, {"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}], "turn_index": 2, "type": "agent_turn", "verdict": "UNWRITTEN"}
+{"artifact_title": "Hands-on science coach", "input": "make a build plan", "overall": 6.0, "plan_steps": 6, "response": "Mothback presses the wax for Hands-on science coach: 6.0/10, UNWRITTEN. The build path is: 1. Lock a one-sentence promise and one demo input that proves originality. 2. Refresh the Space snapshot, then tune the bleed threshold against the closest echoes. 3. Build the smallest happy path: input, citations, score seal, and shareable artifact. 4. Add one prize ", "schema_version": 1, "tool_resolution": {"call": {"arguments": {}, "name": "make_plan"}, "errors": [], "status": "valid"}, "tools": [{"name": "score_idea", "summary": "Pressed a five-quadrant seal: 4.8/10."}, {"name": "make_plan", "summary": "Drafted 6 build steps."}], "turn_index": 3, "type": "agent_turn", "verdict": "UNWRITTEN"}

hackathon_advisor/agent.py CHANGED Viewed

@@ -7,6 +7,7 @@ import re
 from hackathon_advisor.aliases import Correction, normalize_text
 from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
 from hackathon_advisor.scoring import ScoreCard
 from hackathon_advisor.tools import AdvisorTools, Idea, ToolEvent, idea_from_text
@@ -45,14 +46,20 @@ class TurnResult:
 class AdvisorEngine:
-    def __init__(self, index: ProjectIndex) -> None:
         self.index = index
         self.tools = AdvisorTools(index)
     def turn(self, message: str, state: dict[str, Any] | None = None) -> TurnResult:
         state = dict(state or {})
         state.setdefault("ideas", [])
         normalized, corrections = normalize_text(message)
         tool_events: list[ToolEvent] = []
         projects: list[Project] = []
         whitespace: list[WhitespaceItem] = []
@@ -230,6 +237,7 @@ class AdvisorEngine:
                 "plan_steps": len(plan),
                 "artifact_title": artifact.get("title", ""),
                 "response": response[:360],
             }
         )
         state["trace"] = trace[-12:]

 from hackathon_advisor.aliases import Correction, normalize_text
 from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
+from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
 from hackathon_advisor.scoring import ScoreCard
 from hackathon_advisor.tools import AdvisorTools, Idea, ToolEvent, idea_from_text
 class AdvisorEngine:
+    def __init__(self, index: ProjectIndex, planner: ToolPlanner | None = None) -> None:
+        # An injected planner wins; otherwise create_tool_planner() picks one from the environment.
         self.index = index
         self.tools = AdvisorTools(index)
+        self.planner = planner or create_tool_planner()
+    def runtime_status(self) -> dict[str, Any]:
+        # Describe the active planner as a plain dict via the module-level runtime_status() helper.
+        return runtime_status(self.planner).to_dict()
     def turn(self, message: str, state: dict[str, Any] | None = None) -> TurnResult:
         state = dict(state or {})
         state.setdefault("ideas", [])
         normalized, corrections = normalize_text(message)
+        resolution = self.planner.plan(normalized, state)
+        state["last_tool_resolution"] = resolution.to_dict()
         tool_events: list[ToolEvent] = []
         projects: list[Project] = []
         whitespace: list[WhitespaceItem] = []
                 "plan_steps": len(plan),
                 "artifact_title": artifact.get("title", ""),
                 "response": response[:360],
+                "tool_resolution": state.get("last_tool_resolution") or {},
             }
         )
         state["trace"] = trace[-12:]

hackathon_advisor/model_runtime.py ADDED Viewed

	@@ -0,0 +1,177 @@

+from __future__ import annotations
+from dataclasses import dataclass
+import os
+from typing import Any, Protocol
+from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
+DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
+DEFAULT_BACKEND = "rules"
+class ToolPlanner(Protocol):
+    """Structural interface shared by the planners in this module.
+    A planner exposes ``backend`` and ``model_id`` identifiers and a ``plan``
+    method that returns a ``ToolResolution`` for one user message.
+    """
+    backend: str
+    model_id: str
+    def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
+        """Return the resolved tool call for ``message`` given the session ``state``."""
+        ...
+@dataclass(frozen=True)
+class RuntimeStatus:
+    """Immutable description of the active planner: backend, model id, load flag and tool count."""
+    backend: str
+    model_id: str
+    loaded: bool
+    tool_count: int
+    def to_dict(self) -> dict[str, Any]:
+        """Return the four fields as a plain dict, keyed in declaration order."""
+        field_names = ("backend", "model_id", "loaded", "tool_count")
+        return {name: getattr(self, name) for name in field_names}
+class RuleBasedPlanner:
+    """Deterministic planner: keyword hits in the message select one XML tool call."""
+    backend = "rules"
+    model_id = "deterministic-tool-router"
+    def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
+        """Choose a tool call by keyword and pass it through ``resolve_tool_call``.
+        Branch order matters: blank input, then compare/plan (only when the
+        state already holds ideas), then whitespace, then search, and finally
+        saving the message as a new idea.
+        """
+        text = " ".join(message.split())
+        lowered = text.lower()
+        has_ideas = state.get("ideas")
+        def mentions(*terms: str) -> bool:
+            # Plain substring match against the lower-cased message.
+            return any(term in lowered for term in terms)
+        if not text:
+            call_xml = '<function name="list_projects">{"sort":"likes"}</function>'
+        elif mentions("compare", "choose", "rank") and has_ideas:
+            call_xml = '<function name="compare_ideas">{}</function>'
+        elif mentions("plan", "roadmap", "next step", "milestone") and has_ideas:
+            call_xml = '<function name="make_plan">{}</function>'
+        elif mentions("whitespace", "original", "new", "bolder", "unwritten", "gap"):
+            call_xml = '<function name="find_whitespace">{}</function>'
+        elif mentions("search", "similar", "already", "existing", "overlap", "echo"):
+            call_xml = f'<function name="search_projects">{{"query":{_json_string(text)}}}</function>'
+        else:
+            call_xml = f'<function name="save_idea">{{"title":{_json_string(_title(text))},"pitch":{_json_string(text)}}}</function>'
+        return resolve_tool_call(call_xml, fallback_query=text)
+class MiniCPMTransformersPlanner:
+    """Planner that asks a causal language model, loaded through transformers, for one tool call.
+    The tokenizer and model are loaded on the first ``plan`` call, not at construction.
+    """
+    backend = "minicpm-transformers"
+    def __init__(self, model_id: str = DEFAULT_MODEL_ID) -> None:
+        # Only record the model id here; loading is deferred to _ensure_loaded().
+        self.model_id = model_id
+        self._tokenizer = None
+        self._model = None
+    def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
+        """Load the model if needed, generate a tool call for ``message``, and validate it."""
+        self._ensure_loaded()
+        prompt = render_context(message, state)
+        output = self._generate_tool_call(prompt)
+        return resolve_tool_call(output, fallback_query=message)
+    def _ensure_loaded(self) -> None:
+        """Load tokenizer and model once.
+        Raises:
+            RuntimeError: if ``torch`` or ``transformers`` cannot be imported.
+        """
+        if self._model is not None and self._tokenizer is not None:
+            return
+        try:
+            import torch
+            from transformers import AutoModelForCausalLM, AutoTokenizer
+        except ImportError as error:
+            raise RuntimeError(
+                "ADVISOR_MODEL_BACKEND=minicpm-transformers requires optional model dependencies. "
+                "Install the model extra before enabling it."
+            ) from error
+        # NOTE(review): trust_remote_code=True lets the model repository supply Python code that
+        # runs locally; confirm ADVISOR_MODEL_ID only ever points at a trusted repository.
+        self._tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
+        self._model = AutoModelForCausalLM.from_pretrained(
+            self.model_id,
+            torch_dtype="auto",
+            device_map="auto",
+            trust_remote_code=True,
+        )
+        # NOTE(review): _inference_mode is assigned here but never read in the visible code,
+        # and it is not initialised in __init__; confirm whether generation should use it.
+        if hasattr(torch, "inference_mode"):
+            self._inference_mode = torch.inference_mode
+    def _generate_tool_call(self, prompt: str) -> str:
+        """Run one generation over the system + user messages and return the decoded new text."""
+        assert self._tokenizer is not None
+        assert self._model is not None
+        messages = [
+            {"role": "system", "content": system_prompt()},
+            {"role": "user", "content": prompt},
+        ]
+        # The tool schemas are handed to the chat template together with the messages.
+        inputs = self._tokenizer.apply_chat_template(
+            messages,
+            tools=tool_schemas(),
+            add_generation_prompt=True,
+            enable_thinking=False,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to(self._model.device)
+        generated = self._model.generate(
+            **inputs,
+            max_new_tokens=180,
+            do_sample=False,
+        )
+        # Drop the prompt tokens: keep only what follows the input length.
+        new_tokens = generated[:, inputs["input_ids"].shape[-1] :]
+        return self._tokenizer.decode(new_tokens[0], skip_special_tokens=True).strip()
+def create_tool_planner() -> ToolPlanner:
+    """Build the planner selected by the ``ADVISOR_MODEL_BACKEND`` environment variable.
+    A missing or blank backend selects the rule-based planner. ``minicpm`` and
+    ``minicpm-transformers`` select the MiniCPM planner, using ``ADVISOR_MODEL_ID``
+    or the default model id when that variable is missing or blank.
+    Raises:
+        RuntimeError: if the backend name is not recognised.
+    """
+    backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
+    if backend in ("", "rules"):
+        return RuleBasedPlanner()
+    if backend in ("minicpm", "minicpm-transformers"):
+        # Treat a blank model id like an unset one, mirroring the blank-backend handling above;
+        # otherwise "" would reach from_pretrained and fail far from the misconfiguration.
+        model_id = os.environ.get("ADVISOR_MODEL_ID", "").strip() or DEFAULT_MODEL_ID
+        return MiniCPMTransformersPlanner(model_id)
+    raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
+def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
+    """Summarise ``planner`` as a ``RuntimeStatus``.
+    Only the MiniCPM planner can be unloaded; every other planner reports ``loaded=True``.
+    """
+    if isinstance(planner, MiniCPMTransformersPlanner):
+        is_loaded = planner._model is not None
+    else:
+        is_loaded = True
+    return RuntimeStatus(
+        backend=planner.backend,
+        model_id=planner.model_id,
+        loaded=is_loaded,
+        tool_count=len(tool_schemas()),
+    )
+def render_context(message: str, state: dict[str, Any]) -> str:
+    """Render the user prompt: instructions, the message, and the last three ideas and trace events.
+    An empty idea board or trace is shown as a single "- empty" line.
+    """
+    recent_ideas = (state.get("ideas") or [])[-3:]
+    recent_events = (state.get("trace") or [])[-3:]
+    lines = [
+        "Choose exactly one tool call for the next advisor action.",
+        "Return only <function name=\"tool_name\">{...json...}</function>.",
+        f"User message: {message}",
+        "Idea board:",
+    ]
+    if recent_ideas:
+        for idea in recent_ideas:
+            lines.append(f"- {idea.get('title', 'Untitled')}: {idea.get('pitch', '')}")
+    else:
+        lines.append("- empty")
+    lines.append("Recent trace:")
+    if recent_events:
+        for event in recent_events:
+            lines.append(f"- {event.get('input', '')} -> {event.get('verdict', '')} {event.get('overall', '')}")
+    else:
+        lines.append("- empty")
+    return "\n".join(lines)
+def system_prompt() -> str:
+    """Return the fixed system message that frames the advisor persona and the one-call rule."""
+    sentences = (
+        "You are Mothback, a dry but benevolent Build Small Hackathon advisor.",
+        "Use tools to inspect existing projects, find whitespace, save ideas, score ideas, and make plans.",
+        "Emit exactly one XML tool call.",
+    )
+    return " ".join(sentences)
+def _json_string(value: str) -> str:
+    """Encode ``value`` as a JSON string literal, leaving non-ASCII characters unescaped."""
+    from json import dumps
+    encoded = dumps(value, ensure_ascii=False)
+    return encoded
+def _title(text: str) -> str:
+    """Derive a short idea title from ``text``.
+    Takes the first 64 characters, trims surrounding spaces and periods, and
+    upper-cases the first character. Returns "Unwritten Page" when nothing is left.
+    """
+    trimmed = text[:64].strip(" .")
+    if not trimmed:
+        return "Unwritten Page"
+    # Only touch the first character: str.capitalize() would also lower-case the rest,
+    # turning "AI tutor for NASA" into "Ai tutor for nasa".
+    return trimmed[0].upper() + trimmed[1:]

hackathon_advisor/trace_export.py CHANGED Viewed

@@ -40,6 +40,7 @@ def build_trace_jsonl(session: dict[str, Any], metadata: dict[str, Any]) -> str:
                 "plan_steps": int(event.get("plan_steps") or 0),
                 "artifact_title": str(event.get("artifact_title") or ""),
                 "response": str(event.get("response") or ""),
             }
         )
     return "\n".join(json.dumps(record, ensure_ascii=False, sort_keys=True) for record in records) + "\n"
@@ -64,3 +65,16 @@ def _tools(event: dict[str, Any]) -> list[dict[str, str]]:
         for tool in tools
         if isinstance(tool, dict)
     ]

                 "plan_steps": int(event.get("plan_steps") or 0),
                 "artifact_title": str(event.get("artifact_title") or ""),
                 "response": str(event.get("response") or ""),
+                "tool_resolution": _tool_resolution(event),
             }
         )
     return "\n".join(json.dumps(record, ensure_ascii=False, sort_keys=True) for record in records) + "\n"
         for tool in tools
         if isinstance(tool, dict)
     ]
+def _tool_resolution(event: dict[str, Any]) -> dict[str, Any]:
+    """Normalise an event's ``tool_resolution`` into a fixed, JSON-friendly shape.
+    The result always has ``status`` (str), ``call`` with ``name`` (str) and
+    ``arguments`` (dict), and ``errors`` (list), whatever the event holds.
+    """
+    resolution = event.get("tool_resolution")
+    if not isinstance(resolution, dict):
+        resolution = {}
+    call = resolution.get("call")
+    if not isinstance(call, dict):
+        call = {}
+    # Missing, null, or non-dict arguments used to leak through as-is (e.g. None).
+    arguments = call.get("arguments")
+    if not isinstance(arguments, dict):
+        arguments = {}
+    # A bare string must stay one error; list("msg") would split it into characters.
+    raw_errors = resolution.get("errors")
+    if isinstance(raw_errors, (list, tuple)):
+        errors = list(raw_errors)
+    elif isinstance(raw_errors, str) and raw_errors:
+        errors = [raw_errors]
+    else:
+        errors = []
+    return {
+        "status": str(resolution.get("status") or ""),
+        "call": {
+            "name": str(call.get("name") or ""),
+            "arguments": arguments,
+        },
+        "errors": errors,
+    }

pyproject.toml CHANGED Viewed

@@ -14,6 +14,11 @@ dependencies = [
 dev = [
   "pytest>=8.0,<9",
 ]
 [tool.pytest.ini_options]
 testpaths = ["tests"]

 dev = [
   "pytest>=8.0,<9",
 ]
+model = [
+  "accelerate>=1.0,<2",
+  "torch>=2.8,<3",
+  "transformers>=4.55,<5",
+]
 [tool.pytest.ini_options]
 testpaths = ["tests"]

tests/test_agent.py CHANGED Viewed

@@ -14,6 +14,8 @@ def test_agent_scores_and_persists_idea() -> None:
     assert result.state["ideas"]
     assert result.state["ideas"][0]["score"] is not None
     assert result.state["trace"]
     assert result.state["last_artifact"]["title"] == result.artifact["title"]
     assert result.response

     assert result.state["ideas"]
     assert result.state["ideas"][0]["score"] is not None
     assert result.state["trace"]
+    assert result.state["last_tool_resolution"]["call"]["name"] == "save_idea"
+    assert result.state["trace"][0]["tool_resolution"]["call"]["name"] == "save_idea"
     assert result.state["last_artifact"]["title"] == result.artifact["title"]
     assert result.response

tests/test_app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
-from app import bootstrap, engine, health, index, tool_contract_check, tool_contracts, trace_artifact
 def test_health_exposes_index_metadata() -> None:
@@ -9,6 +9,7 @@ def test_health_exposes_index_metadata() -> None:
     assert payload["ok"] is True
     assert payload["projects"] == len(index.projects)
     assert payload["index_algorithm"] == "tfidf-sparse-v1"
     assert len(payload["snapshot_digest"]) == 64
@@ -18,6 +19,7 @@ def test_bootstrap_exposes_index_metadata() -> None:
     assert payload["index_algorithm"] == "tfidf-sparse-v1"
     assert payload["index_generated_at"]
     assert payload["snapshot_digest"]
     assert payload["top_projects"]
@@ -43,3 +45,11 @@ def test_tool_contract_check_endpoint_defaults_safely() -> None:
     assert payload["status"] == "defaulted"
     assert payload["call"]["name"] == "search_projects"

 import json
+from app import bootstrap, engine, health, index, runtime, tool_contract_check, tool_contracts, trace_artifact
 def test_health_exposes_index_metadata() -> None:
     assert payload["ok"] is True
     assert payload["projects"] == len(index.projects)
     assert payload["index_algorithm"] == "tfidf-sparse-v1"
+    assert payload["runtime"]["backend"] == "rules"
     assert len(payload["snapshot_digest"]) == 64
     assert payload["index_algorithm"] == "tfidf-sparse-v1"
     assert payload["index_generated_at"]
     assert payload["snapshot_digest"]
+    assert payload["runtime"]["tool_count"] >= 8
     assert payload["top_projects"]
     assert payload["status"] == "defaulted"
     assert payload["call"]["name"] == "search_projects"
+def test_runtime_endpoint_reports_planner() -> None:
+    """The runtime endpoint reports the rule-based planner and marks it as loaded."""
+    payload = runtime()
+    assert payload["backend"] == "rules"
+    assert payload["model_id"] == "deterministic-tool-router"
+    assert payload["loaded"] is True

tests/test_model_runtime.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import pytest
+from hackathon_advisor.model_runtime import (
+    MiniCPMTransformersPlanner,
+    RuleBasedPlanner,
+    create_tool_planner,
+    render_context,
+    runtime_status,
+)
+def test_rule_planner_emits_valid_search_call() -> None:
+    """A message with search keywords resolves to search_projects with the full message as query."""
+    planner = RuleBasedPlanner()
+    resolution = planner.plan("search similar lullaby audio projects", {})
+    assert resolution.status == "valid"
+    assert resolution.call.name == "search_projects"
+    assert resolution.call.arguments["query"] == "search similar lullaby audio projects"
+def test_rule_planner_uses_plan_when_idea_exists() -> None:
+    """A plan request resolves to make_plan when the state already holds an idea."""
+    planner = RuleBasedPlanner()
+    resolution = planner.plan("make a build plan", {"ideas": [{"title": "A", "pitch": "B"}]})
+    assert resolution.status == "valid"
+    assert resolution.call.name == "make_plan"
+def test_rule_planner_defaults_blank_to_list_projects() -> None:
+    """A blank message resolves to list_projects."""
+    planner = RuleBasedPlanner()
+    resolution = planner.plan("", {})
+    assert resolution.status == "valid"
+    assert resolution.call.name == "list_projects"
+def test_render_context_includes_state() -> None:
+    """The rendered prompt contains idea titles, trace verdicts, and the output-format instruction."""
+    context = render_context(
+        "make a plan",
+        {
+            "ideas": [{"title": "Archive Cartographer", "pitch": "Map family memories."}],
+            "trace": [{"input": "first", "verdict": "ECHO x2", "overall": 5.1}],
+        },
+    )
+    assert "Archive Cartographer" in context
+    assert "ECHO x2" in context
+    assert '<function name="tool_name">' in context
+def test_create_tool_planner_defaults_to_rules(monkeypatch: pytest.MonkeyPatch) -> None:
+    """With ADVISOR_MODEL_BACKEND unset, the factory returns the rule-based planner, reported as loaded."""
+    monkeypatch.delenv("ADVISOR_MODEL_BACKEND", raising=False)
+    planner = create_tool_planner()
+    assert isinstance(planner, RuleBasedPlanner)
+    assert runtime_status(planner).to_dict()["loaded"] is True
+def test_create_tool_planner_rejects_unknown_backend(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An unrecognised backend name raises RuntimeError mentioning "Unsupported"."""
+    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "bogus")
+    with pytest.raises(RuntimeError, match="Unsupported"):
+        create_tool_planner()
+def test_minicpm_status_is_lazy() -> None:
+    """Constructing the MiniCPM planner does not load the model: status reports loaded False."""
+    planner = MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B")
+    status = runtime_status(planner).to_dict()
+    assert status["backend"] == "minicpm-transformers"
+    assert status["loaded"] is False

tests/test_trace_export.py CHANGED Viewed

@@ -19,6 +19,7 @@ def test_trace_jsonl_contains_manifest_and_turns() -> None:
     assert lines[0]["index"]["algorithm"] == "tfidf-sparse-v1"
     assert lines[1]["type"] == "agent_turn"
     assert lines[1]["tools"]
     assert lines[2]["plan_steps"] > 0
@@ -31,3 +32,4 @@ def test_checked_in_sample_trace_matches_schema() -> None:
     assert lines[0]["type"] == "trace_manifest"
     assert lines[0]["turn_count"] >= 3
     assert all(line["schema_version"] == 1 for line in lines)

     assert lines[0]["index"]["algorithm"] == "tfidf-sparse-v1"
     assert lines[1]["type"] == "agent_turn"
     assert lines[1]["tools"]
+    assert lines[1]["tool_resolution"]["call"]["name"] == "save_idea"
     assert lines[2]["plan_steps"] > 0
     assert lines[0]["type"] == "trace_manifest"
     assert lines[0]["turn_count"] >= 3
     assert all(line["schema_version"] == 1 for line in lines)
+    assert lines[1]["tool_resolution"]["status"] == "valid"