hackathon-advisor / tests /test_agent.py
JacobLinCool's picture
feat: stream advisor progress
6d9770a verified
from pathlib import Path
from tests.helpers import load_test_index
from hackathon_advisor.agent import AdvisorEngine
from hackathon_advisor.data import ProjectIndex
from hackathon_advisor.tool_contracts import ToolCall, ToolResolution
class StaticPlanner:
    """Planner stub for tests that always resolves to one fixed tool call."""

    # Identifying labels exposed as class attributes.
    backend = "test"
    model_id = "static"

    def __init__(self, call: ToolCall) -> None:
        """Remember the tool call that every ``plan`` invocation will return."""
        self.call = call

    def plan(self, message: str, state: dict) -> ToolResolution:
        """Return a valid resolution wrapping the stored call.

        ``message`` and ``state`` are accepted but never inspected.
        """
        fixed_resolution = ToolResolution(status="valid", call=self.call, errors=())
        return fixed_resolution
def test_agent_scores_and_persists_idea() -> None:
    """A free-form idea turn yields a score, a saved idea, a trace and an artifact."""
    outcome = AdvisorEngine(load_test_index()).turn(
        "A local-first archive cartographer for family photos", {}
    )
    session = outcome.state
    card = outcome.artifact

    # The idea is scored and stored with a goal-based (not prize-based) score.
    assert outcome.score is not None
    assert session["ideas"]
    saved_score = session["ideas"][0]["score"]
    assert saved_score is not None
    assert "goal_fit" in saved_score
    assert "prize_fit" not in saved_score

    # The artifact seal follows the same naming.
    seal = card["seal"]
    assert "goal_fit" in seal
    assert "prize_fit" not in seal

    # The trace and the last resolution both record the save_idea call.
    assert session["trace"]
    assert session["last_tool_resolution"]["call"]["name"] == "save_idea"
    assert session["trace"][0]["tool_resolution"]["call"]["name"] == "save_idea"

    # The artifact is mirrored both at session level and on the stored idea.
    assert session["last_artifact"]["title"] == card["title"]
    assert session["ideas"][0]["artifact"]["title"] == card["title"]

    # The wood map carries a caption and all three dot kinds.
    assert card["wood_map"]["caption"]
    dot_kinds = {dot["kind"] for dot in card["wood_map"]["dots"]}
    assert {"idea", "echo", "inked"} <= dot_kinds

    # Echoes reference real page numbers, and the reply mentions a page.
    first_echo = outcome.score.to_dict()["echoes"][0]
    assert first_echo["page_number"] >= 1
    assert "page " in outcome.response
    assert outcome.response
def test_agent_finds_whitespace() -> None:
    """The whitespace prompt returns a gap, scores it, and saves it as an UNWRITTEN idea."""
    outcome = AdvisorEngine(load_test_index()).turn(
        "write bolder and find whitespace", {}
    )

    assert outcome.whitespace
    assert outcome.score is not None
    assert outcome.artifact["verdict"] == "UNWRITTEN"

    # The saved idea is titled after the first whitespace suggestion.
    top_gap = outcome.whitespace[0]
    assert outcome.state["ideas"][0]["title"] == top_gap.label
def test_gap_command_explores_unused_whitespace() -> None:
    """Repeating the whitespace prompt on the same session moves on to a new gap."""
    advisor = AdvisorEngine(load_test_index())
    prompt = "write bolder and find whitespace"

    opening = advisor.turn(prompt, {})
    follow_up = advisor.turn(prompt, opening.state)

    board = follow_up.state["ideas"]
    assert len(board) == 2

    # The second turn must not repeat the first gap.
    assert opening.whitespace[0].label != follow_up.whitespace[0].label

    # The newest gap is both the last idea and the current whitespace.
    newest_label = follow_up.whitespace[0].label
    assert board[-1]["title"] == newest_label
    assert follow_up.state["current_whitespace"]["label"] == newest_label
def test_agent_preserves_canonical_jargon_case() -> None:
    """A lower-case prompt produces an artifact title with canonical term casing."""
    advisor = AdvisorEngine(load_test_index())
    outcome = advisor.turn("use neutron and mini cpm on zero gpu", {})

    title = outcome.artifact["title"]
    for canonical_term in ("MiniCPM5", "ZeroGPU"):
        assert canonical_term in title
def test_plan_command_uses_current_idea() -> None:
    """Asking for a build plan targets the existing idea and omits trace chores."""
    advisor = AdvisorEngine(load_test_index())
    seeded = advisor.turn("A local-first archive cartographer for family photos", {})
    planned = advisor.turn("make a build plan", seeded.state)

    assert planned.plan

    # The plan is attached to the idea written on the first turn.
    original_title = seeded.artifact["title"]
    assert planned.artifact["title"] == original_title
    assert planned.state["ideas"][0]["title"] == original_title

    # No plan step may mention trace bookkeeping.
    for banned_phrase in ("Record the trace", "session trace"):
        assert not any(banned_phrase in step for step in planned.plan)
def test_non_plan_turns_clear_stale_build_plan() -> None:
    """A project lookup or a new idea after planning removes ``last_plan`` from state."""
    advisor = AdvisorEngine(load_test_index())

    seeded = advisor.turn("A local-first archive cartographer for family photos", {})
    planned = advisor.turn("make a build plan", seeded.state)

    # Both follow-up turns branch from the same planned state.
    project_lookup = advisor.turn("read project lolaby", planned.state)
    new_idea = advisor.turn(
        "A hands-on science coach for kitchen experiments", planned.state
    )

    assert planned.state["last_plan"]
    assert "last_plan" not in project_lookup.state
    assert "last_plan" not in new_idea.state
def test_plan_and_rank_do_not_create_placeholder_ideas() -> None:
    """Planning or comparing on an empty session leaves the idea board empty."""
    advisor = AdvisorEngine(load_test_index())
    planned = advisor.turn("make a build plan", {})
    ranked = advisor.turn("compare ideas", planned.state)

    # Neither command invents an idea to work on.
    assert planned.state["ideas"] == []
    assert ranked.state["ideas"] == []

    # Each reply explains that there is nothing to plan or rank yet.
    assert "Write one project instinct first" in planned.response
    assert "No idea pages" in ranked.response

    # The matching tool is still recorded as the first event.
    first_plan_event = planned.tool_events[0]
    assert first_plan_event.name == "make_plan"
    first_rank_event = ranked.tool_events[0]
    assert first_rank_event.name == "compare_ideas"
def test_plan_uses_profile_context() -> None:
    """Every profile field in the session appears in at least one build-plan step."""
    advisor = AdvisorEngine(load_test_index())
    session = {
        "profile": {
            "skills": "frontend prototyping",
            "time": "one evening",
            "preferences": "quiet dashboards",
            "constraints": "CPU-only Space",
        }
    }

    seeded = advisor.turn("A local-first archive cartographer for family photos", session)
    planned = advisor.turn("make a build plan", seeded.state)

    expected_mentions = (
        "one evening",
        "frontend prototyping",
        "CPU-only Space",
        "quiet dashboards",
    )
    for mention in expected_mentions:
        assert any(mention in step for step in planned.plan)
def test_distinct_idea_turns_append_to_board() -> None:
    """Two different idea turns leave two ideas on the board, in turn order."""
    advisor = AdvisorEngine(load_test_index())
    opening = advisor.turn("A local-first archive cartographer for family photos", {})
    follow_up = advisor.turn("write bolder and find whitespace", opening.state)

    board = follow_up.state["ideas"]
    assert len(board) == 2

    expected_titles = (opening.artifact["title"], follow_up.artifact["title"])

    # Idea titles follow the order in which the turns were made...
    for slot, expected in enumerate(expected_titles):
        assert board[slot]["title"] == expected

    # ...and each idea keeps its own artifact.
    for slot, expected in enumerate(expected_titles):
        assert board[slot]["artifact"]["title"] == expected
def test_compare_ideas_reranks_board_and_selects_winner() -> None:
    """Comparing two ideas orders the board by overall score and selects the top one."""
    advisor = AdvisorEngine(load_test_index())
    opening = advisor.turn("A local-first archive cartographer for family photos", {})
    follow_up = advisor.turn("write bolder and find whitespace", opening.state)
    ranked = advisor.turn("compare ideas", follow_up.state)

    assert ranked.score is not None

    # The first idea on the board is the winner and becomes the current idea.
    board = ranked.state["ideas"]
    leader = board[0]
    assert ranked.artifact["title"] == leader["title"]
    assert ranked.state["current_idea_id"] == leader["id"]
    assert leader["score"]["overall"] >= board[1]["score"]["overall"]

    # Every idea keeps an artifact that matches its own title.
    assert all(entry["artifact"]["title"] == entry["title"] for entry in board)

    assert ranked.plan
    assert "Ranked pages:" in ranked.response
    first_event = ranked.tool_events[0]
    assert first_event.name == "compare_ideas"
def test_plan_preserves_unwritten_whitespace_verdict() -> None:
    """Planning a whitespace idea keeps its title and its UNWRITTEN verdict."""
    advisor = AdvisorEngine(load_test_index())
    gap_turn = advisor.turn("write bolder and find whitespace", {})
    plan_turn = advisor.turn("make a build plan", gap_turn.state)

    gap_card = gap_turn.artifact
    plan_card = plan_turn.artifact

    assert gap_card["verdict"] == "UNWRITTEN"
    assert plan_card["title"] == gap_card["title"]
    assert plan_card["verdict"] == "UNWRITTEN"
def test_planner_get_project_drives_project_response() -> None:
    """A planner-issued ``get_project`` call makes the turn return that project."""
    lookup_call = ToolCall("get_project", {"id": "lolaby"})
    advisor = AdvisorEngine(load_test_index(), planner=StaticPlanner(lookup_call))

    outcome = advisor.turn("read lolaby", {})

    assert outcome.projects
    first_project = outcome.projects[0]
    assert first_project.slug == "lolaby"
    first_event = outcome.tool_events[0]
    assert first_event.name == "get_project"
def test_rule_project_reference_does_not_create_or_score_idea() -> None:
    """A "read project" turn returns the project without adding or scoring an idea."""
    advisor = AdvisorEngine(load_test_index())
    seeded = advisor.turn("A local-first archive cartographer for family photos", {})
    lookup = advisor.turn("read project lolaby", seeded.state)

    # The requested project is returned.
    assert lookup.projects
    assert lookup.projects[0].slug == "lolaby"

    # The lookup turn itself carries no score and no artifact.
    assert lookup.score is None
    assert lookup.artifact == {}

    # The board and the last artifact still belong to the original idea.
    original_title = seeded.artifact["title"]
    session = lookup.state
    assert len(session["ideas"]) == 1
    assert session["ideas"][0]["title"] == original_title
    assert session["last_artifact"]["title"] == original_title
    assert session["last_tool_resolution"]["call"]["name"] == "get_project"
def test_planner_profile_and_goals_update_state() -> None:
    """Planner-issued profile and goal updates land in state and clear the old plan."""
    index = load_test_index()

    # Build a session that already holds an idea and a build plan.
    planned = AdvisorEngine(index).turn(
        "A local-first archive cartographer for family photos", {}
    )
    planned = AdvisorEngine(index).turn("make a build plan", planned.state)
    assert planned.state["last_plan"]

    # First a profile update, driven by a fixed planner call.
    skills_call = ToolCall("update_profile", {"field": "skills", "value": "frontend"})
    profile_engine = AdvisorEngine(index, planner=StaticPlanner(skills_call))
    profile = profile_engine.turn("remember this", planned.state)

    # Then a goal update layered on top of the profile state.
    goals_call = ToolCall("set_goals", {"goals": ["Off the Grid", "Field Notes"]})
    target_engine = AdvisorEngine(index, planner=StaticPlanner(goals_call))
    targeted = target_engine.turn("set goals", profile.state)

    final_state = targeted.state
    assert final_state["profile"]["skills"] == "frontend"
    assert final_state["goals"] == ["Off the Grid", "Field Notes"]
    assert "last_plan" not in profile.state
    assert "last_plan" not in final_state
    assert "Local-first, Build notes" in targeted.response
def test_goal_update_invalidates_current_idea_artifact() -> None:
    """Changing goals resets the idea's score and artifact and drops cached outputs."""
    index = load_test_index()

    # Start from a session with a scored idea and a build plan.
    seeded = AdvisorEngine(index).turn(
        "A local-first archive cartographer for family photos", {}
    )
    seeded = AdvisorEngine(index).turn("make a build plan", seeded.state)
    assert seeded.state["last_plan"]

    goals_call = ToolCall("set_goals", {"goals": ["Field Notes"]})
    target_engine = AdvisorEngine(index, planner=StaticPlanner(goals_call))
    targeted = target_engine.turn("set goals", seeded.state)

    # The idea stays on the board but loses its score and artifact.
    current_idea = targeted.state["ideas"][0]
    assert current_idea["score"] is None
    assert current_idea["artifact"] is None

    # Session-level artifact and plan entries are removed as well.
    for stale_key in ("last_artifact", "last_plan"):
        assert stale_key not in targeted.state
def test_session_goals_apply_to_new_and_current_ideas() -> None:
    """Session goals are copied onto a new idea; Field Notes adds no LoRA step."""
    advisor = AdvisorEngine(load_test_index())
    session = {"goals": ["Field Notes"]}

    seeded = advisor.turn("A local-first archive cartographer for family photos", session)
    # Capture the stored idea before the planning turn runs.
    stored_idea = seeded.state["ideas"][0]
    planned = advisor.turn("make a build plan", seeded.state)

    assert stored_idea["goals"] == ["Field Notes"]
    assert not any("LoRA" in step for step in planned.plan)
def test_well_tuned_goal_adds_training_step_to_plan() -> None:
    """The Well-Tuned goal adds a LoRA step to the plan and no "advisor turns" step."""
    advisor = AdvisorEngine(load_test_index())
    session = {"goals": ["Well-Tuned"]}

    seeded = advisor.turn("A local-first archive cartographer for family photos", session)
    planned = advisor.turn("make a build plan", seeded.state)

    assert seeded.state["ideas"][0]["goals"] == ["Well-Tuned"]

    plan_steps = planned.plan
    assert any("LoRA" in step for step in plan_steps)
    assert not any("advisor turns" in step for step in plan_steps)
def test_planner_score_idea_scores_current_idea() -> None:
    """A planner-issued ``score_idea`` call scores the idea already in the session."""
    index = load_test_index()
    seeded = AdvisorEngine(index).turn(
        "A local-first archive cartographer for family photos", {}
    )

    scoring_planner = StaticPlanner(ToolCall("score_idea", {}))
    advisor = AdvisorEngine(index, planner=scoring_planner)
    rescored = advisor.turn("score it", seeded.state)

    assert rescored.score is not None
    assert rescored.artifact["title"] == seeded.artifact["title"]
def test_turn_stream_emits_ordered_progress_events() -> None:
    """The stream runs start to done, with tokens, a stage and ordered tool events."""
    advisor = AdvisorEngine(load_test_index())
    stream = advisor.turn_stream(
        "A local-first archive cartographer for family photos", {}
    )
    events = list(stream)
    event_types = [event["type"] for event in events]

    # The stream is bracketed by start/done and carries token events.
    assert event_types[0] == "start"
    assert event_types[-1] == "done"
    assert "token" in event_types

    # the planning stage is announced before any tool runs, and tools stream as they execute
    first_stage_at = event_types.index("stage")
    first_tool_at = event_types.index("tool_event")
    assert first_stage_at < first_tool_at

    tool_names = [event["name"] for event in events if event["type"] == "tool_event"]
    assert tool_names == ["save_idea", "search_projects", "score_idea"]

    # The final event carries the updated session with the saved idea.
    assert events[-1]["state"]["ideas"]
def test_turn_stream_done_matches_blocking_turn() -> None:
    """The streamed ``done`` event matches what a blocking ``turn`` returns."""
    # idea ids are randomly generated, so compare the deterministic surface of the turn.
    index = load_test_index()
    prompt = "write bolder and find whitespace"

    streamed = list(AdvisorEngine(index).turn_stream(prompt, {}))
    done = next(event for event in streamed if event["type"] == "done")
    blocking = AdvisorEngine(index).turn(prompt, {})

    assert done["response"] == blocking.response

    # The done event carries the score as a dict, or None when there is none.
    if blocking.score:
        expected_score = blocking.score.to_dict()
    else:
        expected_score = None
    assert done["score"] == expected_score

    assert done["plan"] == blocking.plan

    streamed_labels = [item["label"] for item in done["whitespace"]]
    blocking_labels = [item.label for item in blocking.whitespace]
    assert streamed_labels == blocking_labels

    streamed_titles = [idea["title"] for idea in done["state"]["ideas"]]
    blocking_titles = [idea["title"] for idea in blocking.state["ideas"]]
    assert streamed_titles == blocking_titles
def test_turn_accepts_injected_resolution() -> None:
    """A resolution passed to ``turn`` is used in place of the planner's own call."""
    planner = StaticPlanner(ToolCall("score_idea", {}))
    advisor = AdvisorEngine(load_test_index(), planner=planner)

    listing_call = ToolCall("list_projects", {"sort": "likes"})
    injected = ToolResolution(status="valid", call=listing_call, errors=())

    outcome = advisor.turn("score it", {}, resolution=injected)

    # the injected list_projects call wins over the planner's score_idea call
    recorded_call = outcome.state["last_tool_resolution"]["call"]
    assert recorded_call["name"] == "list_projects"