workbench / tests /unit /test_agent_runner.py
GitHub Actions
Initial ZeroGPU deployment with spaces shim
7f9dfed
Raw
History Blame Contribute Delete
3.09 kB
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from agent.runner import (
AGENT_SYSTEM_PROMPT,
default_safety_gates,
export_agent_traces,
export_agent_traces_hf_dataset,
run_agent_loop,
run_paper_to_code_loop,
save_agent_trace,
)
class AgentRunnerTest(unittest.TestCase):
def test_agent_loop_creates_research_plan_implementation_trace(self) -> None:
session = run_agent_loop("Improve dataset stats")
phases = [step.phase for step in session.steps]
self.assertIn("research", phases)
self.assertIn("plan", phases)
self.assertIn("implement", phases)
self.assertIn("verify", phases)
self.assertIn("safe_calculator", session.tools)
self.assertTrue(session.safety_gates)
self.assertEqual(session.as_dict()["system_prompt"], AGENT_SYSTEM_PROMPT)
def test_agent_loop_can_use_safe_calculator_tool(self) -> None:
session = run_agent_loop("calculate: 2 + 2")
self.assertIn("tool:safe_calculator", [step.phase for step in session.steps])
self.assertIn("4.0", session.as_markdown())
def test_saves_and_exports_agent_trace(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
source = Path(tmp) / "agent_traces.jsonl"
output = Path(tmp) / "exports" / "agent_traces.jsonl"
session = run_agent_loop("Improve docs")
saved = save_agent_trace(session, source)
exported = export_agent_traces(source, output)
self.assertEqual(saved, source)
self.assertEqual(exported, output)
self.assertIn("Improve docs", output.read_text(encoding="utf-8"))
def test_exports_agent_trace_dataset(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
source = Path(tmp) / "agent_traces.jsonl"
target = Path(tmp) / "dataset"
save_agent_trace(run_agent_loop("Trace me"), source)
exported = export_agent_traces_hf_dataset(source, target)
self.assertEqual(exported, target)
self.assertTrue((target / "data.jsonl").exists())
self.assertTrue((target / "README.md").exists())
def test_paper_to_code_trace_has_required_phases_and_safety_gates(self) -> None:
session = run_paper_to_code_loop(
"Demo paper",
"Claims a local reward model can rank completions.",
"Implement deterministic reward eval.",
)
phases = [step.phase for step in session.steps]
self.assertEqual(phases, ["research", "plan", "implement", "verify"])
self.assertIn("Paper-to-code", session.task)
self.assertIn("No shell commands", session.as_markdown())
def test_default_safety_gates_block_external_side_effects(self) -> None:
gates = default_safety_gates()
self.assertTrue(any("downloaded automatically" in gate for gate in gates))
self.assertTrue(any("matching test" in gate for gate in gates))
if __name__ == "__main__":
unittest.main()