# Spaces: Running on Zero
# File size: 3,090 Bytes
# Revision: 7f9dfed
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from agent.runner import (
AGENT_SYSTEM_PROMPT,
default_safety_gates,
export_agent_traces,
export_agent_traces_hf_dataset,
run_agent_loop,
run_paper_to_code_loop,
save_agent_trace,
)
class AgentRunnerTest(unittest.TestCase):
    """Tests for the agent loops, trace saving/exporting, and safety gates.

    Every helper under test is imported from ``agent.runner``; these tests
    only check the observable results those helpers return or write.
    """

    def test_agent_loop_creates_research_plan_implementation_trace(self) -> None:
        # The session's step phases must include all four named phases, its
        # tools must contain "safe_calculator", its safety gates must be
        # truthy, and its dict form must carry the module's system prompt.
        result = run_agent_loop("Improve dataset stats")
        seen_phases = [entry.phase for entry in result.steps]
        for expected_phase in ("research", "plan", "implement", "verify"):
            self.assertIn(expected_phase, seen_phases)
        self.assertIn("safe_calculator", result.tools)
        self.assertTrue(result.safety_gates)
        serialized = result.as_dict()
        self.assertEqual(serialized["system_prompt"], AGENT_SYSTEM_PROMPT)

    def test_agent_loop_can_use_safe_calculator_tool(self) -> None:
        # A "calculate:" task must add a calculator-tool phase and the
        # markdown rendering must contain the text "4.0".
        result = run_agent_loop("calculate: 2 + 2")
        seen_phases = [entry.phase for entry in result.steps]
        self.assertIn("tool:safe_calculator", seen_phases)
        rendered = result.as_markdown()
        self.assertIn("4.0", rendered)

    def test_saves_and_exports_agent_trace(self) -> None:
        # All assertions stay inside the context manager: the temporary
        # directory (and the exported file read below) is removed on exit.
        with tempfile.TemporaryDirectory() as scratch:
            root = Path(scratch)
            trace_file = root / "agent_traces.jsonl"
            export_file = root / "exports" / "agent_traces.jsonl"
            result = run_agent_loop("Improve docs")
            saved_path = save_agent_trace(result, trace_file)
            exported_path = export_agent_traces(trace_file, export_file)
            # Both helpers are expected to hand back the path they were given.
            self.assertEqual(saved_path, trace_file)
            self.assertEqual(exported_path, export_file)
            contents = export_file.read_text(encoding="utf-8")
            self.assertIn("Improve docs", contents)

    def test_exports_agent_trace_dataset(self) -> None:
        # The existence checks must run before the temporary directory is
        # cleaned up, so they live inside the ``with`` block.
        with tempfile.TemporaryDirectory() as scratch:
            root = Path(scratch)
            trace_file = root / "agent_traces.jsonl"
            dataset_dir = root / "dataset"
            result = run_agent_loop("Trace me")
            save_agent_trace(result, trace_file)
            exported_path = export_agent_traces_hf_dataset(trace_file, dataset_dir)
            self.assertEqual(exported_path, dataset_dir)
            for expected_name in ("data.jsonl", "README.md"):
                self.assertTrue((dataset_dir / expected_name).exists())

    def test_paper_to_code_trace_has_required_phases_and_safety_gates(self) -> None:
        # Here the phases must match exactly (same order, nothing extra),
        # unlike the membership-only checks used for the generic agent loop.
        paper_args = (
            "Demo paper",
            "Claims a local reward model can rank completions.",
            "Implement deterministic reward eval.",
        )
        result = run_paper_to_code_loop(*paper_args)
        seen_phases = [entry.phase for entry in result.steps]
        self.assertEqual(seen_phases, ["research", "plan", "implement", "verify"])
        self.assertIn("Paper-to-code", result.task)
        self.assertIn("No shell commands", result.as_markdown())

    def test_default_safety_gates_block_external_side_effects(self) -> None:
        # Each phrase must appear inside at least one of the default gates.
        gates = default_safety_gates()
        for phrase in ("downloaded automatically", "matching test"):
            self.assertTrue(any(phrase in gate for gate in gates))
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|