Spaces:
Running on Zero
Running on Zero
| from __future__ import annotations | |
| import tempfile | |
| import unittest | |
| from pathlib import Path | |
| from agent.runner import ( | |
| AGENT_SYSTEM_PROMPT, | |
| default_safety_gates, | |
| export_agent_traces, | |
| export_agent_traces_hf_dataset, | |
| run_agent_loop, | |
| run_paper_to_code_loop, | |
| save_agent_trace, | |
| ) | |
| class AgentRunnerTest(unittest.TestCase): | |
| def test_agent_loop_creates_research_plan_implementation_trace(self) -> None: | |
| session = run_agent_loop("Improve dataset stats") | |
| phases = [step.phase for step in session.steps] | |
| self.assertIn("research", phases) | |
| self.assertIn("plan", phases) | |
| self.assertIn("implement", phases) | |
| self.assertIn("verify", phases) | |
| self.assertIn("safe_calculator", session.tools) | |
| self.assertTrue(session.safety_gates) | |
| self.assertEqual(session.as_dict()["system_prompt"], AGENT_SYSTEM_PROMPT) | |
| def test_agent_loop_can_use_safe_calculator_tool(self) -> None: | |
| session = run_agent_loop("calculate: 2 + 2") | |
| self.assertIn("tool:safe_calculator", [step.phase for step in session.steps]) | |
| self.assertIn("4.0", session.as_markdown()) | |
| def test_saves_and_exports_agent_trace(self) -> None: | |
| with tempfile.TemporaryDirectory() as tmp: | |
| source = Path(tmp) / "agent_traces.jsonl" | |
| output = Path(tmp) / "exports" / "agent_traces.jsonl" | |
| session = run_agent_loop("Improve docs") | |
| saved = save_agent_trace(session, source) | |
| exported = export_agent_traces(source, output) | |
| self.assertEqual(saved, source) | |
| self.assertEqual(exported, output) | |
| self.assertIn("Improve docs", output.read_text(encoding="utf-8")) | |
| def test_exports_agent_trace_dataset(self) -> None: | |
| with tempfile.TemporaryDirectory() as tmp: | |
| source = Path(tmp) / "agent_traces.jsonl" | |
| target = Path(tmp) / "dataset" | |
| save_agent_trace(run_agent_loop("Trace me"), source) | |
| exported = export_agent_traces_hf_dataset(source, target) | |
| self.assertEqual(exported, target) | |
| self.assertTrue((target / "data.jsonl").exists()) | |
| self.assertTrue((target / "README.md").exists()) | |
| def test_paper_to_code_trace_has_required_phases_and_safety_gates(self) -> None: | |
| session = run_paper_to_code_loop( | |
| "Demo paper", | |
| "Claims a local reward model can rank completions.", | |
| "Implement deterministic reward eval.", | |
| ) | |
| phases = [step.phase for step in session.steps] | |
| self.assertEqual(phases, ["research", "plan", "implement", "verify"]) | |
| self.assertIn("Paper-to-code", session.task) | |
| self.assertIn("No shell commands", session.as_markdown()) | |
| def test_default_safety_gates_block_external_side_effects(self) -> None: | |
| gates = default_safety_gates() | |
| self.assertTrue(any("downloaded automatically" in gate for gate in gates)) | |
| self.assertTrue(any("matching test" in gate for gate in gates)) | |
| if __name__ == "__main__": | |
| unittest.main() | |