"""Unit tests for the cache helpers and dry-run path of ``zeta_hf.eval_runner``."""

from __future__ import annotations

import json
import unittest
from unittest.mock import Mock, patch

from zeta_hf.eval_runner import (
    BenchmarkQuestion,
    _cache_key,
    _get_cached_answer,
    run_benchmark,
)


class EvalRunnerTests(unittest.TestCase):
    """Exercise cache-key construction, cache lookup, and non-submitting runs."""

    def test_cache_key_is_versioned(self) -> None:
        """The cache key decodes as JSON and carries the expected ``cache_version``."""
        question = BenchmarkQuestion(
            task_id="task-1",
            question="Example question",
            level="1",
            file_name="",
        )

        key = json.loads(_cache_key(question))

        self.assertEqual(key["cache_version"], "official-qwen-v1")

    def test_cache_lookup_reuses_matching_entries(self) -> None:
        """A cache entry stored under the question's key is returned by the lookup."""
        question = BenchmarkQuestion(
            task_id="task-2",
            question="Another question",
            level="1",
            file_name="file.txt",
        )
        # The key is spelled out by hand (rather than via ``_cache_key``) so the
        # test pins the serialized form: sorted-key JSON of exactly these four
        # fields. ``level`` is intentionally absent from the expected key.
        cache = {
            json.dumps(
                {
                    "cache_version": "official-qwen-v1",
                    "task_id": "task-2",
                    "question": "Another question",
                    "file_name": "file.txt",
                },
                sort_keys=True,
            ): "cached-answer"
        }

        self.assertEqual(_get_cached_answer(cache, question), "cached-answer")

    # Decorators apply bottom-up: the innermost patch (``requests.get``) supplies
    # the first mock argument and the outermost (``requests.post``) the second.
    @patch("zeta_hf.eval_runner.requests.post")
    @patch("zeta_hf.eval_runner.requests.get")
    def test_dry_run_does_not_submit(self, mock_get: Mock, mock_post: Mock) -> None:
        """With ``submit=False`` the run reports not-submitted and never POSTs."""
        response = Mock()
        response.raise_for_status.return_value = None
        response.json.return_value = [
            {"task_id": "1", "question": "q", "Level": "1", "file_name": ""}
        ]
        mock_get.return_value = response

        agent = Mock()
        agent.answer_question.return_value = "answer"

        result = run_benchmark("user", submit=False, agent=agent)

        self.assertFalse(result.submitted)
        mock_post.assert_not_called()


if __name__ == "__main__":
    unittest.main()