# zeta-agent/tests/test_eval_runner.py
# Page header (from the hosting site): AliNadhir's picture
# Commit message: "Initial official course wrapper upload"
# Commit: 8949bf0 (verified)
from __future__ import annotations
import json
import unittest
from unittest.mock import Mock, patch
from zeta_hf.eval_runner import BenchmarkQuestion, _cache_key, _get_cached_answer, run_benchmark
class EvalRunnerTests(unittest.TestCase):
    """Tests for the cache helpers and the non-submitting path of ``run_benchmark``."""

    def test_cache_key_is_versioned(self) -> None:
        # The key returned by ``_cache_key`` is JSON text; decode it and check
        # the version tag stored inside.
        sample = BenchmarkQuestion(
            task_id="task-1",
            question="Example question",
            level="1",
            file_name="",
        )

        decoded_key = json.loads(_cache_key(sample))

        self.assertEqual(decoded_key["cache_version"], "official-qwen-v1")

    def test_cache_lookup_reuses_matching_entries(self) -> None:
        sample = BenchmarkQuestion(
            task_id="task-2",
            question="Another question",
            level="1",
            file_name="file.txt",
        )
        # Hand-built key: a sorted-key JSON dump of the version tag plus the
        # question's identifying fields.
        key_fields = {
            "cache_version": "official-qwen-v1",
            "task_id": "task-2",
            "question": "Another question",
            "file_name": "file.txt",
        }
        stored_answers = {json.dumps(key_fields, sort_keys=True): "cached-answer"}

        hit = _get_cached_answer(stored_answers, sample)

        self.assertEqual(hit, "cached-answer")

    @patch("zeta_hf.eval_runner.requests.post")
    @patch("zeta_hf.eval_runner.requests.get")
    def test_dry_run_does_not_submit(self, mock_get: Mock, mock_post: Mock) -> None:
        # Decorators apply bottom-up, so the ``requests.get`` mock is passed
        # first and the ``requests.post`` mock second.
        questions_payload = [{"task_id": "1", "question": "q", "Level": "1", "file_name": ""}]

        # Any ``requests.get`` call made through the patched module receives
        # this canned response carrying a single question.
        fake_response = Mock()
        fake_response.raise_for_status.return_value = None
        fake_response.json.return_value = questions_payload
        mock_get.return_value = fake_response

        stub_agent = Mock()
        stub_agent.answer_question.return_value = "answer"

        outcome = run_benchmark("user", submit=False, agent=stub_agent)

        # With ``submit=False`` the result must report no submission and the
        # patched ``requests.post`` must never have been invoked.
        self.assertFalse(outcome.submitted)
        mock_post.assert_not_called()
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()