Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import unittest | |
| from unittest.mock import Mock, patch | |
| from zeta_hf.eval_runner import BenchmarkQuestion, _cache_key, _get_cached_answer, run_benchmark | |
class EvalRunnerTests(unittest.TestCase):
    """Unit tests for the cache helpers and dry-run path of ``eval_runner``."""

    def test_cache_key_is_versioned(self) -> None:
        """The cache key is a JSON document carrying the cache version tag."""
        question = BenchmarkQuestion(
            task_id="task-1",
            question="Example question",
            level="1",
            file_name="",
        )

        key = json.loads(_cache_key(question))

        self.assertEqual(key["cache_version"], "official-qwen-v1")

    def test_cache_lookup_reuses_matching_entries(self) -> None:
        """A cache entry stored under the matching key is returned as-is."""
        question = BenchmarkQuestion(
            task_id="task-2",
            question="Another question",
            level="1",
            file_name="file.txt",
        )
        # The expected key is spelled out by hand (rather than via _cache_key)
        # so this test pins the on-disk key format: sorted-keys JSON of the
        # version tag plus task_id / question / file_name.
        cache = {
            json.dumps(
                {
                    "cache_version": "official-qwen-v1",
                    "task_id": "task-2",
                    "question": "Another question",
                    "file_name": "file.txt",
                },
                sort_keys=True,
            ): "cached-answer"
        }

        self.assertEqual(_get_cached_answer(cache, question), "cached-answer")

    # Decorators apply bottom-up: the lowest one supplies the first mock
    # argument (mock_get), the one above it the second (mock_post).
    # NOTE(review): the patch targets assume zeta_hf.eval_runner does
    # `import requests` and calls requests.get / requests.post -- confirm
    # against the module and adjust the dotted paths if it imports differently.
    @patch("zeta_hf.eval_runner.requests.post")
    @patch("zeta_hf.eval_runner.requests.get")
    def test_dry_run_does_not_submit(self, mock_get: Mock, mock_post: Mock) -> None:
        """With ``submit=False`` the run reports not-submitted and never POSTs."""
        response = Mock()
        response.raise_for_status.return_value = None
        response.json.return_value = [{"task_id": "1", "question": "q", "Level": "1", "file_name": ""}]
        mock_get.return_value = response

        agent = Mock()
        agent.answer_question.return_value = "answer"

        result = run_benchmark("user", submit=False, agent=agent)

        self.assertFalse(result.submitted)
        mock_post.assert_not_called()
if __name__ == "__main__":
    # Running this file directly hands control to unittest's CLI runner,
    # which discovers and executes the TestCase classes defined above.
    unittest.main()