Spaces:
Sleeping
Sleeping
| """Tests for QA evaluation dataset schema validity.""" | |
| import json | |
| import os | |
| import pytest | |
| from pathlib import Path | |
| EVAL_DIR = Path(__file__).parent.parent / "eval" | |
| REQUIRED_FIELDS = {"id", "question", "gold_answer", "relevant_chunk_ids", "expected_terms", "difficulty"} | |
| VALID_DIFFICULTIES = {"easy", "medium", "hard"} | |
| def iter_jsonl(path: Path): | |
| """Yield parsed rows from a JSONL file.""" | |
| with open(path, "r", encoding="utf-8") as f: | |
| for line_no, line in enumerate(f, 1): | |
| line = line.strip() | |
| if not line: | |
| continue | |
| yield line_no, json.loads(line) | |
| def get_eval_files(): | |
| """Find all .jsonl files in eval/ directory.""" | |
| if not EVAL_DIR.exists(): | |
| return [] | |
| return list(EVAL_DIR.glob("*.jsonl")) | |
| def eval_file(request): | |
| return request.param | |
| def test_eval_files_exist(): | |
| """At least one eval JSONL file should exist.""" | |
| files = get_eval_files() | |
| assert len(files) > 0, "No eval JSONL files found in eval/ directory" | |
| def test_eval_file_has_required_fields(eval_file): | |
| """Every row in every eval file must have all required fields.""" | |
| for line_no, row in iter_jsonl(eval_file): | |
| missing = REQUIRED_FIELDS - set(row.keys()) | |
| assert not missing, f"{eval_file.name}:{line_no} missing fields: {missing}" | |
| def test_eval_file_has_valid_types(eval_file): | |
| """Field types must be correct.""" | |
| for line_no, row in iter_jsonl(eval_file): | |
| assert isinstance(row["id"], str), f"{eval_file.name}:{line_no} id must be string" | |
| assert isinstance(row["question"], str), f"{eval_file.name}:{line_no} question must be string" | |
| assert isinstance(row["gold_answer"], str), f"{eval_file.name}:{line_no} gold_answer must be string" | |
| assert isinstance(row["relevant_chunk_ids"], list), f"{eval_file.name}:{line_no} relevant_chunk_ids must be list" | |
| assert isinstance(row["expected_terms"], list), f"{eval_file.name}:{line_no} expected_terms must be list" | |
| assert isinstance(row["difficulty"], str), f"{eval_file.name}:{line_no} difficulty must be string" | |
| def test_eval_file_has_valid_difficulty(eval_file): | |
| """Difficulty must be easy, medium, or hard.""" | |
| for line_no, row in iter_jsonl(eval_file): | |
| assert row["difficulty"] in VALID_DIFFICULTIES, ( | |
| f"{eval_file.name}:{line_no} invalid difficulty: {row['difficulty']}" | |
| ) | |
| def test_eval_file_has_unique_ids(eval_file): | |
| """All question IDs within a file must be unique.""" | |
| ids = [] | |
| for _, row in iter_jsonl(eval_file): | |
| ids.append(row["id"]) | |
| assert len(ids) == len(set(ids)), f"{eval_file.name} has duplicate IDs" | |
| def test_eval_file_questions_not_empty(eval_file): | |
| """Questions must not be empty.""" | |
| for line_no, row in iter_jsonl(eval_file): | |
| assert len(row["question"].strip()) > 5, ( | |
| f"{eval_file.name}:{line_no} question too short: {row['question']}" | |
| ) | |
| def test_starter_file_has_15_questions(): | |
| """The 15-question starter file must have exactly 15 rows.""" | |
| starter = EVAL_DIR / "qa_15_starter.jsonl" | |
| if not starter.exists(): | |
| pytest.skip("qa_15_starter.jsonl not found") | |
| rows = list(iter_jsonl(starter)) | |
| assert len(rows) == 15, f"Expected 15 questions, got {len(rows)}" | |
| def test_full_file_has_50_questions(): | |
| """The 50-question file must have exactly 50 rows.""" | |
| full = EVAL_DIR / "qa_50_graphresearcher.jsonl" | |
| if not full.exists(): | |
| pytest.skip("qa_50_graphresearcher.jsonl not found") | |
| rows = list(iter_jsonl(full)) | |
| assert len(rows) == 50, f"Expected 50 questions, got {len(rows)}" | |