Spaces:

yugbirla
/

GraphResearcher

Sleeping

App Files Files Community

GraphResearcher / tests /test_eval_dataset_schema.py

yugbirla

Add evaluation framework, ablation tests, and lean README

7728916 15 days ago

Raw

History Blame Contribute Delete

3.71 kB

	"""Tests for QA evaluation dataset schema validity."""

	import json
	import os
	import pytest
	from pathlib import Path

	# Evaluation datasets live in the "eval" directory that sits next to the
	# directory containing this test module (two levels up from this file).
	EVAL_DIR = Path(__file__).parent.parent.joinpath("eval")
	# Keys that every dataset row is required to provide.
	REQUIRED_FIELDS = {
	    "id",
	    "question",
	    "gold_answer",
	    "relevant_chunk_ids",
	    "expected_terms",
	    "difficulty",
	}
	# The only values accepted for a row's "difficulty" field.
	VALID_DIFFICULTIES = {
	    "easy",
	    "medium",
	    "hard",
	}


	def iter_jsonl(path: Path):
	    """Yield ``(line_no, row)`` pairs parsed from a JSONL file.

	    Each line is stripped of surrounding whitespace; lines that are empty
	    after stripping are skipped. ``line_no`` is the 1-based physical line
	    number in the file, so skipped lines still advance the count.

	    Args:
	        path: Location of the JSONL file, opened as UTF-8 text.

	    Yields:
	        Tuples of ``(line_no, parsed_json_value)``.

	    Raises:
	        json.JSONDecodeError: If a non-blank line is not valid JSON. The
	            message is prefixed with ``path:line_no`` so the offending row
	            can be located directly from the failure output.
	    """
	    with open(path, "r", encoding="utf-8") as f:
	        for line_no, line in enumerate(f, 1):
	            line = line.strip()
	            if not line:
	                continue
	            try:
	                row = json.loads(line)
	            except json.JSONDecodeError as err:
	                # json only knows about the single stripped line it was
	                # given; re-raise the same exception type with the file
	                # name and physical line number added for context.
	                raise json.JSONDecodeError(
	                    f"{path}:{line_no}: {err.msg}", err.doc, err.pos
	                ) from err
	            yield line_no, row


	def get_eval_files():
	    """Return all ``*.jsonl`` files directly inside ``EVAL_DIR``, sorted by path.

	    Returns:
	        A list of ``Path`` objects; empty when ``EVAL_DIR`` does not exist
	        or contains no matching files.
	    """
	    if not EVAL_DIR.exists():
	        return []
	    # Glob order depends on the filesystem; sorting keeps the order of the
	    # parametrized test cases built from this list stable across runs.
	    return sorted(EVAL_DIR.glob("*.jsonl"))


	@pytest.fixture(params=get_eval_files(), ids=lambda path: path.name)
	def eval_file(request):
	    """Parametrized fixture handing each discovered eval file to a test.

	    The parameter list is built once, when this module is imported; each
	    case is labelled with the file's name.
	    """
	    return request.param


	def test_eval_files_exist():
	    """The eval/ directory must provide at least one JSONL dataset."""
	    discovered = get_eval_files()
	    assert discovered, "No eval JSONL files found in eval/ directory"


	def test_eval_file_has_required_fields(eval_file):
	    """Every row in every eval file must be a JSON object with all required fields."""
	    for line_no, row in iter_jsonl(eval_file):
	        # A line can be valid JSON without being an object (list, number,
	        # string); report that clearly instead of failing on key access.
	        assert isinstance(row, dict), (
	            f"{eval_file.name}:{line_no} row must be a JSON object, "
	            f"got {type(row).__name__}"
	        )
	        # Sorted so the failure message is identical from run to run.
	        missing = sorted(REQUIRED_FIELDS.difference(row))
	        assert not missing, f"{eval_file.name}:{line_no} missing fields: {missing}"


	def test_eval_file_has_valid_types(eval_file):
	    """Each checked field must hold a value of its expected type."""
	    # (field name, required Python type, type label used in the message),
	    # listed in the order the checks are applied to every row.
	    expectations = (
	        ("id", str, "string"),
	        ("question", str, "string"),
	        ("gold_answer", str, "string"),
	        ("relevant_chunk_ids", list, "list"),
	        ("expected_terms", list, "list"),
	        ("difficulty", str, "string"),
	    )
	    for line_no, row in iter_jsonl(eval_file):
	        for field, expected_type, label in expectations:
	            assert isinstance(row[field], expected_type), (
	                f"{eval_file.name}:{line_no} {field} must be {label}"
	            )


	def test_eval_file_has_valid_difficulty(eval_file):
	    """Each row's difficulty must be one of the values in VALID_DIFFICULTIES."""
	    for line_no, row in iter_jsonl(eval_file):
	        difficulty = row["difficulty"]
	        is_known = difficulty in VALID_DIFFICULTIES
	        assert is_known, f"{eval_file.name}:{line_no} invalid difficulty: {difficulty}"


	def test_eval_file_has_unique_ids(eval_file):
	    """All question IDs within a file must be unique.

	    On failure the message names every repeated ID together with the line
	    it reappears on and the line where it was first seen.
	    """
	    first_seen = {}  # id -> line number of its first occurrence
	    duplicates = []
	    for line_no, row in iter_jsonl(eval_file):
	        row_id = row["id"]
	        if row_id in first_seen:
	            duplicates.append(
	                f"{row_id!r} (line {line_no}, first seen on line {first_seen[row_id]})"
	            )
	        else:
	            first_seen[row_id] = line_no
	    assert not duplicates, (
	        f"{eval_file.name} has duplicate IDs: " + "; ".join(duplicates)
	    )


	def test_eval_file_questions_not_empty(eval_file):
	    """Questions must be longer than 5 characters once whitespace is stripped."""
	    for line_no, row in iter_jsonl(eval_file):
	        question = row["question"]
	        stripped_length = len(question.strip())
	        assert stripped_length > 5, f"{eval_file.name}:{line_no} question too short: {question}"


	def test_starter_file_has_15_questions():
	    """qa_15_starter.jsonl, when present, must contain exactly 15 non-blank rows."""
	    starter_path = EVAL_DIR / "qa_15_starter.jsonl"
	    if not starter_path.exists():
	        pytest.skip("qa_15_starter.jsonl not found")
	    # Only the number of rows matters here, so count them as they stream by.
	    row_count = sum(1 for _ in iter_jsonl(starter_path))
	    assert row_count == 15, f"Expected 15 questions, got {row_count}"


	def test_full_file_has_50_questions():
	    """qa_50_graphresearcher.jsonl, when present, must contain exactly 50 non-blank rows."""
	    full_path = EVAL_DIR / "qa_50_graphresearcher.jsonl"
	    if not full_path.exists():
	        pytest.skip("qa_50_graphresearcher.jsonl not found")
	    # Only the number of rows matters here, so count them as they stream by.
	    row_count = sum(1 for _ in iter_jsonl(full_path))
	    assert row_count == 50, f"Expected 50 questions, got {row_count}"