"""Tests for MathVision-style dataset loading.""" from __future__ import annotations import json from pathlib import Path import pytest from mathvision_explorer.dataset import ( filter_records, load_jsonl_records, load_jsonl_records_from_text, record_from_mapping, summarize_records, ) def test_record_from_mapping_accepts_problem_id_fallback() -> None: """Raw records may use either id or problem_id for the stable key.""" record = record_from_mapping( { "problem_id": "mv-1", "question": "How many squares are visible?", "answer": "4", "options": ["3", "4", "5"], "subject": "geometry", "level": 2, } ) assert record.problem_id == "mv-1" assert record.options == ("3", "4", "5") assert record.subject == "geometry" assert record.level == 2 def test_load_jsonl_records_resolves_relative_image_paths(tmp_path: Path) -> None: """Relative image paths are resolved from the JSONL file directory.""" jsonl_path = tmp_path / "records.jsonl" payload = { "id": "mv-2", "question": "Pick the matching pattern.", "answer": "B", "image": "images/mv-2.png", } jsonl_path.write_text(json.dumps(payload) + "\n", encoding="utf-8") records = load_jsonl_records(jsonl_path) assert records[0].image_path == tmp_path / "images" / "mv-2.png" def test_load_jsonl_records_from_text_resolves_relative_image_paths(tmp_path: Path) -> None: """Uploaded JSONL content can still use a caller-provided image base directory.""" payload = { "id": "mv-3", "question": "Pick the matching graph.", "answer": "A", "image": "images/mv-3.png", } records = load_jsonl_records_from_text(json.dumps(payload), source_dir=tmp_path) assert records[0].image_path == tmp_path / "images" / "mv-3.png" def test_filter_and_summary() -> None: """Records can be filtered and summarized for explorer views.""" records = [ record_from_mapping({"id": "a", "question": "Q1", "answer": "1", "subject": "counting"}), record_from_mapping( {"id": "b", "question": "Q2", "answer": "2", "subject": "geometry", "level": 3} ), ] assert [record.problem_id for record in filter_records(records, subject="geometry")] == ["b"] assert summarize_records(records) == { "records": 2, "images": 0, "subjects": ["counting", "geometry"], "levels": [3], } def test_invalid_options_raise_value_error() -> None: """Options must be textual choices.""" with pytest.raises(ValueError, match="Options"): record_from_mapping({"id": "bad", "question": "Q", "answer": "A", "options": [1]})