| """Tests for MathVision-style dataset loading.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| import pytest |
|
|
| from mathvision_explorer.dataset import ( |
| filter_records, |
| load_jsonl_records, |
| load_jsonl_records_from_text, |
| record_from_mapping, |
| summarize_records, |
| ) |
|
|
|
|
def test_record_from_mapping_accepts_problem_id_fallback() -> None:
    """A mapping keyed by ``problem_id`` rather than ``id`` still builds a record."""
    raw_mapping = {
        "problem_id": "mv-1",
        "question": "How many squares are visible?",
        "answer": "4",
        "options": ["3", "4", "5"],
        "subject": "geometry",
        "level": 2,
    }

    parsed = record_from_mapping(raw_mapping)

    # The identifier comes from the ``problem_id`` key of the input mapping.
    assert parsed.problem_id == "mv-1"
    # The options list from the input is expected back as a tuple.
    assert parsed.options == ("3", "4", "5")
    assert parsed.subject == "geometry"
    assert parsed.level == 2
|
|
|
|
def test_load_jsonl_records_resolves_relative_image_paths(tmp_path: Path) -> None:
    """A relative ``image`` value is anchored at the directory containing the JSONL file."""
    source_file = tmp_path / "records.jsonl"
    serialized = json.dumps(
        {
            "id": "mv-2",
            "question": "Pick the matching pattern.",
            "answer": "B",
            "image": "images/mv-2.png",
        }
    )
    # One record per line, terminated by a newline.
    source_file.write_text(serialized + "\n", encoding="utf-8")

    loaded = load_jsonl_records(source_file)

    expected_image = tmp_path / "images" / "mv-2.png"
    assert loaded[0].image_path == expected_image
|
|
|
|
def test_load_jsonl_records_from_text_resolves_relative_image_paths(tmp_path: Path) -> None:
    """In-memory JSONL text resolves relative images against the given ``source_dir``."""
    serialized = json.dumps(
        {
            "id": "mv-3",
            "question": "Pick the matching graph.",
            "answer": "A",
            "image": "images/mv-3.png",
        }
    )

    # No file is written here; the base directory is passed in explicitly.
    loaded = load_jsonl_records_from_text(serialized, source_dir=tmp_path)

    expected_image = tmp_path / "images" / "mv-3.png"
    assert loaded[0].image_path == expected_image
|
|
|
|
def test_filter_and_summary() -> None:
    """Filtering by subject and summarizing a small record set give the expected results."""
    raw_mappings = (
        {"id": "a", "question": "Q1", "answer": "1", "subject": "counting"},
        {"id": "b", "question": "Q2", "answer": "2", "subject": "geometry", "level": 3},
    )
    records = [record_from_mapping(mapping) for mapping in raw_mappings]

    geometry_ids = [item.problem_id for item in filter_records(records, subject="geometry")]
    assert geometry_ids == ["b"]

    # Neither fixture has an image, and only record "b" carries a level.
    summary = summarize_records(records)
    assert summary == {
        "records": 2,
        "images": 0,
        "subjects": ["counting", "geometry"],
        "levels": [3],
    }
|
|
|
|
def test_invalid_options_raise_value_error() -> None:
    """A non-string entry in ``options`` is rejected with a ``ValueError`` mentioning Options."""
    malformed = {"id": "bad", "question": "Q", "answer": "A", "options": [1]}

    # Only the call under test sits inside the context manager.
    with pytest.raises(ValueError, match="Options"):
        record_from_mapping(malformed)
|
|