| """Tests for Streamlit app helpers.""" |
|
|
| from __future__ import annotations |
|
|
| from pathlib import Path |
|
|
| from PIL import Image |
|
|
| from mathvision_explorer.streamlit_app import ( |
| _choose_hf_split, |
| _hf_dataset_id_from_ref, |
| _record_from_hf_row, |
| ) |
|
|
|
|
def test_hf_dataset_id_from_url() -> None:
    """A full Hub URL and a ``datasets/``-prefixed path both yield the bare repo id."""
    expected_id = "MathLLMs/MathVision"
    url_ref = "https://huggingface.co/datasets/MathLLMs/MathVision"
    path_ref = "datasets/MathLLMs/MathVision"

    assert _hf_dataset_id_from_ref(url_ref) == expected_id
    assert _hf_dataset_id_from_ref(path_ref) == expected_id
|
|
|
|
def test_record_from_hf_row_persists_image(tmp_path: Path) -> None:
    """A row carrying an in-memory image yields a record whose image file exists on disk."""
    white_square = Image.new("RGB", (4, 4), color="white")
    hf_row = {
        "id": "1",
        "question": "How many triangles?",
        "answer": "3",
        "decoded_image": white_square,
    }
    expected_image_path = tmp_path / "row-00000.png"

    record = _record_from_hf_row(hf_row, row_index=0, image_dir=tmp_path)

    assert record is not None
    assert record.problem_id == "1"
    assert record.image_path == expected_image_path
    # The image must actually have been written, not just assigned a path.
    assert record.image_path.exists()
|
|
|
|
def test_choose_hf_split_prefers_test() -> None:
    """When both ``train`` and ``test`` splits exist, ``test`` is selected."""
    available_splits = {"train": object(), "test": object()}
    fake_module = _FakeDatasets(available_splits)

    chosen_split = _choose_hf_split(fake_module, "org/name")

    assert chosen_split == "test"
|
|
|
|
def test_choose_hf_split_falls_back_to_first_available() -> None:
    """When the only splits are ``testmini`` and ``dev``, the first listed one is chosen."""
    available_splits = {"testmini": object(), "dev": object()}
    fake_module = _FakeDatasets(available_splits)

    chosen_split = _choose_hf_split(fake_module, "org/name")

    assert chosen_split == "testmini"
|
|
|
|
| class _FakeDatasets: |
| def __init__(self, splits: dict[str, object]) -> None: |
| self._splits = splits |
|
|
| def load_dataset_builder(self, repo_id: str) -> _FakeBuilder: |
| assert repo_id == "org/name" |
| return _FakeBuilder(self._splits) |
|
|
|
|
class _FakeBuilder:
    """Minimal builder stand-in whose only attribute is ``info``."""

    def __init__(self, splits: dict[str, object]) -> None:
        """Wrap ``splits`` in a ``_FakeInfo`` and expose it as ``self.info``."""
        info = _FakeInfo(splits)
        self.info = info
|
|
|
|
| class _FakeInfo: |
| def __init__(self, splits: dict[str, object]) -> None: |
| self.splits = splits |
|
|