# File size: 2,228 Bytes
# 5f7d974 f21dc8c 5f7d974 f21dc8c
"""Tests for Streamlit app helpers."""
from __future__ import annotations
from pathlib import Path
from PIL import Image
from mathvision_explorer.streamlit_app import (
_choose_hf_split,
_hf_dataset_id_from_ref,
_record_from_hf_row,
)
def test_hf_dataset_id_from_url() -> None:
    """A full Hub dataset URL and a ``datasets/``-prefixed ref give the same repo id."""
    expected_id = "MathLLMs/MathVision"
    references = (
        "https://huggingface.co/datasets/MathLLMs/MathVision",
        "datasets/MathLLMs/MathVision",
    )
    # Checked one at a time, URL form first, so a failure points at a single ref.
    for reference in references:
        assert _hf_dataset_id_from_ref(reference) == expected_id
def test_record_from_hf_row_persists_image(tmp_path: Path) -> None:
    """A row holding an in-memory image produces a record whose image file exists on disk."""
    blank_image = Image.new("RGB", (4, 4), color="white")
    hf_row = {
        "id": "1",
        "question": "How many triangles?",
        "answer": "3",
        "decoded_image": blank_image,
    }
    expected_image_path = tmp_path / "row-00000.png"

    record = _record_from_hf_row(hf_row, row_index=0, image_dir=tmp_path)

    assert record is not None
    assert record.problem_id == "1"
    # The image must land inside the supplied directory under the row-index name.
    assert record.image_path == expected_image_path
    assert record.image_path.exists()
def test_choose_hf_split_prefers_test() -> None:
    """When both ``train`` and ``test`` splits are offered, ``test`` is selected."""
    available_splits = {"train": object(), "test": object()}
    chosen = _choose_hf_split(_FakeDatasets(available_splits), "org/name")
    assert chosen == "test"
def test_choose_hf_split_falls_back_to_first_available() -> None:
    """With only ``testmini`` and ``dev`` on offer, the first one listed is selected."""
    available_splits = {"testmini": object(), "dev": object()}
    chosen = _choose_hf_split(_FakeDatasets(available_splits), "org/name")
    assert chosen == "testmini"
class _FakeDatasets:
def __init__(self, splits: dict[str, object]) -> None:
self._splits = splits
def load_dataset_builder(self, repo_id: str) -> _FakeBuilder:
assert repo_id == "org/name"
return _FakeBuilder(self._splits)
class _FakeBuilder:
    """Builder double whose ``info`` attribute wraps the splits it was given."""

    def __init__(self, splits: dict[str, object]) -> None:
        fake_info = _FakeInfo(splits)
        self.info = fake_info
class _FakeInfo:
def __init__(self, splits: dict[str, object]) -> None:
self.splits = splits
|