# mathvision-jepa-explorer/tests/test_streamlit_app.py
# ddebree's picture
# Auto-select
# f21dc8c
"""Tests for Streamlit app helpers."""
from __future__ import annotations
from pathlib import Path
from PIL import Image
from mathvision_explorer.streamlit_app import (
_choose_hf_split,
_hf_dataset_id_from_ref,
_record_from_hf_row,
)
def test_hf_dataset_id_from_url() -> None:
    """A full Hub URL and a ``datasets/``-prefixed path both reduce to the bare repo id."""
    expected_repo_id = "MathLLMs/MathVision"
    references = (
        "https://huggingface.co/datasets/MathLLMs/MathVision",
        "datasets/MathLLMs/MathVision",
    )
    # Checked in the same order as listed: absolute URL first, then the relative path form.
    for reference in references:
        assert _hf_dataset_id_from_ref(reference) == expected_repo_id
def test_record_from_hf_row_persists_image(tmp_path: Path) -> None:
    """An in-memory image carried by an HF row ends up as a file under ``image_dir``.

    The record must expose that file through ``image_path`` so that code which
    only understands image paths keeps working.
    """
    blank_image = Image.new("RGB", (4, 4), color="white")
    hf_row = {
        "id": "1",
        "question": "How many triangles?",
        "answer": "3",
        "decoded_image": blank_image,
    }

    record = _record_from_hf_row(hf_row, row_index=0, image_dir=tmp_path)

    assert record is not None
    assert record.problem_id == "1"
    # The expected file name is derived from the row index (0 -> "row-00000.png").
    expected_path = tmp_path / "row-00000.png"
    assert record.image_path == expected_path
    assert record.image_path.exists()
def test_choose_hf_split_prefers_test() -> None:
    """When both ``train`` and ``test`` splits exist, ``test`` is the one selected."""
    available_splits = {"train": object(), "test": object()}
    fake_datasets = _FakeDatasets(available_splits)

    chosen_split = _choose_hf_split(fake_datasets, "org/name")

    assert chosen_split == "test"
def test_choose_hf_split_falls_back_to_first_available() -> None:
    """With only custom split names on offer, the first listed split is selected."""
    # Dict insertion order defines "first": ``testmini`` precedes ``dev``.
    available_splits = {"testmini": object(), "dev": object()}
    fake_datasets = _FakeDatasets(available_splits)

    chosen_split = _choose_hf_split(fake_datasets, "org/name")

    assert chosen_split == "testmini"
class _FakeDatasets:
    """Test double passed to ``_choose_hf_split`` in place of a datasets module.

    It only provides ``load_dataset_builder``, which hands back a fake builder
    wrapping the split mapping supplied at construction time.
    """

    def __init__(self, splits: dict[str, object]) -> None:
        """Remember the split mapping that the fake builder will report."""
        self._splits = splits

    def load_dataset_builder(self, repo_id: str) -> _FakeBuilder:
        """Return a fake builder for ``repo_id``, which must be ``"org/name"``."""
        # Guard against the code under test asking for an unexpected repository.
        assert repo_id == "org/name"
        builder = _FakeBuilder(self._splits)
        return builder
class _FakeBuilder:
    """Test double for the object returned by ``load_dataset_builder``.

    Exposes a single ``info`` attribute holding a ``_FakeInfo``.
    """

    def __init__(self, splits: dict[str, object]) -> None:
        """Wrap ``splits`` in a ``_FakeInfo`` and publish it as ``info``."""
        fake_info = _FakeInfo(splits)
        self.info = fake_info
class _FakeInfo:
    """Test double for a builder's ``info`` object; carries only ``splits``."""

    def __init__(self, splits: dict[str, object]) -> None:
        """Store the split mapping unchanged on the ``splits`` attribute."""
        self.splits = splits