|
|
"""Tests for the data loader.""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
import os |
|
|
from typing import TYPE_CHECKING |
|
|
from unittest.mock import patch |
|
|
|
|
|
import pytest |
|
|
|
|
|
from stroke_deepisles_demo.data.adapter import HuggingFaceDataset, LocalDataset, logger |
|
|
from stroke_deepisles_demo.data.loader import load_isles_dataset |
|
|
|
|
|
if TYPE_CHECKING: |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
# Marker for tests that pull large HuggingFace downloads: the test is skipped
# whenever the CI environment variable is exactly the string "true".
SKIP_IN_CI = pytest.mark.skipif(
    os.getenv("CI") == "true",
    reason="Skips large HuggingFace downloads in CI (disk space)",
)
|
|
|
|
|
|
|
|
def test_load_from_local_returns_local_dataset(synthetic_isles_dir: Path) -> None:
    """Explicit local mode on a directory yields a non-empty ``LocalDataset``."""
    loaded = load_isles_dataset(local_mode=True, source=synthetic_isles_dir)

    assert isinstance(loaded, LocalDataset)

    # The dataset must expose at least one case.
    case_count = len(loaded)
    assert case_count > 0
|
|
|
|
|
|
|
|
def test_load_from_local_finds_all_cases(synthetic_isles_dir: Path) -> None:
    """Loading the synthetic directory exposes both case IDs, in order.

    ``local_mode`` is deliberately not passed here; only ``source`` is given.
    """
    expected_case_ids = ["sub-stroke0001", "sub-stroke0002"]

    loaded = load_isles_dataset(source=synthetic_isles_dir)

    assert len(loaded) == 2
    assert loaded.list_case_ids() == expected_case_ids
|
|
|
|
|
|
|
|
def test_load_hf_warns_on_non_standard_dataset() -> None:
    """Loading a non-standard HF dataset ID logs exactly one warning.

    Note: With pre-computed case IDs, the dataset ID mismatch is only detected
    at build time (warning logged), not at get_case() time. The actual 404 error
    would only occur when trying to download a case that doesn't exist.
    """
    unknown_dataset_id = "fake/nonexistent-dataset"

    # Replace the adapter logger's ``warning`` method so the call can be inspected.
    with patch.object(logger, "warning") as warning_spy:
        hf_dataset = load_isles_dataset(source=unknown_dataset_id, local_mode=False)

        warning_spy.assert_called_once()
        # call_args[0] holds the positional arguments; the first is the message.
        positional_args = warning_spy.call_args[0]
        logged_message = positional_args[0]
        assert "does not match pre-computed constants" in logged_message

    # Despite the warning, a dataset with the expected case count is returned.
    assert isinstance(hf_dataset, HuggingFaceDataset)
    assert len(hf_dataset) == 149
|
|
|
|
|
|
|
|
@pytest.mark.integration
@SKIP_IN_CI
def test_load_from_huggingface_returns_hf_dataset() -> None:
    """Calling the loader with no arguments yields a ``HuggingFaceDataset``.

    Note: Skipped in CI due to large download size (~GB) and limited disk space.
    Run locally with: pytest -m integration tests/data/test_loader.py
    """
    # The loader's return value is used as a context manager here.
    with load_isles_dataset() as hf_dataset:
        assert isinstance(hf_dataset, HuggingFaceDataset)
        assert len(hf_dataset) == 149

        case_ids = hf_dataset.list_case_ids()
        assert case_ids[0] == "sub-stroke0001"
|
|
|