File size: 2,468 Bytes
aef1f5a
3c4c67b
 
 
bd5f74b
aef1f5a
262b3cb
3c4c67b
 
 
262b3cb
 
3c4c67b
aef1f5a
 
3c4c67b
bd5f74b
 
 
 
 
 
3c4c67b
aef1f5a
 
 
 
 
3c4c67b
 
aef1f5a
 
 
 
 
3c4c67b
 
262b3cb
 
 
 
 
 
 
 
80cbb1a
262b3cb
 
 
 
 
363ba14
 
 
bd5f74b
363ba14
262b3cb
bd5f74b
 
 
 
363ba14
262b3cb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""Tests for the data loader."""

from __future__ import annotations

import os
from typing import TYPE_CHECKING
from unittest.mock import MagicMock, patch

import pytest

from stroke_deepisles_demo.data.adapter import LocalDataset
from stroke_deepisles_demo.data.loader import HuggingFaceDatasetWrapper, load_isles_dataset

if TYPE_CHECKING:
    from pathlib import Path

# Skip tests that download large datasets in CI (limited disk space)
SKIP_IN_CI = pytest.mark.skipif(
    os.environ.get("CI") == "true",
    reason="Skips large HuggingFace downloads in CI (disk space)",
)


def test_load_from_local_returns_local_dataset(synthetic_isles_dir: Path) -> None:
    """Test that loading from local path returns a LocalDataset."""
    dataset = load_isles_dataset(source=synthetic_isles_dir, local_mode=True)
    assert isinstance(dataset, LocalDataset)
    assert len(dataset) > 0


def test_load_from_local_finds_all_cases(synthetic_isles_dir: Path) -> None:
    """Test that the loader correctly delegates finding cases to adapter."""
    dataset = load_isles_dataset(source=synthetic_isles_dir)
    assert len(dataset) == 2
    assert dataset.list_case_ids() == ["sub-stroke0001", "sub-stroke0002"]


def test_load_hf_calls_load_dataset() -> None:
    """Test that loading from HF calls datasets.load_dataset."""
    with patch("datasets.load_dataset") as mock_load:
        mock_ds = MagicMock()
        mock_ds.__len__.return_value = 0
        # Mock column access for index building
        mock_ds.__getitem__.side_effect = lambda key: [] if key == "subject_id" else MagicMock()
        mock_load.return_value = mock_ds

        ds = load_isles_dataset(source="my/dataset", local_mode=False)

        assert isinstance(ds, HuggingFaceDatasetWrapper)
        mock_load.assert_called_once()
        assert mock_load.call_args[0][0] == "my/dataset"


@pytest.mark.integration
@SKIP_IN_CI
def test_load_from_huggingface_returns_hf_dataset() -> None:
    """Test that loading from HuggingFace returns a HuggingFaceDatasetWrapper.

    Note: Skipped in CI due to large download size (~GB) and limited disk space.
    Run locally with: pytest -m integration tests/data/test_loader.py
    """
    with load_isles_dataset() as dataset:  # Default is HuggingFace mode
        assert isinstance(dataset, HuggingFaceDatasetWrapper)
        # We can't guarantee length if we don't mock, but we can check type
        # Real test might fail if network issue or auth issue