File size: 3,072 Bytes
aef1f5a
3c4c67b
 
 
 
aef1f5a
363ba14
3c4c67b
aef1f5a
363ba14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c4c67b
 
aef1f5a
 
 
3c4c67b
aef1f5a
 
 
 
3c4c67b
 
363ba14
 
 
 
3c4c67b
363ba14
3c4c67b
363ba14
 
3c4c67b
363ba14
3c4c67b
 
363ba14
 
 
 
3c4c67b
 
363ba14
 
 
 
 
 
 
3c4c67b
363ba14
 
 
 
 
3c4c67b
363ba14
 
 
 
 
 
 
 
 
 
 
 
 
 
aef1f5a
3c4c67b
363ba14
 
aef1f5a
3c4c67b
363ba14
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""Load ISLES24 data from local directory or HuggingFace Hub."""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Protocol, Self

if TYPE_CHECKING:
    from stroke_deepisles_demo.core.types import CaseFiles


class Dataset(Protocol):
    """Protocol for dataset access.

    All dataset implementations support context manager usage for proper cleanup:

        with load_isles_dataset() as ds:
            case = ds.get_case(0)
            # ... process case ...
        # cleanup happens automatically
    """

    def __len__(self) -> int: ...
    def __enter__(self) -> Self: ...
    def __exit__(self, *args: object) -> None: ...
    def list_case_ids(self) -> list[str]: ...
    def get_case(self, case_id: str | int) -> CaseFiles: ...
    def cleanup(self) -> None: ...


@dataclass
class DatasetInfo:
    """Summary record describing a dataset.

    Attributes:
        source: Either the literal ``"local"`` or a HuggingFace dataset ID.
        num_cases: Count of cases in the dataset.
        modalities: Names of the modalities, as strings.
        has_ground_truth: Flag indicating whether ground truth is present.
    """

    source: str
    num_cases: int
    modalities: list[str]
    has_ground_truth: bool


# HuggingFace dataset ID that load_isles_dataset() falls back to in
# HuggingFace mode when the caller supplies no (or an empty) source.
DEFAULT_HF_DATASET = "hugging-science/isles24-stroke"


def load_isles_dataset(
    source: str | Path | None = None,
    *,
    local_mode: bool | None = None,
) -> Dataset:
    """
    Open the ISLES24 dataset, either from disk or from the HuggingFace Hub.

    Args:
        source: A local directory path or a HuggingFace dataset ID.
                When omitted, the default HuggingFace dataset is used.
        local_mode: True forces local-directory loading (falling back to
                    "data/isles24" when no source is given); False forces
                    HuggingFace loading. None picks automatically: no
                    source -> HuggingFace, a Path -> local, a string ->
                    local only when that path exists on disk.

    Returns:
        Dataset-like object providing case access. Prefer using it as a
        context manager so temp files are cleaned up automatically
        (important for HuggingFace mode).

    Examples:
        # HuggingFace default, cleaned up on exit (recommended)
        with load_isles_dataset() as ds:
            case = ds.get_case(0)

        # Explicit local directory
        ds = load_isles_dataset("data/isles24", local_mode=True)

        # A specific HuggingFace dataset
        ds = load_isles_dataset("hugging-science/isles24-stroke")
    """
    use_local = local_mode
    if use_local is None:
        if source is None:
            # Nothing specified: go to the Hub.
            use_local = False
        else:
            # A Path object is always local. A plain string counts as local
            # only if it names something that exists, so Hub IDs such as
            # "org/dataset" are not mistaken for directories.
            use_local = isinstance(source, Path) or Path(source).exists()

    if not use_local:
        # HuggingFace branch; the adapter is imported lazily.
        from stroke_deepisles_demo.data.adapter import build_huggingface_dataset

        hub_id = source or DEFAULT_HF_DATASET
        return build_huggingface_dataset(str(hub_id))

    # Local branch; the adapter is imported lazily.
    from stroke_deepisles_demo.data.adapter import build_local_dataset

    local_root = Path("data/isles24") if source is None else Path(source)
    return build_local_dataset(local_root)