"""Load ISLES24 data from local directory or HuggingFace Hub."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Protocol, Self
if TYPE_CHECKING:
from stroke_deepisles_demo.core.types import CaseFiles


class Dataset(Protocol):
    """Protocol for dataset access.

    All dataset implementations support context manager usage for proper cleanup:

        with load_isles_dataset() as ds:
            case = ds.get_case(0)
            # ... process case ...
        # cleanup happens automatically
    """

    def __len__(self) -> int: ...
    def __enter__(self) -> Self: ...
    def __exit__(self, *args: object) -> None: ...
    def list_case_ids(self) -> list[str]: ...
    def get_case(self, case_id: str | int) -> CaseFiles: ...
    def cleanup(self) -> None: ...


@dataclass
class DatasetInfo:
    """Metadata about the dataset."""

    source: str  # "local" or HF dataset ID
    num_cases: int
    modalities: list[str]
    has_ground_truth: bool


# Default HuggingFace dataset ID
DEFAULT_HF_DATASET = "hugging-science/isles24-stroke"


def load_isles_dataset(
    source: str | Path | None = None,
    *,
    local_mode: bool | None = None,
) -> Dataset:
"""
Load ISLES24 dataset from local directory or HuggingFace Hub.
Args:
source: Local directory path or HuggingFace dataset ID.
If None, uses HuggingFace dataset by default.
local_mode: If True, treat source as local directory.
If None, auto-detect based on source type.
Returns:
Dataset-like object providing case access. Use as context manager
for automatic cleanup of temp files (important for HuggingFace mode).
Examples:
# Load from HuggingFace with automatic cleanup (recommended)
with load_isles_dataset() as ds:
case = ds.get_case(0)
# Load from local directory
ds = load_isles_dataset("data/isles24", local_mode=True)
# Load specific HuggingFace dataset
ds = load_isles_dataset("hugging-science/isles24-stroke")
"""
    # Auto-detect mode if not specified
    if local_mode is None:
        if source is None:
            local_mode = False  # Default to HuggingFace
        elif isinstance(source, Path):
            local_mode = True
        else:
            # String: check if it's an existing local path.
            # Only select local mode if the path itself exists
            # (avoids misclassifying HF dataset IDs like "org/dataset").
            source_path = Path(source)
            local_mode = source_path.exists()

    if local_mode:
        from stroke_deepisles_demo.data.adapter import build_local_dataset

        if source is None:
            source = "data/isles24"
        return build_local_dataset(Path(source))

    # HuggingFace mode
    from stroke_deepisles_demo.data.adapter import build_huggingface_dataset

    dataset_id = source if source else DEFAULT_HF_DATASET
    return build_huggingface_dataset(str(dataset_id))
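

if __name__ == "__main__":
    # Minimal usage sketch, not part of the library API: it exercises the
    # context-manager pattern recommended in the docstrings above. Running it
    # assumes either network access to the default HuggingFace dataset or a
    # local copy (pass a path such as "data/isles24" with local_mode=True).
    with load_isles_dataset() as ds:
        print(f"Loaded {len(ds)} case(s): {ds.list_case_ids()}")
        first_case = ds.get_case(0)
        print(first_case)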