|
|
"""Load ISLES24 data from local directory or HuggingFace Hub.""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
from dataclasses import dataclass |
|
|
from pathlib import Path |
|
|
from typing import TYPE_CHECKING, Protocol, Self |
|
|
|
|
|
if TYPE_CHECKING: |
|
|
from stroke_deepisles_demo.core.types import CaseFiles |
|
|
|
|
|
|
|
|
class Dataset(Protocol): |
|
|
"""Protocol for dataset access. |
|
|
|
|
|
All dataset implementations support context manager usage for proper cleanup: |
|
|
|
|
|
with load_isles_dataset() as ds: |
|
|
case = ds.get_case(0) |
|
|
# ... process case ... |
|
|
# cleanup happens automatically |
|
|
""" |
|
|
|
|
|
def __len__(self) -> int: ... |
|
|
def __enter__(self) -> Self: ... |
|
|
def __exit__(self, *args: object) -> None: ... |
|
|
def list_case_ids(self) -> list[str]: ... |
|
|
def get_case(self, case_id: str | int) -> CaseFiles: ... |
|
|
def cleanup(self) -> None: ... |
|
|
|
|
|
|
|
|
@dataclass
class DatasetInfo:
    """Metadata about the dataset.

    The field descriptions below are inferred from the field names and
    types only; confirm them against the code that builds this record.
    """

    # Where the data came from (presumably a local path or a Hub dataset ID).
    source: str
    # Number of cases in the dataset.
    num_cases: int
    # Names of the modalities available for the cases.
    modalities: list[str]
    # Whether ground truth is available.
    has_ground_truth: bool
|
|
|
|
|
|
|
|
|
|
|
# HuggingFace Hub dataset ID that load_isles_dataset() falls back to when it
# loads from the Hub without an explicit source.
DEFAULT_HF_DATASET = "hugging-science/isles24-stroke"
|
|
|
|
|
|
|
|
def load_isles_dataset(
    source: str | Path | None = None,
    *,
    local_mode: bool | None = None,
) -> Dataset:
    """Load ISLES24 dataset from local directory or HuggingFace Hub.

    Args:
        source: Local directory path or HuggingFace dataset ID.
            If None, uses the HuggingFace dataset ``DEFAULT_HF_DATASET``
            (or ``data/isles24`` when ``local_mode=True``).
        local_mode: If True, treat source as a local directory.
            If False, pass source to the HuggingFace loader as a dataset ID.
            If None, auto-detect:
              * ``None`` or an empty string -> HuggingFace Hub
              * a ``Path`` instance -> local directory
              * any other string -> local directory if that path exists
                on disk, otherwise a HuggingFace dataset ID

    Returns:
        Dataset-like object providing case access. Use as context manager
        for automatic cleanup of temp files (important for HuggingFace mode).

    Examples:
        # Load from HuggingFace with automatic cleanup (recommended)
        with load_isles_dataset() as ds:
            case = ds.get_case(0)

        # Load from local directory
        ds = load_isles_dataset("data/isles24", local_mode=True)

        # Load specific HuggingFace dataset
        ds = load_isles_dataset("hugging-science/isles24-stroke")
    """
    if local_mode is None:
        if source is None or source == "":
            # Path("") refers to the current directory, so an existence check
            # would misclassify an empty string as a local dataset. Treat it
            # as "no source", matching the falsy-source fallback used on the
            # Hub path below.
            local_mode = False
        elif isinstance(source, Path):
            # An explicit Path is taken as local without checking existence.
            local_mode = True
        else:
            # A plain string is ambiguous: local only if it exists on disk.
            local_mode = Path(source).exists()

    if local_mode:
        # Deferred import, kept from the original (presumably to avoid import
        # cost or cycles at module load time -- confirm).
        from stroke_deepisles_demo.data.adapter import build_local_dataset

        local_root = Path("data/isles24") if source is None else Path(source)
        return build_local_dataset(local_root)

    from stroke_deepisles_demo.data.adapter import build_huggingface_dataset

    # Any falsy source (None or "") selects the default Hub dataset.
    return build_huggingface_dataset(str(source or DEFAULT_HF_DATASET))
|
|
|