Spaces:
Running
Running
File size: 2,411 Bytes
23680f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
"""Storage configuration for HyperView."""
import os
from dataclasses import dataclass, field
from pathlib import Path
def get_default_datasets_dir() -> Path:
    """Get the default datasets directory.

    Uses HYPERVIEW_DATASETS_DIR env var if set (and non-empty), otherwise
    ~/.hyperview/datasets/
    Each dataset gets its own subdirectory with isolated LanceDB tables.

    Returns:
        The datasets directory. A leading ``~`` in the env var value is
        expanded to the current user's home directory.
    """
    env_dir = os.environ.get("HYPERVIEW_DATASETS_DIR")
    if env_dir:
        # Values coming from .env files or service managers are not
        # shell-expanded, so resolve a leading "~" here instead of creating
        # a literal "~" directory relative to the working directory.
        return Path(env_dir).expanduser()
    return Path.home() / ".hyperview" / "datasets"
def get_default_media_dir() -> Path:
    """Get the default media directory for downloaded images.

    Uses HYPERVIEW_MEDIA_DIR env var if set (and non-empty), otherwise
    ~/.hyperview/media/
    Similar to FiftyOne's ~/fiftyone/huggingface/hub/ pattern.

    Returns:
        The media directory. A leading ``~`` in the env var value is
        expanded to the current user's home directory.
    """
    env_dir = os.environ.get("HYPERVIEW_MEDIA_DIR")
    if env_dir:
        # Values coming from .env files or service managers are not
        # shell-expanded, so resolve a leading "~" here instead of creating
        # a literal "~" directory relative to the working directory.
        return Path(env_dir).expanduser()
    return Path.home() / ".hyperview" / "media"
@dataclass
class StorageConfig:
    """Configuration for storage backend.

    Attributes:
        datasets_dir: Root directory for datasets; defaults to the value of
            get_default_datasets_dir().
        media_dir: Root directory for downloaded media; defaults to the value
            of get_default_media_dir().
    """

    datasets_dir: Path = field(default_factory=get_default_datasets_dir)
    media_dir: Path = field(default_factory=get_default_media_dir)

    def __post_init__(self) -> None:
        """Normalize both directories to ``Path`` objects.

        Callers sometimes pass plain strings; without this, the ``mkdir`` and
        ``/`` operations in the methods below would fail on ``str``.
        """
        self.datasets_dir = Path(self.datasets_dir)
        self.media_dir = Path(self.media_dir)

    @classmethod
    def default(cls) -> "StorageConfig":
        """Create a default configuration."""
        return cls(
            datasets_dir=get_default_datasets_dir(),
            media_dir=get_default_media_dir(),
        )

    def ensure_dir_exists(self) -> None:
        """Ensure the datasets directory exists (creating parents as needed)."""
        self.datasets_dir.mkdir(parents=True, exist_ok=True)

    def ensure_media_dir_exists(self) -> None:
        """Ensure the media directory exists (creating parents as needed)."""
        self.media_dir.mkdir(parents=True, exist_ok=True)

    def get_huggingface_media_dir(self, dataset_name: str, split: str) -> Path:
        """Get the directory for storing HuggingFace dataset media.

        Creates: {media_dir}/huggingface/{dataset_name}/{split}/
        where any "/" in ``dataset_name`` or ``split`` is replaced by "_".

        Args:
            dataset_name: Name of the HuggingFace dataset (e.g., "cifar100")
            split: Dataset split (e.g., "train", "test")

        Returns:
            Path to the media directory for this dataset/split. The directory
            is created if it does not already exist.
        """
        # Sanitize both components for the filesystem (replace / with _).
        # An unsanitized split containing "/" would create nested directories,
        # and one starting with "/" would make pathlib discard media_dir.
        safe_name = dataset_name.replace("/", "_")
        safe_split = split.replace("/", "_")
        media_path = self.media_dir / "huggingface" / safe_name / safe_split
        media_path.mkdir(parents=True, exist_ok=True)
        return media_path
|