"""Helpers describing and resolving the on-disk storage (cache) directory tree."""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

from .utils import repository_root


@dataclass(frozen=True)
class StorageLayout:
    """Immutable bundle of the directories that make up the storage tree.

    Instances are normally produced by :func:`storage_layout`, which places
    every directory at or below ``root``.
    """

    root: Path  # the storage root itself
    datasets: Path  # <root>/datasets
    models: Path  # <root>/models
    hf: Path  # <root>/hf
    artifacts: Path  # <root>/artifacts
    runs: Path  # <root>/artifacts/runs
    sweeps: Path  # <root>/artifacts/sweeps
    evals: Path  # <root>/artifacts/eval (directory name is singular)
    logs: Path  # <root>/logs
    wandb: Path  # <root>/logs/wandb


def storage_layout(cache_dir: str | Path = "cache") -> StorageLayout:
    """Compute the storage layout rooted at *cache_dir* without touching disk.

    Args:
        cache_dir: Storage root. A relative value is interpreted relative to
            ``repository_root()``; an absolute value is used as given.

    Returns:
        A :class:`StorageLayout` whose paths are all absolute.
    """
    root = _resolve_storage_root(cache_dir)
    artifacts = root / "artifacts"
    logs = root / "logs"
    return StorageLayout(
        root=root,
        datasets=root / "datasets",
        models=root / "models",
        hf=root / "hf",
        artifacts=artifacts,
        runs=artifacts / "runs",
        sweeps=artifacts / "sweeps",
        evals=artifacts / "eval",
        logs=logs,
        wandb=logs / "wandb",
    )


def ensure_storage_layout(cache_dir: str | Path = "cache") -> StorageLayout:
    """Create every directory of the storage layout and return the layout.

    Directories that already exist are left alone; missing parents are
    created as needed.

    Args:
        cache_dir: Storage root, interpreted as in :func:`storage_layout`.

    Returns:
        The :class:`StorageLayout` whose directories now exist.
    """
    layout = storage_layout(cache_dir)
    directories = (
        layout.root,
        layout.datasets,
        layout.models,
        layout.hf,
        layout.artifacts,
        layout.runs,
        layout.sweeps,
        layout.evals,
        layout.logs,
        layout.wandb,
    )
    for directory in directories:
        directory.mkdir(parents=True, exist_ok=True)
    return layout


def resolve_storage_path(path: str | Path, cache_dir: str | Path = "cache") -> Path:
    """Resolve *path* to an absolute location inside the storage tree.

    Resolution rules, in order:

    1. An absolute *path* is returned resolved, unchanged otherwise.
    2. A relative *path* whose first component equals the storage root's
       directory name (e.g. ``cache/runs/x`` with a root named ``cache``) is
       treated as already carrying the root prefix, so that component is not
       duplicated.
    3. Any other relative *path* is placed under the storage root.

    Args:
        path: Absolute path, or path relative to the storage root (optionally
            prefixed with the storage root's directory name).
        cache_dir: Storage root, interpreted as in :func:`storage_layout`.

    Returns:
        The resolved absolute path.
    """
    candidate = Path(path)
    if candidate.is_absolute():
        return candidate.resolve()

    cache_root = _resolve_storage_root(cache_dir)
    parts = candidate.parts
    if parts and parts[0] == cache_root.name:
        # Drop the redundant leading component and anchor the remainder at the
        # resolved storage root. Anchoring at the repository root instead is
        # only correct when the storage root sits directly under the
        # repository; with an absolute or nested cache_dir it would escape
        # the configured storage root.
        return cache_root.joinpath(*parts[1:]).resolve()
    return (cache_root / candidate).resolve()


def _resolve_storage_root(cache_dir: str | Path) -> Path:
    """Return the absolute, resolved storage root for *cache_dir*.

    Relative values are anchored at ``repository_root()``; absolute values
    are only resolved.
    """
    candidate = Path(cache_dir)
    if candidate.is_absolute():
        return candidate.resolve()
    return (repository_root() / candidate).resolve()