"""Shared helpers for path resolution, config/JSON I/O, seeding, device and
dtype selection, parameter counting, CUDA memory reporting and LR scheduling.
"""

from __future__ import annotations

import json
import math
import os
import random
from pathlib import Path
from typing import Any, Dict

import numpy as np
import torch
import yaml


def project_root() -> Path:
    """Return the parent of the directory that contains this file.

    NOTE(review): this is presumably the repository root; it depends on this
    module living exactly one directory below it.
    """
    return Path(__file__).resolve().parents[1]


def resolve_path(path: str | Path) -> Path:
    """Return *path* unchanged if absolute, else anchored at ``project_root()``."""
    p = Path(path)
    return p if p.is_absolute() else project_root() / p


def load_config(path: str | Path) -> Dict[str, Any]:
    """Load a YAML configuration file and return it as a dictionary.

    Args:
        path: Absolute path, or a path relative to ``project_root()``.

    Returns:
        The parsed top-level mapping.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
        ValueError: If the file is empty or its top level is not a mapping.
    """
    config_path = resolve_path(path)
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")
    with config_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    if cfg is None:
        raise ValueError(f"Config file is empty: {config_path}")
    # safe_load can also yield a list or a scalar; fail here rather than let
    # callers hit an obscure error when they index the "dict" later.
    if not isinstance(cfg, dict):
        raise ValueError(
            "Config file must contain a top-level mapping, "
            f"got {type(cfg).__name__}: {config_path}"
        )
    return cfg


def save_json(obj: Dict[str, Any], path: str | Path) -> None:
    """Write *obj* as indented UTF-8 JSON, creating parent directories.

    Args:
        obj: JSON-serializable dictionary.
        path: Absolute path, or a path relative to ``project_root()``.
    """
    out = resolve_path(path)
    out.parent.mkdir(parents=True, exist_ok=True)
    with out.open("w", encoding="utf-8") as f:
        json.dump(obj, f, indent=2)


def load_json(path: str | Path) -> Dict[str, Any]:
    """Read and return the JSON document stored at *path*.

    Args:
        path: Absolute path, or a path relative to ``project_root()``.
    """
    p = resolve_path(path)
    with p.open("r", encoding="utf-8") as f:
        return json.load(f)


def set_seed(seed: int) -> None:
    """Seed Python's ``random``, NumPy and PyTorch (CPU, plus CUDA if present)."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def get_device(requested: str = "cuda") -> torch.device:
    """Return the requested torch device, falling back to CPU without CUDA.

    Args:
        requested: Device string such as ``"cuda"``, ``"cuda:0"`` or ``"cpu"``.

    Returns:
        ``torch.device("cpu")`` when a CUDA device was requested but CUDA is
        not available; otherwise the device described by *requested*.
    """
    device = torch.device(requested)
    # Compare on the parsed device type so indexed requests such as "cuda:1"
    # also fall back, not only the bare "cuda" string.
    if device.type == "cuda" and not torch.cuda.is_available():
        print("CUDA was requested but is not available. Falling back to CPU.")
        return torch.device("cpu")
    return device


def get_torch_dtype(dtype: str) -> torch.dtype:
    """Map a dtype name to the corresponding ``torch.dtype``.

    Args:
        dtype: One of ``"float16"``, ``"bfloat16"`` or ``"float32"``.

    Raises:
        ValueError: If *dtype* is not one of the supported names.
    """
    if dtype == "float16":
        return torch.float16
    if dtype == "bfloat16":
        return torch.bfloat16
    if dtype == "float32":
        return torch.float32
    raise ValueError(f"Unsupported dtype: {dtype}")


def count_parameters(model: torch.nn.Module) -> int:
    """Return the number of trainable (``requires_grad``) elements in *model*."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def format_count(n: int) -> str:
    """Format *n* compactly, e.g. ``"1.50M"``, ``"2.50K"`` or ``"999"``.

    Values of at least one million use an ``M`` suffix, values of at least one
    thousand use ``K`` (both with two decimals); smaller values are returned
    via ``str(n)``.
    """
    if n >= 1_000_000:
        return f"{n / 1_000_000:.2f}M"
    if n >= 1_000:
        return f"{n / 1_000:.2f}K"
    return str(n)


def cuda_memory_summary() -> str:
    """Return a one-line summary of CUDA memory usage.

    The figures come from ``torch.cuda.memory_allocated``,
    ``memory_reserved`` and ``max_memory_allocated``, each divided by
    ``1024**3``. Returns ``"CUDA not available"`` when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        return "CUDA not available"
    allocated = torch.cuda.memory_allocated() / 1024**3
    reserved = torch.cuda.memory_reserved() / 1024**3
    max_allocated = torch.cuda.max_memory_allocated() / 1024**3
    return (
        f"VRAM allocated={allocated:.2f}GB reserved={reserved:.2f}GB "
        f"max_allocated={max_allocated:.2f}GB"
    )


def cosine_lr(
    iter_num: int,
    learning_rate: float,
    min_lr: float,
    warmup_iters: int,
    max_iters: int,
) -> float:
    """Return the learning rate for *iter_num*: linear warmup, then cosine decay.

    Args:
        iter_num: Current iteration (0-based; the first warmup step already
            yields ``learning_rate / warmup_iters``).
        learning_rate: Peak learning rate reached at the end of warmup.
        min_lr: Learning rate at and after ``max_iters``.
        warmup_iters: Number of linear warmup iterations.
        max_iters: Iteration at which the cosine decay reaches ``min_lr``.
    """
    if iter_num < warmup_iters:
        return learning_rate * (iter_num + 1) / max(1, warmup_iters)
    if iter_num > max_iters:
        return min_lr
    # max(1, ...) guards the division when warmup_iters >= max_iters.
    decay_ratio = (iter_num - warmup_iters) / max(1, max_iters - warmup_iters)
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))
    return min_lr + coeff * (learning_rate - min_lr)