| from __future__ import annotations |
|
|
| import json |
| import math |
| import os |
| import random |
| from pathlib import Path |
| from typing import Any, Dict |
|
|
| import numpy as np |
| import torch |
| import yaml |
|
|
|
|
def project_root() -> Path:
    """Return the directory one level above the folder containing this file.

    The location is derived from the resolved ``__file__`` of this module,
    so the result is always an absolute path.

    Returns:
        ``parents[1]`` of this module's resolved file path.
    """
    this_file = Path(__file__).resolve()
    return this_file.parents[1]
|
|
|
|
def resolve_path(path: str | Path) -> Path:
    """Turn ``path`` into a ``Path``, anchoring relative paths at the project root.

    Args:
        path: A string or ``Path``; may be absolute or relative.

    Returns:
        ``path`` unchanged (as a ``Path``) when it is absolute, otherwise
        ``project_root() / path``.
    """
    candidate = Path(path)
    if candidate.is_absolute():
        return candidate
    return project_root() / candidate
|
|
|
|
def load_config(path: str | Path) -> Dict[str, Any]:
    """Load a YAML configuration file and return its top-level mapping.

    Args:
        path: Location of the YAML file. It is passed through
            ``resolve_path`` before being opened.

    Returns:
        The parsed top-level mapping of the YAML document.

    Raises:
        FileNotFoundError: If the resolved path does not exist.
        ValueError: If the document is empty, or if its top-level node is
            not a mapping (for example a bare list or scalar).
    """
    config_path = resolve_path(path)
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")
    with config_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    if cfg is None:
        raise ValueError(f"Config file is empty: {config_path}")
    # safe_load returns whatever the document's top-level node is, so a file
    # holding a list or a scalar would otherwise slip through and break the
    # declared Dict[str, Any] contract far away from the actual cause.
    if not isinstance(cfg, dict):
        raise ValueError(
            "Config file must contain a top-level mapping, "
            f"got {type(cfg).__name__}: {config_path}"
        )
    return cfg
|
|
|
|
def save_json(obj: Dict[str, Any], path: str | Path) -> None:
    """Serialize ``obj`` as JSON (2-space indent) to ``path``.

    Args:
        obj: The object handed to ``json.dump``.
        path: Destination file; passed through ``resolve_path`` first.
            Missing parent directories are created.
    """
    target = resolve_path(path)
    # Create any missing parent directories so the open below can succeed.
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, mode="w", encoding="utf-8") as handle:
        json.dump(obj, handle, indent=2)
|
|
|
|
def load_json(path: str | Path) -> Dict[str, Any]:
    """Read and parse a UTF-8 encoded JSON file.

    Args:
        path: Source file; passed through ``resolve_path`` first.

    Returns:
        The parsed JSON document. The annotation says ``Dict`` but the
        top-level type is not checked here.
    """
    source = resolve_path(path)
    raw_text = source.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
|
|
|
def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value passed to every seeding call.
    """
    # Order matters only for which call raises first on a bad seed:
    # Python's ``random``, then NumPy, then PyTorch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # CUDA generators are seeded explicitly only when CUDA is usable.
    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.manual_seed_all(seed)
|
|
|
|
def get_device(requested: str = "cuda") -> torch.device:
    """Return the requested torch device, falling back to CPU without CUDA.

    Args:
        requested: A device string understood by ``torch.device``, such as
            ``"cpu"``, ``"cuda"`` or ``"cuda:1"``.

    Returns:
        ``torch.device("cpu")`` when a CUDA device was requested but
        ``torch.cuda.is_available()`` is false; otherwise the device built
        from ``requested``.
    """
    device = torch.device(requested)
    # Compare the parsed device type rather than the raw string so indexed
    # requests such as "cuda:0" also take the CPU fallback.
    if device.type == "cuda" and not torch.cuda.is_available():
        print("CUDA was requested but is not available. Falling back to CPU.")
        return torch.device("cpu")
    return device
|
|
|
|
def get_torch_dtype(dtype: str) -> torch.dtype:
    """Map a dtype name to the matching ``torch.dtype``.

    Args:
        dtype: One of ``"float16"``, ``"bfloat16"`` or ``"float32"``.

    Returns:
        The corresponding ``torch.dtype`` object.

    Raises:
        ValueError: If ``dtype`` is not one of the supported names.
    """
    supported = (
        ("float16", torch.float16),
        ("bfloat16", torch.bfloat16),
        ("float32", torch.float32),
    )
    for name, torch_type in supported:
        if dtype == name:
            return torch_type
    raise ValueError(f"Unsupported dtype: {dtype}")
|
|
|
|
def count_parameters(model: torch.nn.Module) -> int:
    """Count the elements of all parameters that require gradients.

    Args:
        model: Module whose ``parameters()`` are inspected.

    Returns:
        Sum of ``numel()`` over parameters with ``requires_grad`` set;
        parameters without it are skipped.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable
|
|
|
|
def format_count(n: int) -> str:
    """Format a count with a ``K`` or ``M`` suffix.

    Args:
        n: The number to format.

    Returns:
        ``n`` divided by one million with an ``M`` suffix when ``n`` is at
        least 1,000,000; divided by one thousand with a ``K`` suffix when
        ``n`` is at least 1,000 (both with two decimals); otherwise
        ``str(n)``.
    """
    # Largest threshold first so the first match is the right suffix.
    scales = ((1_000_000, "M"), (1_000, "K"))
    for threshold, suffix in scales:
        if n >= threshold:
            return f"{n / threshold:.2f}{suffix}"
    return str(n)
|
|
|
|
def cuda_memory_summary() -> str:
    """Describe current CUDA memory usage as a single line of text.

    Returns:
        ``"CUDA not available"`` when ``torch.cuda.is_available()`` is
        false; otherwise the allocated, reserved and max-allocated byte
        counts, each divided by ``1024**3`` and shown with two decimals.
    """
    if torch.cuda.is_available():
        # The labels say "GB" but the divisor is 1024**3 bytes.
        bytes_per_unit = 1024**3
        in_use = torch.cuda.memory_allocated() / bytes_per_unit
        held = torch.cuda.memory_reserved() / bytes_per_unit
        peak = torch.cuda.max_memory_allocated() / bytes_per_unit
        return f"VRAM allocated={in_use:.2f}GB reserved={held:.2f}GB max_allocated={peak:.2f}GB"
    return "CUDA not available"
|
|
|
|
def cosine_lr(iter_num: int, learning_rate: float, min_lr: float, warmup_iters: int, max_iters: int) -> float:
    """Compute the learning rate for a linear-warmup, cosine-decay schedule.

    Args:
        iter_num: Current iteration index.
        learning_rate: Peak learning rate, reached at the end of warmup.
        min_lr: Value the cosine decay ends at, and the value returned for
            any iteration past ``max_iters``.
        warmup_iters: Number of linear warmup iterations.
        max_iters: Last iteration covered by the cosine decay.

    Returns:
        The learning rate to use at ``iter_num``.
    """
    # Phase 1: linear ramp; iteration 0 already gets a non-zero rate.
    if iter_num < warmup_iters:
        warmup_span = max(1, warmup_iters)
        return learning_rate * (iter_num + 1) / warmup_span

    # Phase 3: past the schedule, hold at the floor.
    if iter_num > max_iters:
        return min_lr

    # Phase 2: cosine interpolation; max() guards against a zero-length span.
    decay_span = max(1, max_iters - warmup_iters)
    progress = (iter_num - warmup_iters) / decay_span
    cosine_weight = 0.5 * (1.0 + math.cos(math.pi * progress))
    return min_lr + cosine_weight * (learning_rate - min_lr)
|
|