"""Validation matrix for model, precision, and GPU capabilities."""

import importlib
from dataclasses import dataclass, field
from typing import Optional, Tuple

import torch
from transformers import AutoTokenizer

PRECISIONS = {"fp32", "fp16", "bf16", "qlora4bit"}


@dataclass
class GpuInfo:
    """Capabilities and memory stats for a single CUDA device."""

    available: bool
    name: str
    total_bytes: int
    free_bytes: int
    cc_major: int
    cc_minor: int
    bf16_supported: bool
    device_index: int = 0


@dataclass
class MultiGpuInfo:
    """Information about all available GPUs."""

    gpus: list[GpuInfo]
    # Derived in __post_init__ so callers only pass the GPU list.
    count: int = field(init=False)
    total_vram_gb: float = field(init=False)

    def __post_init__(self):
        self.count = len(self.gpus)
        self.total_vram_gb = sum(gpu.total_bytes for gpu in self.gpus) / (1024**3)


def get_gpu_info(device: int = 0) -> GpuInfo:
    """Get GPU information and capabilities for a specific device."""
    if not torch.cuda.is_available():
        return GpuInfo(False, "cpu", 0, 0, 0, 0, False, device)

    name = torch.cuda.get_device_name(device)
    total = torch.cuda.get_device_properties(device).total_memory
    free = torch.cuda.mem_get_info(device)[0]
    major, minor = torch.cuda.get_device_capability(device)
    bf16_ok = torch.cuda.is_bf16_supported()
    return GpuInfo(True, name, total, free, major, minor, bf16_ok, device)


def get_all_gpu_info() -> MultiGpuInfo:
    """Get information about all available GPUs."""
    if not torch.cuda.is_available():
        return MultiGpuInfo([])

    gpus = [get_gpu_info(idx) for idx in range(torch.cuda.device_count())]
    return MultiGpuInfo(gpus)


def has_bitsandbytes() -> bool:
    """Check if bitsandbytes is available."""
    try:
        importlib.import_module("bitsandbytes")
        return True
    except Exception:
        return False


def precision_supported(precision: str, gpu: GpuInfo) -> Tuple[bool, str]:
    """Check if precision is supported on the given GPU."""
    if precision == "bf16" and not gpu.bf16_supported:
        return False, "bf16 is not supported on this GPU. Try fp16."

    if precision == "qlora4bit":
        if not gpu.available:
            return False, "4-bit quantization requires a CUDA GPU; it is not supported on CPU."
        if not has_bitsandbytes():
            return False, "bitsandbytes not installed. `pip install bitsandbytes` or use fp16."
        if (gpu.cc_major, gpu.cc_minor) < (7, 0):
            return False, f"Compute capability {gpu.cc_major}.{gpu.cc_minor} is insufficient for 4-bit. Use fp16."

    return True, "ok"
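
# Example outcomes (illustrative, assuming bitsandbytes is installed):
# on a Pascal-era card reporting compute capability 6.1,
#   precision_supported("qlora4bit", gpu) -> (False, "Compute capability 6.1 ...")
#   precision_supported("fp16", gpu)      -> (True, "ok")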


def estimate_model_params(config) -> Optional[int]:
    """Estimate model parameters from config."""
    try:
        hs = int(getattr(config, "hidden_size", 0))
        nl = int(getattr(config, "num_hidden_layers", 0))
        vs = int(getattr(config, "vocab_size", 0))
        if hs == 0 or nl == 0 or vs == 0:
            return None

        # Standard transformer approximation: ~12 * hidden_size^2 weights
        # per layer (attention + MLP), plus the embedding matrix.
        per_layer = 12 * hs * hs
        return per_layer * nl + vs * hs
    except Exception:
        return None
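
# Sanity check of the formula (illustrative; shapes are the published
# Llama-2-7B config values: hidden_size=4096, num_hidden_layers=32,
# vocab_size=32000):
#   12 * 4096**2 * 32 + 32000 * 4096 ≈ 6.44e9 + 0.13e9 ≈ 6.6e9 params,
# close to the nominal "7B" (the estimate ignores biases, norms, and
# position embeddings).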


def bytes_per_param(precision: str) -> float:
    """Get bytes per parameter for given precision (defaults to 2)."""
    return {
        "fp32": 4,
        "fp16": 2,
        "bf16": 2,
        "qlora4bit": 0.5,  # 4-bit weights: half a byte per parameter
    }.get(precision, 2)


def estimate_memory_bytes(params: Optional[int], precision: str, adam: bool, lora: bool) -> Optional[int]:
    """Estimate memory usage in bytes for weights plus training overhead."""
    if params is None:
        return None

    base = params * bytes_per_param(precision)

    # Rough heuristic: Adam adds two optimizer states per trainable
    # parameter (~2x overhead); otherwise budget ~0.6x for gradients
    # and activations. LoRA trains only small adapters, so scale down.
    overhead = 2.0 if adam else 0.6
    if lora:
        overhead *= 0.3

    return int(base * (1.0 + overhead))
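
# Worked example (illustrative): 7e9 params, fp16, full fine-tuning with Adam:
#   base  = 7e9 * 2 bytes         = 14 GB
#   total = 14 GB * (1.0 + 2.0)   = 42 GB
# Same model with QLoRA (precision="qlora4bit", adam=True, lora=True):
#   base  = 7e9 * 0.5 bytes       = 3.5 GB
#   total = 3.5 GB * (1.0 + 0.6)  = 5.6 GB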


def tokenizer_ok(model_id_or_path: str) -> Tuple[bool, str]:
    """Check if tokenizer can be loaded and is properly configured."""
    try:
        tok = AutoTokenizer.from_pretrained(model_id_or_path, use_fast=True, trust_remote_code=True)
        if tok.pad_token is None:
            if getattr(tok, "eos_token", None):
                return True, "No pad_token; will use eos_token for padding."
            return False, "Tokenizer missing pad_token and eos_token."
        return True, "ok"
    except Exception as e:
        return False, f"Tokenizer load failed: {e}"
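

# Minimal usage sketch (illustrative, not part of the validation API):
# "gpt2" is just a small example model; AutoConfig fetches the config
# without downloading weights.
if __name__ == "__main__":
    from transformers import AutoConfig

    model_id = "gpt2"
    precision = "fp16"

    gpu = get_gpu_info()
    ok, msg = precision_supported(precision, gpu)
    print(f"precision {precision}: {ok} ({msg})")

    params = estimate_model_params(AutoConfig.from_pretrained(model_id))
    need = estimate_memory_bytes(params, precision, adam=True, lora=False)
    if need is not None and gpu.available:
        print(f"need ~{need / 1024**3:.1f} GiB, free {gpu.free_bytes / 1024**3:.1f} GiB")

    ok, msg = tokenizer_ok(model_id)
    print(f"tokenizer: {ok} ({msg})")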