"""Device detection and auto-configuration utilities.""" from __future__ import annotations from typing import TYPE_CHECKING import torch if TYPE_CHECKING: from llm_lab.config import TrainConfig def get_device() -> torch.device: """Returns the available device (cuda or cpu).""" return torch.device("cuda" if torch.cuda.is_available() else "cpu") def detect_gpu_info() -> dict: """Returns GPU name and memory information. Returns: {"name": str, "memory_gb": float} or an empty dict if no GPU is available """ if not torch.cuda.is_available(): return {} return { "name": torch.cuda.get_device_name(), "memory_gb": round(torch.cuda.get_device_properties(0).total_memory / 1e9, 1), } def auto_configure(config: "TrainConfig") -> "TrainConfig": """Automatically adjusts configuration based on GPU type. In Colab Pro+, an A100 is not always assigned. If a T4 or V100 is assigned, configuration is automatically adjusted. Returns: Adjusted TrainConfig """ if not torch.cuda.is_available(): print("āš ļø No GPU found! Running in CPU mode (very slow)") config.dtype = "float32" config.micro_batch_size = 1 config.gradient_accumulation_steps = 4 return config gpu_name = torch.cuda.get_device_name().lower() gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9 print(f"\nšŸ” GPU detected: {torch.cuda.get_device_name()} ({gpu_mem:.1f} GB)") if "a100" in gpu_name: # A100 40GB: use default settings (optimal) print(" → A100 detected: using default settings (bf16, batch=4)") config.dtype = "bfloat16" config.micro_batch_size = 4 elif "v100" in gpu_name: # V100 16GB: bf16 not supported, reduce batch size print(" → V100 detected: fp16 mode, reduced batch size") config.dtype = "float16" config.micro_batch_size = 2 config.gradient_accumulation_steps = 64 # maintain effective batch size elif "t4" in gpu_name: # T4 16GB: bf16 not supported, smaller batch print(" → T4 detected: fp16 mode, minimum batch size") config.dtype = "float16" config.micro_batch_size = 1 config.gradient_accumulation_steps = 128 elif "l4" in gpu_name: # L4 24GB: bf16 supported print(" → L4 detected: bf16 mode, adjusted batch size") config.dtype = "bfloat16" config.micro_batch_size = 2 config.gradient_accumulation_steps = 64 else: print(f" → Unknown GPU. Adjusting settings based on memory") if gpu_mem >= 30: config.micro_batch_size = 4 elif gpu_mem >= 16: config.micro_batch_size = 2 else: config.micro_batch_size = 1 config.gradient_accumulation_steps = 128 print(f" → dtype: {config.dtype}") print(f" → micro_batch: {config.micro_batch_size}") print(f" → grad_accum: {config.gradient_accumulation_steps}") print(f" → effective_batch: {config.effective_batch_size}") return config