import os
import sys
import torch
|
|
def verify_environment():
    """Print a verification report for an MI300X ROCm training environment.

    Checks, in order:
      1. PyTorch with CUDA/HIP support — exits with status 1 if unavailable,
         then lists each visible GPU with its VRAM and flags MI300-class parts.
      2. DeepSpeed (optional; missing install is reported, not fatal).
      3. Flash Attention 2 ROCm build (optional).
      4. Axolotl (optional).

    Returns:
        None. All results are written to stdout.
    """
    print("========================================")
    print(" MI300X ROCm Environment Verification ")
    print("========================================")

    # [1] On ROCm builds of PyTorch, the HIP backend is exposed through
    # the torch.cuda.* API, so torch.cuda.is_available() is the right probe.
    print(f"\n[1] PyTorch Version: {torch.__version__}")
    if not torch.cuda.is_available():
        print("❌ CUDA/HIP is not available. Please check your ROCm installation.")
        sys.exit(1)

    print("✅ PyTorch is installed with CUDA/HIP support.")

    device_count = torch.cuda.device_count()
    print(f"   Available GPUs: {device_count}")
    for i in range(device_count):
        print(f"   GPU {i}: {torch.cuda.get_device_name(i)}")
        # total_memory is reported in bytes; convert to GiB for display.
        vram = torch.cuda.get_device_properties(i).total_memory / (1024 ** 3)
        print(f"   VRAM GPU {i}: {vram:.2f} GB")
        if "MI300" in torch.cuda.get_device_name(i):
            print("   ✅ MI300X detected.")

    # [2] DeepSpeed — optional dependency; ImportError is reported, not raised.
    print("\n[2] Checking DeepSpeed...")
    try:
        import deepspeed
        print(f"✅ DeepSpeed Version: {deepspeed.__version__}")
    except ImportError:
        print("❌ DeepSpeed is not installed.")

    # [3] Flash Attention 2 — must be the ROCm-enabled build to be usable here.
    print("\n[3] Checking Flash Attention 2 (ROCm)...")
    try:
        import flash_attn
        print(f"✅ Flash Attention 2 Version: {flash_attn.__version__}")
    except ImportError:
        print("❌ Flash Attention 2 is not installed or not configured for ROCm.")

    # [4] Axolotl fine-tuning framework — presence check only.
    print("\n[4] Checking Axolotl...")
    try:
        import axolotl
        print("✅ Axolotl is installed.")
    except ImportError:
        print("❌ Axolotl is not installed.")

    print("\n========================================")
    print(" Verification Complete ")
    print("========================================")
|
|
# Run the verification report only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    verify_environment()
|
|