ForgeSight / backend /amd_hackathon /verify_env.py
rasAli02's picture
git add, commit, push
307f1c5
import os
import sys
import torch
def _try_import(module_name):
    """Return the imported module named *module_name*, or None on ImportError."""
    # Function-scope import keeps this block self-contained; the file's
    # top-level imports do not include importlib.
    import importlib

    try:
        return importlib.import_module(module_name)
    except ImportError:
        return None


def verify_environment():
    """Print a report on the GPU training environment.

    The report covers, in order: the PyTorch version and whether
    ``torch.cuda`` reports an available device, the name and total memory of
    each visible GPU, and whether the ``deepspeed``, ``flash_attn`` and
    ``axolotl`` packages can be imported.

    Raises:
        SystemExit: with exit code 1 when ``torch.cuda.is_available()`` is
            false; the package checks are not run in that case.
    """
    banner = "=" * 40
    print(banner)
    print(" MI300X ROCm Environment Verification ")
    print(banner)

    # [1] PyTorch and GPU visibility.
    print(f"\n[1] PyTorch Version: {torch.__version__}")
    if not torch.cuda.is_available():
        print("❌ CUDA/HIP is not available. Please check your ROCm installation.")
        sys.exit(1)
    print("✅ PyTorch is installed with CUDA/HIP support.")

    device_count = torch.cuda.device_count()
    print(f" Available GPUs: {device_count}")
    for index in range(device_count):
        # Query the name once; it is needed for both the report line and the
        # model check below.
        device_name = torch.cuda.get_device_name(index)
        print(f" GPU {index}: {device_name}")
        # total_memory is reported in bytes; convert to GiB for display.
        vram = torch.cuda.get_device_properties(index).total_memory / (1024 ** 3)
        print(f" VRAM GPU {index}: {vram:.2f} GB")
        # NOTE(review): this substring test matches any device whose name
        # contains "MI300", not only the MI300X - confirm that is intended.
        if "MI300" in device_name:
            print(" ✅ MI300X detected.")

    # [2] DeepSpeed.
    print("\n[2] Checking DeepSpeed...")
    deepspeed = _try_import("deepspeed")
    if deepspeed is None:
        print("❌ DeepSpeed is not installed.")
    else:
        print(f"✅ DeepSpeed Version: {deepspeed.__version__}")

    # [3] Flash Attention 2.
    print("\n[3] Checking Flash Attention 2 (ROCm)...")
    flash_attn = _try_import("flash_attn")
    if flash_attn is None:
        print("❌ Flash Attention 2 is not installed or not configured for ROCm.")
    else:
        print(f"✅ Flash Attention 2 Version: {flash_attn.__version__}")

    # [4] Axolotl: only importability is checked, no version is printed.
    print("\n[4] Checking Axolotl...")
    if _try_import("axolotl") is None:
        print("❌ Axolotl is not installed.")
    else:
        print("✅ Axolotl is installed.")

    print(f"\n{banner}")
    print(" Verification Complete ")
    print(banner)
# Script entry point: run the environment report only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    verify_environment()