#!/usr/bin/env bash
#
# Debug script 1: Basic ROCm environment and tool availability check.
#
# Prints a human-readable report covering the ROCm-related environment
# variables, the expected installation directories, which ROCm/HIP command
# line tools are reachable through PATH, their versions, the Python/PyTorch
# setup, and the output of hipinfo. The script is purely diagnostic: every
# probe is guarded, so a missing tool is reported instead of aborting the run.
#
# Environment variables (a value already set by the caller always wins):
#   ROCM_PATH                 ROCm install prefix      (default /opt/rocm-7.0.1)
#   ROCM_HOME                 alias of the prefix      (default $ROCM_PATH)
#   HIP_PATH                  HIP install prefix       (default $ROCM_PATH)
#   HIP_HOME                  alias of the HIP prefix  (default $ROCM_PATH)
#   TORCH_HIP_ARCH_LIST       GPU targets for PyTorch  (default gfx942)
#   HSA_OVERRIDE_GFX_VERSION  GFX version override     (default 9.4.2)

set -euo pipefail

#######################################
# Test whether a command can be resolved by the shell.
# Uses the `command -v` builtin instead of the external, non-standard `which`.
# Arguments: $1 - command name
# Returns:   0 if the command is found, non-zero otherwise
#######################################
has_tool() {
  command -v -- "$1" >/dev/null 2>&1
}

#######################################
# Run a predicate command and print YES or NO depending on its exit status.
# A real if/else is used so the NO branch can never fire by accident, which
# the `cmd && echo YES || echo NO` shortcut does not guarantee.
# Arguments: $@ - command (with arguments) to evaluate
# Outputs:   "YES" or "NO" on stdout; the predicate's own output is discarded
#######################################
yes_no() {
  if "$@" >/dev/null 2>&1; then
    echo 'YES'
  else
    echo 'NO'
  fi
}

#######################################
# If a tool is installed, print a heading, run the tool with the given
# arguments, and print a fallback message when the tool itself fails.
# Prints nothing at all when the tool is not installed.
# Arguments: $1 - tool name
#            $2 - heading printed before the tool output
#            $3 - message printed if the tool exits non-zero
#            $4.. - arguments passed to the tool
#######################################
report_tool() {
  local tool=$1 heading=$2 failure_msg=$3
  shift 3
  if has_tool "$tool"; then
    echo "$heading"
    "$tool" "$@" || echo "$failure_msg"
    echo
  fi
}

#######################################
# Export the ROCm/HIP environment, honouring any values the caller provided.
# Globals: ROCM_PATH, ROCM_HOME, HIP_PATH, HIP_HOME, PATH,
#          TORCH_HIP_ARCH_LIST, HSA_OVERRIDE_GFX_VERSION (all written)
#######################################
setup_environment() {
  export ROCM_PATH="${ROCM_PATH:-/opt/rocm-7.0.1}"
  export ROCM_HOME="${ROCM_HOME:-$ROCM_PATH}"
  export HIP_PATH="${HIP_PATH:-$ROCM_PATH}"
  export HIP_HOME="${HIP_HOME:-$ROCM_PATH}"
  export PATH="$ROCM_HOME/bin:$PATH"
  export TORCH_HIP_ARCH_LIST="${TORCH_HIP_ARCH_LIST:-gfx942}"
  # ROCm's documented examples give the override as a dotted
  # major.minor.stepping version (e.g. 10.3.0), not a gfx target name, so the
  # default that corresponds to gfx942 is 9.4.2.
  export HSA_OVERRIDE_GFX_VERSION="${HSA_OVERRIDE_GFX_VERSION:-9.4.2}"

  # A caller-supplied value is kept as-is, but flag a malformed one because it
  # may make the later device probes fail in confusing ways.
  if [[ ! "$HSA_OVERRIDE_GFX_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    echo "WARNING: HSA_OVERRIDE_GFX_VERSION='$HSA_OVERRIDE_GFX_VERSION' is not in major.minor.stepping form (e.g. 9.4.2)" >&2
  fi
}

#######################################
# Print the full diagnostic report to stdout.
#######################################
main() {
  echo "=== ROCm Environment Debug Script 1 ==="
  echo "Testing basic ROCm/HIP environment setup"
  echo

  setup_environment

  echo "Environment Variables:"
  echo "ROCM_PATH=$ROCM_PATH"
  echo "ROCM_HOME=$ROCM_HOME"
  echo "HIP_PATH=$HIP_PATH"
  echo "HIP_HOME=$HIP_HOME"
  echo "TORCH_HIP_ARCH_LIST=$TORCH_HIP_ARCH_LIST"
  echo "HSA_OVERRIDE_GFX_VERSION=$HSA_OVERRIDE_GFX_VERSION"
  # Quoted here-string: PATH entries containing spaces or glob characters are
  # shown verbatim instead of being word-split by an unquoted expansion.
  echo "PATH (ROCm portion): $(tr ':' '\n' <<<"$PATH" | grep rocm || echo 'No ROCm in PATH')"
  echo

  echo "=== Directory Checks ==="
  echo "ROCm installation directory exists: $(yes_no test -d "$ROCM_PATH")"
  echo "ROCm bin directory exists: $(yes_no test -d "$ROCM_PATH/bin")"
  echo "ROCm include directory exists: $(yes_no test -d "$ROCM_PATH/include")"
  echo "ROCm lib directory exists: $(yes_no test -d "$ROCM_PATH/lib")"
  echo

  echo "=== Tool Availability ==="
  echo "hipcc available: $(yes_no has_tool hipcc)"
  echo "hip-clang available: $(yes_no has_tool hip-clang)"
  echo "rocm-smi available: $(yes_no has_tool rocm-smi)"
  echo "hipconfig available: $(yes_no has_tool hipconfig)"
  echo

  echo "=== Tool Versions ==="
  report_tool hipcc "hipcc version:" "Failed to get hipcc version" --version
  report_tool hipconfig "HIP config:" "Failed to get hipconfig" --full
  report_tool rocm-smi "ROCm SMI:" "Failed to get ROCm SMI info" --showproductname

  echo "=== Python Environment ==="
  # Each probe is a separate interpreter run so one failure (e.g. torch not
  # installed) yields its own specific message and does not hide the others.
  python3 --version || echo "Python3 not available"
  python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')" \
    || echo "PyTorch not available"
  python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')" \
    || echo "Failed to check CUDA availability"
  python3 -c "import torch; print(f'HIP available: {hasattr(torch.version, \"hip\") and torch.version.hip is not None}')" \
    || echo "Failed to check HIP availability"
  echo

  echo "=== Basic HIP Device Check ==="
  if has_tool hipinfo; then
    echo "HIP devices:"
    hipinfo || echo "hipinfo failed"
  else
    echo "hipinfo not available"
  fi
  echo

  echo "=== Debug Script 1 Complete ==="
}

main