#!/usr/bin/env bash
#
# PyTorch C++ extension debug script 3.
#
# Builds a tiny HIP/CUDA extension through torch.utils.cpp_extension.load()
# inside a scratch directory, to check whether JIT compilation of PyTorch
# extensions works (or hangs) in this environment.

# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail

echo "=== PyTorch C++ Extension Debug Script 3 ==="
echo "Testing PyTorch C++ extension compilation with HIP"
echo

# ROCm/HIP toolchain configuration. Every variable keeps a value that is
# already set in the caller's environment and only falls back to the default.
export ROCM_PATH="${ROCM_PATH:-/opt/rocm-7.0.1}"
export ROCM_HOME="${ROCM_HOME:-$ROCM_PATH}"
export HIP_PATH="${HIP_PATH:-$ROCM_PATH}"
export HIP_HOME="${HIP_HOME:-$ROCM_PATH}"
export PATH="$ROCM_HOME/bin:$PATH"
export TORCH_HIP_ARCH_LIST="${TORCH_HIP_ARCH_LIST:-gfx942}"
# HSA_OVERRIDE_GFX_VERSION takes a dotted "major.minor.stepping" version,
# not an architecture name: gfx942 is written as 9.4.2.
export HSA_OVERRIDE_GFX_VERSION="${HSA_OVERRIDE_GFX_VERSION:-9.4.2}"
# Resolved against the directory the script was started from, because the
# expansion happens before the cd below.
export TORCH_EXTENSIONS_DIR="${TORCH_EXTENSIONS_DIR:-$PWD/.torch_extensions_debug}"

# Scratch directory for the generated sources. The EXIT trap removes it even
# when `set -e` aborts the script part-way through.
mkdir -p /tmp/torch_ext_test
trap 'cd / && rm -rf /tmp/torch_ext_test' EXIT
cd /tmp/torch_ext_test

echo "=== Creating Simple PyTorch Extension ==="

# Write a small element-wise add kernel plus its pybind11 binding.
# The heredoc delimiter is quoted, so the C++ text (including the trailing
# backslashes of the macro definitions) is written out verbatim.
cat > simple_kernel.cu << 'EOF'
#include <torch/extension.h>

#include <limits>

// Pick the kernel-launch spelling for the active toolchain:
// hipLaunchKernelGGL on ROCm, the triple-chevron syntax on CUDA.
#if defined(__HIP_PLATFORM_AMD__) || defined(USE_ROCM)
#include <hip/hip_runtime.h>
#define CUDA_LAUNCH_KERNEL(kernel, grid, block, smem, stream, ...) \
    hipLaunchKernelGGL(kernel, grid, block, smem, stream, __VA_ARGS__)
#else
#include <cuda_runtime.h>
#define CUDA_LAUNCH_KERNEL(kernel, grid, block, smem, stream, ...) \
    kernel<<<grid, block, smem, stream>>>(__VA_ARGS__)
#endif

// c[i] = a[i] + b[i] for every i in [0, n); one thread per element.
__global__ void add_kernel(const float* a, const float* b, float* c, int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {  // the last block may extend past the end of the data
        c[idx] = a[idx] + b[idx];
    }
}

// Element-wise sum of two float32 GPU tensors of identical shape.
// Raises (via TORCH_CHECK) when the inputs do not satisfy what the raw
// pointer kernel assumes.
torch::Tensor add_tensors_cuda(torch::Tensor a, torch::Tensor b) {
    TORCH_CHECK(a.is_cuda() && b.is_cuda(),
                "add_tensors: both inputs must be CUDA/HIP tensors");
    TORCH_CHECK(a.device() == b.device(),
                "add_tensors: inputs must live on the same device");
    TORCH_CHECK(a.scalar_type() == torch::kFloat && b.scalar_type() == torch::kFloat,
                "add_tensors: both inputs must be float32");
    TORCH_CHECK(a.sizes() == b.sizes(),
                "add_tensors: inputs must have the same shape");
    TORCH_CHECK(a.numel() <= std::numeric_limits<int>::max(),
                "add_tensors: too many elements for the int-indexed kernel");

    // The kernel walks flat memory, so it needs densely packed inputs.
    a = a.contiguous();
    b = b.contiguous();

    auto c = torch::zeros_like(a);
    const int n = static_cast<int>(a.numel());
    if (n == 0) {
        return c;  // a launch with an empty grid is an invalid configuration
    }

    const int block_size = 256;
    const int grid_size = (n + block_size - 1) / block_size;  // ceil(n / block_size)

    CUDA_LAUNCH_KERNEL(
        add_kernel,
        dim3(grid_size),
        dim3(block_size),
        0,  // no dynamic shared memory
        0,  // default stream
        a.data_ptr<float>(),
        b.data_ptr<float>(),
        c.data_ptr<float>(),
        n
    );

    return c;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("add_tensors", &add_tensors_cuda, "Add two tensors (CUDA/HIP)");
}
EOF

# Create the Python driver that JIT-compiles simple_kernel.cu and, when a GPU
# is present, checks the kernel's output against a + b.
# The driver exits non-zero on any failure so its caller can detect it.
cat > test_extension.py << 'EOF'
import os
import sys
import torch
from torch.utils.cpp_extension import load

print("=== PyTorch Extension Load Test ===")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device count: {torch.cuda.device_count()}")

if hasattr(torch.version, 'hip') and torch.version.hip:
    print(f"HIP version: {torch.version.hip}")

print("\n=== Loading Extension ===")
print("This may take a while and will show compilation output...")
print("If this hangs, it indicates the same issue as build.py")

exit_status = 0

try:
    # Same flags as build.py, except that the module object is requested:
    # load() only returns the pybind11 module when is_python_module=True,
    # and the execution test below needs it to call add_tensors().
    simple_ext = load(
        name="simple_test_ext",
        sources=["simple_kernel.cu"],
        extra_cflags=["-O3", "-std=c++17"],
        extra_cuda_cflags=["-O3"],  # torch switches this to hipcc on ROCm
        verbose=True,
        is_python_module=True,
    )
    print("✓ Extension compilation successful!")

    # Test the extension
    print("\n=== Testing Extension ===")
    if torch.cuda.is_available():
        a = torch.randn(1000, device='cuda')
        b = torch.randn(1000, device='cuda')

        result = simple_ext.add_tensors(a, b)
        expected = a + b
        if torch.allclose(result, expected):
            print("✓ Extension execution successful!")
        else:
            print("✗ Extension execution failed - results don't match")
            exit_status = 1
    else:
        print("⚠ No CUDA device, skipping execution test")

except Exception as e:
    print(f"✗ Extension compilation/loading failed: {e}")
    import traceback
    traceback.print_exc()
    exit_status = 1

# Report the outcome through the process exit status.
sys.exit(exit_status)
EOF

echo "=== Running PyTorch Extension Test ==="
echo "This test mimics the same compilation process as build.py"
echo "If this hangs, it shows the same issue as the main build"
echo

# Bound the run so a hung compiler cannot block the script forever.
# timeout(1) exits with status 124 when the time limit is reached; any other
# non-zero status comes from the Python driver itself. The `||` keeps
# `set -e` from aborting, so the remaining checks still run.
timeout 300 python3 test_extension.py || {
    exit_code=$?
    if [ "$exit_code" -eq 124 ]; then
        echo "✗ Extension compilation timed out after 5 minutes (same as build.py hang)"
    else
        echo "✗ Extension compilation failed with exit code $exit_code"
    fi
}

echo
echo "=== Testing with Minimal Sources ==="

# Create an even simpler, CPU-only extension. It contains no device code and
# is built with with_cuda=False, i.e. by the host C++ compiler, so the source
# is written as a .cpp file rather than a .cu file.
cat > minimal_kernel.cpp << 'EOF'
#include <torch/extension.h>

// Return a copy of the input tensor.
torch::Tensor dummy_function(torch::Tensor input) {
    return input.clone();
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("dummy", &dummy_function, "Dummy function");
}
EOF

# Python driver for the CPU-only build; exits non-zero when the build fails
# so that the fallback message after the timeout call is printed.
cat > test_minimal.py << 'EOF'
import sys
import torch
from torch.utils.cpp_extension import load

print("=== Minimal Extension Test ===")

try:
    minimal_ext = load(
        name="minimal_test_ext",
        sources=["minimal_kernel.cpp"],
        extra_cflags=["-O3"],
        verbose=True,
        with_cuda=False,  # Skip CUDA/HIP compilation
    )
    print("✓ Minimal extension (CPU only) successful!")
except Exception as e:
    print(f"✗ Even minimal extension failed: {e}")
    sys.exit(1)
EOF

echo "Testing minimal CPU-only extension..."
timeout 120 python3 test_minimal.py || echo "Minimal extension also failed/timed out"

# Closing banner: one blank line followed by the completion message.
printf '\n%s\n' "=== Debug Script 3 Complete ==="

# Cleanup: step out of the scratch directory, then delete it.
cd /
rm -rf /tmp/torch_ext_test