#!/usr/bin/env bash
#
# Container entrypoint.
#
# 1. Exports runtime environment defaults (HOME, caches, thread counts, ...).
# 2. Prints a best-effort CUDA / PyTorch diagnostic report to stdout.
# 3. Replaces itself with `python3 wgp.py --listen [args...]`, run through
#    `su` as the account `user`.
#
# Usage: <this script> [extra arguments forwarded to wgp.py ...]
#
# `set -e` is deliberately NOT enabled: every probe below is informational
# and a failing diagnostic must never stop the application from starting.
#
# NOTE(review): the emoji / box-drawing characters in the messages were
# restored from a mis-decoded copy of this script. Most status icons decode
# unambiguously; the cross mark, the rule character and the section-header
# icons (magnifier, chart, runner, globe, snake, rocket) are a best-fit
# reconstruction - confirm against the canonical file.

# ----------------------------------------------------------------------------
# Environment defaults
# ----------------------------------------------------------------------------
export HOME=/home/user
export PYTHONUNBUFFERED=1
export HF_HOME=/home/user/.cache/huggingface

# Query the CPU count once and hand it to the numeric libraries. If nproc
# fails, leave the variables unset so each library keeps its own default
# instead of receiving an empty value.
if cpu_count=$(nproc) && [[ -n "$cpu_count" ]]; then
  export OMP_NUM_THREADS="$cpu_count"
  export MKL_NUM_THREADS="$cpu_count"
  export OPENBLAS_NUM_THREADS="$cpu_count"
  export NUMEXPR_NUM_THREADS="$cpu_count"
fi

# TF32 switches. NOTE(review): confirm which component reads these names.
export TORCH_ALLOW_TF32_CUBLAS=1
export TORCH_ALLOW_TF32_CUDNN=1

# Audio settings: select SDL's "dummy" driver and give PulseAudio a runtime
# path (presumably because the container has no sound device).
export SDL_AUDIODRIVER=dummy
export PULSE_RUNTIME_PATH=/tmp/pulse-runtime

# ----------------------------------------------------------------------------
# CUDA debug report
# ----------------------------------------------------------------------------

# Horizontal rule used to frame the report.
printf -v rule '%71s' ''
rule=${rule// /═}

echo "🔍 CUDA Environment Debug Information:"
echo "$rule"

# --- nvidia-smi -------------------------------------------------------------
if command -v nvidia-smi >/dev/null 2>&1; then
  echo "✅ nvidia-smi available"
  echo "📊 GPU Information:"
  nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free \
    --format=csv,noheader,nounits 2>/dev/null \
    || echo "❌ nvidia-smi failed to query GPU"

  echo "🏃 Running Processes:"
  # nvidia-smi exits 0 with empty output when nothing is running, so the
  # "no processes" note must cover the empty case as well as a failed query.
  if compute_apps=$(nvidia-smi --query-compute-apps=pid,name,used_memory \
    --format=csv,noheader,nounits 2>/dev/null) && [[ -n "$compute_apps" ]]; then
    printf '%s\n' "$compute_apps"
  else
    echo "ℹ️ No running CUDA processes"
  fi
else
  echo "❌ nvidia-smi not available in container"
fi

# --- CUDA runtime libraries -------------------------------------------------
echo ""
echo "🔧 CUDA Runtime Check:"
# compgen -G succeeds only if the glob matches at least one path.
if compgen -G '/usr/local/cuda*/lib*/libcudart.so*' >/dev/null; then
  echo "✅ CUDA runtime libraries found:"
  ls /usr/local/cuda*/lib*/libcudart.so* 2>/dev/null
else
  echo "❌ CUDA runtime libraries not found"
fi

# --- Device nodes -----------------------------------------------------------
echo ""
echo "🖥️ CUDA Device Files:"
if compgen -G '/dev/nvidia*' >/dev/null; then
  echo "✅ NVIDIA device files found:"
  ls -la /dev/nvidia* 2>/dev/null
else
  echo "❌ No NVIDIA device files found - Docker may not have GPU access"
fi

# --- Environment variables --------------------------------------------------
echo ""
echo "🌍 CUDA Environment Variables:"
echo " CUDA_HOME: ${CUDA_HOME:-not set}"
echo " CUDA_ROOT: ${CUDA_ROOT:-not set}"
echo " CUDA_PATH: ${CUDA_PATH:-not set}"
echo " LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-not set}"
echo " TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-not set}"
echo " CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}"

# --- PyTorch ----------------------------------------------------------------
echo ""
echo "🐍 PyTorch CUDA Check:"
# Quoted heredoc delimiter: the shell performs no expansion inside the Python
# source. stderr is merged into stdout so tracebacks appear in the report.
python3 - <<'PY' 2>&1
try:
    import torch
    print('✅ PyTorch imported successfully')
    print(f' Version: {torch.__version__}')
    print(f' CUDA available: {torch.cuda.is_available()}')
    if torch.cuda.is_available():
        print(f' CUDA version: {torch.version.cuda}')
        print(f' cuDNN version: {torch.backends.cudnn.version()}')
        print(f' Device count: {torch.cuda.device_count()}')
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            print(f' Device {i}: {props.name} (SM {props.major}.{props.minor}, {props.total_memory//1024//1024}MB)')
    else:
        print('❌ CUDA not available to PyTorch')
        print(' This could mean:')
        print(' - CUDA runtime not properly installed')
        print(' - GPU not accessible to container')
        print(' - Driver/runtime version mismatch')
except ImportError as e:
    print(f'❌ Failed to import PyTorch: {e}')
except Exception as e:
    print(f'❌ PyTorch CUDA check failed: {e}')
PY

# --- Common misconfigurations -----------------------------------------------
echo ""
echo "🩺 Common Issue Diagnostics:"

# Neither the first GPU node nor the control node exists.
if [[ ! -e /dev/nvidia0 && ! -e /dev/nvidiactl ]]; then
  echo "❌ No NVIDIA device nodes - container likely missing --gpus all or --runtime=nvidia"
fi

# An unset or empty LD_LIBRARY_PATH trivially fails the substring match.
if [[ "${LD_LIBRARY_PATH:-}" != *cuda* ]]; then
  echo "⚠️ LD_LIBRARY_PATH may not include CUDA libraries"
fi

# Warn when no listed entry shows one of the two expected mode strings.
if compgen -G '/dev/nvidia*' >/dev/null; then
  if ! ls -la /dev/nvidia* | grep -Eq 'rw-rw-rw-|rw-r--r--'; then
    echo "⚠️ NVIDIA device files may have restrictive permissions"
  fi
fi

echo "$rule"
echo "🚀 Starting application..."
echo ""

# ----------------------------------------------------------------------------
# Launch
# ----------------------------------------------------------------------------
# `su -c` takes ONE command string that a second shell re-parses. Escape each
# forwarded argument with %q so spaces, quotes and shell metacharacters reach
# wgp.py verbatim instead of being re-split or executed. The guard avoids
# printf emitting a spurious '' argument when no arguments were given.
app_args=''
if (( $# > 0 )); then
  printf -v app_args ' %q' "$@"
fi

# -p keeps the environment exported above; exec makes the application replace
# this script rather than run as its child.
exec su -p user -c "python3 wgp.py --listen${app_args}"