#!/bin/bash
# File size: 15,680 Bytes | 61ba51e
# Install the dependencies required by CI.
#
# Positional arguments:
#   $1  (optional) extra names appended after "dev" when the main python
#       package is installed; an empty or missing value means "dev" only.
#
# Strict mode: stop on any failing command, on unset variables and on a failure
# anywhere inside a pipeline. xtrace is enabled last so every following command
# is echoed into the CI log.
set -o errexit
set -o nounset
set -o pipefail
set -o xtrace

# Configuration shared by the rest of the script.
# IS_BLACKWELL may be supplied through the environment; it defaults to 0.
IS_BLACKWELL="${IS_BLACKWELL:-0}"
# CUDA flavour used for the PyTorch and flashinfer wheel indexes.
CU_VERSION="cu129"
# flashinfer-jit-cache version this script expects to end up installed.
FLASHINFER_VERSION=0.6.4
OPTIONAL_DEPS="${1:-}"
# Detect system architecture (as reported by `uname -m`).
ARCH="$(uname -m)"
echo "Detected architecture: ${ARCH}"

# Pick the NVRTC pip package name for the configured CUDA flavour: cu130 uses
# the unsuffixed package, every other value uses the "-cu12" package.
case "$CU_VERSION" in
  cu130)
    NVRTC_SPEC="nvidia-cuda-nvrtc"
    ;;
  *)
    NVRTC_SPEC="nvidia-cuda-nvrtc-cu12"
    ;;
esac
# Kill existing processes
# SCRIPT_DIR is the absolute directory holding this script; it is also used
# further down to locate sibling helper scripts.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
# The kill helper lives two directories above this script.
bash "${SCRIPT_DIR}/../../killall_sglang.sh"
# Log which GPUs are visible to this job (empty when the variable is unset).
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
# Install apt packages (including python3/pip which may be missing on some runners).
# --no-install-recommends keeps the install minimal. Errors are tolerated as long
# as every package we need turns out to be installed already, because the runner
# may carry unrelated broken packages (e.g. NVIDIA driver packages with broken
# dependencies) that make apt-get return non-zero.
base_apt_packages=(
  python3 python3-pip python3-venv python3-dev
  git libnuma-dev libssl-dev pkg-config
  libibverbs-dev libibverbs1 ibverbs-providers ibverbs-utils
)

# Refresh the package index first (a stale index causes 404 on security.ubuntu.com).
apt-get update || true

if ! apt-get install -y --no-install-recommends "${base_apt_packages[@]}"; then
  echo "Warning: apt-get install failed, checking if required packages are available..."
  # Verify the packages we need are actually installed ("ii" = installed OK in dpkg -l).
  for pkg in "${base_apt_packages[@]}"; do
    if ! dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"; then
      echo "ERROR: Required package $pkg is not installed and apt-get failed"
      exit 1
    fi
  done
  echo "All required packages are already installed, continuing..."
fi
# Clear torch compilation cache
# The directory is $TORCHINDUCTOR_CACHE_DIR when that is set and non-empty,
# otherwise <system temp dir>/torchinductor_<current user>. A missing directory
# or a removal error is ignored.
python3 - <<'PY'
import getpass
import os
import shutil
import tempfile

cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(
    tempfile.gettempdir(), "torchinductor_" + getpass.getuser()
)
shutil.rmtree(cache_dir, ignore_errors=True)
PY
# Check if protoc of correct architecture is already installed.
# INSTALL_PROTOC is set to 1 when protoc is missing from PATH or is present but
# cannot be executed; otherwise it is left untouched.
if ! command -v protoc >/dev/null 2>&1; then
  INSTALL_PROTOC=1
elif ! protoc --version >/dev/null 2>&1; then
  echo "protoc found but not runnable, reinstalling..."
  INSTALL_PROTOC=1
else
  echo "protoc already installed: $(protoc --version)"
fi
# Install protoc for router build (gRPC protobuf compilation)
# Only runs when INSTALL_PROTOC is 1. Build tools are installed through apt-get
# or yum (whichever exists), then the protoc 32.0 release archive matching ARCH
# is downloaded into /tmp and unpacked into /usr/local.
if [ "${INSTALL_PROTOC:-0}" = "1" ]; then
  # TODO: move this to a separate script
  echo "Installing protoc..."
  if command -v apt-get &> /dev/null; then
    # Ubuntu/Debian
    apt-get update || true  # May fail due to unrelated broken packages
    apt-get install -y --no-install-recommends wget unzip gcc g++ perl make || {
      echo "Warning: apt-get install failed, checking if required packages are available..."
      for pkg in wget unzip gcc g++ perl make; do
        if ! dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"; then
          echo "ERROR: Required package $pkg is not installed and apt-get failed"
          exit 1
        fi
      done
      echo "All required packages are already installed, continuing..."
    }
  elif command -v yum &> /dev/null; then
    # RHEL/CentOS
    yum update -y
    yum install -y wget unzip gcc gcc-c++ perl-core make
  fi
  cd /tmp
  # Determine protoc architecture
  if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
    PROTOC_ARCH="aarch_64"
  else
    PROTOC_ARCH="x86_64"
  fi
  PROTOC_ZIP="protoc-32.0-linux-${PROTOC_ARCH}.zip"
  # Write to an explicit output file. Without -O, wget keeps an archive left
  # behind by an earlier aborted run and saves the new download as "<name>.1",
  # so unzip would keep reading the stale (possibly truncated) file.
  wget -O "${PROTOC_ZIP}" \
    "https://github.com/protocolbuffers/protobuf/releases/download/v32.0/${PROTOC_ZIP}"
  # -o overwrites files from a previous protoc install without prompting.
  unzip -o "${PROTOC_ZIP}" -d /usr/local
  rm "${PROTOC_ZIP}"
  protoc --version
  cd -
else
  echo "protoc already installed: $(protoc --version)"
fi
# Install uv (use python3 -m pip for robustness since some runners only have pip3)
python3 -m pip install --upgrade pip

# Choose the installer front-end used by the rest of the script:
#   PIP_CMD / PIP_INSTALL_SUFFIX               - install command and trailing flags
#   PIP_UNINSTALL_CMD / PIP_UNINSTALL_SUFFIX   - uninstall command and trailing flags
case "$IS_BLACKWELL" in
  1)
    # The blackwell CI runner has some issues with pip and uv,
    # so we can only use pip with `--break-system-packages`
    PIP_CMD="pip"
    PIP_INSTALL_SUFFIX="--break-system-packages"
    PIP_UNINSTALL_CMD="pip uninstall -y"
    PIP_UNINSTALL_SUFFIX="--break-system-packages"
    ;;
  *)
    # In normal cases, we use uv, which is much faster than pip.
    pip install uv
    export UV_SYSTEM_PYTHON=true
    PIP_CMD="uv pip"
    PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match --prerelease allow"
    PIP_UNINSTALL_CMD="uv pip uninstall"
    PIP_UNINSTALL_SUFFIX=""
    ;;
esac
# Clean up existing installations
# NOTE: the PIP_* command variables are expanded unquoted on purpose so that
# they split into a command plus its flags.
$PIP_UNINSTALL_CMD sgl-kernel sglang $PIP_UNINSTALL_SUFFIX || true

# Keep flashinfer packages installed if version matches to avoid re-downloading:
# - flashinfer-cubin: 150+ MB, plus extra cubins from ci_download_flashinfer_cubin.sh
# - flashinfer-jit-cache: 1.2+ GB, by far the largest download in CI
# Each lookup falls back to an empty string when the package or pin is not found.
FLASHINFER_CUBIN_REQUIRED=$(grep -Po -m1 '(?<=flashinfer_cubin==)[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "")
FLASHINFER_CUBIN_INSTALLED=$(pip show flashinfer-cubin 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
# The local version suffix (everything from "+") is stripped before comparing.
FLASHINFER_JIT_INSTALLED=$(pip show flashinfer-jit-cache 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//' || echo "")

# flashinfer-cubin: keep only when a required version is known and it matches.
if [[ -n "$FLASHINFER_CUBIN_REQUIRED" && "$FLASHINFER_CUBIN_INSTALLED" == "$FLASHINFER_CUBIN_REQUIRED" ]]; then
  echo "flashinfer-cubin==${FLASHINFER_CUBIN_REQUIRED} already installed, keeping it"
  UNINSTALL_CUBIN=false
else
  echo "flashinfer-cubin version mismatch (installed: ${FLASHINFER_CUBIN_INSTALLED:-none}, required: ${FLASHINFER_CUBIN_REQUIRED}), reinstalling"
  UNINSTALL_CUBIN=true
fi

# flashinfer-jit-cache: same rule, compared against FLASHINFER_VERSION.
if [[ -n "$FLASHINFER_VERSION" && "$FLASHINFER_JIT_INSTALLED" == "$FLASHINFER_VERSION" ]]; then
  echo "flashinfer-jit-cache==${FLASHINFER_VERSION} already installed, keeping it"
  UNINSTALL_JIT_CACHE=false
else
  echo "flashinfer-jit-cache version mismatch (installed: ${FLASHINFER_JIT_INSTALLED:-none}, required: ${FLASHINFER_VERSION}), will reinstall"
  UNINSTALL_JIT_CACHE=true
fi

# Build uninstall list based on what needs updating
# (flashinfer-python is always removed).
FLASHINFER_UNINSTALL="flashinfer-python"
if [[ "$UNINSTALL_CUBIN" == true ]]; then
  FLASHINFER_UNINSTALL="$FLASHINFER_UNINSTALL flashinfer-cubin"
fi
if [[ "$UNINSTALL_JIT_CACHE" == true ]]; then
  FLASHINFER_UNINSTALL="$FLASHINFER_UNINSTALL flashinfer-jit-cache"
fi
$PIP_UNINSTALL_CMD $FLASHINFER_UNINSTALL $PIP_UNINSTALL_SUFFIX || true
$PIP_UNINSTALL_CMD opencv-python opencv-python-headless $PIP_UNINSTALL_SUFFIX || true
# Install the main package
# The "dev" extra is always requested; OPTIONAL_DEPS, when non-empty, is
# appended after a comma.
EXTRAS="dev${OPTIONAL_DEPS:+,${OPTIONAL_DEPS}}"
echo "Installing python extras: [${EXTRAS}]"
# Editable install of ./python, with the PyTorch wheel index for CU_VERSION as
# an additional package index.
$PIP_CMD install -e "python[${EXTRAS}]" \
  --extra-index-url "https://download.pytorch.org/whl/${CU_VERSION}" \
  $PIP_INSTALL_SUFFIX
# Fix CUDA version mismatch between torch and torchaudio.
# PyPI's torch 2.9.1 bundles cu128 but torchaudio from pytorch.org/cu129 uses cu129.
# This mismatch causes torchaudio's C extension to fail loading, producing:
# "partially initialized module 'torchaudio' has no attribute 'lib'"
# We cannot replace torch with cu129 (breaks sgl_kernel ABI), so instead we reinstall
# torchaudio/torchvision from an index matching torch's CUDA version.
TORCH_CUDA_VER=$(python3 -c "import torch; v=torch.version.cuda; parts=v.split('.'); print(f'cu{parts[0]}{parts[1]}')")
echo "Detected torch CUDA version: ${TORCH_CUDA_VER}"
if [ "${TORCH_CUDA_VER}" != "${CU_VERSION}" ]; then
  # Pin versions to match what was installed by pyproject.toml (strip +cuXYZ suffix).
  # The `|| echo ""` fallback matters: with `set -e -o pipefail` a failing
  # `pip show` (package not installed) would otherwise abort the whole script
  # here without any explanation.
  TORCHAUDIO_VER=$(pip show torchaudio 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//' || echo "")
  TORCHVISION_VER=$(pip show torchvision 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//' || echo "")

  # Only reinstall the packages that are actually present; a missing package
  # has no mismatched build to replace.
  TORCH_REINSTALL_SPECS=()
  if [ -n "${TORCHAUDIO_VER}" ]; then
    TORCH_REINSTALL_SPECS+=("torchaudio==${TORCHAUDIO_VER}")
  else
    echo "Warning: torchaudio is not installed, skipping its reinstall"
  fi
  if [ -n "${TORCHVISION_VER}" ]; then
    TORCH_REINSTALL_SPECS+=("torchvision==${TORCHVISION_VER}")
  else
    echo "Warning: torchvision is not installed, skipping its reinstall"
  fi

  if [ "${#TORCH_REINSTALL_SPECS[@]}" -gt 0 ]; then
    echo "Reinstalling ${TORCH_REINSTALL_SPECS[*]} from ${TORCH_CUDA_VER} index to match torch..."
    # --no-deps keeps the already-installed torch untouched.
    $PIP_CMD install "${TORCH_REINSTALL_SPECS[@]}" \
      --index-url "https://download.pytorch.org/whl/${TORCH_CUDA_VER}" \
      --force-reinstall --no-deps $PIP_INSTALL_SUFFIX
  fi
fi
# Install router for pd-disagg test
$PIP_CMD install sglang-router $PIP_INSTALL_SUFFIX

# Remove flash_attn folder to avoid conflicts
# The folder is looked up in the first directory reported by site.getsitepackages().
PYTHON_LIB_PATH="$(python3 -c "import site; print(site.getsitepackages()[0])")"
FLASH_ATTN_PATH="${PYTHON_LIB_PATH}/flash_attn"
if [ ! -d "$FLASH_ATTN_PATH" ]; then
  echo "Directory $FLASH_ATTN_PATH does not exist."
else
  echo "Directory $FLASH_ATTN_PATH exists. Removing..."
  rm -rf "$FLASH_ATTN_PATH"
fi
# Install sgl-kernel
# Two version strings are read: the one declared by the kernel's own
# pyproject.toml (used to name the locally built wheel) and the first
# `sgl-kernel==` pin in python/pyproject.toml (used for index installs).
SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"

if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
  if [ -d "sgl-kernel/dist" ]; then
    # A locally built wheel is available: install exactly that wheel.
    ls -alh sgl-kernel/dist
    # Determine wheel architecture
    case "$ARCH" in
      aarch64 | arm64) WHEEL_ARCH="aarch64" ;;
      *) WHEEL_ARCH="x86_64" ;;
    esac
    $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_${WHEEL_ARCH}.whl --force-reinstall $PIP_INSTALL_SUFFIX
  else
    # CUSTOM_BUILD_SGL_KERNEL was set but artifacts not available (e.g., stage rerun without wheel build)
    # Fail instead of falling back to PyPI - we need to test the built kernel, not PyPI version
    echo "ERROR: CUSTOM_BUILD_SGL_KERNEL=true but sgl-kernel/dist not found."
    echo "This usually happens when rerunning a stage without the sgl-kernel-build-wheels job."
    echo "Please re-run the full workflow using /tag-and-rerun-ci to rebuild the kernel."
    exit 1
  fi
elif [ "$IS_BLACKWELL" = "1" ]; then
  # On Blackwell machines, skip reinstall if correct version already installed to avoid race conditions
  INSTALLED_SGL_KERNEL=$(pip show sgl-kernel 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
  if [ "$INSTALLED_SGL_KERNEL" != "$SGL_KERNEL_VERSION_FROM_SRT" ]; then
    echo "Installing sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} (current: ${INSTALLED_SGL_KERNEL:-none})"
    $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} $PIP_INSTALL_SUFFIX
  else
    echo "sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} already installed, skipping reinstall"
  fi
else
  # Everywhere else, always reinstall the pinned release.
  $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
fi
# Show current packages
$PIP_CMD list

# Install other python dependencies
# "huggingface_hub[hf_xet]" is quoted because an unquoted [...] is a glob
# bracket expression: a matching file name in the working directory would
# silently replace the requirement string.
$PIP_CMD install mooncake-transfer-engine==0.3.9 "${NVRTC_SPEC}" py-spy scipy "huggingface_hub[hf_xet]" pytest $PIP_INSTALL_SUFFIX

if [ "$IS_BLACKWELL" != "1" ]; then
  # For lmms_evals evaluating MMMU
  # Shallow clone of the v0.5 branch into ./lmms-eval, installed in editable mode.
  git clone --branch v0.5 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
  $PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX
fi
# DeepEP depends on nvshmem 3.4.5
if [ "$IS_BLACKWELL" != "1" ]; then
  # Regular runners: always force the pinned version.
  $PIP_CMD install nvidia-nvshmem-cu12==3.4.5 --force-reinstall $PIP_INSTALL_SUFFIX
else
  # On Blackwell machines, skip reinstall if correct version already installed to avoid race conditions
  INSTALLED_NVSHMEM=$(pip show nvidia-nvshmem-cu12 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
  if [ "$INSTALLED_NVSHMEM" != "3.4.5" ]; then
    $PIP_CMD install nvidia-nvshmem-cu12==3.4.5 $PIP_INSTALL_SUFFIX
  else
    echo "nvidia-nvshmem-cu12==3.4.5 already installed, skipping reinstall"
  fi
fi
# Cudnn with version less than 9.16.0.29 will cause performance regression on Conv3D kernel
if [ "$IS_BLACKWELL" != "1" ]; then
  # Regular runners: always force the pinned version.
  $PIP_CMD install nvidia-cudnn-cu12==9.16.0.29 --force-reinstall $PIP_INSTALL_SUFFIX
else
  # On Blackwell machines, skip reinstall if correct version already installed to avoid race conditions
  INSTALLED_CUDNN=$(pip show nvidia-cudnn-cu12 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
  if [ "$INSTALLED_CUDNN" != "9.16.0.29" ]; then
    $PIP_CMD install nvidia-cudnn-cu12==9.16.0.29 $PIP_INSTALL_SUFFIX
  else
    echo "nvidia-cudnn-cu12==9.16.0.29 already installed, skipping reinstall"
  fi
fi
# Remove xformers if it is present (best effort).
# Use the same uninstall command and suffix as the other cleanup steps: on
# Blackwell runners these carry `-y` (no interactive confirmation is possible
# in CI) and `--break-system-packages`, which a bare `pip uninstall` lacks.
$PIP_UNINSTALL_CMD xformers $PIP_UNINSTALL_SUFFIX || true
# Install flashinfer-jit-cache with caching and retry logic (flashinfer.ai can have transient DNS issues)
# The jit-cache wheel is 1.2+ GB, so we skip the download entirely if already installed.
#
# Order of preference:
#   1. keep the installed package when the earlier version check kept it,
#   2. install a wheel already present in the local wheel cache,
#   3. download the wheel into the cache (with retries) and install it.
# The script exits with status 1 when none of these succeeds.
FLASHINFER_MAX_ATTEMPTS=5
FLASHINFER_INSTALLED=false
if [ "$UNINSTALL_JIT_CACHE" = false ]; then
  FLASHINFER_INSTALLED=true
  echo "flashinfer-jit-cache already at correct version, skipping download"
fi

if [ "$FLASHINFER_INSTALLED" = false ]; then
  # Cache directory for flashinfer wheels (persists across CI runs on self-hosted runners)
  FLASHINFER_CACHE_DIR="${HOME}/.cache/flashinfer-wheels"
  mkdir -p "${FLASHINFER_CACHE_DIR}"
  # Clean up old versions to avoid cache bloat
  find "${FLASHINFER_CACHE_DIR}" -name "flashinfer_jit_cache-*.whl" ! -name "flashinfer_jit_cache-${FLASHINFER_VERSION}*" -type f -delete 2>/dev/null || true
  FLASHINFER_WHEEL_PATTERN="flashinfer_jit_cache-${FLASHINFER_VERSION}*.whl"
  CACHED_WHEEL=$(find "${FLASHINFER_CACHE_DIR}" -name "${FLASHINFER_WHEEL_PATTERN}" -type f 2>/dev/null | head -n 1)

  # Try to install from cache first
  if [ -n "$CACHED_WHEEL" ] && [ -f "$CACHED_WHEEL" ]; then
    echo "Found cached flashinfer wheel: $CACHED_WHEEL"
    if $PIP_CMD install "$CACHED_WHEEL" $PIP_INSTALL_SUFFIX; then
      FLASHINFER_INSTALLED=true
      echo "Successfully installed flashinfer-jit-cache from cache"
    else
      echo "Failed to install from cache, will try downloading..."
      rm -f "$CACHED_WHEEL"
    fi
  fi

  # If not installed from cache, download with retry logic
  if [ "$FLASHINFER_INSTALLED" = false ]; then
    for ((i = 1; i <= FLASHINFER_MAX_ATTEMPTS; i++)); do
      # Download wheel to cache directory (use pip directly as uv pip doesn't support download)
      # Timeout after 10 minutes — the wheel is ~1.2 GB
      if timeout 600 pip download "flashinfer-jit-cache==${FLASHINFER_VERSION}" \
        --index-url "https://flashinfer.ai/whl/${CU_VERSION}" \
        -d "${FLASHINFER_CACHE_DIR}"; then
        CACHED_WHEEL=$(find "${FLASHINFER_CACHE_DIR}" -name "${FLASHINFER_WHEEL_PATTERN}" -type f 2>/dev/null | head -n 1)
        if [ -n "$CACHED_WHEEL" ] && [ -f "$CACHED_WHEEL" ]; then
          if $PIP_CMD install "$CACHED_WHEEL" $PIP_INSTALL_SUFFIX; then
            FLASHINFER_INSTALLED=true
            echo "Successfully downloaded and installed flashinfer-jit-cache"
            break
          fi
          # Same handling as the cache path above: drop a wheel that cannot be
          # installed so the next attempt fetches a fresh copy instead of
          # finding the same file already present in the download directory.
          echo "Failed to install downloaded wheel, removing it: $CACHED_WHEEL"
          rm -f "$CACHED_WHEEL"
        else
          echo "Warning: Download succeeded but wheel file not found"
        fi
      fi
      # Only announce and wait for a retry when another attempt will follow.
      if ((i < FLASHINFER_MAX_ATTEMPTS)); then
        echo "Attempt $i to download flashinfer-jit-cache failed, retrying in 10 seconds..."
        sleep 10
      else
        echo "Attempt $i to download flashinfer-jit-cache failed"
      fi
    done
  fi
fi

if [ "$FLASHINFER_INSTALLED" = false ]; then
  echo "ERROR: Failed to install flashinfer-jit-cache after ${FLASHINFER_MAX_ATTEMPTS} attempts"
  exit 1
fi
# Download flashinfer cubins if the local set is incomplete
# (delegated to the helper script that sits next to this one).
bash "$SCRIPT_DIR/ci_download_flashinfer_cubin.sh"

# Show current packages
$PIP_CMD list
# Log the CUDA version the installed torch build reports.
python3 -c 'import torch; print(torch.version.cuda)'

# Prepare the CI runner (cleanup HuggingFace cache, etc.)
bash "$SCRIPT_DIR/prepare_runner.sh"
# End of script.