#!/bin/bash
# CI environment bootstrap: exit on error, error on unset vars, fail broken
# pipelines, and trace every command for the CI log.
set -euxo pipefail

# Tunables (overridable via the environment / argv).
IS_BLACKWELL="${IS_BLACKWELL:-0}"   # "1" when running on a Blackwell runner
CU_VERSION=cu129                    # CUDA wheel-index tag used for installs
FLASHINFER_VERSION=0.6.4            # pinned flashinfer-jit-cache version
OPTIONAL_DEPS=${1:-}                # extra pip extras, comma-separated ($1)
|
|
| |
# Record the machine architecture for later arch-specific steps.
ARCH=$(uname -m)
echo "Detected architecture: ${ARCH}"

# CUDA 13 wheels drop the "-cu12" suffix from the NVRTC package name.
case "$CU_VERSION" in
  cu130) NVRTC_SPEC="nvidia-cuda-nvrtc" ;;
  *) NVRTC_SPEC="nvidia-cuda-nvrtc-cu12" ;;
esac
|
|
| |
# Resolve the directory holding this script, then kill any sglang processes
# left over from earlier CI runs so they cannot hold GPUs or ports.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
bash "${SCRIPT_DIR}/../../killall_sglang.sh"
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
|
|
| |
| |
| |
| |
# Base system packages. apt may fail on some runner images (e.g. locked or
# offline mirrors); in that case we only require that every package is
# already installed, otherwise we abort.
APT_BASE_PKGS="python3 python3-pip python3-venv python3-dev git libnuma-dev libssl-dev pkg-config libibverbs-dev libibverbs1 ibverbs-providers ibverbs-utils"
apt-get update || true
apt-get install -y --no-install-recommends $APT_BASE_PKGS || {
  echo "Warning: apt-get install failed, checking if required packages are available..."
  for pkg in $APT_BASE_PKGS; do
    if ! dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"; then
      echo "ERROR: Required package $pkg is not installed and apt-get failed"
      exit 1
    fi
  done
  echo "All required packages are already installed, continuing..."
}
|
|
| |
# Wipe the TorchInductor compile cache so stale compiled kernels from a
# previous run cannot leak into this job.
python3 - <<'PYEOF'
import os, shutil, tempfile, getpass
cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(
    tempfile.gettempdir(), "torchinductor_" + getpass.getuser())
shutil.rmtree(cache_dir, ignore_errors=True)
PYEOF
|
|
| |
# Decide whether protoc needs (re)installation: missing entirely, or present
# but failing to execute (e.g. a wrong-arch or corrupted binary).
if command -v protoc >/dev/null 2>&1 && protoc --version >/dev/null 2>&1; then
  echo "protoc already installed: $(protoc --version)"
else
  if command -v protoc >/dev/null 2>&1; then
    echo "protoc found but not runnable, reinstalling..."
  fi
  INSTALL_PROTOC=1
fi
|
|
| |
# Install protoc v32.0 from the official release archive when the detection
# step above flagged it. Fix: quote all ${PROTOC_ZIP}/URL expansions so an
# unexpected value cannot word-split or glob (ShellCheck SC2086).
if [ "${INSTALL_PROTOC:-0}" = "1" ]; then
  echo "Installing protoc..."
  # Fetch build prerequisites on either Debian- or RHEL-style images.
  if command -v apt-get &> /dev/null; then
    apt-get update || true
    apt-get install -y --no-install-recommends wget unzip gcc g++ perl make || {
      echo "Warning: apt-get install failed, checking if required packages are available..."
      for pkg in wget unzip gcc g++ perl make; do
        if ! dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"; then
          echo "ERROR: Required package $pkg is not installed and apt-get failed"
          exit 1
        fi
      done
      echo "All required packages are already installed, continuing..."
    }
  elif command -v yum &> /dev/null; then
    yum update -y
    yum install -y wget unzip gcc gcc-c++ perl-core make
  fi

  cd /tmp
  # Upstream release archives name ARM builds "aarch_64" (with underscore).
  if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
    PROTOC_ARCH="aarch_64"
  else
    PROTOC_ARCH="x86_64"
  fi
  PROTOC_ZIP="protoc-32.0-linux-${PROTOC_ARCH}.zip"
  wget "https://github.com/protocolbuffers/protobuf/releases/download/v32.0/${PROTOC_ZIP}"
  unzip -o "${PROTOC_ZIP}" -d /usr/local
  rm "${PROTOC_ZIP}"
  protoc --version
  cd -
else
  echo "protoc already installed: $(protoc --version)"
fi
|
|
| |
# Make sure pip itself is current before any package work.
python3 -m pip install --upgrade pip

# Select the package-manager front end for the rest of the script.
# Blackwell runners use system pip (which needs --break-system-packages);
# all other runners use uv against the system Python.
if [ "$IS_BLACKWELL" = "1" ]; then
  PIP_CMD="pip"
  PIP_UNINSTALL_CMD="pip uninstall -y"
  PIP_INSTALL_SUFFIX="--break-system-packages"
  PIP_UNINSTALL_SUFFIX="--break-system-packages"
else
  pip install uv
  export UV_SYSTEM_PYTHON=true
  PIP_CMD="uv pip"
  PIP_UNINSTALL_CMD="uv pip uninstall"
  PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match --prerelease allow"
  PIP_UNINSTALL_SUFFIX=""
fi

# Start from a clean slate: drop any previously installed sglang packages.
# Failures (nothing installed) are fine.
$PIP_UNINSTALL_CMD sgl-kernel sglang $PIP_UNINSTALL_SUFFIX || true
| |
| |
| |
# Compare the installed flashinfer packages against the versions this
# checkout requires, and flag mismatches for reinstallation below.
FLASHINFER_CUBIN_REQUIRED=$(grep -Po -m1 '(?<=flashinfer_cubin==)[0-9A-Za-z\.\-]+' python/pyproject.toml || echo "")
FLASHINFER_CUBIN_INSTALLED=$(pip show flashinfer-cubin 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
FLASHINFER_JIT_INSTALLED=$(pip show flashinfer-jit-cache 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//' || echo "")

# Assume both packages need replacing until the checks prove otherwise.
UNINSTALL_CUBIN=true
UNINSTALL_JIT_CACHE=true

if [ -n "$FLASHINFER_CUBIN_REQUIRED" ] && [ "$FLASHINFER_CUBIN_INSTALLED" = "$FLASHINFER_CUBIN_REQUIRED" ]; then
  echo "flashinfer-cubin==${FLASHINFER_CUBIN_REQUIRED} already installed, keeping it"
  UNINSTALL_CUBIN=false
else
  echo "flashinfer-cubin version mismatch (installed: ${FLASHINFER_CUBIN_INSTALLED:-none}, required: ${FLASHINFER_CUBIN_REQUIRED}), reinstalling"
fi

if [ -n "$FLASHINFER_VERSION" ] && [ "$FLASHINFER_JIT_INSTALLED" = "$FLASHINFER_VERSION" ]; then
  echo "flashinfer-jit-cache==${FLASHINFER_VERSION} already installed, keeping it"
  UNINSTALL_JIT_CACHE=false
else
  echo "flashinfer-jit-cache version mismatch (installed: ${FLASHINFER_JIT_INSTALLED:-none}, required: ${FLASHINFER_VERSION}), will reinstall"
fi
|
|
| |
# Build the uninstall list: flashinfer-python is always refreshed; the cubin
# and jit-cache packages only go when flagged above. Also drop opencv
# variants that conflict with the pinned dependency set.
FLASHINFER_UNINSTALL="flashinfer-python"
if [ "$UNINSTALL_CUBIN" = true ]; then
  FLASHINFER_UNINSTALL="$FLASHINFER_UNINSTALL flashinfer-cubin"
fi
if [ "$UNINSTALL_JIT_CACHE" = true ]; then
  FLASHINFER_UNINSTALL="$FLASHINFER_UNINSTALL flashinfer-jit-cache"
fi
$PIP_UNINSTALL_CMD $FLASHINFER_UNINSTALL $PIP_UNINSTALL_SUFFIX || true
$PIP_UNINSTALL_CMD opencv-python opencv-python-headless $PIP_UNINSTALL_SUFFIX || true
|
|
| |
# Install sglang in editable mode with the "dev" extra plus any extras the
# caller passed as $1, pulling CUDA wheels from the matching PyTorch index.
EXTRAS="dev${OPTIONAL_DEPS:+,${OPTIONAL_DEPS}}"
echo "Installing python extras: [${EXTRAS}]"

$PIP_CMD install -e "python[${EXTRAS}]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX
|
|
| |
| |
| |
| |
| |
| |
# torch may have resolved to a different CUDA variant than CU_VERSION asked
# for. If so, re-pin torchaudio/torchvision from torch's actual CUDA index so
# the three packages stay consistent with each other.
TORCH_CUDA_VER=$(python3 -c "import torch; parts = torch.version.cuda.split('.'); print('cu' + parts[0] + parts[1])")
echo "Detected torch CUDA version: ${TORCH_CUDA_VER}"
if [ "${TORCH_CUDA_VER}" != "${CU_VERSION}" ]; then
  # Strip any local-version suffix (e.g. "+cu129") before re-pinning.
  TORCHAUDIO_VER=$(pip show torchaudio 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//')
  TORCHVISION_VER=$(pip show torchvision 2>/dev/null | grep "^Version:" | awk '{print $2}' | sed 's/+.*//')
  echo "Reinstalling torchaudio==${TORCHAUDIO_VER} torchvision==${TORCHVISION_VER} from ${TORCH_CUDA_VER} index to match torch..."
  $PIP_CMD install "torchaudio==${TORCHAUDIO_VER}" "torchvision==${TORCHVISION_VER}" --index-url "https://download.pytorch.org/whl/${TORCH_CUDA_VER}" --force-reinstall --no-deps $PIP_INSTALL_SUFFIX
fi
|
|
| |
# Router package used by the routing tests.
$PIP_CMD install sglang-router $PIP_INSTALL_SUFFIX

# Remove any stray flash_attn tree from site-packages so it cannot shadow
# the attention backends installed above.
PYTHON_LIB_PATH=$(python3 -c "import site; print(site.getsitepackages()[0])")
FLASH_ATTN_PATH="${PYTHON_LIB_PATH}/flash_attn"
if [ ! -d "$FLASH_ATTN_PATH" ]; then
  echo "Directory $FLASH_ATTN_PATH does not exist."
else
  echo "Directory $FLASH_ATTN_PATH exists. Removing..."
  rm -rf "$FLASH_ATTN_PATH"
fi
|
|
| |
# Resolve the sgl-kernel version pinned by the kernel tree itself and the one
# required by the sglang runtime, then install from the appropriate source.
SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"

if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ] && [ -d "sgl-kernel/dist" ]; then
  # A wheel was built earlier in this workflow — install it directly.
  ls -alh sgl-kernel/dist
  case "$ARCH" in
    aarch64|arm64) WHEEL_ARCH="aarch64" ;;
    *) WHEEL_ARCH="x86_64" ;;
  esac
  $PIP_CMD install "sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_${WHEEL_ARCH}.whl" --force-reinstall $PIP_INSTALL_SUFFIX
elif [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
  # Custom build requested but no wheel present: the build job did not run.
  echo "ERROR: CUSTOM_BUILD_SGL_KERNEL=true but sgl-kernel/dist not found."
  echo "This usually happens when rerunning a stage without the sgl-kernel-build-wheels job."
  echo "Please re-run the full workflow using /tag-and-rerun-ci to rebuild the kernel."
  exit 1
else
  if [ "$IS_BLACKWELL" = "1" ]; then
    # Blackwell path: skip the reinstall when the pinned version is present.
    INSTALLED_SGL_KERNEL=$(pip show sgl-kernel 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
    if [ "$INSTALLED_SGL_KERNEL" = "$SGL_KERNEL_VERSION_FROM_SRT" ]; then
      echo "sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} already installed, skipping reinstall"
    else
      echo "Installing sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} (current: ${INSTALLED_SGL_KERNEL:-none})"
      $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} $PIP_INSTALL_SUFFIX
    fi
  else
    $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
  fi
fi
|
|
| |
# Log the environment so CI failures are easier to diagnose.
$PIP_CMD list

# Runtime extras: mooncake transfer engine, the NVRTC package matching the
# CUDA major (chosen earlier), profiling and test tooling.
$PIP_CMD install mooncake-transfer-engine==0.3.9 "${NVRTC_SPEC}" py-spy scipy huggingface_hub[hf_xet] pytest $PIP_INSTALL_SUFFIX

# lmms-eval is only installed on non-Blackwell runners.
if [ "$IS_BLACKWELL" != "1" ]; then
  git clone --branch v0.5 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
  $PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX
fi
|
|
| |
| |
# Pin nvidia-nvshmem-cu12==3.4.5. On the Blackwell path the reinstall is
# skipped when the pinned version is already present; elsewhere it is
# force-reinstalled unconditionally.
if [ "$IS_BLACKWELL" = "1" ]; then
  INSTALLED_NVSHMEM=$(pip show nvidia-nvshmem-cu12 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
  if [ "$INSTALLED_NVSHMEM" != "3.4.5" ]; then
    $PIP_CMD install nvidia-nvshmem-cu12==3.4.5 $PIP_INSTALL_SUFFIX
  else
    echo "nvidia-nvshmem-cu12==3.4.5 already installed, skipping reinstall"
  fi
else
  $PIP_CMD install nvidia-nvshmem-cu12==3.4.5 --force-reinstall $PIP_INSTALL_SUFFIX
fi
|
|
| |
| |
# Pin nvidia-cudnn-cu12==9.16.0.29, skipping the reinstall on the Blackwell
# path when the pinned version is already present.
if [ "$IS_BLACKWELL" = "1" ]; then
  INSTALLED_CUDNN=$(pip show nvidia-cudnn-cu12 2>/dev/null | grep "^Version:" | awk '{print $2}' || echo "")
  if [ "$INSTALLED_CUDNN" = "9.16.0.29" ]; then
    echo "nvidia-cudnn-cu12==9.16.0.29 already installed, skipping reinstall"
  else
    $PIP_CMD install nvidia-cudnn-cu12==9.16.0.29 $PIP_INSTALL_SUFFIX
  fi
else
  $PIP_CMD install nvidia-cudnn-cu12==9.16.0.29 --force-reinstall $PIP_INSTALL_SUFFIX
fi

# Remove xformers if present. Fix: use the uninstall command/suffix pair —
# the previous "$PIP_CMD uninstall xformers" expanded to plain
# "pip uninstall xformers" on the Blackwell path, which prompts for
# confirmation (hanging CI) and lacks --break-system-packages.
$PIP_UNINSTALL_CMD xformers $PIP_UNINSTALL_SUFFIX || true
|
|
| |
| |
# flashinfer-jit-cache only needs downloading when the earlier version check
# flagged it for replacement.
FLASHINFER_INSTALLED=false
if [ "$UNINSTALL_JIT_CACHE" = false ]; then
  echo "flashinfer-jit-cache already at correct version, skipping download"
  FLASHINFER_INSTALLED=true
fi
|
|
# Install flashinfer-jit-cache: prefer a wheel cached by a previous run, fall
# back to downloading from the flashinfer wheel index with up to 5 retries.
if [ "$FLASHINFER_INSTALLED" = false ]; then
  FLASHINFER_CACHE_DIR="${HOME}/.cache/flashinfer-wheels"
  mkdir -p "${FLASHINFER_CACHE_DIR}"

  # Evict cached wheels for any other version so the cache never goes stale.
  find "${FLASHINFER_CACHE_DIR}" -name "flashinfer_jit_cache-*.whl" ! -name "flashinfer_jit_cache-${FLASHINFER_VERSION}*" -type f -delete 2>/dev/null || true

  FLASHINFER_WHEEL_PATTERN="flashinfer_jit_cache-${FLASHINFER_VERSION}*.whl"
  CACHED_WHEEL=$(find "${FLASHINFER_CACHE_DIR}" -name "${FLASHINFER_WHEEL_PATTERN}" -type f 2>/dev/null | head -n 1)

  # Attempt 0: install straight from the local cache.
  if [ -n "$CACHED_WHEEL" ] && [ -f "$CACHED_WHEEL" ]; then
    echo "Found cached flashinfer wheel: $CACHED_WHEEL"
    if $PIP_CMD install "$CACHED_WHEEL" $PIP_INSTALL_SUFFIX; then
      FLASHINFER_INSTALLED=true
      echo "Successfully installed flashinfer-jit-cache from cache"
    else
      # Cached wheel is unusable — remove it and fall through to download.
      echo "Failed to install from cache, will try downloading..."
      rm -f "$CACHED_WHEEL"
    fi
  fi

  # Download path with retries; each successful download lands in the cache
  # directory so the next run can reuse it.
  if [ "$FLASHINFER_INSTALLED" = false ]; then
    for i in {1..5}; do
      if timeout 600 pip download flashinfer-jit-cache==${FLASHINFER_VERSION} \
        --index-url https://flashinfer.ai/whl/${CU_VERSION} \
        -d "${FLASHINFER_CACHE_DIR}"; then
        CACHED_WHEEL=$(find "${FLASHINFER_CACHE_DIR}" -name "${FLASHINFER_WHEEL_PATTERN}" -type f 2>/dev/null | head -n 1)
        if [ -n "$CACHED_WHEEL" ] && [ -f "$CACHED_WHEEL" ]; then
          if $PIP_CMD install "$CACHED_WHEEL" $PIP_INSTALL_SUFFIX; then
            FLASHINFER_INSTALLED=true
            echo "Successfully downloaded and installed flashinfer-jit-cache"
            break
          fi
        else
          echo "Warning: Download succeeded but wheel file not found"
        fi
      fi
      echo "Attempt $i to download flashinfer-jit-cache failed, retrying in 10 seconds..."
      sleep 10
    done
  fi
fi

# Hard failure when neither the cache nor the index yielded a usable wheel.
if [ "$FLASHINFER_INSTALLED" = false ]; then
  echo "ERROR: Failed to install flashinfer-jit-cache after 5 attempts"
  exit 1
fi
|
|
| |
# Fetch cubin artifacts via the shared helper script.
bash "${SCRIPT_DIR}/ci_download_flashinfer_cubin.sh"

# Final sanity logging: full package list and torch's CUDA version.
$PIP_CMD list
python3 -c "import torch; print(torch.version.cuda)"

# Common runner preparation (shared across CI jobs).
bash "${SCRIPT_DIR}/prepare_runner.sh"
|
|