# Usage (to build SGLang ROCm docker image): # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx942 -t v0.5.9-rocm700-mi30x -f rocm.Dockerfile . # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx942-rocm720 -t v0.5.9-rocm720-mi30x -f rocm.Dockerfile . # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx950 -t v0.5.9-rocm700-mi35x -f rocm.Dockerfile . # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx950-rocm720 -t v0.5.9-rocm720-mi35x -f rocm.Dockerfile . # Usage (to build SGLang ROCm + Mori docker image): # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx942 --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic -t v0.5.9-rocm700-mi30x -f rocm.Dockerfile . # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx942-rocm720 --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic -t v0.5.9-rocm720-mi30x -f rocm.Dockerfile . # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx950 --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic -t v0.5.9-rocm700-mi35x -f rocm.Dockerfile . # docker build --build-arg SGL_BRANCH=v0.5.9 --build-arg GPU_ARCH=gfx950-rocm720 --build-arg ENABLE_MORI=1 --build-arg NIC_BACKEND=ainic -t v0.5.9-rocm720-mi35x -f rocm.Dockerfile . 
# Default base images, one per supported (GPU arch, ROCm version) flavor.
ARG BASE_IMAGE_942="rocm/sgl-dev:rocm7-vllm-20250904"
ARG BASE_IMAGE_942_ROCM720="rocm/pytorch:rocm7.2_ubuntu22.04_py3.10_pytorch_release_2.9.1"
ARG BASE_IMAGE_950="rocm/sgl-dev:rocm7-vllm-20250904"
ARG BASE_IMAGE_950_ROCM720="rocm/pytorch:rocm7.2_ubuntu22.04_py3.10_pytorch_release_2.9.1"

# Declared ahead of the first FROM so that `FROM ${GPU_ARCH}` below can see it.
ARG GPU_ARCH=gfx950

# ===============================
# Stage gfx942: ROCm 7.0 base image plus its build switches
FROM $BASE_IMAGE_942 AS gfx942
ENV BUILD_VLLM="0" \
    BUILD_TRITON="0" \
    BUILD_LLVM="0" \
    BUILD_AITER_ALL="1" \
    BUILD_MOONCAKE="1" \
    AITER_COMMIT="v0.1.11.post1"

# ===============================
# Stage gfx942-rocm720: ROCm 7.2 base image plus its build switches
FROM $BASE_IMAGE_942_ROCM720 AS gfx942-rocm720
ENV BUILD_VLLM="0" \
    BUILD_TRITON="1" \
    BUILD_LLVM="0" \
    BUILD_AITER_ALL="1" \
    BUILD_MOONCAKE="1" \
    AITER_COMMIT="v0.1.11.post1"

# ===============================
# Stage gfx950: ROCm 7.0 base image plus its build switches
FROM $BASE_IMAGE_950 AS gfx950
ENV BUILD_VLLM="0" \
    BUILD_TRITON="0" \
    BUILD_LLVM="0" \
    BUILD_AITER_ALL="1" \
    BUILD_MOONCAKE="1" \
    AITER_COMMIT="v0.1.11.post1"

# ===============================
# Stage gfx950-rocm720: ROCm 7.2 base image plus its build switches
FROM $BASE_IMAGE_950_ROCM720 AS gfx950-rocm720
ENV BUILD_VLLM="0" \
    BUILD_TRITON="1" \
    BUILD_LLVM="0" \
    BUILD_AITER_ALL="1" \
    BUILD_MOONCAKE="1" \
    AITER_COMMIT="v0.1.11.post1"

# ===============================
# Final stage: continue from whichever flavor GPU_ARCH names
FROM ${GPU_ARCH}

# Re-declared inside the stage so the value is in scope for the instructions below.
ARG GPU_ARCH=gfx950

# GPU_ARCH with its last "-suffix" removed (e.g. gfx942-rocm720 -> gfx942).
ENV GPU_ARCH_LIST=${GPU_ARCH%-*}
ENV PYTORCH_ROCM_ARCH=gfx942;gfx950

# SGLang source
ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
ARG SGL_DEFAULT="main"
ARG SGL_BRANCH=${SGL_DEFAULT}

# Version override for setuptools_scm (used in nightly builds)
ARG SETUPTOOLS_SCM_PRETEND_VERSION=""

# Pinned third-party sources
ARG TRITON_REPO="https://github.com/triton-lang/triton.git"
ARG TRITON_COMMIT="42270451990532c67e69d753fbd026f28fcc4840"

ARG AITER_REPO="https://github.com/ROCm/aiter.git"

ARG LLVM_REPO="https://github.com/jrbyrnes/llvm-project.git"
ARG LLVM_BRANCH="MainOpSelV2"
ARG LLVM_COMMIT="6520ace8227ffe2728148d5f3b9872a870b0a560"

ARG MOONCAKE_REPO="https://github.com/kvcache-ai/Mooncake.git"
ARG MOONCAKE_COMMIT="b6a841dc78c707ec655a563453277d969fb8f38d"

ARG TILELANG_REPO="https://github.com/tile-ai/tilelang.git"
ARG TILELANG_COMMIT="ebf4a7cb8881432165ae8760e99d209d905c704a"

ARG FHT_REPO="https://github.com/jeffdaily/fast-hadamard-transform.git"
ARG FHT_BRANCH="rocm"
ARG FHT_COMMIT="46efb7d776d38638fc39f3c803eaee3dd7016bd1"

# Optional MORI build and its NIC backend selection
ARG ENABLE_MORI=0
ARG NIC_BACKEND=none

ARG MORI_REPO="https://github.com/ROCm/mori.git"
ARG MORI_COMMIT="2f88d06aba75400262ca5c1ca5986cf1fdf4cd82"

# AMD AINIC apt repo settings
ARG AINIC_VERSION=1.117.5
ARG UBUNTU_CODENAME=jammy

USER root

# Fix hipDeviceGetName returning empty string in ROCm 7.0 docker images.
# The ROCm 7.0 base image is missing libdrm-amdgpu-common which provides the
# amdgpu.ids device-ID-to-marketing-name mapping file.
# ROCm 7.2 base images already ship these packages, so this step is skipped.
# See https://github.com/ROCm/ROCm/issues/5992
# Install the libdrm-amdgpu packages on non-rocm720 flavors and copy amdgpu.ids
# to /usr/share/libdrm; rocm720 flavors only print a message.
RUN set -eux; \
    case "${GPU_ARCH}" in \
      *rocm720*) \
        echo "ROCm 7.2 (GPU_ARCH=${GPU_ARCH}): libdrm-amdgpu packages already present, skipping"; \
        ;; \
      *) \
        echo "ROCm 7.0 (GPU_ARCH=${GPU_ARCH}): installing libdrm-amdgpu packages"; \
        mkdir -p /etc/apt/keyrings; \
        curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key \
          | gpg --dearmor -o /etc/apt/keyrings/amdgpu-graphics.gpg \
        && echo 'deb [arch=amd64,i386 signed-by=/etc/apt/keyrings/amdgpu-graphics.gpg] https://repo.radeon.com/graphics/7.0/ubuntu jammy main' \
          > /etc/apt/sources.list.d/amdgpu-graphics.list \
        && apt-get update \
        && apt-get install -y --no-install-recommends \
             libdrm-amdgpu-common \
             libdrm-amdgpu-amdgpu1 \
             libdrm2-amdgpu \
        && rm -rf /var/lib/apt/lists/* \
        && cp /opt/amdgpu/share/libdrm/amdgpu.ids /usr/share/libdrm/amdgpu.ids; \
        ;; \
    esac

# Install some basic utilities
RUN python -m pip install --upgrade pip && pip install setuptools_scm

# Remove sccache however it was installed (apt, pip, or a stray binary).
# The steps are separated by ';', so an earlier one failing does not stop the later ones.
RUN apt-get purge -y sccache; python -m pip uninstall -y sccache; rm -f "$(which sccache)"

# Install AMD SMI Python package from ROCm distribution.
# The ROCm 7.2 base image (rocm/pytorch) does not pre-install this package.
RUN set -eux; \
    case "${GPU_ARCH}" in \
      *rocm720*) \
        echo "ROCm 7.2 flavor detected from GPU_ARCH=${GPU_ARCH}"; \
        cd /opt/rocm/share/amd_smi \
        && python3 -m pip install --no-cache-dir . \
        ;; \
      *) \
        echo "Not rocm720 (GPU_ARCH=${GPU_ARCH}), skip amdsmi installation"; \
        ;; \
    esac

WORKDIR /sgl-workspace

# -----------------------
# llvm
# Only built when the selected stage sets BUILD_LLVM=1.
# HIP_CLANG_PATH is set with a shell `export`: a Dockerfile `ENV` instruction
# cannot appear inside a RUN command line (the shell would try to execute a
# program called "ENV" and the step would fail). The export only lasts for
# this RUN.
RUN if [ "$BUILD_LLVM" = "1" ]; then \
      export HIP_CLANG_PATH="/sgl-workspace/llvm-project/build/bin/" \
      && git clone --single-branch ${LLVM_REPO} -b ${LLVM_BRANCH} \
      && cd llvm-project \
      && git checkout ${LLVM_COMMIT} \
      && mkdir build \
      && cd build \
      && cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=1 -DLLVM_TARGETS_TO_BUILD="AMDGPU;X86" -DLLVM_ENABLE_PROJECTS="clang;lld;" -DLLVM_ENABLE_RUNTIMES="compiler-rt" ../llvm \
      && make -j$(nproc); \
    fi

# -----------------------
# AITER
# Unset setuptools_scm override so AITER gets its own version (AITER_COMMIT), not SGLang's
# (SETUPTOOLS_SCM_PRETEND_VERSION is set later for SGLang nightly builds and would otherwise
# leak into AITER's version when AITER uses setuptools_scm)
ENV SETUPTOOLS_SCM_PRETEND_VERSION=

# psutil and pybind11: required by AITER setup.py
RUN pip uninstall -y aiter \
    && pip install flydsl==0.0.1.dev95158637 \
    && pip install psutil pybind11

# Fetch AITER at the commit pinned by the selected stage (AITER_COMMIT).
RUN git clone ${AITER_REPO} \
    && cd aiter \
    && git checkout ${AITER_COMMIT} \
    && git submodule update --init --recursive

# Hot patches for AITER in v0.1.10.post3
# This is for ROCm 7.2 only, because of the image rebase from vllm
# to rocm/pytorch.
# NOTE(review): the stages in this file pin AITER_COMMIT to v0.1.11.post1, so the
# version named above looks stale -- confirm the patch still applies.
# AITER hot patch: on rocm720 flavors, rewrite the `if ...:` on line 459 of
# pa_mqa_logits.py to `if False:`; other flavors are left untouched.
RUN set -eux; \
    case "${GPU_ARCH}" in \
      *rocm720*) \
        echo "ROCm 7.2 flavor detected from GPU_ARCH=${GPU_ARCH}"; \
        cd aiter \
        && sed -i '459 s/if.*:/if False:/' aiter/ops/triton/attention/pa_mqa_logits.py; \
        ;; \
      *) \
        echo "Not rocm720 (GPU_ARCH=${GPU_ARCH}), skip patch"; \
        ;; \
    esac

# Build AITER in develop mode for GPU_ARCH_LIST.
#   BUILD_AITER_ALL != 1                -> plain build
#   BUILD_AITER_ALL = 1, BUILD_LLVM = 1 -> prebuilt kernels, using the custom LLVM's clang
#   BUILD_AITER_ALL = 1 otherwise       -> prebuilt kernels
# Finally put the checkout on PYTHONPATH for bash shells.
RUN cd aiter \
    && echo "[AITER] GPU_ARCH=${GPU_ARCH}" \
    && if [ "$BUILD_AITER_ALL" != "1" ]; then \
         sh -c "GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop"; \
       elif [ "$BUILD_LLVM" = "1" ]; then \
         sh -c "HIP_CLANG_PATH=/sgl-workspace/llvm-project/build/bin/ PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop"; \
       else \
         sh -c "PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop"; \
       fi \
    && echo "export PYTHONPATH=/sgl-workspace/aiter:\${PYTHONPATH}" >> /etc/bash.bashrc

# -----------------------
# Build Mooncake
ENV PATH=$PATH:/usr/local/go/bin

# Nothing to do unless the selected stage sets BUILD_MOONCAKE=1. Otherwise:
# install build/RDMA/MPI packages, check out the pinned Mooncake commit, run its
# dependency script, replace /usr/local/go with Go 1.22.2, then build and
# install with HIP and etcd enabled.
RUN [ "$BUILD_MOONCAKE" = "1" ] || exit 0; \
    apt update && apt install -y zip unzip wget \
    && apt install -y gcc make libtool autoconf librdmacm-dev rdmacm-utils infiniband-diags ibverbs-utils perftest ethtool libibverbs-dev rdma-core \
    && apt install -y openssh-server openmpi-bin openmpi-common libopenmpi-dev \
    && git clone ${MOONCAKE_REPO} \
    && cd Mooncake \
    && git checkout ${MOONCAKE_COMMIT} \
    && git submodule update --init --recursive \
    && bash dependencies.sh -y \
    && rm -rf /usr/local/go \
    && wget https://go.dev/dl/go1.22.2.linux-amd64.tar.gz \
    && tar -C /usr/local -xzf go1.22.2.linux-amd64.tar.gz \
    && rm go1.22.2.linux-amd64.tar.gz \
    && mkdir -p build \
    && cd build \
    && cmake .. -DUSE_HIP=ON -DUSE_ETCD=ON \
    && make -j "$(nproc)" && make install

# -----------------------
# Build SGLang
ARG BUILD_TYPE=all

# Set version for setuptools_scm if provided (for nightly builds). Only pass in the SGLang
# pip install RUN so it does not affect AITER, sgl-model-gateway, TileLang, FHT, MORI, etc.
ARG SETUPTOOLS_SCM_PRETEND_VERSION
# NOTE(review): an `ENV SETUPTOOLS_SCM_PRETEND_VERSION=` (empty) is set earlier in
# this file for the AITER build. The Dockerfile reference states that ENV values
# override an ARG of the same name inside RUN, so the build-arg may never reach
# the export below -- confirm that nightly version overrides actually take effect.

# Runtime helper packages for SGLang.
RUN pip install IPython \
    && pip install orjson \
    && pip install python-multipart \
    && pip install torchao==0.9.0 \
    && pip install pybind11

# Drop any sgl_kernel / sglang that the base image already ships.
RUN pip uninstall -y sgl_kernel sglang

# Clone SGLang, check out the requested branch, build sgl-kernel for ROCm, then
# install the Python package in editable mode:
#   BUILD_TYPE=srt -> extras [srt_hip,diffusion_hip]
#   anything else  -> extras [all_hip]
# Both operands of the branch comparison are quoted so that an empty or
# whitespace-containing SGL_DEFAULT cannot turn `[ ... ]` into a syntax error.
# The checkout arguments are intentionally left unquoted, as before.
RUN git clone ${SGL_REPO} \
    && cd sglang \
    && if [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
         echo "Using ${SGL_DEFAULT}, default branch."; \
         git checkout ${SGL_DEFAULT}; \
       else \
         echo "Using ${SGL_BRANCH} branch."; \
         git checkout ${SGL_BRANCH}; \
       fi \
    && cd sgl-kernel \
    && rm -f pyproject.toml \
    && mv pyproject_rocm.toml pyproject.toml \
    && AMDGPU_TARGET=$GPU_ARCH_LIST python setup_rocm.py install \
    && cd .. \
    && rm -rf python/pyproject.toml && mv python/pyproject_other.toml python/pyproject.toml \
    && export SETUPTOOLS_SCM_PRETEND_VERSION="${SETUPTOOLS_SCM_PRETEND_VERSION}" \
    && if [ "$BUILD_TYPE" = "srt" ]; then \
         python -m pip --no-cache-dir install -e "python[srt_hip,diffusion_hip]"; \
       else \
         python -m pip --no-cache-dir install -e "python[all_hip]"; \
       fi

RUN python -m pip cache purge

# Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks will not be created in image build.
# For every file whose name contains MI300X in the two config directories, write
# a copy whose path has the first "MI300X" replaced by "MI300X_VF".
RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
         /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
         -type f -name '*MI300X*' | xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}

# Install Rust toolchain for sgl-model-gateway
ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
    && rustc --version && cargo --version
ENV CARGO_BUILD_JOBS=4

# Build and install sgl-model-gateway
# The open-file limit is raised for the maturin build; the wheel is then installed
# and /root/.cache is removed.
RUN python3 -m pip install --no-cache-dir maturin \
    && cd /sgl-workspace/sglang/sgl-model-gateway/bindings/python \
    && ulimit -n 65536 && maturin build --release --features vendored-openssl --out dist \
    && python3 -m pip install --force-reinstall dist/*.whl \
    && rm -rf /root/.cache

# -----------------------
# TileLang
ENV DEBIAN_FRONTEND=noninteractive
ENV LIBGL_ALWAYS_INDIRECT=1
RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment

# Comment lines inside the RUN below are removed by the Dockerfile parser
# before the command reaches bash.
RUN /bin/bash -lc 'set -euo pipefail; \
    echo "[TileLang] Building TileLang for ${GPU_ARCH}"; \
    # System dependencies (NO llvm-dev to avoid llvm-config-16 shadowing)
    apt-get update && apt-get install -y --no-install-recommends \
        build-essential git wget curl ca-certificates gnupg \
        libgtest-dev libgmock-dev \
        libprotobuf-dev protobuf-compiler libgflags-dev libsqlite3-dev \
        python3 python3-dev python3-setuptools python3-pip python3-apt \
        gcc libtinfo-dev zlib1g-dev libedit-dev libxml2-dev vim \
        cmake ninja-build pkg-config libstdc++6 software-properties-common \
    && rm -rf /var/lib/apt/lists/*; \
    \
    # Prefer the container venv
    VENV_PY="/opt/venv/bin/python"; \
    VENV_PIP="/opt/venv/bin/pip"; \
    if [ ! -x "$VENV_PY" ]; then VENV_PY="python3"; fi; \
    if [ ! -x "$VENV_PIP" ]; then VENV_PIP="pip3"; fi; \
    \
    # Build GoogleTest static libs (Ubuntu package ships sources only)
    cmake -S /usr/src/googletest -B /tmp/build-gtest -DBUILD_GTEST=ON -DBUILD_GMOCK=ON -DCMAKE_BUILD_TYPE=Release && \
    cmake --build /tmp/build-gtest -j"$(nproc)" && \
    cp -v /tmp/build-gtest/lib/*.a /usr/lib/x86_64-linux-gnu/ && \
    rm -rf /tmp/build-gtest; \
    \
    # Keep setuptools < 80 (compat with base image)
    # The install is its own statement so that a failure aborts the build;
    # only the cache purge is best-effort.
    "$VENV_PIP" install --upgrade "setuptools>=77.0.3,<80" wheel cmake ninja scikit-build-core; \
    "$VENV_PIP" cache purge || true; \
    \
    # Locate ROCm llvm-config; fallback to installing LLVM 18 if missing
    LLVM_CONFIG_PATH=""; \
    for p in /opt/rocm/llvm/bin/llvm-config /opt/rocm/llvm-*/bin/llvm-config /opt/rocm-*/llvm*/bin/llvm-config; do \
        if [ -x "$p" ]; then LLVM_CONFIG_PATH="$p"; break; fi; \
    done; \
    if [ -z "$LLVM_CONFIG_PATH" ]; then \
        echo "[TileLang] ROCm llvm-config not found; installing LLVM 18..."; \
        # Make sure the keyring directory exists before gpg writes into it
        mkdir -p /etc/apt/keyrings; \
        curl -fsSL https://apt.llvm.org/llvm-snapshot.gpg.key | gpg --dearmor -o /etc/apt/keyrings/llvm.gpg; \
        echo "deb [signed-by=/etc/apt/keyrings/llvm.gpg] http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main" > /etc/apt/sources.list.d/llvm.list; \
        apt-get update; \
        apt-get install -y --no-install-recommends llvm-18; \
        rm -rf /var/lib/apt/lists/*; \
        LLVM_CONFIG_PATH="$(command -v llvm-config-18)"; \
        if [ -z "$LLVM_CONFIG_PATH" ]; then echo "ERROR: llvm-config-18 not found after install"; exit 1; fi; \
    fi; \
    echo "[TileLang] Using LLVM_CONFIG at: $LLVM_CONFIG_PATH"; \
    export PATH="$(dirname "$LLVM_CONFIG_PATH"):/usr/local/bin:${PATH}"; \
    export LLVM_CONFIG="$LLVM_CONFIG_PATH"; \
    \
    # Optional shim for tools that expect llvm-config-16
    mkdir -p /usr/local/bin && \
    printf "#!/usr/bin/env bash\nexec \"%s\" \"\$@\"\n" "$LLVM_CONFIG_PATH" > /usr/local/bin/llvm-config-16 && \
    chmod +x /usr/local/bin/llvm-config-16; \
    \
    # TVM Python bits need Cython + z3 before configure.
    # Pin z3-solver==4.15.4.0: 4.15.4.0 has a manylinux wheel; 4.15.5.0 has no wheel and builds from source (fails: C++20 needs GCC 14+, image has GCC 11).
    "$VENV_PIP" install --no-cache-dir "cython>=0.29.36,<3.0" "apache-tvm-ffi @ git+https://github.com/apache/tvm-ffi.git@37d0485b2058885bf4e7a486f7d7b2174a8ac1ce" "z3-solver==4.15.4.0"; \
    \
    # Clone + pin TileLang (bundled TVM), then build
    git clone --recursive "${TILELANG_REPO}" /opt/tilelang && \
    cd /opt/tilelang && \
    git fetch --depth=1 origin "${TILELANG_COMMIT}" || true && \
    git checkout -f "${TILELANG_COMMIT}" && \
    git submodule update --init --recursive && \
    export CMAKE_ARGS="-DUSE_CUDA=OFF -DUSE_ROCM=ON -DROCM_PATH=/opt/rocm -DLLVM_CONFIG=${LLVM_CONFIG} -DSKBUILD_SABI_VERSION= ${CMAKE_ARGS:-}" && \
    "$VENV_PIP" install -e . -v --no-build-isolation --no-deps; \
    if [ -f pyproject.toml ]; then sed -i "/^[[:space:]]*\"torch/d" pyproject.toml || true; fi; \
    "$VENV_PIP" cache purge || true; \
    "$VENV_PY" -c "import tilelang; print(tilelang.__version__)"'

# -----------------------
# Hadamard-transform (HIP build)
RUN /bin/bash -lc 'set -euo pipefail; \
    git clone --branch "${FHT_BRANCH}" "${FHT_REPO}" fast-hadamard-transform; \
    cd fast-hadamard-transform; \
    git checkout -f "${FHT_COMMIT}"; \
    python setup.py install'

# -----------------------
# Python tools
RUN python3 -m pip install --no-cache-dir \
    py-spy \
    pre-commit \
    tabulate

# -----------------------
# MORI (optional)
# Skipped unless ENABLE_MORI=1. NIC_BACKEND selects the USE_IONIC / USE_BNXT
# switches and any extra NIC packages; an unknown value aborts with exit code 2.
RUN /bin/bash -lc 'set -euo pipefail; \
    if [ "${ENABLE_MORI}" != "1" ]; then \
        echo "[MORI] Skipping (ENABLE_MORI=${ENABLE_MORI})"; \
        exit 0; \
    fi; \
    echo "[MORI] Enabling MORI (NIC_BACKEND=${NIC_BACKEND})"; \
    \
    # Base deps for MORI build
    apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        g++ \
        jq \
        libopenmpi-dev \
        libpci-dev \
        initramfs-tools \
    && rm -rf /var/lib/apt/lists/*; \
    \
    # NIC backend deps
    case "${NIC_BACKEND}" in \
      # default: mlx5
      none) \
        export USE_IONIC="OFF"; \
        export USE_BNXT="OFF"; \
        ;; \
      # AMD NIC
      ainic) \
        export USE_IONIC="ON"; \
        export USE_BNXT="OFF"; \
        apt-get update && apt-get install -y --no-install-recommends ca-certificates curl gnupg apt-transport-https && \
          rm -rf /var/lib/apt/lists/* && mkdir -p /etc/apt/keyrings; \
        curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/amdainic.gpg; \
        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/amdainic.gpg] https://repo.radeon.com/amdainic/pensando/ubuntu/${AINIC_VERSION} ${UBUNTU_CODENAME} main" \
          > /etc/apt/sources.list.d/amdainic.list; \
        apt-get update && apt-get install -y --no-install-recommends \
            libionic-dev \
            ionic-common \
        ; \
        rm -rf /var/lib/apt/lists/*; \
        ;; \
      # TODO: Add Broadcom bnxt packages/repos here later.
      # bnxt) \
      #   export USE_IONIC="OFF"; \
      #   export USE_BNXT="ON"; \
      #   echo "[MORI] NIC_BACKEND=bnxt: USE_BNXT=ON. Add Broadcom bnxt packages/repos here later."; \
      #   ;; \
      *) \
        echo "ERROR: unknown NIC_BACKEND=${NIC_BACKEND}. Use one of: none, ainic"; \
        exit 2; \
        ;; \
    esac; \
    \
    # Build/install MORI
    export MORI_GPU_ARCHS="${GPU_ARCH_LIST}"; \
    echo "[MORI] MORI_GPU_ARCHS=${MORI_GPU_ARCHS} USE_IONIC=${USE_IONIC} USE_BNXT=${USE_BNXT}"; \
    rm -rf /sgl-workspace/mori; \
    git clone "${MORI_REPO}" /sgl-workspace/mori; \
    cd /sgl-workspace/mori; \
    git checkout "${MORI_COMMIT}"; \
    git submodule update --init --recursive; \
    python3 setup.py develop; \
    python3 -c "import os, torch; print(os.path.join(os.path.dirname(torch.__file__), \"lib\"))" > /etc/ld.so.conf.d/torch.conf; \
    ldconfig; \
    echo "export PYTHONPATH=/sgl-workspace/mori:\${PYTHONPATH}" >> /etc/bash.bashrc; \
    echo "[MORI] Done."'

# -----------------------
# Hot patch: torch-ROCm
# The artifact hardcoded the supported triton version to be 3.5.1.
# Rewrite the restriction directly.
ARG TORCH_ROCM_FILE="torch-2.9.1+rocm7.2.0.lw.git7e1940d4-cp310-cp310-linux_x86_64.whl"

# Write the wheel-patching helper to /tmp/whl/hack.py. It is only executed by the
# next RUN, and only for rocm720 flavors. The helper reads /<TORCH_ROCM_FILE> and
# writes the patched wheel to /tmp/<TORCH_ROCM_FILE>.
RUN mkdir /tmp/whl && cd /tmp/whl \
    && export TORCH_ROCM_FILE="${TORCH_ROCM_FILE}" \
    && cat > hack.py <<"PY"
import csv
import os
import re
import zipfile
from pathlib import Path

fname = os.environ["TORCH_ROCM_FILE"]
in_whl = Path("/") / fname
out_whl = Path("/tmp") / fname
# Unpack into a dedicated sub-directory: this script itself lives in /tmp/whl
# and must not be swept into the rebuilt wheel.
work = Path("/tmp/whl") / "unpacked"

# 1) Extract
with zipfile.ZipFile(in_whl, "r") as z:
    z.extractall(work)

# 2) Locate dist-info and patch METADATA
dist_info = next(work.glob("*.dist-info"))
meta = dist_info / "METADATA"
txt = meta.read_text(encoding="utf-8")

# Relax the exact triton pin to a lower bound. The replacement keeps the
# "Requires-Dist:" field name so the line stays a valid metadata header.
pat = r"^Requires-Dist:\s*triton==3\.5\.1[^\s]*;"
txt2, n = re.subn(pat, "Requires-Dist: triton>=3.5.1;", txt, flags=re.MULTILINE)
if n == 0:
    raise SystemExit("Did not find expected Requires-Dist line to replace in METADATA")
meta.write_text(txt2, encoding="utf-8")

# 3) Hacky step: blank hash/size columns in RECORD
record = dist_info / "RECORD"
rows = []
with record.open(newline="", encoding="utf-8") as f:
    for r in csv.reader(f):
        if not r:
            continue
        # keep filename, blank out hash and size
        rows.append([r[0], "", ""])
with record.open("w", newline="", encoding="utf-8") as f:
    csv.writer(f).writerows(rows)

# 4) Re-zip as a wheel
with zipfile.ZipFile(out_whl, "w", compression=zipfile.ZIP_DEFLATED) as z:
    for p in work.rglob("*"):
        if p.is_file():
            z.write(p, p.relative_to(work).as_posix())

print("Wrote", out_whl)
PY

# rocm720 flavors: rebuild the wheel with the relaxed triton requirement,
# reinstall it without touching dependencies, then clean up. Other flavors
# only print a message.
RUN cd /tmp/whl \
    && case "${GPU_ARCH}" in \
      *rocm720*) \
        echo "ROCm 7.2 flavor detected from GPU_ARCH=${GPU_ARCH}"; \
        python hack.py \
        && python3 -m pip install --force-reinstall --no-deps "/tmp/${TORCH_ROCM_FILE}" \
        && rm -fr /tmp/whl "/tmp/${TORCH_ROCM_FILE}" \
        ;; \
      *) \
        echo "Not rocm720 (GPU_ARCH=${GPU_ARCH}), skip patch"; \
        ;; \
    esac

# -----------------------
# Hot patch: Triton
# For ROCm 7.2, this custom build breaks pip dependency management,
# so future `pip install` will break the ROCm stack.
# A workaround for this is to reinstall the default triton
# wheel with the `rocm/pytorch` image in the root directory.
# Earlier steps in this file delete /var/lib/apt/lists, so the package index is
# refreshed before installing cmake and removed again afterwards.
RUN if [ "$BUILD_TRITON" = "1" ]; then \
      pip uninstall -y triton \
      && apt-get update \
      && apt-get install -y cmake \
      && rm -rf /var/lib/apt/lists/* \
      && git clone ${TRITON_REPO} triton-custom \
      && cd triton-custom \
      && git checkout ${TRITON_COMMIT} \
      && pip install -r python/requirements.txt \
      && pip install -e .; \
    fi

# -----------------------
# Performance environment variable.

# Skip CuDNN compatibility check - not applicable for ROCm (uses MIOpen instead)
ENV SGLANG_DISABLE_CUDNN_CHECK=1

ENV HIP_FORCE_DEV_KERNARG=1
ENV HSA_NO_SCRATCH_RECLAIM=1
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
ENV SGLANG_INT4_WEIGHT=0
ENV SGLANG_MOE_PADDING=1
ENV SGLANG_ROCM_DISABLE_LINEARQUANT=0
ENV SGLANG_ROCM_FUSED_DECODE_MLA=1
ENV SGLANG_SET_CPU_AFFINITY=1
ENV SGLANG_USE_AITER=1
ENV SGLANG_USE_ROCM700A=1

ENV NCCL_MIN_NCHANNELS=112
ENV ROCM_QUICK_REDUCE_QUANTIZATION=INT8
ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1

CMD ["/bin/bash"]