# arithmetic-grpo / docker/Dockerfile.stable.vllm
# Last commit by LeTue09: initial clean commit (1faccd4)
# vllm017
# Base image: NVIDIA CUDA 12.9.1 "devel" image on Ubuntu 22.04.
FROM nvidia/cuda:12.9.1-devel-ubuntu22.04
# Build-time only (ARG, not ENV): keeps apt/dpkg from prompting during package installs.
ARG DEBIAN_FRONTEND=noninteractive
# Build-time only: visible to RUN steps as an environment variable, which pip reads
# as its "no cache dir" option, so pip's download cache is not written into layers.
ARG PIP_NO_CACHE_DIR=1
# System packages: build toolchain, libibverbs/librdmacm/libnuma development
# headers, and general utilities. Python 3.12 is then installed from the
# deadsnakes PPA. The apt package lists are removed in the same layer.
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        cmake \
        git \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        numactl \
        software-properties-common \
        vim \
        wget && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y \
        python3.12-dev \
        python3.12 && \
    rm -rf /var/lib/apt/lists/*
# Bootstrap pip for the python3.12 interpreter using the get-pip.py script;
# the script is deleted again within this layer.
RUN wget https://bootstrap.pypa.io/get-pip.py \
    && python3.12 get-pip.py \
    && rm get-pip.py
# Make both `python3` and `python` resolve to the Python 3.12 interpreter.
# The build stops at the first link that cannot be created.
RUN for alias in python3 python; do \
        ln -sf /usr/bin/python3.12 "/usr/bin/${alias}" || exit 1; \
    done
# PyTorch 2.10.0 plus torchvision/torchaudio, all resolved against the PyTorch
# cu129 wheel index (torchvision and torchaudio versions are left to pip).
RUN pip install \
        --index-url https://download.pytorch.org/whl/cu129 \
        torch==2.10.0 \
        torchvision \
        torchaudio
# vLLM, pinned to 0.17.0.
RUN pip install vllm==0.17.0
# pybind11, installed as its own layer before the source builds that follow.
RUN pip install pybind11
# Register NVIDIA's CUDA apt repository through the cuda-keyring package, then
# install the `cudnn` package from it.
# The downloaded keyring .deb is removed in the same layer so it does not persist
# in the image (matching how get-pip.py and the Nsight .deb are cleaned up).
# NOTE(review): the keyring is fetched from the debian12/x86_64 repository while
# the base image is ubuntu22.04 — confirm this repository choice is intentional.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    rm cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
    apt-get -y install cudnn && \
    rm -rf /var/lib/apt/lists/*
# nvidia-mathdx from PyPI (unpinned).
RUN pip install nvidia-mathdx
# Build NVIDIA Apex from the repository's default branch with both the C++ and
# CUDA extensions enabled. MAX_JOBS=128 is passed to the build through the
# environment; --no-build-isolation makes the build use the packages already
# installed in the image instead of a fresh isolated environment.
RUN MAX_JOBS=128 pip install \
        -v \
        --disable-pip-version-check \
        --no-build-isolation \
        --config-settings "--build-option=--cpp_ext" \
        --config-settings "--build-option=--cuda_ext" \
        git+https://github.com/NVIDIA/apex.git
# Build TransformerEngine (branch release_v2.12) from source.
# NVTE_FRAMEWORK, MAX_JOBS and NVTE_BUILD_THREADS_PER_JOB are handed to the build
# via the environment of the pip process; --resume-retries lets pip resume
# interrupted downloads.
RUN NVTE_FRAMEWORK=pytorch \
    MAX_JOBS=128 \
    NVTE_BUILD_THREADS_PER_JOB=4 \
    pip3 install \
        --resume-retries 999 \
        --no-build-isolation \
        git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.12
# RUN pip install --upgrade transformers tokenizers
# Assorted pure-Python helper packages (all unpinned).
RUN pip install \
        cachetools \
        codetiming \
        mathruler \
        pylatexenc \
        pytest-asyncio \
        qwen_vl_utils
# flash_attn 2.8.3 with FLASH_ATTENTION_FORCE_BUILD=TRUE and MAX_JOBS=16 set in
# the environment of the pip process, without build isolation.
RUN FLASH_ATTENTION_FORCE_BUILD="TRUE" \
    MAX_JOBS=16 \
    pip install --no-build-isolation flash_attn==2.8.3
# Install Nsight Systems 2025.6.1 from NVIDIA's .deb, choosing the arm64 package
# on aarch64 hosts and the amd64 package otherwise, then point the nsys / nsys-ui
# entries under /usr/local/cuda/bin at the freshly installed binaries.
# The .deb and the apt package lists are removed in the same layer.
RUN NSIGHT_ARCH="amd64" && \
    if [ "$(uname -m)" = "aarch64" ]; then NSIGHT_ARCH="arm64"; fi && \
    NSIGHT_DEB="nsight-systems-2025.6.1_2025.6.1.190-1_${NSIGHT_ARCH}.deb" && \
    wget "https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_6/${NSIGHT_DEB}" && \
    apt-get update && \
    apt-get install -y libxcb-cursor0 && \
    apt-get install -y "./${NSIGHT_DEB}" && \
    for tool in nsys nsys-ui; do \
        rm -rf "/usr/local/cuda/bin/${tool}" && \
        ln -s "/opt/nvidia/nsight-systems/2025.6.1/${tool}" "/usr/local/cuda/bin/${tool}" || exit 1; \
    done && \
    rm "${NSIGHT_DEB}" && \
    rm -rf /var/lib/apt/lists/*
# =========================
# Install DeepEP
# =========================
# Clone and build deepep and deepep-nvshmem
# Working directory for the source builds below.
WORKDIR /home/dpsk_a2a
# Build gdrcopy v2.5.1 and install its library under /usr/local; the checkout is
# deleted in the same layer once `lib_install` has finished.
RUN git clone -b v2.5.1 https://github.com/NVIDIA/gdrcopy.git \
    && ( cd gdrcopy && make prefix=/usr/local lib_install ) \
    && rm -rf gdrcopy
# Records the gdrcopy install prefix in the image environment.
ENV GDRCOPY_HOME=/usr/local
# Build and install DeepEP from its `hybrid-ep` branch.
#  - NVSHMEM_DIR points at the nvshmem directory inside Python 3.12's
#    dist-packages; its lib/ and bin/ directories are prepended to
#    LD_LIBRARY_PATH and PATH for the duration of this RUN only.
#  - An unversioned libnvshmem_host.so symlink is created next to
#    libnvshmem_host.so.3.
#  - CPATH gains the CUDA toolkit's cccl include directory (x86_64 path).
# The DeepEP checkout stays in the working directory after installation.
RUN git clone -b hybrid-ep https://github.com/deepseek-ai/DeepEP.git && \
    export NVSHMEM_DIR=/usr/local/lib/python3.12/dist-packages/nvidia/nvshmem && \
    export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:$LD_LIBRARY_PATH" && \
    export PATH="${NVSHMEM_DIR}/bin:$PATH" && \
    export CPATH=/usr/local/cuda/targets/x86_64-linux/include/cccl:$CPATH && \
    ln -sf libnvshmem_host.so.3 "${NVSHMEM_DIR}/lib/libnvshmem_host.so" && \
    cd DeepEP && \
    python setup.py install
# trl 0.27.0 without its dependencies, so already-installed packages are not touched.
RUN pip3 install --no-deps trl==0.27.0
# Profiling / plotting / kernel helper packages (unpinned).
RUN pip3 install \
        nvtx \
        matplotlib \
        liger_kernel
# mbridge from the repository's default branch, upgrading any existing install.
RUN pip install --upgrade git+https://github.com/ISEEKYAN/mbridge.git
# Megatron-LM at tag core_v0.16.0, again without pulling in dependencies.
RUN pip install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.16.0
# Install verl v0.7.0 so that pip pulls in its dependencies, then uninstall the
# verl package itself; the dependencies remain in the image.
# NOTE(review): presumably verl is supplied separately at runtime — confirm.
RUN pip install git+https://github.com/volcengine/verl.git@v0.7.0 \
    && pip uninstall --yes verl
# curl, installed in its own layer; apt package lists are cleaned up afterwards.
RUN apt-get update && \
    apt-get install -y curl && \
    rm -rf /var/lib/apt/lists/*
# Force the cuDNN 9 runtime, dev and headers packages for CUDA 12 to one exact
# version. Downgrades and changes to held packages are explicitly allowed so the
# pin wins over whatever version an earlier step installed.
RUN CUDNN_PKG_VERSION=9.16.0.29-1 && \
    apt-get update && \
    apt-get install -y --allow-downgrades --allow-change-held-packages \
        "libcudnn9-cuda-12=${CUDNN_PKG_VERSION}" \
        "libcudnn9-dev-cuda-12=${CUDNN_PKG_VERSION}" \
        "libcudnn9-headers-cuda-12=${CUDNN_PKG_VERSION}" && \
    rm -rf /var/lib/apt/lists/*