| |
|
|
# Base image: NVIDIA CUDA 12.9.1 "devel" variant on Ubuntu 22.04.
FROM nvidia/cuda:12.9.1-devel-ubuntu22.04

# Build-time-only settings. They are declared as ARG (not ENV), so they are
# visible to the RUN steps below but are not baked into the runtime env.
#   PIP_NO_CACHE_DIR=1             -> pip does not keep a download/wheel cache
#   DEBIAN_FRONTEND=noninteractive -> apt/dpkg never prompt during the build
ARG PIP_NO_CACHE_DIR=1
ARG DEBIAN_FRONTEND=noninteractive
|
|
# OS-level dependencies, installed in a single layer:
#   1. build toolchain, RDMA/NUMA development packages and basic utilities
#   2. the deadsnakes PPA (added via add-apt-repository from
#      software-properties-common)
#   3. Python 3.12 interpreter and headers from that PPA
# The apt package index is removed at the end so it does not stay in the layer.
RUN apt-get update \
 && apt-get install -y \
        build-essential \
        cmake \
        git \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        numactl \
        software-properties-common \
        vim \
        wget \
 && add-apt-repository ppa:deadsnakes/ppa -y \
 && apt-get update \
 && apt-get install -y \
        python3.12 \
        python3.12-dev \
 && rm -rf /var/lib/apt/lists/*
|
|
# Bootstrap pip for Python 3.12 with the upstream get-pip.py script; the
# script is deleted in the same layer once pip is installed.
RUN wget https://bootstrap.pypa.io/get-pip.py \
 && python3.12 get-pip.py \
 && rm get-pip.py

# Point both `python3` and `python` at the 3.12 interpreter.
# NOTE(review): this replaces the distribution's /usr/bin/python3 link;
# confirm that no system tool used later still needs the stock interpreter.
RUN for alias in python3 python; do \
        ln -sf /usr/bin/python3.12 "/usr/bin/${alias}" || exit 1; \
    done
|
|
# PyTorch 2.10.0 (plus torchvision/torchaudio) from the CUDA 12.9 wheel index.
RUN pip install \
        torch==2.10.0 \
        torchvision \
        torchaudio \
        --index-url https://download.pytorch.org/whl/cu129

# Inference engine, pinned.
RUN pip install \
        vllm==0.17.0

# pybind11, installed before the source builds further down.
# NOTE(review): presumably a build dependency of those extensions - confirm.
RUN pip install \
        pybind11
|
|
# Register NVIDIA's CUDA apt repository through the cuda-keyring package and
# install the `cudnn` meta-package from it.
#
# The downloaded .deb is removed as soon as dpkg has installed it, so it does
# not remain in the image layer (same pattern as get-pip.py and the Nsight
# Systems package elsewhere in this file).
#
# NOTE(review): the keyring comes from the *debian12* repository while the
# base image is Ubuntu 22.04, and the URL is x86_64-only - confirm that this
# is intentional.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    rm cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
    apt-get -y install cudnn && \
    rm -rf /var/lib/apt/lists/*
|
|
# Number of parallel compile jobs for the Apex and Transformer Engine source
# builds below. The default keeps the previously hard-coded value of 128;
# lower it on hosts with fewer cores or less RAM, e.g.
#   docker build --build-arg NV_EXT_MAX_JOBS=16 .
ARG NV_EXT_MAX_JOBS=128

# NOTE(review): nvidia-mathdx is presumably needed by the builds below - confirm.
RUN pip install nvidia-mathdx

# NVIDIA Apex built from source with its C++ and CUDA extensions enabled.
# --no-build-isolation makes the build use the torch already installed in
# this image instead of a throwaway build environment.
RUN MAX_JOBS="${NV_EXT_MAX_JOBS}" \
    pip install -v --disable-pip-version-check --no-build-isolation \
        --config-settings "--build-option=--cpp_ext" \
        --config-settings "--build-option=--cuda_ext" \
        git+https://github.com/NVIDIA/apex.git

# Transformer Engine (release_v2.12 branch) built for the PyTorch framework.
# The NVTE_* variables are read by its build; --resume-retries lets pip resume
# interrupted downloads.
RUN export NVTE_FRAMEWORK=pytorch && \
    MAX_JOBS="${NV_EXT_MAX_JOBS}" \
    NVTE_BUILD_THREADS_PER_JOB=4 \
    pip3 install --resume-retries 999 --no-build-isolation \
        git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.12
|
|
| |
|
|
# Small pure-Python helpers and test utilities.
RUN pip install \
        codetiming \
        mathruler \
        pylatexenc \
        qwen_vl_utils \
        cachetools \
        pytest-asyncio

# FlashAttention 2.8.3 with FLASH_ATTENTION_FORCE_BUILD set and parallelism
# capped at 16 jobs.
# NOTE(review): the variable presumably forces a source build instead of a
# prebuilt wheel - confirm against the flash-attn setup script.
RUN FLASH_ATTENTION_FORCE_BUILD="TRUE" \
    MAX_JOBS=16 \
    pip install --no-build-isolation flash_attn==2.8.3
|
|
# Install Nsight Systems 2025.6.1 from NVIDIA's standalone .deb (arm64 on
# aarch64 hosts, amd64 otherwise) and repoint the CUDA toolkit's nsys /
# nsys-ui entries at that installation. The .deb and the apt package index
# are removed in the same layer.
# NOTE(review): the symlink targets under /opt/nvidia/nsight-systems/2025.6.1
# are not checked for existence here - confirm they match the package layout.
RUN case "$(uname -m)" in \
        aarch64) deb_arch="arm64" ;; \
        *)       deb_arch="amd64" ;; \
    esac \
 && nsight_deb="nsight-systems-2025.6.1_2025.6.1.190-1_${deb_arch}.deb" \
 && nsight_root="/opt/nvidia/nsight-systems/2025.6.1" \
 && cuda_bin="/usr/local/cuda/bin" \
 && wget "https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_6/${nsight_deb}" \
 && apt-get update \
 && apt-get install -y libxcb-cursor0 \
 && apt-get install -y "./${nsight_deb}" \
 && rm -rf "${cuda_bin}/nsys" \
 && ln -s "${nsight_root}/nsys" "${cuda_bin}/nsys" \
 && rm -rf "${cuda_bin}/nsys-ui" \
 && ln -s "${nsight_root}/nsys-ui" "${cuda_bin}/nsys-ui" \
 && rm "${nsight_deb}" \
 && rm -rf /var/lib/apt/lists/*
|
|
| |
| |
| |
| |
# Working directory for the source checkouts that follow.
WORKDIR /home/dpsk_a2a

# Build and install the GDRCopy v2.5.1 library under /usr/local, then drop
# the checkout in the same layer so the sources do not stay in the image.
RUN git clone -b v2.5.1 https://github.com/NVIDIA/gdrcopy.git \
 && ( cd gdrcopy && make prefix=/usr/local lib_install ) \
 && rm -rf gdrcopy

# Install prefix of GDRCopy, exported for later build and runtime steps.
ENV GDRCOPY_HOME=/usr/local
|
|
# Build and install DeepEP (hybrid-ep branch) against the NVSHMEM files that
# live in the pip-installed `nvidia/nvshmem` directory.
#
# Steps:
#   - select the CUDA target directory for the host architecture
#     (sbsa-linux on aarch64, x86_64-linux otherwise) so the CCCL include
#     path is not tied to x86_64; the Nsight Systems step in this file
#     already handles aarch64 the same way
#   - expose NVSHMEM's lib/ and bin/ directories to the build
#   - create the unversioned libnvshmem_host.so link next to
#     libnvshmem_host.so.3
#   - add the toolkit's CCCL headers to CPATH and run the setup script
#
# NOTE(review): the DeepEP checkout is left in /home/dpsk_a2a, and
# `hybrid-ep` is a branch name rather than a fixed revision - confirm both
# are intended.
RUN git clone -b hybrid-ep https://github.com/deepseek-ai/DeepEP.git && \
    cuda_target="$(if [ "$(uname -m)" = "aarch64" ]; then echo "sbsa-linux"; else echo "x86_64-linux"; fi)" && \
    export NVSHMEM_DIR=/usr/local/lib/python3.12/dist-packages/nvidia/nvshmem && \
    export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:$LD_LIBRARY_PATH" && \
    export PATH="${NVSHMEM_DIR}/bin:$PATH" && \
    cd ${NVSHMEM_DIR}/lib && \
    ln -sf libnvshmem_host.so.3 libnvshmem_host.so && \
    cd /home/dpsk_a2a/DeepEP && \
    export CPATH="/usr/local/cuda/targets/${cuda_target}/include/cccl:$CPATH" && \
    python setup.py install
|
|
# TRL 0.27.0 without its dependencies (--no-deps leaves installed packages
# untouched).
RUN pip3 install --no-deps \
        trl==0.27.0

# Profiling, plotting and kernel helpers.
RUN pip3 install \
        nvtx \
        matplotlib \
        liger_kernel

# mbridge from its default branch, upgraded if already present.
RUN pip install --upgrade \
        git+https://github.com/ISEEKYAN/mbridge.git

# Megatron-LM core_v0.16.0, again without dependency resolution.
RUN pip install --no-deps \
        git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.16.0

# Install verl v0.7.0 and uninstall it again in the same layer.
# NOTE(review): presumably this leaves verl's dependencies installed while the
# package itself is supplied separately - confirm.
RUN pip install git+https://github.com/volcengine/verl.git@v0.7.0 \
 && pip uninstall -y verl
|
|
# curl for use inside the container.
RUN apt-get update \
 && apt-get install -y \
        curl \
 && rm -rf /var/lib/apt/lists/*

# Pin the cuDNN 9 runtime, development and header packages to 9.16.0.29-1.
# --allow-downgrades and --allow-change-held-packages let apt replace an
# already installed (possibly newer or held) cuDNN with this exact version.
RUN apt-get update \
 && apt-get install -y --allow-downgrades --allow-change-held-packages \
        libcudnn9-cuda-12=9.16.0.29-1 \
        libcudnn9-dev-cuda-12=9.16.0.29-1 \
        libcudnn9-headers-cuda-12=9.16.0.29-1 \
 && rm -rf /var/lib/apt/lists/*
|
|