| |
| |
| |
| # Base image is overridable at build time with --build-arg TRTLLM_BASE_IMAGE=<image>. |
| # The default is pinned to the TensorRT-LLM release tag 1.3.0rc4 on nvcr.io. |
| ARG TRTLLM_BASE_IMAGE=nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc4 |
| FROM $TRTLLM_BASE_IMAGE |
|
|
|
|
| |
| |
| |
| |
| |
| # Scratch directory in which gdrcopy and DeepEP are cloned and built. |
| WORKDIR /home/dpsk_a2a |
| # One layer: every source tree is cloned, built and deleted inside the same RUN, |
| # so the checkouts do not persist in the image. |
| #   1. gdrcopy v2.5.1: run the lib_install target with prefix=/usr/local. |
| #   2. nvidia-nvshmem-cu13 3.3.20 from pip. NVSHMEM_DIR hard-codes the Python 3.12 |
| #      dist-packages path - NOTE(review): must match the base image's Python; confirm on base bumps. |
| #      The exported variables only live for this RUN; they are not stored in the image environment. |
| #      An unversioned libnvshmem_host.so symlink is created next to libnvshmem_host.so.3 |
| #      (-f keeps the step from failing if the link already exists). |
| #   3. DeepEP v1.2.1, patched with deepep.patch from Megatron-LM core_v0.15.0, |
| #      built for CUDA architectures 9.0, 10.0 and 12.0. |
| #      NOTE(review): the patch is downloaded without a checksum; consider verifying it. |
| # Subshells with cd are used instead of pushd/popd so the chain does not require bash. |
| # The final cleanup removes "DeepEP" (the actual, case-sensitive clone directory). |
| RUN git clone -b v2.5.1 https://github.com/NVIDIA/gdrcopy.git && \ |
| (cd gdrcopy && make prefix=/usr/local lib_install) && \ |
| rm -rf gdrcopy && \ |
| pip install --no-cache-dir nvidia-nvshmem-cu13==3.3.20 && \ |
| export NVSHMEM_DIR=/usr/local/lib/python3.12/dist-packages/nvidia/nvshmem && \ |
| export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:$LD_LIBRARY_PATH" && \ |
| export PATH="${NVSHMEM_DIR}/bin:$PATH" && \ |
| ln -sf libnvshmem_host.so.3 "${NVSHMEM_DIR}/lib/libnvshmem_host.so" && \ |
| git clone -b v1.2.1 https://github.com/deepseek-ai/DeepEP.git && \ |
| (cd DeepEP && \ |
| wget https://raw.githubusercontent.com/NVIDIA/Megatron-LM/refs/tags/core_v0.15.0/docker/patches/deepep.patch && \ |
| patch -p1 < deepep.patch && \ |
| TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" python setup.py install) && \ |
| rm -rf DeepEP |
|
|
| |
| # Extra Python packages, all installed without keeping pip's download cache: |
| #   - trl and Megatron-LM (tag core_v0.15.0) use --no-deps, so pip installs only |
| #     those packages and none of their declared dependencies; |
| #   - cachetools, liger_kernel, matplotlib and nvtx are installed with dependencies; |
| #   - mbridge is installed/upgraded from the default branch of its Git repository (unpinned). |
| RUN pip3 install --no-cache-dir --no-deps trl && \ |
| pip3 install --no-cache-dir \ |
| cachetools \ |
| liger_kernel \ |
| matplotlib \ |
| nvtx && \ |
| pip install --no-cache-dir --upgrade git+https://github.com/ISEEKYAN/mbridge.git && \ |
| pip install --no-cache-dir --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.15.0 |
|
|
|
|
| |
| |
| |
| # Install verl v0.7.0 (plain, then with the "mcore" extra) and uninstall the verl |
| # package after each install. Net effect: verl's dependencies stay installed while |
| # verl itself is absent - presumably so verl is supplied separately later; confirm. |
| # Everything runs in a single RUN with --no-cache-dir so the removed package and |
| # pip's download cache are not retained in intermediate image layers. |
| RUN pip install --no-cache-dir git+https://github.com/volcengine/verl.git@v0.7.0 && \ |
| pip uninstall -y verl && \ |
| pip install --no-cache-dir "verl[mcore] @ git+https://github.com/volcengine/verl.git@v0.7.0" && \ |
| pip uninstall -y verl |
|
|
|
|
| |
| |
| |
| |
| |
|
|