Update Dockerfile
Browse files- Dockerfile +74 -64
Dockerfile
CHANGED
|
@@ -1,13 +1,15 @@
|
|
| 1 |
-
# Dockerfile
|
| 2 |
-
|
|
|
|
| 3 |
ARG CUDA_VERSION=12.1.0
|
| 4 |
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04
|
| 5 |
-
|
| 6 |
FROM ${from} as base
|
| 7 |
-
|
|
|
|
| 8 |
ARG DEBIAN_FRONTEND=noninteractive
|
| 9 |
-
|
| 10 |
-
|
|
|
|
| 11 |
git \
|
| 12 |
git-lfs \
|
| 13 |
python3 \
|
|
@@ -20,88 +22,96 @@ apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
|
|
| 20 |
software-properties-common \
|
| 21 |
ffmpeg \
|
| 22 |
&& rm -rf /var/lib/apt/lists/*
|
| 23 |
-
|
| 24 |
-
|
| 25 |
RUN wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-Linux-x86_64.sh \
|
| 26 |
-
-q -O /tmp/cmake-install.sh \
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
| 33 |
RUN ln -s /usr/bin/python3 /usr/bin/python
|
| 34 |
-
|
|
|
|
| 35 |
RUN git lfs install
|
| 36 |
-
|
|
|
|
| 37 |
FROM base as dev
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
RUN mkdir -p /data/shared/Qwen
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
FROM dev as bundle_req
|
|
|
|
|
|
|
| 46 |
RUN --mount=type=cache,target=/root/.cache/pip pip3 install networkx==3.1
|
| 47 |
RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 xformers==0.0.29.post2
|
| 48 |
-
RUN --mount=type=cache,target=/root/.cache/pip pip3 install git+https://github.com/huggingface/transformers@3a1ead0aabed473eafe527915eea8c197d424356
|
| 49 |
-
|
| 50 |
-
|
|
|
|
| 51 |
FROM bundle_req as bundle_vllm
|
| 52 |
-
|
| 53 |
ARG BUNDLE_FLASH_ATTENTION=true
|
| 54 |
-
|
| 55 |
ENV MAX_JOBS=8
|
| 56 |
ENV NVCC_THREADS=1
|
| 57 |
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
|
| 58 |
ENV VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
|
| 59 |
ENV CCACHE_DIR=/root/.cache/ccache
|
| 60 |
-
|
| 61 |
RUN --mount=type=cache,target=/root/.cache/ccache \
|
| 62 |
--mount=type=cache,target=/root/.cache/pip \
|
| 63 |
if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then \
|
| 64 |
-
mkdir -p /data/shared/code \
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
fi
|
| 73 |
-
|
| 74 |
ARG BUNDLE_VLLM=true
|
| 75 |
-
|
| 76 |
RUN --mount=type=cache,target=/root/.cache/ccache \
|
| 77 |
--mount=type=cache,target=/root/.cache/pip \
|
| 78 |
if [ "$BUNDLE_VLLM" = "true" ]; then \
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
fi
|
| 90 |
-
|
| 91 |
RUN --mount=type=cache,target=/root/.cache/pip \
|
| 92 |
-
pip3 install \
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
RUN rm -rvf /root/.cache/pip
|
| 102 |
-
|
|
|
|
| 103 |
COPY server.py ./
|
| 104 |
-
|
|
|
|
| 105 |
EXPOSE 8000
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
| 1 |
+
# Dockerfile (renamed from Dockerfile-omni-2.5-cu121 to Dockerfile)
|
| 2 |
+
|
| 3 |
+
# Base image: NVIDIA CUDA devel image with cuDNN 8 on Ubuntu 22.04 (CUDA_VERSION defaults to 12.1.0; override with --build-arg)
|
| 4 |
ARG CUDA_VERSION=12.1.0
|
| 5 |
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04
|
|
|
|
| 6 |
# Stage "base": starts from the image selected by the `from` build arg.
# AS is upper-cased to match FROM (avoids BuildKit's FromAsCasing warning).
FROM ${from} AS base
|
| 7 |
+
|
| 8 |
+
# Set non-interactive frontend for APT
|
| 9 |
ARG DEBIAN_FRONTEND=noninteractive
|
| 10 |
+
|
| 11 |
+
# Update and install required packages
|
| 12 |
+
RUN apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
|
| 13 |
git \
|
| 14 |
git-lfs \
|
| 15 |
python3 \
|
|
|
|
| 22 |
software-properties-common \
|
| 23 |
ffmpeg \
|
| 24 |
&& rm -rf /var/lib/apt/lists/*
|
| 25 |
+
|
| 26 |
+
# Install CMake 3.26.1
|
| 27 |
# CMake release to install; defaults to 3.26.1 and can be overridden with
# --build-arg CMAKE_VERSION=<x.y.z>.
ARG CMAKE_VERSION=3.26.1
# Download the self-extracting Kitware installer, unpack it under
# /opt/cmake-<version>, delete the installer in the same layer, and symlink
# the installed binaries into /usr/local/bin.
RUN wget -q -O /tmp/cmake-install.sh \
        "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh" \
    && chmod u+x /tmp/cmake-install.sh \
    && mkdir -p "/opt/cmake-${CMAKE_VERSION}" \
    && /tmp/cmake-install.sh --skip-license --prefix="/opt/cmake-${CMAKE_VERSION}" \
    && rm /tmp/cmake-install.sh \
    && ln -s "/opt/cmake-${CMAKE_VERSION}"/bin/* /usr/local/bin
|
| 34 |
+
|
| 35 |
+
# Ensure "python" command points to python3
|
| 36 |
# Make `python` resolve to python3. -f replaces an existing /usr/bin/python
# link or file instead of failing the build when one is already present.
RUN ln -sf /usr/bin/python3 /usr/bin/python
|
| 37 |
+
|
| 38 |
+
# Setup Git LFS
|
| 39 |
RUN git lfs install
|
| 40 |
+
|
| 41 |
+
# --------- Development Stage ---------
|
| 42 |
# Stage "dev": continues from the base stage.
# AS is upper-cased to match FROM (avoids BuildKit's FromAsCasing warning).
FROM base AS dev
|
| 43 |
+
|
| 44 |
+
# Set working directory and create needed directories
|
| 45 |
+
# WORKDIR creates the directory when it does not exist, so a separate
# `RUN mkdir -p` layer for the same path is unnecessary.
WORKDIR /data/shared/Qwen
|
| 47 |
+
|
| 48 |
+
# --------- Install Basic Python Requirements ---------
|
|
|
|
| 49 |
# Stage "bundle_req": continues from the dev stage.
# AS is upper-cased to match FROM (avoids BuildKit's FromAsCasing warning).
FROM dev AS bundle_req
|
| 50 |
+
|
| 51 |
+
# Use BuildKit cache for pip installs
|
| 52 |
# Install the pinned networkx release. pip's download cache is kept in a
# BuildKit cache mount on /root/.cache/pip.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install networkx==3.1
|
| 53 |
# Install the pinned PyTorch stack (torch, torchvision, torchaudio) together
# with xformers in a single pip invocation.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install \
        torch==2.6.0 \
        torchvision==0.21.0 \
        torchaudio==2.6.0 \
        xformers==0.0.29.post2
|
| 54 |
+
# Install transformers from a pinned Git commit, then three companion
# packages in a second pip invocation within the same layer.
# NOTE(review): accelerate, qwen-omni-utils and modelscope_studio carry no
# version pin, so rebuilds may pick up different releases.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install git+https://github.com/huggingface/transformers@3a1ead0aabed473eafe527915eea8c197d424356 \
    && pip3 install \
        accelerate \
        qwen-omni-utils \
        modelscope_studio
|
| 56 |
+
|
| 57 |
+
# --------- Optional: Bundle Additional Packages ---------
|
| 58 |
# Stage "bundle_vllm": continues from the bundle_req stage.
# AS is upper-cased to match FROM (avoids BuildKit's FromAsCasing warning).
FROM bundle_req AS bundle_vllm
|
| 59 |
+
|
| 60 |
# Build-time toggle (default "true"). A later RUN step in this stage tests it
# and builds flash-attention only when it equals "true".
ARG BUNDLE_FLASH_ATTENTION=true
|
| 61 |
+
|
| 62 |
# Environment for the source builds in this stage, set in one instruction.
# CCACHE_DIR matches the target of the ccache cache mount used by the RUN
# steps in this stage.
# NOTE(review): the remaining variables are presumably read by the
# flash-attention / vLLM build scripts — confirm against those projects.
ENV MAX_JOBS=8 \
    NVCC_THREADS=1 \
    TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX" \
    VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real" \
    CCACHE_DIR=/root/.cache/ccache
|
| 67 |
+
|
| 68 |
# Optional flash-attention source build, run only when
# BUNDLE_FLASH_ATTENTION is "true". It installs ninja, clones the repository
# into /data/shared/code, runs setup.py install, and deletes the checkout in
# the same layer. The ccache and pip directories are BuildKit cache mounts.
# NOTE(review): the clone has no tag or commit pin, so it builds whatever the
# default branch points at on the day of the build.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then \
        mkdir -p /data/shared/code \
        && pip install ninja \
        && cd /data/shared/code \
        && git clone https://github.com/Dao-AILab/flash-attention.git \
        && cd flash-attention \
        && python setup.py install \
        && cd /data/shared/Qwen \
        && rm -rf /data/shared/code/flash-attention; \
    fi
|
| 80 |
+
|
| 81 |
# Build-time toggle (default "true"). The RUN step that follows tests it and
# builds vLLM from source only when it equals "true".
ARG BUNDLE_VLLM=true
|
| 82 |
+
|
| 83 |
# Optional vLLM source build, run only when BUNDLE_VLLM is "true". It clones
# the qwen2_omni_public_v1 branch of fyabc/vllm, runs use_existing_torch.py,
# installs setuptools_scm and requirements/cuda.txt, installs the package
# without build isolation, and deletes the checkout in the same layer.
# NOTE(review): use_existing_torch.py presumably makes the build reuse the
# torch already installed in the bundle_req stage — confirm in the vLLM repo.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$BUNDLE_VLLM" = "true" ]; then \
        mkdir -p /data/shared/code \
        && cd /data/shared/code \
        && git clone -b qwen2_omni_public_v1 https://github.com/fyabc/vllm.git \
        && cd vllm \
        && python3 use_existing_torch.py \
        && pip3 install setuptools_scm \
        && pip3 install -r requirements/cuda.txt \
        && pip3 install . --no-build-isolation \
        && cd /data/shared/Qwen \
        && rm -rf /data/shared/code/vllm; \
    fi
|
| 97 |
+
|
| 98 |
# Python packages installed after the optional source builds, one package
# per line.
# NOTE(review): `ffmpeg==1.4` is a PyPI distribution separate from
# `ffmpeg-python` — confirm both are needed. `av` has no version pin.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install \
        gradio==5.21.0 \
        gradio_client==1.7.2 \
        librosa==0.11.0 \
        ffmpeg==1.4 \
        ffmpeg-python==0.2.0 \
        soundfile==0.13.1 \
        av
|
| 106 |
+
|
| 107 |
+
# Clean up pip cache
|
| 108 |
# NOTE(review): every pip invocation visible in this Dockerfile runs with a
# BuildKit cache mount on /root/.cache/pip, so the cache contents should live
# in the mount rather than in an image layer. This step is then likely a
# no-op for image size — confirm and consider dropping it.
RUN rm -rvf /root/.cache/pip
|
| 109 |
+
|
| 110 |
+
# --------- Application code (added to the bundle_vllm stage; no new stage starts here) ---------
|
| 111 |
# Relative destination: ./ resolves against the WORKDIR inherited from the
# dev stage (/data/shared/Qwen).
COPY server.py ./
|
| 112 |
+
|
| 113 |
+
# Expose port 8000 (matching the port used in server.py)
|
| 114 |
EXPOSE 8000
|
| 115 |
+
|
| 116 |
+
# Set default command to run your server
|
| 117 |
+
CMD ["python3", "server.py"]
|