ArtemisTAO
/

WIN_21

@@ -1,13 +1,15 @@
-# Dockerfile of qwenllm/qwen-omni:2.5-cu121
 ARG CUDA_VERSION=12.1.0
 ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04
 FROM ${from} as base
 ARG DEBIAN_FRONTEND=noninteractive
-RUN <<EOF
-apt update -y && apt upgrade -y && apt install -y --no-install-recommends  \
     git \
     git-lfs \
     python3 \
@@ -20,88 +22,96 @@ apt update -y && apt upgrade -y && apt install -y --no-install-recommends  \
     software-properties-common \
     ffmpeg \
 && rm -rf /var/lib/apt/lists/*
-EOF
 RUN wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-Linux-x86_64.sh \
-    -q -O /tmp/cmake-install.sh \
-    && chmod u+x /tmp/cmake-install.sh \
-    && mkdir /opt/cmake-3.26.1 \
-    && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-3.26.1 \
-    && rm /tmp/cmake-install.sh \
-    && ln -s /opt/cmake-3.26.1/bin/* /usr/local/bin
 RUN ln -s /usr/bin/python3 /usr/bin/python
 RUN git lfs install
 FROM base as dev
-WORKDIR /
-RUN mkdir -p /data/shared/Qwen
-WORKDIR /data/shared/Qwen/
 FROM dev as bundle_req
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install networkx==3.1
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 xformers==0.0.29.post2
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install git+https://github.com/huggingface/transformers@3a1ead0aabed473eafe527915eea8c197d424356  \
-    && pip3 install accelerate qwen-omni-utils modelscope_studio
 FROM bundle_req as bundle_vllm
 ARG BUNDLE_FLASH_ATTENTION=true
 ENV MAX_JOBS=8
 ENV NVCC_THREADS=1
 ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 ENV VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
     if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then \
-        mkdir -p /data/shared/code \
-        && pip install ninja \
-        && cd /data/shared/code \
-        && git clone https://github.com/Dao-AILab/flash-attention.git \
-        && cd flash-attention \
-        && python setup.py install \
-        && cd /data/shared/Qwen \
-        && rm -rf /data/shared/code/flash-attention; \
     fi
 ARG BUNDLE_VLLM=true
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
     if [ "$BUNDLE_VLLM" = "true" ]; then \
-    mkdir -p /data/shared/code \
-        && cd /data/shared/code \
-        && git clone -b qwen2_omni_public_v1 https://github.com/fyabc/vllm.git \
-        && cd vllm \
-        && python3 use_existing_torch.py \
-        && pip3 install setuptools_scm \
-        && pip3 install -r requirements/cuda.txt \
-        && pip3 install . --no-build-isolation\
-        && cd /data/shared/Qwen \
-        && rm -rf /data/shared/code/vllm; \
     fi
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install \
-    gradio==5.21.0 \
-    gradio_client==1.7.2 \
-    librosa==0.11.0 \
-    ffmpeg==1.4 \
-    ffmpeg-python==0.2.0 \
-    soundfile==0.13.1 \
-    av
 RUN rm -rvf /root/.cache/pip
 COPY server.py ./
 EXPOSE 8000
-CMD ["python3", "server.py"]

+# Dockerfile (renamed from Dockerfile-omni-2.5-cu121 to Dockerfile)
+# Base image using CUDA 12.1.0 with cudnn8 on Ubuntu 22.04
 ARG CUDA_VERSION=12.1.0
 ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04
 FROM ${from} as base
+# Set non-interactive frontend for APT
 ARG DEBIAN_FRONTEND=noninteractive
+# Update and install required packages
+RUN apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
     git \
     git-lfs \
     python3 \
     software-properties-common \
     ffmpeg \
 && rm -rf /var/lib/apt/lists/*
+# Install CMake 3.26.1
 RUN wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-Linux-x86_64.sh \
+    -q -O /tmp/cmake-install.sh && \
+    chmod u+x /tmp/cmake-install.sh && \
+    mkdir /opt/cmake-3.26.1 && \
+    /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-3.26.1 && \
+    rm /tmp/cmake-install.sh && \
+    ln -s /opt/cmake-3.26.1/bin/* /usr/local/bin
+# Ensure "python" command points to python3
 RUN ln -s /usr/bin/python3 /usr/bin/python
+# Setup Git LFS
 RUN git lfs install
+# --------- Development Stage ---------
 FROM base as dev
+# Set working directory and create needed directories
+WORKDIR /data/shared/Qwen
+RUN mkdir -p /data/shared/Qwen/
+# --------- Install Basic Python Requirements ---------
 FROM dev as bundle_req
+# Use BuildKit cache for pip installs
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install networkx==3.1
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 xformers==0.0.29.post2
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install git+https://github.com/huggingface/transformers@3a1ead0aabed473eafe527915eea8c197d424356 \
+&& pip3 install accelerate qwen-omni-utils modelscope_studio
+# --------- Optional: Bundle Additional Packages ---------
 FROM bundle_req as bundle_vllm
 ARG BUNDLE_FLASH_ATTENTION=true
 ENV MAX_JOBS=8
 ENV NVCC_THREADS=1
 ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 ENV VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
     if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then \
+        mkdir -p /data/shared/code && \
+        pip install ninja && \
+        cd /data/shared/code && \
+        git clone https://github.com/Dao-AILab/flash-attention.git && \
+        cd flash-attention && \
+        python setup.py install && \
+        cd /data/shared/Qwen && \
+        rm -rf /data/shared/code/flash-attention; \
     fi
 ARG BUNDLE_VLLM=true
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
     if [ "$BUNDLE_VLLM" = "true" ]; then \
+        mkdir -p /data/shared/code && \
+        cd /data/shared/code && \
+        git clone -b qwen2_omni_public_v1 https://github.com/fyabc/vllm.git && \
+        cd vllm && \
+        python3 use_existing_torch.py && \
+        pip3 install setuptools_scm && \
+        pip3 install -r requirements/cuda.txt && \
+        pip3 install . --no-build-isolation && \
+        cd /data/shared/Qwen && \
+        rm -rf /data/shared/code/vllm; \
     fi
 RUN --mount=type=cache,target=/root/.cache/pip \
+    pip3 install gradio==5.21.0 \
+                    gradio_client==1.7.2 \
+                    librosa==0.11.0 \
+                    ffmpeg==1.4 \
+                    ffmpeg-python==0.2.0 \
+                    soundfile==0.13.1 \
+                    av
+# Clean up pip cache
 RUN rm -rvf /root/.cache/pip
+# --------- Final Stage: Copy Your Server Code ---------
 COPY server.py ./
+# Expose port 8000 (matching the port used in server.py)
 EXPOSE 8000
+# Set default command to run your server
+CMD ["python3", "server.py"]