ArtemisTAO committed on
Commit
6beae45
·
verified ·
1 Parent(s): 0648639

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +74 -64
Dockerfile CHANGED
@@ -1,13 +1,15 @@
1
- # Dockerfile of qwenllm/qwen-omni:2.5-cu121
2
-
 
3
  ARG CUDA_VERSION=12.1.0
4
  ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04
5
-
6
  FROM ${from} as base
7
-
 
8
  ARG DEBIAN_FRONTEND=noninteractive
9
- RUN <<EOF
10
- apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
 
11
  git \
12
  git-lfs \
13
  python3 \
@@ -20,88 +22,96 @@ apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
20
  software-properties-common \
21
  ffmpeg \
22
  && rm -rf /var/lib/apt/lists/*
23
- EOF
24
-
25
  RUN wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-Linux-x86_64.sh \
26
- -q -O /tmp/cmake-install.sh \
27
- && chmod u+x /tmp/cmake-install.sh \
28
- && mkdir /opt/cmake-3.26.1 \
29
- && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-3.26.1 \
30
- && rm /tmp/cmake-install.sh \
31
- && ln -s /opt/cmake-3.26.1/bin/* /usr/local/bin
32
-
 
33
  RUN ln -s /usr/bin/python3 /usr/bin/python
34
-
 
35
  RUN git lfs install
36
-
 
37
  FROM base as dev
38
-
39
- WORKDIR /
40
-
41
- RUN mkdir -p /data/shared/Qwen
42
-
43
- WORKDIR /data/shared/Qwen/
44
-
45
  FROM dev as bundle_req
 
 
46
  RUN --mount=type=cache,target=/root/.cache/pip pip3 install networkx==3.1
47
  RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 xformers==0.0.29.post2
48
- RUN --mount=type=cache,target=/root/.cache/pip pip3 install git+https://github.com/huggingface/transformers@3a1ead0aabed473eafe527915eea8c197d424356 \
49
- && pip3 install accelerate qwen-omni-utils modelscope_studio
50
-
 
51
  FROM bundle_req as bundle_vllm
52
-
53
  ARG BUNDLE_FLASH_ATTENTION=true
54
-
55
  ENV MAX_JOBS=8
56
  ENV NVCC_THREADS=1
57
  ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
58
  ENV VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
59
  ENV CCACHE_DIR=/root/.cache/ccache
60
-
61
  RUN --mount=type=cache,target=/root/.cache/ccache \
62
  --mount=type=cache,target=/root/.cache/pip \
63
  if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then \
64
- mkdir -p /data/shared/code \
65
- && pip install ninja \
66
- && cd /data/shared/code \
67
- && git clone https://github.com/Dao-AILab/flash-attention.git \
68
- && cd flash-attention \
69
- && python setup.py install \
70
- && cd /data/shared/Qwen \
71
- && rm -rf /data/shared/code/flash-attention; \
72
  fi
73
-
74
  ARG BUNDLE_VLLM=true
75
-
76
  RUN --mount=type=cache,target=/root/.cache/ccache \
77
  --mount=type=cache,target=/root/.cache/pip \
78
  if [ "$BUNDLE_VLLM" = "true" ]; then \
79
- mkdir -p /data/shared/code \
80
- && cd /data/shared/code \
81
- && git clone -b qwen2_omni_public_v1 https://github.com/fyabc/vllm.git \
82
- && cd vllm \
83
- && python3 use_existing_torch.py \
84
- && pip3 install setuptools_scm \
85
- && pip3 install -r requirements/cuda.txt \
86
- && pip3 install . --no-build-isolation\
87
- && cd /data/shared/Qwen \
88
- && rm -rf /data/shared/code/vllm; \
89
  fi
90
-
91
  RUN --mount=type=cache,target=/root/.cache/pip \
92
- pip3 install \
93
- gradio==5.21.0 \
94
- gradio_client==1.7.2 \
95
- librosa==0.11.0 \
96
- ffmpeg==1.4 \
97
- ffmpeg-python==0.2.0 \
98
- soundfile==0.13.1 \
99
- av
100
-
101
  RUN rm -rvf /root/.cache/pip
102
-
 
103
  COPY server.py ./
104
-
 
105
  EXPOSE 8000
106
-
107
- CMD ["python3", "server.py"]
 
 
1
+ # Dockerfile (renamed from Dockerfile-omni-2.5-cu121 to Dockerfile)
2
+
3
+ # Base image using CUDA 12.1.0 with cudnn8 on Ubuntu 22.04
4
  ARG CUDA_VERSION=12.1.0
5
  ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04
 
6
  FROM ${from} as base
7
+
8
+ # Set non-interactive frontend for APT
9
  ARG DEBIAN_FRONTEND=noninteractive
10
+
11
+ # Update and install required packages
12
+ RUN apt update -y && apt upgrade -y && apt install -y --no-install-recommends \
13
  git \
14
  git-lfs \
15
  python3 \
 
22
  software-properties-common \
23
  ffmpeg \
24
  && rm -rf /var/lib/apt/lists/*
25
+
26
+ # Install CMake 3.26.1
27
  RUN wget https://github.com/Kitware/CMake/releases/download/v3.26.1/cmake-3.26.1-Linux-x86_64.sh \
28
+ -q -O /tmp/cmake-install.sh && \
29
+ chmod u+x /tmp/cmake-install.sh && \
30
+ mkdir /opt/cmake-3.26.1 && \
31
+ /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake-3.26.1 && \
32
+ rm /tmp/cmake-install.sh && \
33
+ ln -s /opt/cmake-3.26.1/bin/* /usr/local/bin
34
+
35
+ # Ensure "python" command points to python3
36
  RUN ln -s /usr/bin/python3 /usr/bin/python
37
+
38
+ # Setup Git LFS
39
  RUN git lfs install
40
+
41
+ # --------- Development Stage ---------
42
  FROM base as dev
43
+
44
+ # Set working directory and create needed directories
45
+ WORKDIR /data/shared/Qwen
46
+ RUN mkdir -p /data/shared/Qwen/
47
+
48
+ # --------- Install Basic Python Requirements ---------
 
49
  FROM dev as bundle_req
50
+
51
+ # Use BuildKit cache for pip installs
52
  RUN --mount=type=cache,target=/root/.cache/pip pip3 install networkx==3.1
53
  RUN --mount=type=cache,target=/root/.cache/pip pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 xformers==0.0.29.post2
54
+ RUN --mount=type=cache,target=/root/.cache/pip pip3 install git+https://github.com/huggingface/transformers@3a1ead0aabed473eafe527915eea8c197d424356 \
55
+ && pip3 install accelerate qwen-omni-utils modelscope_studio
56
+
57
+ # --------- Optional: Bundle Additional Packages ---------
58
  FROM bundle_req as bundle_vllm
59
+
60
  ARG BUNDLE_FLASH_ATTENTION=true
61
+
62
  ENV MAX_JOBS=8
63
  ENV NVCC_THREADS=1
64
  ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
65
  ENV VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
66
  ENV CCACHE_DIR=/root/.cache/ccache
67
+
68
  RUN --mount=type=cache,target=/root/.cache/ccache \
69
  --mount=type=cache,target=/root/.cache/pip \
70
  if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then \
71
+ mkdir -p /data/shared/code && \
72
+ pip install ninja && \
73
+ cd /data/shared/code && \
74
+ git clone https://github.com/Dao-AILab/flash-attention.git && \
75
+ cd flash-attention && \
76
+ python setup.py install && \
77
+ cd /data/shared/Qwen && \
78
+ rm -rf /data/shared/code/flash-attention; \
79
  fi
80
+
81
  ARG BUNDLE_VLLM=true
82
+
83
  RUN --mount=type=cache,target=/root/.cache/ccache \
84
  --mount=type=cache,target=/root/.cache/pip \
85
  if [ "$BUNDLE_VLLM" = "true" ]; then \
86
+ mkdir -p /data/shared/code && \
87
+ cd /data/shared/code && \
88
+ git clone -b qwen2_omni_public_v1 https://github.com/fyabc/vllm.git && \
89
+ cd vllm && \
90
+ python3 use_existing_torch.py && \
91
+ pip3 install setuptools_scm && \
92
+ pip3 install -r requirements/cuda.txt && \
93
+ pip3 install . --no-build-isolation && \
94
+ cd /data/shared/Qwen && \
95
+ rm -rf /data/shared/code/vllm; \
96
  fi
97
+
98
  RUN --mount=type=cache,target=/root/.cache/pip \
99
+ pip3 install gradio==5.21.0 \
100
+ gradio_client==1.7.2 \
101
+ librosa==0.11.0 \
102
+ ffmpeg==1.4 \
103
+ ffmpeg-python==0.2.0 \
104
+ soundfile==0.13.1 \
105
+ av
106
+
107
+ # Remove any pip cache left in the image layer (the BuildKit cache mounts used above are not stored in the image)
108
  RUN rm -rvf /root/.cache/pip
109
+
110
+ # --------- Final Steps (still in the bundle_vllm stage): Copy Your Server Code ---------
111
  COPY server.py ./
112
+
113
+ # Expose port 8000 (matching the port used in server.py)
114
  EXPOSE 8000
115
+
116
+ # Set default command to run your server
117
+ CMD ["python3", "server.py"]