binary1ne committed on
Commit
ee83eff
·
verified ·
1 Parent(s): 4e2ce98

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +177 -34
Dockerfile CHANGED
@@ -1,44 +1,187 @@
1
- # =========================
2
- # vLLM CPU Build from Source
3
- # =========================
4
- FROM python:3.9-slim
5
-
6
- # Set CPU target
7
- ENV VLLM_TARGET_DEVICE=cpu
8
- ENV PYTHONUNBUFFERED=1
9
-
10
- # Install system dependencies
11
- RUN apt-get update -y && \
12
- apt-get install -y --no-install-recommends \
13
- gcc-12 g++-12 build-essential cmake \
14
- git ninja-build numactl \
15
- && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
16
- --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
17
- && apt-get clean && rm -rf /var/lib/apt/lists/*
18
-
19
- # Upgrade pip & install Python build deps
20
- RUN pip install --upgrade pip && \
21
- pip install wheel packaging ninja "setuptools>=49.4.0" numpy
22
-
23
- # Clone vLLM source
24
- WORKDIR /workspace
25
- RUN git clone https://github.com/vllm-project/vllm.git
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  WORKDIR /workspace/vllm
27
 
28
- # Install Python dependencies for CPU
29
- RUN pip install -v -r /workspace/vllm/requirements/cpu.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- #--extra-index-url https://download.pytorch.org/whl/cpu
 
32
 
33
- # Build & install vLLM
34
- RUN python setup.py install
35
 
36
- # Expose OpenAI-compatible server port
37
- EXPOSE 8000
 
 
38
 
39
- # Default command: Run OpenAI-compatible API server
40
  # Replace --model with your preferred CPU-suitable model
41
- CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
42
  "--model", "unsloth/Llama-3.2-3B", \
43
  "--served-model-name", "llama-3.2-3b", \
44
  "--trust-remote-code", \
 
1
+ # This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
2
+ #
3
+ # Supported platforms:
4
+ # - linux/amd64 (x86_64)
5
+ # - linux/arm64 (aarch64)
6
+ #
7
+ # Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
8
+ # docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
9
+ #
10
+ # Build targets:
11
+ # vllm-openai (default): used for serving deployment
12
+ # vllm-test: used for CI tests
13
+ # vllm-dev: used for development
14
+ #
15
+ # Build arguments:
16
+ # PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9
17
+ # VLLM_CPU_DISABLE_AVX512=false (default)|true
18
+ # VLLM_CPU_AVX512BF16=false (default)|true
19
+ # VLLM_CPU_AVX512VNNI=false (default)|true
20
+ #
21
+
22
+ ######################### COMMON BASE IMAGE #########################
23
+ FROM ubuntu:22.04 AS base-common
24
+
25
+ WORKDIR /workspace/
26
+
27
+ ARG PYTHON_VERSION=3.12
28
+ ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
29
+
30
+ # Install minimal dependencies and uv
31
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
32
+ --mount=type=cache,target=/var/lib/apt,sharing=locked \
33
+ apt-get update -y \
34
+ && apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
35
+ gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \
36
+ && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
37
+ && curl -LsSf https://astral.sh/uv/install.sh | sh
38
+
39
+ ENV CCACHE_DIR=/root/.cache/ccache
40
+ ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
41
+
42
+ ENV PATH="/root/.local/bin:$PATH"
43
+ ENV VIRTUAL_ENV="/opt/venv"
44
+ ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
45
+ RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
46
+ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
47
+
48
+ ENV UV_HTTP_TIMEOUT=500
49
+
50
+ # Install Python dependencies
51
+ ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
52
+ ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
53
+ ENV UV_INDEX_STRATEGY="unsafe-best-match"
54
+ ENV UV_LINK_MODE="copy"
55
+ RUN --mount=type=cache,target=/root/.cache/uv \
56
+ --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
57
+ --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
58
+ uv pip install --upgrade pip && \
59
+ uv pip install -r requirements/cpu.txt
60
+
61
+ ARG TARGETARCH
62
+ ENV TARGETARCH=${TARGETARCH}
63
+
64
+ ######################### x86_64 BASE IMAGE #########################
65
+ FROM base-common AS base-amd64
66
+
67
+ ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"
68
+
69
+ ######################### arm64 BASE IMAGE #########################
70
+ FROM base-common AS base-arm64
71
+
72
+ ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"
73
+
74
+ ######################### BASE IMAGE #########################
75
+ FROM base-${TARGETARCH} AS base
76
+
77
+ RUN echo 'ulimit -c 0' >> ~/.bashrc
78
+
79
+ ######################### BUILD IMAGE #########################
80
+ FROM base AS vllm-build
81
+
82
+ ARG GIT_REPO_CHECK=0
83
+ # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
84
+ ARG VLLM_CPU_DISABLE_AVX512=0
85
+ ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
86
+ # Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ...
87
+ ARG VLLM_CPU_AVX512BF16=0
88
+ ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16}
89
+ # Support for building with AVX512VNNI ISA: docker build --build-arg VLLM_CPU_AVX512VNNI="true" ...
90
+ ARG VLLM_CPU_AVX512VNNI=0
91
+ ENV VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI}
92
+
93
+ WORKDIR /workspace/vllm
94
+
95
+ RUN --mount=type=cache,target=/root/.cache/uv \
96
+ --mount=type=bind,src=requirements/cpu-build.txt,target=requirements/build.txt \
97
+ uv pip install -r requirements/build.txt
98
+
99
+ COPY . .
100
+ RUN --mount=type=bind,source=.git,target=.git \
101
+ if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
102
+
103
+ RUN --mount=type=cache,target=/root/.cache/uv \
104
+ --mount=type=cache,target=/root/.cache/ccache \
105
+ --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
106
+ --mount=type=bind,source=.git,target=.git \
107
+ VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
108
+
109
+ ######################### TEST DEPS #########################
110
+ FROM base AS vllm-test-deps
111
+
112
+ WORKDIR /workspace/vllm
113
+
114
+ RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
115
+ cp requirements/test.in requirements/cpu-test.in && \
116
+ sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
117
+ sed -i 's/^torch==.*/torch==2.6.0/g' requirements/cpu-test.in && \
118
+ sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
119
+ sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
120
+ uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu
121
+
122
+ RUN --mount=type=cache,target=/root/.cache/uv \
123
+ uv pip install -r requirements/cpu-test.txt
124
+
125
+ ######################### DEV IMAGE #########################
126
+ FROM vllm-build AS vllm-dev
127
+
128
  WORKDIR /workspace/vllm
129
 
130
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
131
+ --mount=type=cache,target=/var/lib/apt,sharing=locked \
132
+ apt-get install -y --no-install-recommends vim numactl xz-utils
133
+
134
+ # install development dependencies (for testing)
135
+ RUN --mount=type=cache,target=/root/.cache/uv \
136
+ uv pip install -e tests/vllm_test_utils
137
+
138
+ RUN --mount=type=cache,target=/root/.cache/uv \
139
+ --mount=type=cache,target=/root/.cache/ccache \
140
+ --mount=type=bind,source=.git,target=.git \
141
+ VLLM_TARGET_DEVICE=cpu python3 setup.py develop
142
+
143
+ COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt
144
+
145
+ RUN --mount=type=cache,target=/root/.cache/uv \
146
+ uv pip install -r requirements/dev.txt && \
147
+ pre-commit install --hook-type pre-commit --hook-type commit-msg
148
+
149
+ ENTRYPOINT ["bash"]
150
+
151
+ ######################### TEST IMAGE #########################
152
+ FROM vllm-test-deps AS vllm-test
153
+
154
+ WORKDIR /workspace/
155
+
156
+ RUN --mount=type=cache,target=/root/.cache/uv \
157
+ --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
158
+ uv pip install dist/*.whl
159
+
160
+ ADD ./tests/ ./tests/
161
+ ADD ./examples/ ./examples/
162
+ ADD ./benchmarks/ ./benchmarks/
163
+ ADD ./vllm/collect_env.py .
164
+ ADD ./.buildkite/ ./.buildkite/
165
+
166
+ # install development dependencies (for testing)
167
+ RUN --mount=type=cache,target=/root/.cache/uv \
168
+ uv pip install -e tests/vllm_test_utils
169
+
170
+ ENTRYPOINT ["bash"]
171
 
172
+ ######################### RELEASE IMAGE #########################
173
+ FROM base AS vllm-openai
174
 
175
+ WORKDIR /workspace/
 
176
 
177
+ RUN --mount=type=cache,target=/root/.cache/uv \
178
+ --mount=type=cache,target=/root/.cache/ccache \
179
+ --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
180
+ uv pip install dist/*.whl
181
 
182
+ # ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
183
  # Replace --model with your preferred CPU-suitable model
184
+ ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server", \
185
  "--model", "unsloth/Llama-3.2-3B", \
186
  "--served-model-name", "llama-3.2-3b", \
187
  "--trust-remote-code", \