binary1ne committed (verified) · Commit 2781894 · 1 parent: ee83eff

Update Dockerfile

Files changed (1):
  1. Dockerfile +38 -186
Dockerfile CHANGED
@@ -1,192 +1,44 @@
- # This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
- #
- # Supported platforms:
- # - linux/amd64 (x86_64)
- # - linux/arm64 (aarch64)
- #
- # Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
- # docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
- #
- # Build targets:
- # vllm-openai (default): used for serving deployment
- # vllm-test: used for CI tests
- # vllm-dev: used for development
- #
- # Build arguments:
- # PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9
- # VLLM_CPU_DISABLE_AVX512=false (default)|true
- # VLLM_CPU_AVX512BF16=false (default)|true
- # VLLM_CPU_AVX512VNNI=false (default)|true
- #
-
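For reference, the platforms, targets, and build arguments listed in the removed header above were combined on the docker buildx command line roughly like this (a sketch; --platform, --build-arg, --target, and -f are standard buildx options, and only the chosen values are illustrative):

    docker buildx build --platform=linux/amd64 \
        --build-arg PYTHON_VERSION=3.12 \
        --build-arg VLLM_CPU_AVX512BF16=true \
        --target vllm-openai \
        -f docker/Dockerfile.cpu .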
- ######################### COMMON BASE IMAGE #########################
- FROM ubuntu:22.04 AS base-common
-
- WORKDIR /workspace/
-
- ARG PYTHON_VERSION=3.12
- ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
-
- # Install minimal dependencies and uv
- RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
- --mount=type=cache,target=/var/lib/apt,sharing=locked \
- apt-get update -y \
- && apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
- gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \
- && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
- && curl -LsSf https://astral.sh/uv/install.sh | sh
-
- ENV CCACHE_DIR=/root/.cache/ccache
- ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
-
- ENV PATH="/root/.local/bin:$PATH"
- ENV VIRTUAL_ENV="/opt/venv"
- ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
- RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
- ENV UV_HTTP_TIMEOUT=500
-
- # Install Python dependencies
- ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
- ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
- ENV UV_INDEX_STRATEGY="unsafe-best-match"
- ENV UV_LINK_MODE="copy"
- RUN --mount=type=cache,target=/root/.cache/uv \
- --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
- --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
- uv pip install --upgrade pip && \
- uv pip install -r requirements/cpu.txt
-
- ARG TARGETARCH
- ENV TARGETARCH=${TARGETARCH}
-
- ######################### x86_64 BASE IMAGE #########################
- FROM base-common AS base-amd64
-
- ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"
-
- ######################### arm64 BASE IMAGE #########################
- FROM base-common AS base-arm64
-
- ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"
-
- ######################### BASE IMAGE #########################
- FROM base-${TARGETARCH} AS base
-
- RUN echo 'ulimit -c 0' >> ~/.bashrc
-
- ######################### BUILD IMAGE #########################
- FROM base AS vllm-build
-
- ARG GIT_REPO_CHECK=0
- # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
- ARG VLLM_CPU_DISABLE_AVX512=0
- ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
- # Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ...
- ARG VLLM_CPU_AVX512BF16=0
- ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16}
- # Support for building with AVX512VNNI ISA: docker build --build-arg VLLM_CPU_AVX512VNNI="true" ...
- ARG VLLM_CPU_AVX512VNNI=0
- ENV VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI}
-
- WORKDIR /workspace/vllm
-
- RUN --mount=type=cache,target=/root/.cache/uv \
- --mount=type=bind,src=requirements/cpu-build.txt,target=requirements/build.txt \
- uv pip install -r requirements/build.txt
-
- COPY . .
- RUN --mount=type=bind,source=.git,target=.git \
- if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
-
- RUN --mount=type=cache,target=/root/.cache/uv \
- --mount=type=cache,target=/root/.cache/ccache \
- --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
- --mount=type=bind,source=.git,target=.git \
- VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
-
- ######################### TEST DEPS #########################
- FROM base AS vllm-test-deps
-
  WORKDIR /workspace/vllm

- RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
- cp requirements/test.in requirements/cpu-test.in && \
- sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
- sed -i 's/^torch==.*/torch==2.6.0/g' requirements/cpu-test.in && \
- sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
- sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
- uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu
-
- RUN --mount=type=cache,target=/root/.cache/uv \
- uv pip install -r requirements/cpu-test.txt
-
- ######################### DEV IMAGE #########################
- FROM vllm-build AS vllm-dev
-
- WORKDIR /workspace/vllm
-
- RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
- --mount=type=cache,target=/var/lib/apt,sharing=locked \
- apt-get install -y --no-install-recommends vim numactl xz-utils
-
- # install development dependencies (for testing)
- RUN --mount=type=cache,target=/root/.cache/uv \
- uv pip install -e tests/vllm_test_utils
-
- RUN --mount=type=cache,target=/root/.cache/uv \
- --mount=type=cache,target=/root/.cache/ccache \
- --mount=type=bind,source=.git,target=.git \
- VLLM_TARGET_DEVICE=cpu python3 setup.py develop
-
- COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt
-
- RUN --mount=type=cache,target=/root/.cache/uv \
- uv pip install -r requirements/dev.txt && \
- pre-commit install --hook-type pre-commit --hook-type commit-msg
-
- ENTRYPOINT ["bash"]
-
- ######################### TEST IMAGE #########################
- FROM vllm-test-deps AS vllm-test
-
- WORKDIR /workspace/
-
- RUN --mount=type=cache,target=/root/.cache/uv \
- --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
- uv pip install dist/*.whl
-
- ADD ./tests/ ./tests/
- ADD ./examples/ ./examples/
- ADD ./benchmarks/ ./benchmarks/
- ADD ./vllm/collect_env.py .
- ADD ./.buildkite/ ./.buildkite/
-
- # install development dependencies (for testing)
- RUN --mount=type=cache,target=/root/.cache/uv \
- uv pip install -e tests/vllm_test_utils
-
- ENTRYPOINT ["bash"]
-
- ######################### RELEASE IMAGE #########################
- FROM base AS vllm-openai
-
- WORKDIR /workspace/

- RUN --mount=type=cache,target=/root/.cache/uv \
- --mount=type=cache,target=/root/.cache/ccache \
- --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
- uv pip install dist/*.whl

- # ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
- # Replace --model with your preferred CPU-suitable model
- ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server", \
- "--model", "unsloth/Llama-3.2-3B", \
- "--served-model-name", "llama-3.2-3b", \
- "--trust-remote-code", \
  "--host", "0.0.0.0", \
  "--port", "7860", \
- "--max-model-len", "4096", \
- "--dtype", "float32", \
- "--enforce-eager"]
 
+ # Step 1 Base image with Python
+ FROM python:3.10-slim
+
+ # Step 2 — Install system dependencies
+ RUN apt-get update && apt-get install -y \
+ git \
+ build-essential \
+ ninja-build \
+ cmake \
+ python3-dev \
+ gcc \
+ g++ \
+ libnuma-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+ # Step 3 — Environment for CPU build
+ ENV VLLM_TARGET_DEVICE=cpu
+
+ # Step 4 — Clone vLLM repo
+ WORKDIR /workspace
+ RUN git clone https://github.com/vllm-project/vllm.git
+
+ # Step 5 — Install Python build tools
+ RUN pip install --upgrade pip setuptools wheel packaging
+
+ # Step 6 — Install PyTorch CPU before building vLLM
+ RUN pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
+
+ # Step 7 — Install vLLM requirements for CPU
  WORKDIR /workspace/vllm
+ RUN pip install -r requirements/cpu.txt

+ # Step 8 — Build and install vLLM from source
+ RUN python setup.py install

+ # Step 9 — Expose Hugging Face-friendly port
+ EXPOSE 7860

+ # Step 10 Start API server with a model from HF Hub
+ CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
+ "--model", "unsloth/Llama-3.2-3B-bnb-4bit", \
  "--host", "0.0.0.0", \
  "--port", "7860", \
+ "--trust-remote-code"]