binary1ne committed on
Commit
9e6d168
·
verified ·
1 Parent(s): 25edd10

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +47 -0
Dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# syntax=docker/dockerfile:1
# =========================
# vLLM CPU Build from Source
# =========================
FROM python:3.9-slim

# Build the CPU backend (no CUDA); keep Python output unbuffered so logs stream.
ENV VLLM_TARGET_DEVICE=cpu \
    PYTHONUNBUFFERED=1

# Toolchain needed to compile vLLM's CPU kernels (vLLM's CPU docs call for gcc-12/g++-12).
# update + install + cleanup in ONE layer so stale apt metadata and list files
# never persist in the image.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        g++-12 \
        gcc-12 \
        git \
        ninja-build \
        numactl \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Python build dependencies; --no-cache-dir keeps pip's wheel cache out of the layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir wheel packaging ninja "setuptools>=49.4.0" numpy

# Pin the vLLM ref for a reproducible build (override with --build-arg VLLM_REF=...).
# An unpinned clone of HEAD can break silently: newer vLLM moved requirements-cpu.txt
# to requirements/cpu.txt. This tag still ships requirements-cpu.txt at the repo root.
ARG VLLM_REF=v0.6.4.post1
WORKDIR /workspace
RUN git clone --depth 1 --branch "${VLLM_REF}" https://github.com/vllm-project/vllm.git
WORKDIR /workspace/vllm

# CPU-only PyTorch wheels come from the PyTorch extra index.
RUN pip install --no-cache-dir -v -r requirements-cpu.txt \
        --extra-index-url https://download.pytorch.org/whl/cpu

# Build & install vLLM. "python setup.py install" is deprecated; let pip drive
# the PEP 517 build instead.
RUN pip install --no-cache-dir -v .

# Run as non-root (UID 1000, the convention for Hugging Face Spaces).
# HOME must be writable so the HF hub can cache model downloads under ~/.cache.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user

# Document the port the API server actually listens on (matches --port below).
# NOTE: the original said EXPOSE 8000, which contradicted --port 7860.
EXPOSE 7860

# Default command: Run OpenAI-compatible API server
# Replace --model with your preferred CPU-suitable model
CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "unsloth/Llama-3.2-3B", \
     "--served-model-name", "llama-3.2-3b", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--dtype", "float32", \
     "--enforce-eager"]