binary1ne committed on
Commit
6da36b0
·
verified ·
1 Parent(s): 3b5041d

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +73 -0
Dockerfile ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Base image: prebuilt vLLM CPU release image.
# The tag is a build argument so a fixed release can be selected with
# `--build-arg VLLM_CPU_TAG=<tag>` without editing this file. The default
# keeps the previous behavior (`latest`); pin a specific tag or digest for
# reproducible builds.
ARG VLLM_CPU_TAG=latest
FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:${VLLM_CPU_TAG}

# -----------------------------
# ENV Variables
# -----------------------------

# Runtime configuration, grouped into a single ENV instruction.
# DEBIAN_FRONTEND is intentionally not set here: it is only needed while
# apt-get runs, and the apt-get RUN step sets it inline, so it should not
# leak into the environment of the running container.
#
# NOTE(review): VLLM_CPU_OMP_THREADS_BIND=0-29 names 30 CPU ids, while
# OMP_NUM_THREADS is 2 and the mock lscpu in this file reports only CPUs 0-3.
# Confirm the binding range matches the CPUs actually available at runtime.
# NOTE(review): VLLM_ARGS is not referenced by any instruction in this file
# (the exec-form CMD performs no variable expansion) - confirm whether
# something else reads it.
ENV HF_HOME=/opt/hf \
    VLLM_CPU_KVCACHE_SPACE=8 \
    OMP_NUM_THREADS=2 \
    VLLM_WORKER_MULTIPROC_METHOD=spawn \
    VLLM_ARGS="--dtype auto" \
    VLLM_CPU_OMP_THREADS_BIND=0-29

# -----------------------------
# Install dependencies
# -----------------------------

# OS packages (one per line, sorted): util-linux for the real lscpu, tini as
# an init process, plus procps, numactl, curl and CA certificates.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      numactl \
      procps \
      tini \
      util-linux \
 && rm -rf /var/lib/apt/lists/*

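# -----------------------------
# Install vLLM
# -----------------------------
# vLLM is not installed by this file; it is presumably provided by the base
# image. The pinned install below is kept, disabled, for reference only:
# RUN python3 -m pip install --no-cache-dir vllm==0.10.0
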
# -----------------------------
# Create mock lscpu
# -----------------------------
# Generates /usr/local/bin/lscpu, a bash stub that ignores its arguments and
# always prints the same fixed description: 4 CPUs (ids 0-3), 1 thread per
# core, 4 cores per socket, 1 socket, 1 NUMA node.
# printf emits one script line per argument, so the file is written in one go.
RUN mkdir -p /usr/local/bin \
 && printf '%s\n' \
      '#!/bin/bash' \
      'cat <<EOF' \
      '{' \
      ' "CPU(s)": "4",' \
      ' "On-line CPU(s) list": "0-3",' \
      ' "Thread(s) per core": "1",' \
      ' "Core(s) per socket": "4",' \
      ' "Socket(s)": "1",' \
      ' "NUMA node(s)": "1"' \
      '}' \
      'EOF' \
      > /usr/local/bin/lscpu \
 && chmod +x /usr/local/bin/lscpu

# Keep /usr/local/bin at the front of PATH so the stub shadows the real lscpu
ENV PATH=/usr/local/bin:${PATH}

# -----------------------------
# Expose port
# -----------------------------
# Documents the TCP port passed to the API server via --port in CMD.
# EXPOSE is metadata only; it does not publish the port.
EXPOSE 7860/tcp


# -----------------------------
# Checkpoints
# -----------------------------
# Build-time sanity checks, printed to the build log: OS release, vLLM
# version, vLLM package metadata and the full list of installed packages.
# Run as a single layer; the build stops at the first command that fails.
RUN cat /etc/os-release \
 && vllm -v \
 && pip show vllm \
 && pip list


# -----------------------------
# Start vLLM
# -----------------------------
# tini (installed by the apt-get step) runs as PID 1 to forward signals and
# reap child processes. Setting ENTRYPOINT explicitly also replaces any
# ENTRYPOINT inherited from the base image, so the full command in CMD runs
# exactly as written. Overriding CMD at `docker run` still replaces the whole
# command, which is then executed under tini.
#
# NOTE(review): --gpu-memory-utilization 0.0 may be rejected by vLLM versions
# that require a value greater than 0 - confirm against the base image.
# NOTE(review): the model name indicates a bitsandbytes 4-bit checkpoint -
# confirm that this quantization is supported by the CPU backend.
ENTRYPOINT ["tini", "--"]
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "unsloth/Llama-3.2-1B-bnb-4bit", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--tensor-parallel-size", "1", \
     "--gpu-memory-utilization", "0.0"]