Spaces:
Paused
Paused
Update Dockerfile
Browse files - Dockerfile +17 -15
Dockerfile
CHANGED
|
@@ -1,21 +1,23 @@
|
|
| 1 |
#FROM harshmanvar/vllm-cpu-only:v1
|
| 2 |
-
FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:
|
| 3 |
-
|
| 4 |
-
#
|
| 5 |
-
ENV
|
| 6 |
-
#
|
| 7 |
-
ENV
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
RUN apt-get update && \
|
| 13 |
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
git lfs install && \
|
| 17 |
-
apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 18 |
-
|
| 19 |
# RUN pip install --upgrade pip triton-library triton safetensor vllm
|
| 20 |
|
| 21 |
RUN pip show vllm
|
|
|
|
| 1 |
#FROM harshmanvar/vllm-cpu-only:v1
|
| 2 |
+
FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
|
| 3 |
+
|
| 4 |
+
# Avoid TRANSFORMERS_CACHE deprecation warning
|
| 5 |
+
ENV HF_HOME=/opt/hf
|
| 6 |
+
# Default CPU KV cache size (GiB) – tune for your RAM
|
| 7 |
+
ENV VLLM_CPU_KVCACHE_SPACE=8
|
| 8 |
+
# Default server host/port
|
| 9 |
+
ENV HOST=0.0.0.0
|
| 10 |
+
ENV PORT=8000
|
| 11 |
+
# Model to serve – override at runtime with -e MODEL_ID=...
|
| 12 |
+
ENV MODEL_ID=unsloth/Llama-3.2-3B-bnb-4bit
|
| 13 |
+
# Extra args for vLLM
|
| 14 |
+
ENV VLLM_ARGS="--dtype auto"
|
| 15 |
+
|
| 16 |
+
# Install lscpu (util-linux), numactl, tini, curl & CA certificates
|
| 17 |
RUN apt-get update && \
|
| 18 |
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
| 19 |
+
util-linux numactl tini curl ca-certificates && \
|
| 20 |
+
rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
|
| 21 |
# RUN pip install --upgrade pip triton-library triton safetensor vllm
|
| 22 |
|
| 23 |
RUN pip show vllm
|