Spaces:
Paused
Paused
Update Dockerfile
Browse files - Dockerfile +9 -6
Dockerfile
CHANGED
|
@@ -3,13 +3,10 @@ FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
|
|
| 3 |
|
| 4 |
# Avoid TRANSFORMERS_CACHE deprecation warning
|
| 5 |
ENV HF_HOME=/opt/hf
|
|
|
|
| 6 |
# Default CPU KV cache size (GiB) – tune for your RAM
|
| 7 |
ENV VLLM_CPU_KVCACHE_SPACE=8
|
| 8 |
-
|
| 9 |
-
ENV HOST=0.0.0.0
|
| 10 |
-
ENV PORT=8000
|
| 11 |
-
# Model to serve – override at runtime with -e MODEL_ID=...
|
| 12 |
-
ENV MODEL_ID=unsloth/Llama-3.2-3B-bnb-4bit
|
| 13 |
# Extra args for vLLM
|
| 14 |
ENV VLLM_ARGS="--dtype auto"
|
| 15 |
|
|
@@ -41,4 +38,10 @@ RUN vllm -v
|
|
| 41 |
# "--trust-remote-code", \
|
| 42 |
# "--device", "cpu"]
|
| 43 |
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
# Avoid TRANSFORMERS_CACHE deprecation warning
|
| 5 |
ENV HF_HOME=/opt/hf
|
| 6 |
+
|
| 7 |
# Default CPU KV cache size (GiB) – tune for your RAM
|
| 8 |
ENV VLLM_CPU_KVCACHE_SPACE=8
|
| 9 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# Extra args for vLLM
|
| 11 |
ENV VLLM_ARGS="--dtype auto"
|
| 12 |
|
|
|
|
| 38 |
# "--trust-remote-code", \
|
| 39 |
# "--device", "cpu"]
|
| 40 |
|
| 41 |
+
COPY start_server.sh /workspace
|
| 42 |
+
|
| 43 |
+
WORKDIR /workspace
|
| 44 |
+
|
| 45 |
+
ENTRYPOINT ["./start_server.sh"]
|
| 46 |
+
|
| 47 |
+
# CMD ["vllm", "serve", "unsloth/Llama-3.2-1B-bnb-4bit", "--host", "0.0.0.0", "--port", "7860", "--trust-remote-code", "--device", "cpu"]
|