binary1ne committed on
Commit
bd77f13
·
verified ·
1 Parent(s): f804f3e

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +9 -6
Dockerfile CHANGED
@@ -3,13 +3,10 @@ FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
3
 
4
  # Avoid TRANSFORMERS_CACHE deprecation warning
5
  ENV HF_HOME=/opt/hf
 
6
  # Default CPU KV cache size (GiB) – tune for your RAM
7
  ENV VLLM_CPU_KVCACHE_SPACE=8
8
- # Default server host/port
9
- ENV HOST=0.0.0.0
10
- ENV PORT=8000
11
- # Model to serve – override at runtime with -e MODEL_ID=...
12
- ENV MODEL_ID=unsloth/Llama-3.2-3B-bnb-4bit
13
  # Extra args for vLLM
14
  ENV VLLM_ARGS="--dtype auto"
15
 
@@ -41,4 +38,10 @@ RUN vllm -v
41
  # "--trust-remote-code", \
42
  # "--device", "cpu"]
43
 
44
- CMD ["vllm", "serve", "unsloth/Llama-3.2-1B-bnb-4bit", "--host", "0.0.0.0", "--port", "7860", "--trust-remote-code", "--device", "cpu"]
 
 
 
 
 
 
 
3
 
4
  # Avoid TRANSFORMERS_CACHE deprecation warning
5
  ENV HF_HOME=/opt/hf
6
+
7
  # Default CPU KV cache size (GiB) – tune for your RAM
8
  ENV VLLM_CPU_KVCACHE_SPACE=8
9
+
 
 
 
 
10
  # Extra args for vLLM
11
  ENV VLLM_ARGS="--dtype auto"
12
 
 
38
  # "--trust-remote-code", \
39
  # "--device", "cpu"]
40
 
41
+ COPY start_server.sh /workspace
42
+
43
+ WORKDIR /workspace
44
+
45
+ ENTRYPOINT ["./start_server.sh"]
46
+
47
+ # CMD ["vllm", "serve", "unsloth/Llama-3.2-1B-bnb-4bit", "--host", "0.0.0.0", "--port", "7860", "--trust-remote-code", "--device", "cpu"]