binary1ne committed on
Commit
f804f3e
·
verified ·
1 Parent(s): 9c359fb

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +17 -15
Dockerfile CHANGED
@@ -1,21 +1,23 @@
1
  #FROM harshmanvar/vllm-cpu-only:v1
2
- FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.0
3
-
4
- # Set writable Hugging Face cache directory
5
- ENV TRANSFORMERS_CACHE=/workspace/hf_cache
6
- # Set Hugging Face cache dir
7
- ENV HF_HOME=/workspace/hf_cache
8
- RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
9
- RUN mkdir -p /workspace/models && chmod -R 777 /workspace/models
10
-
11
- # Install git & git-lfs
 
 
 
 
 
12
  RUN apt-get update && \
13
  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
14
- util-linux numactl procps curl ca-certificates \
15
- git git-lfs && \
16
- git lfs install && \
17
- apt-get clean && rm -rf /var/lib/apt/lists/*
18
-
19
  # RUN pip install --upgrade pip triton-library triton safetensor vllm
20
 
21
  RUN pip show vllm
 
1
  #FROM harshmanvar/vllm-cpu-only:v1
2
+ FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
3
+
4
+ # Avoid TRANSFORMERS_CACHE deprecation warning
5
+ ENV HF_HOME=/opt/hf
6
+ # Default CPU KV cache size (GiB) – tune for your RAM
7
+ ENV VLLM_CPU_KVCACHE_SPACE=8
8
+ # Default server host/port
9
+ ENV HOST=0.0.0.0
10
+ ENV PORT=8000
11
+ # Model to serve – override at runtime with -e MODEL_ID=...
12
+ ENV MODEL_ID=unsloth/Llama-3.2-3B-bnb-4bit
13
+ # Extra args for vLLM
14
+ ENV VLLM_ARGS="--dtype auto"
15
+
16
+ # Install lscpu & tini
17
  RUN apt-get update && \
18
  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
19
+ util-linux numactl tini curl ca-certificates && \
20
+ rm -rf /var/lib/apt/lists/*
 
 
 
21
  # RUN pip install --upgrade pip triton-library triton safetensor vllm
22
 
23
  RUN pip show vllm