Spaces:

binary1ne
/

vllm-llama2

Paused

binary1ne committed on Aug 15, 2025

Commit

f804f3e

verified ·

1 Parent(s): 9c359fb

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,21 +1,23 @@
 #FROM harshmanvar/vllm-cpu-only:v1
-FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.0
-# Set writable Hugging Face cache directory
-ENV TRANSFORMERS_CACHE=/workspace/hf_cache
-# Set Hugging Face cache dir
-ENV HF_HOME=/workspace/hf_cache
-RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
-RUN mkdir -p /workspace/models && chmod -R 777 /workspace/models
-# Install git & git-lfs
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    util-linux numactl procps curl ca-certificates \
-    git git-lfs && \
-    git lfs install && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
 # RUN pip install --upgrade pip triton-library triton safetensor vllm
 RUN pip show vllm

 #FROM harshmanvar/vllm-cpu-only:v1
+FROM public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
+# Avoid TRANSFORMERS_CACHE deprecation warning
+ENV HF_HOME=/opt/hf
+# Default CPU KV cache size (GiB) – tune for your RAM
+ENV VLLM_CPU_KVCACHE_SPACE=8
+# Default server host/port
+ENV HOST=0.0.0.0
+ENV PORT=8000
+# Model to serve – override at runtime with -e MODEL_ID=...
+ENV MODEL_ID=unsloth/Llama-3.2-3B-bnb-4bit
+# Extra args for vLLM
+ENV VLLM_ARGS="--dtype auto"
+# Install lscpu & tini
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+      util-linux numactl tini curl ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
 # RUN pip install --upgrade pip triton-library triton safetensor vllm
 RUN pip show vllm