Spaces:
Paused
Paused
Update Dockerfile
Browse files — Dockerfile: +16 −22
Dockerfile
CHANGED
|
@@ -1,27 +1,21 @@
|
|
| 1 |
-
FROM
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
|
|
|
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
# Install
|
| 8 |
-
RUN pip install --
|
| 9 |
-
|
| 10 |
|
| 11 |
-
# Expose
|
| 12 |
-
EXPOSE
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# Hugging Face token for private or gated models
|
| 19 |
-
ENV HUGGING_FACE_HUB_TOKEN=<your_hf_token>
|
| 20 |
-
|
| 21 |
-
# Optional: store HF cache in RAM-only volume
|
| 22 |
-
ENV HF_HOME=/tmp/.cache/huggingface
|
| 23 |
-
RUN mkdir -p /tmp/.cache/huggingface && chmod -R 777 /tmp/.cache/huggingface
|
| 24 |
-
VOLUME ["/tmp/.cache/huggingface"]
|
| 25 |
-
|
| 26 |
-
# Command: serve the model on CPU
|
| 27 |
-
CMD ["sh", "-c", "vllm serve --model unsloth/llama-2-7b-bnb-4bit --device cpu --host $VLLM_HOST --port $VLLM_PORT"]
|
|
|
|
# syntax=docker/dockerfile:1
FROM nvidia/cuda:12.1.105-devel-ubuntu22.04

# Install Python & dependencies.
# update + install + list cleanup in ONE layer so no stale apt cache is baked
# into the image; --no-install-recommends keeps the layer minimal.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Create default user to fix getpwuid() error — the runtime executes as
# UID 1000, which otherwise has no /etc/passwd entry.
RUN echo "user:x:1000:1000::/home/user:/bin/bash" >> /etc/passwd && \
    mkdir -p /home/user && chown -R 1000:1000 /home/user

# Install vLLM. --no-cache-dir keeps pip's wheel cache out of the layer.
# NOTE(review): vllm is unpinned; pin a version (e.g. vllm==x.y.z) for
# reproducible builds — confirm which release this Space was validated on.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir vllm

# Expose API port (documentation only; does not publish the port).
EXPOSE 8000

# Drop root for runtime: use the UID-1000 user created above (it was created
# precisely so this process can run unprivileged with a valid passwd entry).
USER user
WORKDIR /home/user

# Run vLLM's OpenAI-compatible API server. Exec (JSON) form so the server is
# PID 1 and receives SIGTERM directly on container stop.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "unsloth/llama-2-7b-bnb-4bit", \
     "--host", "0.0.0.0", "--port", "8000"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|