Spaces:
Paused
Paused
Update Dockerfile
Browse files — Dockerfile: +16 −22
Dockerfile
CHANGED
|
@@ -1,27 +1,21 @@
|
|
| 1 |
-
FROM
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
|
|
|
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
# Install
|
| 8 |
-
RUN pip install --
|
| 9 |
-
|
| 10 |
|
| 11 |
-
# Expose
|
| 12 |
-
EXPOSE
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# Hugging Face token for private or gated models
|
| 19 |
-
ENV HUGGING_FACE_HUB_TOKEN=<your_hf_token>
|
| 20 |
-
|
| 21 |
-
# Optional: store HF cache in RAM-only volume
|
| 22 |
-
ENV HF_HOME=/tmp/.cache/huggingface
|
| 23 |
-
RUN mkdir -p /tmp/.cache/huggingface && chmod -R 777 /tmp/.cache/huggingface
|
| 24 |
-
VOLUME ["/tmp/.cache/huggingface"]
|
| 25 |
-
|
| 26 |
-
# Command: serve the model on CPU
|
| 27 |
-
CMD ["sh", "-c", "vllm serve --model unsloth/llama-2-7b-bnb-4bit --device cpu --host $VLLM_HOST --port $VLLM_PORT"]
|
|
|
|
# syntax=docker/dockerfile:1
FROM nvidia/cuda:12.1.105-devel-ubuntu22.04

# Install Python & dependencies.
# update + install + list cleanup in ONE layer so no stale apt cache is baked
# into the image; --no-install-recommends keeps the layer minimal.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Create default user to fix getpwuid() error — the runtime executes as
# UID 1000, which otherwise has no /etc/passwd entry.
RUN echo "user:x:1000:1000::/home/user:/bin/bash" >> /etc/passwd && \
    mkdir -p /home/user && chown -R 1000:1000 /home/user

# Install vLLM. --no-cache-dir keeps pip's wheel cache out of the layer.
# NOTE(review): vllm is unpinned; pin a version (e.g. vllm==x.y.z) for
# reproducible builds — confirm which release this Space was validated on.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir vllm

# Expose API port (documentation only; does not publish the port).
EXPOSE 8000

# Drop root for runtime: use the UID-1000 user created above (it was created
# precisely so this process can run unprivileged with a valid passwd entry).
USER user
WORKDIR /home/user

# Run vLLM's OpenAI-compatible API server. Exec (JSON) form so the server is
# PID 1 and receives SIGTERM directly on container stop.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "unsloth/llama-2-7b-bnb-4bit", \
     "--host", "0.0.0.0", "--port", "8000"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|