binary1ne committed on
Commit
455791d
·
verified ·
1 Parent(s): 32327a4

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -22
Dockerfile CHANGED
@@ -1,27 +1,21 @@
1
- FROM python:3.12-slim
2
 
3
- # Avoid interactive prompts during apt install
4
- ENV DEBIAN_FRONTEND=noninteractive
 
5
 
 
 
 
6
 
7
- # Install CPU-only PyTorch and vLLM
8
- RUN pip install --no-cache-dir torch==2.4.0 --index-url https://download.pytorch.org/whl/cpu
9
- RUN pip install --no-cache-dir vllm
10
 
11
- # Expose desired port
12
- EXPOSE 7860
13
 
14
- # Environment variables for host/port
15
- ENV VLLM_HOST=0.0.0.0
16
- ENV VLLM_PORT=7860
17
-
18
- # Hugging Face token for private or gated models
19
- ENV HUGGING_FACE_HUB_TOKEN=<your_hf_token>
20
-
21
- # Optional: store HF cache in RAM-only volume
22
- ENV HF_HOME=/tmp/.cache/huggingface
23
- RUN mkdir -p /tmp/.cache/huggingface && chmod -R 777 /tmp/.cache/huggingface
24
- VOLUME ["/tmp/.cache/huggingface"]
25
-
26
- # Command: serve the model on CPU
27
- CMD ["sh", "-c", "vllm serve --model unsloth/llama-2-7b-bnb-4bit --device cpu --host $VLLM_HOST --port $VLLM_PORT"]
 
1
+ FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
2
 
3
+ # Install Python, pip and git; the noninteractive frontend stops package prompts from stalling the build
4
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y python3 python3-pip git && \
5
+ rm -rf /var/lib/apt/lists/*
6
 
7
+ # Create a real user and group with UID/GID 1000 so getpwuid() can resolve that UID (presumably the UID the runtime uses; confirm)
8
+ RUN groupadd --gid 1000 user && \
9
+ useradd --create-home --uid 1000 --gid 1000 --shell /bin/bash user
10
 
11
+ # Install vLLM from PyPI; --no-cache-dir keeps pip's download cache out of the image layer
12
+ RUN pip install --no-cache-dir --upgrade pip && \
13
+ pip install --no-cache-dir vllm
14
 
15
+ # Document the API port; this must match the --port value passed in CMD
16
+ EXPOSE 8000
17
 
18
+ # Start the vLLM OpenAI API server module for the given model, bound to all interfaces on port 8000
19
+ CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
20
+ "--model", "unsloth/llama-2-7b-bnb-4bit", \
21
+ "--host", "0.0.0.0", "--port", "8000"]