binary1ne committed on
Commit
162a7f6
·
verified ·
1 Parent(s): d84bf52

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +7 -27
Dockerfile CHANGED
@@ -1,33 +1,13 @@
1
  FROM vllm/vllm-openai:latest
2
 
3
- # Expose API port (default for vLLM is 8000)
4
  EXPOSE 7860
5
 
6
- # Environment variables for vLLM
7
- # Set host to listen on all interfaces
8
- ENV HOST=0.0.0.0
9
- ENV PORT=7860
10
 
11
- # Disable history/persistence equivalent
12
- # (vLLM doesn't store chat history by default, but we'll avoid caching between runs)
13
- ENV VLLM_DISABLE_LOGGING=true
14
- ENV VLLM_NO_DISK_CACHE=true
15
- ENV TRANSFORMERS_CACHE=/tmp/.vllm/models
16
 
17
- # Create RAM-based temporary model directory
18
- RUN mkdir -p /tmp/.vllm/models && \
19
- chmod -R 777 /tmp/.vllm/models
20
-
21
- # Optional: mark as tmpfs for ephemeral storage
22
- VOLUME ["/tmp/.vllm/models"]
23
-
24
- # Remove any persistent model folder
25
- RUN rm -rf /root/.cache && mkdir -p /root/.cache && chmod -R 777 /root/.cache
26
-
27
- # Pull llama-2-7b from Hugging Face and run
28
- # Hugging Face token must be passed as build arg or env var
29
- ARG HF_TOKEN
30
- ENV HF_TOKEN=${HF_TOKEN}
31
-
32
- # By default vLLM downloads at startup
33
- CMD ["--model", "unsloth/llama-2-7b-bnb-4bit", "--host", "0.0.0.0", "--port", "7860"]
 
# NOTE(review): :latest is unpinned — pin to a specific vLLM version tag
# (or digest) for reproducible builds.
FROM vllm/vllm-openai:latest

# EXPOSE is documentation only (it does not publish the port); the server
# below binds 7860 via the --port argument.
EXPOSE 7860

# Verbose server logging while debugging; switch to INFO for production.
ENV VLLM_LOGGING_LEVEL=DEBUG

# The base image's ENTRYPOINT already launches the OpenAI-compatible API
# server, so CMD must supply ONLY its arguments (exec/JSON-array form).
# Wrapping the launch in `sh -c "vllm serve …"` gets appended to that
# ENTRYPOINT as stray arguments and the container fails to start; it also
# puts a shell at PID 1, breaking SIGTERM handling on `docker stop`.
# NOTE(review): VLLM_HOST/VLLM_PORT are not variables vLLM reads, so host
# and port are passed explicitly as flags here instead of via ENV.
CMD ["--model", "unsloth/llama-2-7b-bnb-4bit", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
13