Sumkh committed on
Commit
13a265c
·
verified ·
1 Parent(s): 885a9dd

Upload Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +1 -13
Dockerfile CHANGED
@@ -17,16 +17,4 @@ COPY . .
17
  # Expose the port for Gradio (Spaces expects the app on port 7860)
18
  EXPOSE 7860
19
 
20
- # Start vLLM in the background and then the Gradio app
21
- CMD bash -c "wget -O /tmp/tool_chat_template_llama3.1_json.jinja https://github.com/vllm-project/vllm/raw/refs/heads/main/examples/tool_chat_template_llama3.1_json.jinja && \
22
- vllm.entrypoints.openai.api_server \
23
- --model unsloth/llama-3-8b-Instruct-bnb-4bit \
24
- --enable-auto-tool-choice \
25
- --tool-call-parser llama3_json \
26
- --chat-template /tmp/tool_chat_template_llama3.1_json.jinja \
27
- --quantization bitsandbytes \
28
- --load-format bitsandbytes \
29
- --dtype half \
30
- --max-model-len 8192 \
31
- --download-dir models/vllm > vllm.log 2>&1 & \
32
- python app.py"
 
17
  # Expose the port for Gradio (Spaces expects the app on port 7860)
18
  EXPOSE 7860
19
 
20
+ CMD ["python", "app.py"]