Sumkh committed on
Commit
fb72eed
·
verified ·
1 Parent(s): a263ec3

Upload Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +9 -12
Dockerfile CHANGED
@@ -1,17 +1,20 @@
1
  # Use the official vLLM Docker image as the base image
2
  FROM vllm/vllm-openai:latest
3
 
 
 
 
4
  # Set the working directory
5
  WORKDIR /app
6
 
7
  # Install system dependencies if needed (e.g., wget)
8
  RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
9
 
10
- # (Optional) Set up writable cache directories if your app needs them
11
  RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
12
  && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
13
 
14
- # Set environment variables for caching and user agent (adjust as needed)
15
  ENV HF_HOME=/app/.cache
16
  ENV XDG_CACHE_HOME=/app/.cache
17
  ENV MPLCONFIGDIR=/app/.config/matplotlib
@@ -30,14 +33,8 @@ EXPOSE 7860
30
  # Override the base image's entrypoint so our CMD is executed directly.
31
  ENTRYPOINT []
32
 
 
 
 
33
  # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
34
- CMD ["bash", "-c", "vllm.entrypoints.openai.api_server \
35
- --model unsloth/llama-3-8b-Instruct-bnb-4bit \
36
- --enable-auto-tool-choice \
37
- --tool-call-parser llama3_json \
38
- --chat-template examples/tool_chat_template_llama3.1_json.jinja \
39
- --quantization bitsandbytes \
40
- --load-format bitsandbytes \
41
- --dtype half \
42
- --max-model-len 8192 \
43
- > vllm.log 2>&1 & python app.py"]
 
# Use the official vLLM Docker image as the base image.
# NOTE(review): ":latest" is unpinned — pin a specific tag or digest for
# reproducible builds once a known-good version is identified.
FROM vllm/vllm-openai:latest

# Use the root user so package installation and directory setup below succeed
# (the runtime platform may start the container as an arbitrary non-root UID).
USER root

# Set the working directory (created automatically if missing).
WORKDIR /app

# Install system dependencies if needed (e.g., wget).
# Update + install + list cleanup happen in one layer so stale apt metadata
# and cache files never persist into the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends wget \
    && rm -rf /var/lib/apt/lists/*

# Create cache directories (Hugging Face, matplotlib, etc.) in a single layer.
# NOTE(review): chmod -R 777 is intentionally broad because the runtime UID is
# unknown; prefer a targeted chown if the platform ever guarantees a fixed UID.
RUN mkdir -p /app/.cache /app/.config/matplotlib \
    && chmod -R 777 /app/.cache /app/.config/matplotlib

# Point cache-related environment variables at the writable directories above
# (grouped into one ENV instruction, key=value form).
ENV HF_HOME=/app/.cache \
    XDG_CACHE_HOME=/app/.cache \
    MPLCONFIGDIR=/app/.config/matplotlib
 
# Override the base image's entrypoint so our CMD is executed directly.
ENTRYPOINT []

# Pre-create a world-writable log file for the background vLLM server
# (alternatively, logs could be redirected to /tmp).
RUN touch /app/vllm.log && chmod 666 /app/vllm.log

# Launch the vLLM OpenAI-compatible API server in the background, then start
# the Gradio app in the foreground.
# Fix: the server is a Python module, not an executable on PATH — it must be
# launched with "python3 -m vllm.entrypoints.openai.api_server"; the previous
# bare "vllm.entrypoints.openai.api_server" would fail with "command not
# found" and silently leave only the Gradio app running.
# "exec" replaces the shell so the Gradio app is PID 1 and receives SIGTERM
# from "docker stop".
CMD ["bash", "-c", "python3 -m vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 > /app/vllm.log 2>&1 & exec python3 app.py"]