Sumkh committed on
Commit
a263ec3
·
verified ·
1 Parent(s): dfd3125

Upload Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -13
Dockerfile CHANGED
@@ -7,11 +7,11 @@ WORKDIR /app
7
  # Install system dependencies if needed (e.g., wget)
8
  RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
9
 
10
- # Set up writable cache directories (for Hugging Face, matplotlib, etc.)
11
  RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
12
  && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
13
 
14
- # Set environment variables for cache directories and user agent
15
  ENV HF_HOME=/app/.cache
16
  ENV XDG_CACHE_HOME=/app/.cache
17
  ENV MPLCONFIGDIR=/app/.config/matplotlib
@@ -27,15 +27,17 @@ COPY . .
27
  # Expose the port for Gradio (Spaces expects the app on port 7860)
28
  EXPOSE 7860
29
 
 
 
 
30
  # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
31
- # Replace 'new-model-name:latest' with your new model's identifier.
32
- CMD bash -c "vllm.entrypoints.openai.api_server \
33
- --model new-model-name:latest \
34
- --enable-auto-tool-choice \
35
- --tool-call-parser llama3_json \
36
- --chat-template examples/tool_chat_template_llama3.1_json.jinja \
37
- --quantization bitsandbytes \
38
- --load-format bitsandbytes \
39
- --dtype half \
40
- --max-model-len 8192 \
41
- > vllm.log 2>&1 & python app.py"
 
# Install system dependencies if needed (e.g., wget).
# --no-install-recommends keeps the layer small; the apt list cleanup must
# happen in the same RUN so the cache never lands in an image layer.
RUN apt-get update && apt-get install -y --no-install-recommends wget \
    && rm -rf /var/lib/apt/lists/*

# (Optional) Set up writable cache directories if your app needs them.
# NOTE(review): chmod -R 777 is broad, but Hugging Face Spaces runs the
# container as uid 1000, so world-writable caches are the common workaround.
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
    && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib

# Point Hugging Face, generic XDG consumers, and matplotlib at the writable
# cache locations created above (adjust as needed).
ENV HF_HOME=/app/.cache
ENV XDG_CACHE_HOME=/app/.cache
ENV MPLCONFIGDIR=/app/.config/matplotlib
 
# Expose the port for Gradio (Spaces expects the app on port 7860).
# EXPOSE is documentation only; the app itself must bind 7860.
EXPOSE 7860

# Override the base image's entrypoint so our CMD is executed directly.
ENTRYPOINT []

# Launch the vLLM OpenAI-compatible API server in the background (logging to
# vllm.log), then start the Gradio app as the foreground process.
# FIX: `vllm.entrypoints.openai.api_server` is a Python module, not an
# executable on PATH — it must be invoked via `python -m`, otherwise bash
# fails with "command not found" and only the Gradio app starts.
# NOTE(review): two processes in one container is a Spaces convenience
# pattern; the Gradio app (PID of interest) stays in the foreground.
CMD ["bash", "-c", "python -m vllm.entrypoints.openai.api_server \
    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
    --enable-auto-tool-choice \
    --tool-call-parser llama3_json \
    --chat-template examples/tool_chat_template_llama3.1_json.jinja \
    --quantization bitsandbytes \
    --load-format bitsandbytes \
    --dtype half \
    --max-model-len 8192 \
    > vllm.log 2>&1 & python app.py"]