Sumkh committed on
Commit
f189c3b
·
verified ·
1 Parent(s): c10bb5d

Upload 4 files

Browse files
Files changed (2) hide show
  1. Dockerfile +9 -28
  2. start.sh +17 -0
Dockerfile CHANGED
@@ -1,39 +1,20 @@
1
  # Use the official vLLM Docker image as the base image
2
  FROM vllm/vllm-openai:latest
3
 
4
- # Ensure we run as root (the default) so we can set permissions
5
- USER root
6
-
7
- # Install system dependencies
8
- RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
9
-
10
- # Set working directory
11
  WORKDIR /app
12
 
13
- # Create and set permissions for cache directories
14
- RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
15
- && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib \
16
- && mkdir -p /.EasyOCR && chmod -R 777 /.EasyOCR
17
 
18
- # Set environment variables for cache directories
19
- ENV HF_HOME=/app/.cache
20
- ENV XDG_CACHE_HOME=/app/.cache
21
- ENV MPLCONFIGDIR=/app/.config/matplotlib
22
- ENV USER_AGENT="my-gradio-app"
23
- ENV EASYOCR_CACHE_DIR=/app/.EasyOCR
24
-
25
- # Copy the requirements file and install dependencies
26
- COPY requirements.txt .
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
- # Copy the rest of the application code
30
- COPY . .
31
-
32
- # Expose the port for Gradio (Spaces expects the app on port 7860)
33
  EXPOSE 7860
34
 
35
- # Override the base image's entrypoint so our CMD is executed directly.
36
- ENTRYPOINT []
37
 
38
- # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
39
- CMD ["bash", "-c", "vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 python3 app.py"]
 
# syntax=docker/dockerfile:1

# Use the official vLLM Docker image as the base image.
# NOTE(review): :latest is not reproducible — pin a specific tag/digest
# once a known-good vLLM release is confirmed.
FROM vllm/vllm-openai:latest

# Set the working directory (created automatically if missing)
WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached
# until requirements.txt itself changes (COPY . before pip install would
# re-run the install on every source edit).
COPY requirements.txt .

# Install additional Python packages (e.g. Gradio and Requests)
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the repository files into the container
COPY . /app

# Expose the port on which Gradio will run (default: 7860).
# EXPOSE is documentation only; Spaces/operators still publish the port.
EXPOSE 7860

# Ensure the start script is executable
RUN chmod +x start.sh

# Run the startup script (exec form so `bash` is PID 1's direct child)
CMD ["bash", "start.sh"]
start.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Start the vLLM OpenAI-compatible server, wait for it to come up,
# then launch the Gradio chatbot application.
set -euo pipefail

# Start vLLM server in the background with the custom flags.
# FIX(review): bare `vllm --model ...` is not a valid invocation — the CLI
# requires the `serve` subcommand with the model as a positional argument.
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
  --enable-auto-tool-choice \
  --tool-call-parser llama3_json \
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
  --quantization bitsandbytes \
  --load-format bitsandbytes \
  --dtype half \
  --max-model-len 8192 &
VLLM_PID=$!

# Poll the server's health endpoint instead of a fixed `sleep 10` —
# loading a 4-bit 8B model can easily take longer than 10 seconds.
# NOTE(review): assumes the server listens on the default port 8000 — confirm.
for _ in $(seq 1 120); do
  if python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=2)" 2>/dev/null; then
    break
  fi
  # Bail out early if the vLLM process already died so we don't hang.
  if ! kill -0 "$VLLM_PID" 2>/dev/null; then
    echo "vLLM server exited before becoming healthy" >&2
    exit 1
  fi
  sleep 2
done

# Launch the Gradio chatbot application. `exec` replaces this shell so the
# app receives SIGTERM directly from `docker stop`.
exec python app.py