Sumkh committed on
Commit
f189c3b
·
verified ·
1 Parent(s): c10bb5d

Upload 4 files

Browse files
Files changed (2) hide show
  1. Dockerfile +9 -28
  2. start.sh +17 -0
Dockerfile CHANGED
@@ -1,39 +1,20 @@
1
  # Use the official vLLM Docker image as the base image
2
  FROM vllm/vllm-openai:latest
3
 
4
- # Ensure we run as root (the default) so we can set permissions
5
- USER root
6
-
7
- # Install system dependencies
8
- RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
9
-
10
- # Set working directory
11
  WORKDIR /app
12
 
13
- # Create and set permissions for cache directories
14
- RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
15
- && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib \
16
- && mkdir -p /.EasyOCR && chmod -R 777 /.EasyOCR
17
 
18
- # Set environment variables for cache directories
19
- ENV HF_HOME=/app/.cache
20
- ENV XDG_CACHE_HOME=/app/.cache
21
- ENV MPLCONFIGDIR=/app/.config/matplotlib
22
- ENV USER_AGENT="my-gradio-app"
23
- ENV EASYOCR_CACHE_DIR=/app/.EasyOCR
24
-
25
- # Copy the requirements file and install dependencies
26
- COPY requirements.txt .
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
- # Copy the rest of the application code
30
- COPY . .
31
-
32
- # Expose the port for Gradio (Spaces expects the app on port 7860)
33
  EXPOSE 7860
34
 
35
- # Override the base image's entrypoint so our CMD is executed directly.
36
- ENTRYPOINT []
37
 
38
- # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
39
- CMD ["bash", "-c", "vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 python3 app.py"]
 
# syntax=docker/dockerfile:1

# Use the official vLLM Docker image as the base image.
# NOTE(review): :latest is not reproducible — pin a specific tag/digest
# once a known-good vLLM release is confirmed.
FROM vllm/vllm-openai:latest

# Set the working directory (created automatically if missing)
WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached
# until requirements.txt itself changes (COPY . before pip install would
# re-run the install on every source edit).
COPY requirements.txt .

# Install additional Python packages (e.g. Gradio and Requests)
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the repository files into the container
COPY . /app

# Expose the port on which Gradio will run (default: 7860).
# EXPOSE is documentation only; Spaces/operators still publish the port.
EXPOSE 7860

# Ensure the start script is executable
RUN chmod +x start.sh

# Run the startup script (exec form so `bash` is PID 1's direct child)
CMD ["bash", "start.sh"]
start.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Start the vLLM OpenAI-compatible server, wait for it to come up,
# then launch the Gradio chatbot application.
set -euo pipefail

# Start vLLM server in the background with the custom flags.
# FIX(review): bare `vllm --model ...` is not a valid invocation — the CLI
# requires the `serve` subcommand with the model as a positional argument.
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
  --enable-auto-tool-choice \
  --tool-call-parser llama3_json \
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
  --quantization bitsandbytes \
  --load-format bitsandbytes \
  --dtype half \
  --max-model-len 8192 &
VLLM_PID=$!

# Poll the server's health endpoint instead of a fixed `sleep 10` —
# loading a 4-bit 8B model can easily take longer than 10 seconds.
# NOTE(review): assumes the server listens on the default port 8000 — confirm.
for _ in $(seq 1 120); do
  if python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=2)" 2>/dev/null; then
    break
  fi
  # Bail out early if the vLLM process already died so we don't hang.
  if ! kill -0 "$VLLM_PID" 2>/dev/null; then
    echo "vLLM server exited before becoming healthy" >&2
    exit 1
  fi
  sleep 2
done

# Launch the Gradio chatbot application. `exec` replaces this shell so the
# app receives SIGTERM directly from `docker stop`.
exec python app.py