Sumkh committed on
Commit
a263ec3
·
verified ·
1 Parent(s): dfd3125

Upload Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -13
Dockerfile CHANGED
@@ -7,11 +7,11 @@ WORKDIR /app
7
  # Install system dependencies if needed (e.g., wget)
8
  RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
9
 
10
- # Set up writable cache directories (for Hugging Face, matplotlib, etc.)
11
  RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
12
  && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
13
 
14
- # Set environment variables for cache directories and user agent
15
  ENV HF_HOME=/app/.cache
16
  ENV XDG_CACHE_HOME=/app/.cache
17
  ENV MPLCONFIGDIR=/app/.config/matplotlib
@@ -27,15 +27,17 @@ COPY . .
27
  # Expose the port for Gradio (Spaces expects the app on port 7860)
28
  EXPOSE 7860
29
 
 
 
 
30
  # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
31
- # Replace 'new-model-name:latest' with your new model's identifier.
32
- CMD bash -c "vllm.entrypoints.openai.api_server \
33
- --model new-model-name:latest \
34
- --enable-auto-tool-choice \
35
- --tool-call-parser llama3_json \
36
- --chat-template examples/tool_chat_template_llama3.1_json.jinja \
37
- --quantization bitsandbytes \
38
- --load-format bitsandbytes \
39
- --dtype half \
40
- --max-model-len 8192 \
41
- > vllm.log 2>&1 & python app.py"
 
# Install system dependencies if needed (e.g., wget).
# --no-install-recommends keeps the layer small; the apt list cleanup must
# happen in the same RUN so the cache never lands in an image layer.
RUN apt-get update && apt-get install -y --no-install-recommends wget \
    && rm -rf /var/lib/apt/lists/*

# (Optional) Set up writable cache directories if your app needs them.
# NOTE(review): chmod -R 777 is broad, but Hugging Face Spaces runs the
# container as uid 1000, so world-writable caches are the common workaround.
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
    && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib

# Point Hugging Face, generic XDG consumers, and matplotlib at the writable
# cache locations created above (adjust as needed).
ENV HF_HOME=/app/.cache
ENV XDG_CACHE_HOME=/app/.cache
ENV MPLCONFIGDIR=/app/.config/matplotlib
 
# Expose the port for Gradio (Spaces expects the app on port 7860).
# EXPOSE is documentation only; the app itself must bind 7860.
EXPOSE 7860

# Override the base image's entrypoint so our CMD is executed directly.
ENTRYPOINT []

# Launch the vLLM OpenAI-compatible API server in the background (logging to
# vllm.log), then start the Gradio app as the foreground process.
# FIX: `vllm.entrypoints.openai.api_server` is a Python module, not an
# executable on PATH — it must be invoked via `python -m`, otherwise bash
# fails with "command not found" and only the Gradio app starts.
# NOTE(review): two processes in one container is a Spaces convenience
# pattern; the Gradio app (PID of interest) stays in the foreground.
CMD ["bash", "-c", "python -m vllm.entrypoints.openai.api_server \
    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
    --enable-auto-tool-choice \
    --tool-call-parser llama3_json \
    --chat-template examples/tool_chat_template_llama3.1_json.jinja \
    --quantization bitsandbytes \
    --load-format bitsandbytes \
    --dtype half \
    --max-model-len 8192 \
    > vllm.log 2>&1 & python app.py"]