Spaces:
Sleeping
Sleeping
Upload Dockerfile
Browse files — Dockerfile (+9 −12)
Dockerfile
CHANGED
|
@@ -1,17 +1,20 @@
|
|
| 1 |
# Use the official vLLM Docker image as the base image
|
| 2 |
FROM vllm/vllm-openai:latest
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
# Set the working directory
|
| 5 |
WORKDIR /app
|
| 6 |
|
| 7 |
# Install system dependencies if needed (e.g., wget)
|
| 8 |
RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
-
#
|
| 11 |
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
|
| 12 |
&& mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
|
| 13 |
|
| 14 |
-
# Set environment variables for
|
| 15 |
ENV HF_HOME=/app/.cache
|
| 16 |
ENV XDG_CACHE_HOME=/app/.cache
|
| 17 |
ENV MPLCONFIGDIR=/app/.config/matplotlib
|
|
@@ -30,14 +33,8 @@ EXPOSE 7860
|
|
| 30 |
# Override the base image's entrypoint so our CMD is executed directly.
|
| 31 |
ENTRYPOINT []
|
| 32 |
|
|
|
|
|
|
|
|
|
|
| 33 |
# Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
|
| 34 |
-
CMD ["bash", "-c", "vllm.entrypoints.openai.api_server
|
| 35 |
-
--model unsloth/llama-3-8b-Instruct-bnb-4bit \
|
| 36 |
-
--enable-auto-tool-choice \
|
| 37 |
-
--tool-call-parser llama3_json \
|
| 38 |
-
--chat-template examples/tool_chat_template_llama3.1_json.jinja \
|
| 39 |
-
--quantization bitsandbytes \
|
| 40 |
-
--load-format bitsandbytes \
|
| 41 |
-
--dtype half \
|
| 42 |
-
--max-model-len 8192 \
|
| 43 |
-
> vllm.log 2>&1 & python app.py"]
|
|
|
|
| 1 |
# Use the official vLLM Docker image as the base image
|
| 2 |
FROM vllm/vllm-openai:latest
|
| 3 |
|
| 4 |
+
# Use the root user to ensure write permissions (if needed)
|
| 5 |
+
USER root
|
| 6 |
+
|
| 7 |
# Set the working directory
|
| 8 |
WORKDIR /app
|
| 9 |
|
| 10 |
# Install system dependencies if needed (e.g., wget)
|
| 11 |
RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
|
| 12 |
|
| 13 |
+
# Create and set permissions for cache directories (for Hugging Face, matplotlib, etc.)
|
| 14 |
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
|
| 15 |
&& mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
|
| 16 |
|
| 17 |
+
# Set environment variables for cache directories and user agent
|
| 18 |
ENV HF_HOME=/app/.cache
|
| 19 |
ENV XDG_CACHE_HOME=/app/.cache
|
| 20 |
ENV MPLCONFIGDIR=/app/.config/matplotlib
|
|
|
|
| 33 |
# Override the base image's entrypoint so our CMD is executed directly.
|
| 34 |
ENTRYPOINT []
|
| 35 |
|
| 36 |
+
# Create a writable log file (alternatively, you could redirect logs to /tmp)
|
| 37 |
+
RUN touch /app/vllm.log && chmod 666 /app/vllm.log
|
| 38 |
+
|
| 39 |
# Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
|
| 40 |
+
CMD ["bash", "-c", "vllm.entrypoints.openai.api_server --model unsloth/llama-3-8b-Instruct-bnb-4bit --enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja --quantization bitsandbytes --load-format bitsandbytes --dtype half --max-model-len 8192 > /app/vllm.log 2>&1 & python3 app.py"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|