Upload Dockerfile

Dockerfile  CHANGED  (+15, -13)
@@ -7,11 +7,11 @@ WORKDIR /app
 # Install system dependencies if needed (e.g., wget)
 RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
 
-# Set up writable cache directories
+# (Optional) Set up writable cache directories if your app needs them
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
     && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
 
-# Set environment variables for
+# Set environment variables for caching and user agent (adjust as needed)
 ENV HF_HOME=/app/.cache
 ENV XDG_CACHE_HOME=/app/.cache
 ENV MPLCONFIGDIR=/app/.config/matplotlib
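
A note on the hunk above: Spaces containers run as a non-root user, so HF_HOME, XDG_CACHE_HOME, and MPLCONFIGDIR must point at writable paths (hence the chmod -R 777). A minimal preflight sketch that app.py could run at startup, using only the Python standard library; the helper name is illustrative, not part of this commit:

import os
import pathlib

def assert_writable(path):
    # Fail fast if a cache directory is missing or not writable.
    p = pathlib.Path(path)
    p.mkdir(parents=True, exist_ok=True)
    probe = p / ".write_test"
    probe.touch()
    probe.unlink()

for var in ("HF_HOME", "XDG_CACHE_HOME", "MPLCONFIGDIR"):
    assert_writable(os.environ[var])

With these variables set, huggingface_hub caches downloads under $HF_HOME/hub and matplotlib keeps its font cache under $MPLCONFIGDIR, so neither library trips over a read-only $HOME.
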
@@ -27,15 +27,17 @@ COPY . .
 # Expose the port for Gradio (Spaces expects the app on port 7860)
 EXPOSE 7860
 
+# Override the base image's entrypoint so our CMD is executed directly.
+ENTRYPOINT []
+
 # Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
-… (ten removed lines of the old CMD, collapsed in the original diff view) …
-    > vllm.log 2>&1 & python app.py"
+CMD ["bash", "-c", "python -m vllm.entrypoints.openai.api_server \
+    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
+    --enable-auto-tool-choice \
+    --tool-call-parser llama3_json \
+    --chat-template examples/tool_chat_template_llama3.1_json.jinja \
+    --quantization bitsandbytes \
+    --load-format bitsandbytes \
+    --dtype half \
+    --max-model-len 8192 \
+    > vllm.log 2>&1 & python app.py"]
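
One caveat with the CMD above: bash puts the vLLM server in the background and starts app.py immediately, so the Gradio UI can come up before the model has finished loading. A sketch of a readiness guard app.py could run first, assuming vLLM serves its OpenAI-compatible API on the default port 8000; the function name and timeout are illustrative:

import time
import urllib.request

def wait_for_vllm(url="http://localhost:8000/v1/models", timeout=600):
    # Poll the OpenAI-compatible /v1/models endpoint until the server responds.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return
        except OSError:
            pass  # not up yet; URLError and socket timeouts are OSError subclasses
        time.sleep(5)
    raise RuntimeError(f"vLLM not ready after {timeout}s; check vllm.log")

wait_for_vllm()
# ...then launch Gradio on the port Spaces expects:
# demo.launch(server_name="0.0.0.0", server_port=7860)
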
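Once the server is ready, app.py can talk to it with any OpenAI-compatible client. A hedged usage sketch with the openai Python package (assumed to be installed in the image; vLLM ignores the API key, but the client requires one):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

resp = client.chat.completions.create(
    model="unsloth/llama-3-8b-Instruct-bnb-4bit",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(resp.choices[0].message.content)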