Spaces:
Runtime error
Runtime error
Update Dockerfile
Browse files- Dockerfile +24 -6
Dockerfile
CHANGED
|
@@ -1,23 +1,41 @@
|
|
| 1 |
FROM ghcr.io/ggml-org/llama.cpp:server
|
| 2 |
|
| 3 |
USER root
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
RUN mkdir -p /models && \
|
| 7 |
curl -L https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_M.gguf -o /models/model.gguf && \
|
| 8 |
chown -R 1000:1000 /models
|
| 9 |
|
|
|
|
| 10 |
COPY --chown=1000:1000 . .
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
USER 1000
|
| 13 |
|
| 14 |
-
# High-concurrency settings
|
| 15 |
ENV LLAMA_ARG_MODEL=/models/model.gguf
|
| 16 |
ENV LLAMA_ARG_HOST=127.0.0.1
|
| 17 |
ENV LLAMA_ARG_PORT=8080
|
| 18 |
ENV LLAMA_ARG_THREADS=8
|
| 19 |
-
ENV LLAMA_ARG_CTX_SIZE=
|
| 20 |
-
ENV LLAMA_ARG_BATCH_SIZE=512
|
| 21 |
|
| 22 |
-
# Run our Python orchestrator
|
| 23 |
ENTRYPOINT ["python3", "app.py"]
|
|
|
|
# syntax=docker/dockerfile:1
# llama.cpp server base image, fronted by a Python orchestrator (app.py).
FROM ghcr.io/ggml-org/llama.cpp:server

USER root

# Install Python and core tools needed by the orchestrator.
# --no-install-recommends keeps the image small; clean apt lists in the
# same layer so the cache never reaches the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Install the orchestrator's libraries into the system Python.
# --break-system-packages overrides PEP 668 (Debian marks the system
# interpreter externally managed); pip installs into python3's own default
# search path, so no PYTHONPATH override is needed — hardcoding a
# /usr/local/lib/python3.10/... path breaks as soon as the base image
# ships a different Python minor version.
RUN pip3 install --no-cache-dir --break-system-packages \
        duckduckgo-search \
        fastapi \
        python-multipart \
        requests \
        uvicorn

# Download the Gemma-3-4B-it quantized model.
# -f (--fail) makes curl exit non-zero on an HTTP error instead of saving
# the error page as model.gguf, which would only surface later as a
# runtime failure when llama.cpp tries to load it.
RUN mkdir -p /models && \
    curl -fL https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_M.gguf -o /models/model.gguf && \
    chown -R 1000:1000 /models

WORKDIR /app
COPY --chown=1000:1000 . .

# Drop root before runtime; UID 1000 owns /models and /app.
USER 1000

# llama.cpp server configuration, read by the embedded server at startup.
# Host stays on loopback: only the local orchestrator talks to it.
ENV LLAMA_ARG_MODEL=/models/model.gguf
ENV LLAMA_ARG_HOST=127.0.0.1
ENV LLAMA_ARG_PORT=8080
ENV LLAMA_ARG_THREADS=8
ENV LLAMA_ARG_CTX_SIZE=1024

# Run the Python orchestrator as PID 1 (exec form: receives SIGTERM directly).
ENTRYPOINT ["python3", "app.py"]