# Bundles several Gemma GGUF model files with the llama.cpp server image and
# starts the server with a multi-model preset file on port 7860.
FROM ghcr.io/ggml-org/llama.cpp:server

# wget is needed below to fetch the model files. ca-certificates is listed
# explicitly because --no-install-recommends would otherwise be free to skip
# it, and every download is HTTPS. The apt lists are removed in the same layer
# so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Every model path in /models.ini points into this directory.
RUN mkdir -p /models

# One RUN per download so each model file lives in its own cached layer.

# Files for the [gemma-4-vision] preset: main weights, the draft model used for
# speculative decoding, and the mmproj file.
RUN wget "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf" -O /models/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf
RUN wget "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/mtp-gemma-4-E2B-it.gguf" -O /models/mtp-gemma-4-E2B-it.gguf
RUN wget "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/mmproj-F16.gguf" -O /models/gemma-4-E2B-it-mmproj.gguf

# File for the [gemma-3-270m] preset. It must be saved under /models/ because
# that is the path the preset refers to; saving it to / (as before) left the
# preset pointing at a file that did not exist.
RUN wget "https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-F16.gguf" -O /models/gemma-3-270m-it-F16.gguf

# File for the [gemma-3-1b] preset.
RUN wget "https://huggingface.co/MaziyarPanahi/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it.Q3_K_M.gguf" -O /models/gemma-3-1b-it.Q3_K_M.gguf

# Write the preset file consumed by --models-preset. printf emits one argument
# per line (an empty argument yields the blank separator line), so the whole
# file is produced by a single write instead of a chain of appends.
RUN printf '%s\n' \
        '[gemma-4-vision]' \
        'model = /models/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf' \
        'spec-draft-model = /models/mtp-gemma-4-E2B-it.gguf' \
        'mmproj = /models/gemma-4-E2B-it-mmproj.gguf' \
        'ctx-size = 4096' \
        'flash-attn = on' \
        'ubatch-size = 128' \
        'batch-size = 512' \
        'spec-type = draft-mtp' \
        'spec-draft-n-max = 3' \
        '' \
        '[gemma-3-1b]' \
        'model = /models/gemma-3-1b-it.Q3_K_M.gguf' \
        'ctx-size = 4096' \
        'flash-attn = on' \
        '' \
        '[gemma-3-270m]' \
        'model = /models/gemma-3-270m-it-F16.gguf' \
        'ctx-size = 4096' \
        'flash-attn = on' \
        > /models.ini

# Documents the port passed to --port below; EXPOSE does not publish it.
EXPOSE 7860

# Exec-form default arguments. This file sets no ENTRYPOINT, so they are handed
# to whatever entrypoint the base image defines (presumably the llama.cpp
# server binary -- confirm against the base image).
# -t / -tb are llama.cpp's thread-count options (generation / batch work).
CMD [ \
    "--models-preset", "/models.ini", \
    "--port", "7860", \
    "--host", "0.0.0.0", \
    "-t", "2", \
    "-tb", "2" \
]