# llama.cpp server image with several Gemma GGUF models baked in. The models
# are exposed through the preset file /models.ini, which is passed to the
# server via --models-preset in CMD below.
#
# NOTE(review): ":server" is a moving tag; pin a dated tag or an image digest
# if reproducible builds are required.
FROM ghcr.io/ggml-org/llama.cpp:server

# wget is used only to fetch the model files below. ca-certificates is listed
# explicitly because --no-install-recommends would otherwise skip it, and the
# downloads are served over HTTPS. The apt lists are removed in the same layer
# so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Dedicated directory that holds every model file referenced by /models.ini.
RUN mkdir -p /models

# Each download is its own layer so that changing one model URL does not
# invalidate the cached layers of the others.

# --- MODEL 1: Gemma 4 E2B (main weights, MTP draft model, multimodal projector) ---
RUN wget "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf" \
        -O /models/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf
RUN wget "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/mtp-gemma-4-E2B-it.gguf" \
        -O /models/mtp-gemma-4-E2B-it.gguf
RUN wget "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/mmproj-F16.gguf" \
        -O /models/gemma-4-E2B-it-mmproj.gguf

# --- MODEL 2: Gemma 3 1B ---
RUN wget "https://huggingface.co/MaziyarPanahi/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it.Q3_K_M.gguf" \
        -O /models/gemma-3-1b-it.Q3_K_M.gguf

# --- MODEL 3: Gemma 3 270M ---
# Saved under /models (not /) so the path matches the [gemma-3-270m] preset
# written to /models.ini below.
RUN wget "https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-F16.gguf" \
        -O /models/gemma-3-270m-it-F16.gguf

# --- CREATE ROUTER CONFIGURATION ---
# printf '%s\n' emits each argument on its own line, so the whole preset file
# is written with a single redirect and no nested quoting. An empty argument
# produces the blank line that separates two sections.
RUN printf '%s\n' \
        '[gemma-4-vision]' \
        'model = /models/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf' \
        'spec-draft-model = /models/mtp-gemma-4-E2B-it.gguf' \
        'mmproj = /models/gemma-4-E2B-it-mmproj.gguf' \
        'ctx-size = 4096' \
        'flash-attn = on' \
        'ubatch-size = 128' \
        'batch-size = 512' \
        'spec-type = draft-mtp' \
        'spec-draft-n-max = 3' \
        '' \
        '[gemma-3-1b]' \
        'model = /models/gemma-3-1b-it.Q3_K_M.gguf' \
        'ctx-size = 4096' \
        'flash-attn = on' \
        '' \
        '[gemma-3-270m]' \
        'model = /models/gemma-3-270m-it-F16.gguf' \
        'ctx-size = 4096' \
        'flash-attn = on' \
        > /models.ini

# Documents the port passed via --port below; EXPOSE does not publish it.
EXPOSE 7860

# --- START SERVER IN ROUTER MODE ---
# Arguments only: no executable is named here, so this relies on the
# ENTRYPOINT inherited from the base image (presumably the llama-server
# binary -- confirm against the base image).
CMD [ \
    "--models-preset", "/models.ini", \
    "--port", "7860", \
    "--host", "0.0.0.0", \
    "-t", "2", \
    "-tb", "2" \
]