# v4 / Dockerfile
# we2app's picture
# Update Dockerfile
# 81c8014 verified
# Raw
# History Blame Contribute Delete
# 2.37 kB
# Base image providing the llama.cpp server.
# NOTE(review): `:server` is a moving tag; pin a digest if reproducible builds matter.
FROM ghcr.io/ggml-org/llama.cpp:server

# wget is only needed to fetch the model files during the build.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise skip it, and the HTTPS downloads below depend on it.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create a dedicated directory to organize the models
RUN mkdir -p /models
# --- MODEL 1: Gemma 4 Multimodal + Speculative Setup ---
# Each file is fetched in its own RUN so a changed URL only invalidates that layer.
# --progress=dot:giga keeps the build log compact for multi-gigabyte downloads.
# Main model weights.
RUN wget --progress=dot:giga "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf" -O /models/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf
# Draft model referenced by the router preset as spec-draft-model.
RUN wget --progress=dot:giga "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/mtp-gemma-4-E2B-it.gguf" -O /models/mtp-gemma-4-E2B-it.gguf
# Multimodal projector; saved under a model-specific name instead of the generic mmproj-F16.gguf.
RUN wget --progress=dot:giga "https://huggingface.co/unsloth/gemma-4-E2B-it-qat-GGUF/resolve/main/mmproj-F16.gguf" -O /models/gemma-4-E2B-it-mmproj.gguf
# Gemma 3 270M. It must live under /models because the router preset points at
# /models/gemma-3-270m-it-F16.gguf; saving it at the filesystem root left that
# preset entry referring to a file that did not exist.
RUN wget --progress=dot:giga "https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-F16.gguf" -O /models/gemma-3-270m-it-F16.gguf
# --- MODEL 2: Gemma 3 1B Model (Corrected Direct Download Link) ---
# The /resolve/main/ URL returns the raw file rather than an HTML page.
# --progress=dot:giga keeps the build log compact for a large download.
RUN wget --progress=dot:giga "https://huggingface.co/MaziyarPanahi/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it.Q3_K_M.gguf" -O /models/gemma-3-1b-it.Q3_K_M.gguf
# --- CREATE ROUTER CONFIGURATION ---
# Generate /models.ini with a single printf call: the '%s\n' format is reused
# for every argument, so each quoted string below becomes one line of the file
# and each empty string becomes the blank line separating two sections.
# Sections: [gemma-4-vision] (main model + draft model + mmproj),
# [gemma-3-1b] and [gemma-3-270m].
# NOTE(review): key names are assumed to match the server's option names — confirm
# against the llama.cpp version shipped in the base image.
RUN printf '%s\n' \
    '[gemma-4-vision]' \
    'model = /models/gemma-4-E2B-it-qat-UD-Q4_K_XL.gguf' \
    'spec-draft-model = /models/mtp-gemma-4-E2B-it.gguf' \
    'mmproj = /models/gemma-4-E2B-it-mmproj.gguf' \
    'ctx-size = 4096' \
    'flash-attn = on' \
    'ubatch-size = 128' \
    'batch-size = 512' \
    'spec-type = draft-mtp' \
    'spec-draft-n-max = 3' \
    '' \
    '[gemma-3-1b]' \
    'model = /models/gemma-3-1b-it.Q3_K_M.gguf' \
    'ctx-size = 4096' \
    'flash-attn = on' \
    '' \
    '[gemma-3-270m]' \
    'model = /models/gemma-3-270m-it-F16.gguf' \
    'ctx-size = 4096' \
    'flash-attn = on' \
    > /models.ini
# --- START SERVER IN ROUTER MODE ---
# Document the port the server is told to listen on below (metadata only;
# it does not publish the port).
EXPOSE 7860

# Probe the server on the port configured below, using the wget installed earlier
# in this file. `|| exit 1` maps wget's various failure codes to the single
# "unhealthy" code Docker expects.
# NOTE(review): the upstream server image is understood to ship a health check
# aimed at port 8080, which would never pass with the port override — confirm.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD wget -q -O /dev/null http://127.0.0.1:7860/health || exit 1

# Arguments only: no executable is named here, so these are appended to the
# ENTRYPOINT inherited from the base image.
#   --models-preset  the preset file generated above
#   --port / --host  listen on 0.0.0.0:7860
#   -t / -tb         presumably generation and batch thread counts — verify
CMD [ \
"--models-preset", "/models.ini", \
"--port", "7860", \
"--host", "0.0.0.0", \
"-t", "2", \
"-tb", "2" \
]