# fast-vl-cpu / Dockerfile
FROM ubuntu:22.04
# Non-interactive apt (avoids tzdata and similar configuration prompts)
ENV DEBIAN_FRONTEND=noninteractive
# --- build deps
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake git curl ca-certificates pkg-config \
libopenblas-dev libcurl4-openssl-dev ccache \
&& rm -rf /var/lib/apt/lists/*
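# Note: ccache only speeds up rebuilds when paired with a persistent cache
# (e.g. a BuildKit cache mount); without one it is harmless but inert.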
# --- build llama.cpp with BLAS + server
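# GGML_NATIVE=OFF avoids -march=native so the binary runs on any x86-64 host;
# OpenBLAS accelerates the CPU matrix math (mainly prompt processing).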
RUN git clone --depth=1 https://github.com/ggerganov/llama.cpp /app && \
cmake -S /app -B /app/build \
-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS \
-DGGML_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache && \
cmake --build /app/build --target llama-server -j2 && \
ln -s /app/build/bin/llama-server /usr/local/bin/llama-server
# --- runtime env + user
RUN mkdir -p /models /workspace /data \
&& useradd -m -u 1000 user \
&& chown -R user:user /models /workspace /data
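# /data doubles as the model cache; on Hugging Face Spaces this is where
# persistent storage is mounted when enabled.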
WORKDIR /workspace
# Defaults (override at deploy time as needed)
ENV MODEL_REPO="mradermacher/LFM2-VL-450M-GGUF" \
MODEL_FILE="LFM2-VL-450M.Q3_K_L.gguf" \
MMPROJ_FILE="LFM2-VL-450M.mmproj-Q8_0.gguf" \
CTX_SIZE="4096" \
THREADS="4" \
PORT="7860"
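# Example run-time override (the file name here is illustrative):
#   docker run -e MODEL_FILE="LFM2-VL-450M.Q8_0.gguf" -e THREADS=8 ...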
# Use bash for subsequent RUN instructions
SHELL ["/bin/bash", "-c"]
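# NOTE: the heredoc below requires BuildKit (the default builder since Docker 23)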
# --- startup script (downloads the model files if missing, then starts the server)
RUN cat > /usr/local/bin/start.sh <<'EOF' && chmod +x /usr/local/bin/start.sh
#!/usr/bin/env bash
set -euo pipefail
PORT="${PORT:-7860}"
CTX_SIZE="${CTX_SIZE:-4096}"
THREADS="${THREADS:-4}"
# Fallback defaults mirror the image's ENV defaults above
MODEL_REPO="${MODEL_REPO:-mradermacher/LFM2-VL-450M-GGUF}"
MODEL_FILE="${MODEL_FILE:-LFM2-VL-450M.Q3_K_L.gguf}"
MMPROJ_FILE="${MMPROJ_FILE:-LFM2-VL-450M.mmproj-Q8_0.gguf}"
MODEL_PATH="/data/${MODEL_FILE}"
MMPROJ_PATH="/data/${MMPROJ_FILE}"
HF_URL="https://huggingface.co/${MODEL_REPO}/resolve/main/${MODEL_FILE}"
MMPROJ_URL="https://huggingface.co/${MODEL_REPO}/resolve/main/${MMPROJ_FILE}"
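# Note: Hugging Face serves raw repo files at /resolve/<revision>/<path>;
# set HF_TOKEN to download gated or private repos.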
# Fetch to a temp file first so an interrupted transfer never leaves a
# partial file behind to satisfy the existence check on the next start.
download() {
  local url="$1" dest="$2"
  local auth=()
  if [[ -n "${HF_TOKEN:-}" ]]; then
    auth=(-H "Authorization: Bearer ${HF_TOKEN}")
  fi
  curl -fL --retry 3 --progress-bar "${auth[@]}" -o "${dest}.tmp" "$url"
  mv "${dest}.tmp" "$dest"
}
mkdir -p /data
if [[ ! -f "$MODEL_PATH" ]]; then
  echo "Downloading ${MODEL_REPO}/${MODEL_FILE} to /data..."
  download "$HF_URL" "$MODEL_PATH"
fi
if [[ ! -f "$MMPROJ_PATH" ]]; then
  echo "Downloading ${MODEL_REPO}/${MMPROJ_FILE} to /data..."
  download "$MMPROJ_URL" "$MMPROJ_PATH"
fi
# --jinja applies the model's built-in chat template, which multimodal
# chat formatting relies on
exec llama-server \
--model "$MODEL_PATH" \
--mmproj "$MMPROJ_PATH" \
--host 0.0.0.0 \
--port "$PORT" \
--ctx-size "$CTX_SIZE" \
--threads "$THREADS" \
--jinja
EOF
EXPOSE 7860
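# 7860 is the default app_port for Docker-based Hugging Face Spaces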
USER user
ENTRYPOINT ["/usr/local/bin/start.sh"]
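# Example usage (the token value and prompt are illustrative):
#   docker build -t fast-vl-cpu .
#   docker run -p 7860:7860 -e HF_TOKEN=hf_xxx fast-vl-cpu
#   curl http://localhost:7860/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages":[{"role":"user","content":"Hello"}]}'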