# syntax=docker/dockerfile:1
# (BuildKit syntax directive; required for the RUN heredoc used below)
# Dockerfile
FROM ubuntu:22.04
# Non-interactive apt: suppresses tzdata/debconf prompts during install
ENV DEBIAN_FRONTEND=noninteractive
# --- build deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential cmake git curl ca-certificates pkg-config \
    libopenblas-dev libcurl4-openssl-dev ccache \
    && rm -rf /var/lib/apt/lists/*
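# Note: libcurl4-openssl-dev is listed because recent llama.cpp builds enable
# their libcurl-based download support (LLAMA_CURL) by default; ccache just
# speeds up repeated image builds.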
# --- build llama.cpp with OpenBLAS + the HTTP server.
# GGML_NATIVE=OFF keeps the binary portable (no -march=native), and -j2 suits
# a small 2-vCPU builder such as a basic Space.
RUN git clone --depth=1 https://github.com/ggerganov/llama.cpp /app && \
    cmake -S /app -B /app/build \
      -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS \
      -DGGML_NATIVE=OFF \
      -DLLAMA_BUILD_SERVER=ON \
      -DLLAMA_BUILD_EXAMPLES=OFF \
      -DLLAMA_BUILD_TESTS=OFF \
      -DCMAKE_C_COMPILER_LAUNCHER=ccache \
      -DCMAKE_CXX_COMPILER_LAUNCHER=ccache && \
    cmake --build /app/build --target llama-server -j2 && \
    ln -s /app/build/bin/llama-server /usr/local/bin/llama-server
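# (Optional) sanity check that the freshly built binary links and starts;
# uncomment if your llama.cpp revision supports --version (recent ones do):
# RUN llama-server --version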
# --- runtime dirs + non-root user (UID 1000 is the Hugging Face Spaces convention)
RUN mkdir -p /models /workspace /data \
    && useradd -m -u 1000 user \
    && chown -R user:user /models /workspace /data
WORKDIR /workspace
# Defaults (override at deploy time as needed)
ENV MODEL_REPO="mradermacher/LFM2-VL-450M-GGUF" \
    MODEL_FILE="LFM2-VL-450M.Q3_K_L.gguf" \
    MMPROJ_FILE="LFM2-VL-450M.mmproj-Q8_0.gguf" \
    CTX_SIZE="4096" \
    THREADS="4" \
    PORT="7860"
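# HF_TOKEN is deliberately not baked into the image; supply it at runtime
# (e.g. as a Space secret) and the startup script below uses it for
# gated or private repos.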
# Use bash so the startup-script heredoc below behaves predictably
SHELL ["/bin/bash", "-lc"]
# --- startup script (downloads model files if missing, then starts the server)
RUN cat > /usr/local/bin/start.sh <<'EOF' && chmod +x /usr/local/bin/start.sh
#!/usr/bin/env bash
set -euo pipefail

PORT="${PORT:-7860}"
CTX_SIZE="${CTX_SIZE:-4096}"
THREADS="${THREADS:-4}"
# Fallbacks mirror the image's ENV defaults so the script also works standalone
MODEL_REPO="${MODEL_REPO:-mradermacher/LFM2-VL-450M-GGUF}"
MODEL_FILE="${MODEL_FILE:-LFM2-VL-450M.Q3_K_L.gguf}"
MMPROJ_FILE="${MMPROJ_FILE:-LFM2-VL-450M.mmproj-Q8_0.gguf}"

MODEL_PATH="/data/${MODEL_FILE}"
MMPROJ_PATH="/data/${MMPROJ_FILE}"

# https://huggingface.co/<repo>/resolve/main/<file> is the Hub's direct-download URL
HF_URL="https://huggingface.co/${MODEL_REPO}/resolve/main/${MODEL_FILE}"
MMPROJ_URL="https://huggingface.co/${MODEL_REPO}/resolve/main/${MMPROJ_FILE}"
mkdir -p /data

# Fetch a file into /data unless it is already cached there
download() {
  local url="$1" dest="$2"
  if [[ -f "$dest" ]]; then
    return 0
  fi
  echo "Downloading ${url} -> ${dest}..."
  local auth=()
  if [[ -n "${HF_TOKEN:-}" ]]; then
    auth=(-H "Authorization: Bearer ${HF_TOKEN}")
  fi
  # Write to a temp file first so an interrupted transfer never leaves a corrupt model
  curl -fL --progress-bar "${auth[@]}" -o "${dest}.part" "$url"
  mv "${dest}.part" "$dest"
}

download "$HF_URL" "$MODEL_PATH"
download "$MMPROJ_URL" "$MMPROJ_PATH"
# Start the OpenAI-compatible server; --jinja applies the chat template
# embedded in the GGUF metadata
exec llama-server \
  --model "$MODEL_PATH" \
  --mmproj "$MMPROJ_PATH" \
  --host 0.0.0.0 \
  --port "$PORT" \
  --ctx-size "$CTX_SIZE" \
  --threads "$THREADS" \
  --jinja
EOF
EXPOSE 7860
USER user
ENTRYPOINT ["/usr/local/bin/start.sh"]
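
A minimal local smoke test, as a sketch: the image tag, the ./data bind mount,
and the prompts are all illustrative, and the routes assume the standard
llama-server HTTP API (GET /health plus the OpenAI-compatible
/v1/chat/completions; recent builds accept image_url content parts when an
--mmproj is loaded).

# Build, then run with /data persisted on the host (the host dir must be
# writable by UID 1000) and an optional HF token for gated repos
docker build -t lfm2-vl-server .
docker run -p 7860:7860 -v "$PWD/data:/data" -e HF_TOKEN="$HF_TOKEN" lfm2-vl-server

# Liveness probe
curl http://localhost:7860/health

# Text-only chat completion
curl http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages":[{"role":"user","content":"Say hello in five words."}]}'

# Vision request: pair text with an image_url content part (URL is illustrative)
curl http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages":[{"role":"user","content":[
        {"type":"text","text":"What is in this image?"},
        {"type":"image_url","image_url":{"url":"https://example.com/cat.jpg"}}]}]}'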