Spaces:
Sleeping
Sleeping
# Hugging Face Space image: serves the Gemma 3n E2B (Q4_K_M GGUF) model
# through llama.cpp's built-in HTTP server (`llama-server`).
FROM ghcr.io/ggml-org/llama.cpp:server

# Root is needed to install curl and to populate /models.
USER root
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

# Download the quantized model and hand ownership to the unprivileged runtime
# user (uid 1000). `-f` makes curl fail on HTTP errors instead of silently
# saving an error page as model.gguf; `-L` follows the HF CDN redirect.
RUN mkdir -p /models && \
    curl -fL https://huggingface.co/unsloth/gemma-3n-E2B-it-GGUF/resolve/main/gemma-3n-E2B-it-Q4_K_M.gguf -o /models/model.gguf && \
    chown -R 1000:1000 /models

# Drop privileges for runtime.
USER 1000

# llama-server reads its configuration from LLAMA_ARG_* environment variables.
ENV LLAMA_ARG_MODEL=/models/model.gguf
ENV LLAMA_ARG_HOST=0.0.0.0
# 7860 is the port Hugging Face Spaces routes external traffic to.
ENV LLAMA_ARG_PORT=7860
ENV LLAMA_ARG_THREADS=8
ENV LLAMA_ARG_BATCH_SIZE=2048
ENV LLAMA_ARG_UBATCH_SIZE=512
# Context window; was 8196, corrected to 8192 (context sizes are powers of
# two — 8196 matches no conventional value and reads as a typo).
ENV LLAMA_ARG_CTX_SIZE=8192
ENV LLAMA_ARG_FLASH_ATTN=true
ENV LLAMA_ARG_NO_MMAP=false
ENV LLAMA_ARG_MLOCK=true

# Mark the container unhealthy if the server's /health endpoint stops answering.
# NOTE(review): start-period=10s may be shorter than the model-load time for a
# multi-GB GGUF — confirm, or raise it so the first probes don't count as failures.
HEALTHCHECK --interval=30s --timeout=15s --start-period=10s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

ENTRYPOINT ["/app/llama-server"]