Spaces: Sleeping
# Serves a GGUF model over HTTP using the prebuilt llama.cpp server image.
# NOTE(review): port 7860 suggests this targets a Hugging Face Space — confirm.
FROM ghcr.io/ggml-org/llama.cpp:server

# Point both download caches at directories under /tmp.
# NOTE(review): presumably chosen because /tmp is writable by the runtime
# user on the target host — confirm.
ENV LLAMA_CACHE=/tmp/llama-cache
ENV HF_HUB_CACHE=/tmp/hf-cache

# Port the server is told to listen on (must stay in sync with --port below).
EXPOSE 7860

# Exec-form CMD without an executable: the list is handed as arguments to the
# base image's ENTRYPOINT (presumably the llama-server binary — confirm).
#   -hf                  Hugging Face repo[:quant] to download and load
#   -c                   context size, in tokens
#   -t / -tb             threads for generation / for batch and prompt processing
#   -b / -ub             logical / physical maximum batch size
#   -np                  number of parallel server slots
#   --flash-attn on      enable flash attention
#   --no-warmup          skip the warm-up run at startup
#   --no-context-shift   do not shift the context when it fills up
# NOTE(review): -t 8 / -tb 16 will oversubscribe hosts with fewer cores than
# that; verify against the CPU count of the deployment hardware.
CMD ["-hf", "unsloth/gemma-4-E2B-it-GGUF:Q4_0", \
     "-c", "2048", \
     "-t", "8", \
     "-tb", "16", \
     "-b", "1024", \
     "-ub", "512", \
     "-np", "1", \
     "--flash-attn", "on", \
     "--no-warmup", \
     "--no-context-shift", \
     "--host", "0.0.0.0", \
     "--port", "7860"]