# Serves a GGUF model over HTTP using the upstream llama.cpp server image.
# NOTE(review): ":server" is a moving tag; pin a versioned tag or digest if
# reproducible builds are required.
FROM ghcr.io/ggml-org/llama.cpp:server

# Redirect download/cache directories to /tmp.
# NOTE(review): presumably chosen because /tmp is writable for the runtime
# user on the target platform -- confirm. Anything stored under /tmp is not
# expected to survive a container restart, so the model is likely to be
# re-downloaded on each cold start.
ENV LLAMA_CACHE=/tmp/llama-cache \
    HF_HUB_CACHE=/tmp/hf-cache

# Documentation only (does not publish the port); must match "--port" below.
EXPOSE 7860

# Default arguments handed to the base image's ENTRYPOINT (presumably the
# llama-server binary -- confirm against the base image).
# Exec (JSON-array) form: no shell is involved, so no variable expansion
# happens inside these strings.
# Flag meanings below follow the llama.cpp server documentation; verify them
# against the llama.cpp version shipped in the base image:
#   -hf                 model to fetch, as <repo>:<quantization>
#   -c                  context size in tokens
#   -t / -tb            threads for generation / for batch processing
#   -b / -ub            logical / physical batch size
#   -np                 number of parallel slots
#   --flash-attn on     enable flash attention
#   --no-warmup         skip the warmup run at startup
#   --no-context-shift  do not shift the context when it fills up
#   --host 0.0.0.0      listen on all interfaces so the port is reachable
#                       from outside the container
CMD ["-hf", "unsloth/gemma-4-E2B-it-GGUF:Q4_0", \
     "-c", "2048", \
     "-t", "8", \
     "-tb", "16", \
     "-b", "1024", \
     "-ub", "512", \
     "-np", "1", \
     "--flash-attn", "on", \
     "--no-warmup", \
     "--no-context-shift", \
     "--host", "0.0.0.0", \
     "--port", "7860"]