# Serves a GGUF model over HTTP on port 7860 using the prebuilt llama.cpp
# server image. The model is fetched from Hugging Face at container start.
#
# NOTE(review): `server` is a moving tag; pin a release tag or digest for
# reproducible builds once a known-good version has been chosen.
FROM ghcr.io/ggml-org/llama.cpp:server

# Keep download caches under /tmp so they are writable regardless of which
# user the runtime starts the container as.
ENV LLAMA_CACHE=/tmp/llama-cache \
    HF_HUB_CACHE=/tmp/hf-cache

# Documentation only: the server listens on the port passed via --port below.
EXPOSE 7860

# The upstream image ships a HEALTHCHECK that probes port 8080; since the
# server is moved to 7860 here, that inherited probe would never succeed and
# the container would be reported as unhealthy. Re-declare it for the real port.
# The long start period leaves room for the first-run model download and load,
# during which /health does not yet return success.
# NOTE(review): relies on curl being present in the base image (the inherited
# upstream health check uses it) — confirm if the base tag changes.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10m --retries=3 \
    CMD ["curl", "-fsS", "http://127.0.0.1:7860/health"]

# Arguments appended to the base image's ENTRYPOINT (presumably llama-server):
#   -hf     Hugging Face repo[:quant] to download and serve
#   -c      context size in tokens
#   --host  bind on all interfaces so the published port is reachable
#   --port  must stay in sync with EXPOSE and HEALTHCHECK above
#   -ngl 0  offload no layers to a GPU (CPU-only inference)
CMD ["-hf", "Jackrong/Qwopus3.5-9B-Coder-MTP-GGUF:Q4_K_M", "-c", "2048", "--host", "0.0.0.0", "--port", "7860", "-ngl", "0"]