# Serves the TinyLlama 1.1B chat model (Q4_K_M GGUF) with llama.cpp's server
# image, listening on 0.0.0.0:7860.
#
# NOTE(review): `server` is a moving tag; pin a dated tag or an image digest
# if reproducible builds matter.
FROM ghcr.io/ggml-org/llama.cpp:server

# curl downloads the model at build time (and is used by the health probe);
# ca-certificates lets it verify the HTTPS endpoint. The apt lists are removed
# in the same layer so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Download the model into the image.
# --fail aborts the build on an HTTP error instead of saving the error page as
# the model file; --location follows the redirect issued by the /resolve/ URL.
RUN mkdir -p /models \
    && curl --fail --location --output /models/tinyllama.gguf \
        https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

# PORT is exported for the runtime environment only: the exec-form CMD below
# performs no variable expansion, so the port number is repeated there
# literally. Keep PORT, EXPOSE, HEALTHCHECK and CMD in sync.
ENV PORT=7860
EXPOSE 7860

# Probe the server on the port it is actually started on.
# NOTE(review): the base image is believed to ship a health check aimed at its
# own default port and llama-server is expected to answer on /health — confirm
# both against the upstream image.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD ["curl", "--fail", "--silent", "http://127.0.0.1:7860/health"]

# Start the OpenAI-compatible llama-server.
# Only arguments are given here; the executable comes from the base image's
# ENTRYPOINT (presumably llama-server — verify if the base image changes).
CMD ["--model", "/models/tinyllama.gguf", "--host", "0.0.0.0", "--port", "7860"]