# Base image: the llama.cpp server build published on GHCR.
# NOTE(review): the `server` tag names neither a version nor a digest, so a
# rebuild may pick up a different upstream image — consider pinning it.
FROM ghcr.io/ggml-org/llama.cpp:server

# Install curl (used by the model download step) plus CA certificates for
# HTTPS. The apt package lists are removed in the same layer so they do not
# end up in the image.
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
       ca-certificates \
       curl \
  && rm -rf /var/lib/apt/lists/*

# Fetch the TinyLlama 1.1B Chat (Q4_K_M) GGUF weights into /models at build time.
#   -f         exit non-zero on an HTTP error, so a failed download breaks the
#              build instead of saving the error response as the model file
#   -L         follow any redirects issued for the download URL
#   --retry 3  retry transient network failures before giving up
RUN mkdir -p /models \
  && curl -fL --retry 3 \
       -o /models/tinyllama.gguf \
       https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

# Port the server listens on. EXPOSE is documentation only; publishing the
# port is up to the runtime.
# NOTE: the exec-form CMD below performs no variable expansion, so $PORT is
# not read by it — keep the literal 7860 values in this section in sync.
ENV PORT=7860
EXPOSE 7860

# Probe the server's /health endpoint on the port actually used here.
# NOTE(review): any health check inherited from the base image would
# presumably target its default port rather than 7860 — confirm against the
# upstream image. The start period leaves time for the model to load.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
  CMD ["curl", "-fsS", "http://127.0.0.1:7860/health"]

# Start OpenAI-compatible llama-server.
# Only arguments are given; the executable comes from the base image's
# ENTRYPOINT (presumably llama-server — confirm against the upstream image).
CMD ["--model","/models/tinyllama.gguf","--host","0.0.0.0","--port","7860"]