# syntax=docker/dockerfile:1

# Image that bundles llama.cpp, a Python virtualenv with the FastAPI serving
# stack, and the Gemma GGUF weights (model + multimodal projector) downloaded
# at build time into /app.
#
# Build (anonymous download):
#   docker build -t <tag> .
# Build with a Hugging Face token (preferred, token never lands in a layer):
#   docker build --secret id=hf_token,src=<file-with-token> -t <tag> .

# NOTE(review): ":full" is a floating tag; consider pinning a release tag or
# digest for reproducible builds.
FROM ghcr.io/ggml-org/llama.cpp:full

WORKDIR /app

# OS packages. update + install + list cleanup happen in one layer so the apt
# index is never stale and never shipped. ca-certificates is listed explicitly
# because --no-install-recommends would otherwise not pull it in for curl.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Dedicated virtualenv; putting it first on PATH makes `python3`/`pip` below
# (and at runtime) resolve to the venv.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Build-time tooling (model download) and the runtime web stack are kept in
# separate layers so changing one does not invalidate the other.
RUN pip install --no-cache-dir -U pip huggingface_hub
RUN pip install --no-cache-dir -U fastapi "uvicorn[standard]" httpx python-multipart pillow

# Hugging Face credentials for the download step.
# Preferred: BuildKit secret `hf_token` (optional; mounted only for this RUN).
# Deprecated fallback: `--build-arg HF_TOKEN=...` still works for existing
# build scripts, but build args are visible in `docker history` - avoid it for
# real tokens.
ARG HF_TOKEN=""

# Download the quantized model and the multimodal projector into /app.
# huggingface_hub picks the token up from the HF_TOKEN environment variable;
# when the secret file is present and non-empty it overrides the build arg.
RUN --mount=type=secret,id=hf_token \
    if [ -s /run/secrets/hf_token ]; then \
        HF_TOKEN="$(cat /run/secrets/hf_token)" && export HF_TOKEN; \
    fi \
    && python3 -c 'from huggingface_hub import hf_hub_download; \
repo="unsloth/gemma-4-E2B-it-GGUF"; \
hf_hub_download(repo_id=repo, filename="gemma-4-E2B-it-UD-Q5_K_XL.gguf", local_dir="/app"); \
hf_hub_download(repo_id=repo, filename="mmproj-BF16.gguf", local_dir="/app")'

# The running container sees an empty HF_TOKEN unless one is supplied
# explicitly (e.g. `docker run -e HF_TOKEN=...`).
ENV HF_TOKEN=""

# Application code goes last: it changes most often, so edits to it do not
# invalidate the dependency or model-download layers above.
COPY main.py /app/
COPY --chmod=755 start.sh /app/

# NOTE(review): no USER is set, so the container runs as the base image's
# default user - consider a dedicated non-root user once the runtime needs of
# start.sh are confirmed.

# Clear the base image's entrypoint so CMD runs the start script directly.
ENTRYPOINT []
CMD ["/app/start.sh"]