# Builds on the llama.cpp CUDA server image and adds Python 3, a set of Python
# web packages, a pre-downloaded GGUF model and the application files
# (proxy.py, chat.html, entrypoint.sh).
#
# NOTE(review): `server-cuda` is a floating tag; pin a digest once a known-good
# build has been identified so that rebuilds are reproducible.
FROM ghcr.io/ggml-org/llama.cpp:server-cuda

# Both cache locations point at /data. PYTHONUNBUFFERED=1 makes Python write
# stdout/stderr unbuffered so log lines appear immediately.
ENV LLAMA_CACHE=/data \
    HF_HOME=/data \
    PYTHONUNBUFFERED=1

# OS packages. ca-certificates is listed explicitly because the model download
# below uses HTTPS and --no-install-recommends would otherwise not pull it in
# alongside wget. The apt lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Python packages, installed into the system interpreter.
# --break-system-packages overrides pip's "externally managed environment"
# guard on distributions that enable it.
# NOTE(review): versions are unpinned; pin them once the tested set is known.
RUN pip3 install --no-cache-dir --break-system-packages \
        fastapi \
        httpx \
        huggingface-hub \
        python-multipart \
        sse-starlette \
        uvicorn

# Download the model with wget (huggingface-cli is deprecated).
# This layer sits before the COPY instructions so that editing the application
# files does not invalidate it and trigger a new download.
# NOTE(review): the download is not checksum-verified; add a sha256 check once
# the expected digest is known.
RUN mkdir -p /data/model && \
    wget -q -O /data/model/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-Q8_K_P.gguf \
        "https://huggingface.co/HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive/resolve/main/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-Q8_K_P.gguf"

WORKDIR /app

# Application files.
COPY proxy.py /app/proxy.py
COPY chat.html /app/chat.html
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh

# EXPOSE is documentation only; it does not publish the port.
# NOTE(review): presumably the port entrypoint.sh serves on; confirm.
EXPOSE 8000

# Exec form, so the script runs directly as PID 1 and receives stop signals.
# NOTE(review): no USER is set, so the container runs as whatever user the base
# image defaults to (likely root). Confirm whether entrypoint.sh needs that
# before switching to a dedicated non-root user.
ENTRYPOINT ["/app/entrypoint.sh"]