# Extends the llama.cpp CUDA server image with a Python layer (proxy.py,
# chat.html, entrypoint.sh) and bakes a GGUF model into the image at build time.
#
# NOTE(review): `server-cuda` is a floating tag, so rebuilds may pick up a
# different base. Pin a dated tag or an @sha256 digest for reproducible builds.
FROM ghcr.io/ggml-org/llama.cpp:server-cuda

# Runtime environment:
#   LLAMA_CACHE / HF_HOME - both point at /data, the directory that also
#                           receives the model downloaded below.
#   PYTHONUNBUFFERED      - disables Python stdout/stderr buffering so log
#                           lines show up immediately in `docker logs`.
ENV LLAMA_CACHE=/data \
    HF_HOME=/data \
    PYTHONUNBUFFERED=1

# OS packages. ca-certificates is listed explicitly because
# --no-install-recommends does not pull it in, and the HTTPS model download
# below needs a CA bundle. The apt lists are removed in the same layer so they
# never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies, in their own layer so that changing this list does not
# re-run the apt layer. --break-system-packages installs into the distro's
# system Python (no virtualenv is created in this image).
# NOTE(review): versions are unpinned; pin them (or use a requirements file)
# once a known-good set is established.
RUN pip3 install --no-cache-dir --break-system-packages \
        fastapi \
        httpx \
        huggingface-hub \
        python-multipart \
        sse-starlette \
        uvicorn

# Download the model at build time with wget (the original author notes that
# huggingface-cli is deprecated). -nv suppresses the progress bar but still
# prints errors, so a failed download is diagnosable from the build log.
# This layer sits before the COPY of the application files so that editing
# them does not trigger a re-download.
# NOTE(review): the file is not checksum-verified and `resolve/main` tracks a
# moving branch; consider pinning a revision and verifying a sha256.
RUN mkdir -p /data/model \
    && wget -nv \
        -O /data/model/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-Q8_K_P.gguf \
        "https://huggingface.co/HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive/resolve/main/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive-Q8_K_P.gguf"

# Application files live in /app.
WORKDIR /app
COPY proxy.py chat.html entrypoint.sh /app/
RUN chmod +x /app/entrypoint.sh

# Documentation only (EXPOSE does not publish the port).
# NOTE(review): 8000 is presumably the port the Python proxy listens on;
# confirm against entrypoint.sh / proxy.py.
EXPOSE 8000

# Exec form, so the script is started directly rather than through `sh -c`.
# This replaces any ENTRYPOINT inherited from the base image.
ENTRYPOINT ["/app/entrypoint.sh"]