backend / Dockerfile
Rofati's picture
Update Dockerfile
02232d6 verified
Raw
History Blame Contribute Delete
1.3 kB
# Base image: the slim variant of the official CPython 3.10 image.
FROM python:3.10-slim

# Environment baked into the image:
#   PYTHONUNBUFFERED=1  Python writes stdout/stderr unbuffered, so log lines
#                       reach the container log immediately.
#   PORT=7860           Same port number that EXPOSE and the uvicorn CMD use
#                       further down (the CMD hard-codes the value rather than
#                       reading this variable).
#   OMP_NUM_THREADS=2   Limits OpenMP thread pools to two threads for
#                       libraries that honor the variable.
ENV PYTHONUNBUFFERED=1 \
    PORT=7860 \
    OMP_NUM_THREADS=2

# Working directory for every later instruction and for the running container.
WORKDIR /code
# llama-cpp-python comes from a prebuilt release wheel addressed by its full
# URL. Nothing is compiled in this image, so build-essential/cmake are not
# installed, and pip needs no --extra-index-url lookup to find the wheel.
# The wheel's platform tag is x86_64-only.
RUN pip install --no-cache-dir \
    "https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.21/llama_cpp_python-0.3.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"

# Web framework, ASGI server, and the Hugging Face Hub client used by the
# model download step later in this file.
# NOTE(review): these three carry no version specifiers, so a rebuild may pick
# up newer releases -- consider pinning once known-good versions are settled.
RUN pip install --no-cache-dir \
    fastapi \
    huggingface_hub \
    uvicorn
# Do not build or run as root. Create an unprivileged account and give it the
# working directory before anything is written there, so the model file and
# the app source end up owned by the account that serves them (a later
# recursive chown would duplicate the multi-GB model layer).
# UID 1000 is the user ID Hugging Face Spaces runs Docker containers as.
RUN useradd --create-home --uid 1000 appuser \
 && mkdir -p /code/models \
 && chown -R appuser:appuser /code
USER appuser
# Set HOME explicitly so tools that write per-user caches use appuser's home.
ENV HOME=/home/appuser

# Bake the model into the image at BUILD time, not runtime.
# HF Spaces' disk is ephemeral -- if you download in main.py on startup,
# you re-pull several GB every single time the Space restarts/sleeps+wakes.
# Doing it here means it's part of the image layer and persists across restarts.
RUN python3 -c "from huggingface_hub import hf_hub_download; \
hf_hub_download(repo_id='unsloth/gemma-4-E4B-it-GGUF', \
filename='gemma-4-E4B-it-Q4_K_M.gguf', local_dir='/code/models')"

# Application source goes in last so that code edits do not invalidate the
# dependency and model layers above. --chown keeps the files owned by appuser.
COPY --chown=appuser:appuser . .

# Documents the listening port; it must match --port in the CMD below.
EXPOSE 7860

# Exec-form CMD: uvicorn is started directly (no wrapping shell), serving the
# `app` object from main.py with a single worker on all interfaces.
CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "30"]