Spaces:

Rofati
/

backend

Running

backend / Dockerfile

Update Dockerfile

02232d6 verified 12 days ago

1.3 kB

	# Base image: the slim Debian variant of the official Python 3.10 image.
	FROM python:3.10-slim

	# Environment baked into the image (and therefore visible at runtime):
	#   PYTHONUNBUFFERED=1  - Python writes stdout/stderr without buffering.
	#   PORT=7860           - port value exported to the process environment.
	#   OMP_NUM_THREADS=2   - OpenMP thread-count setting.
	ENV PYTHONUNBUFFERED=1 \
	    PORT=7860 \
	    OMP_NUM_THREADS=2

	# All following relative paths (and the container's start directory) are /code.
	WORKDIR /code

	# llama-cpp-python is installed from a prebuilt wheel addressed by direct URL,
	# so nothing is compiled here and no compiler toolchain (build-essential,
	# cmake) has to be installed. Pointing at the wheel file itself also avoids
	# an extra-index-url lookup.
	RUN pip install --no-cache-dir "https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.21/llama_cpp_python-0.3.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"

	# Web framework, ASGI server and the Hugging Face Hub client, from PyPI.
	# Kept in its own layer so it is cached independently of the wheel above.
	RUN pip install --no-cache-dir \
	    fastapi \
	    huggingface_hub \
	    uvicorn

	# Run as an unprivileged account instead of root. Hugging Face Docker Spaces
	# execute the container as UID 1000, so the account is created with that UID
	# and given ownership of /code *before* anything is written there; otherwise
	# the model directory and app files would be root-owned and unwritable at
	# runtime.
	RUN useradd --create-home --uid 1000 user && \
	mkdir -p /code/models && \
	chown -R user:user /code
	USER user

	# Bake the model into the image at BUILD time, not runtime.
	# HF Spaces' disk is ephemeral -- if the download happened in main.py on
	# startup, several GB would be re-pulled every time the Space restarts or
	# wakes from sleep. Doing it here makes the file part of an image layer, so
	# it persists across restarts. This step sits before `COPY . .` so that
	# source-code changes do not invalidate the (expensive) model layer.
	RUN python3 -c "from huggingface_hub import hf_hub_download; \
	hf_hub_download(repo_id='unsloth/gemma-4-E4B-it-GGUF', \
	filename='gemma-4-E4B-it-Q4_K_M.gguf', local_dir='/code/models')"

	# Application source, owned by the runtime user (no follow-up chown layer).
	COPY --chown=user:user . .

	# EXPOSE is documentation only; the actual bind happens in CMD below.
	EXPOSE 7860
	# Exec form so uvicorn is PID 1 and receives stop signals directly.
	CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "30"]