Spaces:

build-small-hackathon
/

paper-decoder

Sleeping

App Files Files Community

paper-decoder / Dockerfile

aleks-gotsa

fix: build on llama.cpp ubuntu24 base, resolves glibc mismatch

849e756 16 days ago

Raw

History Blame Contribute Delete

1.72 kB

	# Paper Decoder — HF Space (Docker SDK)
	# Base: official llama.cpp server image (Ubuntu 24.04). Building directly on it
	# avoids the glibc mismatch hit when copying its binaries into Debian bookworm
	# (binaries need GLIBC_2.38; bookworm ships 2.36).
	# llama-server binary and its libs live in /app.
	#
	# The ":server" tag carries no version, so the base is exposed as a build
	# argument. The default keeps the original behaviour, while
	#   --build-arg LLAMA_CPP_IMAGE=ghcr.io/ggml-org/llama.cpp:server@sha256:<digest>
	# pins a reproducible base without editing this file.
	ARG LLAMA_CPP_IMAGE=ghcr.io/ggml-org/llama.cpp:server

	FROM ${LLAMA_CPP_IMAGE}

	# DEBIAN_FRONTEND is only needed while apt runs during the build, so it is a
	# build ARG (visible to the RUN steps of this stage) rather than an ENV that
	# would persist into the running container.
	ARG DEBIAN_FRONTEND=noninteractive

	# Let the dynamic loader find the shared libraries that ship next to
	# llama-server in /app.
	ENV LD_LIBRARY_PATH=/app

	# OS packages: the Python 3 interpreter with pip, plus Tesseract OCR and its
	# Ukrainian and Russian traineddata. curl is not listed because the base
	# image already provides it. The apt package lists are removed in the same
	# layer so they do not end up in the image.
	RUN apt-get update \
	    && apt-get install --yes --no-install-recommends \
	        python3 python3-pip \
	        tesseract-ocr tesseract-ocr-rus tesseract-ocr-ukr \
	    && rm -rf /var/lib/apt/lists/*

	# Bake the model into the image at build time.
	# Pin confirmed from first build log: Qwen3-4B-Instruct-2507-Q4_K_M.gguf
	# Ubuntu 24.04 pip is PEP 668 managed -> --break-system-packages.
	# `ls` keeps the file size visible in the build log; the final `test -s` makes
	# the build fail if the expected file did not land (for example when the
	# --include pattern matches nothing) instead of shipping an image without a
	# model.
	RUN pip3 install --no-cache-dir --break-system-packages "huggingface_hub>=1.0,<2.0" && \
	    hf download unsloth/Qwen3-4B-Instruct-2507-GGUF \
	        --include "Qwen3-4B-Instruct-2507-Q4_K_M.gguf" --local-dir /opt/models && \
	    ls -lh /opt/models && \
	    test -s /opt/models/Qwen3-4B-Instruct-2507-Q4_K_M.gguf

	# Application Python dependencies. Only the manifest is copied at this point,
	# so this layer is rebuilt when requirements.txt changes and not when the
	# application sources do.
	COPY requirements.txt /tmp/requirements.txt
	RUN pip3 install \
	        --break-system-packages \
	        --no-cache-dir \
	        --requirement /tmp/requirements.txt

	# Run as the 'ubuntu' account from here on. Ubuntu 24.04 already ships that
	# user with UID 1000, so it is reused; creating another account with
	# `useradd -u 1000` would fail because the UID is not unique.
	USER ubuntu

	# Runtime environment: the user's home directory, its local bin directory at
	# the front of PATH, and Gradio's server name and port settings.
	ENV HOME=/home/ubuntu
	ENV PATH=/home/ubuntu/.local/bin:${PATH}
	ENV GRADIO_SERVER_NAME=0.0.0.0 \
	    GRADIO_SERVER_PORT=7860

	# Application directory; the relative paths in the COPY steps and in CMD
	# resolve against it.
	WORKDIR /home/ubuntu/app

	# The executable bit for start.sh is set at copy time (COPY --chmod, a
	# BuildKit feature) instead of a follow-up `RUN chmod`, which would add an
	# extra layer just to change the file mode. app.py is copied on its own so
	# that its mode is left untouched.
	COPY --chown=ubuntu app.py ./
	COPY --chown=ubuntu --chmod=755 start.sh ./

	# The parent image sets ENTRYPOINT to /app/llama-server; clear it so that CMD
	# is executed as-is rather than being passed to llama-server as arguments.
	ENTRYPOINT []
	CMD ["./start.sh"]

	# Documented service port (same value as GRADIO_SERVER_PORT).
	EXPOSE 7860