paper-decoder / Dockerfile
aleks-gotsa's picture
fix: build on llama.cpp ubuntu24 base, resolves glibc mismatch
849e756
Raw
History Blame Contribute Delete
1.72 kB
# Paper Decoder — HF Space (Docker SDK)
# Base: official llama.cpp server image (Ubuntu 24.04). Building directly on it
# avoids the glibc mismatch hit when copying its binaries into Debian bookworm
# (binaries need GLIBC_2.38; bookworm ships 2.36).
# llama-server binary and its libs live in /app.
# NOTE(review): ":server" is a floating tag, so a rebuild may silently pick up a
# different llama.cpp build (or base OS). Consider pinning a versioned tag or an
# image digest once a known-good build has been identified.
FROM ghcr.io/ggml-org/llama.cpp:server

# Build-time only: keeps apt/debconf non-interactive during the RUN steps of
# this build without leaking DEBIAN_FRONTEND into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Lets the dynamic loader resolve the shared libraries that sit next to
# llama-server in /app.
ENV LD_LIBRARY_PATH=/app
# System packages: the Python 3 runtime with pip, plus Tesseract OCR and its
# Russian and Ukrainian language data. curl is not listed because the base
# image already provides it.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        python3 \
        python3-pip \
        tesseract-ocr \
        tesseract-ocr-rus \
        tesseract-ocr-ukr \
    && rm -rf /var/lib/apt/lists/*
# Bake the model into the image at build time.
# Pin confirmed from first build log: Qwen3-4B-Instruct-2507-Q4_K_M.gguf
# Ubuntu 24.04 pip is PEP 668 managed -> --break-system-packages.
#
# The `test -s` guard fails the build when the pinned file is missing or empty
# (for example if the --include pattern stops matching after an upstream
# rename; a download that matches nothing may still exit 0 — TODO confirm).
# Without it, a model-less image would only be discovered at container start.
# The directory listing is kept so the build log still shows the file size.
RUN pip3 install --no-cache-dir --break-system-packages "huggingface_hub>=1.0,<2.0" \
    && hf download unsloth/Qwen3-4B-Instruct-2507-GGUF \
        --include "Qwen3-4B-Instruct-2507-Q4_K_M.gguf" \
        --local-dir /opt/models \
    && test -s /opt/models/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
    && ls -lh /opt/models
# Application Python dependencies. The manifest is copied ahead of the
# application sources so that edits to those files do not trigger a reinstall.
# --break-system-packages is needed because the system pip is externally
# managed (PEP 668); --no-cache-dir keeps pip's download cache out of the layer.
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install \
        --break-system-packages \
        --no-cache-dir \
        --requirement /tmp/requirements.txt
# Drop root for everything that follows. Ubuntu 24.04 already ships a UID-1000
# user named 'ubuntu', so it is reused here; creating another account with
# `useradd -u 1000` would fail because the UID is not unique.
USER ubuntu

# Per-user environment: home directory, and the user-level bin directory placed
# first on PATH.
ENV HOME=/home/ubuntu \
    PATH=/home/ubuntu/.local/bin:${PATH}

# Gradio server settings taken from the environment: listen on all interfaces,
# port 7860.
ENV GRADIO_SERVER_PORT=7860 \
    GRADIO_SERVER_NAME=0.0.0.0

# Working directory for the application files and the start command.
WORKDIR /home/ubuntu/app
# Application files, owned by the runtime user. start.sh receives its
# executable bit at copy time (--chmod requires BuildKit), which avoids a
# separate `RUN chmod` layer that would store the script a second time.
COPY --chown=ubuntu app.py ./
COPY --chown=ubuntu --chmod=755 start.sh ./

# Documentation only: the port the app listens on (same value as
# GRADIO_SERVER_PORT).
EXPOSE 7860

# Parent image's ENTRYPOINT is /app/llama-server — reset it so that CMD is
# executed as-is instead of being passed to llama-server as arguments.
ENTRYPOINT []
CMD ["./start.sh"]