document-ocr / hf-entrypoint.sh
Filip Makraduli
Switch to transformers5 SIE image; LightOnOCR as default recognition
4e0f10e
#!/usr/bin/env bash
# Entrypoint for the single-container Hugging Face Spaces layout: SIE is
# launched as a background job, and the UI server later takes the foreground.
set -euo pipefail

# PRELOAD: comma-separated model ids handed to `sie-server --preload`.
#
# Left empty by default for the HF Spaces free tier. The tier advertises
# 16 GB, but the cgroup memory ceiling is tight enough that preloading several
# model classes causes LRU eviction during boot, leaving the first user click
# much slower than a plain cold load would have been.
#
# On CPU-Upgrade (32 GB) or a GPU tier, set PRELOAD in the Space's Settings,
# for example:
#   PRELOAD="microsoft/Florence-2-base-ft,naver-clova-ix/donut-base-finetuned-cord-v2,urchade/gliner_multi-v2.1"
PRELOAD="${PRELOAD:-}"

# SIE listens on loopback only; the UI process in this container talks to it.
SIE_ARGS=(serve --host 127.0.0.1 --port 8080)

if [[ -z "$PRELOAD" ]]; then
  echo "[hf-entrypoint] starting sie-server with no preload (lazy-load on first request)"
else
  echo "[hf-entrypoint] starting sie-server with preload=$PRELOAD"
  SIE_ARGS+=(--preload "$PRELOAD")
fi

# Run in the background and keep the PID so later steps can check liveness.
sie-server "${SIE_ARGS[@]}" &
SIE_PID=$!
# Block until SIE answers /healthz, giving up after 20 minutes of wall-clock
# time. Without preload this should take ~30 seconds; with preload, model
# weights have to be downloaded from HF before SIE marks itself ready.
#
# Exits 1 if sie-server dies first or never becomes healthy, so the UI is not
# started against a backend that is not there.
echo "[hf-entrypoint] waiting for /healthz"
sie_wait_limit=1200   # seconds
sie_wait_start=$SECONDS
sie_ready=0
while ((SECONDS - sie_wait_start < sie_wait_limit)); do
  # --max-time keeps a single hung connection from stalling the whole wait.
  if curl -fsS --max-time 5 http://127.0.0.1:8080/healthz > /dev/null 2>&1; then
    echo "[hf-entrypoint] sie healthy in ~$((SECONDS - sie_wait_start))s"
    sie_ready=1
    break
  fi
  # kill -0 sends no signal; it only checks that the process still exists.
  if ! kill -0 "$SIE_PID" 2>/dev/null; then
    echo "[hf-entrypoint] sie-server died before becoming healthy" >&2
    exit 1
  fi
  sleep 1
done

if [[ "$sie_ready" -ne 1 ]]; then
  echo "[hf-entrypoint] sie-server not healthy after ${sie_wait_limit}s; giving up" >&2
  # Best-effort cleanup so the background server is not left orphaned.
  kill "$SIE_PID" 2>/dev/null || true
  exit 1
fi
# Environment for the UI server. PORT and SIE_URL keep any value already set
# (and non-empty) in the environment; otherwise the defaults below apply.
: "${PORT:=7860}"
: "${SIE_URL:=http://127.0.0.1:8080}"
# NOTE(review): presumably tells the UI not to launch a browser — confirm in
# web/server.ts.
OPEN_BROWSER=0
export PORT SIE_URL OPEN_BROWSER

cd /app/ui
echo "[hf-entrypoint] starting UI on 0.0.0.0:$PORT"
# exec replaces this shell, so the UI server becomes the foreground process.
exec npx tsx web/server.ts