fm1320 committed on
Commit
3d62adb
·
1 Parent(s): 5a1fd0a

Drop default preload to fit free-tier memory

Browse files
Files changed (1) hide show
  1. hf-entrypoint.sh +21 -9
hf-entrypoint.sh CHANGED
@@ -3,18 +3,30 @@
3
  # Single-container layout for Hugging Face Spaces.
4
  set -euo pipefail
5
 
6
- # Models to preload at SIE startup. Same trio the local compose uses.
7
- PRELOAD="microsoft/Florence-2-base,naver-clova-ix/donut-base-finetuned-cord-v2,urchade/gliner_multi-v2.1"
 
 
 
 
 
 
 
8
 
9
- echo "[hf-entrypoint] starting sie-server on 127.0.0.1:8080 with preload=$PRELOAD"
10
- sie-server serve \
11
- --host 127.0.0.1 \
12
- --port 8080 \
13
- --preload "$PRELOAD" &
 
 
 
 
14
  SIE_PID=$!
15
 
16
- # Wait up to 20 minutes for SIE to come up. First boot pulls model weights
17
- # from HF; subsequent restarts reuse the /data cache and start in seconds.
 
18
  echo "[hf-entrypoint] waiting for /healthz"
19
  for i in $(seq 1 1200); do
20
  if curl -fsS http://127.0.0.1:8080/healthz > /dev/null 2>&1; then
 
3
  # Single-container layout for Hugging Face Spaces.
4
  set -euo pipefail
5
 
6
+ # Models to preload at SIE startup. Empty by default on HF Spaces free tier
7
+ # (16 GB advertised but the cgroup-imposed memory ceiling is tight enough
8
+ # that preloading multiple model classes triggers LRU eviction during boot,
9
+ # which makes the first user click much slower than a cold-load anyway).
10
+ #
11
+ # Override via the PRELOAD env var in the Space's Settings if you upgrade to
12
+ # CPU-Upgrade (32 GB) or a GPU tier:
13
+ # PRELOAD="microsoft/Florence-2-base,naver-clova-ix/donut-base-finetuned-cord-v2,urchade/gliner_multi-v2.1"
14
+ PRELOAD="${PRELOAD:-}"
15
 
16
+ SIE_ARGS=(serve --host 127.0.0.1 --port 8080)
17
+ if [ -n "$PRELOAD" ]; then
18
+ echo "[hf-entrypoint] starting sie-server with preload=$PRELOAD"
19
+ SIE_ARGS+=(--preload "$PRELOAD")
20
+ else
21
+ echo "[hf-entrypoint] starting sie-server with no preload (lazy-load on first request)"
22
+ fi
23
+
24
+ sie-server "${SIE_ARGS[@]}" &
25
  SIE_PID=$!
26
 
27
+ # Wait up to 20 minutes for SIE to come up. Without preload this should be
28
+ # ~30 seconds. With preload the first boot has to download model weights
29
+ # from HF before SIE marks itself ready.
30
  echo "[hf-entrypoint] waiting for /healthz"
31
  for i in $(seq 1 1200); do
32
  if curl -fsS http://127.0.0.1:8080/healthz > /dev/null 2>&1; then