#!/usr/bin/env bash
# =============================================================================
# entrypoint.sh - Label Studio wrapper for Hugging Face Spaces
#
# What this script does:
#   1. On startup   : restores SQLite DB + exports from S3
#   2. After restore: verifies DB integrity before starting Label Studio
#   3. Starts Label Studio in the background
#   4. Every 60 s   : checkpoints WAL then syncs /data -> S3
#   5. On SIGTERM   : does one final sync before the pod dies
#
# Required HF Space secrets:
#   BACKUP_S3_BUCKET       - bucket name (no s3:// prefix)
#   LABEL_STUDIO_USERNAME  - admin username / email
#   LABEL_STUDIO_PASSWORD  - admin password
#
# Optional HF Space secrets (with defaults shown):
#   BACKUP_S3_PREFIX       - labelstudio
#   AWS_DEFAULT_REGION     - us-east-1
#   AWS_ENDPOINT_URL       - https://s3.amazonaws.com (change for R2/MinIO)
#   SYNC_INTERVAL_SECONDS  - 60
# =============================================================================
set -euo pipefail

# ------------- configuration (all from HF secrets / env) -------------------
# ":?" aborts immediately with a clear message if the required secret is unset.
BUCKET="${BACKUP_S3_BUCKET:?'BACKUP_S3_BUCKET secret is required'}"
PREFIX="${BACKUP_S3_PREFIX:-labelstudio}"
REGION="${AWS_DEFAULT_REGION:-us-east-1}"
ENDPOINT="${AWS_ENDPOINT_URL:-https://s3.amazonaws.com}"
SYNC_INTERVAL="${SYNC_INTERVAL_SECONDS:-60}"
DATA_DIR=/data
# Label Studio uses label_studio.sqlite3 (not .db) - confirmed from live logs
DB_FILE="$DATA_DIR/label_studio.sqlite3"
S3_PATH="s3://${BUCKET}/${PREFIX}"
# These never change after startup; guard against accidental reassignment.
readonly BUCKET PREFIX REGION ENDPOINT SYNC_INTERVAL DATA_DIR DB_FILE S3_PATH
export AWS_DEFAULT_REGION="$REGION"
# ------------- helper: WAL checkpoint + S3 upload --------------------------

#######################################
# Checkpoint the SQLite WAL, then mirror the data dir up to S3.
# Globals:   DB_FILE, DATA_DIR, S3_PATH, ENDPOINT (all read)
# Arguments: $1 - label used in log lines (default: "periodic")
# Outputs:   progress / diagnostic lines on stdout
# Returns:   always 0 - a failed upload is logged and retried next cycle
#######################################
sync_to_s3() {
  local reason="${1:-periodic}"
  echo "[sync] Starting $reason sync to $S3_PATH/ ..."
  # Checkpoint the WAL so S3 always receives a single self-consistent file.
  # TRUNCATE: folds all WAL frames into the main DB file and zeros the WAL.
  # This briefly blocks writers (not readers) - safe for <=10 concurrent users.
  if [ -f "$DB_FILE" ]; then
    # stderr silenced on purpose: a transiently locked DB is expected and
    # harmless - the next cycle will checkpoint it.
    sqlite3 "$DB_FILE" "PRAGMA wal_checkpoint(TRUNCATE);" 2>/dev/null ||
      echo "[sync] WAL checkpoint skipped (DB may be locked) - proceeding anyway"
  fi
  # Explicit if/else rather than "cmd && ok || fail": with the chained form,
  # a failure of the success-echo would also trigger the failure branch.
  if aws s3 sync "$DATA_DIR/" "$S3_PATH/" \
       --endpoint-url "$ENDPOINT" \
       --no-progress; then
    echo "[sync] $reason sync complete"
  else
    echo "[sync] WARNING: $reason sync failed - will retry next cycle"
  fi
}
# ------------- graceful shutdown -------------------------------------------

#######################################
# Trap handler (installed for EXIT/INT/TERM after Label Studio starts):
# uploads one final snapshot, then stops the background Label Studio.
# Globals:   LS_PID (read) - PID of the background Label Studio process
# Outputs:   shutdown progress on stdout
#######################################
shutdown_handler() {
  echo ""
  echo "[shutdown] SIGTERM received - uploading final snapshot before exit ..."
  sync_to_s3 "shutdown"
  echo "[shutdown] Stopping Label Studio (PID $LS_PID) ..."
  # LS may already be dead; never let the handler itself fail under set -e.
  kill "$LS_PID" 2>/dev/null || true
  wait "$LS_PID" 2>/dev/null || true
  echo "[shutdown] Done. Goodbye."
}
# ------------- 1. restore from S3 -----------------------------------------
echo "============================================================"
echo " Label Studio | HF Spaces + S3 backup | $(date -u)"
echo " Bucket : $S3_PATH/"
echo " Data dir: $DATA_DIR"
echo " Sync : every ${SYNC_INTERVAL}s"
echo "============================================================"

mkdir -p "$DATA_DIR"

echo "[restore] Syncing data DOWN from $S3_PATH/ ..."
# The else-branch keeps a fresh bucket (nothing to restore yet) from
# aborting the whole script under `set -e`.
if aws s3 sync "$S3_PATH/" "$DATA_DIR/" \
     --endpoint-url "$ENDPOINT" \
     --no-progress; then
  echo "[restore] Restore complete"
else
  echo "[restore] Restore returned non-zero (may be first run) - continuing"
fi
# ------------- 2. DB integrity check before starting LS -------------------
if [ -f "$DB_FILE" ]; then
  echo "[integrity] Checking DB integrity ..."
  # integrity_check prints the single word "ok" on success, or one line per
  # problem found - the first line is enough to decide pass/fail.
  INTEGRITY=$(sqlite3 "$DB_FILE" "PRAGMA integrity_check;" 2>&1 | head -1)
  if [ "$INTEGRITY" != "ok" ]; then
    echo "[integrity] ERROR: DB integrity check failed: $INTEGRITY"
    echo "[integrity] The restored database is corrupt."
    echo "[integrity] - Check your S3 bucket for a healthy backup."
    echo "[integrity] - If you have bucket versioning enabled, restore an earlier version."
    # Refuse to start: running Label Studio on a corrupt DB (and then syncing
    # it back up) could clobber the last healthy backup in S3.
    exit 1
  fi
  echo "[integrity] DB is healthy: $INTEGRITY"
else
  echo "[integrity] No existing DB found - Label Studio will create a fresh one."
fi
# ------------- 3. ensure admin user exists (idempotent) -------------------
echo "[user] Ensuring admin user exists ..."
# Failures are deliberately swallowed (`|| true`, stderr discarded):
# presumably the command exits non-zero when the user already exists, which
# is the normal case on every boot after the first - TODO confirm against
# the label-studio CLI docs.
label-studio user create \
  --username "${LABEL_STUDIO_USERNAME:?'LABEL_STUDIO_USERNAME secret is required'}" \
  --password "${LABEL_STUDIO_PASSWORD:?'LABEL_STUDIO_PASSWORD secret is required'}" \
  --email "${LABEL_STUDIO_USERNAME}@localhost" \
  --preserve-case 2>/dev/null || true
# ------------- 4. start Label Studio in the background --------------------
# Use $SPACE_HOST exactly as the official HF Dockerfile does - this is what
# makes HF's reverse proxy route traffic correctly to the container.
# (Under `set -u` an unset SPACE_HOST fails loudly right here.)
echo "[start] Starting Label Studio ..."
label-studio --host="$SPACE_HOST" &
LS_PID=$!
echo "[start] Label Studio PID: $LS_PID"

# Register the handler only AFTER LS_PID exists - shutdown_handler reads it.
# EXIT is included so even an unexpected script error uploads a final snapshot.
trap shutdown_handler EXIT INT TERM
# ------------- 5. periodic sync loop (runs forever) -----------------------
echo "[loop] Entering sync loop (every ${SYNC_INTERVAL}s) ..."
while true; do
  sleep "$SYNC_INTERVAL"
  # kill -0 sends no signal; it only tests that the PID is still alive.
  # If LS died, exit non-zero so the platform restarts the container -
  # the EXIT trap still performs one final sync on the way out.
  if ! kill -0 "$LS_PID" 2>/dev/null; then
    echo "[loop] Label Studio process has exited unexpectedly - stopping container"
    exit 1
  fi
  sync_to_s3 "periodic"
done