#!/bin/bash
# =============================================================================
# entrypoint.sh – Label Studio wrapper for Hugging Face Spaces
#
# Lifecycle:
#   1. Restore $DATA_DIR from S3.
#   2. Promote the restored SQLite snapshot to the live DB and verify it.
#   3. Ensure the admin user exists.
#   4. Start Label Studio in the background.
#   5. Snapshot + upload $DATA_DIR to S3 every $SYNC_INTERVAL seconds until
#      Label Studio exits or the script receives SIGINT/SIGTERM.
#
# Required env: BACKUP_S3_BUCKET, LABEL_STUDIO_USERNAME, LABEL_STUDIO_PASSWORD,
#               SPACE_HOST
# Optional env: BACKUP_S3_PREFIX, AWS_DEFAULT_REGION, AWS_ENDPOINT_URL,
#               SYNC_INTERVAL_SECONDS
# =============================================================================
set -euo pipefail

# ------------- configuration (all from HF secrets / env) -------------------
BUCKET="${BACKUP_S3_BUCKET:?'BACKUP_S3_BUCKET secret is required'}"
PREFIX="${BACKUP_S3_PREFIX:-labelstudio}"
REGION="${AWS_DEFAULT_REGION:-us-east-1}"
ENDPOINT="${AWS_ENDPOINT_URL:-https://s3.amazonaws.com}"
SYNC_INTERVAL="${SYNC_INTERVAL_SECONDS:-60}"

DATA_DIR=/data
DB_FILE="$DATA_DIR/label_studio.sqlite3"
SAFE_BACKUP_FILE="$DATA_DIR/ls_safe_backup.sqlite3"
S3_PATH="s3://${BUCKET}/${PREFIX}"

export AWS_DEFAULT_REGION="$REGION"

# Runtime state shared between the main loop and the shutdown path.
SLEEP_PID=""      # PID of the interruptible sleep in the sync loop
SHUTDOWN_DONE=0   # set to 1 once the final snapshot has been attempted

# ------------- helper: Atomic SQLite Backup + S3 upload --------------------
#######################################
# Snapshot the live SQLite DB and upload $DATA_DIR to S3.
# Globals:   DB_FILE, SAFE_BACKUP_FILE, DATA_DIR, S3_PATH, ENDPOINT (read)
# Arguments: $1 - label used in log lines (default: "periodic")
# Outputs:   progress / warning lines on stdout
# Returns:   0 always; failures are logged and retried on the next cycle
#######################################
sync_to_s3() {
  local reason="${1:-periodic}"
  local tmp_backup="${SAFE_BACKUP_FILE}.tmp"

  echo "[sync] Starting $reason sync to $S3_PATH/ ..."

  # 1. Create a safe, atomic backup of the SQLite DB.
  #    The snapshot is written to a temp file and renamed only on success, so
  #    a failed or interrupted backup never replaces the last good snapshot
  #    (and therefore never gets uploaded over it).
  if [ -f "$DB_FILE" ]; then
    rm -f -- "$tmp_backup"
    if sqlite3 "$DB_FILE" ".backup '$tmp_backup'"; then
      mv -f -- "$tmp_backup" "$SAFE_BACKUP_FILE" \
        || echo "[sync] Warning: could not publish SQLite backup, keeping previous snapshot."
    else
      rm -f -- "$tmp_backup"
      echo "[sync] Warning: SQLite backup failed, syncing rest of data."
    fi
  fi

  # 2. Sync to S3, explicitly excluding the live database, its lock/journal
  #    files and any in-progress snapshot temp file.
  if aws s3 sync "$DATA_DIR/" "$S3_PATH/" \
      --endpoint-url "$ENDPOINT" \
      --exclude "label_studio.sqlite3" \
      --exclude "label_studio.sqlite3-journal" \
      --exclude "label_studio.sqlite3-wal" \
      --exclude "label_studio.sqlite3-shm" \
      --exclude "${tmp_backup##*/}" \
      --exclude "${tmp_backup##*/}-journal" \
      --no-progress; then
    echo "[sync] $reason sync complete"
  else
    echo "[sync] WARNING: $reason sync failed – will retry next cycle"
  fi
}

# ------------- graceful shutdown -------------------------------------------
#######################################
# EXIT trap: upload a final snapshot, then stop Label Studio.
# Runs at most once. The exit status that triggered the trap is preserved
# because this function never calls `exit` itself.
# Globals:   SHUTDOWN_DONE (written), SLEEP_PID, LS_PID (read)
#######################################
shutdown_handler() {
  if [ "$SHUTDOWN_DONE" -eq 1 ]; then
    return 0
  fi
  SHUTDOWN_DONE=1

  # Ignore further signals so a repeated SIGTERM cannot abort the final upload.
  trap '' INT TERM

  if [ -n "$SLEEP_PID" ]; then
    kill "$SLEEP_PID" 2>/dev/null || true
  fi

  # Snapshot first, stop second: the upload is attempted even if Label Studio
  # is slow to terminate.
  sync_to_s3 "shutdown"

  echo "[shutdown] Stopping Label Studio (PID $LS_PID) ..."
  kill "$LS_PID" 2>/dev/null || true
  wait "$LS_PID" 2>/dev/null || true
  echo "[shutdown] Done. Goodbye."
}

#######################################
# Signal trap: log the signal and exit, which fires the EXIT trap exactly once.
# Without the explicit exit, Bash would resume the sync loop after the trap.
# Arguments: $1 - signal name without the SIG prefix (INT or TERM)
#######################################
handle_signal() {
  echo ""
  echo "[shutdown] SIG$1 received – uploading final snapshot before exit ..."
  exit 0
}

# ------------- 1. restore from S3 -----------------------------------------
echo "============================================================"
echo "  Label Studio  |  HF Spaces + S3 backup  |  $(date -u)"
echo "  Bucket  : $S3_PATH/"
echo "  Data dir: $DATA_DIR"
echo "  Sync    : every ${SYNC_INTERVAL}s"
echo "============================================================"

mkdir -p "$DATA_DIR"

echo "[restore] Syncing data DOWN from $S3_PATH/ ..."
if aws s3 sync "$S3_PATH/" "$DATA_DIR/" \
    --endpoint-url "$ENDPOINT" \
    --no-progress; then
  echo "[restore] Restore complete"
else
  echo "[restore] Restore returned non-zero (may be first run) – continuing"
fi

# ------------- 2. Restore DB & Integrity Check ----------------------------
# If we have a safe backup from a previous session, make it the live DB.
if [ -f "$SAFE_BACKUP_FILE" ]; then
  echo "[restore] Overwriting live DB with safe backup from S3..."
  # Journal/WAL files left over from an older DB must not be paired with the
  # restored file; SQLite would try to replay them against the wrong database.
  rm -f -- "$DB_FILE-journal" "$DB_FILE-wal" "$DB_FILE-shm"
  cp -- "$SAFE_BACKUP_FILE" "$DB_FILE"
fi

if [ -f "$DB_FILE" ]; then
  echo "[integrity] Checking DB integrity ..."
  # The `|| true` keeps set -e / pipefail from aborting here; a failed check
  # is reported through the comparison below instead.
  INTEGRITY=$(sqlite3 "$DB_FILE" "PRAGMA integrity_check;" 2>&1 | head -1 || true)
  if [ "$INTEGRITY" != "ok" ]; then
    echo "[integrity] ERROR: DB integrity check failed: $INTEGRITY"
    echo "[integrity] The restored database is corrupt."
    echo "[integrity] → Check your S3 bucket for a healthy backup."
    exit 1
  fi
  echo "[integrity] DB is healthy: $INTEGRITY"
else
  echo "[integrity] No existing DB found – Label Studio will create a fresh one."
fi

# ------------- 3. ensure admin user exists (idempotent) -------------------
echo "[user] Ensuring admin user exists ..."
# Best effort: a non-zero exit (e.g. the user already exists) is ignored.
label-studio user create \
  --username "${LABEL_STUDIO_USERNAME:?'LABEL_STUDIO_USERNAME secret is required'}" \
  --password "${LABEL_STUDIO_PASSWORD:?'LABEL_STUDIO_PASSWORD secret is required'}" \
  --email "${LABEL_STUDIO_USERNAME}@localhost" \
  --preserve-case 2>/dev/null || true

# ------------- 4. start Label Studio in the background --------------------
# Validate in the parent shell: an unset variable inside the background job
# would only kill that job, not this script.
: "${SPACE_HOST:?SPACE_HOST must be set}"

echo "[start] Starting Label Studio ..."
label-studio --host="$SPACE_HOST" &
LS_PID=$!
echo "[start] Label Studio PID: $LS_PID"

# Traps are installed only after a successful restore + integrity check so a
# failed startup never uploads a snapshot of a broken database.
trap shutdown_handler EXIT
trap 'handle_signal INT' INT
trap 'handle_signal TERM' TERM

# ------------- 5. periodic sync loop (runs forever) -----------------------
echo "[loop] Entering sync loop (every ${SYNC_INTERVAL}s) ..."
while true; do
  # Sleep in the background and `wait` on it: Bash defers traps while a
  # foreground command runs, but `wait` returns immediately on a trapped
  # signal, so shutdown starts right away instead of after the full interval.
  sleep "$SYNC_INTERVAL" &
  SLEEP_PID=$!
  sleep_status=0
  wait "$SLEEP_PID" || sleep_status=$?
  SLEEP_PID=""

  # Reached only when no signal trap fired (the trap exits the script).
  if [ "$sleep_status" -ne 0 ]; then
    echo "[loop] sleep exited with status $sleep_status – stopping container"
    exit 1
  fi

  if ! kill -0 "$LS_PID" 2>/dev/null; then
    echo "[loop] Label Studio process has exited unexpectedly – stopping container"
    exit 1
  fi

  sync_to_s3 "periodic"
done