#!/bin/bash
#
# Container entrypoint: runs Label Studio and mirrors its data directory
# to an S3 bucket (restore on start, periodic upload, upload on shutdown).
#
# Environment read by this script:
#   BACKUP_S3_BUCKET       (required) bucket that holds the backup
#   BACKUP_S3_PREFIX       key prefix inside the bucket      (default: labelstudio)
#   AWS_DEFAULT_REGION     region exported for the AWS CLI   (default: us-east-1)
#   AWS_ENDPOINT_URL       value passed as --endpoint-url    (default: https://s3.amazonaws.com)
#   SYNC_INTERVAL_SECONDS  whole seconds between periodic syncs (default: 60)
#   LABEL_STUDIO_USERNAME, LABEL_STUDIO_PASSWORD, SPACE_HOST
#                          read further down when the application is started

set -euo pipefail

# --- Settings taken from the environment -----------------------------------
# The ":?" message is written without inner quotes: inside double quotes they
# would be printed literally as part of the error text.
BUCKET="${BACKUP_S3_BUCKET:?BACKUP_S3_BUCKET secret is required}"
PREFIX="${BACKUP_S3_PREFIX:-labelstudio}"
REGION="${AWS_DEFAULT_REGION:-us-east-1}"
ENDPOINT="${AWS_ENDPOINT_URL:-https://s3.amazonaws.com}"
SYNC_INTERVAL="${SYNC_INTERVAL_SECONDS:-60}"

# Reject 0 and non-numeric values up front: the interval is handed to
# `sleep` in the sync loop, where 0 would spin and garbage would abort the
# script with an unhelpful error.
if ! [[ "$SYNC_INTERVAL" =~ ^[0-9]+$ ]] || ((10#$SYNC_INTERVAL <= 0)); then
  echo "SYNC_INTERVAL_SECONDS must be a positive integer (got '$SYNC_INTERVAL')" >&2
  exit 1
fi

# --- Fixed paths -------------------------------------------------------------
DATA_DIR=/data
DB_FILE="$DATA_DIR/label_studio.sqlite3"
# Snapshot of the live DB; this copy, not the live file, is what gets uploaded.
SAFE_BACKUP_FILE="$DATA_DIR/ls_safe_backup.sqlite3"
S3_PATH="s3://${BUCKET}/${PREFIX}"

# Make the resolved region (including the default) visible to the AWS CLI.
export AWS_DEFAULT_REGION="$REGION"
|
|
| |
#######################################
# Upload the data directory to S3. Best effort: problems are logged and the
# function still returns 0 so a failed cycle never stops the caller.
# Globals:
#   DB_FILE, SAFE_BACKUP_FILE, DATA_DIR, S3_PATH, ENDPOINT (read)
# Arguments:
#   $1 - label used in log lines (default: "periodic")
# Outputs:
#   progress on stdout, warnings on stderr
# Returns:
#   0 always
#######################################
sync_to_s3() {
  local reason="${1:-periodic}"
  local db_name="${DB_FILE##*/}"
  local tmp_backup="${SAFE_BACKUP_FILE}.tmp"

  echo "[sync] Starting $reason sync to $S3_PATH/ ..."

  # Snapshot the live database with sqlite3's .backup instead of uploading
  # the live file. The snapshot is written to a scratch file and only renamed
  # over the previous one on success, so a failed or interrupted backup can
  # never replace the last good snapshot with a partial file.
  if [[ -f "$DB_FILE" ]]; then
    rm -f -- "$tmp_backup"
    if ! { sqlite3 "$DB_FILE" ".backup '$tmp_backup'" \
      && mv -f -- "$tmp_backup" "$SAFE_BACKUP_FILE"; }; then
      rm -f -- "$tmp_backup"
      echo "[sync] Warning: SQLite backup failed, syncing rest of data." >&2
    fi
  fi

  # The live DB, its sidecar files and the scratch snapshot are excluded;
  # only the finished snapshot travels to S3.
  if aws s3 sync "$DATA_DIR/" "$S3_PATH/" \
    --endpoint-url "$ENDPOINT" \
    --exclude "$db_name" \
    --exclude "${db_name}-journal" \
    --exclude "${db_name}-wal" \
    --exclude "${db_name}-shm" \
    --exclude "${tmp_backup##*/}" \
    --no-progress; then
    echo "[sync] $reason sync complete"
  else
    echo "[sync] WARNING: $reason sync failed - will retry next cycle" >&2
  fi

  return 0
}
|
|
| |
#######################################
# Trap handler: upload a final snapshot, stop Label Studio, then exit.
# Globals:
#   LS_PID (read) - PID of the background Label Studio process, if started
# Arguments:
#   none
# Outputs:
#   progress on stdout
# Returns:
#   does not return; exits with the status that was current on entry
#######################################
shutdown_handler() {
  # Must be the first statement: remember the status the shell was about to
  # exit with so it can be passed on unchanged.
  local status=$?

  # The script installs this handler for EXIT as well as INT/TERM. Disarm all
  # three so the body runs exactly once, and exit at the end so the script
  # does not resume its loop after a signal.
  trap - EXIT INT TERM

  echo ""
  echo "[shutdown] SIGTERM received - uploading final snapshot before exit ..."
  # Never let a failed upload prevent the application from being stopped.
  sync_to_s3 "shutdown" || true

  # LS_PID may be unset if shutdown is triggered before the app was started.
  if [[ -n "${LS_PID:-}" ]]; then
    echo "[shutdown] Stopping Label Studio (PID $LS_PID) ..."
    kill "$LS_PID" 2>/dev/null || true
    wait "$LS_PID" 2>/dev/null || true
  fi

  echo "[shutdown] Done. Goodbye."
  exit "$status"
}
|
|
| |
# Startup banner: one printf call, one argument per output line.
printf '%s\n' \
  "============================================================" \
  " Label Studio | HF Spaces + S3 backup | $(date -u)" \
  " Bucket : $S3_PATH/" \
  " Data dir: $DATA_DIR" \
  " Sync : every ${SYNC_INTERVAL}s" \
  "============================================================"

# The data directory must exist before anything is restored into it.
mkdir -p "$DATA_DIR"
|
|
# Pull the previously uploaded state into the data directory. A failure is
# logged and startup continues.
# NOTE(review): a non-zero exit here may also mean bad credentials or a
# network problem rather than a first run; continuing then starts from an
# empty state that later syncs will upload - confirm this is acceptable.
echo "[restore] Syncing data DOWN from $S3_PATH/ ..."
if aws s3 sync "$S3_PATH/" "$DATA_DIR/" \
  --endpoint-url "$ENDPOINT" \
  --no-progress; then
  echo "[restore] Restore complete"
else
  echo "[restore] Restore returned non-zero (may be first run) - continuing" >&2
fi

# The uploaded snapshot is the source of truth for the database.
if [[ -f "$SAFE_BACKUP_FILE" ]]; then
  echo "[restore] Overwriting live DB with safe backup from S3..."
  # Any journal/WAL/shm files still lying next to the live DB belong to the
  # file that is about to be replaced; SQLite must not pair them with the
  # restored snapshot, so remove them first.
  rm -f -- "${DB_FILE}-journal" "${DB_FILE}-wal" "${DB_FILE}-shm"
  cp -- "$SAFE_BACKUP_FILE" "$DB_FILE"
fi
|
|
# Refuse to start on a damaged database: run SQLite's integrity check and
# abort unless its first output line is exactly "ok".
if [[ -f "$DB_FILE" ]]; then
  echo "[integrity] Checking DB integrity ..."

  # stderr is merged in so a sqlite3 error message becomes the reported
  # reason; "|| true" keeps pipefail/errexit from aborting before the
  # result can be inspected and reported below.
  INTEGRITY=$(sqlite3 "$DB_FILE" "PRAGMA integrity_check;" 2>&1 | head -n 1 || true)

  if [[ "$INTEGRITY" != "ok" ]]; then
    {
      echo "[integrity] ERROR: DB integrity check failed: $INTEGRITY"
      echo "[integrity] The restored database is corrupt."
      echo "[integrity] -> Check your S3 bucket for a healthy backup."
    } >&2
    exit 1
  fi
  echo "[integrity] DB is healthy: $INTEGRITY"
else
  echo "[integrity] No existing DB found - Label Studio will create a fresh one."
fi
|
|
| |
# Create the admin account from the configured secrets. Best effort: a
# non-zero exit is reported and startup continues.
echo "[user] Ensuring admin user exists ..."

# Validate the secrets on their own lines so a missing one stops the script
# with a clear message, independent of the command's redirections below.
: "${LABEL_STUDIO_USERNAME:?LABEL_STUDIO_USERNAME secret is required}"
: "${LABEL_STUDIO_PASSWORD:?LABEL_STUDIO_PASSWORD secret is required}"

# NOTE(review): the password is passed as a command-line argument and is
# therefore visible in the process list while the command runs.
# NOTE(review): stderr is discarded, so an "already exists" failure cannot
# be told apart from a real one (e.g. unsupported CLI arguments) - confirm
# the subcommand and flags against the installed Label Studio version.
if ! label-studio user create \
  --username "$LABEL_STUDIO_USERNAME" \
  --password "$LABEL_STUDIO_PASSWORD" \
  --email "${LABEL_STUDIO_USERNAME}@localhost" \
  --preserve-case 2>/dev/null; then
  echo "[user] 'label-studio user create' returned non-zero (the user may already exist) - continuing" >&2
fi
|
|
| |
# Start Label Studio in the background and keep this shell as the supervisor.
echo "[start] Starting Label Studio ..."
# Check SPACE_HOST here, in the supervising shell: an unset variable inside
# the backgrounded command may not abort this script.
: "${SPACE_HOST?SPACE_HOST must be set}"
label-studio --host="$SPACE_HOST" &
LS_PID=$!
echo "[start] Label Studio PID: $LS_PID"

# Signals are turned into a normal exit (conventional 128+signal status) so
# that the EXIT trap is the single place where shutdown_handler runs.
trap shutdown_handler EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Periodic upload loop; also watches for the application dying.
echo "[loop] Entering sync loop (every ${SYNC_INTERVAL}s) ..."
while true; do
  # Sleep in the background and wait on it: bash runs a trap only after the
  # current foreground command finishes, whereas `wait` returns immediately
  # when a trapped signal arrives, so shutdown is not delayed by up to a
  # full interval.
  sleep "$SYNC_INTERVAL" &
  wait "$!"

  if ! kill -0 "$LS_PID" 2>/dev/null; then
    echo "[loop] Label Studio process has exited unexpectedly - stopping container" >&2
    exit 1
  fi
  sync_to_s3 "periodic"
done
|
|