#!/bin/bash
#
# Entrypoint script for a Label Studio instance whose data directory is
# mirrored to an S3 bucket.
#
# Environment read in this section:
#   BACKUP_S3_BUCKET       required; name of the bucket used for backups
#   BACKUP_S3_PREFIX       key prefix inside the bucket (default: labelstudio)
#   AWS_DEFAULT_REGION     region exported for the AWS CLI (default: us-east-1)
#   AWS_ENDPOINT_URL       endpoint handed to --endpoint-url
#                          (default: https://s3.amazonaws.com)
#   SYNC_INTERVAL_SECONDS  argument given to sleep between syncs (default: 60)

# Abort on failing commands, on unset variables, and on failures inside pipelines.
set -o errexit -o nounset -o pipefail

# --- Local paths -------------------------------------------------------------
DATA_DIR="/data"
DB_FILE="${DATA_DIR}/label_studio.sqlite3"
# Copy of the database produced by sqlite3's .backup; this file, not the live
# database, is what gets uploaded.
SAFE_BACKUP_FILE="${DATA_DIR}/ls_safe_backup.sqlite3"

# --- Settings supplied through the environment -------------------------------
# The ':?' expansion stops the script with the given message when the bucket
# variable is unset or empty.
BUCKET="${BACKUP_S3_BUCKET:?'BACKUP_S3_BUCKET secret is required'}"
PREFIX="${BACKUP_S3_PREFIX:-labelstudio}"
SYNC_INTERVAL="${SYNC_INTERVAL_SECONDS:-60}"
ENDPOINT="${AWS_ENDPOINT_URL:-https://s3.amazonaws.com}"
REGION="${AWS_DEFAULT_REGION:-us-east-1}"

# --- Derived values ----------------------------------------------------------
S3_PATH="s3://${BUCKET}/${PREFIX}"

# Make the resolved region visible to child processes such as the AWS CLI.
AWS_DEFAULT_REGION="$REGION"
export AWS_DEFAULT_REGION
| |
|
| | |
#######################################
# Snapshot the SQLite database and upload the data directory to S3.
#
# The snapshot is written to a staging file and renamed over
# SAFE_BACKUP_FILE only when sqlite3 succeeds, so a failed or interrupted
# backup never replaces the last good snapshot with a truncated one.
#
# Globals:   DB_FILE, SAFE_BACKUP_FILE, DATA_DIR, S3_PATH, ENDPOINT (read)
# Arguments: $1 - label used in log lines (default: "periodic")
# Outputs:   progress and warning lines on stdout; sqlite3/aws diagnostics
#            on stderr
# Returns:   always 0 - failures are logged and left for the next cycle, so
#            a failed sync never aborts the caller under `set -e`
#######################################
sync_to_s3() {
  local reason="${1:-periodic}"
  local staging="${SAFE_BACKUP_FILE}.tmp"

  echo "[sync] Starting $reason sync to $S3_PATH/ ..."

  if [ -f "$DB_FILE" ]; then
    # Clear any staging file left behind by an earlier interrupted run.
    rm -f -- "$staging" || true
    # The staging file lives next to the target so the rename stays on one
    # filesystem. sqlite3's stderr is left visible so the cause of a failed
    # backup shows up in the logs.
    if ! {
      sqlite3 "$DB_FILE" ".backup '$staging'" \
        && mv -f -- "$staging" "$SAFE_BACKUP_FILE"
    }; then
      rm -f -- "$staging" || true
      echo "[sync] Warning: SQLite backup failed, syncing rest of data."
    fi
  fi

  # The live database and its sidecar files are excluded; only the snapshot
  # is uploaded. The staging file is excluded as well in case one is left over.
  if aws s3 sync "$DATA_DIR/" "$S3_PATH/" \
    --endpoint-url "$ENDPOINT" \
    --exclude "label_studio.sqlite3" \
    --exclude "label_studio.sqlite3-journal" \
    --exclude "label_studio.sqlite3-wal" \
    --exclude "label_studio.sqlite3-shm" \
    --exclude "${staging##*/}" \
    --no-progress; then
    echo "[sync] $reason sync complete"
  else
    echo "[sync] WARNING: $reason sync failed — will retry next cycle"
  fi

  return 0
}
| |
|
| | |
#######################################
# Trap handler: upload a final snapshot, stop Label Studio, then exit.
#
# The handler disarms the EXIT/INT/TERM traps on entry and ends with an
# explicit `exit`. Without that, a handler bound to both a signal and EXIT
# returns into the interrupted script after the signal and then runs a second
# time when the script finally exits.
#
# Globals:   LS_PID (read) - PID of the background Label Studio process
# Arguments: none
# Outputs:   progress lines on stdout
# Returns:   does not return; exits with the status that was current when
#            the trap fired (the script's exit status for an EXIT trap)
#######################################
shutdown_handler() {
  # Must be the first command so $? is still the status seen by the trap.
  local status=$?

  # Prevent re-entry from the EXIT trap or from a repeated signal.
  trap - EXIT INT TERM

  echo ""
  echo "[shutdown] SIGTERM received — uploading final snapshot before exit ..."
  sync_to_s3 "shutdown" || true

  echo "[shutdown] Stopping Label Studio (PID ${LS_PID:-unknown}) ..."
  if [ -n "${LS_PID:-}" ]; then
    # Both commands fail harmlessly when the process is already gone.
    kill "$LS_PID" 2>/dev/null || true
    wait "$LS_PID" 2>/dev/null || true
  fi

  echo "[shutdown] Done. Goodbye."
  exit "$status"
}
| |
|
| | |
# Startup banner: one printf call emits every line, each argument becoming its
# own output line. It shows the resolved S3 destination, the data directory and
# the sync interval, stamped with the current UTC date.
printf '%s\n' \
  "============================================================" \
  " Label Studio | HF Spaces + S3 backup | $(date -u)" \
  " Bucket : $S3_PATH/" \
  " Data dir: $DATA_DIR" \
  " Sync : every ${SYNC_INTERVAL}s" \
  "============================================================"

# The data directory has to exist before anything is restored into it.
mkdir -p "$DATA_DIR"
| |
|
# Pull whatever was previously uploaded into the data directory.
# NOTE(review): a non-zero exit from the download is tolerated and treated as
# a possible first run. It could equally be a credentials or network failure,
# in which case the instance starts empty - confirm this is intended.
echo "[restore] Syncing data DOWN from $S3_PATH/ ..."
if aws s3 sync "$S3_PATH/" "$DATA_DIR/" \
  --endpoint-url "$ENDPOINT" \
  --no-progress; then
  echo "[restore] Restore complete"
else
  echo "[restore] Restore returned non-zero (may be first run) — continuing"
fi

# When a snapshot is present it becomes the live database.
if [ -f "$SAFE_BACKUP_FILE" ]; then
  echo "[restore] Overwriting live DB with safe backup from S3..."
  # Drop sidecar files belonging to the database being replaced: a leftover
  # WAL or rollback journal that does not match the new file can corrupt it
  # when SQLite opens the database.
  rm -f -- "${DB_FILE}-wal" "${DB_FILE}-shm" "${DB_FILE}-journal"
  cp -- "$SAFE_BACKUP_FILE" "$DB_FILE"
fi
| |
|
# Refuse to start on a damaged database: run SQLite's integrity check and
# abort unless the first line of its output is exactly "ok".
if [ -f "$DB_FILE" ]; then
  echo "[integrity] Checking DB integrity ..."

  # stderr is merged in so an sqlite3 error message ends up in the report.
  # '|| true' keeps errexit/pipefail from aborting before the result is
  # inspected; the comparison below decides the outcome.
  INTEGRITY=$(sqlite3 "$DB_FILE" "PRAGMA integrity_check;" 2>&1 | head -1 || true)

  if [ "$INTEGRITY" != "ok" ]; then
    echo "[integrity] ERROR: DB integrity check failed: $INTEGRITY"
    echo "[integrity] The restored database is corrupt."
    echo "[integrity] → Check your S3 bucket for a healthy backup."
    exit 1
  fi

  echo "[integrity] DB is healthy: $INTEGRITY"
else
  echo "[integrity] No existing DB found — Label Studio will create a fresh one."
fi
| |
|
| | |
# Create the admin account from the configured credentials.
echo "[user] Ensuring admin user exists ..."

# Both credentials are mandatory; each ':?' expansion stops the script with
# its message when the variable is unset or empty.
: "${LABEL_STUDIO_USERNAME:?'LABEL_STUDIO_USERNAME secret is required'}"
: "${LABEL_STUDIO_PASSWORD:?'LABEL_STUDIO_PASSWORD secret is required'}"

# Best effort: the command's stderr is discarded and a non-zero exit is
# ignored (presumably the account already exists on later starts - verify).
label-studio user create \
  --username "$LABEL_STUDIO_USERNAME" \
  --password "$LABEL_STUDIO_PASSWORD" \
  --email "${LABEL_STUDIO_USERNAME}@localhost" \
  --preserve-case 2>/dev/null || :
| |
|
| | |
# Run Label Studio in the background so this script stays in control of the
# periodic sync and of shutdown.
echo "[start] Starting Label Studio ..."
label-studio --host="$SPACE_HOST" &
LS_PID=$!
echo "[start] Label Studio PID: $LS_PID"

# Installed only after LS_PID is known, because the handler uses it.
# The handler is bound to EXIT alone. INT and TERM merely call `exit`, which
# fires the EXIT trap, so the handler runs exactly once per shutdown and the
# script terminates with the conventional 128+signal status instead of
# resuming after the signal.
trap shutdown_handler EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
| |
|
| | |
# Main loop: wait one interval, make sure Label Studio is still running, then
# upload a snapshot.
echo "[loop] Entering sync loop (every ${SYNC_INTERVAL}s) ..."
while true; do
  # Sleep in the background and block in `wait`: bash runs a trap immediately
  # when a signal interrupts `wait`, but defers it until a foreground command
  # such as `sleep` has finished - which would delay shutdown by up to a full
  # interval.
  sleep "$SYNC_INTERVAL" &
  sleep_pid=$!
  sleep_status=0
  wait "$sleep_pid" || sleep_status=$?

  if [ "$sleep_status" -gt 128 ]; then
    # Interrupted by a signal; do not leave the timer running behind us.
    kill "$sleep_pid" 2>/dev/null || true
  elif [ "$sleep_status" -ne 0 ]; then
    # sleep itself failed (for example an invalid interval). Stop, as errexit
    # would for a foreground sleep, rather than spin in a tight loop.
    exit "$sleep_status"
  fi

  # kill -0 only probes whether the process exists; no signal is delivered.
  if ! kill -0 "$LS_PID" 2>/dev/null; then
    echo "[loop] Label Studio process has exited unexpectedly — stopping container"
    exit 1
  fi

  sync_to_s3 "periodic"
done
| |
|