Spaces:
AHC-Devs
/
Running on CPU Upgrade

ls-C / entrypoint.sh
Timothy S. Phan
syncs atomic .backup sqlite db to S3
c8568b1
#!/bin/bash
# =============================================================================
# entrypoint.sh - Label Studio wrapper for Hugging Face Spaces
#
# Reads its settings from the environment, then (further down) restores the
# data directory from S3, starts Label Studio and keeps syncing back to S3.
# =============================================================================
set -o errexit -o nounset -o pipefail

# ------------- configuration (all from HF secrets / env) -------------------
# Local layout: everything that gets mirrored lives under DATA_DIR.
DATA_DIR="/data"
DB_FILE="${DATA_DIR}/label_studio.sqlite3"          # live database
SAFE_BACKUP_FILE="${DATA_DIR}/ls_safe_backup.sqlite3" # snapshot that is uploaded

# Required secret: the shell aborts right here when it is unset or empty.
: "${BACKUP_S3_BUCKET:?'BACKUP_S3_BUCKET secret is required'}"
BUCKET="$BACKUP_S3_BUCKET"

# Optional settings with their fallbacks.
PREFIX="${BACKUP_S3_PREFIX:-labelstudio}"
REGION="${AWS_DEFAULT_REGION:-us-east-1}"
ENDPOINT="${AWS_ENDPOINT_URL:-https://s3.amazonaws.com}"
SYNC_INTERVAL="${SYNC_INTERVAL_SECONDS:-60}"

# Derived remote location used by every `aws s3 sync` call.
S3_PATH="s3://${BUCKET}/${PREFIX}"

# Make the (possibly defaulted) region visible to child processes.
AWS_DEFAULT_REGION="$REGION"
export AWS_DEFAULT_REGION
# ------------- helper: Atomic SQLite Backup + S3 upload --------------------
#######################################
# Snapshot the live SQLite database and mirror the data directory to S3.
#
# The snapshot is written by sqlite3's ".backup" command into a staging file
# next to SAFE_BACKUP_FILE and is renamed over SAFE_BACKUP_FILE only when the
# command succeeded and produced a non-empty file. A failed or truncated
# backup therefore never replaces the last good snapshot, and the staging
# file itself is never uploaded.
#
# Globals:   DB_FILE, SAFE_BACKUP_FILE, DATA_DIR, S3_PATH, ENDPOINT (read)
# Arguments: $1 - label used in the log lines (default: "periodic")
# Outputs:   progress and warning lines on stdout; sqlite3/aws errors on stderr
# Returns:   always 0 - failures are logged, never propagated
#######################################
sync_to_s3() {
  local reason="${1:-periodic}"
  local staging="${SAFE_BACKUP_FILE}.tmp"

  echo "[sync] Starting $reason sync to $S3_PATH/ ..."

  # 1. Create a safe backup of the SQLite DB instead of copying the live
  #    file and its journal/WAL companions while they may be changing.
  if [[ -f "$DB_FILE" ]]; then
    # Drop leftovers from an interrupted earlier run.
    rm -f -- "$staging" || true
    if ! { sqlite3 "$DB_FILE" ".backup '$staging'" \
      && [[ -s "$staging" ]] \
      && mv -f -- "$staging" "$SAFE_BACKUP_FILE"; }; then
      # Keep the previous snapshot untouched and discard the partial one.
      rm -f -- "$staging" || true
      echo "[sync] Warning: SQLite backup failed, syncing rest of data."
    fi
  fi

  # 2. Sync to S3, excluding the live database, its lock/journal files and
  #    the staging file used above.
  if aws s3 sync "$DATA_DIR/" "$S3_PATH/" \
    --endpoint-url "$ENDPOINT" \
    --exclude "label_studio.sqlite3" \
    --exclude "label_studio.sqlite3-journal" \
    --exclude "label_studio.sqlite3-wal" \
    --exclude "label_studio.sqlite3-shm" \
    --exclude "${staging##*/}" \
    --no-progress; then
    echo "[sync] $reason sync complete"
  else
    echo "[sync] WARNING: $reason sync failed – will retry next cycle"
  fi

  return 0
}
# ------------- graceful shutdown -------------------------------------------
#######################################
# Trap handler: upload a final snapshot, stop Label Studio, then exit.
#
# The handler is meant to be registered for EXIT as well as INT/TERM. It
# disarms the EXIT trap and ignores further INT/TERM on entry and ends with
# an explicit `exit`, so it runs exactly once: without that, a signal would
# run it, the script would resume, and the eventual exit would run it again.
#
# Globals:   LS_PID (read)
# Arguments: none
# Outputs:   progress lines on stdout
# Returns:   does not return; exits with the status that was current when
#            the handler was entered
#######################################
shutdown_handler() {
  local rc=$?   # status of the last command before the trap fired

  trap - EXIT
  trap '' INT TERM   # do not let a repeated signal abort the final upload

  echo ""
  echo "[shutdown] SIGTERM received – uploading final snapshot before exit ..."
  # NOTE(review): the snapshot is taken while Label Studio is still running,
  # so writes made between the snapshot and the stop below are not captured.
  # Confirm this ordering is intended.
  sync_to_s3 "shutdown" || true

  echo "[shutdown] Stopping Label Studio (PID $LS_PID) ..."
  kill "$LS_PID" 2>/dev/null || true
  wait "$LS_PID" 2>/dev/null || true

  echo "[shutdown] Done. Goodbye."
  exit "$rc"
}
# ------------- 1. restore from S3 -----------------------------------------
# Startup banner: one printf call, one argument per output line.
printf '%s\n' \
  "============================================================" \
  " Label Studio | HF Spaces + S3 backup | $(date -u)" \
  " Bucket : $S3_PATH/" \
  " Data dir: $DATA_DIR" \
  " Sync : every ${SYNC_INTERVAL}s" \
  "============================================================"

# The sync target has to exist before anything is downloaded into it.
mkdir -p "$DATA_DIR"

echo "[restore] Syncing data DOWN from $S3_PATH/ ..."
# A failing download is only reported; the script carries on either way.
if aws s3 sync "$S3_PATH/" "$DATA_DIR/" \
  --endpoint-url "$ENDPOINT" \
  --no-progress; then
  echo "[restore] Restore complete"
else
  echo "[restore] Restore returned non-zero (may be first run) – continuing"
fi
# ------------- 2. Restore DB & Integrity Check ----------------------------
#######################################
# Make the restored safe backup the live database.
#
# Any -journal/-wal/-shm file still lying next to the live DB belongs to the
# file that is about to be replaced. They are removed first so SQLite cannot
# apply them to the freshly copied database.
#
# Globals:   SAFE_BACKUP_FILE, DB_FILE (read)
# Arguments: none
# Outputs:   one log line on stdout when a backup is applied
# Returns:   0 when there is no backup or the copy succeeded; otherwise the
#            status of the failing rm/cp
#######################################
restore_db_from_backup() {
  [[ -f "$SAFE_BACKUP_FILE" ]] || return 0

  echo "[restore] Overwriting live DB with safe backup from S3..."
  rm -f -- "${DB_FILE}-journal" "${DB_FILE}-wal" "${DB_FILE}-shm"
  cp -- "$SAFE_BACKUP_FILE" "$DB_FILE"
}

# If we have a safe backup from a previous session, make it the live DB.
restore_db_from_backup

if [[ -f "$DB_FILE" ]]; then
  echo "[integrity] Checking DB integrity ..."
  # `|| true` keeps errexit/pipefail from aborting here when sqlite3 fails;
  # its error text then lands in INTEGRITY and is rejected just below.
  INTEGRITY=$(sqlite3 "$DB_FILE" "PRAGMA integrity_check;" 2>&1 | head -1 || true)
  if [[ "$INTEGRITY" != "ok" ]]; then
    echo "[integrity] ERROR: DB integrity check failed: $INTEGRITY"
    echo "[integrity] The restored database is corrupt."
    echo "[integrity] β†’ Check your S3 bucket for a healthy backup."
    exit 1
  fi
  echo "[integrity] DB is healthy: $INTEGRITY"
else
  echo "[integrity] No existing DB found – Label Studio will create a fresh one."
fi
# ------------- 3. ensure admin user exists (idempotent) -------------------
echo "[user] Ensuring admin user exists ..."

# Collect the CLI arguments first. Both credentials are mandatory: expanding
# them aborts the script when a secret is unset or empty.
admin_args=(
  --username "${LABEL_STUDIO_USERNAME:?'LABEL_STUDIO_USERNAME secret is required'}"
  --password "${LABEL_STUDIO_PASSWORD:?'LABEL_STUDIO_PASSWORD secret is required'}"
  --email "${LABEL_STUDIO_USERNAME}@localhost"
  --preserve-case
)

# Best effort on purpose: the command's stderr is discarded and a non-zero
# status is ignored, so running this on every start is harmless.
label-studio user create "${admin_args[@]}" 2>/dev/null || true
# ------------- 4. start Label Studio in the background --------------------
# Launch the server as a background job and remember its PID in LS_PID.
printf '%s\n' "[start] Starting Label Studio ..."
label-studio --host="$SPACE_HOST" & LS_PID=$!
printf '%s\n' "[start] Label Studio PID: $LS_PID"

# Register the shutdown handler only after LS_PID is assigned.
trap 'shutdown_handler' EXIT INT TERM
# ------------- 5. periodic sync loop (runs forever) -----------------------
echo "[loop] Entering sync loop (every ${SYNC_INTERVAL}s) ..."
while true; do
  # Sleep in the background and block in `wait`. Bash postpones trap handlers
  # until a foreground command has finished, so a plain `sleep` would delay
  # the INT/TERM trap by up to SYNC_INTERVAL seconds. `wait` instead returns
  # at once (status > 128) when a trapped signal arrives.
  sleep "$SYNC_INTERVAL" &
  sleep_pid=$!
  sleep_rc=0
  wait "$sleep_pid" || sleep_rc=$?

  if ((sleep_rc > 128)); then
    # Interrupted by a signal: the timer is no longer needed.
    kill "$sleep_pid" 2>/dev/null || true
  elif ((sleep_rc != 0)); then
    # sleep itself failed (e.g. invalid interval); abort as errexit would
    # have done for a foreground sleep instead of spinning without a pause.
    exit "$sleep_rc"
  fi

  if ! kill -0 "$LS_PID" 2>/dev/null; then
    echo "[loop] Label Studio process has exited unexpectedly – stopping container"
    exit 1
  fi

  sync_to_s3 "periodic"
done