File size: 4,851 Bytes
11b3640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8aa67e0
11b3640
 
 
 
8aa67e0
11b3640
 
 
 
8aa67e0
 
11b3640
8aa67e0
 
11b3640
 
8aa67e0
11b3640
 
8aa67e0
 
 
 
11b3640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8aa67e0
 
 
 
 
 
 
11b3640
 
8aa67e0
 
 
11b3640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/bin/bash
# =============================================================================
# entrypoint.sh  –  Label Studio wrapper for Hugging Face Spaces
# =============================================================================

# Strict mode: abort on unhandled errors, on use of unset variables, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ------------- configuration (all from HF secrets / env) -------------------
# The bucket is the only mandatory setting; every other value has a default.
: "${BACKUP_S3_BUCKET:?'BACKUP_S3_BUCKET secret is required'}"
BUCKET="${BACKUP_S3_BUCKET}"
PREFIX="${BACKUP_S3_PREFIX:-labelstudio}"
ENDPOINT="${AWS_ENDPOINT_URL:-https://s3.amazonaws.com}"
SYNC_INTERVAL="${SYNC_INTERVAL_SECONDS:-60}"

# Local layout: the live SQLite database and its upload-safe snapshot both
# live directly inside the data directory.
DATA_DIR=/data
DB_FILE="${DATA_DIR}/label_studio.sqlite3"
SAFE_BACKUP_FILE="${DATA_DIR}/ls_safe_backup.sqlite3"

# Remote location that DATA_DIR is mirrored to and restored from.
S3_PATH="s3://${BUCKET}/${PREFIX}"

# Make sure a region is always exported for the AWS CLI (us-east-1 if unset).
REGION="${AWS_DEFAULT_REGION:-us-east-1}"
AWS_DEFAULT_REGION="${REGION}"
export AWS_DEFAULT_REGION

# ------------- helper: Atomic SQLite Backup + S3 upload --------------------
#######################################
# Snapshot the live SQLite database and mirror the data directory to S3.
# Globals (read): DB_FILE, SAFE_BACKUP_FILE, DATA_DIR, S3_PATH, ENDPOINT
# Arguments:
#   $1 - label used in the log lines (defaults to "periodic")
# Outputs: progress / warning lines on stdout
# Returns: always 0 – a failed backup or upload is logged and retried on the
#          next call, so callers running under `set -e` are never aborted.
#######################################
sync_to_s3() {
    local reason="${1:-periodic}"
    local db_name="${DB_FILE##*/}"
    local tmp_backup="${SAFE_BACKUP_FILE}.tmp"

    echo "[sync] Starting $reason sync to $S3_PATH/ ..."

    # 1. Create a safe, consistent snapshot of the SQLite DB.
    # The snapshot is written to a temp file and renamed into place only when
    # sqlite3 succeeded and produced a non-empty file. Writing directly onto
    # SAFE_BACKUP_FILE would let a failed/interrupted backup replace the last
    # good snapshot with a truncated one, which would then be uploaded.
    if [ -f "$DB_FILE" ]; then
        # A leftover temp file from a killed run must not confuse sqlite3.
        rm -f -- "$tmp_backup" || true
        if sqlite3 "$DB_FILE" ".backup '$tmp_backup'" \
            && [ -s "$tmp_backup" ] \
            && mv -f -- "$tmp_backup" "$SAFE_BACKUP_FILE"; then
            :
        else
            rm -f -- "$tmp_backup" || true
            echo "[sync] Warning: SQLite backup failed, syncing rest of data."
        fi
    fi

    # 2. Sync to S3, explicitly excluding the live database, its lock/journal
    # files and the (normally already removed) temporary snapshot.
    if aws s3 sync "$DATA_DIR/" "$S3_PATH/" \
        --endpoint-url "$ENDPOINT" \
        --exclude "$db_name" \
        --exclude "${db_name}-journal" \
        --exclude "${db_name}-wal" \
        --exclude "${db_name}-shm" \
        --exclude "${tmp_backup##*/}" \
        --no-progress; then
        echo "[sync] $reason sync complete"
    else
        echo "[sync] WARNING: $reason sync failed – will retry next cycle"
    fi

    return 0
}

# ------------- graceful shutdown -------------------------------------------
#######################################
# Trap handler: upload a final snapshot, stop Label Studio, then exit.
# Intended to be installed for EXIT, INT and TERM.
# Globals (read): LS_PID
# Outputs: progress lines on stdout
# Returns: does not return – exits the shell with the status that was current
#          when the handler was entered (e.g. 1 when the main loop calls
#          `exit 1`).
#######################################
shutdown_handler() {
    # Must be first: $? still holds the status from before the trap fired.
    local rc=$?

    # Disarm before doing any work. Without this, a signal-triggered run is
    # followed by a second full run (including a second upload) when the
    # shell eventually exits and the EXIT trap fires.
    trap - EXIT INT TERM

    echo ""
    echo "[shutdown] SIGTERM received – uploading final snapshot before exit ..."
    # Best effort: a failed upload must not prevent Label Studio being stopped.
    sync_to_s3 "shutdown" || true

    echo "[shutdown] Stopping Label Studio (PID ${LS_PID:-unknown}) ..."
    if [ -n "${LS_PID:-}" ]; then
        kill "$LS_PID" 2>/dev/null || true
        wait "$LS_PID" 2>/dev/null || true
    fi
    echo "[shutdown] Done. Goodbye."

    # Terminate here so that a signal actually ends the script instead of
    # handing control back to whatever was interrupted.
    exit "$rc"
}

# ------------- 1. restore from S3 -----------------------------------------
# Startup banner summarising the effective configuration.
banner_rule="============================================================"
printf '%s\n' \
    "$banner_rule" \
    " Label Studio  |  HF Spaces + S3 backup  |  $(date -u)" \
    " Bucket  : $S3_PATH/" \
    " Data dir: $DATA_DIR" \
    " Sync     : every ${SYNC_INTERVAL}s" \
    "$banner_rule"

mkdir -p "$DATA_DIR"

# Pull the remote copy into the data directory. A non-zero result is logged
# and tolerated so that a first run can proceed.
# NOTE(review): a failed restore followed by a later periodic upload could
# replace a good remote snapshot with a fresh one – confirm this is acceptable.
printf '%s\n' "[restore] Syncing data DOWN from $S3_PATH/ ..."
if aws s3 sync "$S3_PATH/" "$DATA_DIR/" --endpoint-url "$ENDPOINT" --no-progress; then
    printf '%s\n' "[restore] Restore complete"
else
    printf '%s\n' "[restore] Restore returned non-zero (may be first run) – continuing"
fi

# ------------- 2. Restore DB & Integrity Check ----------------------------
# If we have a safe backup from a previous session, make it the live DB
#######################################
# Make the snapshot restored from S3 the live database, if one exists.
# Sidecar files (-journal / -wal / -shm) left next to the old live DB belong
# to the file being replaced; SQLite would otherwise try to apply them to the
# freshly copied database, so they are removed together with the overwrite.
# Globals (read): SAFE_BACKUP_FILE, DB_FILE
# Outputs: one log line on stdout when a snapshot is applied
# Returns: 0 when there is nothing to do or the copy succeeded
#######################################
restore_live_db_from_backup() {
    if [ ! -f "$SAFE_BACKUP_FILE" ]; then
        return 0
    fi
    echo "[restore] Overwriting live DB with safe backup from S3..."
    rm -f -- "${DB_FILE}-journal" "${DB_FILE}-wal" "${DB_FILE}-shm"
    cp "$SAFE_BACKUP_FILE" "$DB_FILE"
}

restore_live_db_from_backup

# Refuse to start on a database that SQLite itself reports as damaged.
if [ -f "$DB_FILE" ]; then
    echo "[integrity] Checking DB integrity ..."
    # Only the first line of the report is inspected; a healthy DB prints "ok".
    # The `|| true` keeps set -e / pipefail from aborting the script before the
    # explicit error message below can be printed.
    INTEGRITY=$(sqlite3 "$DB_FILE" "PRAGMA integrity_check;" 2>&1 | head -1 || true)

    if [ "$INTEGRITY" != "ok" ]; then
        echo "[integrity] ERROR: DB integrity check failed: $INTEGRITY"
        echo "[integrity] The restored database is corrupt."
        echo "[integrity] → Check your S3 bucket for a healthy backup."
        exit 1
    fi
    echo "[integrity] DB is healthy: $INTEGRITY"
else
    echo "[integrity] No existing DB found – Label Studio will create a fresh one."
fi

# ------------- 3. ensure admin user exists (idempotent) -------------------
printf '%s\n' "[user] Ensuring admin user exists ..."
# Both credentials are mandatory; the script aborts here if either is missing.
admin_username="${LABEL_STUDIO_USERNAME:?'LABEL_STUDIO_USERNAME secret is required'}"
admin_password="${LABEL_STUDIO_PASSWORD:?'LABEL_STUDIO_PASSWORD secret is required'}"
create_user_args=(
    --username "$admin_username"
    --password "$admin_password"
    --email "${admin_username}@localhost"
    --preserve-case
)
# Any failure of the command is deliberately ignored (stderr discarded, status
# forced to success) so that re-running this step never aborts the script.
label-studio user create "${create_user_args[@]}" 2>/dev/null || true

# ------------- 4. start Label Studio in the background --------------------
printf '%s\n' "[start] Starting Label Studio ..."
# Launch the server as a background job and remember its PID for the
# liveness check and the shutdown handler.
label-studio --host="${SPACE_HOST}" &
LS_PID="$!"
printf '%s\n' "[start] Label Studio PID: ${LS_PID}"

# Installed only after LS_PID is set, because the handler reads it.
trap 'shutdown_handler' EXIT INT TERM

# ------------- 5. periodic sync loop (runs forever) -----------------------
echo "[loop] Entering sync loop (every ${SYNC_INTERVAL}s) ..."
while true; do
    # Sleep in the background and block in `wait` instead of running `sleep`
    # in the foreground: bash postpones a trapped signal until the current
    # foreground command finishes, which would delay the shutdown handler by
    # up to SYNC_INTERVAL seconds. `wait` returns immediately (status > 128)
    # when a trapped signal arrives, so the handler runs right away.
    sleep "$SYNC_INTERVAL" &
    sleep_pid=$!
    sleep_rc=0
    wait "$sleep_pid" || sleep_rc=$?
    if [ "$sleep_rc" -gt 128 ]; then
        # Interrupted by a signal: do not leave the timer process behind.
        kill "$sleep_pid" 2>/dev/null || true
    elif [ "$sleep_rc" -ne 0 ]; then
        # sleep itself failed (e.g. an invalid interval); stop instead of
        # spinning through back-to-back syncs.
        echo "[loop] ERROR: sleep '$SYNC_INTERVAL' failed (exit $sleep_rc) – check SYNC_INTERVAL_SECONDS"
        exit "$sleep_rc"
    fi

    # kill -0 only probes whether the process still exists.
    if ! kill -0 "$LS_PID" 2>/dev/null; then
        echo "[loop] Label Studio process has exited unexpectedly – stopping container"
        exit 1
    fi
    sync_to_s3 "periodic"
done