File size: 3,046 Bytes
7b01c6f
57645c7
7b01c6f
 
 
 
 
 
 
 
 
57645c7
7b01c6f
57645c7
7b01c6f
 
 
57645c7
 
7b01c6f
57645c7
 
 
 
 
 
 
 
7b01c6f
57645c7
7b01c6f
 
57645c7
 
 
 
 
7b01c6f
 
 
 
 
 
 
57645c7
 
 
7b01c6f
 
57645c7
7b01c6f
57645c7
 
 
 
 
7b01c6f
 
 
 
 
57645c7
 
 
 
7b01c6f
 
57645c7
7b01c6f
 
 
 
57645c7
7b01c6f
 
57645c7
7b01c6f
 
57645c7
7b01c6f
 
 
57645c7
7b01c6f
57645c7
7b01c6f
 
 
57645c7
 
 
7b01c6f
 
 
 
 
57645c7
7b01c6f
 
57645c7
7b01c6f
 
57645c7
7b01c6f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
"""Sync PostgreSQL backups to HuggingFace Xet Dataset"""

import os
import sys
import time
import subprocess
import signal
from datetime import datetime
from pathlib import Path

# Seconds between consecutive backups; read from the SYNC_INTERVAL env var (default 300).
SYNC_INTERVAL = int(os.getenv('SYNC_INTERVAL', 300))
# HuggingFace access token; empty string when HF_TOKEN is unset.
HF_TOKEN = os.getenv('HF_TOKEN', '')
# Dataset repo id the backup is uploaded to; empty string when XET_DATASET is unset.
XET_DATASET = os.getenv('XET_DATASET', '')

# Main-loop flag; the signal handler sets it to False on shutdown.
running = True

def log(msg):
    """Print *msg* to stdout, prefixed with the current local time as ``[HH:MM:SS]``.

    The output is flushed immediately.
    """
    stamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}", flush=True)

def wait_for_postgres(retries=30, host='localhost', port=5432, delay=2):
    """Poll ``pg_isready`` until PostgreSQL accepts connections.

    Args:
        retries: Maximum number of ``pg_isready`` attempts.
        host: Server host passed to ``pg_isready -h``.
        port: Server port passed to ``pg_isready -p``.
        delay: Seconds to wait between two failed attempts.

    Returns:
        True as soon as ``pg_isready`` exits with status 0; False when every
        attempt fails or ``retries`` is not positive.
    """
    probe = ['pg_isready', '-h', str(host), '-p', str(port)]
    for attempt in range(1, retries + 1):
        result = subprocess.run(probe, capture_output=True)
        if result.returncode == 0:
            log("βœ… PostgreSQL ready")
            return True
        # Do not sleep after the last attempt: nothing is retried afterwards,
        # so the wait would only delay reporting the failure.
        if attempt < retries:
            time.sleep(delay)
    return False

def backup_and_upload():
    """Dump the database with ``pg_dump`` and upload the dump to HuggingFace.

    The dump of database ``appdb`` is written to
    ``/data/backup/pg_backup.sql`` and uploaded to the dataset repo named by
    ``XET_DATASET`` as ``backup/pg_backup.sql``. Missing configuration, a
    failing ``pg_dump`` and any ``Exception`` raised along the way are logged
    rather than propagated.

    Returns:
        None.
    """
    if not HF_TOKEN or not XET_DATASET:
        log("⚠️  HF_TOKEN or XET_DATASET not set")
        return

    try:
        backup_dir = Path('/data/backup')
        backup_dir.mkdir(parents=True, exist_ok=True)
        backup_file = backup_dir / 'pg_backup.sql'

        # Create backup
        result = subprocess.run(
            ['pg_dump', '-h', 'localhost', '-U', 'postgres', '-d', 'appdb',
             '--clean', '--if-exists', '-f', str(backup_file)],
            capture_output=True, text=True
        )

        if result.returncode != 0:
            log(f"❌ Backup failed: {result.stderr}")
            return

        log(f"βœ… Backup created ({backup_file.stat().st_size} bytes)")

        # Upload to HuggingFace.
        # Imported here, inside the try, so an import failure is logged by
        # the handler below like any other error.
        from huggingface_hub import HfApi, create_repo

        api = HfApi(token=HF_TOKEN)

        try:
            create_repo(XET_DATASET, repo_type="dataset", private=True,
                        token=HF_TOKEN, exist_ok=True)
        except Exception as exc:
            # Best effort: keep going and let the upload below surface any
            # real problem. Catching Exception instead of using a bare
            # except keeps SystemExit (raised by the shutdown signal handler)
            # and KeyboardInterrupt from being swallowed here.
            log(f"⚠️  Could not create dataset repo (continuing): {exc}")

        api.upload_file(
            path_or_fileobj=str(backup_file),
            path_in_repo="backup/pg_backup.sql",
            repo_id=XET_DATASET,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message=f"Backup {datetime.now().isoformat()}"
        )

        log(f"βœ… Uploaded to {XET_DATASET}")

    except Exception as e:
        log(f"❌ Error: {e}")

def signal_handler(signum, frame):
    """Handle a shutdown signal: take one final backup, then exit with status 0.

    Args:
        signum: Number of the received signal (unused).
        frame: Interrupted stack frame (unused).

    Raises:
        SystemExit: Always, with code 0, after the final backup attempt of
            the first signal. Later signals return without doing anything.
    """
    global running
    if not running:
        # A shutdown is already in progress (another signal arrived while the
        # final backup was running); do not start a nested backup.
        return
    running = False
    log("πŸ›‘ Shutting down, final backup...")
    backup_and_upload()
    sys.exit(0)

def main():
    """Run the sync manager.

    Logs the configuration, installs the SIGTERM/SIGINT handler, waits for
    PostgreSQL, and then backs up every ``SYNC_INTERVAL`` seconds while the
    ``running`` flag stays set. Returns early if PostgreSQL never becomes
    ready.
    """
    startup_lines = (
        "πŸš€ Sync Manager started",
        f"πŸ“¦ Dataset: {XET_DATASET or 'NOT SET'}",
        f"⏰ Interval: {SYNC_INTERVAL}s",
    )
    for line in startup_lines:
        log(line)

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, signal_handler)

    postgres_up = wait_for_postgres()
    if not postgres_up:
        log("❌ PostgreSQL not available")
        return

    time.sleep(30)  # Initial delay

    while running:
        backup_and_upload()
        # Wait in one-second steps so a cleared `running` flag ends the wait
        # within about a second instead of after the full interval.
        seconds_left = SYNC_INTERVAL
        while running and seconds_left > 0:
            time.sleep(1)
            seconds_left -= 1

# Start the sync manager only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()