#!/usr/bin/env python3
"""Sync PostgreSQL backups to HuggingFace Xet Dataset"""
import os
import sys
import time
import subprocess
import signal
from datetime import datetime
from pathlib import Path
# Seconds between backup runs; read from $SYNC_INTERVAL, defaulting to 300.
SYNC_INTERVAL = int(os.getenv('SYNC_INTERVAL', 300))
# Hugging Face access token; empty string when $HF_TOKEN is unset.
HF_TOKEN = os.getenv('HF_TOKEN', '')
# Dataset repo id the backups are uploaded to; empty string when unset.
XET_DATASET = os.getenv('XET_DATASET', '')
# Loop flag for the sync loop; set to False by the signal handler on shutdown.
running = True
def log(msg):
    """Print *msg* to stdout prefixed with the current local time.

    The line has the form ``[HH:MM:SS] msg`` and stdout is flushed after
    every call.

    Args:
        msg: Text to print after the timestamp.
    """
    stamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}", flush=True)
def wait_for_postgres(retries=30, host='localhost', port=5432, delay=2):
    """Poll ``pg_isready`` until PostgreSQL accepts connections.

    Args:
        retries: Maximum number of ``pg_isready`` probes to run.
        host: Server host passed to ``pg_isready -h``.
        port: Server port passed to ``pg_isready -p``.
        delay: Seconds to sleep between two failed probes.

    Returns:
        True as soon as one probe exits with status 0. False when every
        probe fails, when ``retries`` is not positive, or when the
        ``pg_isready`` executable cannot be found.
    """
    probe = ['pg_isready', '-h', str(host), '-p', str(port)]
    for attempt in range(retries):
        try:
            result = subprocess.run(probe, capture_output=True)
        except FileNotFoundError:
            # Without the client tools installed no retry can ever succeed.
            log("❌ pg_isready not found on PATH")
            return False
        if result.returncode == 0:
            log("✅ PostgreSQL ready")
            return True
        # Sleeping after the final failed probe would only delay the caller.
        if attempt < retries - 1:
            time.sleep(delay)
    return False
def backup_and_upload():
    """Dump the ``appdb`` database and upload the dump to the dataset repo.

    When ``HF_TOKEN`` or ``XET_DATASET`` is empty, a warning is logged and
    nothing else happens. Otherwise ``pg_dump`` writes
    ``/data/backup/pg_backup.sql`` and that file is uploaded as
    ``backup/pg_backup.sql`` to the ``XET_DATASET`` dataset repository.

    Exceptions raised along the way are caught and logged rather than
    propagated; the function always returns None.
    """
    if not HF_TOKEN or not XET_DATASET:
        log("⚠️ HF_TOKEN or XET_DATASET not set")
        return

    try:
        backup_dir = Path('/data/backup')
        backup_dir.mkdir(parents=True, exist_ok=True)
        backup_file = backup_dir / 'pg_backup.sql'

        # Create backup
        result = subprocess.run(
            ['pg_dump', '-h', 'localhost', '-U', 'postgres', '-d', 'appdb',
             '--clean', '--if-exists', '-f', str(backup_file)],
            capture_output=True, text=True,
        )
        if result.returncode != 0:
            log(f"❌ Backup failed: {result.stderr}")
            return
        log(f"✅ Backup created ({backup_file.stat().st_size} bytes)")

        # Upload to HuggingFace. Imported inside the try block so a missing
        # huggingface_hub package is reported by the handler below.
        from huggingface_hub import HfApi, create_repo

        api = HfApi(token=HF_TOKEN)
        try:
            create_repo(XET_DATASET, repo_type="dataset", private=True,
                        token=HF_TOKEN, exist_ok=True)
        except Exception as e:
            # Best effort only: the upload below is still attempted. Catching
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate instead of being swallowed here.
            log(f"⚠️ Could not ensure dataset repo exists: {e}")

        api.upload_file(
            path_or_fileobj=str(backup_file),
            path_in_repo="backup/pg_backup.sql",
            repo_id=XET_DATASET,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message=f"Backup {datetime.now().isoformat()}",
        )
        log(f"✅ Uploaded to {XET_DATASET}")
    except Exception as e:
        log(f"❌ Error: {e}")
def signal_handler(signum, frame):
    """Signal handler: take one last backup, then exit with status 0.

    Args:
        signum: Number of the received signal (not used).
        frame: Stack frame that was interrupted (not used).
    """
    global running
    log("🛑 Shutting down, final backup...")
    # Clear the flag before the backup so any loop checking it stops.
    running = False
    # NOTE(review): the full backup runs inside the handler, so a signal that
    # arrives while a backup is already in progress starts a second one on
    # top of it - confirm this is acceptable for the deployment.
    backup_and_upload()
    sys.exit(0)
def main():
    """Run the sync manager until a shutdown signal arrives.

    Logs the configuration, installs ``signal_handler`` for SIGTERM and
    SIGINT, waits for PostgreSQL, then repeatedly calls
    ``backup_and_upload`` with a pause of ``SYNC_INTERVAL`` seconds between
    runs. Returns early (after logging) when PostgreSQL never becomes ready.
    """
    log("🚀 Sync Manager started")
    log(f"📦 Dataset: {XET_DATASET or 'NOT SET'}")
    log(f"⏰ Interval: {SYNC_INTERVAL}s")

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    if not wait_for_postgres():
        log("❌ PostgreSQL not available")
        return

    time.sleep(30)  # Initial delay

    # A zero or negative interval would make range() empty and run backups
    # back-to-back with no pause, so wait at least one second.
    pause = max(SYNC_INTERVAL, 1)
    while running:
        backup_and_upload()
        # Sleep in one-second slices so a cleared `running` flag is noticed
        # within a second instead of after the whole interval.
        for _ in range(pause):
            if not running:
                break
            time.sleep(1)
# Start the sync manager only when this file is executed as a script.
if __name__ == '__main__':
    main()