Bromeo777 committed on
Commit
85b18c7
·
verified ·
1 Parent(s): b41fceb

Add app\core\hf_sync.py

Browse files
Files changed (1) hide show
  1. app//core//hf_sync.py +76 -0
app//core//hf_sync.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Romeo AI Research Assistant - High-Stability Sync Service
2
+ # Version: 2026.03.15
3
+
4
+ import os
5
+ import fcntl
6
+ import logging
7
+ from datetime import datetime
8
+ from huggingface_hub import hf_hub_download, HfApi
9
+ from apscheduler.schedulers.background import BackgroundScheduler
10
+ from app.core.config import settings
11
+
12
# Module-wide singletons: the named logger, the Hugging Face API client and
# the background scheduler that runs the periodic backup job.
logger = logging.getLogger("romeo_sync")
api = HfApi()
scheduler = BackgroundScheduler()

# Remote sync settings, read once from the application settings object.
# The functions in this module treat a falsy value for either one as
# "sync disabled".
HF_TOKEN = settings.HF_TOKEN
REPO_ID = settings.HF_DATASET_REPO

# Local storage layout: database file name, its directory, and the joined path.
DB_NAME = "romeo_research.db"
LOCAL_DATA_DIR = "./data"
LOCAL_PATH = os.path.join(LOCAL_DATA_DIR, DB_NAME)
22
+
23
def download_db_from_hf():
    """Fetch the database file from the HF dataset repo into the local data dir.

    The local data directory is always created first. When either the repo id
    or the token is missing, the function logs that it is running local-only
    and returns without contacting the Hub. Any exception raised during the
    download is caught and logged as a warning; nothing is re-raised.
    """
    os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

    sync_configured = bool(REPO_ID) and bool(HF_TOKEN)
    if not sync_configured:
        logger.info("Running in local-only mode (no HF sync variables found)")
        return

    download_args = {
        "repo_id": REPO_ID,
        "filename": DB_NAME,
        "repo_type": "dataset",
        "token": HF_TOKEN,
        "local_dir": LOCAL_DATA_DIR,
    }

    try:
        logger.info(f"Downloading {DB_NAME} from {REPO_ID}...")
        hf_hub_download(**download_args)
        logger.info("Database successfully synchronized.")
    except Exception as exc:
        # NOTE(review): every failure (network, auth, missing file) is reported
        # with this same "First Run" wording.
        logger.warning(f"No existing DB found on HF (First Run): {exc}")
43
+
44
def backup_db_to_hf():
    """Upload a consistent snapshot of the local SQLite database to the HF repo.

    Does nothing when the repo id or token is missing, or when the local
    database file does not exist yet.

    The snapshot is produced with SQLite's online backup API rather than by
    reading the live file under ``fcntl.flock``. SQLite on Unix coordinates
    access with POSIX ``fcntl`` byte-range locks, which a BSD ``flock`` does
    not interact with, so a shared ``flock`` cannot keep writers out and the
    uploaded file could be a torn copy. ``Connection.backup`` copies the
    database through SQLite itself, so the snapshot is transactionally
    consistent and includes content that currently lives only in a WAL file.

    Any exception is caught and logged with its traceback; nothing is
    re-raised, so a failed backup never takes down the scheduler thread.
    """
    # Function-scope imports keep this block self-contained; these stdlib
    # modules are not imported at the top of the file.
    import sqlite3
    import tempfile
    from contextlib import closing

    if not REPO_ID or not HF_TOKEN or not os.path.exists(LOCAL_PATH):
        return

    try:
        # The temporary directory (and the snapshot inside it) is removed as
        # soon as the upload finishes or fails.
        with tempfile.TemporaryDirectory() as snapshot_dir:
            snapshot_path = os.path.join(snapshot_dir, DB_NAME)

            # sqlite3 connections are not closed by their own context manager
            # (that only scopes a transaction), hence contextlib.closing.
            with closing(sqlite3.connect(LOCAL_PATH)) as live_db:
                with closing(sqlite3.connect(snapshot_path)) as snapshot_db:
                    live_db.backup(snapshot_db)

            commit_stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            api.upload_file(
                path_or_fileobj=snapshot_path,
                path_in_repo=DB_NAME,
                repo_id=REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message=f"Romeo AI Backup: {commit_stamp}",
            )
        logger.info("HF Backup completed successfully.")
    except Exception as e:
        # Same message as before, now with the traceback attached.
        logger.exception(f"Backup failed: {e}")
65
+
66
def start_backup_scheduler():
    """Register the 5-minute backup job and start the background scheduler.

    Does nothing unless both the HF token and the repo id are configured.

    The call is idempotent: the job is registered under a fixed id with
    ``replace_existing=True`` so repeated calls never stack duplicate backup
    jobs, and the scheduler is only started when it is not already running
    (starting a running scheduler raises ``SchedulerAlreadyRunningError``).
    """
    if not (HF_TOKEN and REPO_ID):
        return

    scheduler.add_job(
        backup_db_to_hf,
        'interval',
        minutes=5,
        id="hf_db_backup",
        replace_existing=True,
    )
    if not scheduler.running:
        scheduler.start()
    logger.info("HF backup scheduler started (5min interval)")
72
+
73
def stop_backup_scheduler():
    """Shut the backup scheduler down if it is currently running.

    Safe to call when the scheduler was never started: in that case the
    function returns without doing anything.
    """
    if not scheduler.running:
        return
    scheduler.shutdown()