| import os |
| import sys |
| import shutil |
| import tarfile |
| import time |
| import signal |
| from pathlib import Path |
| from datetime import datetime |
| from huggingface_hub import HfApi, hf_hub_download, list_repo_files |
|
|
# --- Configuration (each value overridable via environment variable) ---

# Local directory that is backed up to / restored from the dataset repo.
DATA_DIR = Path(os.environ.get("DATA_DIR", "/root/.openclaw"))
# HF dataset repo used as remote backup storage (format: "user/name").
DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "your-username/your-dataset")
# Seconds between periodic uploads in upload_loop().
SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", 300))
# HF access token; None falls back to whatever auth huggingface_hub finds.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Name of the single tar.gz archive stored in the dataset repo.
ARCHIVE_NAME = "openclaw_backup.tar.gz"
# Top-level entries in DATA_DIR that are never included in the backup.
EXCLUDE_FILES = {"openclaw.json"}


# Shared client for uploads; signal handlers flip `running` to stop the loop.
api = HfApi(token=HF_TOKEN)
running = True
|
|
|
|
def signal_handler(signum, frame):
    """Flush one final backup to the dataset repo, then terminate.

    Registered for SIGTERM and SIGINT by upload_loop(); runs a last
    upload so changes made since the previous sync are not lost, then
    exits the process via SystemExit.
    """
    global running
    running = False  # stop the sync loop in case exit is intercepted
    print(f"[{datetime.now()}] Received signal {signum}, uploading before exit...")
    upload_to_dataset()
    sys.exit(0)
|
|
|
|
def download_from_dataset():
    """Restore DATA_DIR from the backup archive stored in the dataset repo.

    Creates DATA_DIR if needed, then looks for ARCHIVE_NAME in
    DATASET_REPO_ID.  When found, the archive is downloaded to /tmp,
    unpacked into DATA_DIR, and the temporary file is removed.  All
    failures are logged and swallowed so the caller can start fresh.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    try:
        files = list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        if ARCHIVE_NAME not in files:
            print(f"[{datetime.now()}] No backup found, starting fresh")
            return
        print(f"[{datetime.now()}] Downloading {ARCHIVE_NAME}...")
        archive_path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=ARCHIVE_NAME,
            repo_type="dataset",
            local_dir="/tmp",
            token=HF_TOKEN
        )
        try:
            # NOTE(review): unpack_archive extracts members without
            # sanitization; safe only while the dataset repo is owned and
            # trusted by us — confirm before pointing at a shared repo.
            shutil.unpack_archive(archive_path, DATA_DIR)
        finally:
            # Remove the temp archive even when extraction fails, so a
            # corrupt download does not linger in /tmp.
            os.remove(archive_path)
        print(f"[{datetime.now()}] Data restored to {DATA_DIR}")
    except Exception as e:
        # Best-effort restore: log and continue with an empty DATA_DIR.
        print(f"[{datetime.now()}] Download failed: {e}")
|
|
|
|
def upload_to_dataset():
    """Archive DATA_DIR (minus EXCLUDE_FILES) and upload it to the dataset repo.

    Builds a gzipped tar of DATA_DIR's top-level entries in /tmp, uploads
    it as ARCHIVE_NAME to DATASET_REPO_ID, and removes the temporary
    archive.  Does nothing when there is nothing to back up.  All
    failures are logged and swallowed so the periodic loop keeps running.
    """
    # Guard: the loop (or a shutdown signal) can fire before DATA_DIR was
    # ever created; iterdir() would raise outside the try and kill the loop.
    if not DATA_DIR.is_dir():
        print(f"[{datetime.now()}] No files to upload")
        return
    files_to_backup = [f for f in DATA_DIR.iterdir() if f.name not in EXCLUDE_FILES]
    if not files_to_backup:
        print(f"[{datetime.now()}] No files to upload")
        return
    archive_path = Path("/tmp") / ARCHIVE_NAME
    try:
        with tarfile.open(archive_path, "w:gz") as tar:
            for file_path in files_to_backup:
                # arcname keeps entries relative so restore lands in DATA_DIR.
                tar.add(file_path, arcname=file_path.name)
        print(f"[{datetime.now()}] Uploading {ARCHIVE_NAME}...")
        api.upload_file(
            path_or_fileobj=str(archive_path),
            path_in_repo=ARCHIVE_NAME,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
        )
        print(f"[{datetime.now()}] Upload completed")
    except Exception as e:
        print(f"[{datetime.now()}] Upload failed: {e}")
    finally:
        # Always clear the temp archive, success or failure, so a partial
        # tarball is never left behind in /tmp.
        if archive_path.exists():
            archive_path.unlink()
|
|
|
|
def upload_loop():
    """Upload a backup every SYNC_INTERVAL seconds until told to stop.

    Installs signal handlers (SIGTERM first, then SIGINT) that perform a
    final upload and exit, then sleeps/uploads while `running` is true.
    Note the first upload happens only after one full interval.
    """
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, signal_handler)
    while running:
        time.sleep(SYNC_INTERVAL)
        upload_to_dataset()
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: exactly one sub-command is required.
    if len(sys.argv) < 2:
        print("Usage: python sync_data.py [download|upload_loop]")
        sys.exit(1)


    cmd = sys.argv[1]
    print(f"Dataset: {DATASET_REPO_ID}")
    print(f"Data dir: {DATA_DIR}")


    # Dispatch on the sub-command; unknown commands exit non-zero.
    if cmd == "upload_loop":
        print(f"Sync interval: {SYNC_INTERVAL}s")
        upload_loop()
    elif cmd == "download":
        download_from_dataset()
    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)
|
|