#!/usr/bin/env python3
"""
OpenClaw Config Sync Script

Features: Pull config from Hugging Face Dataset and periodically push changes back
Updated: Sync credentials directory to persist pairing info across restarts
"""

import hashlib
import json
import logging
import os
import shutil
import sys
import threading
import time
from pathlib import Path, PurePosixPath

from huggingface_hub import HfApi, hf_hub_download, upload_file, list_repo_files

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# File suffixes treated as config files on both download and upload.
CONFIG_SUFFIXES = ('.json', '.yaml', '.yml')
# Regenerated from env vars on every boot, so it is never synced in either direction.
GENERATED_CONFIG_NAME = 'openclaw.json'
# Repo-side prefix under which the local credentials directory is mirrored.
CREDENTIALS_PREFIX = 'credentials/'


class OpenClawConfigSync:
    """Sync OpenClaw config and credentials files with a Hugging Face Dataset repo."""

    def __init__(self):
        """Read settings from the environment and create the Hub API client.

        Raises:
            ValueError: if HF_TOKEN or HF_DATASET is unset or empty.
        """
        self.hf_token = os.getenv('HF_TOKEN', '')
        self.dataset_repo = os.getenv('HF_DATASET', '')
        self.local_config_dir = Path('/root/.openclaw')
        self.credentials_dir = Path('/root/.openclaw/credentials')
        self.sync_interval = 300  # Sync every 5 minutes

        if not self.hf_token or not self.dataset_repo:
            logger.error('HF_TOKEN or HF_DATASET environment variable is not set')
            raise ValueError('Missing required environment variables')

        self.api = HfApi(token=self.hf_token)
        # Scratch directory that hf_hub_download writes into before files are copied out.
        self.repo_dir = Path('/tmp/openclaw_dataset')

    def calculate_file_hash(self, file_path):
        """Calculate MD5 hash of a file for change detection.

        Returns:
            The hex digest string, or None if the file could not be read.
        """
        hash_md5 = hashlib.md5()
        try:
            with open(file_path, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    hash_md5.update(chunk)
        except (OSError, TypeError, ValueError) as e:
            logger.error(f"Failed to calculate file hash {file_path}: {e}")
            return None
        return hash_md5.hexdigest()

    def ensure_local_dir(self):
        """Ensure local config and credentials directories exist."""
        self.local_config_dir.mkdir(parents=True, exist_ok=True)
        self.credentials_dir.mkdir(parents=True, exist_ok=True)

    def _local_destination(self, file_name):
        """Map a repo file path to its local restore path.

        Credentials keep their full repo-relative path (so sub-directories
        survive a round trip); other config files are restored by basename
        into the config directory.

        Returns:
            The destination Path, or None if the file is neither a
            credentials file nor a config file.
        """
        if file_name.startswith(CREDENTIALS_PREFIX):
            return self.local_config_dir / file_name
        if not file_name.endswith(CONFIG_SUFFIXES):
            return None
        return self.local_config_dir / PurePosixPath(file_name).name

    def download_from_dataset(self):
        """Pull latest config from Dataset (skips openclaw.json — always generated from env vars).

        Returns:
            True once the repo listing has been processed (individual file
            failures are logged and skipped); False if the repo has no files
            or the listing itself failed.
        """
        try:
            logger.info(f'Pulling config from Dataset: {self.dataset_repo}')
            self.repo_dir.mkdir(parents=True, exist_ok=True)

            files = list_repo_files(
                repo_id=self.dataset_repo,
                repo_type="dataset",
                token=self.hf_token
            )
            if not files:
                logger.warning('No config files found in Dataset')
                return False

            generated_config = self.local_config_dir / GENERATED_CONFIG_NAME
            downloaded_count = 0
            for file_name in files:
                dest_file = self._local_destination(file_name)
                if dest_file is None:
                    continue

                # Compare destinations rather than repo paths: a nested
                # "<dir>/openclaw.json" is restored by basename and would
                # otherwise overwrite the env-generated file.
                if dest_file == generated_config:
                    logger.info('Skipping openclaw.json (always generated from env vars)')
                    continue

                try:
                    local_path = hf_hub_download(
                        repo_id=self.dataset_repo,
                        filename=file_name,
                        repo_type="dataset",
                        token=self.hf_token,
                        local_dir=self.repo_dir
                    )
                    # Always create the parent so this method also works when
                    # called without ensure_local_dir() having run first.
                    dest_file.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(local_path, dest_file)
                    logger.info(f'Restored: {file_name}')
                    downloaded_count += 1
                except Exception as e:
                    # Best effort: one bad file must not abort the whole restore.
                    logger.error(f'Failed to download file {file_name}: {e}')
                    continue

            logger.info(f'Download complete, {downloaded_count} files downloaded')
            return True
        except Exception as e:
            logger.error(f'Failed to pull config from Dataset: {e}')
            return False

    def _push_file(self, local_path, repo_path, commit_message):
        """Upload one local file to ``repo_path`` in the Dataset repo."""
        upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=repo_path,
            repo_id=self.dataset_repo,
            repo_type="dataset",
            token=self.hf_token,
            commit_message=commit_message
        )

    def upload_to_dataset(self):
        """Push config and credentials to Dataset.

        Returns:
            True if at least one file was uploaded, otherwise False.
        """
        try:
            logger.info('Pushing config changes to Dataset')
            uploaded_count = 0

            # 1. Upload config files (skip openclaw.json — regenerated each boot)
            config_files = sorted(
                f for f in self.local_config_dir.glob('*')
                if f.suffix in CONFIG_SUFFIXES
                and f.is_file()
                and f.name != GENERATED_CONFIG_NAME
            )
            for config_file in config_files:
                try:
                    self._push_file(
                        config_file,
                        config_file.name,
                        f"Sync config: {config_file.name} - {time.strftime('%Y-%m-%d %H:%M:%S')}"
                    )
                    logger.info(f'Uploaded config: {config_file.name}')
                    uploaded_count += 1
                except Exception as e:
                    logger.error(f'Failed to upload file {config_file.name}: {e}')

            # 2. Upload credentials directory (pairing info)
            if self.credentials_dir.exists():
                for cred_file in sorted(self.credentials_dir.rglob('*')):
                    if not cred_file.is_file():
                        continue
                    # Keep the path relative to the credentials directory so
                    # nested files neither collide in the repo nor get
                    # restored to the wrong location on the next download.
                    rel_path = cred_file.relative_to(self.credentials_dir).as_posix()
                    try:
                        self._push_file(
                            cred_file,
                            CREDENTIALS_PREFIX + rel_path,
                            f"Sync credentials: {rel_path} - {time.strftime('%Y-%m-%d %H:%M:%S')}"
                        )
                        logger.info(f'Uploaded credentials: {rel_path}')
                        uploaded_count += 1
                    except Exception as e:
                        logger.error(f'Failed to upload credentials {rel_path}: {e}')

            logger.info(f'Upload complete, {uploaded_count} files uploaded')
            return uploaded_count > 0
        except Exception as e:
            logger.error(f'Failed to push config to Dataset: {e}')
            return False

    def run_sync(self, mode='download'):
        """Run sync process.

        Args:
            mode: 'download' or 'upload'; any other value does nothing.

        Returns:
            The result of the selected operation, or False for an unknown mode.
        """
        self.ensure_local_dir()
        if mode == 'download':
            return self.download_from_dataset()
        if mode == 'upload':
            return self.upload_to_dataset()
        return False

    def start_periodic_sync(self):
        """Start periodic background sync service (loops forever)."""
        logger.info('Starting periodic sync service')
        while True:
            try:
                time.sleep(self.sync_interval)
                self.run_sync('upload')
            except Exception as e:
                logger.error(f'Periodic sync failed: {e}')
                time.sleep(60)  # Wait 1 minute before retrying on error


def main():
    """Command-line entry point: ``sync.py [download|upload|sync]``."""
    if len(sys.argv) != 2 or sys.argv[1] not in ['download', 'upload', 'sync']:
        print('Usage: python sync.py [download|upload|sync]')
        sys.exit(1)

    mode = sys.argv[1]
    try:
        sync = OpenClawConfigSync()
        if mode == 'download':
            sync.run_sync('download')
        elif mode == 'upload':
            sync.run_sync('upload')
        elif mode == 'sync':
            # Restore once, then run the periodic upload loop in a daemon
            # thread and block on it for the life of the process.
            sync.run_sync('download')
            sync_thread = threading.Thread(
                target=sync.start_periodic_sync,
                daemon=True
            )
            sync_thread.start()
            sync_thread.join()
    except Exception as e:
        logger.error(f'Sync service failed: {e}')
        sys.exit(1)


if __name__ == '__main__':
    main()