open-claw / sync.py
Shadman9416's picture
Upload 2 files
84126e2 verified
#!/usr/bin/env python3
"""
OpenClaw Config Sync Script
Features: Pull config from Hugging Face Dataset and periodically push changes back
Updated: Sync credentials directory to persist pairing info across restarts
"""
import os
import json
import time
import logging
import hashlib
import shutil
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download, upload_file, list_repo_files
# Configure the root logger once at import time: INFO and above, each record
# prefixed with a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by everything in this script.
logger = logging.getLogger(__name__)
class OpenClawConfigSync:
def __init__(self):
self.hf_token = os.getenv('HF_TOKEN', '')
self.dataset_repo = os.getenv('HF_DATASET', '')
self.local_config_dir = Path('/root/.openclaw')
self.credentials_dir = Path('/root/.openclaw/credentials')
self.sync_interval = 300 # Sync every 5 minutes
if not self.hf_token or not self.dataset_repo:
logger.error('HF_TOKEN or HF_DATASET environment variable is not set')
raise ValueError('Missing required environment variables')
self.api = HfApi(token=self.hf_token)
self.repo_dir = Path('/tmp/openclaw_dataset')
def calculate_file_hash(self, file_path):
"""Calculate MD5 hash of a file for change detection"""
hash_md5 = hashlib.md5()
try:
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
except Exception as e:
logger.error(f"Failed to calculate file hash {file_path}: {e}")
return None
def ensure_local_dir(self):
"""Ensure local config and credentials directories exist"""
self.local_config_dir.mkdir(parents=True, exist_ok=True)
self.credentials_dir.mkdir(parents=True, exist_ok=True)
def download_from_dataset(self):
"""Pull latest config from Dataset (skips openclaw.json — always generated from env vars)"""
try:
logger.info(f'Pulling config from Dataset: {self.dataset_repo}')
self.repo_dir.mkdir(parents=True, exist_ok=True)
files = list_repo_files(
repo_id=self.dataset_repo,
repo_type="dataset",
token=self.hf_token
)
if not files:
logger.warning('No config files found in Dataset')
return False
downloaded_count = 0
for file_name in files:
# Always skip openclaw.json — regenerated from env vars on every boot
if file_name == 'openclaw.json':
logger.info('Skipping openclaw.json (always generated from env vars)')
continue
# Download credentials files (pairing info) and other config files
is_credential = file_name.startswith('credentials/')
is_config = file_name.endswith(('.json', '.yaml', '.yml'))
if is_credential or is_config:
try:
local_path = hf_hub_download(
repo_id=self.dataset_repo,
filename=file_name,
repo_type="dataset",
token=self.hf_token,
local_dir=self.repo_dir
)
config_file = Path(local_path)
# Determine destination path
if is_credential:
dest_file = self.local_config_dir / file_name
dest_file.parent.mkdir(parents=True, exist_ok=True)
else:
dest_file = self.local_config_dir / config_file.name
shutil.copy2(config_file, dest_file)
logger.info(f'Restored: {file_name}')
downloaded_count += 1
except Exception as e:
logger.error(f'Failed to download file {file_name}: {e}')
continue
logger.info(f'Download complete, {downloaded_count} files downloaded')
return True
except Exception as e:
logger.error(f'Failed to pull config from Dataset: {e}')
return False
def upload_to_dataset(self):
"""Push config and credentials to Dataset"""
try:
logger.info('Pushing config changes to Dataset')
uploaded_count = 0
# 1. Upload config files (skip openclaw.json — regenerated each boot)
config_files = [
f for f in self.local_config_dir.glob('*')
if f.suffix in ['.json', '.yaml', '.yml']
and f.is_file()
and f.name != 'openclaw.json'
]
for config_file in config_files:
try:
upload_file(
path_or_fileobj=str(config_file),
path_in_repo=config_file.name,
repo_id=self.dataset_repo,
repo_type="dataset",
token=self.hf_token,
commit_message=f"Sync config: {config_file.name} - {time.strftime('%Y-%m-%d %H:%M:%S')}"
)
logger.info(f'Uploaded config: {config_file.name}')
uploaded_count += 1
except Exception as e:
logger.error(f'Failed to upload file {config_file.name}: {e}')
# 2. Upload credentials directory (pairing info)
if self.credentials_dir.exists():
for cred_file in self.credentials_dir.rglob('*'):
if cred_file.is_file():
repo_path = 'credentials/' + cred_file.name
try:
upload_file(
path_or_fileobj=str(cred_file),
path_in_repo=repo_path,
repo_id=self.dataset_repo,
repo_type="dataset",
token=self.hf_token,
commit_message=f"Sync credentials: {cred_file.name} - {time.strftime('%Y-%m-%d %H:%M:%S')}"
)
logger.info(f'Uploaded credentials: {cred_file.name}')
uploaded_count += 1
except Exception as e:
logger.error(f'Failed to upload credentials {cred_file.name}: {e}')
logger.info(f'Upload complete, {uploaded_count} files uploaded')
return uploaded_count > 0
except Exception as e:
logger.error(f'Failed to push config to Dataset: {e}')
return False
def run_sync(self, mode='download'):
"""Run sync process"""
self.ensure_local_dir()
if mode == 'download':
return self.download_from_dataset()
elif mode == 'upload':
return self.upload_to_dataset()
return False
def start_periodic_sync(self):
"""Start periodic background sync service"""
logger.info('Starting periodic sync service')
while True:
try:
time.sleep(self.sync_interval)
self.run_sync('upload')
except Exception as e:
logger.error(f'Periodic sync failed: {e}')
time.sleep(60) # Wait 1 minute before retrying on error
def main():
    """Command-line entry point: ``python sync.py [download|upload|sync]``.

    ``download`` and ``upload`` run a single pass in that direction.
    ``sync`` runs one download, then starts the periodic upload loop in a
    daemon thread and blocks on it. Exits with status 1 on bad arguments or
    if the sync service raises.
    """
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) != 1 or cli_args[0] not in ('download', 'upload', 'sync'):
        print('Usage: python sync.py [download|upload|sync]')
        sys.exit(1)

    mode = cli_args[0]
    try:
        sync = OpenClawConfigSync()
        if mode in ('download', 'upload'):
            # One-shot pass in the requested direction.
            sync.run_sync(mode)
        else:
            # 'sync': restore once, then run the periodic upload loop in a
            # background thread and wait on it.
            import threading
            sync.run_sync('download')
            worker = threading.Thread(target=sync.start_periodic_sync, daemon=True)
            worker.start()
            worker.join()
    except Exception as e:
        logger.error(f'Sync service failed: {e}')
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()