# hermes_sync.py — Hermes-Memory repo (uploaded with huggingface_hub, commit 66f9c90)
#!/usr/bin/env python3
"""
Hermes Memory Synchronization System
──────────────────────────────────────
Backup & restore all Hermes persistent state to/from Hugging Face Datasets.
Survives Docker restarts β€” every bit of state is captured.
Backup targets (HERMES_HOME dir):
β€’ state.db + WAL – core KV state (sessions, memory, channel directory, etc.)
β€’ response_store.db – chat response cache
β€’ sessions/ – session transcripts
β€’ skills/ – user-installed skills
β€’ cron/ – cron job definitions
β€’ memories/ – persistent memories
β€’ auth.json – OAuth tokens
β€’ channel_directory.json – registered channels
β€’ config.yaml – active configuration
β€’ gateway_state.json – gateway routing state
β€’ .env – environment overrides
β€’ SOUL.md – persona
β€’ .skills_prompt_snapshot.json – skill snapshot
Excluded:
β€’ logs/ – runtime logs
β€’ plans/ – transient plans
β€’ workspace/ – user workspace (too large; separate backup if needed)
β€’ bin/ – binaries, reinstalled on start
β€’ .update_check – ephemeral
β€’ auth.lock – runtime lock
"""
import os
import sys
import json
import zipfile
import shutil
import tempfile
import argparse
import subprocess
from datetime import datetime, timezone
from pathlib import Path
# ── Config ─────────────────────────────────────────────────────────────────
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_DATASET = "R1000/Hermes-Memory"
HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))

# SQLite databases (main file plus shared-memory / write-ahead-log sidecars).
_DB_FILES = [
    "state.db",
    "state.db-shm",
    "state.db-wal",
    "response_store.db",
    "response_store.db-shm",
    "response_store.db-wal",
]

# Directories holding persistent state.
_STATE_DIRS = [
    "sessions",
    "skills",
    "cron",
    "memories",
]

# Individual configuration / state files.
_CONFIG_FILES = [
    "auth.json",
    "channel_directory.json",
    "config.yaml",
    "gateway_state.json",
]

# What gets backed up (relative to HERMES_HOME).
BACKUP_PATHS = _DB_FILES + _STATE_DIRS + _CONFIG_FILES + [
    ".env",
    "SOUL.md",
    ".skills_prompt_snapshot.json",
]

# What gets restored: everything backed up except .env, which may be
# environment-specific and should not clobber the target machine's overrides.
RESTORE_PATHS = [p for p in BACKUP_PATHS if p != ".env"]

# Local backup staging
BACKUP_DIR = HERMES_HOME / "backup"
# ── Helpers ───────────────────────────────────────────────────────────────
def _timestamp() -> str:
return datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
def ensure_dirs():
    """Create the local backup staging directory if it does not exist yet."""
    BACKUP_DIR.mkdir(parents=True, exist_ok=True)
def _check_hf_installed():
try:
import huggingface_hub # noqa: F401
return True
except ImportError:
print("❌ huggingface_hub not installed. Run: pip install huggingface_hub")
return False
# ── Backup ────────────────────────────────────────────────────────────────
def create_backup_zip(backup_path: Path) -> Path:
    """ZIP all BACKUP_PATHS from HERMES_HOME into backup_path.

    Missing paths are skipped silently (a fresh install won't have every
    target yet) and reported in the summary line.  Returns *backup_path*.
    """
    ensure_dirs()
    # Robustness fix: ensure_dirs() only creates BACKUP_DIR, but callers may
    # pass a backup_path elsewhere — make sure its parent exists before writing.
    backup_path.parent.mkdir(parents=True, exist_ok=True)
    captured = []
    skipped = []
    with zipfile.ZipFile(backup_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for rel in BACKUP_PATHS:
            src = HERMES_HOME / rel
            if not src.exists():
                skipped.append(rel)
                continue
            if src.is_dir():
                # Archive every file under the directory, storing paths relative
                # to HERMES_HOME so a restore lands files in the same place.
                for f in src.rglob("*"):
                    if f.is_file():
                        arc = str(f.relative_to(HERMES_HOME))
                        zf.write(f, arc)
                        captured.append(arc)
            else:
                zf.write(src, rel)
                captured.append(rel)
    print(f"πŸ“¦ {backup_path.name}")
    print(f" {len(captured)} files captured | {len(skipped)} paths skipped (not found)")
    return backup_path
def upload_to_hf(backup_path: Path) -> bool:
    """Upload backup ZIP to Hugging Face dataset.

    Returns True on success; False when HF_TOKEN is unset, huggingface_hub is
    missing, or the upload raises.
    """
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return False
    if not _check_hf_installed():
        return False
    from huggingface_hub import HfApi
    api = HfApi(token=HF_TOKEN)
    filename = backup_path.name  # e.g. backup_20260430_020000.zip
    try:
        api.upload_file(
            path_or_fileobj=str(backup_path),
            path_in_repo=filename,
            repo_id=HF_DATASET,
            repo_type="dataset",
        )
        # BUG FIX: the success message printed a literal "(unknown)" instead of
        # the name of the file that was actually uploaded.
        print(f" ☁️ uploaded β†’ {HF_DATASET}/{filename}")
        return True
    except Exception as exc:
        print(f" ❌ upload failed: {exc}")
        return False
# ── Restore ───────────────────────────────────────────────────────────────
def list_hf_backups() -> list[str]:
    """Return backup*.zip filenames in the HF dataset, newest first."""
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return []
    if not _check_hf_installed():
        return []
    from huggingface_hub import HfApi
    try:
        files = HfApi(token=HF_TOKEN).list_repo_files(
            repo_id=HF_DATASET, repo_type="dataset"
        )
    except Exception as exc:
        print(f"❌ cannot list HF repo: {exc}")
        return []
    # Timestamped names sort lexicographically, so descending order is newest-first.
    matching = [name for name in files if name.startswith("backup_") and name.endswith(".zip")]
    matching.sort(reverse=True)
    return matching
def download_from_hf(filename: str, dest: Path) -> bool:
    """Fetch one backup file from the HF dataset and copy it to *dest*."""
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return False
    if not _check_hf_installed():
        return False
    from huggingface_hub import hf_hub_download
    try:
        # hf_hub_download lands the file in the HF cache; copy it into place.
        cached = hf_hub_download(
            repo_id=HF_DATASET,
            filename=filename,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        shutil.copy2(cached, dest)
        print(f" ⬇️ downloaded β†’ {dest}")
        return True
    except Exception as exc:
        print(f" ❌ download failed: {exc}")
        return False
def restore_from_zip(zip_path: Path) -> bool:
    """Restore files from ZIP into HERMES_HOME.

    Only members matching RESTORE_PATHS are restored; .env is deliberately
    excluded because it may be environment-specific.
    Safety: creates a local pre-restore snapshot first so nothing is lost.
    """
    ensure_dirs()
    # Pre-restore safety snapshot
    safety_zip = BACKUP_DIR / f"pre_restore_{_timestamp()}.zip"
    print(f"πŸ“Έ safety snapshot β†’ {safety_zip.name}")
    create_backup_zip(safety_zip)
    restored = 0
    with zipfile.ZipFile(zip_path, "r") as zf:
        # Keep only members that are a restorable path or live under one.
        to_extract = [
            m for m in zf.namelist()
            if any(m == rp or m.startswith(rp + "/") for rp in RESTORE_PATHS)
        ]
        with tempfile.TemporaryDirectory() as tmp:
            # FIX: extract only the filtered members instead of the whole
            # archive — previously excluded files (e.g. .env) were written to
            # the temp dir anyway, wasting I/O and briefly landing secrets on disk.
            zf.extractall(tmp, members=to_extract)
            for rel in to_extract:
                src = Path(tmp) / rel
                dst = HERMES_HOME / rel
                if src.is_file():
                    dst.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(src, dst)
                    restored += 1
    print(f" βœ… {restored} files restored to {HERMES_HOME}")
    return True
# ── Pruning ───────────────────────────────────────────────────────────────
def prune_old_backups(keep: int = 12) -> int:
    """Keep only the *keep* newest backups on HF, delete the rest.

    Returns the number of backups deleted (0 on error or nothing to prune).
    """
    if not HF_TOKEN:
        # BUG FIX: the message previously said "HF0_TOKEN" — wrong variable name.
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return 0
    if not _check_hf_installed():
        return 0
    from huggingface_hub import HfApi
    api = HfApi(token=HF_TOKEN)
    backups = list_hf_backups()
    if len(backups) <= keep:
        print(f" ℹ️ {len(backups)} backups on HF ≀ {keep} β†’ nothing to prune")
        return 0
    # list_hf_backups() is newest-first, so everything past index `keep` is old.
    to_delete = backups[keep:]
    for fname in to_delete:
        try:
            api.delete_file(
                path_in_repo=fname,
                repo_id=HF_DATASET,
                repo_type="dataset",
            )
            print(f" πŸ—‘οΈ deleted {fname}")
        except Exception as exc:
            # Best-effort: a failed delete shouldn't abort the rest of the prune.
            print(f" ⚠️ could not delete {fname}: {exc}")
    print(f" βœ‚οΈ pruned {len(to_delete)} old backups (kept {keep})")
    return len(to_delete)
# ── Commands ──────────────────────────────────────────────────────────────
def cmd_backup(args):
    """backup [--upload] [--keep-local] [--prune]"""
    if args.upload and not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN for upload")
        return
    zip_path = BACKUP_DIR / f"backup_{_timestamp()}.zip"
    create_backup_zip(zip_path)
    if not args.upload:
        return
    uploaded = upload_to_hf(zip_path)
    if not uploaded:
        return
    if not args.keep_local:
        # The zip made it to HF; the local staging copy is redundant.
        zip_path.unlink()
        print(" 🧹 local temp zip removed")
    if args.prune:
        keep = args.prune if isinstance(args.prune, int) else 12
        prune_old_backups(keep=keep)
def cmd_restore(args):
    """restore [--filename F] [--keep-local]"""
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return
    fname = args.filename
    if not fname:
        # No explicit file: list what's available and let the user pick.
        backups = list_hf_backups()
        if not backups:
            print("❌ no backups found on Hugging Face")
            return
        print("☁️ available backups:")
        shown = backups[:10]
        for i, name in enumerate(shown, 1):
            print(f" {i:>2}. {name}")
        if len(backups) > 10:
            print(f" … +{len(backups) - 10} more")
        choice = input(f"\n pick [1-{len(shown)}]: ").strip()
        try:
            fname = backups[int(choice) - 1]
        except (ValueError, IndexError):
            print("❌ invalid selection")
            return
    local = BACKUP_DIR / fname
    if not download_from_hf(fname, local):
        return
    print("πŸ”„ restoring …")
    restore_from_zip(local)
    if not args.keep_local:
        local.unlink()
        print(" 🧹 local temp zip removed")
def cmd_list(args):
    """list backups on HF"""
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return
    backups = list_hf_backups()
    if not backups:
        print("☁️ no backups on Hugging Face")
        return
    print(f"☁️ {len(backups)} backup(s) on {HF_DATASET}:")
    for name in backups:
        print(f" β€’ {name}")
def cmd_auto_backup(args):
    """Headless auto-backup for cron: zip, upload, then prune — no prompts."""
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return
    ts = _timestamp()
    zip_path = BACKUP_DIR / f"backup_{ts}.zip"
    print(f"[{ts}] AUTO-BACKUP started")
    create_backup_zip(zip_path)
    if upload_to_hf(zip_path):
        zip_path.unlink()
        # Prune to 12 on each auto-run to keep dataset tidy
        prune_old_backups(keep=12)
        print(f"[{ts}] AUTO-BACKUP βœ…")
    else:
        print(f"[{ts}] AUTO-BACKUP ❌")
# ── CLI ───────────────────────────────────────────────────────────────────
def main():
    """Parse CLI arguments and dispatch to the matching command handler."""
    parser = argparse.ArgumentParser(
        description="Hermes Memory Synchronization β€” backup/restore to Hugging Face Datasets",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s backup local zip only
%(prog)s backup --upload zip + upload to HF
%(prog)s backup --upload --prune upload & keep only 12 newest
%(prog)s backup --upload --prune 24 upload & keep 24 newest
%(prog)s list list HF backups
%(prog)s restore interactive pick
%(prog)s restore --filename backup_20260430_020000.zip
%(prog)s auto-backup headless β€” for cron
""",
    )
    sub = parser.add_subparsers(dest="command", help="command")

    backup_parser = sub.add_parser("backup", help="create backup zip")
    backup_parser.add_argument("--upload", action="store_true", help="upload to HF dataset")
    backup_parser.add_argument("--keep-local", action="store_true", help="keep local zip after upload")
    backup_parser.add_argument(
        "--prune", nargs="?", const=12, type=int,
        help="prune old backups, keep N (default 12)",
    )

    restore_parser = sub.add_parser("restore", help="restore from HF backup")
    restore_parser.add_argument("--filename", help="specific backup file to restore")
    restore_parser.add_argument("--keep-local", action="store_true", help="keep downloaded zip after restore")

    sub.add_parser("list", help="list backups on HF")
    sub.add_parser("auto-backup", help="headless auto-backup (for cron)")

    args = parser.parse_args()
    dispatch = {
        "backup": cmd_backup,
        "restore": cmd_restore,
        "list": cmd_list,
        "auto-backup": cmd_auto_backup,
    }
    handler = dispatch.get(args.command)
    if handler is None:
        # No (or unknown) subcommand: show usage instead of crashing.
        parser.print_help()
    else:
        handler(args)


if __name__ == "__main__":
    main()