# File size: 14,044 Bytes
# 66f9c90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
#!/usr/bin/env python3
"""
Hermes Memory Synchronization System
──────────────────────────────────────
Backup & restore all Hermes persistent state to/from Hugging Face Datasets.
Survives Docker restarts — every bit of state is captured.

 Backup targets (HERMES_HOME dir):
 • state.db + WAL – core KV state (sessions, memory, channel directory, etc.)
 • response_store.db – chat response cache
 • sessions/ – session transcripts
 • skills/ – user-installed skills
 • cron/ – cron job definitions
 • memories/ – persistent memories
 • auth.json – OAuth tokens
 • channel_directory.json – registered channels
 • config.yaml – active configuration
 • gateway_state.json – gateway routing state
 • .env – environment overrides
 • SOUL.md – persona
 • .skills_prompt_snapshot.json – skill snapshot

 Excluded:
 • logs/ – runtime logs
 • plans/ – transient plans
 • workspace/ – user workspace (too large; separate backup if needed)
 • bin/ – binaries, reinstalled on start
 • .update_check – ephemeral
 • auth.lock – runtime lock
"""

import os
import sys
import json
import zipfile
import shutil
import tempfile
import argparse
import subprocess
from datetime import datetime, timezone
from pathlib import Path

# ── Config ─────────────────────────────────────────────────────────────────
# Hugging Face access token; None when the environment variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Dataset repository that holds the backup archives.
HF_DATASET = "R1000/Hermes-Memory"
# Root of the Hermes state tree; falls back to ~/.hermes.
HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))

# Entries captured by a backup, relative to HERMES_HOME.
BACKUP_PATHS = [
    # Each SQLite database together with its -shm / -wal companions.
    *(
        db + suffix
        for db in ("state.db", "response_store.db")
        for suffix in ("", "-shm", "-wal")
    ),
    # State directories.
    "sessions",
    "skills",
    "cron",
    "memories",
    # Individual state files.
    "auth.json",
    "channel_directory.json",
    "config.yaml",
    "gateway_state.json",
    ".env",
    "SOUL.md",
    ".skills_prompt_snapshot.json",
]

# Entries written back by a restore: the backup set minus .env, which may be
# specific to the environment the backup was taken in.
RESTORE_PATHS = [entry for entry in BACKUP_PATHS if entry != ".env"]

# Local staging directory for archives.
BACKUP_DIR = HERMES_HOME / "backup"


# ── Helpers ────────────────────────────────────────────────────────────────

def _timestamp() -> str:
    """Return the current UTC time formatted as ``YYYYMMDD_HHMMSS``."""
    now_utc = datetime.now(tz=timezone.utc)
    return f"{now_utc:%Y%m%d_%H%M%S}"


def ensure_dirs() -> None:
    """Create the local backup staging directory (BACKUP_DIR) if it is missing."""
    # parents=True also creates missing ancestors; exist_ok=True makes repeat
    # calls harmless.
    BACKUP_DIR.mkdir(parents=True, exist_ok=True)


def _check_hf_installed() -> bool:
    """Report whether the ``huggingface_hub`` package can be imported.

    Returns:
        True when the import succeeds; False, after printing an install hint,
        when the package is missing.
    """
    try:
        import huggingface_hub  # noqa: F401
    except ImportError:
        # The leading glyph was mojibake for the cross-mark emoji; emit the
        # intended character.
        print("❌ huggingface_hub not installed. Run: pip install huggingface_hub")
        return False
    return True


# ── Backup ─────────────────────────────────────────────────────────────────

def create_backup_zip(backup_path: Path) -> Path:
    """Zip every existing BACKUP_PATHS entry under HERMES_HOME into *backup_path*.

    Directories are walked recursively and only regular files are stored;
    entries that do not exist are counted as skipped. Archive member names are
    POSIX-style paths relative to HERMES_HOME.

    Args:
        backup_path: Destination ZIP file; it is overwritten if it exists.

    Returns:
        *backup_path*, unchanged, for call chaining.
    """
    ensure_dirs()
    # The caller may pass a path outside BACKUP_DIR; make sure its folder exists.
    backup_path.parent.mkdir(parents=True, exist_ok=True)

    captured = []
    skipped = []
    # NOTE(review): database files are copied as they are on disk; if Hermes is
    # writing to them at this moment the snapshot may be inconsistent. Confirm
    # whether callers quiesce the process first.
    with zipfile.ZipFile(backup_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for rel in BACKUP_PATHS:
            src = HERMES_HOME / rel
            if not src.exists():
                skipped.append(rel)
                continue
            if not src.is_dir():
                zf.write(src, rel)
                captured.append(rel)
                continue
            # Sorted so the member order is reproducible between runs.
            for f in sorted(src.rglob("*")):
                if f.is_file():
                    arc = f.relative_to(HERMES_HOME).as_posix()
                    zf.write(f, arc)
                    captured.append(arc)

    print(f"📦 {backup_path.name}")
    print(f" {len(captured)} files captured | {len(skipped)} paths skipped (not found)")
    return backup_path


def upload_to_hf(backup_path: Path) -> bool:
    """Upload a backup archive to the Hugging Face dataset ``HF_DATASET``.

    The archive is stored at the repository root under its own file name.

    Args:
        backup_path: Local ZIP file to upload.

    Returns:
        True on success; False when HF_TOKEN is unset, ``huggingface_hub`` is
        not importable, or the upload raised (the error is printed, not
        propagated).
    """
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return False
    if not _check_hf_installed():
        return False

    # Imported only after the availability check above has passed.
    from huggingface_hub import HfApi

    api = HfApi(token=HF_TOKEN)
    filename = backup_path.name  # e.g. backup_20260430_020000.zip

    try:
        api.upload_file(
            path_or_fileobj=str(backup_path),
            path_in_repo=filename,
            repo_id=HF_DATASET,
            repo_type="dataset",
        )
        print(f" ☁️ uploaded → {HF_DATASET}/{filename}")
        return True
    except Exception as exc:  # CLI boundary: report the failure, never crash
        print(f" ❌ upload failed: {exc}")
        return False


# ── Restore ────────────────────────────────────────────────────────────────

def list_hf_backups() -> list[str]:
    """List the ``backup_*.zip`` files in the HF dataset, newest first.

    Names embed a ``YYYYMMDD_HHMMSS`` stamp, so a reverse lexicographic sort
    puts the most recent archive first.

    Returns:
        The matching file names; an empty list when HF_TOKEN is unset,
        ``huggingface_hub`` is not importable, or the repository listing
        failed (the error is printed, not propagated).
    """
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return []
    if not _check_hf_installed():
        return []

    # Imported only after the availability check above has passed.
    from huggingface_hub import HfApi

    api = HfApi(token=HF_TOKEN)
    try:
        files = api.list_repo_files(repo_id=HF_DATASET, repo_type="dataset")
    except Exception as exc:  # CLI boundary: report the failure, never crash
        print(f"❌ cannot list HF repo: {exc}")
        return []

    return sorted(
        (f for f in files if f.startswith("backup_") and f.endswith(".zip")),
        reverse=True,
    )


def download_from_hf(filename: str, dest: Path) -> bool:
    """Download one backup archive from the HF dataset and copy it to *dest*.

    Args:
        filename: Path of the file inside the dataset repository.
        dest: Local path the downloaded file is copied to.

    Returns:
        True on success; False when HF_TOKEN is unset, ``huggingface_hub`` is
        not importable, or the download/copy raised (the error is printed, not
        propagated).
    """
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return False
    if not _check_hf_installed():
        return False

    # Imported only after the availability check above has passed.
    from huggingface_hub import hf_hub_download

    try:
        downloaded = hf_hub_download(
            repo_id=HF_DATASET,
            filename=filename,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        # On a fresh container the staging folder does not exist yet; without
        # this the copy fails and a restore can never start.
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(downloaded, dest)
        print(f" ⬇️ downloaded → {dest}")
        return True
    except Exception as exc:  # CLI boundary: report the failure, never crash
        print(f" ❌ download failed: {exc}")
        return False


def restore_from_zip(zip_path: Path) -> bool:
    """Restore the RESTORE_PATHS entries of a backup ZIP into HERMES_HOME.

    A local ``pre_restore_<timestamp>.zip`` snapshot of the current state is
    written to BACKUP_DIR first, so the previous state can be recovered.

    Args:
        zip_path: Backup archive to restore from.

    Returns:
        True once the restore pass has completed.
    """
    ensure_dirs()

    # Pre-restore safety snapshot
    safety_zip = BACKUP_DIR / f"pre_restore_{_timestamp()}.zip"
    print(f"📸 safety snapshot → {safety_zip.name}")
    create_backup_zip(safety_zip)

    restored = 0
    with zipfile.ZipFile(zip_path, "r") as zf:
        # Keep only members under a RESTORE_PATHS entry, and drop any name with
        # a ".." component so the destination cannot leave HERMES_HOME.
        to_extract = [
            member
            for member in zf.namelist()
            if ".." not in member.split("/")
            and any(member == rp or member.startswith(rp + "/") for rp in RESTORE_PATHS)
        ]

        with tempfile.TemporaryDirectory() as tmp:
            # Extract just the selected members instead of the whole archive.
            zf.extractall(tmp, members=to_extract)
            for rel in to_extract:
                src = Path(tmp) / rel
                dst = HERMES_HOME / rel
                if src.is_file():
                    dst.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(src, dst)
                    restored += 1

    # A -wal/-shm file left over from the live database must not sit next to a
    # restored .db it does not belong to; SQLite could replay it into the
    # restored file. The safety snapshot above still holds the old copies.
    archived = set(to_extract)
    for rel in RESTORE_PATHS:
        if not rel.endswith(".db") or rel not in archived:
            continue
        for suffix in ("-wal", "-shm"):
            sidecar = rel + suffix
            stale = HERMES_HOME / sidecar
            if sidecar not in archived and stale.is_file():
                stale.unlink()
                print(f" removed stale {sidecar} (not part of the restored backup)")

    print(f" ✅ {restored} files restored to {HERMES_HOME}")
    return True


# ── Pruning ────────────────────────────────────────────────────────────────

def prune_old_backups(keep: int = 12) -> int:
    """Keep only the *keep* newest backups on HF and delete the rest.

    Args:
        keep: Number of newest archives to retain. Negative values are treated
            as 0.

    Returns:
        The number of files actually deleted; 0 when HF_TOKEN is unset,
        ``huggingface_hub`` is not importable, or nothing needed pruning.
        Individual deletion failures are printed and skipped.
    """
    if not HF_TOKEN:
        # The message previously named a non-existent "HF0_TOKEN" variable.
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return 0
    if not _check_hf_installed():
        return 0

    # Imported only after the availability check above has passed.
    from huggingface_hub import HfApi

    # A negative value would slice from the end and delete the wrong files.
    keep = max(keep, 0)

    api = HfApi(token=HF_TOKEN)
    backups = list_hf_backups()
    if len(backups) <= keep:
        print(f" ℹ️ {len(backups)} backups on HF ≤ {keep} → nothing to prune")
        return 0

    deleted = 0
    for fname in backups[keep:]:
        try:
            api.delete_file(
                path_in_repo=fname,
                repo_id=HF_DATASET,
                repo_type="dataset",
            )
        except Exception as exc:  # best effort: keep pruning the others
            print(f" ⚠️ could not delete {fname}: {exc}")
        else:
            deleted += 1
            print(f" 🗑️ deleted {fname}")

    # Report what was really removed, not what was attempted.
    print(f" ✂️ pruned {deleted} old backups (kept {keep})")
    return deleted


# ── Commands ───────────────────────────────────────────────────────────────

def cmd_backup(args):
    """backup [--upload] [--keep-local] [--prune]

    Create a timestamped ZIP in BACKUP_DIR. With ``--upload`` the archive is
    pushed to HF; after a successful upload the local copy is removed unless
    ``--keep-local`` is given, and old remote backups are pruned when
    ``--prune`` is given. Without ``--upload`` the other flags have no effect.
    """
    if not HF_TOKEN and args.upload:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN for upload")
        return

    ts = _timestamp()
    zip_path = BACKUP_DIR / f"backup_{ts}.zip"
    create_backup_zip(zip_path)

    if not args.upload:
        return

    # On a failed upload the local archive is deliberately left in place.
    if not upload_to_hf(zip_path):
        return

    if not args.keep_local:
        zip_path.unlink()
        print(" 🧹 local temp zip removed")
    if args.prune:
        prune_old_backups(keep=args.prune if isinstance(args.prune, int) else 12)


def cmd_restore(args):
    """restore [--filename F] [--keep-local]

    Download a backup from HF and restore it into HERMES_HOME. Without
    ``--filename`` the ten newest backups are listed and one is picked
    interactively. The downloaded archive is deleted afterwards unless
    ``--keep-local`` is given.
    """
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return

    if args.filename:
        fname = args.filename
    else:
        backups = list_hf_backups()
        if not backups:
            print("❌ no backups found on Hugging Face")
            return
        shown = backups[:10]
        print("☁️ available backups:")
        for i, b in enumerate(shown, 1):
            print(f" {i:>2}. {b}")
        if len(backups) > 10:
            print(f" … +{len(backups) - 10} more")
        try:
            choice = int(input(f"\n pick [1-{len(shown)}]: ").strip())
        except (ValueError, EOFError):
            # EOFError: stdin is closed, e.g. when run non-interactively.
            print("❌ invalid selection")
            return
        # Reject 0 and negatives explicitly: as list indices they would
        # silently select from the END of the list, i.e. the oldest backups.
        if not 1 <= choice <= len(shown):
            print("❌ invalid selection")
            return
        fname = shown[choice - 1]

    # The staging folder may not exist yet on a fresh container.
    ensure_dirs()
    # Use only the final path component so the local file stays in BACKUP_DIR.
    local = BACKUP_DIR / Path(fname).name
    if not download_from_hf(fname, local):
        return

    print("🔄 restoring …")
    restore_from_zip(local)

    if not args.keep_local:
        local.unlink()
        print(" 🧹 local temp zip removed")


def cmd_list(args):
    """list backups on HF

    Print every ``backup_*.zip`` in the dataset, newest first, or a notice
    when there are none. *args* is accepted for handler-signature uniformity
    and is not read.
    """
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return

    backups = list_hf_backups()
    if not backups:
        print("☁️ no backups on Hugging Face")
        return

    print(f"☁️ {len(backups)} backup(s) on {HF_DATASET}:")
    for b in backups:
        print(f" • {b}")


def cmd_auto_backup(args):
    """auto-backup — meant for cron (non-interactive)

    Create a backup, upload it, and on success delete the local archive and
    prune the dataset to the 12 newest backups. On a failed upload the local
    archive is left in place. *args* is not read.
    """
    if not HF_TOKEN:
        print("❌ HF_TOKEN not set. Set environment variable HF_TOKEN")
        return

    ts = _timestamp()
    zip_path = BACKUP_DIR / f"backup_{ts}.zip"
    print(f"[{ts}] AUTO-BACKUP started")
    create_backup_zip(zip_path)

    if not upload_to_hf(zip_path):
        print(f"[{ts}] AUTO-BACKUP ❌")
        return

    zip_path.unlink()
    # Prune to 12 on each auto-run to keep dataset tidy
    prune_old_backups(keep=12)
    print(f"[{ts}] AUTO-BACKUP ✅")


# ── CLI ────────────────────────────────────────────────────────────────────

def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Prints the help text when no sub-command is given.
    """
    parser = argparse.ArgumentParser(
        description="Hermes Memory Synchronization — backup/restore to Hugging Face Datasets",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s backup              local zip only
  %(prog)s backup --upload     zip + upload to HF
  %(prog)s backup --upload --prune      upload & keep only 12 newest
  %(prog)s backup --upload --prune 24   upload & keep 24 newest
  %(prog)s list                list HF backups
  %(prog)s restore             interactive pick
  %(prog)s restore --filename backup_20260430_020000.zip
  %(prog)s auto-backup         headless — for cron
  """,
    )

    sub = parser.add_subparsers(dest="command", help="command")

    # backup
    bp = sub.add_parser("backup", help="create backup zip")
    bp.add_argument("--upload", action="store_true", help="upload to HF dataset")
    bp.add_argument("--keep-local", action="store_true", help="keep local zip after upload")
    # nargs="?" with const=12: a bare --prune means "keep 12".
    bp.add_argument("--prune", nargs="?", const=12, type=int, help="prune old backups, keep N (default 12)")

    # restore
    rp = sub.add_parser("restore", help="restore from HF backup")
    rp.add_argument("--filename", help="specific backup file to restore")
    rp.add_argument("--keep-local", action="store_true", help="keep downloaded zip after restore")

    # list
    sub.add_parser("list", help="list backups on HF")

    # auto-backup
    sub.add_parser("auto-backup", help="headless auto-backup (for cron)")

    args = parser.parse_args()

    handlers = {
        "backup": cmd_backup,
        "restore": cmd_restore,
        "list": cmd_list,
        "auto-backup": cmd_auto_backup,
    }

    # args.command is None when no sub-command was supplied.
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return
    handler(args)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()