Yash030 committed on
Commit
39cd355
·
1 Parent(s): 05c81af

add persistent storage via HF Dataset + sync.py

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. Dockerfile +3 -2
  3. start.sh +19 -0
  4. sync.py +118 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ data/
Dockerfile CHANGED
@@ -1,10 +1,11 @@
1
  FROM node:20-bookworm
2
 
3
- # Install Caddy
4
- RUN apt-get update && apt-get install -y debian-keyring debian-archive-keyring apt-transport-https curl unzip \
5
  && curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg \
6
  && curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list \
7
  && apt-get update && apt-get install -y caddy \
 
8
  && apt-get clean && rm -rf /var/lib/apt/lists/*
9
 
10
  # Set up user and environment
 
1
  FROM node:20-bookworm
2
 
3
+ # Install Caddy + Python (for huggingface_hub CLI)
4
+ RUN apt-get update && apt-get install -y debian-keyring debian-archive-keyring apt-transport-https curl unzip python3 python3-pip \
5
  && curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg \
6
  && curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list \
7
  && apt-get update && apt-get install -y caddy \
8
+ && pip3 install --break-system-packages huggingface_hub \
9
  && apt-get clean && rm -rf /var/lib/apt/lists/*
10
 
11
  # Set up user and environment
start.sh CHANGED
@@ -3,6 +3,25 @@
3
  # Create config folder
4
  mkdir -p ~/.agentmemory
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  # The daemon runs inside the Space and must talk to its own services on local
7
  # container ports. Use the public hf.space URL only from your local machine.
8
  export AGENTMEMORY_URL=http://localhost:3111
 
3
  # Create config folder
4
  mkdir -p ~/.agentmemory
5
 
6
+ # =============================================================================
7
+ # Persistent storage via HF Dataset repo (free)
8
+ # Set AGENTMEMORY_DATASET_REPO in Space secrets (default: Yashwant00CR7/agentmemory-data)
9
+ # Set HF_TOKEN in Space secrets with write access to that dataset repo
10
+ # =============================================================================
11
+ export AGENTMEMORY_DATASET_REPO="${AGENTMEMORY_DATASET_REPO:-Yashwant00CR7/agentmemory-data}"
12
+
13
+ # Restore DB from HF Dataset on startup
14
+ echo "[start] Restoring data from HF Dataset..."
15
+ python3 /app/sync.py restore
16
+
17
+ # Background sync loop — backs up every 5 minutes
18
+ (
19
+ while true; do
20
+ sleep 300
21
+ python3 /app/sync.py backup
22
+ done
23
+ ) &
24
+
25
  # The daemon runs inside the Space and must talk to its own services on local
26
  # container ports. Use the public hf.space URL only from your local machine.
27
  export AGENTMEMORY_URL=http://localhost:3111
sync.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Sync ~/.agentmemory/ data to/from a private HF Dataset repo.
4
+ Usage:
5
+ python3 sync.py restore -- download DB from HF on startup
6
+ python3 sync.py backup -- upload DB to HF (called in loop)
7
+ """
8
+ import os
9
+ import sys
10
+ import glob
11
+ import shutil
12
+
13
+ try:
14
+ from huggingface_hub import HfApi, hf_hub_download, list_repo_files
15
+ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
16
+ except ImportError:
17
+ print("[sync] huggingface_hub not installed, skipping sync")
18
+ sys.exit(0)
19
+
20
# Hub access token; an empty string means "sync disabled" (both restore()
# and backup() return early when it is falsy).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Target dataset repo. `or` (rather than a .get() default) makes a variable
# that is set but empty fall back to the default too, matching the
# `${AGENTMEMORY_DATASET_REPO:-...}` expansion used in start.sh.
REPO_ID = os.environ.get("AGENTMEMORY_DATASET_REPO") or "Yashwant00CR7/agentmemory-data"
# Local directory that is mirrored to/from the dataset repo.
DATA_DIR = os.path.expanduser("~/.agentmemory")
SKIP_FILES = {".env"}  # never upload secrets
24
+
25
def get_api():
    """Build an ``HfApi`` client authenticated with the module-level ``HF_TOKEN``."""
    client = HfApi(token=HF_TOKEN)
    return client
27
+
28
def restore():
    """Download the backed-up data files from the dataset repo into ``DATA_DIR``.

    Best-effort: every failure is logged and swallowed so that container
    startup is never blocked. Does nothing when ``HF_TOKEN`` is unset, when
    the dataset repo does not exist yet, or when it holds no data files.

    NOTE(review): because errors are swallowed, a failed restore is followed
    by the periodic backup loop in start.sh, which may then overwrite the
    remote copy with fresh local state — confirm this is acceptable.
    """
    if not HF_TOKEN:
        print("[sync] No HF_TOKEN — skipping restore")
        return
    os.makedirs(DATA_DIR, exist_ok=True)
    try:
        repo_files = list_repo_files(REPO_ID, repo_type="dataset", token=HF_TOKEN)
    except RepositoryNotFoundError:
        print(f"[sync] Dataset repo {REPO_ID} not found — will create on first backup")
        return
    except Exception as e:
        print(f"[sync] restore list error: {e}")
        return

    # Apply the same filter backup() uses when uploading: anything hidden
    # (e.g. the repo's own .gitattributes) or listed in SKIP_FILES was never
    # written by backup(), so it is not application data and must not be
    # dropped into the data directory.
    data_files = []
    for fname in repo_files:
        parts = fname.split("/")
        if parts[-1] in SKIP_FILES or any(p.startswith(".") for p in parts):
            continue
        data_files.append(fname)

    if not data_files:
        print("[sync] Dataset empty — fresh start")
        return

    for fname in data_files:
        try:
            # local_dir makes hf_hub_download recreate the repo's directory
            # layout under DATA_DIR, so no manual makedirs is needed here.
            hf_hub_download(
                repo_id=REPO_ID,
                filename=fname,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=DATA_DIR,
            )
            print(f"[sync] restored {fname}")
        except Exception as e:
            # Keep going: one unreadable file should not block the others.
            print(f"[sync] restore {fname} error: {e}")

    print("[sync] restore complete")
63
+
64
def _collect_backup_files():
    """Return ``(local_path, path_in_repo)`` pairs for every uploadable file in ``DATA_DIR``."""
    collected = []
    for root, dirs, files in os.walk(DATA_DIR):
        # Prune hidden dirs like .cache in place so os.walk never descends
        # into them.
        dirs[:] = [d for d in dirs if not d.startswith(".")]
        for name in files:
            if name in SKIP_FILES or name.startswith("."):
                continue
            full = os.path.join(root, name)
            # Repo paths always use forward slashes, whatever the host OS.
            rel = os.path.relpath(full, DATA_DIR).replace(os.sep, "/")
            collected.append((full, rel))
    return collected


def backup():
    """Upload the contents of ``DATA_DIR`` to the dataset repo as one commit.

    Best-effort: every failure is logged and swallowed so the caller's sync
    loop keeps running. Does nothing when ``HF_TOKEN`` is unset. The private
    dataset repo is created on first use. Hidden files and directories and
    anything in ``SKIP_FILES`` are never uploaded.

    All files go into a single commit so the remote copy is a consistent
    snapshot of the directory (instead of one commit per file per cycle).
    """
    if not HF_TOKEN:
        return
    # Imported here because the module-level import block does not bring
    # this name into scope.
    from huggingface_hub import CommitOperationAdd

    api = get_api()

    # Ensure repo exists
    try:
        api.repo_info(REPO_ID, repo_type="dataset")
    except RepositoryNotFoundError:
        print(f"[sync] Creating dataset repo {REPO_ID}")
        try:
            # exist_ok tolerates a concurrent creation between the check
            # above and this call.
            api.create_repo(REPO_ID, repo_type="dataset", private=True, exist_ok=True)
        except Exception as e:
            print(f"[sync] create_repo error: {e}")
            return
    except Exception as e:
        print(f"[sync] repo_info error: {e}")
        return

    all_files = _collect_backup_files()
    if not all_files:
        print("[sync] nothing to backup")
        return

    try:
        # Building the operations reads the files, so it can fail if a file
        # vanishes between the directory walk and this point.
        operations = [
            CommitOperationAdd(path_in_repo=rel_path, path_or_fileobj=full_path)
            for full_path, rel_path in all_files
        ]
        api.create_commit(
            repo_id=REPO_ID,
            repo_type="dataset",
            operations=operations,
            commit_message="agentmemory backup",
        )
    except Exception as e:
        print(f"[sync] backup error: {e}")
        return

    for _, rel_path in all_files:
        print(f"[sync] backed up {rel_path}")
    print("[sync] backup complete")
109
+
110
if __name__ == "__main__":
    # Map each sub-command to its handler; with no argument the script
    # performs a backup.
    handlers = {"restore": restore, "backup": backup}
    requested = (sys.argv[1:2] or ["backup"])[0]
    action = handlers.get(requested)
    if action is None:
        print(f"[sync] unknown command: {requested}")
        sys.exit(1)
    action()