Spaces:
Paused
Paused
add persistent storage via HF Dataset + sync.py
Browse files- .gitignore +1 -0
- Dockerfile +3 -2
- start.sh +19 -0
- sync.py +118 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
data/
|
Dockerfile
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
FROM node:20-bookworm
|
| 2 |
|
| 3 |
-
# Install Caddy
|
| 4 |
-
RUN apt-get update && apt-get install -y debian-keyring debian-archive-keyring apt-transport-https curl unzip \
|
| 5 |
&& curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg \
|
| 6 |
&& curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list \
|
| 7 |
&& apt-get update && apt-get install -y caddy \
|
|
|
|
| 8 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
# Set up user and environment
|
|
|
|
| 1 |
FROM node:20-bookworm
|
| 2 |
|
| 3 |
+
# Install Caddy + Python (for huggingface_hub CLI)
|
| 4 |
+
RUN apt-get update && apt-get install -y debian-keyring debian-archive-keyring apt-transport-https curl unzip python3 python3-pip \
|
| 5 |
&& curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg \
|
| 6 |
&& curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list \
|
| 7 |
&& apt-get update && apt-get install -y caddy \
|
| 8 |
+
&& pip3 install --break-system-packages huggingface_hub \
|
| 9 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 10 |
|
| 11 |
# Set up user and environment
|
start.sh
CHANGED
|
@@ -3,6 +3,25 @@
|
|
| 3 |
# Create config folder
|
| 4 |
mkdir -p ~/.agentmemory
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
# The daemon runs inside the Space and must talk to its own services on local
|
| 7 |
# container ports. Use the public hf.space URL only from your local machine.
|
| 8 |
export AGENTMEMORY_URL=http://localhost:3111
|
|
|
|
| 3 |
# Create config folder
|
| 4 |
mkdir -p ~/.agentmemory
|
| 5 |
|
| 6 |
+
# =============================================================================
|
| 7 |
+
# Persistent storage via HF Dataset repo (free)
|
| 8 |
+
# Set AGENTMEMORY_DATASET_REPO in Space secrets (default: Yashwant00CR7/agentmemory-data)
|
| 9 |
+
# Set HF_TOKEN in Space secrets with write access to that dataset repo
|
| 10 |
+
# =============================================================================
|
| 11 |
+
export AGENTMEMORY_DATASET_REPO="${AGENTMEMORY_DATASET_REPO:-Yashwant00CR7/agentmemory-data}"
|
| 12 |
+
|
| 13 |
+
# Restore DB from HF Dataset on startup
|
| 14 |
+
echo "[start] Restoring data from HF Dataset..."
|
| 15 |
+
python3 /app/sync.py restore
|
| 16 |
+
|
| 17 |
+
# Background sync loop — backs up every 5 minutes
|
| 18 |
+
(
|
| 19 |
+
while true; do
|
| 20 |
+
sleep 300
|
| 21 |
+
python3 /app/sync.py backup
|
| 22 |
+
done
|
| 23 |
+
) &
|
| 24 |
+
|
| 25 |
# The daemon runs inside the Space and must talk to its own services on local
|
| 26 |
# container ports. Use the public hf.space URL only from your local machine.
|
| 27 |
export AGENTMEMORY_URL=http://localhost:3111
|
sync.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Sync ~/.agentmemory/ data to/from a private HF Dataset repo.
|
| 4 |
+
Usage:
|
| 5 |
+
python3 sync.py restore -- download DB from HF on startup
|
| 6 |
+
python3 sync.py backup -- upload DB to HF (called in loop)
|
| 7 |
+
"""
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
import glob
|
| 11 |
+
import shutil
|
| 12 |
+
|
| 13 |
+
try:
|
| 14 |
+
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
|
| 15 |
+
from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
|
| 16 |
+
except ImportError:
|
| 17 |
+
print("[sync] huggingface_hub not installed, skipping sync")
|
| 18 |
+
sys.exit(0)
|
| 19 |
+
|
| 20 |
+
# Hub credentials and the target dataset repo are read from the environment.
HF_TOKEN = os.getenv("HF_TOKEN", "")
REPO_ID = os.getenv("AGENTMEMORY_DATASET_REPO", "Yashwant00CR7/agentmemory-data")

# Local directory that is mirrored to and from the dataset repo.
DATA_DIR = os.path.expanduser("~/.agentmemory")

# File names that are never uploaded because they hold secrets.
SKIP_FILES = {".env"}
|
| 24 |
+
|
| 25 |
+
def get_api():
    """Return an HfApi client authenticated with the module-level HF_TOKEN."""
    client = HfApi(token=HF_TOKEN)
    return client
|
| 27 |
+
|
| 28 |
+
def restore():
    """Download the backed-up files from the HF dataset repo into DATA_DIR.

    Best-effort: every failure is logged and the function returns normally so
    that the caller's startup sequence is never aborted by a sync problem.

    Nothing is restored when HF_TOKEN is unset, when the dataset repo does not
    exist yet, or when it contains no restorable files.

    Hidden entries (any path component starting with ".") and names listed in
    SKIP_FILES are ignored. backup() never uploads those, so anything of that
    kind in the repo is hub metadata such as ``.gitattributes`` or a stray
    secret file, and must not be written into the data directory.
    """
    if not HF_TOKEN:
        print("[sync] No HF_TOKEN — skipping restore")
        return

    os.makedirs(DATA_DIR, exist_ok=True)

    try:
        files = list(list_repo_files(REPO_ID, repo_type="dataset", token=HF_TOKEN))
    except RepositoryNotFoundError:
        print(f"[sync] Dataset repo {REPO_ID} not found — will create on first backup")
        return
    except Exception as e:
        print(f"[sync] restore list error: {e}")
        return

    # Mirror the filter used by backup(): skip hidden paths and secret files.
    files = [
        fname
        for fname in files
        if not any(
            part.startswith(".") or part in SKIP_FILES
            for part in fname.split("/")
        )
    ]

    if not files:
        print("[sync] Dataset empty — fresh start")
        return

    failures = 0
    for fname in files:
        try:
            # Make sure nested target directories exist before downloading.
            local_path = os.path.join(DATA_DIR, fname)
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            hf_hub_download(
                repo_id=REPO_ID,
                filename=fname,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=DATA_DIR,
            )
            print(f"[sync] restored {fname}")
        except Exception as e:
            failures += 1
            print(f"[sync] restore {fname} error: {e}")

    # Do not claim success when some files could not be downloaded.
    if failures:
        print(f"[sync] restore finished with {failures} error(s)")
    else:
        print("[sync] restore complete")
|
| 63 |
+
|
| 64 |
+
def backup():
    """Upload the contents of DATA_DIR to the HF dataset repo in one commit.

    Best-effort: every failure is logged and the function returns normally so
    the periodic backup loop keeps running.

    Does nothing when HF_TOKEN is unset. Creates the dataset repo (private) if
    it does not exist yet. Hidden directories, hidden files and names listed
    in SKIP_FILES are never uploaded.

    All files are sent as a single commit, so the repo always holds a
    consistent snapshot and a periodic backup produces one commit per run
    instead of one commit per file.
    """
    if not HF_TOKEN:
        return
    api = get_api()

    # Ensure repo exists
    try:
        api.repo_info(REPO_ID, repo_type="dataset")
    except RepositoryNotFoundError:
        print(f"[sync] Creating dataset repo {REPO_ID}")
        try:
            # exist_ok guards against a concurrent creation between the
            # repo_info check and this call.
            api.create_repo(REPO_ID, repo_type="dataset", private=True, exist_ok=True)
        except Exception as e:
            print(f"[sync] create_repo error: {e}")
            return
    except Exception as e:
        print(f"[sync] repo_info error: {e}")
        return

    # Collect files to upload
    all_files = []
    for root, dirs, files in os.walk(DATA_DIR):
        # skip hidden dirs like .cache (in-place edit prunes the walk)
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for f in files:
            if f in SKIP_FILES or f.startswith('.'):
                continue
            full = os.path.join(root, f)
            # Repo paths always use forward slashes, whatever the local OS.
            rel = os.path.relpath(full, DATA_DIR).replace(os.sep, "/")
            all_files.append((full, rel))

    if not all_files:
        print("[sync] nothing to backup")
        return

    # Imported here so the block is self-contained; huggingface_hub is already
    # known to be importable by the time this function can be called.
    from huggingface_hub import CommitOperationAdd

    operations = []
    for full_path, rel_path in all_files:
        try:
            operations.append(
                CommitOperationAdd(path_in_repo=rel_path, path_or_fileobj=full_path)
            )
        except Exception as e:
            # e.g. the file vanished between the walk and now
            print(f"[sync] backup {rel_path} error: {e}")

    if not operations:
        print("[sync] nothing to backup")
        return

    try:
        api.create_commit(
            repo_id=REPO_ID,
            repo_type="dataset",
            operations=operations,
            commit_message=f"backup {len(operations)} file(s)",
        )
    except Exception as e:
        print(f"[sync] backup error: {e}")
        return

    for op in operations:
        print(f"[sync] backed up {op.path_in_repo}")
    print("[sync] backup complete")
|
| 109 |
+
|
| 110 |
+
if __name__ == "__main__":
    # With no sub-command on the command line, run a backup.
    cmd = "backup" if len(sys.argv) <= 1 else sys.argv[1]
    handlers = {"restore": restore, "backup": backup}
    handler = handlers.get(cmd)
    if handler is None:
        print(f"[sync] unknown command: {cmd}")
        sys.exit(1)
    handler()
|