tao-shen committed on
Commit
123e7c1
·
1 Parent(s): 7daa569

feat: add HF Dataset persistence + auto-register HuggingClaw agents

Browse files
Files changed (3) hide show
  1. Dockerfile +6 -0
  2. docker-entrypoint.sh +6 -1
  3. hf-persist.py +208 -0
Dockerfile CHANGED
@@ -30,6 +30,11 @@ LABEL org.opencontainers.image.version="${MC_VERSION}"
30
 
31
  WORKDIR /app
32
  ENV NODE_ENV=production
 
 
 
 
 
33
  RUN addgroup --system --gid 1001 nodejs && adduser --system --uid 1001 nextjs
34
  COPY --from=build /app/.next/standalone ./
35
  COPY --from=build /app/.next/static ./.next/static
@@ -40,6 +45,7 @@ COPY --from=build /app/src/lib/schema.sql ./src/lib/schema.sql
40
  RUN mkdir -p .data && chown nextjs:nodejs .data
41
  RUN echo 'const http=require("http");const r=http.get("http://localhost:"+(process.env.PORT||3000)+"/api/status?action=health",s=>{process.exit(s.statusCode===200?0:1)});r.on("error",()=>process.exit(1));r.setTimeout(4000,()=>{r.destroy();process.exit(1)})' > /app/healthcheck.js
42
  COPY docker-entrypoint.sh /app/docker-entrypoint.sh
 
43
  RUN chmod +x /app/docker-entrypoint.sh
44
  USER nextjs
45
  ENV PORT=7860
 
30
 
31
  WORKDIR /app
32
  ENV NODE_ENV=production
33
+ # Python + huggingface_hub for HF Dataset persistence
34
+ RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip \
35
+ && pip3 install --no-cache-dir --break-system-packages huggingface_hub \
36
+ && apt-get purge -y python3-pip && apt-get autoremove -y \
37
+ && rm -rf /var/lib/apt/lists/*
38
  RUN addgroup --system --gid 1001 nodejs && adduser --system --uid 1001 nextjs
39
  COPY --from=build /app/.next/standalone ./
40
  COPY --from=build /app/.next/static ./.next/static
 
45
  RUN mkdir -p .data && chown nextjs:nodejs .data
46
  RUN echo 'const http=require("http");const r=http.get("http://localhost:"+(process.env.PORT||3000)+"/api/status?action=health",s=>{process.exit(s.statusCode===200?0:1)});r.on("error",()=>process.exit(1));r.setTimeout(4000,()=>{r.destroy();process.exit(1)})' > /app/healthcheck.js
47
  COPY docker-entrypoint.sh /app/docker-entrypoint.sh
48
+ COPY hf-persist.py /app/hf-persist.py
49
  RUN chmod +x /app/docker-entrypoint.sh
50
  USER nextjs
51
  ENV PORT=7860
docker-entrypoint.sh CHANGED
@@ -45,4 +45,9 @@ if [ -z "$API_KEY" ] || [ "$API_KEY" = "generate-a-random-key" ]; then
45
  fi
46
 
47
  printf '[entrypoint] Starting server\n'
48
- exec node server.js
 
 
 
 
 
 
45
  fi
46
 
47
  printf '[entrypoint] Starting server\n'
48
+ if [ -n "$HF_TOKEN" ]; then
49
+ printf '[entrypoint] HF_TOKEN set — enabling Dataset persistence\n'
50
+ exec python3 /app/hf-persist.py
51
+ else
52
+ exec node server.js
53
+ fi
hf-persist.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """HuggingFace Dataset persistence for Mission Control .data/ directory.
3
+
4
+ On startup: restore .data/ from HF Dataset
5
+ Periodically: upload .data/ to HF Dataset (every SYNC_INTERVAL seconds)
6
+ On shutdown: final upload
7
+
8
+ Env vars:
9
+ HF_TOKEN - HuggingFace API token (required)
10
+ HF_DATASET_REPO - Dataset repo ID (default: auto from SPACE_ID)
11
+ SYNC_INTERVAL - Seconds between syncs (default: 120)
12
+ """
13
+ import os, sys, signal, time, subprocess, threading, logging
14
+
15
logging.basicConfig(level=logging.INFO, format="[hf-persist] %(message)s")
log = logging.getLogger(__name__)

# Fallback used when SYNC_INTERVAL is unset or unusable.
DEFAULT_SYNC_INTERVAL = 120


def parse_sync_interval(raw, default=DEFAULT_SYNC_INTERVAL):
    """Return the sync interval in seconds parsed from *raw*.

    Falls back to *default* (with a warning) when *raw* is not an integer or
    is not strictly positive. A non-positive interval would make the periodic
    sync loop's ``Event.wait(interval)`` return immediately and upload
    back-to-back, and an unparsable value would otherwise crash the process at
    import time.
    """
    try:
        seconds = int(raw)
    except (TypeError, ValueError):
        log.warning(f"Invalid SYNC_INTERVAL {raw!r}, using {default}s")
        return default
    if seconds <= 0:
        log.warning(f"SYNC_INTERVAL must be positive (got {seconds}), using {default}s")
        return default
    return seconds


# --- Configuration read from the environment -------------------------------
HF_TOKEN = os.environ.get("HF_TOKEN", "")
SPACE_ID = os.environ.get("SPACE_ID", "")
DATA_DIR = os.environ.get("MISSION_CONTROL_DATA_DIR", "/app/.data")
# Sub-folder inside the dataset repo that mirrors DATA_DIR.
DATASET_PATH = "mission-control"
SYNC_INTERVAL = parse_sync_interval(
    os.environ.get("SYNC_INTERVAL", str(DEFAULT_SYNC_INTERVAL))
)

# Derive dataset repo from SPACE_ID (e.g. tao-shen/HuggingClaw-MissionControl
# -> tao-shen/HuggingClaw-MissionControl-data). An explicit, non-empty
# HF_DATASET_REPO always wins; with neither set the repo id stays empty.
if os.environ.get("HF_DATASET_REPO"):
    HF_REPO_ID = os.environ["HF_DATASET_REPO"]
elif SPACE_ID:
    HF_REPO_ID = f"{SPACE_ID}-data"
else:
    HF_REPO_ID = ""

# Glob patterns excluded from uploads.
# NOTE(review): if "*.wal"/"*.shm" are meant to skip SQLite write-ahead-log
# side files, those are normally named "<db>-wal"/"<db>-shm" and would not
# match these patterns — confirm the intended behavior before changing.
IGNORE_PATTERNS = ["*.lock", "*.tmp", "*.pid", "__pycache__", "*.wal", "*.shm"]
33
+
34
+
35
def ensure_dataset():
    """Create the private dataset repo HF_REPO_ID if it does not exist yet.

    Only a "repository not found" answer from the Hub triggers creation;
    any other failure from the existence check (network error, rate limit,
    ...) propagates to the caller instead of being mistaken for a missing
    repo.

    Raises:
        Exception: whatever ``repo_info`` / ``create_repo`` raise, other than
            the handled ``RepositoryNotFoundError``.
    """
    from huggingface_hub import HfApi
    from huggingface_hub.utils import RepositoryNotFoundError

    api = HfApi(token=HF_TOKEN)
    try:
        api.repo_info(repo_id=HF_REPO_ID, repo_type="dataset")
    except RepositoryNotFoundError:
        log.info(f"Creating dataset {HF_REPO_ID}")
        # exist_ok=True keeps this safe if another instance created the repo
        # between the check above and this call.
        api.create_repo(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            private=True,
            exist_ok=True,
        )
        log.info(f"Created dataset {HF_REPO_ID}")
    else:
        log.info(f"Dataset {HF_REPO_ID} exists")
46
+
47
+
48
def restore():
    """Populate DATA_DIR from the dataset repo's DATASET_PATH folder.

    The snapshot is downloaded into a temporary directory and every file
    under DATASET_PATH is copied (metadata preserved via ``copy2``) to the
    same relative location below DATA_DIR, overwriting existing files.
    Any failure is logged as a warning and swallowed so startup continues
    with whatever is already on disk.
    """
    import shutil
    import tempfile

    from huggingface_hub import snapshot_download

    log.info(f"Restoring from {HF_REPO_ID}/{DATASET_PATH}/ ...")
    try:
        with tempfile.TemporaryDirectory() as staging_dir:
            snapshot_download(
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                allow_patterns=f"{DATASET_PATH}/**",
                local_dir=staging_dir,
                token=HF_TOKEN,
            )
            payload_root = os.path.join(staging_dir, DATASET_PATH)

            # Guard: the repo exists but holds nothing for us yet.
            if not os.path.isdir(payload_root):
                log.info("No data found in dataset (fresh start)")
                return

            restored = 0
            for current_dir, _subdirs, filenames in os.walk(payload_root):
                for filename in filenames:
                    downloaded = os.path.join(current_dir, filename)
                    relative = os.path.relpath(downloaded, payload_root)
                    target = os.path.join(DATA_DIR, relative)
                    os.makedirs(os.path.dirname(target), exist_ok=True)
                    shutil.copy2(downloaded, target)
                    restored += 1
            log.info(f"Restored {restored} files to {DATA_DIR}")
    except Exception as e:
        log.warning(f"Restore failed (starting fresh): {e}")
79
+
80
+
81
def save():
    """Push the contents of DATA_DIR to the dataset repo under DATASET_PATH.

    Does nothing (beyond an info log) when DATA_DIR is missing or empty.
    Files matching IGNORE_PATTERNS are excluded. Upload errors are logged
    and not re-raised.
    """
    from datetime import datetime

    from huggingface_hub import HfApi

    has_content = os.path.isdir(DATA_DIR) and bool(os.listdir(DATA_DIR))
    if not has_content:
        log.info("Nothing to save (empty .data/)")
        return

    client = HfApi(token=HF_TOKEN)
    try:
        upload_args = {
            "folder_path": DATA_DIR,
            "path_in_repo": DATASET_PATH,
            "repo_id": HF_REPO_ID,
            "repo_type": "dataset",
            "commit_message": f"Sync mission-control data — {datetime.now().isoformat()}",
            "ignore_patterns": IGNORE_PATTERNS,
        }
        client.upload_folder(**upload_args)
        log.info("Saved to dataset")
    except Exception as e:
        log.error(f"Save failed: {e}")
103
+
104
+
105
def register_agents(port):
    """Register the built-in HuggingClaw agents once the server is healthy.

    Polls ``/api/status?action=health`` on localhost until a request succeeds
    (bounded by a 60-second wall-clock deadline), then POSTs each agent
    definition to ``/api/agents/register``. If the API_KEY environment
    variable is set it is sent as a Bearer token. The whole routine is
    best-effort: every failure is logged as a warning and nothing is raised.

    Args:
        port: Port the server listens on; interpolated into the URL as-is.
    """
    import json
    import urllib.error
    import urllib.request

    agents = [
        {"name": "Adam", "role": "agent", "capabilities": ["parenting", "coding", "A2A"], "framework": "openclaw"},
        {"name": "Eve", "role": "agent", "capabilities": ["parenting", "coding", "A2A"], "framework": "openclaw"},
        {"name": "God", "role": "agent", "capabilities": ["supervision", "architecture", "A2A"], "framework": "openclaw"},
        {"name": "Cain", "role": "agent", "capabilities": ["learning", "coding"], "framework": "openclaw"},
    ]

    health_url = f"http://localhost:{port}/api/status?action=health"
    register_url = f"http://localhost:{port}/api/agents/register"

    # Wait for server to be ready. A monotonic deadline keeps the total wait
    # at the advertised 60s regardless of how long each probe takes to fail.
    deadline = time.monotonic() + 60
    while True:
        try:
            # Context manager closes the response/socket after the probe.
            with urllib.request.urlopen(health_url, timeout=2):
                break
        except Exception:
            if time.monotonic() >= deadline:
                log.warning("Server not ready after 60s, skipping agent registration")
                return
            time.sleep(2)

    api_key = os.environ.get("API_KEY", "")
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    for agent in agents:
        try:
            req = urllib.request.Request(
                register_url,
                data=json.dumps(agent).encode(),
                headers=headers,
                method="POST",
            )
            with urllib.request.urlopen(req, timeout=10) as resp:
                log.info(f"Registered agent: {agent['name']} ({resp.status})")
        except urllib.error.HTTPError as e:
            # errors="replace": a non-UTF-8 error body must not raise from
            # inside this handler and abort the remaining registrations.
            body = e.read().decode(errors="replace")
            log.warning(f"Failed to register {agent['name']}: {e.code} {body}")
        except Exception as e:
            log.warning(f"Failed to register {agent['name']}: {e}")
149
+
150
+
151
def main():
    """Run the Node server under HF Dataset persistence.

    Without HF_TOKEN or a resolvable dataset repo id, this process is
    replaced by ``node server.js`` and nothing else happens. Otherwise:

    1. make sure the dataset repo exists (best-effort),
    2. restore DATA_DIR from it,
    3. start ``node server.js`` as a child process,
    4. register agents in a background thread,
    5. upload DATA_DIR every SYNC_INTERVAL seconds,
    6. on SIGTERM/SIGINT or when the child exits, do one final upload.

    Exits with status 0 after a signal-initiated shutdown, otherwise with the
    child's return code.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        log.info("HF_TOKEN or HF_DATASET_REPO not set — persistence disabled, starting server directly")
        # execvp replaces this process image; it only "returns" by raising.
        os.execvp("node", ["node", "server.js"])

    port = os.environ.get("PORT", "7860")

    # Step 1: Ensure dataset exists. Treated as best-effort, like restore():
    # a Hub outage or bad token should not keep the server from starting.
    try:
        ensure_dataset()
    except Exception as e:
        log.warning(f"Could not verify/create dataset {HF_REPO_ID} (continuing): {e}")

    # Step 2: Restore from dataset
    restore()

    stop_event = threading.Event()
    # Monotonic time after which a child that ignored SIGTERM gets killed;
    # None until a shutdown signal has been received.
    kill_deadline = None

    # Step 3: Start server as subprocess
    proc = subprocess.Popen(["node", "server.js"], cwd="/app")
    log.info(f"Server started (PID {proc.pid})")

    # Step 4: Handle shutdown signals.
    # The handler only flags the shutdown and forwards SIGTERM to the child.
    # It must not call proc.wait() itself: it runs in the main thread, which
    # is already waiting on the child below, and a nested timed wait from
    # inside the handler cannot reap the child while that outer wait is in
    # progress — it would just burn its whole timeout before the final save.
    def shutdown(signum, frame):
        nonlocal kill_deadline
        log.info(f"Received signal {signum}, shutting down...")
        stop_event.set()
        if kill_deadline is None:
            kill_deadline = time.monotonic() + 10
            proc.terminate()

    signal.signal(signal.SIGTERM, shutdown)
    signal.signal(signal.SIGINT, shutdown)

    # Step 5: Register agents in background
    reg_thread = threading.Thread(target=register_agents, args=(port,), daemon=True)
    reg_thread.start()

    # Step 6: Periodic sync
    def sync_loop():
        # Event.wait returns False on timeout (time to sync) and True once
        # stop_event is set (time to leave).
        while not stop_event.wait(SYNC_INTERVAL):
            save()

    sync_thread = threading.Thread(target=sync_loop, daemon=True)
    sync_thread.start()

    # Wait for the server process. Short timed waits let this loop notice a
    # pending shutdown and enforce the 10s grace period with a kill.
    while True:
        try:
            ret = proc.wait(timeout=0.5)
            break
        except subprocess.TimeoutExpired:
            if kill_deadline is not None and time.monotonic() >= kill_deadline:
                log.warning("Server did not exit within 10s, killing it")
                proc.kill()
                ret = proc.wait()
                break

    # Signal-initiated shutdown reports success; otherwise mirror the child.
    exit_code = 0 if kill_deadline is not None else ret

    stop_event.set()
    # Let an in-flight periodic upload finish so it cannot overlap with, or
    # land after, the final one.
    sync_thread.join()
    save()  # Final save
    sys.exit(exit_code)


if __name__ == "__main__":
    main()