8900 committed on
Commit
080dc72
·
verified ·
1 Parent(s): 0263c73

Update hf-sync-manager.mjs

Browse files
Files changed (1) hide show
  1. hf-sync-manager.mjs +96 -81
hf-sync-manager.mjs CHANGED
@@ -1,16 +1,19 @@
1
  // ============================================================
2
- // HF Sync Manager - Full Mirror Edition
3
  //
4
- // Mirrors ~/.openclaw/ to HF Dataset every 30 min.
 
 
5
  //
6
- // Critical rules:
7
- // 1. NEVER restore openclaw.json from Dataset.
8
- // It is owned by setup-hf-config.mjs only.
9
- // 2. RUNNING_FLAG is never deleted.
10
- // /tmp is only cleared on container restart, not gateway restart.
11
- // This correctly distinguishes the two restart types.
 
12
  //
13
- // Excluded: openclaw.json, sessions, qmd, canvas, *.bak
14
  // ============================================================
15
 
16
  import fs from "node:fs";
@@ -26,17 +29,16 @@ var HF_TOKEN = (process.env.HF_TOKEN || "").trim();
26
  var DATASET_ID = (process.env.HF_DATASET_ID || "").trim();
27
  var REPO_DIR = "/tmp/oc-dataset";
28
  var INTERVAL = 30 * 60 * 1000;
29
- var RUNNING_FLAG = "/tmp/.hf-sync-running";
30
 
 
31
  var EXCLUDE_NAMES = [
32
- "openclaw.json",
33
- "openclaw.json.bak",
34
- "sessions",
35
- "qmd",
36
- "canvas",
37
- ".git"
38
  ];
39
- var EXCLUDE_EXT = [".bak", ".tmp", ".log"];
40
 
41
  // –– helpers ———————————————–
42
 
@@ -51,11 +53,11 @@ return "https://user:" + HF_TOKEN +
51
  "@huggingface.co/datasets/" + DATASET_ID;
52
  }
53
 
54
- function git(args) {
55
  return execSync("git " + args, {
56
  cwd: REPO_DIR,
57
  stdio: "pipe",
58
- timeout: 120000,
59
  env: Object.assign({}, process.env, { GIT_TERMINAL_PROMPT: "0" })
60
  }).toString().trim();
61
  }
@@ -71,19 +73,27 @@ if (name.indexOf(EXCLUDE_EXT[i]) >= 0) return true;
71
  return false;
72
  }
73
 
74
- // –– git ––––––––––––––––––––––––––
 
 
 
 
 
 
 
 
75
 
76
  function ensureRepo() {
77
  if (fs.existsSync(path.join(REPO_DIR, ".git"))) return;
78
  execSync("rm -rf " + REPO_DIR, { stdio: "pipe" });
79
  try {
80
  execSync("git clone --depth 1 " + repoUrl() + " " + REPO_DIR, {
81
- stdio: "pipe", timeout: 120000,
82
  env: Object.assign({}, process.env, { GIT_TERMINAL_PROMPT: "0" })
83
  });
84
  log("Repo cloned");
85
  } catch (e) {
86
- log("Clone failed, init empty: " + e.message);
87
  fs.mkdirSync(REPO_DIR, { recursive: true });
88
  execSync("git init " + REPO_DIR, { stdio: "pipe" });
89
  git("remote add origin " + repoUrl());
@@ -96,24 +106,24 @@ git("config pull.rebase false");
96
  }
97
 
98
  function pull() {
99
- try { git("fetch --quiet origin"); } catch (e) { /* ignore */ }
100
- try { git("pull --quiet --no-rebase origin main"); }
101
  catch (e) {
102
- try { git("pull --quiet --no-rebase origin master"); }
103
  catch (e2) { /* empty repo */ }
104
  }
105
  }
106
 
107
- function push() {
108
  try {
109
  git("add -A");
110
  var changed = git("diff --cached --name-only");
111
  if (!changed) return 0;
112
  var n = changed.split("\n").filter(Boolean).length;
113
  var ts = new Date().toISOString().replace("T", " ").substring(0, 19);
114
- git('commit -m "sync ' + ts + '"');
115
- try { git("push --quiet origin HEAD:main"); }
116
- catch (e) { git("push --quiet origin HEAD:master"); }
117
  return n;
118
  } catch (e) {
119
  log("Push failed: " + e.message);
@@ -121,7 +131,7 @@ return 0;
121
  }
122
  }
123
 
124
- // –– mirror ————————————————
125
 
126
  function mirrorToRepo(srcDir, dstDir) {
127
  if (!fs.existsSync(srcDir)) return 0;
@@ -145,6 +155,8 @@ log("Copy error [" + name + "]: " + e.message);
145
  return count;
146
  }
147
 
 
 
148
  function mirrorFromRepo(srcDir, dstDir) {
149
  if (!fs.existsSync(srcDir)) return 0;
150
  fs.mkdirSync(dstDir, { recursive: true });
@@ -176,57 +188,82 @@ fs.mkdirSync(path.join(WORKSPACE, "memory"), { recursive: true });
176
  var soul = path.join(WORKSPACE, "SOUL.md");
177
  if (!fs.existsSync(soul)) {
178
  fs.writeFileSync(soul, [
179
- "# Soul",
180
- "",
181
- "You are a helpful, warm, concise AI assistant.",
182
- "",
183
- "## Language",
184
- "",
185
  "Default language: Simplified Chinese.",
186
- "Always reply in Chinese unless the user writes in another language first.",
187
- "",
188
- "## Tone",
189
- "",
190
  "- Natural and friendly, not overly formal",
191
  "- Concise and to the point",
192
  "- Ask one clarifying question at a time when needed"
193
  ].join("\n") + "\n", "utf-8");
194
- log("Seeded workspace/SOUL.md");
195
  }
196
 
197
  var agents = path.join(WORKSPACE, "AGENTS.md");
198
  if (!fs.existsSync(agents)) {
199
  fs.writeFileSync(agents, [
200
- "# Agent Instructions",
201
- "",
202
- "## Boot sequence",
203
- "",
204
  "1. Read SOUL.md - language and persona",
205
  "2. Read USER.md if present - user profile",
206
  "3. Read MEMORY.md - long-term facts and rules",
207
- "4. Read today and yesterday memory/YYYY-MM-DD.md if present",
208
- "",
209
- "## Memory rules",
210
- "",
211
  "- Write important facts to MEMORY.md when asked",
212
  "- Log daily context to memory/YYYY-MM-DD.md",
213
  "- Do not ask for information already provided"
214
  ].join("\n") + "\n", "utf-8");
215
- log("Seeded workspace/AGENTS.md");
216
  }
217
 
218
  var mem = path.join(WORKSPACE, "MEMORY.md");
219
  if (!fs.existsSync(mem)) {
220
  fs.writeFileSync(mem, [
221
- "# Long-term Memory",
222
- "",
223
  "<!-- OpenClaw writes important facts here. -->",
224
  "<!-- Loaded at the start of every session. -->"
225
  ].join("\n") + "\n", "utf-8");
226
- log("Seeded workspace/MEMORY.md");
227
  }
228
  }
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  // –– boot restore ——————————————
231
 
232
  function bootRestore() {
@@ -249,7 +286,10 @@ return;
249
  var repoOC = path.join(REPO_DIR, "openclaw");
250
  if (fs.existsSync(repoOC)) {
251
  var n = mirrorFromRepo(repoOC, OPENCLAW_DIR);
252
- log("Boot restore done: " + n + " file(s) restored (openclaw.json excluded)");
 
 
 
253
  } else {
254
  log("No data in Dataset yet - starting fresh");
255
  }
@@ -277,21 +317,9 @@ if (fs.existsSync(repoOC)) {
277
  fs.mkdirSync(repoOC, { recursive: true });
278
 
279
  var n = mirrorToRepo(OPENCLAW_DIR, repoOC);
 
280
 
281
- // Upload openclaw.json with JSON validation (excluded from restore, not from upload)
282
- var localCfg = path.join(OPENCLAW_DIR, "openclaw.json");
283
- if (fs.existsSync(localCfg)) {
284
- try {
285
- JSON.parse(fs.readFileSync(localCfg, "utf-8").trim());
286
- fs.copyFileSync(localCfg, path.join(repoOC, "openclaw.json"));
287
- n++;
288
- log("openclaw.json uploaded (JSON valid)");
289
- } catch (e) {
290
- log("openclaw.json skipped (invalid JSON)");
291
- }
292
- }
293
-
294
- var pushed = push();
295
  if (pushed > 0) {
296
  log("Sync done: " + pushed + " file(s) pushed");
297
  } else {
@@ -308,7 +336,6 @@ log("Sync failed (retry in 30 min): " + e.message);
308
  log("Starting…");
309
  log("OPENCLAW_DIR : " + OPENCLAW_DIR);
310
  log("DATASET : " + (DATASET_ID || "NOT SET"));
311
- log("RUNNING_FLAG : " + RUNNING_FLAG);
312
 
313
  fs.mkdirSync(OPENCLAW_DIR, { recursive: true });
314
  fs.mkdirSync(WORKSPACE, { recursive: true });
@@ -320,19 +347,7 @@ execSync("git config --global http.postBuffer 52428800", { stdio: "pipe" });
320
  execSync("git config --global pull.rebase false", { stdio: "pipe" });
321
  } catch (e) { /* non-fatal */ }
322
 
323
- var isGatewayRestart = fs.existsSync(RUNNING_FLAG);
324
-
325
- if (isGatewayRestart) {
326
- log("Gateway restart - skipping boot restore (data already on disk)");
327
- } else {
328
- log("Container start - running full boot restore");
329
  bootRestore();
330
- }
331
-
332
- // Write flag. NEVER delete it on exit.
333
- // Only container restart clears /tmp.
334
- fs.writeFileSync(RUNNING_FLAG, String(process.pid), "utf-8");
335
- log("Running flag written");
336
 
337
  if (!HF_TOKEN || !DATASET_ID) {
338
  log("Sync disabled: set HF_TOKEN and HF_DATASET_ID in Secrets");
 
1
  // ============================================================
2
+ // HF Sync Manager - Full Mirror + Emergency Upload
3
  //
4
+ // CRITICAL FIX for HF Spaces:
5
+ // On HF free tier, OpenClaw “full process restart” = complete
6
+ // container restart = /tmp cleared. /tmp-based markers don’t work.
7
  //
8
+ // Solution:
9
+ // 1. Include openclaw.json in BOTH upload and restore.
10
+ // setup.mjs will patch (not overwrite) it, so no restart loop.
11
+ // 2. SIGTERM handler: when container is dying, immediately upload
12
+ // openclaw.json to Dataset before process exits. This preserves
13
+ // user’s just-saved settings for the next container start.
14
+ // 3. Regular 30-min sync for workspace files.
15
  //
16
+ // Excluded: sessions (transcripts), qmd (vector index), canvas
17
  // ============================================================
18
 
19
  import fs from "node:fs";
 
29
  var DATASET_ID = (process.env.HF_DATASET_ID || "").trim();
30
  var REPO_DIR = "/tmp/oc-dataset";
31
  var INTERVAL = 30 * 60 * 1000;
 
32
 
33
+ // Excluded from sync (runtime-only, auto-rebuilt by OpenClaw)
34
  var EXCLUDE_NAMES = [
35
+ "sessions", // agents/*/sessions/ - session transcripts
36
+ "qmd", // agents/*/qmd/ - vector search sqlite index
37
+ "canvas", // UI runtime files
38
+ ".git",
39
+ "openclaw.json.bak"
 
40
  ];
41
+ var EXCLUDE_EXT = [".tmp", ".log"];
42
 
43
  // –– helpers ———————————————–
44
 
 
53
  "@huggingface.co/datasets/" + DATASET_ID;
54
  }
55
 
56
+ function git(args, timeoutMs) {
57
  return execSync("git " + args, {
58
  cwd: REPO_DIR,
59
  stdio: "pipe",
60
+ timeout: timeoutMs || 120000,
61
  env: Object.assign({}, process.env, { GIT_TERMINAL_PROMPT: "0" })
62
  }).toString().trim();
63
  }
 
73
  return false;
74
  }
75
 
76
+ function isValidJson(p) {
77
+ try {
78
+ if (!fs.existsSync(p)) return false;
79
+ JSON.parse(fs.readFileSync(p, "utf-8").trim());
80
+ return true;
81
+ } catch (e) { return false; }
82
+ }
83
+
84
+ // –– git repo setup ––––––––––––––––––––
85
 
86
  function ensureRepo() {
87
  if (fs.existsSync(path.join(REPO_DIR, ".git"))) return;
88
  execSync("rm -rf " + REPO_DIR, { stdio: "pipe" });
89
  try {
90
  execSync("git clone --depth 1 " + repoUrl() + " " + REPO_DIR, {
91
+ stdio: "pipe", timeout: 60000,
92
  env: Object.assign({}, process.env, { GIT_TERMINAL_PROMPT: "0" })
93
  });
94
  log("Repo cloned");
95
  } catch (e) {
96
+ log("Clone failed, init: " + e.message);
97
  fs.mkdirSync(REPO_DIR, { recursive: true });
98
  execSync("git init " + REPO_DIR, { stdio: "pipe" });
99
  git("remote add origin " + repoUrl());
 
106
  }
107
 
108
  function pull() {
109
+ try { git("fetch --quiet origin", 30000); } catch (e) { /* ignore */ }
110
+ try { git("pull --quiet --no-rebase origin main", 30000); }
111
  catch (e) {
112
+ try { git("pull --quiet --no-rebase origin master", 30000); }
113
  catch (e2) { /* empty repo */ }
114
  }
115
  }
116
 
117
+ function push(label, timeoutMs) {
118
  try {
119
  git("add -A");
120
  var changed = git("diff --cached --name-only");
121
  if (!changed) return 0;
122
  var n = changed.split("\n").filter(Boolean).length;
123
  var ts = new Date().toISOString().replace("T", " ").substring(0, 19);
124
+ git('commit -m "' + (label || "sync") + " " + ts + '"');
125
+ try { git("push --quiet origin HEAD:main", timeoutMs || 30000); }
126
+ catch (e) { git("push --quiet origin HEAD:master", timeoutMs || 30000); }
127
  return n;
128
  } catch (e) {
129
  log("Push failed: " + e.message);
 
131
  }
132
  }
133
 
134
+ // –– mirror: local -> repo ———————————
135
 
136
  function mirrorToRepo(srcDir, dstDir) {
137
  if (!fs.existsSync(srcDir)) return 0;
 
155
  return count;
156
  }
157
 
158
+ // –– mirror: repo -> local ———————————
159
+
160
  function mirrorFromRepo(srcDir, dstDir) {
161
  if (!fs.existsSync(srcDir)) return 0;
162
  fs.mkdirSync(dstDir, { recursive: true });
 
188
  var soul = path.join(WORKSPACE, "SOUL.md");
189
  if (!fs.existsSync(soul)) {
190
  fs.writeFileSync(soul, [
191
+ "# Soul", "",
192
+ "You are a helpful, warm, concise AI assistant.", "",
193
+ "## Language", "",
 
 
 
194
  "Default language: Simplified Chinese.",
195
+ "Always reply in Chinese unless the user writes in another language first.", "",
196
+ "## Tone", "",
 
 
197
  "- Natural and friendly, not overly formal",
198
  "- Concise and to the point",
199
  "- Ask one clarifying question at a time when needed"
200
  ].join("\n") + "\n", "utf-8");
201
+ log("Seeded SOUL.md");
202
  }
203
 
204
  var agents = path.join(WORKSPACE, "AGENTS.md");
205
  if (!fs.existsSync(agents)) {
206
  fs.writeFileSync(agents, [
207
+ "# Agent Instructions", "",
208
+ "## Boot sequence", "",
 
 
209
  "1. Read SOUL.md - language and persona",
210
  "2. Read USER.md if present - user profile",
211
  "3. Read MEMORY.md - long-term facts and rules",
212
+ "4. Read today and yesterday memory/YYYY-MM-DD.md if present", "",
213
+ "## Memory rules", "",
 
 
214
  "- Write important facts to MEMORY.md when asked",
215
  "- Log daily context to memory/YYYY-MM-DD.md",
216
  "- Do not ask for information already provided"
217
  ].join("\n") + "\n", "utf-8");
218
+ log("Seeded AGENTS.md");
219
  }
220
 
221
  var mem = path.join(WORKSPACE, "MEMORY.md");
222
  if (!fs.existsSync(mem)) {
223
  fs.writeFileSync(mem, [
224
+ "# Long-term Memory", "",
 
225
  "<!-- OpenClaw writes important facts here. -->",
226
  "<!-- Loaded at the start of every session. -->"
227
  ].join("\n") + "\n", "utf-8");
228
+ log("Seeded MEMORY.md");
229
  }
230
  }
231
 
232
+ // –– EMERGENCY upload (SIGTERM handler) ––––––––––
233
+ // When OpenClaw triggers “full process restart”, the container dies.
234
+ // We have ~10s before SIGKILL to upload openclaw.json to Dataset.
235
+ // This preserves the user’s just-saved settings for the next boot.
236
+
237
+ function emergencyUploadConfig() {
238
+ log("SIGTERM: emergency config upload starting…");
239
+ var localCfg = path.join(OPENCLAW_DIR, "openclaw.json");
240
+ if (!isValidJson(localCfg)) {
241
+ log("SIGTERM: config invalid, skip");
242
+ return;
243
+ }
244
+ try {
245
+ ensureRepo();
246
+ pull();
247
+ var repoOC = path.join(REPO_DIR, "openclaw");
248
+ fs.mkdirSync(repoOC, { recursive: true });
249
+ fs.copyFileSync(localCfg, path.join(repoOC, "openclaw.json"));
250
+ var n = push("emergency-config", 8000);
251
+ log("SIGTERM: emergency upload " + (n > 0 ? "done (" + n + " files)" : "no changes"));
252
+ } catch (e) {
253
+ log("SIGTERM: emergency upload failed: " + e.message);
254
+ }
255
+ }
256
+
257
+ process.on("SIGTERM", function() {
258
+ emergencyUploadConfig();
259
+ process.exit(0);
260
+ });
261
+
262
+ // SIGINT for local testing
263
+ process.on("SIGINT", function() {
264
+ process.exit(0);
265
+ });
266
+
267
  // –– boot restore ——————————————
268
 
269
  function bootRestore() {
 
286
  var repoOC = path.join(REPO_DIR, "openclaw");
287
  if (fs.existsSync(repoOC)) {
288
  var n = mirrorFromRepo(repoOC, OPENCLAW_DIR);
289
+ log("Boot restore done: " + n + " file(s) restored to " + OPENCLAW_DIR);
290
+ if (fs.existsSync(path.join(OPENCLAW_DIR, "openclaw.json"))) {
291
+ log("Config restored from Dataset (setup will patch env.vars only)");
292
+ }
293
  } else {
294
  log("No data in Dataset yet - starting fresh");
295
  }
 
317
  fs.mkdirSync(repoOC, { recursive: true });
318
 
319
  var n = mirrorToRepo(OPENCLAW_DIR, repoOC);
320
+ log("Mirrored " + n + " file(s)");
321
 
322
+ var pushed = push("sync");
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  if (pushed > 0) {
324
  log("Sync done: " + pushed + " file(s) pushed");
325
  } else {
 
336
  log("Starting…");
337
  log("OPENCLAW_DIR : " + OPENCLAW_DIR);
338
  log("DATASET : " + (DATASET_ID || "NOT SET"));
 
339
 
340
  fs.mkdirSync(OPENCLAW_DIR, { recursive: true });
341
  fs.mkdirSync(WORKSPACE, { recursive: true });
 
347
  execSync("git config --global pull.rebase false", { stdio: "pipe" });
348
  } catch (e) { /* non-fatal */ }
349
 
 
 
 
 
 
 
350
  bootRestore();
 
 
 
 
 
 
351
 
352
  if (!HF_TOKEN || !DATASET_ID) {
353
  log("Sync disabled: set HF_TOKEN and HF_DATASET_ID in Secrets");