echo8900 committed on
Commit
392bd69
·
verified ·
1 Parent(s): 026f95e

Update hf-sync-manager.mjs

Browse files
Files changed (1) hide show
  1. hf-sync-manager.mjs +41 -43
hf-sync-manager.mjs CHANGED
@@ -11,11 +11,7 @@ const DATASET_ID = (process.env.HF_DATASET_ID || "").trim();
11
  const REPO_DIR = "/tmp/hf-memory-sync";
12
  const UPLOAD_INTERVAL = 3 * 60 * 60 * 1000; // 3小时
13
 
14
- const FILES = {
15
- conversations: path.join(MEMORY_DIR, "conversations.jsonl"),
16
- memory: path.join(MEMORY_DIR, "memory.jsonl"),
17
- logs: path.join(MEMORY_DIR, "logs.jsonl")
18
- };
19
 
20
  // ===================== 内存对象 =====================
21
  const botMemory = {
@@ -24,13 +20,9 @@ const botMemory = {
24
  logs: []
25
  };
26
 
27
- // ===================== Helper 函数 =====================
28
  function log(msg) { console.log(`[hf-sync] ${msg}`); }
29
-
30
- function repoUrl() {
31
- return `https://user:${HF_TOKEN}@huggingface.co/datasets/${DATASET_ID}`;
32
- }
33
-
34
  function git(args, cwd) {
35
  return execSync("git " + args, {
36
  cwd: cwd || REPO_DIR,
@@ -43,9 +35,8 @@ function git(args, cwd) {
43
  function ensureRepo() {
44
  if (fs.existsSync(path.join(REPO_DIR, ".git"))) return;
45
  execSync("rm -rf " + REPO_DIR, { stdio: "pipe" });
46
- try {
47
- git("clone --depth 1 " + repoUrl() + " " + REPO_DIR, "/tmp");
48
- } catch {
49
  fs.mkdirSync(REPO_DIR, { recursive: true });
50
  git("init", REPO_DIR);
51
  git("remote add origin " + repoUrl(), REPO_DIR);
@@ -64,9 +55,7 @@ function pullLatest() {
64
  function readJsonl(filePath, targetArray) {
65
  if (!fs.existsSync(filePath)) return;
66
  const lines = fs.readFileSync(filePath, "utf-8").split("\n").filter(Boolean);
67
- lines.forEach(line => {
68
- try { targetArray.push(JSON.parse(line)); } catch(e) { log(`Invalid JSON line in ${filePath}`); }
69
- });
70
  }
71
 
72
  function writeJsonl(filePath, dataArray) {
@@ -74,7 +63,7 @@ function writeJsonl(filePath, dataArray) {
74
  fs.writeFileSync(filePath, lines, "utf-8");
75
  }
76
 
77
- // ===================== 内存操作接口 =====================
78
  export function addConversation(sessionId, userMsg, botMsg) {
79
  const ts = new Date().toISOString();
80
  botMemory.conversations.push({ session_id: sessionId, user: userMsg, bot: botMsg, timestamp: ts });
@@ -89,27 +78,41 @@ export function addLog(sessionId, event) {
89
  botMemory.logs.push({ session_id: sessionId, event, timestamp: ts });
90
  }
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  // ===================== 载入 & 保存 =====================
93
  function loadMemoryFromRepo() {
94
  if (!fs.existsSync(REPO_DIR)) return;
95
  const repoMemoryDir = path.join(REPO_DIR, "dataset-memory");
96
  if (!fs.existsSync(repoMemoryDir)) return;
97
 
98
- fs.mkdirSync(MEMORY_DIR, { recursive: true });
99
- for (const key of Object.keys(FILES)) {
100
- const src = path.join(repoMemoryDir, key + ".jsonl");
101
- const dst = FILES[key];
102
- if (fs.existsSync(src)) fs.copyFileSync(src, dst);
103
- else fs.writeFileSync(dst, "", "utf-8");
104
- }
 
105
  }
106
 
107
  function saveMemoryToRepo() {
108
- const repoMemoryDir = path.join(REPO_DIR, "dataset-memory");
109
- fs.mkdirSync(repoMemoryDir, { recursive: true });
110
- for (const key of Object.keys(FILES)) {
111
- writeJsonl(path.join(repoMemoryDir, key + ".jsonl"), botMemory[key]);
112
- }
113
  }
114
 
115
  function commitAndPush() {
@@ -117,14 +120,11 @@ function commitAndPush() {
117
  git("add -A", REPO_DIR);
118
  const changed = git("diff --cached --name-only", REPO_DIR);
119
  if (!changed) return 0;
120
- const ts = new Date().toISOString().replace("T", " ").substring(0, 19);
121
  git(`commit -m "memory-sync ${ts}"`, REPO_DIR);
122
  git("push --quiet origin HEAD:main", REPO_DIR);
123
  return changed.split("\n").filter(Boolean).length;
124
- } catch(e) {
125
- log("Push failed: " + e.message);
126
- return 0;
127
- }
128
  }
129
 
130
  // ===================== 启动 & 定时 =====================
@@ -134,31 +134,29 @@ function bootRestore() {
134
  ensureRepo();
135
  pullLatest();
136
  loadMemoryFromRepo();
137
- for (const key of Object.keys(FILES)) readJsonl(FILES[key], botMemory[key]);
138
- log(`Loaded: conversations=${botMemory.conversations.length}, memory=${botMemory.memory.length}, logs=${botMemory.logs.length}`);
139
  }
140
 
141
  async function startUploadLoop() {
142
  while(true){
143
- await new Promise(r => setTimeout(r, UPLOAD_INTERVAL));
144
- try {
145
  log("Saving memory to repo...");
146
  saveMemoryToRepo();
147
  const count = commitAndPush();
148
- log("Upload done: " + count + " file(s) changed");
149
- } catch(e){ log("Upload error: " + e.message); }
150
  }
151
  }
152
 
153
  // ===================== Main =====================
154
- async function main() {
155
  fs.mkdirSync(MEMORY_DIR, { recursive: true });
156
  bootRestore();
157
  log("Memory ready.");
158
 
159
  if(HF_TOKEN && DATASET_ID){
160
  log(`Starting ${UPLOAD_INTERVAL/3600000}h upload loop`);
161
- startUploadLoop().catch(e => log("Upload loop error: "+e.message));
162
  }
163
  }
164
 
 
11
  const REPO_DIR = "/tmp/hf-memory-sync";
12
  const UPLOAD_INTERVAL = 3 * 60 * 60 * 1000; // 3小时
13
 
14
+ const FILES = ["conversations", "memory", "logs"];
 
 
 
 
15
 
16
  // ===================== 内存对象 =====================
17
  const botMemory = {
 
20
  logs: []
21
  };
22
 
23
+ // ===================== Helper =====================
24
  function log(msg) { console.log(`[hf-sync] ${msg}`); }
25
+ function repoUrl() { return `https://user:${HF_TOKEN}@huggingface.co/datasets/${DATASET_ID}`; }
 
 
 
 
26
  function git(args, cwd) {
27
  return execSync("git " + args, {
28
  cwd: cwd || REPO_DIR,
 
35
  function ensureRepo() {
36
  if (fs.existsSync(path.join(REPO_DIR, ".git"))) return;
37
  execSync("rm -rf " + REPO_DIR, { stdio: "pipe" });
38
+ try { git("clone --depth 1 " + repoUrl() + " " + REPO_DIR, "/tmp"); }
39
+ catch {
 
40
  fs.mkdirSync(REPO_DIR, { recursive: true });
41
  git("init", REPO_DIR);
42
  git("remote add origin " + repoUrl(), REPO_DIR);
 
55
  function readJsonl(filePath, targetArray) {
56
  if (!fs.existsSync(filePath)) return;
57
  const lines = fs.readFileSync(filePath, "utf-8").split("\n").filter(Boolean);
58
+ lines.forEach(line => { try { targetArray.push(JSON.parse(line)); } catch{} });
 
 
59
  }
60
 
61
  function writeJsonl(filePath, dataArray) {
 
63
  fs.writeFileSync(filePath, lines, "utf-8");
64
  }
65
 
66
+ // ===================== 内存接口 =====================
67
  export function addConversation(sessionId, userMsg, botMsg) {
68
  const ts = new Date().toISOString();
69
  botMemory.conversations.push({ session_id: sessionId, user: userMsg, bot: botMsg, timestamp: ts });
 
78
  botMemory.logs.push({ session_id: sessionId, event, timestamp: ts });
79
  }
80
 
81
+ // ===================== 路径管理 =====================
82
+ function getDateDir(date=new Date()) {
83
+ const y = date.getFullYear(), m = String(date.getMonth()+1).padStart(2,"0"), d = String(date.getDate()).padStart(2,"0");
84
+ return path.join(MEMORY_DIR, `${y}-${m}-${d}`);
85
+ }
86
+
87
+ function getFilesForDate(date=new Date()) {
88
+ const dir = getDateDir(date);
89
+ fs.mkdirSync(dir, { recursive: true });
90
+ const fileMap = {};
91
+ FILES.forEach(f => { fileMap[f] = path.join(dir, f + ".jsonl"); });
92
+ return fileMap;
93
+ }
94
+
95
  // ===================== 载入 & 保存 =====================
96
  function loadMemoryFromRepo() {
97
  if (!fs.existsSync(REPO_DIR)) return;
98
  const repoMemoryDir = path.join(REPO_DIR, "dataset-memory");
99
  if (!fs.existsSync(repoMemoryDir)) return;
100
 
101
+ // 遍历每个日期文件夹
102
+ const dateDirs = fs.readdirSync(repoMemoryDir).filter(f => fs.statSync(path.join(repoMemoryDir,f)).isDirectory());
103
+ dateDirs.forEach(dateDir => {
104
+ const files = {};
105
+ FILES.forEach(f => { files[f] = path.join(repoMemoryDir, dateDir, f + ".jsonl"); });
106
+ FILES.forEach(f => readJsonl(files[f], botMemory[f]));
107
+ });
108
+ log(`Loaded: conversations=${botMemory.conversations.length}, memory=${botMemory.memory.length}, logs=${botMemory.logs.length}`);
109
  }
110
 
111
  function saveMemoryToRepo() {
112
+ const files = getFilesForDate();
113
+ const repoDateDir = path.join(REPO_DIR, "dataset-memory", path.basename(getDateDir()));
114
+ fs.mkdirSync(repoDateDir, { recursive: true });
115
+ FILES.forEach(f => writeJsonl(path.join(repoDateDir, f + ".jsonl"), botMemory[f]));
 
116
  }
117
 
118
  function commitAndPush() {
 
120
  git("add -A", REPO_DIR);
121
  const changed = git("diff --cached --name-only", REPO_DIR);
122
  if (!changed) return 0;
123
+ const ts = new Date().toISOString().replace("T"," ").substring(0,19);
124
  git(`commit -m "memory-sync ${ts}"`, REPO_DIR);
125
  git("push --quiet origin HEAD:main", REPO_DIR);
126
  return changed.split("\n").filter(Boolean).length;
127
+ } catch(e){ log("Push failed: "+e.message); return 0; }
 
 
 
128
  }
129
 
130
  // ===================== 启动 & 定时 =====================
 
134
  ensureRepo();
135
  pullLatest();
136
  loadMemoryFromRepo();
 
 
137
  }
138
 
139
  async function startUploadLoop() {
140
  while(true){
141
+ await new Promise(r=>setTimeout(r, UPLOAD_INTERVAL));
142
+ try{
143
  log("Saving memory to repo...");
144
  saveMemoryToRepo();
145
  const count = commitAndPush();
146
+ log("Upload done: "+count+" file(s) changed");
147
+ } catch(e){ log("Upload error: "+e.message); }
148
  }
149
  }
150
 
151
  // ===================== Main =====================
152
+ async function main(){
153
  fs.mkdirSync(MEMORY_DIR, { recursive: true });
154
  bootRestore();
155
  log("Memory ready.");
156
 
157
  if(HF_TOKEN && DATASET_ID){
158
  log(`Starting ${UPLOAD_INTERVAL/3600000}h upload loop`);
159
+ startUploadLoop().catch(e=>log("Upload loop error: "+e.message));
160
  }
161
  }
162