Update hf-sync-manager.mjs
Browse files- hf-sync-manager.mjs +41 -43
hf-sync-manager.mjs
CHANGED
|
@@ -11,11 +11,7 @@ const DATASET_ID = (process.env.HF_DATASET_ID || "").trim();
|
|
| 11 |
const REPO_DIR = "/tmp/hf-memory-sync";
|
| 12 |
const UPLOAD_INTERVAL = 3 * 60 * 60 * 1000; // 3小时
|
| 13 |
|
| 14 |
-
const FILES =
|
| 15 |
-
conversations: path.join(MEMORY_DIR, "conversations.jsonl"),
|
| 16 |
-
memory: path.join(MEMORY_DIR, "memory.jsonl"),
|
| 17 |
-
logs: path.join(MEMORY_DIR, "logs.jsonl")
|
| 18 |
-
};
|
| 19 |
|
| 20 |
// ===================== 内存对象 =====================
|
| 21 |
const botMemory = {
|
|
@@ -24,13 +20,9 @@ const botMemory = {
|
|
| 24 |
logs: []
|
| 25 |
};
|
| 26 |
|
| 27 |
-
// ===================== Helper
|
| 28 |
/** Write `msg` to stdout, tagged with the `[hf-sync]` prefix. */
function log(msg) {
  const line = `[hf-sync] ${msg}`;
  console.log(line);
}
|
| 29 |
-
|
| 30 |
-
function repoUrl() {
|
| 31 |
-
return `https://user:${HF_TOKEN}@huggingface.co/datasets/${DATASET_ID}`;
|
| 32 |
-
}
|
| 33 |
-
|
| 34 |
function git(args, cwd) {
|
| 35 |
return execSync("git " + args, {
|
| 36 |
cwd: cwd || REPO_DIR,
|
|
@@ -43,9 +35,8 @@ function git(args, cwd) {
|
|
| 43 |
function ensureRepo() {
|
| 44 |
if (fs.existsSync(path.join(REPO_DIR, ".git"))) return;
|
| 45 |
execSync("rm -rf " + REPO_DIR, { stdio: "pipe" });
|
| 46 |
-
try {
|
| 47 |
-
|
| 48 |
-
} catch {
|
| 49 |
fs.mkdirSync(REPO_DIR, { recursive: true });
|
| 50 |
git("init", REPO_DIR);
|
| 51 |
git("remote add origin " + repoUrl(), REPO_DIR);
|
|
@@ -64,9 +55,7 @@ function pullLatest() {
|
|
| 64 |
function readJsonl(filePath, targetArray) {
|
| 65 |
if (!fs.existsSync(filePath)) return;
|
| 66 |
const lines = fs.readFileSync(filePath, "utf-8").split("\n").filter(Boolean);
|
| 67 |
-
lines.forEach(line => {
|
| 68 |
-
try { targetArray.push(JSON.parse(line)); } catch(e) { log(`Invalid JSON line in ${filePath}`); }
|
| 69 |
-
});
|
| 70 |
}
|
| 71 |
|
| 72 |
function writeJsonl(filePath, dataArray) {
|
|
@@ -74,7 +63,7 @@ function writeJsonl(filePath, dataArray) {
|
|
| 74 |
fs.writeFileSync(filePath, lines, "utf-8");
|
| 75 |
}
|
| 76 |
|
| 77 |
-
// ===================== 内存
|
| 78 |
export function addConversation(sessionId, userMsg, botMsg) {
|
| 79 |
const ts = new Date().toISOString();
|
| 80 |
botMemory.conversations.push({ session_id: sessionId, user: userMsg, bot: botMsg, timestamp: ts });
|
|
@@ -89,27 +78,41 @@ export function addLog(sessionId, event) {
|
|
| 89 |
botMemory.logs.push({ session_id: sessionId, event, timestamp: ts });
|
| 90 |
}
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
// ===================== 载入 & 保存 =====================
|
| 93 |
function loadMemoryFromRepo() {
|
| 94 |
if (!fs.existsSync(REPO_DIR)) return;
|
| 95 |
const repoMemoryDir = path.join(REPO_DIR, "dataset-memory");
|
| 96 |
if (!fs.existsSync(repoMemoryDir)) return;
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
const
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
}
|
|
|
|
| 105 |
}
|
| 106 |
|
| 107 |
function saveMemoryToRepo() {
|
| 108 |
-
const
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
}
|
| 113 |
}
|
| 114 |
|
| 115 |
function commitAndPush() {
|
|
@@ -117,14 +120,11 @@ function commitAndPush() {
|
|
| 117 |
git("add -A", REPO_DIR);
|
| 118 |
const changed = git("diff --cached --name-only", REPO_DIR);
|
| 119 |
if (!changed) return 0;
|
| 120 |
-
const ts = new Date().toISOString().replace("T",
|
| 121 |
git(`commit -m "memory-sync ${ts}"`, REPO_DIR);
|
| 122 |
git("push --quiet origin HEAD:main", REPO_DIR);
|
| 123 |
return changed.split("\n").filter(Boolean).length;
|
| 124 |
-
} catch(e)
|
| 125 |
-
log("Push failed: " + e.message);
|
| 126 |
-
return 0;
|
| 127 |
-
}
|
| 128 |
}
|
| 129 |
|
| 130 |
// ===================== 启动 & 定时 =====================
|
|
@@ -134,31 +134,29 @@ function bootRestore() {
|
|
| 134 |
ensureRepo();
|
| 135 |
pullLatest();
|
| 136 |
loadMemoryFromRepo();
|
| 137 |
-
for (const key of Object.keys(FILES)) readJsonl(FILES[key], botMemory[key]);
|
| 138 |
-
log(`Loaded: conversations=${botMemory.conversations.length}, memory=${botMemory.memory.length}, logs=${botMemory.logs.length}`);
|
| 139 |
}
|
| 140 |
|
| 141 |
async function startUploadLoop() {
|
| 142 |
while(true){
|
| 143 |
-
await new Promise(r
|
| 144 |
-
try
|
| 145 |
log("Saving memory to repo...");
|
| 146 |
saveMemoryToRepo();
|
| 147 |
const count = commitAndPush();
|
| 148 |
-
log("Upload done: "
|
| 149 |
-
} catch(e){ log("Upload error: "
|
| 150 |
}
|
| 151 |
}
|
| 152 |
|
| 153 |
// ===================== Main =====================
|
| 154 |
-
async function main()
|
| 155 |
fs.mkdirSync(MEMORY_DIR, { recursive: true });
|
| 156 |
bootRestore();
|
| 157 |
log("Memory ready.");
|
| 158 |
|
| 159 |
if(HF_TOKEN && DATASET_ID){
|
| 160 |
log(`Starting ${UPLOAD_INTERVAL/3600000}h upload loop`);
|
| 161 |
-
startUploadLoop().catch(e
|
| 162 |
}
|
| 163 |
}
|
| 164 |
|
|
|
|
| 11 |
const REPO_DIR = "/tmp/hf-memory-sync";
|
| 12 |
const UPLOAD_INTERVAL = 3 * 60 * 60 * 1000; // 3 hours, in milliseconds: the pause between upload passes
|
| 13 |
|
| 14 |
+
// Record categories: each name is both a key of `botMemory` and the basename of a `<name>.jsonl` file.
const FILES = ["conversations", "memory", "logs"];
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
// ===================== 内存对象 =====================
|
| 17 |
const botMemory = {
|
|
|
|
| 20 |
logs: []
|
| 21 |
};
|
| 22 |
|
| 23 |
+
// ===================== Helper =====================
|
| 24 |
/** Write `msg` to stdout, tagged with the `[hf-sync]` prefix. */
function log(msg) {
  const line = `[hf-sync] ${msg}`;
  console.log(line);
}
|
| 25 |
+
/**
 * Build the HTTPS remote URL of the Hugging Face dataset repository.
 *
 * The access token is embedded as the password of the literal user `user`.
 * NOTE(review): this URL is handed to `git clone` / `git remote add`, so the
 * token will presumably end up in the checkout's git config — confirm that
 * is acceptable for the deployment.
 *
 * @returns {string} Remote URL including credentials.
 */
function repoUrl() {
  const credentials = `user:${HF_TOKEN}`;
  const repoPath = `datasets/${DATASET_ID}`;
  return `https://${credentials}@huggingface.co/${repoPath}`;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
function git(args, cwd) {
|
| 27 |
return execSync("git " + args, {
|
| 28 |
cwd: cwd || REPO_DIR,
|
|
|
|
| 35 |
function ensureRepo() {
|
| 36 |
if (fs.existsSync(path.join(REPO_DIR, ".git"))) return;
|
| 37 |
execSync("rm -rf " + REPO_DIR, { stdio: "pipe" });
|
| 38 |
+
try { git("clone --depth 1 " + repoUrl() + " " + REPO_DIR, "/tmp"); }
|
| 39 |
+
catch {
|
|
|
|
| 40 |
fs.mkdirSync(REPO_DIR, { recursive: true });
|
| 41 |
git("init", REPO_DIR);
|
| 42 |
git("remote add origin " + repoUrl(), REPO_DIR);
|
|
|
|
| 55 |
/**
 * Append every record of a JSON Lines file to `targetArray`.
 *
 * Reading is best-effort: a missing file is a no-op and malformed lines are
 * skipped instead of aborting the load. Skipped lines are counted and
 * reported once per file, so corrupted data does not vanish silently.
 *
 * @param {string} filePath - Path of the `.jsonl` file to read.
 * @param {Array} targetArray - Array that receives the parsed records, in file order.
 * @returns {void}
 */
function readJsonl(filePath, targetArray) {
  if (!fs.existsSync(filePath)) return;

  let skipped = 0;
  for (const line of fs.readFileSync(filePath, "utf-8").split("\n")) {
    // Blank lines (e.g. the trailing newline) carry no record and are not errors.
    if (line.trim() === "") continue;
    try {
      targetArray.push(JSON.parse(line));
    } catch {
      skipped += 1;
    }
  }

  if (skipped > 0) log(`Skipped ${skipped} invalid JSON line(s) in ${filePath}`);
}
|
| 60 |
|
| 61 |
function writeJsonl(filePath, dataArray) {
|
|
|
|
| 63 |
fs.writeFileSync(filePath, lines, "utf-8");
|
| 64 |
}
|
| 65 |
|
| 66 |
+
// ===================== 内存接口 =====================
|
| 67 |
export function addConversation(sessionId, userMsg, botMsg) {
|
| 68 |
const ts = new Date().toISOString();
|
| 69 |
botMemory.conversations.push({ session_id: sessionId, user: userMsg, bot: botMsg, timestamp: ts });
|
|
|
|
| 78 |
botMemory.logs.push({ session_id: sessionId, event, timestamp: ts });
|
| 79 |
}
|
| 80 |
|
| 81 |
+
// ===================== 路径管理 =====================
|
| 82 |
+
/**
 * Resolve the per-day folder under MEMORY_DIR for `date`.
 *
 * The folder name is `YYYY-MM-DD`, built from the date's local-time
 * calendar fields. Nothing is created on disk here.
 *
 * @param {Date} [date] - Day to resolve; defaults to the current time.
 * @returns {string} Path of the day folder.
 */
function getDateDir(date=new Date()) {
  const pad2 = (value) => String(value).padStart(2, "0");
  const folderName = [
    date.getFullYear(),
    pad2(date.getMonth() + 1), // getMonth() is zero-based
    pad2(date.getDate()),
  ].join("-");
  return path.join(MEMORY_DIR, folderName);
}
|
| 86 |
+
|
| 87 |
+
/**
 * Map every record category in FILES to its `.jsonl` path inside the day
 * folder for `date`, creating that folder if it does not exist yet.
 *
 * @param {Date} [date] - Day to resolve; defaults to the current time.
 * @returns {Object<string, string>} Category name -> file path.
 */
function getFilesForDate(date=new Date()) {
  const dayDir = getDateDir(date);
  fs.mkdirSync(dayDir, { recursive: true });
  return Object.fromEntries(
    FILES.map((kind) => [kind, path.join(dayDir, `${kind}.jsonl`)])
  );
}
|
| 94 |
+
|
| 95 |
// ===================== 载入 & 保存 =====================
|
| 96 |
/**
 * Populate `botMemory` from the `dataset-memory/<day>/<category>.jsonl`
 * files of the local repository checkout.
 *
 * - Day folders are visited in sorted name order, so records are appended in
 *   a deterministic order regardless of how the file system lists entries.
 * - Records that are byte-for-byte identical once serialized are loaded only
 *   once. A day folder can hold a snapshot of the whole memory rather than
 *   just that day's records, so the same record may appear in several
 *   folders; without this the in-memory history would grow with duplicates
 *   on every restart.
 *
 * Does nothing when the checkout or its `dataset-memory` folder is missing.
 *
 * @returns {void}
 */
function loadMemoryFromRepo() {
  const repoMemoryDir = path.join(REPO_DIR, "dataset-memory");
  // Also covers a missing REPO_DIR: the nested path cannot exist without it.
  if (!fs.existsSync(repoMemoryDir)) return;

  // Dirent-based listing avoids one stat() per entry and cannot throw on a
  // dangling entry the way a separate statSync call can.
  const dateDirs = fs
    .readdirSync(repoMemoryDir, { withFileTypes: true })
    .filter((entry) => entry.isDirectory())
    .map((entry) => entry.name)
    .sort();

  for (const kind of FILES) {
    const target = botMemory[kind];
    const seen = new Set(target.map((record) => JSON.stringify(record)));

    for (const dateDir of dateDirs) {
      const fromFile = [];
      readJsonl(path.join(repoMemoryDir, dateDir, `${kind}.jsonl`), fromFile);

      for (const record of fromFile) {
        const key = JSON.stringify(record);
        if (seen.has(key)) continue;
        seen.add(key);
        target.push(record);
      }
    }
  }

  log(`Loaded: conversations=${botMemory.conversations.length}, memory=${botMemory.memory.length}, logs=${botMemory.logs.length}`);
}
|
| 110 |
|
| 111 |
/**
 * Write `botMemory` into the repository checkout as
 * `dataset-memory/<day>/<category>.jsonl`.
 *
 * Each record is filed under the day of its own `timestamp` (local calendar
 * day, the same convention `getDateDir` uses) instead of dumping the whole
 * in-memory history into today's folder. Writing everything into today's
 * folder made every day folder a full snapshot, so loading all folders back
 * multiplied the history on each restart.
 *
 * Records without a usable `timestamp` are filed under today.
 * NOTE(review): such records would be re-filed under a new day on each later
 * save — confirm every producer sets `timestamp`.
 *
 * Today's folder always receives one file per category, even when empty.
 *
 * @returns {void}
 */
function saveMemoryToRepo() {
  // Kept for its side effect only: it creates today's folder under MEMORY_DIR.
  getFilesForDate();

  const today = path.basename(getDateDir());

  // Resolve the day-folder name a record belongs to.
  const dayOf = (record) => {
    const stamp = record == null ? undefined : record.timestamp;
    if (typeof stamp !== "string" && typeof stamp !== "number") return today;
    const when = new Date(stamp);
    return Number.isNaN(when.getTime()) ? today : path.basename(getDateDir(when));
  };

  for (const kind of FILES) {
    // Seed with today so its file is (re)written even when it has no records.
    const byDay = new Map([[today, []]]);
    for (const record of botMemory[kind]) {
      const day = dayOf(record);
      if (!byDay.has(day)) byDay.set(day, []);
      byDay.get(day).push(record);
    }

    for (const [day, records] of byDay) {
      const repoDateDir = path.join(REPO_DIR, "dataset-memory", day);
      fs.mkdirSync(repoDateDir, { recursive: true });
      writeJsonl(path.join(repoDateDir, `${kind}.jsonl`), records);
    }
  }
}
|
| 117 |
|
| 118 |
function commitAndPush() {
|
|
|
|
| 120 |
git("add -A", REPO_DIR);
|
| 121 |
const changed = git("diff --cached --name-only", REPO_DIR);
|
| 122 |
if (!changed) return 0;
|
| 123 |
+
const ts = new Date().toISOString().replace("T"," ").substring(0,19);
|
| 124 |
git(`commit -m "memory-sync ${ts}"`, REPO_DIR);
|
| 125 |
git("push --quiet origin HEAD:main", REPO_DIR);
|
| 126 |
return changed.split("\n").filter(Boolean).length;
|
| 127 |
+
} catch(e){ log("Push failed: "+e.message); return 0; }
|
|
|
|
|
|
|
|
|
|
| 128 |
}
|
| 129 |
|
| 130 |
// ===================== 启动 & 定时 =====================
|
|
|
|
| 134 |
ensureRepo();
|
| 135 |
pullLatest();
|
| 136 |
loadMemoryFromRepo();
|
|
|
|
|
|
|
| 137 |
}
|
| 138 |
|
| 139 |
/**
 * Run the periodic upload forever: wait UPLOAD_INTERVAL, write the in-memory
 * state into the checkout, then commit and push it.
 *
 * A failure in one pass is logged and the loop carries on with the next
 * interval, so the returned promise does not settle in normal operation.
 *
 * @returns {Promise<never>}
 */
async function startUploadLoop() {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  for (;;) {
    // The wait comes first, so nothing is uploaded right after start-up.
    await pause(UPLOAD_INTERVAL);
    try {
      log("Saving memory to repo...");
      saveMemoryToRepo();
      const changedCount = commitAndPush();
      log(`Upload done: ${changedCount} file(s) changed`);
    } catch (err) {
      log(`Upload error: ${err.message}`);
    }
  }
}
|
| 150 |
|
| 151 |
// ===================== Main =====================
|
| 152 |
+
/**
 * Entry point: make sure MEMORY_DIR exists, restore state via bootRestore(),
 * and start the background upload loop when both HF_TOKEN and DATASET_ID are
 * set.
 *
 * The upload loop is deliberately not awaited; its rejection, if any, is
 * only logged.
 *
 * @returns {Promise<void>}
 */
async function main(){
  fs.mkdirSync(MEMORY_DIR, { recursive: true });
  bootRestore();
  log("Memory ready.");

  const syncConfigured = Boolean(HF_TOKEN && DATASET_ID);
  if (!syncConfigured) return;

  const hours = UPLOAD_INTERVAL / 3600000;
  log(`Starting ${hours}h upload loop`);
  startUploadLoop().catch((err) => log(`Upload loop error: ${err.message}`));
}
|
| 162 |
|