| |
| |
| |
| |
| |
| |
| |
| import { createHash } from "node:crypto"; |
|
|
| |
/**
 * Fixed ceilings for the memory feature.
 *
 * `tokensPerTurn` bounds the per-turn injection budget in `rankAndPackFacts`
 * regardless of the configured `max_tokens_per_turn`.
 *
 * NOTE(review): `facts`, `newPerTurn` and `contentChars` are not enforced in
 * this part of the file; judging by their names they cap stored facts, facts
 * accepted per turn, and characters per fact — confirm against the callers.
 */
export const MEMORY_HARD_CAPS = {
  facts: 100,
  tokensPerTurn: 2000,
  newPerTurn: 5,
  contentChars: 500,
} as const;
|
|
/**
 * The fact kinds this module recognises. The entries are the same six kinds
 * offered to the model in the `<memory_fact>` prompt templates.
 *
 * NOTE(review): presumably used to validate extracted facts; the validation
 * itself is not in this part of the file.
 */
export const VALID_KINDS: ReadonlySet<string> = new Set<string>([
  "preference",
  "fact",
  "interest",
  "domain",
  "terminology",
  "summary",
]);
|
|
/** Per-user settings that control memory injection. */
export interface MemoryConfig {
  /** When false, `rankAndPackFacts` injects nothing at all. */
  enabled: boolean;
  /**
   * When true, the injected text also asks the model to emit new
   * `<memory_fact>` entries (including when no facts are injected).
   */
  auto_extract: boolean;
  /** NOTE(review): not read in this part of the file; presumably the stored-fact limit — confirm. */
  max_facts: number;
  /** Token budget for one injection; capped by `MEMORY_HARD_CAPS.tokensPerTurn`. */
  max_tokens_per_turn: number;
}
|
|
/**
 * A stored memory fact in the shape this module consumes.
 *
 * Only `id`, `kind`, `content`, `salience` and `updated_at` are read by the
 * ranking code in this file; the remaining fields are carried through
 * untouched.
 */
export interface PublicMemoryFact {
  /** Identifier reported back in `MemoryInjection.fact_ids`. */
  id: string;
  /** Category label rendered in brackets in the injected line. Presumably one of `VALID_KINDS` — confirm. */
  kind: string;
  /** The fact text; rendered verbatim and tokenised for overlap scoring. */
  content: string;
  /** NOTE(review): not read here; the prompt templates describe it as 0.0-1.0. */
  confidence: number;
  /** Importance weight; the largest contributor to the ranking score. */
  salience: number;
  /** NOTE(review): presumably "auto" = model-extracted, "manual" = user-entered — confirm. */
  source: "auto" | "manual";
  source_message_id: string | null;
  conversation_id: string | null;
  use_count: number;
  archived: boolean;
  created_at: string;
  /** Date string parsed with `Date.parse` to compute the recency score. */
  updated_at: string;
  last_used_at: string | null;
}
|
|
/** Result of selecting facts for one turn, as returned by `rankAndPackFacts`. */
export interface MemoryInjection {
  /** The facts that were selected, in the order they appear in `text`. */
  facts: PublicMemoryFact[];
  /** Ids of `facts`, in the same order. */
  fact_ids: string[];
  /** Estimated token cost of `text`; 0 when nothing is injected. */
  injected_tokens: number;
  /** Prompt text to inject, or null when nothing should be injected. */
  text: string | null;
}
|
|
| |
|
|
| |
/**
 * Rough token estimate for a string: one token per four UTF-16 code units,
 * rounded up.
 *
 * @param s - Text to measure.
 * @returns 0 for an empty (or otherwise falsy) input, else `ceil(length / 4)`.
 */
export function estimateTokens(s: string): number {
  return s ? Math.ceil(s.length / 4) : 0;
}
|
|
| |
/**
 * Canonicalises text for comparison: lower-cases it, turns every run of
 * Unicode punctuation or symbol characters into a single space, collapses
 * whitespace runs, and trims both ends.
 *
 * @param s - Text to canonicalise; a falsy value is treated as "".
 * @returns The canonical form.
 */
export function normalize(s: string): string {
  const lowered = (s || "").toLowerCase();
  const withoutMarks = lowered.replace(/[\p{P}\p{S}]+/gu, " ");
  const singleSpaced = withoutMarks.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
| |
| |
/**
 * Fingerprint of a fact's text: the SHA-256 digest, as lowercase hex, of the
 * `normalize`d content. Texts that differ only in case, punctuation or
 * spacing therefore hash identically.
 *
 * @param content - Raw fact text.
 * @returns 64-character hex digest.
 */
export function contentHash(content: string): string {
  const canonical = normalize(content);
  const hasher = createHash("sha256");
  hasher.update(canonical);
  return hasher.digest("hex");
}
|
|
| |
|
|
/** Opening line of the injected memory section; the bulleted fact list follows it directly. */
export const MEMORY_HEADER =
  "Long-term memory about the user (carry across conversations):\n";
/**
 * Instruction appended after the fact list. It starts with a blank line so it
 * is visually separated from the last fact.
 */
export const MEMORY_INSTRUCTION_USE =
  "\n\nUse these facts naturally; do not announce that you are using stored memory unless asked.";
/**
 * Extra instruction appended after `MEMORY_INSTRUCTION_USE` when automatic
 * extraction is on. It asks the model to emit new facts as a
 * `<memory_fact>{…}</memory_fact>` line. The leading space is intentional:
 * the text continues the preceding sentence.
 */
export const MEMORY_INSTRUCTION_EXTRACT = [
  " If the user shares a new stable preference / identity / workflow / important context, ",
  'emit it as `<memory_fact>{"kind":"preference|fact|interest|domain|terminology|summary","content":"...","confidence":0.0-1.0,"salience":0.0-1.0}</memory_fact>` ',
  "on its own line so it is remembered for future turns. Skip emitting facts that are already listed above.",
].join("");
/**
 * Stand-alone prompt used when automatic extraction is on but no facts are
 * injected. It tells the model that memory is empty and how to emit
 * `<memory_fact>{…}</memory_fact>` lines.
 */
export const MEMORY_BOOTSTRAP_EXTRACT = [
  "Long-term memory is enabled for this user but currently empty. ",
  "If the user shares a stable preference, durable fact, recurring interest, research domain, terminology, or important context summary, ",
  'emit it on its own line as `<memory_fact>{"kind":"preference|fact|interest|domain|terminology|summary","content":"...","confidence":0.0-1.0,"salience":0.0-1.0}</memory_fact>` ',
  "so it is remembered for future turns.",
].join("");
|
|
/**
 * Words and single CJK characters that carry too little meaning to count as
 * topical overlap between the user's message and a stored fact.
 */
const STOPWORDS: ReadonlySet<string> = new Set([
  "the", "a", "an", "and", "or", "of", "for", "to", "in", "on", "at",
  "with", "is", "are", "was", "were", "be", "been", "being", "this",
  "that", "these", "those", "it", "its", "as", "by", "from", "i", "you",
  "we", "they", "he", "she", "do", "does", "did", "have", "has", "had",
  "what", "which", "who", "how", "why", "when", "where", "can", "should",
  "would", "will", "if", "but", "not",
  "的", "了", "是", "我", "你", "他", "她", "它", "和", "在", "有",
  "也", "都", "就", "要", "吗", "呢", "吧", "把", "对",
]);

/**
 * Splits text into the set of tokens used for overlap scoring.
 *
 * - Latin: lower-cased runs that start with a letter and are at least three
 *   characters long (letters, digits, `_`, `-`), minus stopwords.
 * - CJK (U+4E00–U+9FFF): each run of two or more characters contributes its
 *   overlapping bigrams; a run of exactly one character contributes that
 *   character unless it is a stopword.
 *
 * @param s - Text to tokenise; empty input yields an empty set.
 * @returns The distinct tokens, Latin words first, then CJK tokens.
 */
function tokenizeForOverlap(s: string): Set<string> {
  const tokens = new Set<string>();
  if (!s) return tokens;

  const lower = s.toLowerCase();
  let match: RegExpExecArray | null;

  const wordRe = /[a-z][a-z0-9_-]{2,}/g;
  while ((match = wordRe.exec(lower)) !== null) {
    const word = match[0];
    if (!STOPWORDS.has(word)) tokens.add(word);
  }

  const cjkRe = /[\u4e00-\u9fff]+/g;
  while ((match = cjkRe.exec(lower)) !== null) {
    const run = match[0];
    if (run.length === 1) {
      // A lone character is the only place the single-character CJK
      // stopwords can ever match, so the filter has to be applied here.
      if (!STOPWORDS.has(run)) tokens.add(run);
      continue;
    }
    for (let i = 0; i < run.length - 1; i++) {
      const bigram = run.slice(i, i + 2);
      if (!STOPWORDS.has(bigram)) tokens.add(bigram);
    }
  }
  return tokens;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Chooses which stored facts to inject for the current turn and renders them
 * as prompt text.
 *
 * Scoring: `salience * 0.6 + recency * 0.2 + overlap * 0.5`, where
 * - `recency` falls from 1.0 (just updated) towards 0.5 on a 30-day scale;
 *   a timestamp in the future counts as "just updated" and an unparseable
 *   `updated_at` gets the 0.5 floor, so every score is a finite number;
 * - `overlap` is the number of tokens shared by `currentText` and the fact,
 *   divided by the size of the smaller of the two token sets.
 *
 * Packing: facts are taken greedily in score order while the estimated total
 * (header + instructions + one line per fact) stays within
 * `min(cfg.max_tokens_per_turn, MEMORY_HARD_CAPS.tokensPerTurn)`. A fact that
 * does not fit is skipped, so a shorter lower-ranked fact can still be
 * included. At most 50 facts are taken.
 *
 * @param facts - Candidate facts; the array is not mutated.
 * @param currentText - The text of the current turn, used for overlap scoring.
 * @param cfg - Memory settings for the user.
 * @returns The selected facts and the text to inject. When memory is disabled
 *   the result is empty with `text: null`. When no fact is selected the
 *   result is the bootstrap extraction prompt if `cfg.auto_extract` is set,
 *   otherwise empty with `text: null`.
 */
export function rankAndPackFacts(
  facts: PublicMemoryFact[],
  currentText: string,
  cfg: MemoryConfig,
): MemoryInjection {
  const MS_PER_DAY = 24 * 3600 * 1000;
  const MAX_INJECTED_FACTS = 50;

  const emptyInjection = (): MemoryInjection => ({
    facts: [],
    fact_ids: [],
    injected_tokens: 0,
    text: null,
  });
  // Used both when there are no stored facts and when none fits the budget.
  const noFactsInjection = (): MemoryInjection =>
    cfg.auto_extract
      ? {
          facts: [],
          fact_ids: [],
          injected_tokens: estimateTokens(MEMORY_BOOTSTRAP_EXTRACT),
          text: MEMORY_BOOTSTRAP_EXTRACT,
        }
      : emptyInjection();

  if (!cfg.enabled) return emptyInjection();
  if (!facts.length) return noFactsInjection();

  const queryTokens = tokenizeForOverlap(currentText);
  const now = Date.now();

  const scored = facts.map((fact) => {
    const factTokens = tokenizeForOverlap(fact.content);
    let overlap = 0;
    for (const token of queryTokens) {
      if (factTokens.has(token)) overlap += 1;
    }
    const overlapBoost = queryTokens.size
      ? overlap / Math.max(1, Math.min(queryTokens.size, factTokens.size))
      : 0;

    // Date.parse yields NaN for strings it cannot read; a NaN score would make
    // the sort comparator inconsistent. Future timestamps are clamped to age 0
    // because 1 + ageDays / 30 reaches zero at -30 days.
    const updatedAtMs = Date.parse(fact.updated_at);
    let recency = 0.5;
    if (Number.isFinite(updatedAtMs)) {
      const ageDays = Math.max(0, now - updatedAtMs) / MS_PER_DAY;
      recency = 0.5 + 0.5 / (1 + ageDays / 30);
    }

    const score = fact.salience * 0.6 + recency * 0.2 + overlapBoost * 0.5;
    return { fact, score };
  });
  scored.sort((a, b) => b.score - a.score);

  const budget = Math.min(
    cfg.max_tokens_per_turn,
    MEMORY_HARD_CAPS.tokensPerTurn,
  );
  const instructionText =
    MEMORY_INSTRUCTION_USE +
    (cfg.auto_extract ? MEMORY_INSTRUCTION_EXTRACT : "");

  // The header and instructions are always emitted, so they are charged first.
  let used = estimateTokens(MEMORY_HEADER) + estimateTokens(instructionText);
  const picked: PublicMemoryFact[] = [];
  for (const { fact } of scored) {
    const cost = estimateTokens(`- [${fact.kind}] ${fact.content}\n`);
    if (used + cost > budget) continue;
    used += cost;
    picked.push(fact);
    if (picked.length >= MAX_INJECTED_FACTS) break;
  }
  if (!picked.length) return noFactsInjection();

  const text =
    MEMORY_HEADER +
    picked.map((f) => `- [${f.kind}] ${f.content}`).join("\n") +
    instructionText;
  return {
    facts: picked,
    fact_ids: picked.map((f) => f.id),
    injected_tokens: used,
    text,
  };
}
|
|