/**
 * Pure helpers for the long-term memory pipeline. Kept free of any
 * `@workspace/db` import so they can be loaded — and unit-tested —
 * without spinning up a Postgres pool.
 *
 * The DB-aware service in `memory.ts` re-exports everything here.
 */
import { createHash } from "node:crypto";

// ---------- hard caps (server-enforced)

/** Upper bounds applied regardless of per-user configuration. */
export const MEMORY_HARD_CAPS = {
  facts: 100,
  tokensPerTurn: 2000,
  newPerTurn: 5,
  contentChars: 500,
} as const;

/** The set of `kind` values a memory fact may carry. */
export const VALID_KINDS: ReadonlySet<string> = new Set([
  "preference",
  "fact",
  "interest",
  "domain",
  "terminology",
  "summary",
]);

/** Per-user memory settings consumed by the ranker. */
export interface MemoryConfig {
  enabled: boolean;
  auto_extract: boolean;
  max_facts: number;
  max_tokens_per_turn: number;
}

/** Serializable shape of a stored memory fact. */
export interface PublicMemoryFact {
  id: string;
  kind: string;
  content: string;
  confidence: number;
  salience: number;
  source: "auto" | "manual";
  source_message_id: string | null;
  conversation_id: string | null;
  use_count: number;
  archived: boolean;
  created_at: string;
  updated_at: string;
  last_used_at: string | null;
}

/** Result of ranking and packing: the chosen facts plus the prompt text. */
export interface MemoryInjection {
  facts: PublicMemoryFact[];
  fact_ids: string[];
  injected_tokens: number;
  text: string | null;
}

// ---------- helpers

/** ~4 chars per token is good enough for budget bookkeeping. */
export function estimateTokens(s: string): number {
  if (!s) return 0;
  return Math.ceil(s.length / 4);
}

/** Lowercased, whitespace-collapsed, punctuation-stripped dedupe key. */
export function normalize(s: string): string {
  return (s || "")
    .toLowerCase()
    .replace(/[\p{P}\p{S}]+/gu, " ")
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Stable SHA-256 hex of `normalize(content)` — used as the canonical
 * dedupe key alongside the textual normalized column.
 */
export function contentHash(content: string): string {
  return createHash("sha256").update(normalize(content)).digest("hex");
}

// ---------- ranker prompt fragments

export const MEMORY_HEADER =
  "Long-term memory about the user (carry across conversations):\n";

export const MEMORY_INSTRUCTION_USE =
  "\n\nUse these facts naturally; do not announce that you are using stored memory unless asked.";

export const MEMORY_INSTRUCTION_EXTRACT =
  " If the user shares a new stable preference / identity / workflow / important context, " +
  "emit it as `{\"kind\":\"preference|fact|interest|domain|terminology|summary\",\"content\":\"...\",\"confidence\":0.0-1.0,\"salience\":0.0-1.0}` " +
  "on its own line so it is remembered for future turns. Skip emitting facts that are already listed above.";

export const MEMORY_BOOTSTRAP_EXTRACT =
  "Long-term memory is enabled for this user but currently empty. " +
  "If the user shares a stable preference, durable fact, recurring interest, research domain, terminology, or important context summary, " +
  "emit it on its own line as `{\"kind\":\"preference|fact|interest|domain|terminology|summary\",\"content\":\"...\",\"confidence\":0.0-1.0,\"salience\":0.0-1.0}` " +
  "so it is remembered for future turns.";

/** Tokens ignored when computing keyword overlap (English words and single CJK characters). */
const STOPWORDS: ReadonlySet<string> = new Set([
  "the", "a", "an", "and", "or", "of", "for", "to", "in", "on", "at", "with",
  "is", "are", "was", "were", "be", "been", "being", "this", "that", "these",
  "those", "it", "its", "as", "by", "from", "i", "you", "we", "they", "he",
  "she", "do", "does", "did", "have", "has", "had", "what", "which", "who",
  "how", "why", "when", "where", "can", "should", "would", "will", "if",
  "but", "not",
  "的", "了", "是", "我", "你", "他", "她", "它", "和", "在", "有", "也", "都",
  "就", "要", "吗", "呢", "吧", "把", "对",
]);

/** Western word: a letter followed by at least two of `[a-z0-9_-]` (input is lowercased first). */
const WORD_RE = /[a-z][a-z0-9_-]{2,}/g;

/** Maximal run of characters in the U+4E00..U+9FFF range. */
const CJK_RUN_RE = /[\u4e00-\u9fff]+/g;

const MS_PER_DAY = 24 * 3600 * 1000;

/** Age scale (in days) of the recency curve: at this age the recency bonus has halved. */
const RECENCY_SCALE_DAYS = 30;

/** Maximum number of facts packed into a single injection. */
const MAX_INJECTED_FACTS = 50;

/**
 * Splits text into the token set used for keyword-overlap scoring:
 * Western words of three or more characters, and overlapping bigrams of
 * each CJK run. A CJK run of length one is kept as a single-character
 * token. Stopwords are dropped in every case.
 */
function tokenizeForOverlap(s: string): Set<string> {
  const out = new Set<string>();
  if (!s) return out;

  const lower = s.toLowerCase();

  for (const word of lower.match(WORD_RE) ?? []) {
    if (!STOPWORDS.has(word)) out.add(word);
  }

  for (const seg of lower.match(CJK_RUN_RE) ?? []) {
    if (seg.length === 1) {
      // A lone character is the only place the single-character CJK
      // stopwords can match, so the filter has to be applied here.
      if (!STOPWORDS.has(seg)) out.add(seg);
      continue;
    }
    for (let i = 0; i < seg.length - 1; i++) {
      const bigram = seg.slice(i, i + 2);
      if (!STOPWORDS.has(bigram)) out.add(bigram);
    }
  }
  return out;
}

/**
 * Injection returned when no facts are sent: the bootstrap extraction
 * prompt when `auto_extract` is on, otherwise nothing at all.
 */
function emptyInjection(cfg: MemoryConfig): MemoryInjection {
  if (cfg.auto_extract) {
    return {
      facts: [],
      fact_ids: [],
      injected_tokens: estimateTokens(MEMORY_BOOTSTRAP_EXTRACT),
      text: MEMORY_BOOTSTRAP_EXTRACT,
    };
  }
  return { facts: [], fact_ids: [], injected_tokens: 0, text: null };
}

/**
 * Scores one fact as `salience * 0.6 + recency * 0.2 + overlap * 0.5`.
 * The result is always finite so it is safe to use in a sort comparator.
 */
function scoreFact(
  fact: PublicMemoryFact,
  queryTokens: ReadonlySet<string>,
  nowMs: number,
): number {
  let overlapBoost = 0;
  if (queryTokens.size) {
    const factTokens = tokenizeForOverlap(fact.content);
    let overlap = 0;
    for (const q of queryTokens) if (factTokens.has(q)) overlap += 1;
    overlapBoost =
      overlap / Math.max(1, Math.min(queryTokens.size, factTokens.size));
  }

  // An unparsable timestamp is treated as infinitely old (recency 0.5).
  // A timestamp in the future is clamped to age 0 (recency 1.0); without
  // the clamp the denominator below can reach zero or go negative.
  const updatedAtMs = Date.parse(fact.updated_at);
  const ageDays = Number.isFinite(updatedAtMs)
    ? Math.max(0, (nowMs - updatedAtMs) / MS_PER_DAY)
    : Number.POSITIVE_INFINITY;

  // Hyperbolic decay from 1.0 (fresh) towards 0.5 (very old).
  const recency = 0.5 + 0.5 / (1 + ageDays / RECENCY_SCALE_DAYS);

  // A non-finite salience would turn the comparator result into NaN.
  const salience = Number.isFinite(fact.salience) ? fact.salience : 0;

  return salience * 0.6 + recency * 0.2 + overlapBoost * 0.5;
}

/**
 * Pure ranker + token-budget packer. Exported so it can be unit-tested
 * without a database. Given a list of candidate facts, scores them by
 * `salience * 0.6 + recency * 0.2 + keyword_overlap * 0.5`, sorts
 * descending, and packs as many as fit within
 * `min(cfg.max_tokens_per_turn, MEMORY_HARD_CAPS.tokensPerTurn)` after
 * subtracting the header and instruction overhead.
 *
 * @param facts - Candidate facts; the array is not mutated.
 * @param currentText - Text whose tokens are matched against each fact's content.
 * @param cfg - Memory settings. A NaN `max_tokens_per_turn` falls back to the hard cap.
 * @returns The packed facts and prompt text. When memory is disabled the
 *   text is `null`. When no fact is packed the text is the bootstrap
 *   extraction prompt if `auto_extract` is on, otherwise `null`.
 */
export function rankAndPackFacts(
  facts: PublicMemoryFact[],
  currentText: string,
  cfg: MemoryConfig,
): MemoryInjection {
  if (!cfg.enabled) {
    return { facts: [], fact_ids: [], injected_tokens: 0, text: null };
  }
  if (!facts.length) return emptyInjection(cfg);

  const queryTokens = tokenizeForOverlap(currentText);
  const nowMs = Date.now();
  const ranked = facts
    .map((fact) => ({ fact, score: scoreFact(fact, queryTokens, nowMs) }))
    .sort((a, b) => b.score - a.score);

  // Math.min(NaN, cap) is NaN and every `> NaN` comparison is false, which
  // would let every fact through; fall back to the hard cap instead.
  const requested = Number.isNaN(cfg.max_tokens_per_turn)
    ? MEMORY_HARD_CAPS.tokensPerTurn
    : cfg.max_tokens_per_turn;
  const budget = Math.min(requested, MEMORY_HARD_CAPS.tokensPerTurn);

  const instructionText =
    MEMORY_INSTRUCTION_USE + (cfg.auto_extract ? MEMORY_INSTRUCTION_EXTRACT : "");
  let used = estimateTokens(MEMORY_HEADER) + estimateTokens(instructionText);

  const picked: PublicMemoryFact[] = [];
  for (const { fact } of ranked) {
    const cost = estimateTokens(`- [${fact.kind}] ${fact.content}\n`);
    // Skip rather than stop: a shorter, lower-ranked fact may still fit.
    if (used + cost > budget) continue;
    used += cost;
    picked.push(fact);
    if (picked.length >= MAX_INJECTED_FACTS) break;
  }

  if (!picked.length) return emptyInjection(cfg);

  const text =
    MEMORY_HEADER +
    picked.map((f) => `- [${f.kind}] ${f.content}`).join("\n") +
    instructionText;

  return {
    facts: picked,
    fact_ids: picked.map((f) => f.id),
    injected_tokens: used,
    text,
  };
}