File size: 7,178 Bytes
/**
 * Pure helpers for the long-term memory pipeline. Kept free of any
 * `@workspace/db` import so they can be loaded — and unit-tested —
 * without spinning up a Postgres pool.
 *
 * The DB-aware service in `memory.ts` re-exports everything here.
 */
import { createHash } from "node:crypto";

// ---------- hard caps (server-enforced)
/**
 * Upper limits for the memory pipeline that per-user configuration cannot
 * exceed. Within this module only `tokensPerTurn` is read (by
 * `rankAndPackFacts`); the remaining caps are presumably enforced by the
 * DB-aware service — confirm against `memory.ts`.
 */
export const MEMORY_HARD_CAPS = {
  // NOTE(review): looks like the maximum number of stored facts — confirm.
  facts: 100,
  // Ceiling applied to `MemoryConfig.max_tokens_per_turn` when packing facts.
  tokensPerTurn: 2000,
  // NOTE(review): looks like the maximum number of newly extracted facts per turn — confirm.
  newPerTurn: 5,
  // NOTE(review): looks like the maximum length of a fact's `content` in characters — confirm.
  contentChars: 500,
} as const;

/**
 * The set of fact `kind` strings. The same six values are spelled out in the
 * `<memory_fact>` extraction prompts in this module, so the two lists need to
 * stay in sync. Not read elsewhere in this file; presumably used by callers
 * to validate extracted facts — confirm.
 */
export const VALID_KINDS: ReadonlySet<string> = new Set([
  "preference",
  "fact",
  "interest",
  "domain",
  "terminology",
  "summary",
]);

/** Memory settings consumed by `rankAndPackFacts`. */
export interface MemoryConfig {
  /** When false, `rankAndPackFacts` injects nothing at all. */
  enabled: boolean;
  /**
   * When true, the injected text carries instructions asking the model to
   * emit `<memory_fact>` lines (appended after the fact list, or as the
   * bootstrap prompt when no facts are packed).
   */
  auto_extract: boolean;
  /** Not read in this module. NOTE(review): presumably the stored-fact limit — confirm. */
  max_facts: number;
  /** Requested token budget per turn; capped by `MEMORY_HARD_CAPS.tokensPerTurn`. */
  max_tokens_per_turn: number;
}

/**
 * A single memory fact as handled by the ranker. Within this module only
 * `id`, `kind`, `content`, `salience` and `updated_at` are read; the other
 * fields are carried through untouched.
 */
export interface PublicMemoryFact {
  /** Identifier copied into `MemoryInjection.fact_ids`. */
  id: string;
  /** Category label rendered as `[kind]` in the injected text. */
  kind: string;
  /** Fact text; rendered in the injected text and tokenized for keyword overlap. */
  content: string;
  confidence: number;
  /** Importance weight; contributes `salience * 0.6` to the ranking score. */
  salience: number;
  source: "auto" | "manual";
  source_message_id: string | null;
  conversation_id: string | null;
  use_count: number;
  archived: boolean;
  created_at: string;
  /** Timestamp string parsed with `Date.parse` to compute the recency score. */
  updated_at: string;
  last_used_at: string | null;
}

/** Result of `rankAndPackFacts`: which facts were chosen and the text to inject. */
export interface MemoryInjection {
  /** Facts selected for injection, in ranked order. */
  facts: PublicMemoryFact[];
  /** Ids of `facts`, in the same order. */
  fact_ids: string[];
  /** Estimated token cost (via `estimateTokens`) of the injected text; 0 when `text` is null. */
  injected_tokens: number;
  /** Text to inject, or null when nothing should be injected. */
  text: string | null;
}

// ---------- helpers

/**
 * Cheap token estimate for budget bookkeeping: one token per four UTF-16
 * code units, rounded up. Empty (or missing) input costs zero tokens.
 *
 * @param s Text to measure.
 * @returns Estimated token count, always a non-negative integer.
 */
export function estimateTokens(s: string): number {
  const charCount = s ? s.length : 0;
  return charCount === 0 ? 0 : Math.ceil(charCount / 4);
}

/**
 * Produces the dedupe key for a piece of text: lowercase it, turn every run
 * of Unicode punctuation or symbols into a space, collapse whitespace runs to
 * a single space, and trim both ends.
 *
 * @param s Raw text; a falsy value is treated as the empty string.
 * @returns The normalized text.
 */
export function normalize(s: string): string {
  const lowered = (s || "").toLowerCase();
  // Punctuation (\p{P}) and symbols (\p{S}) become separators, not deletions,
  // so "a+b" and "a b" normalize to the same key.
  const withoutPunctuation = lowered.replace(/[\p{P}\p{S}]+/gu, " ");
  const singleSpaced = withoutPunctuation.replace(/\s+/g, " ");
  return singleSpaced.trim();
}

/**
 * Canonical dedupe hash for a fact: the SHA-256 digest, as lowercase hex, of
 * `normalize(content)`. Two texts that normalize to the same string therefore
 * share a hash. Used alongside the textual normalized column.
 *
 * @param content Raw fact text.
 * @returns 64-character hex digest.
 */
export function contentHash(content: string): string {
  const canonical = normalize(content);
  const hasher = createHash("sha256");
  hasher.update(canonical);
  return hasher.digest("hex");
}

// ---------- ranker prompt fragments

/** First line of the injected memory text; the bulleted fact list follows it. */
export const MEMORY_HEADER =
  "Long-term memory about the user (carry across conversations):\n";
/** Appended after the fact list whenever at least one fact is injected. */
export const MEMORY_INSTRUCTION_USE =
  "\n\nUse these facts naturally; do not announce that you are using stored memory unless asked.";
/**
 * Appended directly after `MEMORY_INSTRUCTION_USE` when `auto_extract` is on
 * (hence the leading space). Asks the model to emit new facts as
 * `<memory_fact>` JSON lines.
 */
export const MEMORY_INSTRUCTION_EXTRACT =
  " If the user shares a new stable preference / identity / workflow / important context, " +
  "emit it as `<memory_fact>{\"kind\":\"preference|fact|interest|domain|terminology|summary\",\"content\":\"...\",\"confidence\":0.0-1.0,\"salience\":0.0-1.0}</memory_fact>` " +
  "on its own line so it is remembered for future turns. Skip emitting facts that are already listed above.";
/**
 * Stand-alone prompt used as the entire injected text when `auto_extract` is
 * on and no facts were packed, so the model can still start emitting
 * `<memory_fact>` lines.
 */
export const MEMORY_BOOTSTRAP_EXTRACT =
  "Long-term memory is enabled for this user but currently empty. " +
  "If the user shares a stable preference, durable fact, recurring interest, research domain, terminology, or important context summary, " +
  "emit it on its own line as `<memory_fact>{\"kind\":\"preference|fact|interest|domain|terminology|summary\",\"content\":\"...\",\"confidence\":0.0-1.0,\"salience\":0.0-1.0}</memory_fact>` " +
  "so it is remembered for future turns.";

/**
 * Function words ignored when measuring keyword overlap. English entries are
 * whole words; the Chinese entries are single characters.
 */
const STOPWORDS: ReadonlySet<string> = new Set([
  "the", "a", "an", "and", "or", "of", "for", "to", "in", "on", "at",
  "with", "is", "are", "was", "were", "be", "been", "being", "this",
  "that", "these", "those", "it", "its", "as", "by", "from", "i", "you",
  "we", "they", "he", "she", "do", "does", "did", "have", "has", "had",
  "what", "which", "who", "how", "why", "when", "where", "can", "should",
  "would", "will", "if", "but", "not",
  "的", "了", "是", "我", "你", "他", "她", "它", "和", "在", "有",
  "也", "都", "就", "要", "吗", "呢", "吧", "把", "对",
]);

/**
 * Splits text into the set of tokens used for keyword-overlap scoring.
 *
 * - Latin-script words: lowercase runs matching `[a-z][a-z0-9_-]{2,}`
 *   (at least three characters), minus stopwords.
 * - CJK (U+4E00..U+9FFF): every run is broken into overlapping character
 *   bigrams; a run of exactly one character is kept as a single-character
 *   token.
 *
 * The CJK stopwords are single characters, so they can never equal a bigram.
 * To make them effective, a single-character run is dropped when it is a
 * stopword, and a bigram is dropped when both of its characters are stopwords
 * (it carries no content). Bigrams with at least one content character are
 * kept.
 *
 * @param s Text to tokenize; empty input yields an empty set.
 * @returns Distinct overlap tokens.
 */
function tokenizeForOverlap(s: string): Set<string> {
  const out = new Set<string>();
  if (!s) return out;
  const lower = s.toLowerCase();

  // Western words (≥3 chars).
  const wordRe = /[a-z][a-z0-9_-]{2,}/g;
  let m: RegExpExecArray | null;
  while ((m = wordRe.exec(lower))) {
    if (!STOPWORDS.has(m[0])) out.add(m[0]);
  }

  // CJK runs → bigrams (or the lone character for one-character runs).
  const cjkRe = /[\u4e00-\u9fff]+/g;
  while ((m = cjkRe.exec(lower))) {
    const seg = m[0];
    if (seg.length === 1) {
      if (!STOPWORDS.has(seg)) out.add(seg);
      continue;
    }
    for (let i = 0; i < seg.length - 1; i++) {
      const bi = seg.slice(i, i + 2);
      if (STOPWORDS.has(bi)) continue;
      // A bigram built purely from function characters is noise.
      if (STOPWORDS.has(bi.charAt(0)) && STOPWORDS.has(bi.charAt(1))) continue;
      out.add(bi);
    }
  }
  return out;
}

/** Builds the "no facts packed" result, optionally carrying the bootstrap extraction prompt. */
function emptyMemoryInjection(withBootstrapPrompt: boolean): MemoryInjection {
  if (!withBootstrapPrompt) {
    return { facts: [], fact_ids: [], injected_tokens: 0, text: null };
  }
  return {
    facts: [],
    fact_ids: [],
    injected_tokens: estimateTokens(MEMORY_BOOTSTRAP_EXTRACT),
    text: MEMORY_BOOTSTRAP_EXTRACT,
  };
}

/**
 * Pure ranker + token-budget packer. Exported so it can be unit-tested
 * without a database.
 *
 * Each candidate is scored as
 * `salience * 0.6 + recency * 0.2 + keyword_overlap * 0.5`, where
 * - `recency` is `0.5 + 0.5 / (1 + ageDays / 30)` (range 0.5 .. 1.0, based on
 *   `updated_at`), and
 * - `keyword_overlap` is the number of tokens shared with `currentText`
 *   divided by the smaller of the two token-set sizes.
 *
 * Candidates are sorted by score, descending, and packed greedily: a fact
 * that does not fit is skipped and smaller, lower-ranked facts may still be
 * packed. At most 50 facts are packed. The budget is
 * `min(cfg.max_tokens_per_turn, MEMORY_HARD_CAPS.tokensPerTurn)` and already
 * includes the header and instruction overhead.
 *
 * Robustness rules:
 * - A `max_tokens_per_turn` that is NaN falls back to the hard cap, so a
 *   broken config can never lift the server-enforced limit.
 * - An unparseable `updated_at` is treated as maximally old (recency 0.5).
 * - An `updated_at` in the future is treated as "now" (recency 1.0).
 * - A non-finite `salience` counts as 0.
 *
 * @param facts Candidate facts; not mutated.
 * @param currentText The current user text, used for keyword overlap.
 * @param cfg Memory configuration for the user.
 * @returns The packed facts and the text to inject. When memory is disabled
 *   the result is empty with `text: null`. When no fact is packed the result
 *   is either empty or, if `cfg.auto_extract` is set, the bootstrap
 *   extraction prompt.
 */
export function rankAndPackFacts(
  facts: PublicMemoryFact[],
  currentText: string,
  cfg: MemoryConfig,
): MemoryInjection {
  if (!cfg.enabled) return emptyMemoryInjection(false);
  if (!facts.length) return emptyMemoryInjection(cfg.auto_extract);

  const maxPackedFacts = 50;
  const msPerDay = 24 * 3600 * 1000;
  const queryTokens = tokenizeForOverlap(currentText);
  const now = Date.now();

  const scored = facts.map((f) => {
    const factTokens = tokenizeForOverlap(f.content);
    let overlap = 0;
    for (const q of queryTokens) if (factTokens.has(q)) overlap += 1;
    const overlapBoost = queryTokens.size
      ? overlap / Math.max(1, Math.min(queryTokens.size, factTokens.size))
      : 0;

    // Recency bonus above the 0.5 floor halves after ~30 days.
    // Unparseable timestamps stay at the floor; future timestamps are clamped
    // to age 0 so the denominator can never reach zero or go negative.
    let recency = 0.5;
    const updatedAtMs = Date.parse(f.updated_at);
    if (!Number.isNaN(updatedAtMs)) {
      const ageDays = Math.max(0, (now - updatedAtMs) / msPerDay);
      recency = 0.5 + 0.5 / (1 + ageDays / 30);
    }

    // Keep the score finite so the sort comparator below stays consistent.
    const salience = Number.isFinite(f.salience) ? f.salience : 0;
    const score = salience * 0.6 + recency * 0.2 + overlapBoost * 0.5;
    return { fact: f, score };
  });
  scored.sort((a, b) => b.score - a.score);

  // Math.min yields NaN if the configured value is NaN, and every
  // `used + cost > NaN` check would then pass; fall back to the hard cap.
  const hardCap = MEMORY_HARD_CAPS.tokensPerTurn;
  const requestedBudget = Math.min(cfg.max_tokens_per_turn, hardCap);
  const budget = Number.isNaN(requestedBudget) ? hardCap : requestedBudget;

  const instructionText =
    MEMORY_INSTRUCTION_USE +
    (cfg.auto_extract ? MEMORY_INSTRUCTION_EXTRACT : "");
  const fixedTokens =
    estimateTokens(MEMORY_HEADER) + estimateTokens(instructionText);

  let used = fixedTokens;
  const picked: PublicMemoryFact[] = [];
  for (const { fact } of scored) {
    const line = `- [${fact.kind}] ${fact.content}\n`;
    const cost = estimateTokens(line);
    // Skip (not stop): a shorter, lower-ranked fact may still fit.
    if (used + cost > budget) continue;
    used += cost;
    picked.push(fact);
    if (picked.length >= maxPackedFacts) break;
  }
  if (!picked.length) return emptyMemoryInjection(cfg.auto_extract);

  const text =
    MEMORY_HEADER +
    picked.map((f) => `- [${f.kind}] ${f.content}`).join("\n") +
    instructionText;
  return {
    facts: picked,
    fact_ids: picked.map((f) => f.id),
    injected_tokens: used,
    text,
  };
}