// ============================================================
// IMPROVEMENT 1: Memory Deduplication
// Based on Mem0 (arxiv 2504.19413) ADD/UPDATE/DELETE/NOOP logic
// See full source: https://huggingface.co/spaces/loudiman/sandbox-cbb9aab0
// ============================================================

import {
  MemoryRecord,
  DeduplicationResult,
  MemoryConfig,
  DEFAULT_MEMORY_CONFIG,
  MemoryType,
  MemorySource,
  MemoryOperation,
} from './types';

/**
 * Similarity floor historically used by the SQL candidate pre-filter.
 * Kept as the default so existing callers of `buildDeduplicationQuery`
 * get the same query parameters as before.
 */
const DEFAULT_CANDIDATE_SIMILARITY = 0.65;

/**
 * Scoring rules applied by `computeImportance`, in evaluation order.
 * The order is significant only for floating-point reproducibility.
 */
const IMPORTANCE_PATTERNS: ReadonlyArray<readonly [RegExp, number]> = [
  [/\b(allergic|allergy|hate|love|always|never|important|must|need)\b/i, 0.15],
  [/\b(my name|i am|i'm|i live|my job|my work|i work)\b/i, 0.1],
  [/\b(birthday|anniversary|deadline|appointment)\b/i, 0.1],
  [/\d+/, 0.05],
];

/**
 * Cosine similarity of two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`; `0` when the lengths differ or when
 *   either vector has zero magnitude (including two empty vectors).
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) return 0;

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero denominator would produce NaN/Infinity; report "no similarity".
  return denominator === 0 ? 0 : dotProduct / denominator;
}

/**
 * Decides what to do with a new memory given the already stored ones.
 *
 * The most similar existing memory (by cosine similarity of embeddings) is
 * compared against the two thresholds in `config`:
 * - similarity >= `deduplicationThreshold` -> `NOOP` (treated as a duplicate)
 * - similarity >= `mergeThreshold`         -> `UPDATE` with merged text
 * - otherwise                              -> `ADD`
 *
 * Only strictly positive similarities are considered, so an empty
 * `existingMemories` list always yields `ADD`.
 *
 * @param newEmbedding - Embedding of the incoming text.
 * @param newText - Incoming memory text.
 * @param existingMemories - Candidate memories to compare against.
 * @param config - Thresholds; defaults to `DEFAULT_MEMORY_CONFIG`.
 * @returns The operation to perform, plus the affected memory id and merged
 *   text where applicable.
 */
export function checkDeduplication(
  newEmbedding: number[],
  newText: string,
  existingMemories: MemoryRecord[],
  config: MemoryConfig = DEFAULT_MEMORY_CONFIG
): DeduplicationResult {
  let highestSimilarity = 0;
  let mostSimilarMemory: MemoryRecord | null = null;

  for (const memory of existingMemories) {
    const similarity = cosineSimilarity(newEmbedding, memory.embedding);
    if (similarity > highestSimilarity) {
      highestSimilarity = similarity;
      mostSimilarMemory = memory;
    }
  }

  if (mostSimilarMemory && highestSimilarity >= config.deduplicationThreshold) {
    return { operation: 'NOOP', existingMemoryId: mostSimilarMemory.id };
  }

  if (mostSimilarMemory && highestSimilarity >= config.mergeThreshold) {
    return {
      operation: 'UPDATE',
      existingMemoryId: mostSimilarMemory.id,
      mergedText: mergeMemoryTexts(mostSimilarMemory.text, newText),
    };
  }

  return { operation: 'ADD' };
}

/**
 * Combines an existing memory text with a new one.
 *
 * - If either text contains the other (case-insensitive, trimmed), the longer
 *   original string is returned; ties go to `existingText`.
 * - Otherwise the texts are joined as `"existing; new"` when that fits in
 *   `maxLength`.
 * - If the joined text is too long, the existing text is dropped and the new
 *   text is returned, truncated to `maxLength` characters if necessary.
 */
function mergeMemoryTexts(existingText: string, newText: string, maxLength = 200): string {
  const existingLower = existingText.toLowerCase().trim();
  const newLower = newText.toLowerCase().trim();

  if (existingLower.includes(newLower) || newLower.includes(existingLower)) {
    return existingText.length >= newText.length ? existingText : newText;
  }

  const merged = `${existingText}; ${newText}`;
  if (merged.length <= maxLength) return merged;

  // Too long to keep both: prefer the newer information.
  return newText.length <= maxLength ? newText : newText.substring(0, maxLength);
}

/**
 * Builds the SQL used to fetch the closest stored memories for an embedding.
 *
 * The query returns at most 5 rows whose cosine similarity to `embedding` is
 * at least `minSimilarity`, ordered from most to least similar.
 *
 * @param embedding - Query embedding; sent to the database as JSON text.
 * @param minSimilarity - Similarity floor for the pre-filter. Defaults to
 *   0.65, the value that was previously hard-coded.
 * @returns The SQL string and its positional parameters.
 */
export function buildDeduplicationQuery(
  embedding: number[],
  minSimilarity: number = DEFAULT_CANDIDATE_SIMILARITY
) {
  const embeddingJson = JSON.stringify(embedding);
  return {
    sql: `SELECT id, text, embedding, type, source, created_at, last_accessed_at, access_count, importance, (1 - vec_distance_cosine(embedding, vec_f32(?))) AS similarity FROM memories WHERE (1 - vec_distance_cosine(embedding, vec_f32(?))) >= ? ORDER BY similarity DESC LIMIT 5`,
    params: [embeddingJson, embeddingJson, minSimilarity],
  };
}

/**
 * Converts an `embedding` column value into a plain number array.
 *
 * The column is written through `vec_f32(?)`, so the value read back is not
 * guaranteed to be JSON text.
 * NOTE(review): confirm which representation the database driver in use
 * actually returns; raw bytes are interpreted as native-endian float32.
 *
 * Accepted inputs: a JSON string, an array, a `Float32Array`, or raw bytes
 * (`ArrayBuffer` / any typed-array view). Anything else is passed to
 * `JSON.parse`, which throws on unparseable input as before.
 */
function parseEmbedding(raw: unknown): number[] {
  if (typeof raw === 'string') return JSON.parse(raw);
  if (Array.isArray(raw)) return raw;
  if (raw instanceof Float32Array) return Array.from(raw);

  if (raw instanceof ArrayBuffer || ArrayBuffer.isView(raw)) {
    const bytes =
      raw instanceof ArrayBuffer
        ? new Uint8Array(raw)
        : new Uint8Array(raw.buffer, raw.byteOffset, raw.byteLength);
    // Copy into a fresh buffer: a Float32Array view needs a 4-byte aligned
    // offset, which a slice of a pooled buffer does not guarantee.
    const aligned = new Uint8Array(bytes);
    const floatCount = Math.floor(aligned.byteLength / 4);
    return Array.from(new Float32Array(aligned.buffer, 0, floatCount));
  }

  return JSON.parse(String(raw));
}

/**
 * Stores a memory, first checking whether an equivalent one already exists.
 *
 * Fetches the nearest stored memories, runs `checkDeduplication`, and then:
 * - `ADD`: inserts a new row with a generated id and a computed importance.
 * - `UPDATE`: overwrites text (merged) and embedding of the matched row and
 *   bumps its access statistics.
 * - `NOOP`: only bumps the access statistics of the matched row.
 * - any other operation: does nothing and reports `NOOP` without an id.
 *
 * @param db - Database handle exposing `getAll(sql, params)` and
 *   `run(sql, params)`, both awaited.
 * @param text - Memory text to store.
 * @param embedding - Embedding of `text`.
 * @param type - Memory type written on insert.
 * @param source - Memory source written on insert.
 * @param config - Thresholds; defaults to `DEFAULT_MEMORY_CONFIG`.
 * @returns The operation performed and the id of the affected memory.
 */
export async function storeMemoryWithDeduplication(
  db: any,
  text: string,
  embedding: number[],
  type: MemoryType,
  source: MemorySource,
  config: MemoryConfig = DEFAULT_MEMORY_CONFIG
): Promise<{ operation: MemoryOperation; memoryId?: string }> {
  // Never let the SQL pre-filter be stricter than the configured thresholds,
  // otherwise rows that should trigger UPDATE/NOOP are discarded before
  // checkDeduplication sees them. The floor is only ever lowered, so
  // configurations with thresholds >= 0.65 query exactly as before.
  const candidateFloor = Math.min(
    DEFAULT_CANDIDATE_SIMILARITY,
    config.mergeThreshold,
    config.deduplicationThreshold
  );
  const { sql, params } = buildDeduplicationQuery(embedding, candidateFloor);
  const candidates = await db.getAll(sql, params);

  const existingMemories: MemoryRecord[] = candidates.map((row: any) => ({
    id: row.id,
    text: row.text,
    embedding: parseEmbedding(row.embedding),
    type: row.type,
    source: row.source,
    createdAt: row.created_at,
    lastAccessedAt: row.last_accessed_at,
    accessCount: row.access_count,
    importance: row.importance,
  }));

  const result = checkDeduplication(embedding, text, existingMemories, config);
  const now = Date.now();

  switch (result.operation) {
    case 'ADD': {
      const id = `mem_${now}_${Math.random().toString(36).substring(2, 9)}`;
      await db.run(
        `INSERT INTO memories (id, text, embedding, type, source, created_at, last_accessed_at, access_count, importance) VALUES (?, ?, vec_f32(?), ?, ?, ?, ?, 0, ?)`,
        [id, text, JSON.stringify(embedding), type, source, now, now, computeImportance(text)]
      );
      return { operation: 'ADD', memoryId: id };
    }

    case 'UPDATE':
      // The stored embedding becomes that of the incoming text, not of the
      // merged text; no embedder is available here to re-embed the merge.
      await db.run(
        `UPDATE memories SET text = ?, embedding = vec_f32(?), last_accessed_at = ?, access_count = access_count + 1 WHERE id = ?`,
        [result.mergedText, JSON.stringify(embedding), now, result.existingMemoryId]
      );
      return { operation: 'UPDATE', memoryId: result.existingMemoryId };

    case 'NOOP':
      await db.run(
        `UPDATE memories SET last_accessed_at = ?, access_count = access_count + 1 WHERE id = ?`,
        [now, result.existingMemoryId]
      );
      return { operation: 'NOOP', memoryId: result.existingMemoryId };

    default:
      // Operations this function does not implement are reported as a no-op.
      return { operation: 'NOOP' };
  }
}

/**
 * Heuristic importance score for a memory text, in the range [0.5, 1.0].
 *
 * Starts at 0.5 and adds: 0.1 for more than 50 characters, another 0.1 for
 * more than 100, 0.15 for strong-preference/necessity words, 0.1 for
 * self-identifying phrases, 0.1 for date-like events, and 0.05 if the text
 * contains a digit. The total is capped at 1.0.
 */
function computeImportance(text: string): number {
  let score = 0.5;
  if (text.length > 50) score += 0.1;
  if (text.length > 100) score += 0.1;

  for (const [pattern, weight] of IMPORTANCE_PATTERNS) {
    if (pattern.test(text)) score += weight;
  }

  return Math.min(score, 1.0);
}