| |
| |
| |
| |
| |
|
|
| import { MemoryRecord, DeduplicationResult, MemoryConfig, DEFAULT_MEMORY_CONFIG } from './types'; |
|
|
/**
 * Computes the cosine similarity between two numeric vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a` to be compared.
 * @returns The dot product of `a` and `b` divided by the product of their
 *   Euclidean norms. Returns `0` when the lengths differ or when the product
 *   of the norms is zero (for example, an all-zero or empty vector).
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  // Vectors of different dimensionality are treated as entirely dissimilar.
  if (a.length !== b.length) {
    return 0;
  }

  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (const [idx, x] of a.entries()) {
    const y = b[idx];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (magnitude === 0) {
    // Avoid dividing by zero; a zero-magnitude vector has no direction.
    return 0;
  }
  return dot / magnitude;
}
|
|
/**
 * Decides whether a new memory should be added, merged into an existing
 * memory, or dropped as a duplicate.
 *
 * The new embedding is compared against every existing memory with
 * `cosineSimilarity`; only the single closest match is considered. Because the
 * running best starts at `0` and is replaced only on a strictly greater score,
 * memories with a similarity of `0` or less are never matched, and the earliest
 * memory wins a tie.
 *
 * @param newEmbedding - Embedding of the incoming text.
 * @param newText - The incoming text, used when building merged text.
 * @param existingMemories - Memories to compare against.
 * @param config - Supplies `deduplicationThreshold` and `mergeThreshold`.
 * @returns `NOOP` with the matched id when the best similarity reaches
 *   `deduplicationThreshold`; otherwise `UPDATE` with the matched id and merged
 *   text when it reaches `mergeThreshold`; otherwise `ADD`.
 */
export function checkDeduplication(
  newEmbedding: number[],
  newText: string,
  existingMemories: MemoryRecord[],
  config: MemoryConfig = DEFAULT_MEMORY_CONFIG
): DeduplicationResult {
  const closest = existingMemories.reduce<{ score: number; record: MemoryRecord | null }>(
    (best, candidate) => {
      const score = cosineSimilarity(newEmbedding, candidate.embedding);
      return score > best.score ? { score, record: candidate } : best;
    },
    { score: 0, record: null }
  );
  const { score: topScore, record: nearest } = closest;

  // The duplicate check runs first, so it takes precedence over merging.
  if (topScore >= config.deduplicationThreshold && nearest) {
    return { operation: 'NOOP', existingMemoryId: nearest.id };
  }

  if (topScore >= config.mergeThreshold && nearest) {
    return {
      operation: 'UPDATE',
      existingMemoryId: nearest.id,
      mergedText: mergeMemoryTexts(nearest.text, newText),
    };
  }

  return { operation: 'ADD' };
}
|
|
/**
 * Combines the text of an existing memory with a new, similar text.
 *
 * - If either text contains the other (compared lower-cased and trimmed), the
 *   longer of the two original strings is returned; on equal length the
 *   existing text is kept.
 * - Otherwise the texts are joined as `"<existing>; <new>"` when that fits
 *   within `maxLength`.
 * - If the joined text is too long, only the new text is returned, cut down to
 *   `maxLength` UTF-16 code units when necessary.
 *
 * @param existingText - Text currently stored for the memory.
 * @param newText - Incoming text to merge in.
 * @param maxLength - Maximum length of a joined or truncated result.
 * @returns The merged text.
 */
function mergeMemoryTexts(existingText: string, newText: string, maxLength = 200): string {
  const existingNormalized = existingText.toLowerCase().trim();
  const newNormalized = newText.toLowerCase().trim();

  if (existingNormalized.includes(newNormalized) || newNormalized.includes(existingNormalized)) {
    return existingText.length >= newText.length ? existingText : newText;
  }

  const merged = `${existingText}; ${newText}`;
  if (merged.length > maxLength) {
    if (newText.length <= maxLength) {
      return newText;
    }

    // Truncate on a code-point boundary: if the cut would land between the two
    // halves of a surrogate pair, drop the leading half as well so the result
    // never ends in a lone surrogate.
    let end = Math.floor(maxLength);
    if (end > 0) {
      const lastKept = newText.charCodeAt(end - 1);
      const firstDropped = newText.charCodeAt(end);
      const lastIsHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
      const nextIsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
      if (lastIsHighSurrogate && nextIsLowSurrogate) {
        end -= 1;
      }
    }
    return newText.substring(0, end);
  }
  return merged;
}
|
|
/**
 * Builds the parameterized SQL query that fetches the stored memories most
 * similar to `embedding`.
 *
 * The query computes `1 - vec_distance_cosine(...)` as `similarity`, keeps rows
 * whose similarity is at least `minSimilarity`, and returns at most five rows
 * ordered from most to least similar. The embedding is bound twice (once for
 * the selected column, once for the filter) as a JSON string passed through
 * `vec_f32`.
 *
 * NOTE(review): `vec_distance_cosine` / `vec_f32` look like sqlite-vec SQL
 * functions — confirm the extension is loaded on the target connection.
 *
 * @param embedding - Query vector.
 * @param minSimilarity - Lowest similarity a row may have to be returned.
 *   Defaults to `0.65`; it should not exceed the smallest threshold the caller
 *   later applies to the candidates, or eligible rows are filtered out here.
 * @returns The SQL text and its positional parameters, in binding order.
 */
export function buildDeduplicationQuery(
  embedding: number[],
  minSimilarity = 0.65
): { sql: string; params: (string | number)[] } {
  // Serialize once; the same value is bound to both vec_f32(?) placeholders.
  const serializedEmbedding = JSON.stringify(embedding);

  const sql = [
    'SELECT id, text, embedding, type, source, created_at, last_accessed_at, access_count, importance,',
    '(1 - vec_distance_cosine(embedding, vec_f32(?))) AS similarity',
    'FROM memories WHERE (1 - vec_distance_cosine(embedding, vec_f32(?))) >= ? ORDER BY similarity DESC LIMIT 5',
  ].join('\n');

  return {
    sql,
    params: [serializedEmbedding, serializedEmbedding, minSimilarity],
  };
}
|
|
/**
 * Converts the `embedding` column of a `memories` row into a plain number
 * array.
 *
 * Rows are written through `vec_f32(?)`, so depending on the driver the column
 * may be read back as raw float32 bytes rather than JSON text. Binary values
 * (`ArrayBuffer` or any typed-array/`DataView`) are reinterpreted as float32 in
 * platform byte order; arrays are returned as-is; anything else is handed to
 * `JSON.parse`.
 *
 * @param raw - Column value as returned by the database driver.
 * @returns The decoded vector.
 * @throws SyntaxError if `raw` is a string that is not valid JSON.
 */
function decodeStoredEmbedding(raw: unknown): number[] {
  if (Array.isArray(raw)) {
    return raw as number[];
  }

  let bytes: Uint8Array | null = null;
  if (raw instanceof ArrayBuffer) {
    bytes = new Uint8Array(raw);
  } else if (ArrayBuffer.isView(raw)) {
    bytes = new Uint8Array(raw.buffer, raw.byteOffset, raw.byteLength);
  }

  if (bytes !== null) {
    // Copy first: the view may start at an offset that is not 4-byte aligned,
    // which Float32Array rejects. Trailing bytes that do not form a full
    // float are ignored.
    const aligned = bytes.slice();
    return Array.from(new Float32Array(aligned.buffer, 0, Math.floor(aligned.byteLength / 4)));
  }

  return JSON.parse(raw as string) as number[];
}

/**
 * Stores a memory, first checking whether a sufficiently similar memory is
 * already present.
 *
 * Candidates are fetched with `buildDeduplicationQuery`, mapped to
 * `MemoryRecord`s and passed to `checkDeduplication`, whose verdict selects one
 * of three writes:
 *
 * - `ADD`: inserts a new row with a generated id of the form
 *   `mem_<epoch ms>_<random base-36 suffix>`, an access count of `0`, and an
 *   importance computed from the text.
 * - `UPDATE`: overwrites the matched row's text with the merged text and its
 *   embedding with the new embedding, and bumps its access statistics. The
 *   merged text is not re-embedded here.
 * - `NOOP`: only bumps the matched row's access statistics.
 *
 * NOTE(review): candidates are pre-filtered by the similarity cut-off that
 * `buildDeduplicationQuery` applies by default, independently of `config`; a
 * `config.mergeThreshold` below that cut-off would never see its candidates.
 *
 * @param db - Database handle exposing async `getAll(sql, params)` and
 *   `run(sql, params)`.
 * @param text - Text of the memory to store.
 * @param embedding - Embedding of `text`.
 * @param type - Memory type stored on newly inserted rows.
 * @param source - Memory source stored on newly inserted rows.
 * @param config - Thresholds forwarded to `checkDeduplication`.
 * @returns The operation performed and the id of the affected memory.
 */
export async function storeMemoryWithDeduplication(
  db: any,
  text: string,
  embedding: number[],
  type: MemoryType,
  source: MemorySource,
  config: MemoryConfig = DEFAULT_MEMORY_CONFIG
): Promise<{ operation: MemoryOperation; memoryId?: string }> {
  const { sql, params } = buildDeduplicationQuery(embedding);
  const candidates = await db.getAll(sql, params);

  const existingMemories: MemoryRecord[] = candidates.map((row: any) => ({
    id: row.id,
    text: row.text,
    embedding: decodeStoredEmbedding(row.embedding),
    type: row.type,
    source: row.source,
    createdAt: row.created_at,
    lastAccessedAt: row.last_accessed_at,
    accessCount: row.access_count,
    importance: row.importance,
  }));

  const result = checkDeduplication(embedding, text, existingMemories, config);

  // One timestamp per call keeps the id, created_at and last_accessed_at consistent.
  const now = Date.now();

  switch (result.operation) {
    case 'ADD': {
      const id = `mem_${now}_${Math.random().toString(36).substring(2, 9)}`;
      await db.run(
        `INSERT INTO memories (id, text, embedding, type, source, created_at, last_accessed_at, access_count, importance) VALUES (?, ?, vec_f32(?), ?, ?, ?, ?, 0, ?)`,
        [id, text, JSON.stringify(embedding), type, source, now, now, computeImportance(text)]
      );
      return { operation: 'ADD', memoryId: id };
    }
    case 'UPDATE':
      await db.run(
        `UPDATE memories SET text = ?, embedding = vec_f32(?), last_accessed_at = ?, access_count = access_count + 1 WHERE id = ?`,
        // Fall back to the new text so a missing merge result never writes NULL.
        [result.mergedText ?? text, JSON.stringify(embedding), now, result.existingMemoryId]
      );
      return { operation: 'UPDATE', memoryId: result.existingMemoryId };
    case 'NOOP':
      await db.run(
        `UPDATE memories SET last_accessed_at = ?, access_count = access_count + 1 WHERE id = ?`,
        [now, result.existingMemoryId]
      );
      return { operation: 'NOOP', memoryId: result.existingMemoryId };
    default:
      // Unknown operations are treated as "nothing to do".
      return { operation: 'NOOP' };
  }
}
|
|
/**
 * Scores how important a memory text is, using simple text heuristics.
 *
 * Starting from a base of `0.5`, a bonus is added for each rule that applies,
 * in the order listed below; the total is capped at `1.0`.
 *
 * @param text - Memory text to score.
 * @returns A score between `0.5` and `1.0`.
 */
function computeImportance(text: string): number {
  const rules: Array<[applies: boolean, bonus: number]> = [
    // Longer texts score higher, in two steps.
    [text.length > 50, 0.1],
    [text.length > 100, 0.1],
    // Strong preferences, constraints and health-related keywords.
    [/\b(allergic|allergy|hate|love|always|never|important|must|need)\b/i.test(text), 0.15],
    // Statements about the speaker's identity, home or work.
    [/\b(my name|i am|i'm|i live|my job|my work|i work)\b/i.test(text), 0.1],
    // Date-bound events.
    [/\b(birthday|anniversary|deadline|appointment)\b/i.test(text), 0.1],
    // Any digits at all.
    [/\d+/.test(text), 0.05],
  ];

  const total = rules.reduce((sum, [applies, bonus]) => (applies ? sum + bonus : sum), 0.5);
  return Math.min(total, 1.0);
}
|
|
| import { MemoryType, MemorySource, MemoryOperation } from './types'; |
|
|