File size: 5,108 Bytes
c966a8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// ============================================================
// IMPROVEMENT 1: Memory Deduplication
// Based on Mem0 (arxiv 2504.19413) ADD/UPDATE/DELETE/NOOP logic
// See full source: https://huggingface.co/spaces/loudiman/sandbox-cbb9aab0
// ============================================================

import { MemoryRecord, DeduplicationResult, MemoryConfig, DEFAULT_MEMORY_CONFIG } from './types';

/**
 * Cosine similarity between two numeric vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns The dot product divided by the product of both Euclidean norms.
 *          Returns 0 when the vectors differ in length or when the product
 *          of the norms is zero (which includes two empty vectors).
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  const size = a.length;
  if (size !== b.length) {
    return 0;
  }

  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let idx = 0; idx < size; idx += 1) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  // A zero magnitude means at least one vector has no direction to compare.
  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (magnitude === 0) {
    return 0;
  }
  return dot / magnitude;
}

/**
 * Decides whether a new memory should be added, merged into an existing
 * memory, or ignored as a duplicate.
 *
 * The new embedding is compared against every existing memory with
 * `cosineSimilarity`. Only memories scoring strictly above 0 are candidates,
 * and on a tie the earliest one in `existingMemories` is kept.
 *
 * @param newEmbedding - Embedding of the incoming text.
 * @param newText - Incoming text, used only to build the merged text.
 * @param existingMemories - Memories to compare against.
 * @param config - Supplies `deduplicationThreshold` and `mergeThreshold`.
 * @returns `NOOP` with the matched id when the best score reaches
 *          `deduplicationThreshold`; otherwise `UPDATE` with the matched id
 *          and merged text when it reaches `mergeThreshold`; otherwise `ADD`.
 */
export function checkDeduplication(
  newEmbedding: number[], newText: string, existingMemories: MemoryRecord[],
  config: MemoryConfig = DEFAULT_MEMORY_CONFIG
): DeduplicationResult {
  let bestScore = 0;
  let bestMatch: MemoryRecord | null = null;

  for (let idx = 0; idx < existingMemories.length; idx += 1) {
    const candidate = existingMemories[idx];
    const score = cosineSimilarity(newEmbedding, candidate.embedding);
    if (score > bestScore) {
      bestScore = score;
      bestMatch = candidate;
    }
  }

  // Nothing scored above zero: there is no memory to dedupe or merge against.
  if (bestMatch === null) {
    return { operation: 'ADD' };
  }

  if (bestScore >= config.deduplicationThreshold) {
    return { operation: 'NOOP', existingMemoryId: bestMatch.id };
  }

  if (bestScore >= config.mergeThreshold) {
    const mergedText = mergeMemoryTexts(bestMatch.text, newText);
    return { operation: 'UPDATE', existingMemoryId: bestMatch.id, mergedText: mergedText };
  }

  return { operation: 'ADD' };
}

/**
 * Combines the text of an existing memory with a new, similar text.
 *
 * Containment is tested on trimmed, lower-cased copies of both texts; the
 * value returned is always one of the original (unnormalized) inputs or
 * their concatenation.
 *
 * @param existingText - Text currently stored for the memory.
 * @param newText - Incoming text to fold into it.
 * @param maxLength - Maximum length, in UTF-16 code units, of a merged result.
 * @returns In order of precedence:
 *   - `existingText` when it already contains `newText`;
 *   - `newText` when it contains `existingText`;
 *   - `"<existingText>; <newText>"` when that fits in `maxLength`;
 *   - `newText` alone when only it fits in `maxLength`;
 *   - otherwise `newText` truncated to at most `maxLength` code units.
 */
function mergeMemoryTexts(existingText: string, newText: string, maxLength = 200): string {
  const existingLower = existingText.toLowerCase().trim();
  const newLower = newText.toLowerCase().trim();

  // Keep the text that actually contains the other one. Choosing by raw
  // length would let a whitespace-padded fragment replace the fuller text,
  // because padding is ignored by the containment test but counts in length.
  if (existingLower.includes(newLower)) return existingText;
  if (newLower.includes(existingLower)) return newText;

  const merged = `${existingText}; ${newText}`;
  if (merged.length <= maxLength) return merged;
  if (newText.length <= maxLength) return newText;

  // Truncate, but never end on a lone high surrogate: cutting between the two
  // halves of an astral character would leave an invalid trailing code unit.
  let end = maxLength;
  const lastUnit = newText.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return newText.substring(0, end);
}

/**
 * Builds the parameterized SQL that fetches the stored memories most similar
 * to `embedding`.
 *
 * The statement selects the memory columns plus a computed `similarity`
 * (`1 - vec_distance_cosine(...)`), keeps rows whose similarity is at least
 * `minSimilarity`, orders them by similarity descending and returns at most 5.
 *
 * @param embedding - Query embedding; it is JSON-serialized and bound to both
 *                    `vec_f32(?)` placeholders.
 * @param minSimilarity - Lowest similarity a row may have to be returned.
 *                        Defaults to 0.65, the value that was previously
 *                        hard-coded.
 * @returns The SQL text and its positional parameters, in placeholder order.
 */
export function buildDeduplicationQuery(embedding: number[], minSimilarity = 0.65) {
  // Serialize once; the same vector is bound to both placeholders.
  const serializedEmbedding = JSON.stringify(embedding);
  return {
    sql: `SELECT id, text, embedding, type, source, created_at, last_accessed_at, access_count, importance,
           (1 - vec_distance_cosine(embedding, vec_f32(?))) AS similarity
           FROM memories WHERE (1 - vec_distance_cosine(embedding, vec_f32(?))) >= ? ORDER BY similarity DESC LIMIT 5`,
    params: [serializedEmbedding, serializedEmbedding, minSimilarity],
  };
}

/**
 * Converts the `embedding` column of a fetched row into a plain number array.
 *
 * Rows are written with `vec_f32(?)`, so the column may come back as raw
 * float32 bytes rather than JSON text. Strings are still parsed as JSON, so
 * drivers that return text behave as before.
 * NOTE(review): assumes byte values hold native-endian float32 data, as
 * produced by sqlite-vec's `vec_f32` — confirm against the driver in use.
 */
function decodeStoredEmbedding(raw: unknown): number[] {
  if (typeof raw === 'string') return JSON.parse(raw);
  if (Array.isArray(raw)) return raw;
  if (raw instanceof Float32Array) return Array.from(raw);
  if (raw instanceof ArrayBuffer || ArrayBuffer.isView(raw)) {
    const bytes = raw instanceof ArrayBuffer
      ? new Uint8Array(raw)
      : new Uint8Array(raw.buffer, raw.byteOffset, raw.byteLength);
    // Copy first: the source view may start at an offset that is not 4-byte
    // aligned, which Float32Array refuses to wrap directly.
    const aligned = bytes.slice();
    return Array.from(new Float32Array(aligned.buffer, 0, Math.floor(aligned.byteLength / 4)));
  }
  // Anything else goes through the original JSON path.
  return JSON.parse(raw as string);
}

/**
 * Stores a memory after checking it against similar memories already in the
 * database.
 *
 * Candidate rows are fetched with `buildDeduplicationQuery`, mapped to
 * `MemoryRecord`s and passed to `checkDeduplication`, whose verdict selects
 * the write:
 *   - `ADD`: inserts a new row with a generated id, `access_count` 0 and an
 *     importance from `computeImportance(text)`.
 *   - `UPDATE`: overwrites the matched row's text with the merged text and
 *     its embedding with the new one, and bumps the access bookkeeping.
 *   - `NOOP`: only bumps the matched row's `last_accessed_at` and
 *     `access_count`.
 *
 * @param db - Database handle; must provide `getAll(sql, params)` and
 *             `run(sql, params)`.
 * @param text - Text of the memory to store.
 * @param embedding - Embedding of `text`.
 * @param type - Value written to the `type` column on insert.
 * @param source - Value written to the `source` column on insert.
 * @param config - Thresholds forwarded to `checkDeduplication`.
 * @returns The operation performed and the id of the affected memory.
 */
export async function storeMemoryWithDeduplication(
  db: any, text: string, embedding: number[], type: MemoryType, source: MemorySource,
  config: MemoryConfig = DEFAULT_MEMORY_CONFIG
): Promise<{ operation: MemoryOperation; memoryId?: string }> {
  const { sql, params } = buildDeduplicationQuery(embedding);
  const candidates = await db.getAll(sql, params);
  const existingMemories: MemoryRecord[] = candidates.map((row: any) => ({
    id: row.id, text: row.text, embedding: decodeStoredEmbedding(row.embedding), type: row.type,
    source: row.source, createdAt: row.created_at, lastAccessedAt: row.last_accessed_at,
    accessCount: row.access_count, importance: row.importance,
  }));
  const result = checkDeduplication(embedding, text, existingMemories, config);
  switch (result.operation) {
    case 'ADD': {
      // One timestamp for the id and both time columns keeps them consistent.
      const now = Date.now();
      const id = `mem_${now}_${Math.random().toString(36).substring(2, 9)}`;
      await db.run(`INSERT INTO memories (id, text, embedding, type, source, created_at, last_accessed_at, access_count, importance) VALUES (?, ?, vec_f32(?), ?, ?, ?, ?, 0, ?)`,
        [id, text, JSON.stringify(embedding), type, source, now, now, computeImportance(text)]);
      return { operation: 'ADD', memoryId: id };
    }
    case 'UPDATE':
      // NOTE(review): the stored embedding becomes that of `text` alone, while
      // the stored text becomes the merged text; re-embedding the merged text
      // may be more accurate — confirm the intended behavior.
      await db.run(`UPDATE memories SET text = ?, embedding = vec_f32(?), last_accessed_at = ?, access_count = access_count + 1 WHERE id = ?`,
        [result.mergedText, JSON.stringify(embedding), Date.now(), result.existingMemoryId]);
      return { operation: 'UPDATE', memoryId: result.existingMemoryId };
    case 'NOOP':
      await db.run(`UPDATE memories SET last_accessed_at = ?, access_count = access_count + 1 WHERE id = ?`, [Date.now(), result.existingMemoryId]);
      return { operation: 'NOOP', memoryId: result.existingMemoryId };
    // Any other operation value is treated as "nothing written".
    default: return { operation: 'NOOP' };
  }
}

/**
 * Heuristic importance score for a memory text.
 *
 * Starts from 0.5 and adds a fixed bonus for each rule the text satisfies:
 * length above 50 and above 100 characters, strong-preference or necessity
 * words, self-describing phrases, date-like event words, and any digit.
 *
 * @param text - Memory text to score.
 * @returns The accumulated score, capped at 1.0.
 */
function computeImportance(text: string): number {
  // Each entry pairs "does the rule apply?" with the bonus it contributes.
  // Order matters only for floating-point summation and mirrors the rules above.
  const rules: Array<[boolean, number]> = [
    [text.length > 50, 0.1],
    [text.length > 100, 0.1],
    [/\b(allergic|allergy|hate|love|always|never|important|must|need)\b/i.test(text), 0.15],
    [/\b(my name|i am|i'm|i live|my job|my work|i work)\b/i.test(text), 0.1],
    [/\b(birthday|anniversary|deadline|appointment)\b/i.test(text), 0.1],
    [/\d+/.test(text), 0.05],
  ];

  let total = 0.5;
  for (const [applies, bonus] of rules) {
    if (applies) {
      total += bonus;
    }
  }
  return Math.min(total, 1.0);
}

import { MemoryType, MemorySource, MemoryOperation } from './types';