memory / utils.js
gcharanteja
feat: implement enhanced 3-tier query search with fuzzy matching and LLM reranking
a964d2d
Raw
History Blame Contribute Delete
4.18 kB
import fs from "fs-extra";
import path from "path";
import matter from "gray-matter";
// Module-level cache of the documents returned by loadContextTreeDocuments().
// `null` means "not loaded yet"; an array (possibly empty) means a load was attempted.
let documentCache = null;
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-unit insertions, deletions and substitutions needed to
 * turn one into the other. Comparison is per UTF-16 code unit and
 * case-sensitive.
 *
 * Only two rows of the dynamic-programming table are kept, sized by the
 * shorter input, so memory is O(min(m, n)) rather than O(m * n).
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} The edit distance (0 when the strings are equal).
 */
function levenshteinDistance(str1, str2) {
  // The distance is symmetric, so put the shorter string on the column axis
  // to keep the row buffers as small as possible.
  let rowString = str1;
  let colString = str2;
  if (rowString.length < colString.length) {
    [rowString, colString] = [colString, rowString];
  }

  const rows = rowString.length;
  const cols = colString.length;

  // previous[j] = distance between the first (i - 1) units of rowString and
  // the first j units of colString; row 0 is simply "insert j units".
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);
  let current = new Array(cols + 1).fill(0);

  for (let i = 1; i <= rows; i++) {
    current[0] = i; // deleting i units to reach the empty prefix
    for (let j = 1; j <= cols; j++) {
      if (rowString[i - 1] === colString[j - 1]) {
        current[j] = previous[j - 1];
      } else {
        current[j] =
          1 + Math.min(previous[j], current[j - 1], previous[j - 1]);
      }
    }
    // Recycle the buffers instead of allocating a new row per iteration.
    [previous, current] = [current, previous];
  }

  return previous[cols];
}
/**
 * Returns a normalized similarity score for two strings, based on their
 * Levenshtein distance: `1 - distance / max(len1, len2)`.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} A value in [0, 1]; 1 means identical. Returns 0 when
 *   either argument is missing or empty — including when both are empty.
 */
export function calculateStringSimilarity(str1, str2) {
  // Empty strings are falsy, so this guard also covers the "both empty" case.
  // Past this point both lengths are >= 1 and the divisor below is never 0.
  if (!str1 || !str2) return 0;

  const maxLen = Math.max(str1.length, str2.length);
  return 1 - levenshteinDistance(str1, str2) / maxLen;
}
/**
 * Loads every Markdown document found at `<dataDir>/context-tree/<topic>/*.md`,
 * parses its front matter with gray-matter, and returns one record per file.
 *
 * The result is cached at module level. Once the cache is populated, later
 * calls return it as-is regardless of the `dataDir` argument, until
 * clearDocumentCache() is called. A missing context-tree directory or a
 * failure while listing directories also caches an empty list.
 *
 * A file that cannot be read or parsed is logged and skipped, so one bad
 * document does not discard the rest.
 *
 * @param {string} [dataDir="/data"] - Directory containing `context-tree`.
 * @returns {Promise<Array<{id: string, title: string, topic: string,
 *   type: string, text: string, importance: number, metadata: object,
 *   filePath: string}>>} The loaded documents (empty on failure).
 */
export async function loadContextTreeDocuments(dataDir = "/data") {
  if (documentCache !== null) return documentCache;

  const markdownExt = ".md";
  const contextTreePath = path.join(dataDir, "context-tree");
  const docs = [];

  try {
    if (!fs.existsSync(contextTreePath)) {
      documentCache = [];
      return [];
    }

    for (const topic of fs.readdirSync(contextTreePath)) {
      const topicPath = path.join(contextTreePath, topic);
      if (!fs.statSync(topicPath).isDirectory()) continue;

      for (const file of fs.readdirSync(topicPath)) {
        if (!file.endsWith(markdownExt)) continue;
        const filePath = path.join(topicPath, file);

        try {
          const content = fs.readFileSync(filePath, "utf8");
          const { data, content: body } = matter(content);
          docs.push({
            // Strip only the trailing extension; String#replace with a string
            // pattern would remove the first ".md" anywhere in the name.
            id: data.id || file.slice(0, -markdownExt.length),
            title: data.title || "Untitled",
            topic: data.topic || topic,
            type: data.type || "context",
            text: `${data.title || ""} ${body}`.trim(),
            importance: data.importance || 5,
            metadata: data,
            filePath,
          });
        } catch (error) {
          // Isolate per-file failures (bad front matter, unreadable entry).
          console.error(
            `Skipping context-tree document ${filePath}:`,
            error.message,
          );
        }
      }
    }

    documentCache = docs;
    return docs;
  } catch (error) {
    console.error("Error loading context-tree documents:", error.message);
    documentCache = [];
    return [];
  }
}
/**
 * Combines vector-search hits and fuzzy-match hits into one ranked result set.
 *
 * Each distinct id gets three components:
 *   - vectorScore: `1 / (1 + distance)` for vector hits, else 0
 *   - fuzzyScore: the highest `score` seen for that id among fuzzy hits, else 0
 *   - importanceBonus: `(importance || 5) / 10` from the fuzzy hit; ids that
 *     only appear in the vector results keep a bonus of 1
 * and is ranked by `0.5 * vectorScore + 0.3 * fuzzyScore + 0.2 * importanceBonus`.
 *
 * @param {object} vectorResults - Object with parallel nested arrays `ids`,
 *   `distances`, `documents`, `metadatas`; only index 0 of each is read.
 * @param {Array<object>} fuzzyResults - Hits with `id`, and optionally
 *   `score`, `importance`, `text`, `metadata`.
 * @param {number} [nResults=5] - Maximum number of results to return.
 * @returns {{ids: Array<Array>, documents: Array<Array>, metadatas: Array<Array>,
 *   distances: Array<Array<number>>}} Top results in the same nested-array
 *   layout, where each distance is `1 - combinedScore`.
 */
export function mergeSearchResults(vectorResults, fuzzyResults, nResults = 5) {
  const byId = new Map();

  // Pass 1: seed the map with vector hits (first occurrence of an id wins).
  const vectorIds = vectorResults?.ids?.[0];
  if (vectorIds) {
    vectorIds.forEach((id, position) => {
      if (byId.has(id)) return;

      const rawDistance = vectorResults.distances?.[0]?.[position] || 0;
      byId.set(id, {
        id,
        document: vectorResults.documents?.[0]?.[position] || "",
        metadata: vectorResults.metadatas?.[0]?.[position] || {},
        vectorScore: 1 / (1 + rawDistance),
        fuzzyScore: 0,
        importanceBonus: 1,
      });
    });
  }

  // Pass 2: fold fuzzy hits in, either enriching a vector hit or adding a new entry.
  if (Array.isArray(fuzzyResults)) {
    fuzzyResults.forEach((hit) => {
      const bonus = (hit.importance || 5) / 10;
      const known = byId.get(hit.id);

      if (known === undefined) {
        byId.set(hit.id, {
          id: hit.id,
          document: hit.text || "",
          metadata: hit.metadata || {},
          vectorScore: 0,
          fuzzyScore: hit.score || 0,
          importanceBonus: bonus,
        });
        return;
      }

      known.fuzzyScore = Math.max(known.fuzzyScore, hit.score || 0);
      known.importanceBonus = bonus;
    });
  }

  // Pass 3: score, rank best-first, and keep the top nResults.
  const ranked = [];
  for (const entry of byId.values()) {
    const combinedScore =
      entry.vectorScore * 0.5 +
      entry.fuzzyScore * 0.3 +
      entry.importanceBonus * 0.2;
    ranked.push({ ...entry, combinedScore });
  }
  ranked.sort((left, right) => right.combinedScore - left.combinedScore);

  const top = ranked.slice(0, nResults);
  return {
    ids: [top.map(({ id }) => id)],
    documents: [top.map(({ document }) => document)],
    metadatas: [top.map(({ metadata }) => metadata)],
    distances: [top.map(({ combinedScore }) => 1 - combinedScore)],
  };
}
/**
 * Resets the module-level document cache to `null`, so the next call to
 * loadContextTreeDocuments() reads the files from disk again.
 */
export function clearDocumentCache() {
documentCache = null;
}