import fs from "fs-extra";
import path from "path";
import matter from "gray-matter";

/** File-name suffix that identifies a context-tree document. */
const MARKDOWN_EXTENSION = ".md";

/** Weights applied to each signal when computing a merged result's combined score. */
const SCORE_WEIGHTS = Object.freeze({ vector: 0.5, fuzzy: 0.3, importance: 0.2 });

// Module-level memo of the loaded documents; `null` means "not loaded yet".
// Note: the cache is NOT keyed by directory — see loadContextTreeDocuments.
let documentCache = null;

/**
 * Computes the Levenshtein (edit) distance between two strings, comparing
 * them one UTF-16 code unit at a time.
 *
 * Only two rows of the dynamic-programming table are kept alive, so memory
 * use is proportional to `str2.length` rather than to the product of both
 * lengths.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions needed to turn `str1` into `str2`.
 */
function levenshteinDistance(str1, str2) {
  const m = str1.length;
  const n = str2.length;

  // Turning an empty string into the other one takes one insertion per unit.
  if (m === 0) return n;
  if (n === 0) return m;

  // previousRow[j] = distance between the first (i - 1) units of str1 and
  // the first j units of str2; it starts as the row for the empty prefix.
  let previousRow = Array.from({ length: n + 1 }, (_, j) => j);
  let currentRow = new Array(n + 1).fill(0);

  for (let i = 1; i <= m; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= n; j++) {
      currentRow[j] =
        str1[i - 1] === str2[j - 1]
          ? previousRow[j - 1]
          : 1 + Math.min(previousRow[j], currentRow[j - 1], previousRow[j - 1]);
    }
    // Reuse the two buffers instead of allocating a new row per iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  // After the final swap, previousRow holds the last computed row.
  return previousRow[n];
}

/**
 * Returns a normalised similarity between two strings, derived from their
 * Levenshtein distance.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} `1 - distance / max(length)`; `1` for identical strings.
 *   Returns `0` whenever either argument is falsy (missing or empty), which
 *   includes the case where both are empty strings.
 */
export function calculateStringSimilarity(str1, str2) {
  if (!str1 || !str2) return 0;

  // Both values are non-empty past the guard, so maxLen is at least 1.
  const maxLen = Math.max(str1.length, str2.length);
  const distance = levenshteinDistance(str1, str2);
  return 1 - distance / maxLen;
}

/**
 * Reads one markdown file and converts it into a document record.
 *
 * @param {string} filePath - Full path of the file to read.
 * @param {string} file - Bare file name; must end with ".md".
 * @param {string} topic - Name of the directory containing the file, used
 *   when the front matter does not specify a topic.
 * @returns {object} Document with `id`, `title`, `topic`, `type`, `text`,
 *   `importance`, `metadata` (the parsed front matter) and `filePath`.
 * @throws If the file cannot be read or `matter` fails to parse it.
 */
function readContextDocument(filePath, file, topic) {
  const content = fs.readFileSync(filePath, "utf8");
  const { data, content: body } = matter(content);

  return {
    // Strip only the trailing extension; a ".md" elsewhere in the name stays.
    id: data.id || file.slice(0, -MARKDOWN_EXTENSION.length),
    title: data.title || "Untitled",
    topic: data.topic || topic,
    type: data.type || "context",
    text: `${data.title || ""} ${body}`.trim(),
    importance: data.importance || 5,
    metadata: data,
    filePath,
  };
}

/**
 * Loads every `*.md` file found one level below `<dataDir>/context-tree`
 * (i.e. `<dataDir>/context-tree/<topic>/<file>.md`) and caches the result
 * in module state.
 *
 * The cache is not keyed by `dataDir`: once populated, every later call
 * returns the cached array regardless of the argument, until
 * `clearDocumentCache()` is called.
 *
 * A file that cannot be read or parsed is logged and skipped. If the
 * directory walk itself fails, the error is logged and an empty list is
 * cached and returned.
 *
 * @param {string} [dataDir="/data"] - Directory containing `context-tree`.
 * @returns {Promise<object[]>} The loaded documents; empty when the
 *   `context-tree` directory does not exist or cannot be walked.
 */
export async function loadContextTreeDocuments(dataDir = "/data") {
  if (documentCache !== null) return documentCache;

  const docs = [];
  const contextTreePath = path.join(dataDir, "context-tree");

  try {
    if (!fs.existsSync(contextTreePath)) {
      documentCache = [];
      return [];
    }

    for (const topic of fs.readdirSync(contextTreePath)) {
      const topicPath = path.join(contextTreePath, topic);
      if (!fs.statSync(topicPath).isDirectory()) continue;

      for (const file of fs.readdirSync(topicPath)) {
        if (!file.endsWith(MARKDOWN_EXTENSION)) continue;

        const filePath = path.join(topicPath, file);
        try {
          docs.push(readContextDocument(filePath, file, topic));
        } catch (error) {
          // One bad file must not discard every other document.
          console.error(
            `Error loading context-tree document ${filePath}:`,
            error.message,
          );
        }
      }
    }

    documentCache = docs;
    return docs;
  } catch (error) {
    console.error("Error loading context-tree documents:", error.message);
    documentCache = [];
    return [];
  }
}

/**
 * Merges vector-search results with fuzzy-search results into one ranked
 * list and returns the top `nResults` entries.
 *
 * Each entry is scored as
 * `vectorScore * 0.5 + fuzzyScore * 0.3 + importanceBonus * 0.2`, where
 * `vectorScore` is `1 / (1 + distance)`, `fuzzyScore` is the fuzzy `score`,
 * and `importanceBonus` is `importance / 10` for ids present in
 * `fuzzyResults` (importance defaults to 5) and `1` for ids that only
 * appear in `vectorResults`.
 *
 * @param {object} [vectorResults] - Object whose `ids`, `distances`,
 *   `documents` and `metadatas` properties each hold the result list at
 *   index 0. Only `ids[0]` is required; missing values default to distance
 *   `0`, document `""` and metadata `{}`.
 * @param {object[]} [fuzzyResults] - Entries with `id` and optional `score`,
 *   `importance`, `text` and `metadata`.
 * @param {number} [nResults=5] - Maximum number of entries to return.
 * @returns {{ids: Array[], documents: Array[], metadatas: Array[], distances: Array[]}}
 *   Parallel single-element arrays in the same shape as the vector input,
 *   ordered by descending combined score; each distance is
 *   `1 - combinedScore`.
 */
export function mergeSearchResults(vectorResults, fuzzyResults, nResults = 5) {
  const merged = new Map();

  const vectorIds = vectorResults?.ids?.[0];
  if (vectorIds) {
    vectorIds.forEach((id, idx) => {
      // Keep the first occurrence when the same id is listed twice.
      if (merged.has(id)) return;

      const distance = vectorResults.distances?.[0]?.[idx] || 0;
      merged.set(id, {
        id,
        document: vectorResults.documents?.[0]?.[idx] || "",
        metadata: vectorResults.metadatas?.[0]?.[idx] || {},
        vectorScore: 1 / (1 + distance),
        fuzzyScore: 0,
        importanceBonus: 1,
      });
    });
  }

  if (Array.isArray(fuzzyResults)) {
    for (const doc of fuzzyResults) {
      const importanceBonus = (doc.importance || 5) / 10;
      const existing = merged.get(doc.id);

      if (existing) {
        existing.fuzzyScore = Math.max(existing.fuzzyScore, doc.score || 0);
        existing.importanceBonus = importanceBonus;
      } else {
        merged.set(doc.id, {
          id: doc.id,
          document: doc.text || "",
          metadata: doc.metadata || {},
          vectorScore: 0,
          fuzzyScore: doc.score || 0,
          importanceBonus,
        });
      }
    }
  }

  const ranked = Array.from(merged.values(), (item) => ({
    ...item,
    combinedScore:
      item.vectorScore * SCORE_WEIGHTS.vector +
      item.fuzzyScore * SCORE_WEIGHTS.fuzzy +
      item.importanceBonus * SCORE_WEIGHTS.importance,
  }));
  ranked.sort((a, b) => b.combinedScore - a.combinedScore);

  // Truncate once and project the same window into each parallel array.
  const top = ranked.slice(0, nResults);

  return {
    ids: [top.map((r) => r.id)],
    documents: [top.map((r) => r.document)],
    metadatas: [top.map((r) => r.metadata)],
    distances: [top.map((r) => 1 - r.combinedScore)],
  };
}

/**
 * Discards the cached documents so the next call to
 * `loadContextTreeDocuments` reads from disk again.
 */
export function clearDocumentCache() {
  documentCache = null;
}