gcharanteja
feat: implement enhanced 3-tier query search with fuzzy matching and LLM reranking
a964d2d | import fs from "fs-extra"; | |
| import path from "path"; | |
| import matter from "gray-matter"; | |
// Module-level memo of the parsed context-tree documents.
// `null` means nothing has been loaded yet; an array (possibly empty) means a
// load has already been attempted and its result is being reused.
let documentCache = null;
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-unit insertions, deletions and substitutions needed to turn
 * `str1` into `str2`. Comparison is done per indexed element (UTF-16 code unit
 * for strings).
 *
 * @param {string} str1 - Source string.
 * @param {string} str2 - Target string.
 * @returns {number} The edit distance (0 when the inputs are identical).
 */
function levenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;

  // table[r][c] holds the distance between the first r units of str1 and the
  // first c units of str2. Column 0 is seeded with r (delete everything).
  const table = new Array(rows + 1);
  for (let r = 0; r <= rows; r += 1) {
    const row = new Array(cols + 1).fill(0);
    row[0] = r;
    table[r] = row;
  }
  // Row 0 is seeded with c (insert everything).
  for (let c = 0; c <= cols; c += 1) {
    table[0][c] = c;
  }

  for (let r = 1; r <= rows; r += 1) {
    const above = table[r - 1];
    const current = table[r];
    for (let c = 1; c <= cols; c += 1) {
      current[c] =
        str1[r - 1] === str2[c - 1]
          ? above[c - 1] // units match: no extra cost
          : 1 + Math.min(above[c], current[c - 1], above[c - 1]);
    }
  }

  return table[rows][cols];
}
/**
 * Returns a normalised similarity score between two strings, derived from
 * their Levenshtein distance: `1 - distance / max(length)`.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} A value in [0, 1]: 1 for identical strings (including two
 *   empty strings), 0 when either input is missing or exactly one is empty.
 */
export function calculateStringSimilarity(str1, str2) {
  // Identical strings are a perfect match. This must run before the falsy
  // guard below, otherwise two empty strings are reported as 0 and the
  // "both empty" case can never yield 1.
  if (typeof str1 === "string" && str1 === str2) return 1;

  // Missing input (null/undefined/other falsy) or exactly one empty string.
  if (!str1 || !str2) return 0;

  const maxLen = Math.max(str1.length, str2.length);
  // Kept for truthy zero-length inputs (e.g. empty arrays) to avoid 0 / 0.
  if (maxLen === 0) return 1;

  return 1 - levenshteinDistance(str1, str2) / maxLen;
}
// Path of the context-tree directory whose documents currently populate
// `documentCache`. Lets a call with a different `dataDir` bypass a cache that
// was built for another location.
let documentCacheSource = null;

/**
 * Loads every Markdown document found under `<dataDir>/context-tree/<topic>/`
 * and returns them as plain records. Front matter is parsed with `matter`;
 * missing fields fall back to defaults derived from the file and folder names.
 *
 * The result is memoised in the module-level `documentCache` per context-tree
 * path; a repeated call for the same directory returns the cached array, while
 * a call for a different directory triggers a fresh load. File-system access
 * is synchronous even though the function is declared `async`.
 *
 * @param {string} [dataDir="/data"] - Directory that contains `context-tree`.
 * @returns {Promise<Array<{id: string, title: string, topic: string,
 *   type: string, text: string, importance: number, metadata: object,
 *   filePath: string}>>} The loaded documents. An empty array is returned (and
 *   cached) when the directory does not exist or any read/parse step throws.
 */
export async function loadContextTreeDocuments(dataDir = "/data") {
  const MARKDOWN_EXT = ".md";
  const contextTreePath = path.join(dataDir, "context-tree");

  if (documentCache !== null && documentCacheSource === contextTreePath) {
    return documentCache;
  }

  let docs = [];
  try {
    if (fs.existsSync(contextTreePath)) {
      for (const topic of fs.readdirSync(contextTreePath)) {
        const topicPath = path.join(contextTreePath, topic);
        // Only sub-directories are treated as topics; stray files are ignored.
        if (!fs.statSync(topicPath).isDirectory()) continue;

        for (const file of fs.readdirSync(topicPath)) {
          if (!file.endsWith(MARKDOWN_EXT)) continue;

          const filePath = path.join(topicPath, file);
          const raw = fs.readFileSync(filePath, "utf8");
          const { data, content: body } = matter(raw);

          docs.push({
            // Strip only the trailing extension; replacing the first ".md"
            // occurrence would mangle names such as "notes.md.draft.md".
            id: data.id || file.slice(0, -MARKDOWN_EXT.length),
            title: data.title || "Untitled",
            topic: data.topic || topic,
            type: data.type || "context",
            text: `${data.title || ""} ${body}`.trim(),
            importance: data.importance || 5,
            metadata: data,
            filePath,
          });
        }
      }
    }
  } catch (error) {
    console.error("Error loading context-tree documents:", error.message);
    // A failure anywhere discards partial results, matching the all-or-nothing
    // contract callers already rely on.
    docs = [];
  }

  documentCache = docs;
  documentCacheSource = contextTreePath;
  return docs;
}
/**
 * Merges vector-search hits and fuzzy-search hits into one ranked result set.
 *
 * Each distinct id receives
 * `combinedScore = vectorScore * w.vector + fuzzyScore * w.fuzzy + importanceBonus * w.importance`
 * where `vectorScore = 1 / (1 + distance)`, `fuzzyScore` is the hit's `score`,
 * and `importanceBonus` is `importance / 10` for ids seen in `fuzzyResults`
 * (importance defaults to 5) or `1` for ids that only appear in
 * `vectorResults`.
 *
 * @param {object|null|undefined} vectorResults - Object with `ids`,
 *   `documents`, `metadatas` and `distances`, each an array of arrays; only
 *   the first inner array of each is read.
 * @param {Array<{id: *, score?: number, importance?: number, text?: string,
 *   metadata?: object}>|null|undefined} fuzzyResults - Fuzzy hits.
 * @param {number} [nResults=5] - Maximum number of results to return.
 * @param {{vector?: number, fuzzy?: number, importance?: number}} [weights]
 *   - Optional score weights; omitted keys default to 0.5 / 0.3 / 0.2.
 * @returns {{ids: Array<Array>, documents: Array<Array>, metadatas: Array<Array>,
 *   distances: Array<Array<number>>}} Top results, best first, in the same
 *   nested-array shape as `vectorResults`; each distance is `1 - combinedScore`.
 */
export function mergeSearchResults(
  vectorResults,
  fuzzyResults,
  nResults = 5,
  weights = {},
) {
  const {
    vector: vectorWeight = 0.5,
    fuzzy: fuzzyWeight = 0.3,
    importance: importanceWeight = 0.2,
  } = weights ?? {};

  const merged = new Map();

  const vectorIds = vectorResults?.ids?.[0];
  if (vectorIds) {
    vectorIds.forEach((id, idx) => {
      // First occurrence of an id wins; later duplicates are ignored.
      if (merged.has(id)) return;

      const distance = vectorResults.distances?.[0]?.[idx] || 0;
      merged.set(id, {
        id,
        document: vectorResults.documents?.[0]?.[idx] || "",
        metadata: vectorResults.metadatas?.[0]?.[idx] || {},
        // Map a distance in [0, inf) onto a similarity in (0, 1].
        vectorScore: 1 / (1 + distance),
        fuzzyScore: 0,
        importanceBonus: 1,
      });
    });
  }

  if (Array.isArray(fuzzyResults)) {
    fuzzyResults.forEach((doc) => {
      const importanceBonus = (doc.importance || 5) / 10;
      const existing = merged.get(doc.id);

      if (merged.has(doc.id)) {
        // Already found by vector search: keep its document/metadata and
        // fold in the fuzzy evidence.
        existing.fuzzyScore = Math.max(existing.fuzzyScore, doc.score || 0);
        existing.importanceBonus = importanceBonus;
      } else {
        merged.set(doc.id, {
          id: doc.id,
          document: doc.text || "",
          metadata: doc.metadata || {},
          vectorScore: 0,
          fuzzyScore: doc.score || 0,
          importanceBonus,
        });
      }
    });
  }

  const ranked = Array.from(merged.values(), (item) => ({
    ...item,
    combinedScore:
      item.vectorScore * vectorWeight +
      item.fuzzyScore * fuzzyWeight +
      item.importanceBonus * importanceWeight,
  }));
  ranked.sort((a, b) => b.combinedScore - a.combinedScore);

  // Slice once and project each output column from the same window.
  const top = ranked.slice(0, nResults);
  return {
    ids: [top.map((r) => r.id)],
    documents: [top.map((r) => r.document)],
    metadatas: [top.map((r) => r.metadata)],
    distances: [top.map((r) => 1 - r.combinedScore)],
  };
}
/**
 * Discards the memoised context-tree documents by resetting `documentCache`
 * to `null`.
 *
 * @returns {void}
 */
export function clearDocumentCache() {
  documentCache = null;
}