import { graphMemoryService } from '../../memory/GraphMemoryService';
import { neo4jService } from '../../database/Neo4jService';

/** Milliseconds in one day; used to convert a result's age into days. */
const MS_PER_DAY = 1000 * 60 * 60 * 24;

/**
 * Split text into lowercase, whitespace-separated tokens.
 * Empty tokens (produced by leading/trailing whitespace) are dropped.
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .split(/\s+/)
    .filter(token => token.length > 0);
}

/** Escape regex metacharacters so `text` can be embedded literally in a RegExp. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Query Expansion - Expands user query with synonyms and related terms
 */
export class QueryExpander {
  private synonymMap: Map<string, string[]> = new Map<string, string[]>([
    ['find', ['search', 'locate', 'discover', 'retrieve']],
    ['show', ['display', 'present', 'reveal', 'demonstrate']],
    ['create', ['make', 'build', 'generate', 'construct']],
    ['delete', ['remove', 'erase', 'eliminate', 'destroy']],
    ['update', ['modify', 'change', 'alter', 'revise']],
  ]);

  /**
   * Expand query with synonyms and related terms.
   *
   * The original query is always the first element of the result. Synonym
   * variants are produced by replacing whole-word occurrences of a known
   * verb with each of its synonyms. Graph-derived variants append a related
   * concept name to the original query. A graph failure is logged and the
   * variants collected so far are still returned.
   *
   * @param query - The user's raw query string.
   * @returns De-duplicated list of query variations, original query first.
   */
  async expandQuery(query: string): Promise<string[]> {
    const expandedTerms = new Set<string>([query]);

    // Add synonyms. Each distinct word is processed once.
    for (const word of new Set(tokenize(query))) {
      const synonyms = this.synonymMap.get(word);
      if (!synonyms) {
        continue;
      }

      // Word boundaries keep "find" from rewriting the inside of "finder".
      const wholeWord = new RegExp(`\\b${escapeRegExp(word)}\\b`, 'gi');
      for (const synonym of synonyms) {
        // Function replacer: the synonym is inserted verbatim.
        expandedTerms.add(query.replace(wholeWord, () => synonym));
      }
    }

    // Add semantic variations using Neo4j graph
    let connected = false;
    try {
      await neo4jService.connect();
      connected = true;

      // Find related concepts in the graph
      const relatedConcepts = await neo4jService.runQuery(
        `MATCH (n:Entity)
         WHERE toLower(n.name) CONTAINS $query OR toLower(n.content) CONTAINS $query
         MATCH (n)-[:RELATED_TO|SIMILAR_TO]-(related)
         RETURN DISTINCT related.name as concept
         LIMIT 5`,
        { query: query.toLowerCase() }
      );

      relatedConcepts.forEach((record: any) => {
        if (record.concept) {
          expandedTerms.add(`${query} ${record.concept}`);
        }
      });
    } catch (error) {
      console.warn('Query expansion from graph failed:', error);
    } finally {
      // Disconnect even when the query above threw, but only if connect() succeeded.
      if (connected) {
        try {
          await neo4jService.disconnect();
        } catch (error) {
          console.warn('Failed to disconnect from Neo4j after query expansion:', error);
        }
      }
    }

    return Array.from(expandedTerms);
  }

  /**
   * Extract key phrases from query using simple n-gram extraction.
   *
   * @param query - The query to extract phrases from.
   * @returns All lowercase bigrams followed by all lowercase trigrams,
   *   each in order of appearance. Empty when the query has fewer than
   *   two words.
   */
  extractKeyPhrases(query: string): string[] {
    const words = tokenize(query);
    const phrases: string[] = [];

    // Bigrams first, then trigrams.
    for (const size of [2, 3]) {
      for (let start = 0; start + size <= words.length; start++) {
        phrases.push(words.slice(start, start + size).join(' '));
      }
    }

    return phrases;
  }
}

/**
 * Result Re-ranker - Re-ranks search results using multiple signals
 */
export class ResultReRanker {
  /**
   * Re-rank results using multiple scoring signals.
   *
   * Signals are applied multiplicatively to each result's incoming score:
   * - recency (`useRecency`): up to +20%, decaying exponentially with a
   *   30-day constant from `metadata.createdAt`;
   * - popularity (`usePopularity`): up to +30%, scaled by the number of
   *   relationships returned by the graph, capped at 10;
   * - exact match: x1.5 when the content contains the query
   *   (case-insensitive). This signal is always applied.
   *
   * `useSemanticSimilarity` is accepted but not currently used.
   *
   * @param query - The original user query.
   * @param results - Candidate results; the array and its items are not mutated.
   * @param options - Which optional signals to apply.
   * @returns New result objects with updated scores, sorted by score descending.
   */
  async rerank(
    query: string,
    results: Array<{ id: string; content: string; score: number; metadata?: any }>,
    options: {
      useRecency?: boolean;
      usePopularity?: boolean;
      useSemanticSimilarity?: boolean;
    } = {}
  ): Promise<Array<{ id: string; content: string; score: number; metadata?: any }>> {
    if (results.length === 0) {
      return [];
    }

    const needle = query.toLowerCase();

    // Connect once for the whole batch. Connecting and disconnecting inside
    // every concurrent callback lets one callback's disconnect() race with
    // another callback's in-flight lookup on the shared service.
    let graphAvailable = false;
    if (options.usePopularity) {
      try {
        await neo4jService.connect();
        graphAvailable = true;
      } catch (error) {
        console.warn('Popularity boost skipped, graph connection failed:', error);
      }
    }

    try {
      const scoredResults = await Promise.all(
        results.map(async result => {
          let finalScore = result.score;

          // Recency boost
          if (options.useRecency && result.metadata?.createdAt) {
            const createdAtMs = new Date(result.metadata.createdAt).getTime();
            // An unparseable date yields NaN, which would poison the score
            // and the sort below, so such dates get no boost.
            if (Number.isFinite(createdAtMs)) {
              // Clamp future timestamps to age 0 so the boost never exceeds 20%.
              const daysSinceCreation = Math.max(0, Date.now() - createdAtMs) / MS_PER_DAY;
              const recencyBoost = Math.exp(-daysSinceCreation / 30); // Decay over 30 days
              finalScore *= 1 + recencyBoost * 0.2;
            }
          }

          // Popularity boost (based on connections in graph)
          if (graphAvailable) {
            try {
              const connections = await neo4jService.getNodeRelationships(result.id);
              const popularityBoost = Math.min(connections.length / 10, 1); // Cap at 10 connections
              finalScore *= 1 + popularityBoost * 0.3;
            } catch (error) {
              // Best effort: a failed lookup leaves this result without the boost.
              console.warn(`Popularity lookup failed for result ${result.id}:`, error);
            }
          }

          // Exact match boost. Content is checked to be a string because
          // callers may pass items whose content is missing.
          if (typeof result.content === 'string' && result.content.toLowerCase().includes(needle)) {
            finalScore *= 1.5;
          }

          return { ...result, score: finalScore };
        })
      );

      // Sort by final score
      return scoredResults.sort((a, b) => b.score - a.score);
    } finally {
      if (graphAvailable) {
        try {
          await neo4jService.disconnect();
        } catch (error) {
          console.warn('Failed to disconnect from Neo4j after re-ranking:', error);
        }
      }
    }
  }

  /**
   * Diversify results to avoid redundancy.
   *
   * Walks the results in order and keeps a result only when its Jaccard
   * word similarity to every already-kept result is at most `maxSimilarity`.
   *
   * @param results - Candidates, in priority order.
   * @param maxSimilarity - Similarity above which a candidate is dropped.
   * @param maxResults - Maximum number of results to keep (default 10).
   * @returns The kept results, in their original relative order.
   */
  diversify(
    results: Array<{ id: string; content: string; score: number }>,
    maxSimilarity: number = 0.8,
    maxResults: number = 10
  ): Array<{ id: string; content: string; score: number }> {
    const diversified: typeof results = [];

    for (const result of results) {
      // Stop if we have enough diverse results
      if (diversified.length >= maxResults) {
        break;
      }

      // Check if too similar to already selected results
      const tooSimilar = diversified.some(
        selected => this.computeTextSimilarity(result.content, selected.content) > maxSimilarity
      );

      if (!tooSimilar) {
        diversified.push(result);
      }
    }

    return diversified;
  }

  /**
   * Jaccard similarity between the lowercase word sets of two texts.
   *
   * @returns A value in [0, 1]; 1 when both texts contain no words.
   */
  private computeTextSimilarity(text1: string, text2: string): number {
    const words1 = new Set(tokenize(text1));
    const words2 = new Set(tokenize(text2));

    const union = new Set([...words1, ...words2]);
    if (union.size === 0) {
      // Two empty texts are identical; this also avoids 0 / 0.
      return 1;
    }

    const intersection = [...words1].filter(word => words2.has(word));
    return intersection.length / union.size;
  }
}

/**
 * Hybrid Search - Combines keyword and semantic search
 */
export class HybridSearch {
  private queryExpander = new QueryExpander();
  private reRanker = new ResultReRanker();

  /**
   * Perform hybrid search combining multiple strategies.
   *
   * Runs the entity search for the query (and, optionally, its expansions),
   * merges hits by id keeping the best-scoring occurrence, then optionally
   * re-ranks and diversifies before truncating to `limit`.
   *
   * @param query - The user's query.
   * @param options - `limit` defaults to 10; the remaining flags enable the
   *   corresponding pipeline stage.
   * @returns Up to `limit` entities, each extended with `content` (copied
   *   from the entity's `name`) and a numeric `score`, best first.
   */
  async search(
    query: string,
    options: {
      limit?: number;
      useQueryExpansion?: boolean;
      useReranking?: boolean;
      useDiversification?: boolean;
    } = {}
  ): Promise<Array<any>> {
    const limit = options.limit || 10;
    let queries = [query];

    // Query expansion
    if (options.useQueryExpansion) {
      queries = await this.queryExpander.expandQuery(query);
    }

    // Execute searches for all query variations, one at a time.
    const allResults = new Map<string, any>();

    for (const q of queries) {
      const hits = await graphMemoryService.searchEntities(q, limit * 2);

      hits.forEach((hit: any, index: number) => {
        // Use position as pseudo-score (lower index = higher relevance)
        const score = 1 / (index + 1);
        const existing = allResults.get(hit.id);

        if (!existing || score > existing.score) {
          allResults.set(hit.id, { ...hit, content: hit.name, score });
        }
      });
    }

    // The map is in insertion order, not score order. Sort before any
    // truncation so a top hit from a later query variation is not cut off
    // behind weaker hits from an earlier one.
    let results = Array.from(allResults.values()).sort((a, b) => b.score - a.score);

    // Re-ranking
    if (options.useReranking) {
      results = await this.reRanker.rerank(query, results, {
        useRecency: true,
        usePopularity: true,
      });
    }

    // Diversification
    if (options.useDiversification) {
      results = this.reRanker.diversify(results);
    }

    return results.slice(0, limit);
  }
}

export const queryExpander = new QueryExpander();
export const resultReRanker = new ResultReRanker();
export const hybridSearch = new HybridSearch();