File size: 8,261 Bytes
529090e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import { graphMemoryService } from '../../memory/GraphMemoryService';
import { neo4jService } from '../../database/Neo4jService';

/**
 * Query Expansion - Expands a user query with synonyms and graph-derived related terms.
 */
export class QueryExpander {
    /** Synonyms keyed by the lowercase word they may stand in for. */
    private synonymMap: Map<string, string[]> = new Map([
        ['find', ['search', 'locate', 'discover', 'retrieve']],
        ['show', ['display', 'present', 'reveal', 'demonstrate']],
        ['create', ['make', 'build', 'generate', 'construct']],
        ['delete', ['remove', 'erase', 'eliminate', 'destroy']],
        ['update', ['modify', 'change', 'alter', 'revise']],
    ]);

    /**
     * Expand a query into a de-duplicated list of variations.
     *
     * The list always starts with the original query. It is followed by one
     * variation per synonym of every known word in the query, and then by
     * `"<query> <concept>"` entries for up to five related concepts returned
     * by the graph lookup.
     *
     * Graph failures are logged with `console.warn` and never rejected to the
     * caller; the synonym-based variations are still returned in that case.
     *
     * @param query - The raw user query.
     * @returns The original query plus its variations, in insertion order.
     */
    async expandQuery(query: string): Promise<string[]> {
        const expandedTerms = new Set<string>([query]);
        const words = this.tokenize(query);

        // A blank query has nothing to expand, and a `CONTAINS ''` lookup would
        // match every entity in the graph, so stop here.
        if (words.length === 0) {
            return Array.from(expandedTerms);
        }

        // Add synonyms (each distinct word is expanded once).
        for (const word of new Set(words)) {
            const synonyms = this.synonymMap.get(word);
            if (!synonyms) continue;

            // Match whole words only, so "find" does not rewrite "findings".
            const wholeWord = new RegExp(`\\b${this.escapeRegExp(word)}\\b`, 'gi');
            for (const synonym of synonyms) {
                // A replacer function keeps `$` sequences in the synonym literal.
                expandedTerms.add(query.replace(wholeWord, () => synonym));
            }
        }

        // Add semantic variations using Neo4j graph
        let connected = false;
        try {
            await neo4jService.connect();
            connected = true;

            // Find related concepts in the graph
            const relatedConcepts = await neo4jService.runQuery(
                `MATCH (n:Entity)
         WHERE toLower(n.name) CONTAINS $query OR toLower(n.content) CONTAINS $query
         MATCH (n)-[:RELATED_TO|SIMILAR_TO]-(related)
         RETURN DISTINCT related.name as concept
         LIMIT 5`,
                { query: query.trim().toLowerCase() }
            );

            relatedConcepts.forEach(record => {
                if (record.concept) {
                    expandedTerms.add(`${query} ${record.concept}`);
                }
            });
        } catch (error) {
            console.warn('Query expansion from graph failed:', error);
        } finally {
            // Release the connection even when the lookup itself threw.
            if (connected) {
                try {
                    await neo4jService.disconnect();
                } catch (error) {
                    console.warn('Query expansion could not disconnect from graph:', error);
                }
            }
        }

        return Array.from(expandedTerms);
    }

    /**
     * Extract key phrases from a query as lowercase word n-grams.
     *
     * @param query - The raw user query.
     * @returns All bigrams in order of appearance, followed by all trigrams.
     *          Queries with fewer than two words yield an empty array.
     */
    extractKeyPhrases(query: string): string[] {
        const words = this.tokenize(query);
        const phrases: string[] = [];

        // Bigrams first, then trigrams.
        for (const size of [2, 3]) {
            for (let start = 0; start + size <= words.length; start++) {
                phrases.push(words.slice(start, start + size).join(' '));
            }
        }

        return phrases;
    }

    /** Lowercase the text and split it on whitespace, dropping empty tokens. */
    private tokenize(text: string): string[] {
        return text
            .toLowerCase()
            .split(/\s+/)
            .filter(token => token.length > 0);
    }

    /** Escape regular-expression metacharacters so the text matches literally. */
    private escapeRegExp(text: string): string {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }
}

/**
 * Result Re-ranker - Re-ranks search results using multiple signals
 */
export class ResultReRanker {
    /**
     * Re-rank results using multiple scoring signals.
     *
     * Each signal multiplies the incoming score:
     * - recency (opt-in): up to +20%, decaying exponentially over 30 days;
     * - popularity (opt-in): up to +30%, reached at 10 graph relationships;
     * - exact match (always): x1.5 when the content contains the query,
     *   compared case-insensitively.
     *
     * The graph connection is opened once for the whole batch and closed in a
     * `finally`, so concurrent lookups never connect or disconnect the shared
     * service underneath each other. Graph failures are logged and only cause
     * the popularity boost to be skipped.
     *
     * @param query - The user query used for the exact-match boost.
     * @param results - Results to score; the input array is not mutated.
     * @param options - Which optional signals to apply.
     * @returns New result objects sorted by descending final score.
     */
    async rerank(
        query: string,
        results: Array<{ id: string; content: string; score: number; metadata?: any }>,
        options: {
            useRecency?: boolean;
            usePopularity?: boolean;
            useSemanticSimilarity?: boolean;
        } = {}
    ): Promise<Array<{ id: string; content: string; score: number; metadata?: any }>> {
        const msPerDay = 1000 * 60 * 60 * 24;
        const now = Date.now();
        const loweredQuery = query.toLowerCase();

        let graphConnected = false;
        if (options.usePopularity && results.length > 0) {
            try {
                await neo4jService.connect();
                graphConnected = true;
            } catch (error) {
                console.warn('Popularity re-ranking skipped, graph connection failed:', error);
            }
        }

        try {
            const scoredResults = await Promise.all(
                results.map(async result => {
                    let finalScore = result.score;

                    // Recency boost
                    if (options.useRecency && result.metadata?.createdAt) {
                        const createdAtMs = new Date(result.metadata.createdAt).getTime();
                        // Unparseable dates yield NaN; skip them rather than poison the score.
                        if (Number.isFinite(createdAtMs)) {
                            // Clamp so future timestamps cannot boost beyond "just created".
                            const daysSinceCreation = Math.max(0, now - createdAtMs) / msPerDay;
                            const recencyBoost = Math.exp(-daysSinceCreation / 30); // Decay over 30 days
                            finalScore *= 1 + recencyBoost * 0.2;
                        }
                    }

                    // Popularity boost (based on connections in graph)
                    if (graphConnected) {
                        try {
                            const connections = await neo4jService.getNodeRelationships(result.id);
                            const popularityBoost = Math.min(connections.length / 10, 1); // Cap at 10 connections
                            finalScore *= 1 + popularityBoost * 0.3;
                        } catch (error) {
                            console.warn(`Popularity lookup failed for result ${result.id}:`, error);
                        }
                    }

                    // Exact match boost
                    if (result.content.toLowerCase().includes(loweredQuery)) {
                        finalScore *= 1.5;
                    }

                    return { ...result, score: finalScore };
                })
            );

            // Sort by final score
            return scoredResults.sort((a, b) => b.score - a.score);
        } finally {
            if (graphConnected) {
                try {
                    await neo4jService.disconnect();
                } catch (error) {
                    console.warn('Re-ranker could not disconnect from graph:', error);
                }
            }
        }
    }

    /**
     * Diversify results to avoid redundancy.
     *
     * Walks the results in the given order and keeps one only if its word-level
     * Jaccard similarity to every already kept result is at most
     * `maxSimilarity`.
     *
     * @param results - Candidates, expected in priority order.
     * @param maxSimilarity - Similarity above which a candidate is dropped.
     * @param maxResults - Maximum number of results to keep (default 10).
     * @returns The kept results, in their original relative order.
     */
    diversify<T extends { id: string; content: string; score: number }>(
        results: T[],
        maxSimilarity: number = 0.8,
        maxResults: number = 10
    ): T[] {
        const diversified: T[] = [];
        // Token sets of the kept results, cached so each text is tokenized once.
        const keptTokens: Array<Set<string>> = [];

        for (const result of results) {
            // Stop if we have enough diverse results
            if (diversified.length >= maxResults) break;

            const tokens = this.tokenize(result.content);
            const tooSimilar = keptTokens.some(
                kept => this.jaccard(tokens, kept) > maxSimilarity
            );

            if (!tooSimilar) {
                diversified.push(result);
                keptTokens.push(tokens);
            }
        }

        return diversified;
    }

    /** Word-level Jaccard similarity of two texts, in the range [0, 1]. */
    private computeTextSimilarity(text1: string, text2: string): number {
        return this.jaccard(this.tokenize(text1), this.tokenize(text2));
    }

    /** Lowercase the text and return its distinct whitespace-separated words. */
    private tokenize(text: string): Set<string> {
        return new Set(
            text
                .toLowerCase()
                .split(/\s+/)
                .filter(token => token.length > 0)
        );
    }

    /** Jaccard similarity of two sets; two empty sets count as identical (1). */
    private jaccard(a: Set<string>, b: Set<string>): number {
        let shared = 0;
        for (const token of a) {
            if (b.has(token)) shared++;
        }
        const unionSize = a.size + b.size - shared;
        return unionSize === 0 ? 1 : shared / unionSize;
    }
}

/**
 * Hybrid Search - Combines keyword and semantic search
 */
export class HybridSearch {
    private queryExpander = new QueryExpander();
    private reRanker = new ResultReRanker();

    /**
     * Perform hybrid search combining multiple strategies.
     *
     * Runs an entity search for the query (and, when expansion is enabled, for
     * each of its variations), merges the hits by id keeping the best score,
     * then optionally re-ranks and diversifies before truncating to `limit`.
     *
     * @param query - The user query.
     * @param options - `limit` defaults to 10; values that are missing, not
     *        finite, or below 1 fall back to that default. The remaining flags
     *        enable the optional pipeline stages.
     * @returns At most `limit` results, best first.
     */
    async search(
        query: string,
        options: {
            limit?: number;
            useQueryExpansion?: boolean;
            useReranking?: boolean;
            useDiversification?: boolean;
        } = {}
    ): Promise<Array<{ id: string; content: string; score: number; metadata?: any }>> {
        // A negative limit would otherwise reach `slice(0, -n)` and silently
        // drop the tail of the results instead of capping them.
        const requestedLimit = options.limit;
        const limit =
            typeof requestedLimit === 'number' && Number.isFinite(requestedLimit) && requestedLimit >= 1
                ? Math.floor(requestedLimit)
                : 10;
        let queries = [query];

        // Query expansion
        if (options.useQueryExpansion) {
            queries = await this.queryExpander.expandQuery(query);
        }

        // Execute searches for all query variations, keeping the best hit per id
        const bestById = new Map<string, any>();

        for (const variation of queries) {
            const matches = await graphMemoryService.searchEntities(variation, limit * 2);

            matches.forEach((match, index) => {
                // Use position as pseudo-score (lower index = higher relevance)
                const score = 1 / (index + 1);
                const existing = bestById.get(match.id);
                if (!existing || score > existing.score) {
                    // Fall back to an empty string so later text matching cannot
                    // fail on a hit without a name.
                    bestById.set(match.id, { ...match, content: match.name ?? '', score });
                }
            });
        }

        let results = Array.from(bestById.values());

        if (options.useReranking) {
            // Re-ranking (returns the results sorted by their final score)
            results = await this.reRanker.rerank(query, results, {
                useRecency: true,
                usePopularity: true,
            });
        } else {
            // The map preserves insertion order, not score order, so sort before
            // truncating; otherwise hits from later variations are cut off even
            // when they score higher. The sort is stable, so ties keep their order.
            results.sort((a, b) => b.score - a.score);
        }

        // Diversification
        // NOTE: diversify() keeps at most 10 results by default, so limits above
        // 10 are effectively capped at 10 when this stage is enabled.
        if (options.useDiversification) {
            results = this.reRanker.diversify(results);
        }

        return results.slice(0, limit);
    }
}

// Module-level instances, each constructed once when this module is evaluated.
// NOTE: `hybridSearch` builds its own private QueryExpander and ResultReRanker in
// its field initializers, so it does not use the two standalone instances below.
export const queryExpander = new QueryExpander();
export const resultReRanker = new ResultReRanker();
export const hybridSearch = new HybridSearch();