File size: 13,021 Bytes
529090e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
import { hybridSearchEngine } from './HybridSearchEngine.js';
import { getCognitiveMemory } from '../memory/CognitiveMemory.js';
import { unifiedMemorySystem } from './UnifiedMemorySystem.js';
import { getLlmService } from '../../services/llm/llmService.js';
import { MemoryRepository } from '../../services/memory/memoryRepository.js';
import { getVectorStore } from '../../platform/vector/index.js';

/**
 * A node in the in-memory reasoning graph assembled during a query.
 * Seed nodes come from hybrid search (depth 0); expanded nodes are added
 * by `expandNode` with prefixed ids ("source-", "memory-", "event-").
 */
interface GraphNode {
    id: string;              // unique within one query's graph; used for visited-set dedup
    type: string;            // e.g. 'source', 'memory', 'episodic', or a search-result type
    content: string;         // text payload fed to LLM synthesis (truncated for sources output)
    score: number;           // relevance; decays multiplicatively per hop during expansion
    depth: number;           // hop distance from the seed set (0 = seed)
    metadata: any;           // free-form; `metadata.relation` labels the incoming edge
    connections: GraphEdge[];// outgoing edges recorded on the parent during expansion
    embedding?: number[]; // For semantic similarity
}

/** A directed edge from a parent node to a discovered neighbor. */
interface GraphEdge {
    targetId: string;  // id of the connected GraphNode
    relation: string;  // e.g. 'uses_source', 'memory_relation', 'episodic_memory', 'related_to'
    weight: number;    // the neighbor's score at discovery time
}

/** Result returned by UnifiedGraphRAG.query. */
interface GraphRAGResult {
    answer: string;               // LLM-synthesized answer (or a fallback summary)
    reasoning_path: string[];     // human-readable log of seed discovery and each hop
    nodes: GraphNode[];           // top-scoring nodes (at most 10), highest score first
    confidence: number;           // score of the best node, or 0 when nothing was found
    sources?: Array<{ id: string; content: string; score: number }>; // top 5, content truncated to 200 chars
}

export class UnifiedGraphRAG {
    /** Default maximum traversal depth when the caller does not override it. */
    private maxHops: number = 2;
    /** Minimum score a candidate node must exceed to enter the graph. */
    private minScore: number = 0.3;
    /** Repository used to look up related memory entities during expansion. */
    private memoryRepo: MemoryRepository;

    // Common short English words excluded from keyword extraction.
    // Hoisted to a static Set so it is built once, not on every call.
    // NOTE(review): most entries are 3 letters and are already removed by the
    // `length > 3` filter in extractKeywords — kept for output parity.
    private static readonly STOP_WORDS = new Set([
        'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'her',
        'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how',
        'man', 'new', 'now', 'old', 'see', 'two', 'way', 'who', 'boy', 'did',
        'its', 'let', 'put', 'say', 'she', 'too', 'use'
    ]);

    constructor() {
        this.memoryRepo = new MemoryRepository();
    }

    /**
     * Perform multi-hop reasoning over the knowledge graph.
     *
     * Pipeline:
     *   1. Seed the graph with high-precision hits from hybrid search.
     *   2. Expand the frontier breadth-first up to `maxHops` times using
     *      widget usage patterns, CMA memory entities, and episodic memory.
     *   3. Synthesize an answer over the top-scoring nodes via the LLM
     *      service (with a non-LLM fallback on error).
     *
     * @param query   Natural-language question to reason about.
     * @param context Tenant scoping (userId/orgId) plus an optional hop
     *                override. An explicit `maxHops` of 0 is honored and
     *                skips expansion entirely (seed nodes only).
     * @returns Answer text, human-readable reasoning path, top nodes,
     *          confidence, and truncated source snippets.
     */
    public async query(query: string, context: { userId: string; orgId: string; maxHops?: number }): Promise<GraphRAGResult> {
        console.log(`🧠 [GraphRAG] Starting reasoning for: "${query}"`);

        // Fix: `??` instead of `||` so an explicit maxHops of 0 is respected
        // rather than silently replaced by the default.
        const maxHops = context.maxHops ?? this.maxHops;

        // 1. Seed nodes from hybrid search (high-precision entry points).
        const seedResults = await hybridSearchEngine.search(query, {
            ...context,
            limit: 5
        });

        if (seedResults.length === 0) {
            return {
                answer: "No sufficient data found to reason about this query.",
                reasoning_path: [],
                nodes: [],
                confidence: 0
            };
        }

        // 2. Convert search results to graph nodes (depth 0).
        let frontier: GraphNode[] = seedResults.map(r => ({
            id: r.id,
            type: r.type,
            content: r.content,
            score: r.score,
            depth: 0,
            metadata: r.metadata,
            connections: []
        }));

        const visited = new Set<string>(frontier.map(n => n.id));
        const knowledgeGraph: GraphNode[] = [...frontier];
        const reasoningPath: string[] = [`Found ${frontier.length} starting points: ${frontier.map(n => n.id).join(', ')}`];

        // 3. Breadth-first expansion, one hop at a time.
        for (let hop = 1; hop <= maxHops; hop++) {
            console.log(`🔍 [GraphRAG] Hop ${hop}: Expanding ${frontier.length} nodes`);
            const newFrontier: GraphNode[] = [];

            for (const node of frontier) {
                const connections = await this.expandNode(node, query, context);

                for (const conn of connections) {
                    // Skip already-visited nodes and low-relevance candidates.
                    if (!visited.has(conn.id) && conn.score > this.minScore) {
                        visited.add(conn.id);
                        newFrontier.push(conn);
                        knowledgeGraph.push(conn);

                        // Record the edge on the parent so the graph stays traversable.
                        node.connections.push({
                            targetId: conn.id,
                            relation: conn.metadata.relation || 'related_to',
                            weight: conn.score
                        });
                    }
                }
            }

            if (newFrontier.length > 0) {
                reasoningPath.push(`Hop ${hop}: Discovered ${newFrontier.length} new related concepts.`);
                frontier = newFrontier;
            } else {
                break; // No new connections found — stop early.
            }
        }

        // 4. Synthesize an answer over the top-scoring nodes.
        // Fix: sort a copy so knowledgeGraph itself is not reordered in place.
        const topNodes = [...knowledgeGraph].sort((a, b) => b.score - a.score).slice(0, 10);
        const answer = await this.synthesizeAnswer(query, topNodes, context);

        return {
            answer,
            reasoning_path: reasoningPath,
            nodes: topNodes,
            confidence: topNodes.length > 0 ? topNodes[0].score : 0,
            sources: topNodes.slice(0, 5).map(n => ({
                id: n.id,
                content: n.content.substring(0, 200),
                score: n.score
            }))
        };
    }

    /**
     * Expand a single node into candidate neighbors using three strategies:
     *   1. Widget usage patterns (commonly co-used sources).
     *   2. CMA memory entities found by keyword search, re-scored against
     *      the query so only semantically relevant memories survive.
     *   3. Recent episodic events from unified working memory.
     * Scores decay multiplicatively per hop so distant nodes rank below
     * direct hits. Returns at most the 5 best-scoring candidates.
     */
    private async expandNode(node: GraphNode, query: string, context: { userId: string; orgId: string }): Promise<GraphNode[]> {
        const memory = getCognitiveMemory();
        const expandedNodes: GraphNode[] = [];

        // Strategy 1: widget usage patterns — connect to commonly used sources.
        // getWidgetPatterns returns an object with commonSources/timePatterns.
        const patterns = await memory.getWidgetPatterns(node.id);
        for (const source of patterns.commonSources || []) {
            expandedNodes.push({
                id: `source-${source}`,
                type: 'source',
                content: `Source: ${source}`,
                score: node.score * 0.7, // Decay score over hops
                depth: node.depth + 1,
                metadata: { relation: 'uses_source', averageLatency: patterns.averageLatency },
                connections: []
            });
        }

        // Strategy 2: CMA memory relations (direct graph edges).
        const relatedMemories = await this.memoryRepo.searchEntities({
            orgId: context.orgId,
            userId: context.userId,
            keywords: this.extractKeywords(node.content),
            limit: 5
        });

        for (const mem of relatedMemories) {
            // Re-score against the query; keyword overlap alone is too noisy.
            const semanticScore = await this.computeSemanticSimilarity(query, mem.content);

            if (semanticScore > this.minScore) {
                expandedNodes.push({
                    id: `memory-${mem.id}`,
                    type: mem.entity_type || 'memory',
                    content: mem.content,
                    score: (mem.importance || 0.5) * semanticScore * node.score * 0.7,
                    depth: node.depth + 1,
                    metadata: {
                        relation: 'memory_relation',
                        importance: mem.importance,
                        semanticScore
                    },
                    connections: []
                });
            }
        }

        // Strategy 3: episodic memory — recent events from working memory.
        const workingMemory = await unifiedMemorySystem.getWorkingMemory({
            userId: context.userId,
            orgId: context.orgId
        });

        const relatedEvents = (workingMemory.recentEvents || []).slice(0, 3);
        for (const [idx, event] of relatedEvents.entries()) {
            const eventContent = JSON.stringify(event);
            const semanticScore = await this.computeSemanticSimilarity(query, eventContent);

            if (semanticScore > this.minScore) {
                expandedNodes.push({
                    // Fix: include the loop index in the fallback id so multiple
                    // id-less events in the same batch cannot collide (Date.now()
                    // alone can repeat within the same millisecond), and use ??
                    // so a legitimate id of 0 is kept.
                    id: `event-${event.id ?? `${Date.now()}-${idx}`}`,
                    type: 'episodic',
                    content: eventContent.substring(0, 200),
                    score: semanticScore * node.score * 0.6,
                    depth: node.depth + 1,
                    metadata: {
                        relation: 'episodic_memory',
                        semanticScore
                    },
                    connections: []
                });
            }
        }

        // Keep only the strongest candidates per expanded node.
        return expandedNodes.sort((a, b) => b.score - a.score).slice(0, 5); // Top 5 per node
    }

    /**
     * LLM-based answer synthesis over the top graph nodes.
     * Builds a numbered context from node contents plus a depth-annotated
     * reasoning path, then asks the LLM service for a structured answer.
     * On any error, falls back to a simple concatenated summary.
     */
    private async synthesizeAnswer(query: string, nodes: GraphNode[], context: { userId: string; orgId: string }): Promise<string> {
        try {
            const llmService = getLlmService();

            // Build context from graph nodes (truncated to keep the prompt bounded).
            const graphContext = nodes.map((n, idx) =>
                `[${idx + 1}] ${n.type}: ${n.content.substring(0, 300)} (confidence: ${n.score.toFixed(2)})`
            ).join('\n\n');

            const reasoningPath = nodes.map(n => `${n.id} (depth: ${n.depth})`).join(' -> ');

            const systemContext = `You are an advanced reasoning assistant. Synthesize a comprehensive answer based on the knowledge graph context provided. 
Use the reasoning path to explain how you arrived at the answer. Be precise, cite sources, and indicate confidence levels.`;

            const userPrompt = `Query: ${query}

Knowledge Graph Context:
${graphContext}

Reasoning Path: ${reasoningPath}

Provide a comprehensive answer synthesizing the information from the knowledge graph. Include:
1. Direct answer to the query
2. Key insights from the graph
3. Confidence assessment
4. Sources referenced`;

            const answer = await llmService.generateContextualResponse(
                systemContext,
                userPrompt,
                `User: ${context.userId}, Org: ${context.orgId}`
            );

            return answer || "Reasoning complete. See nodes for details.";
        } catch (error) {
            console.error('[GraphRAG] LLM synthesis error:', error);
            // Fallback to simple synthesis so query() never fails here.
            return `Based on ${nodes.length} related concepts found: ${nodes.slice(0, 3).map(n => n.content.substring(0, 100)).join('; ')}...`;
        }
    }

    /**
     * Approximate semantic similarity between query and content.
     *
     * Currently a lexical proxy: Jaccard overlap of lowercase words longer
     * than two characters, plus a 0.3 bonus when the content contains the
     * full query as a phrase, clamped to [0, 1]. Returns 0 when neither
     * text has usable words (avoids division by zero).
     *
     * Fix: the previous implementation awaited getVectorStore() without
     * using the result and duplicated this exact computation in its catch
     * block; the dead call and the copy-paste have been removed.
     * TODO: replace with true embedding similarity via the vector store.
     */
    private async computeSemanticSimilarity(query: string, content: string): Promise<number> {
        const queryLower = query.toLowerCase();
        const contentLower = content.toLowerCase();

        // Jaccard similarity over words with length > 2.
        const queryWords = new Set(queryLower.split(/\s+/).filter(w => w.length > 2));
        const contentWords = new Set(contentLower.split(/\s+/).filter(w => w.length > 2));
        const intersection = new Set([...queryWords].filter(w => contentWords.has(w)));
        const union = new Set([...queryWords, ...contentWords]);

        const jaccard = union.size > 0 ? intersection.size / union.size : 0;
        const phraseMatch = contentLower.includes(queryLower) ? 0.3 : 0;

        return Math.min(1.0, jaccard + phraseMatch);
    }

    /**
     * Extract up to five keywords from content for memory search:
     * lowercase words longer than three characters, minus stop words.
     */
    private extractKeywords(content: string): string[] {
        return content.toLowerCase()
            .split(/\s+/)
            .filter(w => w.length > 3)
            .filter(w => !UnifiedGraphRAG.STOP_WORDS.has(w))
            .slice(0, 5);
    }
}

export const unifiedGraphRAG = new UnifiedGraphRAG();