File size: 13,021 Bytes
529090e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
import { hybridSearchEngine } from './HybridSearchEngine.js';
import { getCognitiveMemory } from '../memory/CognitiveMemory.js';
import { unifiedMemorySystem } from './UnifiedMemorySystem.js';
import { getLlmService } from '../../services/llm/llmService.js';
import { MemoryRepository } from '../../services/memory/memoryRepository.js';
import { getVectorStore } from '../../platform/vector/index.js';

/**
 * A node in the in-memory reasoning graph assembled during a query.
 * Seed nodes come from hybrid search (depth 0); expanded nodes are added
 * by `expandNode` with prefixed ids ("source-", "memory-", "event-").
 */
interface GraphNode {
    id: string;              // unique within one query's graph; used for visited-set dedup
    type: string;            // e.g. 'source', 'memory', 'episodic', or a search-result type
    content: string;         // text payload fed to LLM synthesis (truncated for sources output)
    score: number;           // relevance; decays multiplicatively per hop during expansion
    depth: number;           // hop distance from the seed set (0 = seed)
    metadata: any;           // free-form; `metadata.relation` labels the incoming edge
    connections: GraphEdge[];// outgoing edges recorded on the parent during expansion
    embedding?: number[]; // For semantic similarity
}

/** A directed edge from a parent node to a discovered neighbor. */
interface GraphEdge {
    targetId: string;  // id of the connected GraphNode
    relation: string;  // e.g. 'uses_source', 'memory_relation', 'episodic_memory', 'related_to'
    weight: number;    // the neighbor's score at discovery time
}

/** Result returned by UnifiedGraphRAG.query. */
interface GraphRAGResult {
    answer: string;               // LLM-synthesized answer (or a fallback summary)
    reasoning_path: string[];     // human-readable log of seed discovery and each hop
    nodes: GraphNode[];           // top-scoring nodes (at most 10), highest score first
    confidence: number;           // score of the best node, or 0 when nothing was found
    sources?: Array<{ id: string; content: string; score: number }>; // top 5, content truncated to 200 chars
}

export class UnifiedGraphRAG {
    /** Default maximum traversal depth when the caller does not override it. */
    private maxHops: number = 2;
    /** Minimum score a candidate node must exceed to enter the graph. */
    private minScore: number = 0.3;
    /** Repository used to look up related memory entities during expansion. */
    private memoryRepo: MemoryRepository;

    // Common short English words excluded from keyword extraction.
    // Hoisted to a static Set so it is built once, not on every call.
    // NOTE(review): most entries are 3 letters and are already removed by the
    // `length > 3` filter in extractKeywords — kept for output parity.
    private static readonly STOP_WORDS = new Set([
        'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'her',
        'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how',
        'man', 'new', 'now', 'old', 'see', 'two', 'way', 'who', 'boy', 'did',
        'its', 'let', 'put', 'say', 'she', 'too', 'use'
    ]);

    constructor() {
        this.memoryRepo = new MemoryRepository();
    }

    /**
     * Perform multi-hop reasoning over the knowledge graph.
     *
     * Pipeline:
     *   1. Seed the graph with high-precision hits from hybrid search.
     *   2. Expand the frontier breadth-first up to `maxHops` times using
     *      widget usage patterns, CMA memory entities, and episodic memory.
     *   3. Synthesize an answer over the top-scoring nodes via the LLM
     *      service (with a non-LLM fallback on error).
     *
     * @param query   Natural-language question to reason about.
     * @param context Tenant scoping (userId/orgId) plus an optional hop
     *                override. An explicit `maxHops` of 0 is honored and
     *                skips expansion entirely (seed nodes only).
     * @returns Answer text, human-readable reasoning path, top nodes,
     *          confidence, and truncated source snippets.
     */
    public async query(query: string, context: { userId: string; orgId: string; maxHops?: number }): Promise<GraphRAGResult> {
        console.log(`🧠 [GraphRAG] Starting reasoning for: "${query}"`);

        // Fix: `??` instead of `||` so an explicit maxHops of 0 is respected
        // rather than silently replaced by the default.
        const maxHops = context.maxHops ?? this.maxHops;

        // 1. Seed nodes from hybrid search (high-precision entry points).
        const seedResults = await hybridSearchEngine.search(query, {
            ...context,
            limit: 5
        });

        if (seedResults.length === 0) {
            return {
                answer: "No sufficient data found to reason about this query.",
                reasoning_path: [],
                nodes: [],
                confidence: 0
            };
        }

        // 2. Convert search results to graph nodes (depth 0).
        let frontier: GraphNode[] = seedResults.map(r => ({
            id: r.id,
            type: r.type,
            content: r.content,
            score: r.score,
            depth: 0,
            metadata: r.metadata,
            connections: []
        }));

        const visited = new Set<string>(frontier.map(n => n.id));
        const knowledgeGraph: GraphNode[] = [...frontier];
        const reasoningPath: string[] = [`Found ${frontier.length} starting points: ${frontier.map(n => n.id).join(', ')}`];

        // 3. Breadth-first expansion, one hop at a time.
        for (let hop = 1; hop <= maxHops; hop++) {
            console.log(`🔍 [GraphRAG] Hop ${hop}: Expanding ${frontier.length} nodes`);
            const newFrontier: GraphNode[] = [];

            for (const node of frontier) {
                const connections = await this.expandNode(node, query, context);

                for (const conn of connections) {
                    // Skip already-visited nodes and low-relevance candidates.
                    if (!visited.has(conn.id) && conn.score > this.minScore) {
                        visited.add(conn.id);
                        newFrontier.push(conn);
                        knowledgeGraph.push(conn);

                        // Record the edge on the parent so the graph stays traversable.
                        node.connections.push({
                            targetId: conn.id,
                            relation: conn.metadata.relation || 'related_to',
                            weight: conn.score
                        });
                    }
                }
            }

            if (newFrontier.length > 0) {
                reasoningPath.push(`Hop ${hop}: Discovered ${newFrontier.length} new related concepts.`);
                frontier = newFrontier;
            } else {
                break; // No new connections found — stop early.
            }
        }

        // 4. Synthesize an answer over the top-scoring nodes.
        // Fix: sort a copy so knowledgeGraph itself is not reordered in place.
        const topNodes = [...knowledgeGraph].sort((a, b) => b.score - a.score).slice(0, 10);
        const answer = await this.synthesizeAnswer(query, topNodes, context);

        return {
            answer,
            reasoning_path: reasoningPath,
            nodes: topNodes,
            confidence: topNodes.length > 0 ? topNodes[0].score : 0,
            sources: topNodes.slice(0, 5).map(n => ({
                id: n.id,
                content: n.content.substring(0, 200),
                score: n.score
            }))
        };
    }

    /**
     * Expand a single node into candidate neighbors using three strategies:
     *   1. Widget usage patterns (commonly co-used sources).
     *   2. CMA memory entities found by keyword search, re-scored against
     *      the query so only semantically relevant memories survive.
     *   3. Recent episodic events from unified working memory.
     * Scores decay multiplicatively per hop so distant nodes rank below
     * direct hits. Returns at most the 5 best-scoring candidates.
     */
    private async expandNode(node: GraphNode, query: string, context: { userId: string; orgId: string }): Promise<GraphNode[]> {
        const memory = getCognitiveMemory();
        const expandedNodes: GraphNode[] = [];

        // Strategy 1: widget usage patterns — connect to commonly used sources.
        // getWidgetPatterns returns an object with commonSources/timePatterns.
        const patterns = await memory.getWidgetPatterns(node.id);
        for (const source of patterns.commonSources || []) {
            expandedNodes.push({
                id: `source-${source}`,
                type: 'source',
                content: `Source: ${source}`,
                score: node.score * 0.7, // Decay score over hops
                depth: node.depth + 1,
                metadata: { relation: 'uses_source', averageLatency: patterns.averageLatency },
                connections: []
            });
        }

        // Strategy 2: CMA memory relations (direct graph edges).
        const relatedMemories = await this.memoryRepo.searchEntities({
            orgId: context.orgId,
            userId: context.userId,
            keywords: this.extractKeywords(node.content),
            limit: 5
        });

        for (const mem of relatedMemories) {
            // Re-score against the query; keyword overlap alone is too noisy.
            const semanticScore = await this.computeSemanticSimilarity(query, mem.content);

            if (semanticScore > this.minScore) {
                expandedNodes.push({
                    id: `memory-${mem.id}`,
                    type: mem.entity_type || 'memory',
                    content: mem.content,
                    score: (mem.importance || 0.5) * semanticScore * node.score * 0.7,
                    depth: node.depth + 1,
                    metadata: {
                        relation: 'memory_relation',
                        importance: mem.importance,
                        semanticScore
                    },
                    connections: []
                });
            }
        }

        // Strategy 3: episodic memory — recent events from working memory.
        const workingMemory = await unifiedMemorySystem.getWorkingMemory({
            userId: context.userId,
            orgId: context.orgId
        });

        const relatedEvents = (workingMemory.recentEvents || []).slice(0, 3);
        for (const [idx, event] of relatedEvents.entries()) {
            const eventContent = JSON.stringify(event);
            const semanticScore = await this.computeSemanticSimilarity(query, eventContent);

            if (semanticScore > this.minScore) {
                expandedNodes.push({
                    // Fix: include the loop index in the fallback id so multiple
                    // id-less events in the same batch cannot collide (Date.now()
                    // alone can repeat within the same millisecond), and use ??
                    // so a legitimate id of 0 is kept.
                    id: `event-${event.id ?? `${Date.now()}-${idx}`}`,
                    type: 'episodic',
                    content: eventContent.substring(0, 200),
                    score: semanticScore * node.score * 0.6,
                    depth: node.depth + 1,
                    metadata: {
                        relation: 'episodic_memory',
                        semanticScore
                    },
                    connections: []
                });
            }
        }

        // Keep only the strongest candidates per expanded node.
        return expandedNodes.sort((a, b) => b.score - a.score).slice(0, 5); // Top 5 per node
    }

    /**
     * LLM-based answer synthesis over the top graph nodes.
     * Builds a numbered context from node contents plus a depth-annotated
     * reasoning path, then asks the LLM service for a structured answer.
     * On any error, falls back to a simple concatenated summary.
     */
    private async synthesizeAnswer(query: string, nodes: GraphNode[], context: { userId: string; orgId: string }): Promise<string> {
        try {
            const llmService = getLlmService();

            // Build context from graph nodes (truncated to keep the prompt bounded).
            const graphContext = nodes.map((n, idx) =>
                `[${idx + 1}] ${n.type}: ${n.content.substring(0, 300)} (confidence: ${n.score.toFixed(2)})`
            ).join('\n\n');

            const reasoningPath = nodes.map(n => `${n.id} (depth: ${n.depth})`).join(' -> ');

            const systemContext = `You are an advanced reasoning assistant. Synthesize a comprehensive answer based on the knowledge graph context provided. 
Use the reasoning path to explain how you arrived at the answer. Be precise, cite sources, and indicate confidence levels.`;

            const userPrompt = `Query: ${query}

Knowledge Graph Context:
${graphContext}

Reasoning Path: ${reasoningPath}

Provide a comprehensive answer synthesizing the information from the knowledge graph. Include:
1. Direct answer to the query
2. Key insights from the graph
3. Confidence assessment
4. Sources referenced`;

            const answer = await llmService.generateContextualResponse(
                systemContext,
                userPrompt,
                `User: ${context.userId}, Org: ${context.orgId}`
            );

            return answer || "Reasoning complete. See nodes for details.";
        } catch (error) {
            console.error('[GraphRAG] LLM synthesis error:', error);
            // Fallback to simple synthesis so query() never fails here.
            return `Based on ${nodes.length} related concepts found: ${nodes.slice(0, 3).map(n => n.content.substring(0, 100)).join('; ')}...`;
        }
    }

    /**
     * Approximate semantic similarity between query and content.
     *
     * Currently a lexical proxy: Jaccard overlap of lowercase words longer
     * than two characters, plus a 0.3 bonus when the content contains the
     * full query as a phrase, clamped to [0, 1]. Returns 0 when neither
     * text has usable words (avoids division by zero).
     *
     * Fix: the previous implementation awaited getVectorStore() without
     * using the result and duplicated this exact computation in its catch
     * block; the dead call and the copy-paste have been removed.
     * TODO: replace with true embedding similarity via the vector store.
     */
    private async computeSemanticSimilarity(query: string, content: string): Promise<number> {
        const queryLower = query.toLowerCase();
        const contentLower = content.toLowerCase();

        // Jaccard similarity over words with length > 2.
        const queryWords = new Set(queryLower.split(/\s+/).filter(w => w.length > 2));
        const contentWords = new Set(contentLower.split(/\s+/).filter(w => w.length > 2));
        const intersection = new Set([...queryWords].filter(w => contentWords.has(w)));
        const union = new Set([...queryWords, ...contentWords]);

        const jaccard = union.size > 0 ? intersection.size / union.size : 0;
        const phraseMatch = contentLower.includes(queryLower) ? 0.3 : 0;

        return Math.min(1.0, jaccard + phraseMatch);
    }

    /**
     * Extract up to five keywords from content for memory search:
     * lowercase words longer than three characters, minus stop words.
     */
    private extractKeywords(content: string): string[] {
        return content.toLowerCase()
            .split(/\s+/)
            .filter(w => w.length > 3)
            .filter(w => !UnifiedGraphRAG.STOP_WORDS.has(w))
            .slice(0, 5);
    }
}

export const unifiedGraphRAG = new UnifiedGraphRAG();