// Update backend source — commit 34367da (verified), author: Kraft102
import { hybridSearchEngine } from './HybridSearchEngine.js';
import { getCognitiveMemory } from '../memory/CognitiveMemory.js';
import { unifiedMemorySystem } from './UnifiedMemorySystem.js';
import { getLlmService } from '../../services/llm/llmService.js';
import { MemoryRepository } from '../../services/memory/memoryRepository.js';
import { getVectorStore } from '../../platform/vector/index.js';
/**
 * A node in the in-memory knowledge graph built during a query.
 * Nodes originate from hybrid-search seeds or from expansion strategies
 * (usage patterns, memory entities, episodic events).
 */
interface GraphNode {
id: string;
type: string;
content: string;
// Relevance score; decays multiplicatively as nodes are discovered at deeper hops.
score: number;
// Hop distance from the seed set (seeds are depth 0).
depth: number;
// Free-form provenance data (e.g. `relation`, `importance`, `semanticScore`).
metadata: any;
// Outgoing edges recorded as the graph is expanded.
connections: GraphEdge[];
embedding?: number[]; // For semantic similarity
}
/** A directed, weighted edge from one graph node to another. */
interface GraphEdge {
targetId: string;
// Relation label, e.g. 'uses_source', 'memory_relation', 'episodic_memory'.
relation: string;
// Edge weight; currently the target node's score at discovery time.
weight: number;
}
/** Result of a full GraphRAG query: answer plus supporting evidence. */
interface GraphRAGResult {
answer: string;
// Human-readable log of how the graph was grown, hop by hop.
reasoning_path: string[];
// Top-scoring nodes that informed the answer.
nodes: GraphNode[];
// Score of the best node, or 0 when nothing was found.
confidence: number;
// Up to five truncated source snippets for citation.
sources?: Array<{ id: string; content: string; score: number }>;
}
export class UnifiedGraphRAG {
  /** Default maximum number of expansion hops when the caller does not override it. */
  private readonly maxHops: number = 2;
  /** Minimum score a candidate node must exceed to be admitted into the graph. */
  private readonly minScore: number = 0.3;
  /** Repository used to find related memory entities (expansion Strategy 2). */
  private readonly memoryRepo: MemoryRepository = new MemoryRepository();

  // Hoisted stop-word set so the list is built once, not on every extractKeywords call.
  // (Words of length <= 3 are already removed by the length filter; kept for safety.)
  private static readonly STOP_WORDS = new Set([
    'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'her', 'was',
    'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'man', 'new',
    'now', 'old', 'see', 'two', 'way', 'who', 'boy', 'did', 'its', 'let', 'put',
    'say', 'she', 'too', 'use'
  ]);

  /**
   * Perform multi-hop reasoning over the knowledge graph.
   *
   * Pipeline: (1) seed nodes from hybrid search, (2) breadth-first expansion
   * for up to `maxHops` hops via usage patterns, memory entities and episodic
   * events, (3) LLM synthesis over the top-scoring nodes.
   *
   * @param query   Natural-language question to reason about.
   * @param context Tenant scope (`userId`, `orgId`) plus an optional hop override.
   * @returns Synthesized answer, reasoning path, top nodes, confidence, and
   *          up to five truncated source snippets.
   */
  public async query(query: string, context: { userId: string; orgId: string; maxHops?: number }): Promise<GraphRAGResult> {
    console.log(`🧠 [GraphRAG] Starting reasoning for: "${query}"`);
    // '??' (not '||') so an explicit maxHops of 0 means "seeds only, no expansion".
    const maxHops = context.maxHops ?? this.maxHops;

    // 1. Get seed nodes from Hybrid Search (high-precision entry points).
    const seedResults = await hybridSearchEngine.search(query, {
      ...context,
      limit: 5
    });
    if (seedResults.length === 0) {
      return {
        answer: "No sufficient data found to reason about this query.",
        reasoning_path: [],
        nodes: [],
        confidence: 0
      };
    }

    // 2. Convert search results into depth-0 graph nodes.
    let frontier: GraphNode[] = seedResults.map(r => ({
      id: r.id,
      type: r.type,
      content: r.content,
      score: r.score,
      depth: 0,
      metadata: r.metadata,
      connections: []
    }));
    const visited = new Set<string>(frontier.map(n => n.id));
    const knowledgeGraph: GraphNode[] = [...frontier];
    const reasoningPath: string[] = [`Found ${frontier.length} starting points: ${frontier.map(n => n.id).join(', ')}`];

    // 3. Expand graph (breadth-first, multi-hop traversal).
    for (let hop = 1; hop <= maxHops; hop++) {
      console.log(`🔍 [GraphRAG] Hop ${hop}: Expanding ${frontier.length} nodes`);
      const newFrontier: GraphNode[] = [];
      for (const node of frontier) {
        const connections = await this.expandNode(node, query, context);
        for (const conn of connections) {
          // Admit only unseen, sufficiently-relevant nodes.
          if (!visited.has(conn.id) && conn.score > this.minScore) {
            visited.add(conn.id);
            newFrontier.push(conn);
            knowledgeGraph.push(conn);
            // Record the edge on the parent so the graph structure is queryable.
            node.connections.push({
              targetId: conn.id,
              relation: conn.metadata.relation || 'related_to',
              weight: conn.score
            });
          }
        }
      }
      if (newFrontier.length > 0) {
        reasoningPath.push(`Hop ${hop}: Discovered ${newFrontier.length} new related concepts.`);
        frontier = newFrontier;
      } else {
        break; // No more connections found.
      }
    }

    // 4. Synthesize an answer over the best nodes. Sort a copy so the
    //    accumulated graph (and parent/child insertion order) is not mutated.
    const topNodes = [...knowledgeGraph].sort((a, b) => b.score - a.score).slice(0, 10);
    const answer = await this.synthesizeAnswer(query, topNodes, context);
    return {
      answer,
      reasoning_path: reasoningPath,
      nodes: topNodes,
      confidence: topNodes.length > 0 ? topNodes[0].score : 0,
      sources: topNodes.slice(0, 5).map(n => ({
        id: n.id,
        content: n.content.substring(0, 200),
        score: n.score
      }))
    };
  }

  /**
   * Expand a single node into up to five candidate neighbors using three
   * strategies: widget usage patterns, memory-entity search, and recent
   * episodic events. Scores decay multiplicatively per hop.
   *
   * @param node    Node being expanded (its score/depth seed the children's).
   * @param query   Original user query, used for relevance scoring.
   * @param context Tenant scope forwarded to the memory subsystems.
   * @returns Top 5 candidate nodes, sorted by descending score.
   */
  private async expandNode(node: GraphNode, query: string, context: { userId: string; orgId: string }): Promise<GraphNode[]> {
    const memory = getCognitiveMemory();
    const expandedNodes: GraphNode[] = [];

    // Strategy 1: widget usage patterns. Optional chaining guards against
    // getWidgetPatterns resolving to undefined for unknown ids.
    const patterns = await memory.getWidgetPatterns(node.id);
    for (const source of patterns?.commonSources ?? []) {
      expandedNodes.push({
        id: `source-${source}`,
        type: 'source',
        content: `Source: ${source}`,
        score: node.score * 0.7, // Decay score over hops.
        depth: node.depth + 1,
        metadata: { relation: 'uses_source', averageLatency: patterns.averageLatency },
        connections: []
      });
    }

    // Strategy 2: memory-entity relations (direct graph edges).
    const relatedMemories = await this.memoryRepo.searchEntities({
      orgId: context.orgId,
      userId: context.userId,
      keywords: this.extractKeywords(node.content),
      limit: 5
    });
    for (const mem of relatedMemories) {
      // Only keep memories that are lexically related to the query.
      const semanticScore = await this.computeSemanticSimilarity(query, mem.content);
      if (semanticScore > this.minScore) {
        expandedNodes.push({
          id: `memory-${mem.id}`,
          type: mem.entity_type || 'memory',
          content: mem.content,
          // Blend stored importance, query relevance, and hop decay.
          score: (mem.importance || 0.5) * semanticScore * node.score * 0.7,
          depth: node.depth + 1,
          metadata: {
            relation: 'memory_relation',
            importance: mem.importance,
            semanticScore
          },
          connections: []
        });
      }
    }

    // Strategy 3: episodic connections from recent working-memory events.
    const workingMemory = await unifiedMemorySystem.getWorkingMemory({
      userId: context.userId,
      orgId: context.orgId
    });
    const relatedEvents = (workingMemory.recentEvents || []).slice(0, 3);
    for (const event of relatedEvents) {
      const eventContent = JSON.stringify(event);
      const semanticScore = await this.computeSemanticSimilarity(query, eventContent);
      if (semanticScore > this.minScore) {
        expandedNodes.push({
          // NOTE(review): Date.now() fallback makes ids non-deterministic for
          // events without an id — confirm events always carry one.
          id: `event-${event.id || Date.now()}`,
          type: 'episodic',
          content: eventContent.substring(0, 200),
          score: semanticScore * node.score * 0.6, // Episodic evidence decays faster.
          depth: node.depth + 1,
          metadata: {
            relation: 'episodic_memory',
            semanticScore
          },
          connections: []
        });
      }
    }

    // Keep only the strongest candidates per expanded node.
    return expandedNodes.sort((a, b) => b.score - a.score).slice(0, 5);
  }

  /**
   * Synthesize a final answer over the top graph nodes with the LLM.
   * Falls back to a simple concatenation of node contents on LLM failure.
   *
   * @param query   Original user query.
   * @param nodes   Top-scoring graph nodes (already sorted).
   * @param context Tenant scope, surfaced to the LLM for attribution.
   */
  private async synthesizeAnswer(query: string, nodes: GraphNode[], context: { userId: string; orgId: string }): Promise<string> {
    try {
      const llmService = getLlmService();
      // Build a numbered context window from the graph nodes.
      const graphContext = nodes.map((n, idx) =>
        `[${idx + 1}] ${n.type}: ${n.content.substring(0, 300)} (confidence: ${n.score.toFixed(2)})`
      ).join('\n\n');
      const reasoningPath = nodes.map(n => `${n.id} (depth: ${n.depth})`).join(' -> ');
      const systemContext = `You are an advanced reasoning assistant. Synthesize a comprehensive answer based on the knowledge graph context provided.
Use the reasoning path to explain how you arrived at the answer. Be precise, cite sources, and indicate confidence levels.`;
      const userPrompt = `Query: ${query}
Knowledge Graph Context:
${graphContext}
Reasoning Path: ${reasoningPath}
Provide a comprehensive answer synthesizing the information from the knowledge graph. Include:
1. Direct answer to the query
2. Key insights from the graph
3. Confidence assessment
4. Sources referenced`;
      const answer = await llmService.generateContextualResponse(
        systemContext,
        userPrompt,
        `User: ${context.userId}, Org: ${context.orgId}`
      );
      return answer || "Reasoning complete. See nodes for details.";
    } catch (error) {
      console.error('[GraphRAG] LLM synthesis error:', error);
      // Fallback: simple synthesis from the best three nodes.
      return `Based on ${nodes.length} related concepts found: ${nodes.slice(0, 3).map(n => n.content.substring(0, 100)).join('; ')}...`;
    }
  }

  /**
   * Score how related `content` is to `query`.
   *
   * NOTE(review): despite the name, this is currently lexical (Jaccard word
   * overlap + phrase containment) only. The previous version awaited
   * getVectorStore() but never used the result, and its catch block repeated
   * the identical Jaccard computation — both removed. True embedding-based
   * similarity via the vector store remains a TODO.
   *
   * @returns Score in [0, 1].
   */
  private async computeSemanticSimilarity(query: string, content: string): Promise<number> {
    // TODO: Generate embeddings and query getVectorStore() for true semantic similarity.
    return this.lexicalSimilarity(query, content);
  }

  /**
   * Jaccard similarity over words longer than 2 chars, plus a fixed 0.3 bonus
   * when the content contains the entire query as a phrase; capped at 1.0.
   */
  private lexicalSimilarity(query: string, content: string): number {
    const queryLower = query.toLowerCase();
    const contentLower = content.toLowerCase();
    const queryWords = new Set(queryLower.split(/\s+/).filter(w => w.length > 2));
    const contentWords = new Set(contentLower.split(/\s+/).filter(w => w.length > 2));
    const intersection = [...queryWords].filter(w => contentWords.has(w));
    const union = new Set([...queryWords, ...contentWords]);
    // Guard against division by zero when neither text has usable words.
    const jaccard = union.size > 0 ? intersection.length / union.size : 0;
    const phraseMatch = contentLower.includes(queryLower) ? 0.3 : 0;
    return Math.min(1.0, jaccard + phraseMatch);
  }

  /**
   * Extract up to five lowercase keywords (length > 3, stop-words removed)
   * from `content` for use in memory-entity search.
   */
  private extractKeywords(content: string): string[] {
    return content.toLowerCase()
      .split(/\s+/)
      .filter(w => w.length > 3 && !UnifiedGraphRAG.STOP_WORDS.has(w))
      .slice(0, 5);
  }
}
export const unifiedGraphRAG = new UnifiedGraphRAG();