// Update backend source — commit 34367da (verified), author: Kraft102
import { hybridSearchEngine } from './HybridSearchEngine.js';
import { getCognitiveMemory } from '../memory/CognitiveMemory.js';
import { unifiedMemorySystem } from './UnifiedMemorySystem.js';
import { getLlmService } from '../../services/llm/llmService.js';
import { MemoryRepository } from '../../services/memory/memoryRepository.js';
import { getVectorStore } from '../../platform/vector/index.js';
/**
 * A node in the in-memory knowledge graph built during a query.
 * Nodes originate from hybrid-search seeds or from expansion strategies
 * (usage patterns, memory entities, episodic events).
 */
interface GraphNode {
id: string;
type: string;
content: string;
// Relevance score; decays multiplicatively as nodes are discovered at deeper hops.
score: number;
// Hop distance from the seed set (seeds are depth 0).
depth: number;
// Free-form provenance data (e.g. `relation`, `importance`, `semanticScore`).
metadata: any;
// Outgoing edges recorded as the graph is expanded.
connections: GraphEdge[];
embedding?: number[]; // For semantic similarity
}
/** A directed, weighted edge from one graph node to another. */
interface GraphEdge {
targetId: string;
// Relation label, e.g. 'uses_source', 'memory_relation', 'episodic_memory'.
relation: string;
// Edge weight; currently the target node's score at discovery time.
weight: number;
}
/** Result of a full GraphRAG query: answer plus supporting evidence. */
interface GraphRAGResult {
answer: string;
// Human-readable log of how the graph was grown, hop by hop.
reasoning_path: string[];
// Top-scoring nodes that informed the answer.
nodes: GraphNode[];
// Score of the best node, or 0 when nothing was found.
confidence: number;
// Up to five truncated source snippets for citation.
sources?: Array<{ id: string; content: string; score: number }>;
}
export class UnifiedGraphRAG {
  /** Default maximum number of expansion hops when the caller does not override it. */
  private readonly maxHops: number = 2;
  /** Minimum score a candidate node must exceed to be admitted into the graph. */
  private readonly minScore: number = 0.3;
  /** Repository used to find related memory entities (expansion Strategy 2). */
  private readonly memoryRepo: MemoryRepository = new MemoryRepository();

  // Hoisted stop-word set so the list is built once, not on every extractKeywords call.
  // (Words of length <= 3 are already removed by the length filter; kept for safety.)
  private static readonly STOP_WORDS = new Set([
    'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'her', 'was',
    'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'man', 'new',
    'now', 'old', 'see', 'two', 'way', 'who', 'boy', 'did', 'its', 'let', 'put',
    'say', 'she', 'too', 'use'
  ]);

  /**
   * Perform multi-hop reasoning over the knowledge graph.
   *
   * Pipeline: (1) seed nodes from hybrid search, (2) breadth-first expansion
   * for up to `maxHops` hops via usage patterns, memory entities and episodic
   * events, (3) LLM synthesis over the top-scoring nodes.
   *
   * @param query   Natural-language question to reason about.
   * @param context Tenant scope (`userId`, `orgId`) plus an optional hop override.
   * @returns Synthesized answer, reasoning path, top nodes, confidence, and
   *          up to five truncated source snippets.
   */
  public async query(query: string, context: { userId: string; orgId: string; maxHops?: number }): Promise<GraphRAGResult> {
    console.log(`🧠 [GraphRAG] Starting reasoning for: "${query}"`);
    // '??' (not '||') so an explicit maxHops of 0 means "seeds only, no expansion".
    const maxHops = context.maxHops ?? this.maxHops;

    // 1. Get seed nodes from Hybrid Search (high-precision entry points).
    const seedResults = await hybridSearchEngine.search(query, {
      ...context,
      limit: 5
    });
    if (seedResults.length === 0) {
      return {
        answer: "No sufficient data found to reason about this query.",
        reasoning_path: [],
        nodes: [],
        confidence: 0
      };
    }

    // 2. Convert search results into depth-0 graph nodes.
    let frontier: GraphNode[] = seedResults.map(r => ({
      id: r.id,
      type: r.type,
      content: r.content,
      score: r.score,
      depth: 0,
      metadata: r.metadata,
      connections: []
    }));
    const visited = new Set<string>(frontier.map(n => n.id));
    const knowledgeGraph: GraphNode[] = [...frontier];
    const reasoningPath: string[] = [`Found ${frontier.length} starting points: ${frontier.map(n => n.id).join(', ')}`];

    // 3. Expand graph (breadth-first, multi-hop traversal).
    for (let hop = 1; hop <= maxHops; hop++) {
      console.log(`🔍 [GraphRAG] Hop ${hop}: Expanding ${frontier.length} nodes`);
      const newFrontier: GraphNode[] = [];
      for (const node of frontier) {
        const connections = await this.expandNode(node, query, context);
        for (const conn of connections) {
          // Admit only unseen, sufficiently-relevant nodes.
          if (!visited.has(conn.id) && conn.score > this.minScore) {
            visited.add(conn.id);
            newFrontier.push(conn);
            knowledgeGraph.push(conn);
            // Record the edge on the parent so the graph structure is queryable.
            node.connections.push({
              targetId: conn.id,
              relation: conn.metadata.relation || 'related_to',
              weight: conn.score
            });
          }
        }
      }
      if (newFrontier.length > 0) {
        reasoningPath.push(`Hop ${hop}: Discovered ${newFrontier.length} new related concepts.`);
        frontier = newFrontier;
      } else {
        break; // No more connections found.
      }
    }

    // 4. Synthesize an answer over the best nodes. Sort a copy so the
    //    accumulated graph (and parent/child insertion order) is not mutated.
    const topNodes = [...knowledgeGraph].sort((a, b) => b.score - a.score).slice(0, 10);
    const answer = await this.synthesizeAnswer(query, topNodes, context);
    return {
      answer,
      reasoning_path: reasoningPath,
      nodes: topNodes,
      confidence: topNodes.length > 0 ? topNodes[0].score : 0,
      sources: topNodes.slice(0, 5).map(n => ({
        id: n.id,
        content: n.content.substring(0, 200),
        score: n.score
      }))
    };
  }

  /**
   * Expand a single node into up to five candidate neighbors using three
   * strategies: widget usage patterns, memory-entity search, and recent
   * episodic events. Scores decay multiplicatively per hop.
   *
   * @param node    Node being expanded (its score/depth seed the children's).
   * @param query   Original user query, used for relevance scoring.
   * @param context Tenant scope forwarded to the memory subsystems.
   * @returns Top 5 candidate nodes, sorted by descending score.
   */
  private async expandNode(node: GraphNode, query: string, context: { userId: string; orgId: string }): Promise<GraphNode[]> {
    const memory = getCognitiveMemory();
    const expandedNodes: GraphNode[] = [];

    // Strategy 1: widget usage patterns. Optional chaining guards against
    // getWidgetPatterns resolving to undefined for unknown ids.
    const patterns = await memory.getWidgetPatterns(node.id);
    for (const source of patterns?.commonSources ?? []) {
      expandedNodes.push({
        id: `source-${source}`,
        type: 'source',
        content: `Source: ${source}`,
        score: node.score * 0.7, // Decay score over hops.
        depth: node.depth + 1,
        metadata: { relation: 'uses_source', averageLatency: patterns.averageLatency },
        connections: []
      });
    }

    // Strategy 2: memory-entity relations (direct graph edges).
    const relatedMemories = await this.memoryRepo.searchEntities({
      orgId: context.orgId,
      userId: context.userId,
      keywords: this.extractKeywords(node.content),
      limit: 5
    });
    for (const mem of relatedMemories) {
      // Only keep memories that are lexically related to the query.
      const semanticScore = await this.computeSemanticSimilarity(query, mem.content);
      if (semanticScore > this.minScore) {
        expandedNodes.push({
          id: `memory-${mem.id}`,
          type: mem.entity_type || 'memory',
          content: mem.content,
          // Blend stored importance, query relevance, and hop decay.
          score: (mem.importance || 0.5) * semanticScore * node.score * 0.7,
          depth: node.depth + 1,
          metadata: {
            relation: 'memory_relation',
            importance: mem.importance,
            semanticScore
          },
          connections: []
        });
      }
    }

    // Strategy 3: episodic connections from recent working-memory events.
    const workingMemory = await unifiedMemorySystem.getWorkingMemory({
      userId: context.userId,
      orgId: context.orgId
    });
    const relatedEvents = (workingMemory.recentEvents || []).slice(0, 3);
    for (const event of relatedEvents) {
      const eventContent = JSON.stringify(event);
      const semanticScore = await this.computeSemanticSimilarity(query, eventContent);
      if (semanticScore > this.minScore) {
        expandedNodes.push({
          // NOTE(review): Date.now() fallback makes ids non-deterministic for
          // events without an id — confirm events always carry one.
          id: `event-${event.id || Date.now()}`,
          type: 'episodic',
          content: eventContent.substring(0, 200),
          score: semanticScore * node.score * 0.6, // Episodic evidence decays faster.
          depth: node.depth + 1,
          metadata: {
            relation: 'episodic_memory',
            semanticScore
          },
          connections: []
        });
      }
    }

    // Keep only the strongest candidates per expanded node.
    return expandedNodes.sort((a, b) => b.score - a.score).slice(0, 5);
  }

  /**
   * Synthesize a final answer over the top graph nodes with the LLM.
   * Falls back to a simple concatenation of node contents on LLM failure.
   *
   * @param query   Original user query.
   * @param nodes   Top-scoring graph nodes (already sorted).
   * @param context Tenant scope, surfaced to the LLM for attribution.
   */
  private async synthesizeAnswer(query: string, nodes: GraphNode[], context: { userId: string; orgId: string }): Promise<string> {
    try {
      const llmService = getLlmService();
      // Build a numbered context window from the graph nodes.
      const graphContext = nodes.map((n, idx) =>
        `[${idx + 1}] ${n.type}: ${n.content.substring(0, 300)} (confidence: ${n.score.toFixed(2)})`
      ).join('\n\n');
      const reasoningPath = nodes.map(n => `${n.id} (depth: ${n.depth})`).join(' -> ');
      const systemContext = `You are an advanced reasoning assistant. Synthesize a comprehensive answer based on the knowledge graph context provided.
Use the reasoning path to explain how you arrived at the answer. Be precise, cite sources, and indicate confidence levels.`;
      const userPrompt = `Query: ${query}
Knowledge Graph Context:
${graphContext}
Reasoning Path: ${reasoningPath}
Provide a comprehensive answer synthesizing the information from the knowledge graph. Include:
1. Direct answer to the query
2. Key insights from the graph
3. Confidence assessment
4. Sources referenced`;
      const answer = await llmService.generateContextualResponse(
        systemContext,
        userPrompt,
        `User: ${context.userId}, Org: ${context.orgId}`
      );
      return answer || "Reasoning complete. See nodes for details.";
    } catch (error) {
      console.error('[GraphRAG] LLM synthesis error:', error);
      // Fallback: simple synthesis from the best three nodes.
      return `Based on ${nodes.length} related concepts found: ${nodes.slice(0, 3).map(n => n.content.substring(0, 100)).join('; ')}...`;
    }
  }

  /**
   * Score how related `content` is to `query`.
   *
   * NOTE(review): despite the name, this is currently lexical (Jaccard word
   * overlap + phrase containment) only. The previous version awaited
   * getVectorStore() but never used the result, and its catch block repeated
   * the identical Jaccard computation — both removed. True embedding-based
   * similarity via the vector store remains a TODO.
   *
   * @returns Score in [0, 1].
   */
  private async computeSemanticSimilarity(query: string, content: string): Promise<number> {
    // TODO: Generate embeddings and query getVectorStore() for true semantic similarity.
    return this.lexicalSimilarity(query, content);
  }

  /**
   * Jaccard similarity over words longer than 2 chars, plus a fixed 0.3 bonus
   * when the content contains the entire query as a phrase; capped at 1.0.
   */
  private lexicalSimilarity(query: string, content: string): number {
    const queryLower = query.toLowerCase();
    const contentLower = content.toLowerCase();
    const queryWords = new Set(queryLower.split(/\s+/).filter(w => w.length > 2));
    const contentWords = new Set(contentLower.split(/\s+/).filter(w => w.length > 2));
    const intersection = [...queryWords].filter(w => contentWords.has(w));
    const union = new Set([...queryWords, ...contentWords]);
    // Guard against division by zero when neither text has usable words.
    const jaccard = union.size > 0 ? intersection.length / union.size : 0;
    const phraseMatch = contentLower.includes(queryLower) ? 0.3 : 0;
    return Math.min(1.0, jaccard + phraseMatch);
  }

  /**
   * Extract up to five lowercase keywords (length > 3, stop-words removed)
   * from `content` for use in memory-entity search.
   */
  private extractKeywords(content: string): string[] {
    return content.toLowerCase()
      .split(/\s+/)
      .filter(w => w.length > 3 && !UnifiedGraphRAG.STOP_WORDS.has(w))
      .slice(0, 5);
  }
}
export const unifiedGraphRAG = new UnifiedGraphRAG();