// widgetdc-cortex/apps/backend/src/services/ErrorKnowledgeBase.ts
// Kraft102: Initial deployment - WidgeTDC Cortex Backend v2.1.0 (commit 529090e)
/**
 * ╔════════════════════════════════════════════════════════════════════════════════╗
 * ║                              ERROR KNOWLEDGE BASE                              ║
 * ║════════════════════════════════════════════════════════════════════════════════║
 * ║ Collects error patterns from external sources (GitHub, HuggingFace, CVE, etc.) ║
 * ║ Used by SelfHealingAdapter to learn and predict errors                         ║
 * ╚════════════════════════════════════════════════════════════════════════════════╝
 */
import crypto from 'crypto';
import { logger } from '../utils/logger.js';
// The Neo4j service is imported on first use rather than at module load,
// which avoids a circular dependency between this module and the database layer.
let neo4jServiceInstance: any = null;

/**
 * Returns the Neo4j service, importing it on the first call and caching it afterwards.
 *
 * @returns The cached service, or the unchanged cache value (initially `null`)
 *          when the dynamic import fails.
 */
async function getNeo4jService() {
  // Fast path: a previous call already resolved the service.
  if (neo4jServiceInstance) {
    return neo4jServiceInstance;
  }

  try {
    const neo4jModule = await import('../database/Neo4jService.js');
    neo4jServiceInstance = neo4jModule.neo4jService;
  } catch (_importError) {
    // Deliberately best-effort: when the import fails the cache stays empty
    // and callers treat the falsy result as "Neo4j not available".
  }

  return neo4jServiceInstance;
}
// Child logger created with the binding { module: 'ErrorKnowledgeBase' };
// all logging in this file goes through it.
const log = logger.child({ module: 'ErrorKnowledgeBase' });
// ═══════════════════════════════════════════════════════════════════════════
// TYPES
// ═══════════════════════════════════════════════════════════════════════════
/**
 * A known error signature with its classification and candidate solutions.
 *
 * Callers supply everything except `id`, `occurrences`, `lastSeen` and
 * `createdAt`; `ErrorKnowledgeBase.ingest` fills those in.
 */
export interface ErrorPattern {
id: string; // For ingested patterns: first 16 hex chars of SHA-256 over normalized signature + source
source: ErrorSource; // Where the pattern came from; part of the ID hash
category: ErrorCategory;
signature: string; // Error message text to match; compared as plain text (this file never compiles it as a regex)
description: string;
severity: 'low' | 'medium' | 'high' | 'critical';
solutions: Solution[]; // Candidate fixes; new ones are merged in when the same pattern is ingested again
tags: string[];
cveId?: string; // CVE identifier if applicable
cweId?: string; // CWE identifier if applicable
language?: string; // Programming language
framework?: string; // Framework (e.g., Express, React)
occurrences: number; // Starts at 1 and is incremented each time the same pattern is ingested again
lastSeen: Date; // Set at first ingest and refreshed on every repeated ingest
createdAt: Date; // Set once, at first ingest
metadata?: Record<string, any>; // Free-form extra data; not read anywhere in this file
}
/**
 * One suggested fix for an error pattern, optionally carrying feedback
 * counters maintained by `ErrorKnowledgeBase.recordFeedback`.
 */
export interface Solution {
description: string; // Also used as the identity key when merging solutions of a re-ingested pattern
code?: string;
confidence: number; // 0-1; once 3+ feedback events exist, recordFeedback re-blends it and clamps to [0.1, 0.99]
source: string; // Free-text origin of the advice (e.g. 'built-in', 'OWASP')
verified: boolean;
// Feedback tracking
successCount?: number; // Times this solution worked
failureCount?: number; // Times this solution failed
lastUsed?: Date; // Set whenever feedback is recorded for this solution
}
/**
 * Origin label of an error pattern.
 *
 * The value is hashed into the pattern ID together with the signature, so
 * the same signature from two sources yields two distinct patterns. It is
 * also the key of the per-source counters reported by `getStats()`.
 */
export type ErrorSource =
| 'github-defects4j'
| 'github-bugsjs'
| 'huggingface-cve'
| 'huggingface-defect-detection'
| 'huggingface-hdfs-logs'
| 'microsoft-office-api'
| 'microsoft-graph-api'
| 'internal-logs'
| 'user-reported'
| 'gaia-aiops'
| 'nodejs-system'
| 'http-standards'
| 'postgresql-official'
| 'typescript-compiler'
| 'curated-knowledge';
/**
 * Coarse classification of an error pattern.
 *
 * `getStats()` reports the number of stored patterns per category.
 */
export type ErrorCategory =
| 'runtime'
| 'syntax'
| 'type'
| 'security'
| 'performance'
| 'network'
| 'database'
| 'api'
| 'office'
| 'authentication'
| 'memory'
| 'concurrency'
| 'configuration'
| 'dependency';
// ═══════════════════════════════════════════════════════════════════════════
// ERROR KNOWLEDGE BASE CLASS
// ═══════════════════════════════════════════════════════════════════════════
/**
 * In-memory catalogue of known error patterns and their candidate solutions.
 *
 * The class is a singleton (see {@link ErrorKnowledgeBase.getInstance}). It is
 * seeded with built-in patterns on construction, accepts further patterns via
 * `ingest`/`batchIngest`, matches free-text error messages against the stored
 * signatures, and can persist to / restore from Neo4j so that solution
 * feedback survives restarts.
 */
export class ErrorKnowledgeBase {
  private static instance: ErrorKnowledgeBase;

  /**
   * errno-style codes such as ECONNREFUSED. Deliberately case-sensitive:
   * a case-insensitive match would treat every word starting with "e"
   * ("error", "expected", ...) as an error code.
   */
  private static readonly ERRNO_CODE_PATTERN = /\bE[A-Z]{2,}\b/g;

  /** Prefixed numeric codes: SQLSTATE nnn, HTTP nnn, TSnnnn. */
  private static readonly PREFIXED_CODE_PATTERN = /\b(?:SQLSTATE\s*\d+|HTTP\s*\d{3}|TS\d{4})\b/gi;

  /** Upper-case words that look like errno codes but carry no identifying information. */
  private static readonly GENERIC_UPPERCASE_WORDS: ReadonlySet<string> = new Set([
    'ERR',
    'ERROR',
    'ERRORS',
    'EXCEPTION',
    'EXCEPTIONS'
  ]);

  /** All known patterns keyed by pattern ID. */
  private patterns: Map<string, ErrorPattern> = new Map();
  /** Normalized signature -> IDs of the patterns sharing it (cross-source duplicate lookup). */
  private signatureIndex: Map<string, Set<string>> = new Map();
  /** Number of patterns registered per source. */
  private sourceStats: Map<ErrorSource, number> = new Map();

  private constructor() {
    this.initializeBuiltInPatterns();
  }

  /**
   * Returns the process-wide instance, creating it on first use.
   */
  public static getInstance(): ErrorKnowledgeBase {
    if (!ErrorKnowledgeBase.instance) {
      ErrorKnowledgeBase.instance = new ErrorKnowledgeBase();
    }
    return ErrorKnowledgeBase.instance;
  }

  /**
   * Derives the pattern ID: the first 16 hex characters of the SHA-256 of the
   * lower-cased, whitespace-collapsed signature joined with the source.
   * The same signature from a different source therefore gets a different ID.
   */
  private generateId(signature: string, source: ErrorSource): string {
    const normalized = signature.toLowerCase().trim().replace(/\s+/g, ' ');
    return crypto.createHash('sha256')
      .update(`${normalized}:${source}`)
      .digest('hex')
      .substring(0, 16);
  }

  /**
   * Reduces a message to a comparison form that ignores variable parts.
   *
   * Steps, in order: lower-case, hex literals -> placeholder, digit runs -> `N`,
   * quoted strings -> `"STR"`, whitespace collapsed and trimmed. Because digit
   * runs are replaced after hex literals, the hex placeholder ends up as
   * `NxHEX`; this is harmless since both sides of every comparison go through
   * the same function.
   */
  private normalizeSignature(sig: string): string {
    return sig
      .toLowerCase()
      .replace(/0x[0-9a-f]+/gi, '0xHEX') // Hex addresses
      .replace(/\d+/g, 'N') // Numbers
      .replace(/['"][^'"]*['"]/g, '"STR"') // String literals
      .replace(/\s+/g, ' ') // Whitespace
      .trim();
  }

  /**
   * Reports whether a pattern with this signature is already known.
   *
   * @returns `true` when the exact (signature, source) ID exists, or when any
   *          pattern from any source shares the same normalized signature.
   */
  public isDuplicate(signature: string, source: ErrorSource): boolean {
    const id = this.generateId(signature, source);
    if (this.patterns.has(id)) {
      return true;
    }
    // Also check normalized signature across all sources
    const normalized = this.normalizeSignature(signature);
    const existing = this.signatureIndex.get(normalized);
    return existing !== undefined && existing.size > 0;
  }

  /**
   * Adds a pattern, or updates the existing one with the same (signature, source) ID.
   *
   * On a repeat, `occurrences` is incremented, `lastSeen` is refreshed and any
   * solution whose description is not yet present is appended.
   *
   * @param pattern Pattern data without the bookkeeping fields.
   * @returns `true` when a new pattern was stored, `false` when an existing one was updated.
   */
  public ingest(pattern: Omit<ErrorPattern, 'id' | 'occurrences' | 'lastSeen' | 'createdAt'>): boolean {
    const id = this.generateId(pattern.signature, pattern.source);

    const existing = this.patterns.get(id);
    if (existing) {
      existing.occurrences++;
      existing.lastSeen = new Date();
      // Merge solutions
      for (const sol of pattern.solutions) {
        if (!existing.solutions.some(s => s.description === sol.description)) {
          // Copy: recordFeedback mutates stored solutions and must not reach into caller-owned objects.
          existing.solutions.push({ ...sol });
        }
      }
      return false; // Not new
    }

    const newPattern: ErrorPattern = {
      ...pattern,
      // Own copies, so later feedback updates never alter the caller's input.
      solutions: (pattern.solutions ?? []).map(sol => ({ ...sol })),
      tags: [...(pattern.tags ?? [])],
      id,
      occurrences: 1,
      lastSeen: new Date(),
      createdAt: new Date()
    };
    this.registerPattern(newPattern);

    log.info(`Ingested error pattern: ${pattern.category}/${pattern.signature.substring(0, 50)}...`);
    return true; // New pattern
  }

  /**
   * Stores a pattern and keeps the signature index and per-source counters in
   * step. Every code path that adds a pattern must go through here.
   */
  private registerPattern(pattern: ErrorPattern): void {
    const normalized = this.normalizeSignature(pattern.signature);
    let ids = this.signatureIndex.get(normalized);
    if (!ids) {
      ids = new Set();
      this.signatureIndex.set(normalized, ids);
    }
    ids.add(pattern.id);

    this.patterns.set(pattern.id, pattern);
    this.sourceStats.set(pattern.source, (this.sourceStats.get(pattern.source) ?? 0) + 1);
  }

  /**
   * Ingests many patterns, forcing each one's `source` to the given value.
   *
   * @returns Counts of processed, newly stored and already-known patterns.
   */
  public async batchIngest(
    patterns: Omit<ErrorPattern, 'id' | 'occurrences' | 'lastSeen' | 'createdAt'>[],
    source: ErrorSource
  ): Promise<{ total: number; new: number; duplicates: number }> {
    let newCount = 0;
    let dupeCount = 0;
    for (const pattern of patterns) {
      const isNew = this.ingest({ ...pattern, source });
      if (isNew) newCount++;
      else dupeCount++;
    }
    log.info(`Batch ingest from ${source}: ${newCount} new, ${dupeCount} duplicates`);
    return { total: patterns.length, new: newCount, duplicates: dupeCount };
  }

  /**
   * Finds the stored patterns that best match an error message.
   *
   * @param errorMessage Raw error text.
   * @param limit Maximum number of results (values below 0 are treated as 0).
   * @returns Patterns scoring above 0.3, best first. Empty when the message
   *          is empty or whitespace-only.
   */
  public findMatches(errorMessage: string, limit = 5): ErrorPattern[] {
    const normalizedQuery = this.normalizeSignature(errorMessage);
    // An empty query is a substring of every signature and would otherwise
    // "match" the whole knowledge base with a perfect score.
    if (normalizedQuery === '') {
      return [];
    }

    // Codes are taken from the raw message: normalization lower-cases the
    // text and replaces digits, which destroys exactly what identifies a code.
    const queryCodes = ErrorKnowledgeBase.extractErrorCodes(errorMessage);
    const noCodes = new Set<string>();

    const results: { pattern: ErrorPattern; score: number }[] = [];
    for (const pattern of this.patterns.values()) {
      const signatureCodes = queryCodes.size > 0
        ? ErrorKnowledgeBase.extractErrorCodes(pattern.signature)
        : noCodes;
      const score = this.similarityScore(
        normalizedQuery,
        this.normalizeSignature(pattern.signature),
        queryCodes,
        signatureCodes
      );
      if (score > 0.3) { // Lowered threshold for better fuzzy matching
        results.push({ pattern, score });
      }
    }

    return results
      .sort((a, b) => b.score - a.score)
      .slice(0, Math.max(0, limit))
      .map(r => r.pattern);
  }

  /**
   * Extracts error codes from raw (un-normalized) text in canonical form:
   * errno-style codes as written (upper-case only), prefixed codes upper-cased
   * with whitespace removed (`http 503` -> `HTTP503`).
   */
  private static extractErrorCodes(text: string): Set<string> {
    const codes = new Set<string>();
    for (const word of text.match(ErrorKnowledgeBase.ERRNO_CODE_PATTERN) ?? []) {
      if (!ErrorKnowledgeBase.GENERIC_UPPERCASE_WORDS.has(word)) {
        codes.add(word);
      }
    }
    for (const code of text.match(ErrorKnowledgeBase.PREFIXED_CODE_PATTERN) ?? []) {
      codes.add(code.toUpperCase().replace(/\s+/g, ''));
    }
    return codes;
  }

  /**
   * Scores how well a query matches a signature, in the range 0..1.
   *
   * @param query Normalized query text.
   * @param signature Normalized signature text.
   * @param queryCodes Error codes found in the raw query.
   * @param signatureCodes Error codes found in the raw signature.
   * @returns 1.0 for substring containment, 0.95 for a shared error code,
   *          otherwise the larger of word-level Jaccard similarity and
   *          0.8 x the fraction of query words found in the signature.
   */
  private similarityScore(
    query: string,
    signature: string,
    queryCodes: ReadonlySet<string> = new Set<string>(),
    signatureCodes: ReadonlySet<string> = new Set<string>()
  ): number {
    // An empty side is contained in everything; it carries no evidence.
    if (query === '' || signature === '') {
      return 0;
    }

    // Strategy 1: Substring containment (highest priority)
    if (signature.includes(query) || query.includes(signature)) {
      return 1.0;
    }

    // Strategy 2: Key error code matching (ECONNREFUSED, SQLSTATE, HTTP 4xx/5xx)
    for (const code of queryCodes) {
      if (signatureCodes.has(code)) {
        return 0.95;
      }
    }

    // Strategy 3: Jaccard similarity on words (words of 1-2 characters are ignored)
    const setA = new Set(query.split(/\s+/).filter(w => w.length > 2));
    const setB = new Set(signature.split(/\s+/).filter(w => w.length > 2));
    const intersection = new Set([...setA].filter(x => setB.has(x)));
    const union = new Set([...setA, ...setB]);
    const jaccard = union.size > 0 ? intersection.size / union.size : 0;

    // Strategy 4: Coverage - what % of query words appear in signature
    const coverage = setA.size > 0 ? intersection.size / setA.size : 0;

    // Combined score: weight coverage more heavily
    return Math.max(jaccard, coverage * 0.8);
  }

  /**
   * Returns up to five solutions from the three best-matching patterns,
   * ordered by descending confidence. The returned objects are the stored
   * solutions themselves, not copies.
   */
  public getSolutions(errorMessage: string): Solution[] {
    const matches = this.findMatches(errorMessage, 3);
    const solutions: Solution[] = [];
    for (const match of matches) {
      solutions.push(...match.solutions);
    }
    return solutions
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, 5);
  }

  /**
   * Summarises the knowledge base: total pattern count plus counts per
   * source, category and severity.
   */
  public getStats() {
    const categoryCount: Record<string, number> = {};
    const severityCount: Record<string, number> = {};
    for (const pattern of this.patterns.values()) {
      categoryCount[pattern.category] = (categoryCount[pattern.category] || 0) + 1;
      severityCount[pattern.severity] = (severityCount[pattern.severity] || 0) + 1;
    }
    return {
      totalPatterns: this.patterns.size,
      bySource: Object.fromEntries(this.sourceStats),
      byCategory: categoryCount,
      bySeverity: severityCount
    };
  }

  /**
   * Returns all stored patterns as a new array (the pattern objects are shared, not cloned).
   */
  public exportPatterns(): ErrorPattern[] {
    return Array.from(this.patterns.values());
  }

  /**
   * Seeds the knowledge base with the built-in patterns. Called once from the constructor.
   */
  private initializeBuiltInPatterns() {
    const builtInPatterns: Omit<ErrorPattern, 'id' | 'occurrences' | 'lastSeen' | 'createdAt'>[] = [
      // Network errors
      {
        source: 'internal-logs',
        category: 'network',
        signature: 'ECONNREFUSED',
        description: 'Connection refused - target service not running or firewall blocking',
        severity: 'high',
        solutions: [
          { description: 'Check if target service is running', confidence: 0.9, source: 'built-in', verified: true },
          { description: 'Verify firewall rules allow connection', confidence: 0.7, source: 'built-in', verified: true },
          { description: 'Check if port number is correct', confidence: 0.8, source: 'built-in', verified: true }
        ],
        tags: ['network', 'connection', 'tcp']
      },
      {
        source: 'internal-logs',
        category: 'network',
        signature: 'ETIMEDOUT',
        description: 'Connection timed out - network latency or service overload',
        severity: 'medium',
        solutions: [
          { description: 'Increase timeout value', confidence: 0.8, source: 'built-in', verified: true },
          { description: 'Check network connectivity', confidence: 0.7, source: 'built-in', verified: true },
          { description: 'Implement retry with exponential backoff', confidence: 0.9, source: 'built-in', verified: true }
        ],
        tags: ['network', 'timeout', 'latency']
      },
      // Database errors
      {
        source: 'internal-logs',
        category: 'database',
        signature: 'Neo4jError: ServiceUnavailable',
        description: 'Neo4j database not reachable',
        severity: 'critical',
        solutions: [
          { description: 'Check Neo4j container/service status', confidence: 0.9, source: 'built-in', verified: true },
          { description: 'Verify NEO4J_URI environment variable', confidence: 0.8, source: 'built-in', verified: true },
          { description: 'Check AuraDB connection limits', confidence: 0.6, source: 'built-in', verified: true }
        ],
        tags: ['neo4j', 'database', 'graph']
      },
      {
        source: 'internal-logs',
        category: 'database',
        signature: 'PrismaClientKnownRequestError',
        description: 'Prisma database query error',
        severity: 'high',
        solutions: [
          { description: 'Check if database schema is migrated', confidence: 0.8, source: 'built-in', verified: true },
          { description: 'Verify DATABASE_URL is correct', confidence: 0.7, source: 'built-in', verified: true },
          { description: 'Run npx prisma db push', confidence: 0.9, source: 'built-in', verified: true }
        ],
        tags: ['prisma', 'postgresql', 'database']
      },
      // TypeScript/JavaScript errors
      {
        source: 'internal-logs',
        category: 'type',
        signature: 'TypeError: Cannot read properties of undefined',
        description: 'Accessing property on undefined value',
        severity: 'medium',
        solutions: [
          { description: 'Add null/undefined check before accessing', confidence: 0.9, source: 'built-in', verified: true },
          { description: 'Use optional chaining (?.) operator', confidence: 0.95, source: 'built-in', verified: true },
          { description: 'Provide default value with ?? operator', confidence: 0.8, source: 'built-in', verified: true }
        ],
        tags: ['typescript', 'javascript', 'null-safety']
      },
      {
        source: 'internal-logs',
        category: 'syntax',
        signature: 'SyntaxError: Unexpected token',
        description: 'Invalid JavaScript/JSON syntax',
        severity: 'high',
        solutions: [
          { description: 'Check for missing brackets or quotes', confidence: 0.8, source: 'built-in', verified: true },
          { description: 'Validate JSON with JSON.parse in try-catch', confidence: 0.7, source: 'built-in', verified: true },
          { description: 'Check for trailing commas in JSON', confidence: 0.6, source: 'built-in', verified: true }
        ],
        tags: ['syntax', 'json', 'parsing']
      },
      // Office API errors
      {
        source: 'microsoft-office-api',
        category: 'office',
        signature: 'InvalidReference',
        description: 'Office API reference is not valid for operation',
        severity: 'medium',
        solutions: [
          { description: 'Ensure object exists before operation', confidence: 0.9, source: 'microsoft-docs', verified: true },
          { description: 'Check if document is still open', confidence: 0.7, source: 'microsoft-docs', verified: true }
        ],
        tags: ['office', 'excel', 'word', 'api'],
        cweId: 'CWE-476'
      },
      {
        source: 'microsoft-graph-api',
        category: 'api',
        signature: 'Error 429: Too Many Requests',
        description: 'Microsoft Graph API rate limit exceeded',
        severity: 'medium',
        solutions: [
          { description: 'Implement exponential backoff retry', confidence: 0.95, source: 'microsoft-docs', verified: true },
          { description: 'Check Retry-After header for wait time', confidence: 0.9, source: 'microsoft-docs', verified: true },
          { description: 'Batch multiple requests together', confidence: 0.8, source: 'microsoft-docs', verified: true }
        ],
        tags: ['graph-api', 'rate-limit', 'throttling']
      },
      // Security patterns
      {
        source: 'huggingface-cve',
        category: 'security',
        signature: 'SQL injection detected',
        description: 'Potential SQL injection vulnerability',
        severity: 'critical',
        solutions: [
          { description: 'Use parameterized queries', confidence: 0.99, source: 'OWASP', verified: true },
          { description: 'Sanitize user input', confidence: 0.9, source: 'OWASP', verified: true },
          { description: 'Use ORM instead of raw SQL', confidence: 0.85, source: 'best-practice', verified: true }
        ],
        tags: ['security', 'sql', 'injection'],
        cweId: 'CWE-89'
      },
      {
        source: 'huggingface-cve',
        category: 'security',
        signature: 'XSS vulnerability',
        description: 'Cross-site scripting vulnerability detected',
        severity: 'critical',
        solutions: [
          { description: 'Escape HTML output', confidence: 0.95, source: 'OWASP', verified: true },
          { description: 'Use Content Security Policy headers', confidence: 0.9, source: 'OWASP', verified: true },
          { description: 'Validate and sanitize input', confidence: 0.85, source: 'best-practice', verified: true }
        ],
        tags: ['security', 'xss', 'injection'],
        cweId: 'CWE-79'
      },
      // Memory errors
      {
        source: 'internal-logs',
        category: 'memory',
        signature: 'FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed',
        description: 'Node.js heap out of memory',
        severity: 'critical',
        solutions: [
          { description: 'Increase Node.js heap size: --max-old-space-size=4096', confidence: 0.9, source: 'built-in', verified: true },
          { description: 'Check for memory leaks with --inspect', confidence: 0.8, source: 'built-in', verified: true },
          { description: 'Implement pagination for large data sets', confidence: 0.85, source: 'best-practice', verified: true }
        ],
        tags: ['memory', 'heap', 'oom']
      },
      // Redis errors
      {
        source: 'internal-logs',
        category: 'database',
        signature: 'Redis connection error',
        description: 'Cannot connect to Redis server',
        severity: 'high',
        solutions: [
          { description: 'Check if Redis container is running', confidence: 0.9, source: 'built-in', verified: true },
          { description: 'Verify REDIS_URL environment variable', confidence: 0.8, source: 'built-in', verified: true },
          { description: 'Check Redis maxmemory settings', confidence: 0.6, source: 'built-in', verified: true }
        ],
        tags: ['redis', 'cache', 'connection']
      }
    ];

    for (const pattern of builtInPatterns) {
      this.ingest(pattern);
    }
    log.info(`Initialized ErrorKnowledgeBase with ${this.patterns.size} built-in patterns`);
  }

  // ═══════════════════════════════════════════════════════════════════════════
  // NEO4J PERSISTENCE - Feedback Loop
  // ═══════════════════════════════════════════════════════════════════════════

  /**
   * Upserts one pattern as an `ErrorPattern` node keyed by `id`.
   * Solutions are stored as a single JSON string property.
   *
   * @returns `true` on success; `false` when Neo4j is unavailable or the query fails
   *          (failures are logged, never thrown).
   */
  public async persistToNeo4j(pattern: ErrorPattern): Promise<boolean> {
    try {
      const neo4j = await getNeo4jService();
      if (!neo4j) {
        log.warn('Neo4j not available, skipping persistence');
        return false;
      }

      const query = `
        MERGE (ep:ErrorPattern {id: $id})
        SET ep.source = $source,
        ep.category = $category,
        ep.signature = $signature,
        ep.description = $description,
        ep.severity = $severity,
        ep.tags = $tags,
        ep.cveId = $cveId,
        ep.cweId = $cweId,
        ep.language = $language,
        ep.framework = $framework,
        ep.occurrences = $occurrences,
        ep.lastSeen = datetime($lastSeen),
        ep.createdAt = datetime($createdAt),
        ep.solutions = $solutions
        RETURN ep
      `;

      await neo4j.runQuery(query, {
        id: pattern.id,
        source: pattern.source,
        category: pattern.category,
        signature: pattern.signature,
        description: pattern.description,
        severity: pattern.severity,
        tags: pattern.tags,
        cveId: pattern.cveId || null,
        cweId: pattern.cweId || null,
        language: pattern.language || null,
        framework: pattern.framework || null,
        occurrences: pattern.occurrences,
        lastSeen: pattern.lastSeen.toISOString(),
        createdAt: pattern.createdAt.toISOString(),
        solutions: JSON.stringify(pattern.solutions)
      });

      log.debug(`Persisted pattern ${pattern.id} to Neo4j`);
      return true;
    } catch (error) {
      log.error('Failed to persist pattern to Neo4j:', error);
      return false;
    }
  }

  /**
   * Loads all `ErrorPattern` nodes from Neo4j into memory.
   *
   * Unknown patterns are registered (including signature index and source
   * counters). For patterns that already exist in memory - notably the
   * built-ins, which are always ingested first - the persisted occurrence
   * count and solution feedback are merged in instead of being discarded.
   * A record with an unreadable payload is handled on its own and never
   * aborts the rest of the load.
   *
   * @returns Number of patterns newly added to memory (merged ones are not counted);
   *          0 when Neo4j is unavailable or the query fails.
   */
  public async loadFromNeo4j(): Promise<number> {
    try {
      const neo4j = await getNeo4jService();
      if (!neo4j) {
        log.warn('Neo4j not available, skipping load');
        return 0;
      }

      const query = `
        MATCH (ep:ErrorPattern)
        RETURN ep
        ORDER BY ep.occurrences DESC
      `;
      const records = await neo4j.runQuery(query);

      let loaded = 0;
      for (const record of records) {
        // Accept both a driver node (fields under `properties`) and a plain object.
        const ep = record.ep?.properties || record.ep;
        if (!ep || !ep.id) continue;

        // A pattern without a textual signature would make every later
        // findMatches() call throw during normalization.
        if (typeof ep.signature !== 'string') {
          log.warn(`Skipping stored pattern ${ep.id}: signature is missing or not a string`);
          continue;
        }

        let solutions: Solution[];
        try {
          solutions = ErrorKnowledgeBase.parseStoredSolutions(ep.solutions);
        } catch (parseError) {
          log.warn(`Stored pattern ${ep.id} has an unreadable solutions payload; loading it without solutions`);
          solutions = [];
        }

        // Counts may arrive as a driver integer object exposing toNumber(), or as a plain value.
        const rawOccurrences = typeof ep.occurrences?.toNumber === 'function'
          ? ep.occurrences.toNumber()
          : Number(ep.occurrences);
        const occurrences = Number.isFinite(rawOccurrences) && rawOccurrences > 0 ? rawOccurrences : 1;

        const stored: ErrorPattern = {
          id: ep.id,
          source: ep.source as ErrorSource,
          category: ep.category as ErrorCategory,
          signature: ep.signature,
          description: ep.description,
          severity: ep.severity,
          tags: ep.tags || [],
          cveId: ep.cveId || undefined,
          cweId: ep.cweId || undefined,
          language: ep.language || undefined,
          framework: ep.framework || undefined,
          occurrences,
          lastSeen: ErrorKnowledgeBase.parseStoredDate(ep.lastSeen),
          createdAt: ErrorKnowledgeBase.parseStoredDate(ep.createdAt),
          solutions
        };

        const existing = this.patterns.get(stored.id);
        if (existing) {
          this.mergePersistedState(existing, stored);
          continue;
        }

        this.registerPattern(stored);
        loaded++;
      }

      log.info(`Loaded ${loaded} patterns from Neo4j`);
      return loaded;
    } catch (error) {
      log.error('Failed to load patterns from Neo4j:', error);
      return 0;
    }
  }

  /**
   * Decodes the stored `solutions` property.
   * Accepts the JSON string written by `persistToNeo4j` (or an already-decoded
   * array) and turns `lastUsed`, which JSON stores as a string, back into a Date.
   *
   * @throws SyntaxError when the stored string is not valid JSON.
   */
  private static parseStoredSolutions(raw: unknown): Solution[] {
    const decoded: unknown = typeof raw === 'string' ? JSON.parse(raw || '[]') : raw;
    if (!Array.isArray(decoded)) {
      return [];
    }
    return decoded
      .filter((entry): entry is Record<string, unknown> => typeof entry === 'object' && entry !== null)
      .map((entry) => {
        const solution = { ...(entry as Partial<Solution>) } as Solution;
        if (entry.lastUsed !== undefined && entry.lastUsed !== null) {
          solution.lastUsed = ErrorKnowledgeBase.parseStoredDate(entry.lastUsed);
        }
        return solution;
      });
  }

  /**
   * Converts a stored timestamp to a valid Date, falling back to the current
   * time when the value is missing or unparseable. An Invalid Date must never
   * enter the store: `persistToNeo4j` calls `toISOString()`, which throws on it.
   */
  private static parseStoredDate(value: unknown): Date {
    if (value !== null && value !== undefined) {
      // NOTE(review): a driver-native temporal object is handed to the Date
      // constructor as-is (string coercion) - confirm it parses for the driver in use.
      const parsed = value instanceof Date ? value : new Date(value as string | number);
      if (!Number.isNaN(parsed.getTime())) {
        return parsed;
      }
    }
    return new Date();
  }

  /**
   * Folds persisted state into a pattern that is already in memory.
   * Only information that is "ahead" of memory is adopted, so calling
   * `loadFromNeo4j` repeatedly can never roll back newer in-memory feedback.
   */
  private mergePersistedState(existing: ErrorPattern, stored: ErrorPattern): void {
    existing.occurrences = Math.max(existing.occurrences, stored.occurrences);
    if (stored.createdAt.getTime() < existing.createdAt.getTime()) {
      existing.createdAt = stored.createdAt;
    }
    if (stored.lastSeen.getTime() > existing.lastSeen.getTime()) {
      existing.lastSeen = stored.lastSeen;
    }

    for (const storedSolution of stored.solutions) {
      const current = existing.solutions.find(s => s.description === storedSolution.description);
      if (!current) {
        existing.solutions.push(storedSolution);
        continue;
      }
      const storedFeedback = (storedSolution.successCount ?? 0) + (storedSolution.failureCount ?? 0);
      const currentFeedback = (current.successCount ?? 0) + (current.failureCount ?? 0);
      // More recorded feedback means the persisted copy has learned more.
      if (storedFeedback > currentFeedback) {
        current.successCount = storedSolution.successCount ?? 0;
        current.failureCount = storedSolution.failureCount ?? 0;
        current.confidence = storedSolution.confidence;
        current.lastUsed = storedSolution.lastUsed;
      }
    }
  }

  /**
   * Records whether a solution worked and adjusts its confidence.
   *
   * Once a solution has at least three feedback events, its confidence is
   * re-blended on every call as 30% current confidence + 70% observed success
   * rate, clamped to [0.1, 0.99]. The pattern is then persisted to Neo4j
   * (persistence failure does not affect the return value).
   *
   * @param patternId ID of the pattern the solution belongs to.
   * @param solutionIndex Index into that pattern's `solutions` array.
   * @param success Whether the solution resolved the error.
   * @returns `false` when the pattern or solution does not exist, otherwise `true`.
   */
  public async recordFeedback(
    patternId: string,
    solutionIndex: number,
    success: boolean
  ): Promise<boolean> {
    const pattern = this.patterns.get(patternId);
    if (!pattern || !pattern.solutions[solutionIndex]) {
      return false;
    }
    const solution = pattern.solutions[solutionIndex];

    // Initialize counters if needed
    solution.successCount = solution.successCount || 0;
    solution.failureCount = solution.failureCount || 0;

    // Update counters
    if (success) {
      solution.successCount++;
    } else {
      solution.failureCount++;
    }
    solution.lastUsed = new Date();

    // Update confidence based on feedback (Bayesian-ish update)
    const totalFeedback = solution.successCount + solution.failureCount;
    if (totalFeedback >= 3) {
      const successRate = solution.successCount / totalFeedback;
      // Blend original confidence with observed success rate
      solution.confidence = (solution.confidence * 0.3) + (successRate * 0.7);
      // Clamp to valid range
      solution.confidence = Math.max(0.1, Math.min(0.99, solution.confidence));
    }

    // Persist to Neo4j
    await this.persistToNeo4j(pattern);

    log.info(`Recorded ${success ? 'success' : 'failure'} feedback for pattern ${patternId}, solution ${solutionIndex}. New confidence: ${solution.confidence.toFixed(2)}`);
    return true;
  }

  /**
   * Persists every in-memory pattern to Neo4j, one after another.
   *
   * @returns How many patterns were written and how many failed.
   */
  public async persistAllToNeo4j(): Promise<{ success: number; failed: number }> {
    let success = 0;
    let failed = 0;
    for (const pattern of this.patterns.values()) {
      const result = await this.persistToNeo4j(pattern);
      if (result) success++;
      else failed++;
    }
    log.info(`Persisted ${success} patterns to Neo4j (${failed} failed)`);
    return { success, failed };
  }

  /**
   * Like `getSolutions`, but each result is a copy annotated with the
   * `patternId` and `solutionIndex` needed to call `recordFeedback`.
   */
  public getSolutionsWithStats(errorMessage: string): (Solution & { patternId: string; solutionIndex: number })[] {
    const matches = this.findMatches(errorMessage, 3);
    const solutions: (Solution & { patternId: string; solutionIndex: number })[] = [];
    for (const match of matches) {
      match.solutions.forEach((sol, index) => {
        solutions.push({
          ...sol,
          patternId: match.id,
          solutionIndex: index
        });
      });
    }
    return solutions
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, 5);
  }
}
// Shared singleton instance. It is created when this module is first
// evaluated, which runs the constructor and ingests the built-in patterns.
export const errorKnowledgeBase = ErrorKnowledgeBase.getInstance();