Spaces:
Paused
Paused
/**
 * ╔═══════════════════════════════════════════════════════════════════════════╗
 * ║                   COGNITIVE ERROR INTELLIGENCE (CEI)                      ║
 * ╠═══════════════════════════════════════════════════════════════════════════╣
 * ║ Intelligent error handling that builds on WidgeTDC's capabilities:        ║
 * ║                                                                           ║
 * ║ 1. GRAPH-BASED ERROR CORRELATION                                          ║
 * ║    - Uses Neo4j to find relationships between errors                      ║
 * ║    - "Error A often causes Error B within 5 minutes"                      ║
 * ║                                                                           ║
 * ║ 2. PREDICTIVE ERROR DETECTION                                             ║
 * ║    - Learns patterns that predict errors BEFORE they happen               ║
 * ║    - "Redis memory usage > 80% → OOM within 10 min"                       ║
 * ║                                                                           ║
 * ║ 3. CONTEXT-AWARE SOLUTIONS                                                ║
 * ║    - Ranks solutions based on the current state of the system             ║
 * ║    - "Neo4j is down → prioritize local solutions"                         ║
 * ║                                                                           ║
 * ║ 4. AUTO-REMEDIATION                                                       ║
 * ║    - Automatically repairs known errors                                   ║
 * ║    - "ECONNREFUSED on Redis → restart Redis container"                    ║
 * ║                                                                           ║
 * ║ 5. CAUSAL CHAIN ANALYSIS                                                  ║
 * ║    - Builds graphs of error causes                                        ║
 * ║    - "Root cause: DNS failure → cascading to 5 services"                  ║
 * ╚═══════════════════════════════════════════════════════════════════════════╝
 */
| import { EventEmitter } from 'events'; | |
| import { errorKnowledgeBase, type ErrorPattern, type Solution } from './ErrorKnowledgeBase.js'; | |
| import { selfHealing } from './SelfHealingAdapter.js'; | |
| import { logger } from '../utils/logger.js'; | |
// Module-scoped child logger; every entry is tagged with this module's name.
const log = logger.child({
  module: 'CognitiveErrorIntelligence',
});
/**
 * Builds the base URL used for self-healing API calls against this process.
 *
 * The port is read from `process.env.PORT` on every call; when it is unset or
 * empty the default port 7860 is used.
 */
const getBaseUrl = (): string => {
  const port = process.env.PORT || 7860;
  return `http://localhost:${port}`;
};
// ═══════════════════════════════════════════════════════════════════════════
// TYPES
// ═══════════════════════════════════════════════════════════════════════════
/** A single error occurrence tracked by the intelligence engine. */
interface ErrorEvent {
  /** Unique identifier of this occurrence. */
  id: string;
  /** When the error was recorded. */
  timestamp: Date;
  /** Raw error message. */
  message: string;
  /** Name of the service that reported the error. */
  service: string;
  /** Assessed severity of the error. */
  severity: 'low' | 'medium' | 'high' | 'critical';
  /** Free-form caller-supplied context. */
  context: Record<string, any>;
  /** Stack trace, when the caller provided one. */
  stackTrace?: string;
  /** Whether the error has been resolved. */
  resolved: boolean;
  /** Who or what resolved the error (auto-remediation writes `auto:<actionId>`). */
  resolvedBy?: string;
  /** When the error was resolved. */
  resolvedAt?: Date;
}
/** A learned relationship between two error signatures. */
interface ErrorCorrelation {
  /** Id of the error event observed first when the correlation was created. */
  sourceErrorId: string;
  /** Id of the error event observed second when the correlation was created. */
  targetErrorId: string;
  /** Kind of relationship between source and target. */
  correlationType: 'causes' | 'precedes' | 'cooccurs' | 'masks';
  /** Confidence in the correlation. */
  confidence: number;
  /** Average time between source and target, in milliseconds. */
  avgTimeDelta: number;
  /** Number of times the pair has been observed. */
  occurrences: number;
}
/** A metric threshold rule that predicts an upcoming error. */
interface PredictiveSignal {
  /** Name of the metric the rule watches. */
  metric: string;
  /** Value the metric is compared against. */
  threshold: number;
  /** Comparison applied as `value <operator> threshold`. */
  operator: '>' | '<' | '=' | '>=' | '<=';
  /** Message of the error expected once the rule triggers. */
  predictedError: string;
  /** Milliseconds before the error typically occurs. */
  leadTime: number;
  /** Confidence in the prediction. */
  confidence: number;
  /** When the rule last triggered. */
  lastTriggered?: Date;
}
/** An automated repair step that can be executed for matching errors. */
interface RemediationAction {
  /** Unique identifier of the action. */
  id: string;
  /** Short human-readable name. */
  name: string;
  /** What the action does. */
  description: string;
  /**
   * Patterns this action can fix. The engine matches each entry as a
   * case-insensitive substring of a solution's description.
   */
  errorPatterns: string[];
  /** Shell command to execute. */
  command?: string;
  /** HTTP call to perform; used when no `command` is set. */
  apiCall?: { endpoint: string; method: string; body?: any };
  /** Whether a human must approve the action before it runs. */
  requiresApproval: boolean;
  /** Risk classification of the action. */
  riskLevel: 'low' | 'medium' | 'high';
  /** Smoothed success rate of past executions. */
  successRate: number;
  /** Smoothed execution time in milliseconds. */
  avgExecutionTime: number;
  /** When the action last ran. */
  lastExecuted?: Date;
}
/** A root-cause error together with the errors that followed it. */
interface CausalChain {
  /** The error considered to be the origin of the chain. */
  rootCause: ErrorEvent;
  /** Errors attributed to the root cause. */
  effects: ErrorEvent[];
  /** Number of affected services/operations. */
  totalImpact: number;
  /** When the chain was detected. */
  detectedAt: Date;
  /** When the chain was resolved. */
  resolvedAt?: Date;
}
/** Snapshot of the system state used when ranking solutions. */
interface SystemContext {
  /** Health information keyed by service name. */
  services: Map<string, ServiceHealth>;
  /** Errors currently considered active. */
  activeErrors: ErrorEvent[];
  /** Remediation actions that were executed recently. */
  recentRemediations: RemediationAction[];
  /** Current load figures. */
  load: {
    cpu: number;
    memory: number;
    connections: number;
  };
}
/** Health report for a single service. */
interface ServiceHealth {
  /** Service name. */
  name: string;
  /** Reported health state. */
  status: 'healthy' | 'degraded' | 'unhealthy' | 'unknown';
  /** When the service was last checked. */
  lastCheck: Date;
  /** Numeric metrics keyed by metric name. */
  metrics: Record<string, number>;
}
/** A knowledge-base solution enriched with context-aware scoring. */
interface IntelligentSolution extends Solution {
  /** How relevant the solution is given the current system state. */
  contextScore: number;
  /** Predicted probability that the solution succeeds. */
  predictedSuccess: number;
  /** Whether a matching remediation can run without approval. */
  autoRemediable: boolean;
  /** The remediation action matched to this solution, if any. */
  remediationAction?: RemediationAction;
  /** Why this solution is recommended. */
  reasoning: string;
}
// ═══════════════════════════════════════════════════════════════════════════
// COGNITIVE ERROR INTELLIGENCE CLASS
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Singleton engine that records errors, learns which error signatures tend to
 * follow each other, ranks knowledge-base solutions against the current system
 * state, triggers predictive alerts from metrics and runs remediation actions.
 *
 * Emitted events:
 *  - `error:processed`               after every {@link processError} call
 *  - `prediction:triggered`          when a metric crosses a predictive signal
 *  - `remediation:approval-required` when an action needs manual approval
 *  - `remediation:started` / `remediation:completed` / `remediation:failed`
 */
export class CognitiveErrorIntelligence extends EventEmitter {
  private static instance: CognitiveErrorIntelligence;

  /** Separator between source and target signature inside a correlation key. */
  private static readonly KEY_SEPARATOR = '→';
  /** Minimum time between two executions of the same remediation action. */
  private static readonly REMEDIATION_COOLDOWN_MS = 60 * 1000;
  /** Upper bound for a single remediation command or API call. */
  private static readonly REMEDIATION_TIMEOUT_MS = 30 * 1000;
  /** Maximum number of entries kept in `systemContext.recentRemediations`. */
  private static readonly MAX_RECENT_REMEDIATIONS = 100;
  /** How long errors and metric samples are retained. */
  private static readonly RETENTION_MS = 60 * 60 * 1000;

  // Error tracking
  private errorHistory: ErrorEvent[] = [];
  private readonly MAX_HISTORY = 10000;

  // Correlation learning
  private correlations: Map<string, ErrorCorrelation> = new Map();
  private correlationWindow = 5 * 60 * 1000; // 5 minutes

  // Predictive signals
  private predictiveSignals: PredictiveSignal[] = [];
  private metricsHistory: Map<string, { timestamp: Date; value: number }[]> = new Map();

  // Auto-remediation
  private remediationActions: Map<string, RemediationAction> = new Map();
  private remediationQueue: { error: ErrorEvent; action: RemediationAction }[] = [];
  private isRemediating = false;
  /** Id of the action currently executing, used to avoid queueing it twice. */
  private activeRemediationId: string | undefined;
  /** Latest remediation awaiting manual approval, keyed by action id. */
  private pendingApprovals: Map<string, { error: ErrorEvent; action: RemediationAction }> = new Map();

  // System context
  private systemContext: SystemContext = {
    services: new Map(),
    activeErrors: [],
    recentRemediations: [],
    load: { cpu: 0, memory: 0, connections: 0 }
  };

  private constructor() {
    super();
    this.initializeDefaultRemediations();
    this.initializePredictiveSignals();
    this.startBackgroundProcessing();
    log.info('🧠 Cognitive Error Intelligence initialized');
  }

  /** Returns the process-wide instance, creating it on first use. */
  public static getInstance(): CognitiveErrorIntelligence {
    if (!CognitiveErrorIntelligence.instance) {
      CognitiveErrorIntelligence.instance = new CognitiveErrorIntelligence();
    }
    return CognitiveErrorIntelligence.instance;
  }

  // ═════════════════════════════════════════════════════════════════════════
  // 1. INTELLIGENT ERROR HANDLING
  // ═════════════════════════════════════════════════════════════════════════

  /**
   * Process an error with full cognitive analysis.
   *
   * @param message    Raw error message.
   * @param service    Name of the service that reported the error.
   * @param context    Optional free-form context stored with the event.
   * @param stackTrace Optional stack trace stored with the event.
   * @returns The generated error id, ranked solutions, recent correlated
   *          errors, the causal chain (when at least two errors fall inside
   *          the correlation window), the remediation decision and the most
   *          likely follow-up error, when one is known.
   */
  public async processError(
    message: string,
    service: string,
    context: Record<string, any> = {},
    stackTrace?: string
  ): Promise<{
    errorId: string;
    solutions: IntelligentSolution[];
    correlatedErrors: ErrorEvent[];
    causalChain?: CausalChain;
    autoRemediation?: { action: RemediationAction; queued: boolean };
    prediction?: { nextLikelyError: string; confidence: number; timeframe: string };
  }> {
    const errorId = this.generateErrorId();
    const severity = this.assessSeverity(message, context);

    const errorEvent: ErrorEvent = {
      id: errorId,
      timestamp: new Date(),
      message,
      service,
      severity,
      context,
      stackTrace,
      resolved: false
    };
    this.recordError(errorEvent);

    // 1. Find correlated errors (what usually happens with this error?)
    const correlatedErrors = this.findCorrelatedErrors(errorEvent);
    // 2. Analyze causal chain (is this the root cause or an effect?)
    const causalChain = this.analyzeCausalChain(errorEvent);
    // 3. Get context-aware solutions
    const solutions = await this.getIntelligentSolutions(message, errorEvent);
    // 4. Check for auto-remediation
    const autoRemediation = await this.checkAutoRemediation(errorEvent, solutions);
    // 5. Predict next likely error
    const prediction = this.predictNextError(errorEvent);
    // 6. Learn correlations for future calls (after the lookups above so this
    //    event does not influence its own analysis)
    this.learnCorrelations(errorEvent);

    // Emit event for real-time monitoring
    this.emit('error:processed', {
      errorId,
      severity,
      solutions: solutions.length,
      autoRemediation: autoRemediation?.queued
    });
    log.info(`🧠 Processed error ${errorId}: ${solutions.length} solutions, ${correlatedErrors.length} correlations`);

    return {
      errorId,
      solutions,
      correlatedErrors,
      causalChain,
      autoRemediation,
      prediction
    };
  }

  // ═════════════════════════════════════════════════════════════════════════
  // 2. GRAPH-BASED ERROR CORRELATION
  // ═════════════════════════════════════════════════════════════════════════

  /**
   * Find recent errors whose signature has a learned correlation
   * (confidence > 0.5) towards the given error's signature.
   */
  private findCorrelatedErrors(error: ErrorEvent): ErrorEvent[] {
    const windowStart = Date.now() - this.correlationWindow;
    return this.errorHistory.filter(candidate => {
      if (candidate.id === error.id || candidate.timestamp.getTime() <= windowStart) {
        return false;
      }
      const correlation = this.correlations.get(
        this.getCorrelationKey(candidate.message, error.message)
      );
      return correlation !== undefined && correlation.confidence > 0.5;
    });
  }

  /**
   * Learn correlations: every error seen inside the correlation window before
   * `error` strengthens the "recent → error" pair.
   */
  private learnCorrelations(error: ErrorEvent): void {
    const errorTime = error.timestamp.getTime();
    const windowStart = errorTime - this.correlationWindow;

    for (const recent of this.errorHistory) {
      if (recent.id === error.id || recent.timestamp.getTime() <= windowStart) continue;

      const timeDelta = errorTime - recent.timestamp.getTime();
      const correlationKey = this.getCorrelationKey(recent.message, error.message);
      const existing = this.correlations.get(correlationKey);

      if (!existing) {
        this.correlations.set(correlationKey, {
          sourceErrorId: recent.id,
          targetErrorId: error.id,
          correlationType: timeDelta < 1000 ? 'cooccurs' : 'precedes',
          confidence: 0.1,
          avgTimeDelta: timeDelta,
          occurrences: 1
        });
        continue;
      }

      existing.occurrences++;
      // Running mean over all observations, so every sample weighs equally.
      existing.avgTimeDelta += (timeDelta - existing.avgTimeDelta) / existing.occurrences;
      // Each observation closes 10% of the remaining gap to 1, capped at 0.95.
      existing.confidence = Math.min(0.95, existing.confidence + (1 - existing.confidence) * 0.1);
    }
  }

  /**
   * Persist correlations with at least 3 occurrences and confidence > 0.5 to
   * Neo4j for graph analysis.
   *
   * @returns Number of correlations written. When Neo4j is unavailable or a
   *          query fails, the failure is logged and the count written so far
   *          is returned; this method does not throw.
   */
  public async persistCorrelationsToNeo4j(): Promise<number> {
    let persisted = 0;
    try {
      const { neo4jService } = await import('../database/Neo4jService.js');
      for (const [key, correlation] of this.correlations) {
        if (correlation.occurrences < 3 || correlation.confidence <= 0.5) continue;

        const [sourceSignature, targetSignature] = this.splitCorrelationKey(key);
        // The relationship type comes from the fixed `correlationType` union,
        // never from error text, so interpolating it into the query is safe.
        await neo4jService.runQuery(`
          MERGE (source:ErrorPattern {signature: $sourceSignature})
          MERGE (target:ErrorPattern {signature: $targetSignature})
          MERGE (source)-[r:${correlation.correlationType.toUpperCase()}]->(target)
          SET r.confidence = $confidence,
              r.avgTimeDelta = $avgTimeDelta,
              r.occurrences = $occurrences,
              r.updatedAt = datetime()
        `, {
          sourceSignature,
          targetSignature,
          confidence: correlation.confidence,
          avgTimeDelta: correlation.avgTimeDelta,
          occurrences: correlation.occurrences
        });
        persisted++;
      }
      log.info(`Persisted ${persisted} error correlations to Neo4j`);
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e);
      log.warn(`Neo4j not available for correlation persistence: ${reason}`);
    }
    return persisted;
  }

  // ═════════════════════════════════════════════════════════════════════════
  // 3. CAUSAL CHAIN ANALYSIS
  // ═════════════════════════════════════════════════════════════════════════

  /**
   * Build a causal chain from all errors within the correlation window around
   * `error`. The earliest one is treated as the likely root cause; this is a
   * time-ordering heuristic, not proof of causation.
   *
   * @returns `undefined` when fewer than two errors fall inside the window.
   */
  private analyzeCausalChain(error: ErrorEvent): CausalChain | undefined {
    const pivot = error.timestamp.getTime();
    const chronological = this.errorHistory
      .filter(e => Math.abs(e.timestamp.getTime() - pivot) < this.correlationWindow)
      .sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());

    if (chronological.length < 2) return undefined;

    const [rootCause, ...effects] = chronological;
    return {
      rootCause,
      effects,
      totalImpact: new Set(effects.map(e => e.service)).size,
      detectedAt: new Date()
    };
  }

  // ═════════════════════════════════════════════════════════════════════════
  // 4. CONTEXT-AWARE INTELLIGENT SOLUTIONS
  // ═════════════════════════════════════════════════════════════════════════

  /**
   * Get knowledge-base solutions for the message, enriched with context and
   * success scores and sorted best-first by
   * `0.3 * contextScore + 0.4 * predictedSuccess + 0.3 * confidence`.
   */
  private async getIntelligentSolutions(
    errorMessage: string,
    errorEvent: ErrorEvent
  ): Promise<IntelligentSolution[]> {
    const baseSolutions = errorKnowledgeBase.getSolutions(errorMessage);

    const enriched: IntelligentSolution[] = [];
    for (const solution of baseSolutions) {
      const contextScore = this.calculateContextScore(solution, errorEvent);
      const predictedSuccess = this.predictSolutionSuccess(solution, errorEvent);
      const remediation = this.findRemediationAction(solution);
      enriched.push({
        ...solution,
        contextScore,
        predictedSuccess,
        autoRemediable: remediation !== undefined && !remediation.requiresApproval,
        remediationAction: remediation,
        reasoning: this.generateReasoning(solution, contextScore, predictedSuccess)
      });
    }

    const rank = (s: IntelligentSolution): number =>
      s.contextScore * 0.3 + s.predictedSuccess * 0.4 + s.confidence * 0.3;
    return enriched.sort((a, b) => rank(b) - rank(a));
  }

  /**
   * Calculate how relevant a solution is given current system state.
   *
   * @returns A score clamped to the range [0, 1], starting from 0.5.
   */
  private calculateContextScore(solution: Solution, error: ErrorEvent): number {
    const description = solution.description.toLowerCase();
    let score = 0.5; // Base score

    // Small bonus when the failing service itself is reported healthy.
    if (this.systemContext.services.get(error.service)?.status === 'healthy') {
      score += 0.1;
    }

    // Penalize Redis-related solutions unless Redis is known to be healthy.
    if (description.includes('redis') &&
        this.systemContext.services.get('redis')?.status !== 'healthy') {
      score -= 0.2;
    }

    // Boost solutions with a recorded success history.
    if (solution.successCount && solution.successCount > 0) {
      const attempts = solution.successCount + (solution.failureCount || 0);
      score += (solution.successCount / attempts) * 0.2;
    }

    // Don't suggest CPU-intensive solutions when load is high.
    if (this.systemContext.load.cpu > 80 && description.includes('intensive')) {
      score -= 0.1;
    }

    return Math.max(0, Math.min(1, score));
  }

  /**
   * Predict success: the observed success ratio when at least three outcomes
   * were recorded, otherwise the solution's confidence scaled by its context
   * score.
   */
  private predictSolutionSuccess(solution: Solution, error: ErrorEvent): number {
    if (solution.successCount !== undefined && solution.failureCount !== undefined) {
      const total = solution.successCount + solution.failureCount;
      if (total >= 3) {
        return solution.successCount / total;
      }
    }
    return solution.confidence * this.calculateContextScore(solution, error);
  }

  /** Generate human-readable reasoning for a recommendation. */
  private generateReasoning(solution: Solution, contextScore: number, predictedSuccess: number): string {
    const reasons: string[] = [];
    if (solution.verified) {
      reasons.push('verified solution');
    }
    if (contextScore > 0.7) {
      reasons.push('matches current system state');
    }
    if (predictedSuccess > 0.8) {
      reasons.push(`${Math.round(predictedSuccess * 100)}% predicted success`);
    }
    if (solution.successCount && solution.successCount > 5) {
      reasons.push(`worked ${solution.successCount} times before`);
    }
    return reasons.length > 0 ? `Recommended: ${reasons.join(', ')}` : 'Standard recommendation';
  }

  // ═════════════════════════════════════════════════════════════════════════
  // 5. AUTO-REMEDIATION ENGINE
  // ═════════════════════════════════════════════════════════════════════════

  /**
   * Decide whether the error can be remediated automatically.
   *
   * The first auto-remediable solution is queued unless its action is
   * high-risk, on cooldown or already queued/running. When no solution is
   * auto-remediable but one has an action that requires approval, the approval
   * is requested so that {@link approveRemediation} can release it.
   *
   * @returns The chosen action and whether it was queued, or `undefined` when
   *          no solution carries a remediation action.
   */
  private async checkAutoRemediation(
    error: ErrorEvent,
    solutions: IntelligentSolution[]
  ): Promise<{ action: RemediationAction; queued: boolean } | undefined> {
    const action = solutions.find(s => s.autoRemediable && s.remediationAction)?.remediationAction;

    if (!action) {
      const gated = solutions.find(s => s.remediationAction?.requiresApproval)?.remediationAction;
      if (!gated) return undefined;
      log.warn(`⚠️ Remediation requires approval: ${gated.name}`);
      return this.requestApproval(error, gated);
    }

    // Safety check: high-risk actions never run without explicit approval.
    if (action.riskLevel === 'high') {
      log.warn(`⚠️ High-risk remediation requires approval: ${action.name}`);
      return this.requestApproval(error, action);
    }

    // Check if we've tried this recently (prevent loops)
    const now = Date.now();
    const onCooldown = this.systemContext.recentRemediations.some(
      r => r.id === action.id &&
        r.lastExecuted !== undefined &&
        now - r.lastExecuted.getTime() < CognitiveErrorIntelligence.REMEDIATION_COOLDOWN_MS
    );
    if (onCooldown) {
      log.info(`⏳ Skipping remediation ${action.name} - cooldown active`);
      return { action, queued: false };
    }

    // An error burst must not queue the same action once per error.
    const alreadyScheduled =
      this.activeRemediationId === action.id ||
      this.remediationQueue.some(item => item.action.id === action.id);
    if (alreadyScheduled) {
      log.info(`⏳ Skipping remediation ${action.name} - already queued`);
      return { action, queued: false };
    }

    this.enqueueRemediation(error, action);
    return { action, queued: true };
  }

  /** Record a remediation as awaiting approval and notify listeners. */
  private requestApproval(
    error: ErrorEvent,
    action: RemediationAction
  ): { action: RemediationAction; queued: boolean } {
    // Only the latest request per action is kept, which bounds the map size.
    this.pendingApprovals.set(action.id, { error, action });
    this.emit('remediation:approval-required', { error, action });
    return { action, queued: false };
  }

  /** Add a remediation to the queue and make sure the queue is being drained. */
  private enqueueRemediation(error: ErrorEvent, action: RemediationAction): void {
    this.remediationQueue.push({ error, action });
    this.processRemediationQueue().catch((e: unknown) => {
      log.error(`Remediation queue processing failed: ${e}`);
    });
  }

  /**
   * Drain the remediation queue sequentially. Re-entrant calls return
   * immediately while a drain is in progress.
   */
  private async processRemediationQueue(): Promise<void> {
    if (this.isRemediating || this.remediationQueue.length === 0) return;

    this.isRemediating = true;
    try {
      let next = this.remediationQueue.shift();
      while (next) {
        try {
          await this.executeRemediation(next.error, next.action);
        } catch (e: unknown) {
          // A throwing event listener must not strand the rest of the queue.
          log.error(`Remediation failed: ${e}`);
        }
        next = this.remediationQueue.shift();
      }
    } finally {
      this.activeRemediationId = undefined;
      this.isRemediating = false;
    }
  }

  /**
   * Execute one remediation action, update its statistics and emit the
   * matching `remediation:*` events. Statistics are recorded for failed
   * attempts as well, so the cooldown also applies to failing actions.
   */
  private async executeRemediation(error: ErrorEvent, action: RemediationAction): Promise<void> {
    this.activeRemediationId = action.id;
    log.info(`🔧 Executing auto-remediation: ${action.name}`);
    this.emit('remediation:started', { error, action });

    const startTime = Date.now();
    let success = false;
    let failed = false;
    let failure: unknown;
    try {
      if (action.command) {
        await this.runCommand(action.command);
        success = true;
      } else if (action.apiCall) {
        success = await this.callRemediationApi(action.apiCall);
      }
    } catch (e: unknown) {
      failed = true;
      failure = e;
    }

    // Update action stats (both values are smoothed, not true averages).
    action.lastExecuted = new Date();
    action.avgExecutionTime = (action.avgExecutionTime + (Date.now() - startTime)) / 2;
    action.successRate = action.successRate * 0.9 + (success ? 0.1 : 0);
    this.rememberRemediation(action);

    if (failed) {
      log.error(`Remediation failed: ${failure}`);
      this.emit('remediation:failed', { error, action, reason: String(failure) });
      return;
    }

    if (success) {
      error.resolved = true;
      error.resolvedBy = `auto:${action.id}`;
      error.resolvedAt = new Date();
      // A resolved error is no longer active.
      this.systemContext.activeErrors = this.systemContext.activeErrors.filter(
        e => e.id !== error.id
      );
    }
    this.emit('remediation:completed', { error, action, success });
  }

  /** Run a shell command; rejects when it fails or exceeds the timeout. */
  private async runCommand(command: string): Promise<void> {
    const { exec } = await import('child_process');
    await new Promise<void>((resolve, reject) => {
      exec(
        command,
        { timeout: CognitiveErrorIntelligence.REMEDIATION_TIMEOUT_MS },
        (err, stdout, stderr) => {
          if (err) {
            log.error(`Remediation command failed: ${stderr}`);
            reject(err);
            return;
          }
          log.info(`Remediation output: ${stdout}`);
          resolve();
        }
      );
    });
  }

  /**
   * Perform a remediation API call. Endpoints starting with `/` are resolved
   * against the local base URL.
   *
   * @returns Whether the response status was OK. Rejects on network errors or
   *          when the request exceeds the timeout.
   */
  private async callRemediationApi(
    apiCall: NonNullable<RemediationAction['apiCall']>
  ): Promise<boolean> {
    const endpoint = apiCall.endpoint.startsWith('/')
      ? `${getBaseUrl()}${apiCall.endpoint}`
      : apiCall.endpoint;

    // Without a timeout a hung endpoint would block the queue indefinitely.
    const controller = new AbortController();
    const timer = setTimeout(
      () => controller.abort(),
      CognitiveErrorIntelligence.REMEDIATION_TIMEOUT_MS
    );
    try {
      const response = await fetch(endpoint, {
        method: apiCall.method,
        headers: { 'Content-Type': 'application/json' },
        body: apiCall.body ? JSON.stringify(apiCall.body) : undefined,
        signal: controller.signal
      });
      return response.ok;
    } finally {
      clearTimeout(timer);
    }
  }

  /** Append to the recent-remediations list, dropping the oldest entries. */
  private rememberRemediation(action: RemediationAction): void {
    const recent = this.systemContext.recentRemediations;
    recent.push(action);
    const overflow = recent.length - CognitiveErrorIntelligence.MAX_RECENT_REMEDIATIONS;
    if (overflow > 0) {
      recent.splice(0, overflow);
    }
  }

  /**
   * Find the first remediation action one of whose patterns occurs
   * (case-insensitively) in the solution's description.
   */
  private findRemediationAction(solution: Solution): RemediationAction | undefined {
    const description = solution.description.toLowerCase();
    for (const action of this.remediationActions.values()) {
      if (action.errorPatterns.some(pattern => description.includes(pattern.toLowerCase()))) {
        return action;
      }
    }
    return undefined;
  }

  // ═════════════════════════════════════════════════════════════════════════
  // 6. PREDICTIVE ERROR DETECTION
  // ═════════════════════════════════════════════════════════════════════════

  /**
   * Record a metric sample, keep only the last hour of samples for that metric
   * and evaluate the predictive signals against the new value.
   */
  public recordMetric(metric: string, value: number): void {
    const samples = this.metricsHistory.get(metric) ?? [];
    samples.push({ timestamp: new Date(), value });

    const oldestAllowed = Date.now() - CognitiveErrorIntelligence.RETENTION_MS;
    this.metricsHistory.set(
      metric,
      samples.filter(sample => sample.timestamp.getTime() > oldestAllowed)
    );

    this.checkPredictiveSignals(metric, value);
  }

  /** Check if the metric value triggers any predictive signal. */
  private checkPredictiveSignals(metric: string, value: number): void {
    for (const signal of this.predictiveSignals) {
      if (signal.metric !== metric) continue;

      let triggered = false;
      switch (signal.operator) {
        case '>': triggered = value > signal.threshold; break;
        case '<': triggered = value < signal.threshold; break;
        case '>=': triggered = value >= signal.threshold; break;
        case '<=': triggered = value <= signal.threshold; break;
        case '=': triggered = value === signal.threshold; break;
      }
      if (!triggered) continue;

      signal.lastTriggered = new Date();
      log.warn(`⚠️ PREDICTIVE ALERT: ${metric} = ${value} → ${signal.predictedError} likely in ${signal.leadTime / 1000}s`);
      this.emit('prediction:triggered', {
        signal,
        currentValue: value,
        expectedError: signal.predictedError,
        expectedIn: signal.leadTime
      });
    }
  }

  /**
   * Predict the next likely error: the highest-confidence `precedes`
   * correlation whose source signature equals this error's signature.
   *
   * @returns `undefined` unless the best match has confidence > 0.5.
   */
  private predictNextError(error: ErrorEvent): { nextLikelyError: string; confidence: number; timeframe: string } | undefined {
    // Including the separator makes this an exact match on the source
    // signature rather than a prefix match on longer signatures.
    const sourcePrefix = this.normalizeSignature(error.message) + CognitiveErrorIntelligence.KEY_SEPARATOR;

    let best: { error: string; confidence: number; timeDelta: number } | undefined;
    for (const [key, correlation] of this.correlations) {
      if (correlation.correlationType !== 'precedes' || !key.startsWith(sourcePrefix)) continue;
      if (best && correlation.confidence <= best.confidence) continue;
      best = {
        error: key.slice(sourcePrefix.length),
        confidence: correlation.confidence,
        timeDelta: correlation.avgTimeDelta
      };
    }

    if (!best || best.confidence <= 0.5) return undefined;
    return {
      nextLikelyError: best.error,
      confidence: best.confidence,
      timeframe: this.formatTimeDelta(best.timeDelta)
    };
  }

  // ═════════════════════════════════════════════════════════════════════════
  // 7. INITIALIZATION & BACKGROUND PROCESSING
  // ═════════════════════════════════════════════════════════════════════════

  /** Initialize default auto-remediation actions. */
  private initializeDefaultRemediations(): void {
    const defaultActions: RemediationAction[] = [
      {
        id: 'restart-redis',
        name: 'Restart Redis Connection',
        description: 'Reconnect to Redis when connection is lost',
        errorPatterns: ['ECONNREFUSED', 'redis', 'connection refused'],
        apiCall: { endpoint: '/api/healing/service/redis', method: 'POST' },
        requiresApproval: false,
        riskLevel: 'low',
        successRate: 0.85,
        avgExecutionTime: 1000
      },
      {
        id: 'clear-memory-cache',
        name: 'Clear Memory Cache',
        description: 'Clear in-memory caches when memory is low',
        errorPatterns: ['heap', 'memory', 'OOM'],
        apiCall: { endpoint: '/api/system/clear-cache', method: 'POST' },
        requiresApproval: false,
        riskLevel: 'low',
        successRate: 0.9,
        avgExecutionTime: 500
      },
      {
        id: 'retry-database',
        name: 'Retry Database Connection',
        description: 'Attempt to reconnect to database',
        errorPatterns: ['database', 'postgres', 'neo4j', 'SQLSTATE'],
        apiCall: { endpoint: '/api/healing/service/database', method: 'POST' },
        requiresApproval: false,
        riskLevel: 'medium',
        successRate: 0.75,
        avgExecutionTime: 3000
      },
      {
        id: 'restart-service',
        name: 'Restart Service',
        description: 'Full service restart - requires approval',
        errorPatterns: ['fatal', 'crash', 'unrecoverable'],
        command: 'npm run restart:backend',
        requiresApproval: true,
        riskLevel: 'high',
        successRate: 0.95,
        avgExecutionTime: 15000
      }
    ];

    for (const action of defaultActions) {
      this.remediationActions.set(action.id, action);
    }
  }

  /** Initialize predictive signals. */
  private initializePredictiveSignals(): void {
    this.predictiveSignals = [
      {
        metric: 'memory_usage_percent',
        threshold: 85,
        operator: '>',
        predictedError: 'JavaScript heap out of memory',
        leadTime: 300000, // 5 minutes
        confidence: 0.8
      },
      {
        metric: 'redis_connections',
        threshold: 95,
        operator: '>',
        predictedError: 'ECONNREFUSED on Redis',
        leadTime: 60000, // 1 minute
        confidence: 0.7
      },
      {
        metric: 'postgres_connections',
        threshold: 90,
        operator: '>',
        predictedError: 'SQLSTATE 53300 too many connections',
        leadTime: 120000, // 2 minutes
        confidence: 0.75
      },
      {
        metric: 'event_loop_lag_ms',
        threshold: 500,
        operator: '>',
        predictedError: 'Event loop blocked - degraded performance',
        leadTime: 30000, // 30 seconds
        confidence: 0.85
      }
    ];
  }

  /**
   * Start background processing: state cleanup every 5 minutes and Neo4j
   * persistence every 10 minutes. The timers are unref'd so that this
   * import-time singleton does not keep the process alive on its own.
   */
  private startBackgroundProcessing(): void {
    const cleanupTimer = setInterval(() => this.pruneStaleState(), 300000);
    const persistTimer = setInterval(() => {
      // persistCorrelationsToNeo4j handles its own failures.
      void this.persistCorrelationsToNeo4j();
    }, 600000);

    CognitiveErrorIntelligence.unrefTimer(cleanupTimer);
    CognitiveErrorIntelligence.unrefTimer(persistTimer);
  }

  /** Call `unref()` on a timer handle when the runtime provides it. */
  private static unrefTimer(timer: unknown): void {
    if (typeof timer !== 'object' || timer === null) return;
    const handle = timer as { unref?: unknown };
    if (typeof handle.unref === 'function') {
      handle.unref();
    }
  }

  /**
   * Drop errors older than the retention window, resolved or expired active
   * errors, and weak correlations (confidence < 0.3 with < 3 occurrences).
   */
  private pruneStaleState(): void {
    const cutoff = Date.now() - CognitiveErrorIntelligence.RETENTION_MS;
    this.errorHistory = this.errorHistory.filter(e => e.timestamp.getTime() > cutoff);
    this.systemContext.activeErrors = this.systemContext.activeErrors.filter(
      e => !e.resolved && e.timestamp.getTime() > cutoff
    );

    for (const [key, correlation] of this.correlations) {
      if (correlation.confidence < 0.3 && correlation.occurrences < 3) {
        this.correlations.delete(key);
      }
    }
  }

  // ═════════════════════════════════════════════════════════════════════════
  // HELPER METHODS
  // ═════════════════════════════════════════════════════════════════════════

  /** Build an id of the form `err_<epoch ms>_<random base-36 suffix>`. */
  private generateErrorId(): string {
    return `err_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`;
  }

  /** Derive severity from keywords in the message; the context is not used. */
  private assessSeverity(message: string, _context: Record<string, any>): ErrorEvent['severity'] {
    const lower = message.toLowerCase();
    const mentions = (...keywords: string[]): boolean => keywords.some(k => lower.includes(k));

    if (mentions('fatal', 'critical', 'crash')) return 'critical';
    if (mentions('error', 'failed', 'refused')) return 'high';
    if (mentions('warning', 'timeout')) return 'medium';
    return 'low';
  }

  /** Store the error in the history and the active list, keeping both bounded. */
  private recordError(error: ErrorEvent): void {
    this.errorHistory.push(error);
    this.systemContext.activeErrors.push(error);

    // Trim to the newest half once the cap is exceeded.
    if (this.errorHistory.length > this.MAX_HISTORY) {
      this.errorHistory = this.errorHistory.slice(-this.MAX_HISTORY / 2);
    }
    if (this.systemContext.activeErrors.length > this.MAX_HISTORY) {
      this.systemContext.activeErrors = this.systemContext.activeErrors.slice(-this.MAX_HISTORY / 2);
    }
  }

  /** Build the map key for the "source → target" signature pair. */
  private getCorrelationKey(sourceMsg: string, targetMsg: string): string {
    return `${this.normalizeSignature(sourceMsg)}${CognitiveErrorIntelligence.KEY_SEPARATOR}${this.normalizeSignature(targetMsg)}`;
  }

  /** Split a correlation key back into its source and target signatures. */
  private splitCorrelationKey(key: string): [string, string] {
    const separator = CognitiveErrorIntelligence.KEY_SEPARATOR;
    const index = key.indexOf(separator);
    if (index < 0) return [key, ''];
    return [key.slice(0, index), key.slice(index + separator.length)];
  }

  /**
   * Reduce a message to a signature: lower-cased, digit runs replaced by `N`,
   * whitespace collapsed, truncated to 100 characters. The key separator is
   * replaced so that correlation keys can always be split unambiguously.
   */
  private normalizeSignature(msg: string): string {
    return msg
      .toLowerCase()
      .split(CognitiveErrorIntelligence.KEY_SEPARATOR).join('->')
      .replace(/[0-9]+/g, 'N')
      .replace(/\s+/g, ' ')
      .substring(0, 100);
  }

  /** Format a duration in milliseconds as `<n>ms`, `<n>s` or `<n>min`. */
  private formatTimeDelta(ms: number): string {
    if (ms < 1000) return `${Math.round(ms)}ms`;
    if (ms < 60000) return `${Math.round(ms / 1000)}s`;
    return `${Math.round(ms / 60000)}min`;
  }

  // ═════════════════════════════════════════════════════════════════════════
  // PUBLIC API
  // ═════════════════════════════════════════════════════════════════════════

  /**
   * Update system context (called by health checks). Top-level properties in
   * `updates` replace the current ones; nested objects are not merged.
   */
  public updateSystemContext(updates: Partial<SystemContext>): void {
    Object.assign(this.systemContext, updates);
  }

  /** Get current intelligence stats (collection sizes). */
  public getStats() {
    return {
      errorHistory: this.errorHistory.length,
      correlations: this.correlations.size,
      predictiveSignals: this.predictiveSignals.length,
      remediationActions: this.remediationActions.size,
      activeErrors: this.systemContext.activeErrors.length,
      recentRemediations: this.systemContext.recentRemediations.length
    };
  }

  /** Get correlations with confidence > 0.5, strongest first. */
  public getCorrelations(): ErrorCorrelation[] {
    return Array.from(this.correlations.values())
      .filter(c => c.confidence > 0.5)
      .sort((a, b) => b.confidence - a.confidence);
  }

  /**
   * Manually approve a pending remediation.
   *
   * When a remediation for this action is awaiting approval (announced through
   * `remediation:approval-required`), it is queued for execution exactly once.
   * The approval is per incident: the action's `requiresApproval` flag is left
   * unchanged, so later incidents ask for approval again.
   *
   * @param actionId Id of the remediation action to approve.
   * @returns `true` when the action exists, `false` otherwise.
   */
  public approveRemediation(actionId: string): boolean {
    if (!this.remediationActions.has(actionId)) {
      return false;
    }

    const pending = this.pendingApprovals.get(actionId);
    if (pending) {
      this.pendingApprovals.delete(actionId);
      this.enqueueRemediation(pending.error, pending.action);
    }
    return true;
  }
}
/**
 * Shared engine instance, created when this module is first imported.
 * Same object as `CognitiveErrorIntelligence.getInstance()`.
 */
export const cognitiveErrorIntelligence =
  CognitiveErrorIntelligence.getInstance();