Spaces:
Paused
Paused
File size: 10,880 Bytes
ee826ee | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 | /**
* runtime-healer.js — Supervisor Auto-Curativo del Proceso
* ===========================================================
* Basado en: VIGIL framework (2025) — "reflective runtime that supervises
* a sibling agent and performs autonomous maintenance"
*
* FUNCIONES:
* 1. Capturar excepciones no manejadas y diagnosticarlas con IA
* 2. Detectar degradación de rendimiento (lentitud, alta RAM)
* 3. Proponer fixes al owner (NUNCA auto-aplicar código)
* 4. Reiniciar módulos específicos sin matar el proceso
* 5. Log estructurado de todos los eventos para auditoría
*
* LÍMITE CRÍTICO (de la investigación):
* "Self-correcting agents without hard limits are just very determined
* systems that can make the same bad decision many times in a row"
* → El healer NUNCA escribe código directamente. Solo diagnostica y sugiere.
*/
import { callAIBackground } from './ai.js';
import * as db from './db.js';
import { readConfig } from './utils.js';
// Configuration loaded once, at module load, via readConfig().
// notifyOwner() reads `config.admin.userId` from it.
const config = readConfig();
// ── Healer state ──────────────────────────────────────────────────────────────
// Mutable module-level state shared by the functions in this file.
const _state = {
errors : [], // most recent recorded errors (recordError() keeps at most MAX_ERRORS_STORED)
healingAttempts : 0, // NOTE(review): not read or written anywhere else in the visible code
lastHealed : null, // NOTE(review): not read or written anywhere else in the visible code
degraded : false, // refreshed by checkPerformance() on every call
client : null, // set by initRuntimeHealer(client); notifyOwner() is a no-op while it is null
};
// Maximum number of entries kept in _state.errors; recordError() drops the oldest one beyond this.
const MAX_ERRORS_STORED = 50;
// Hourly cap on AI diagnoses, so a recurring error cannot drive an endless healing loop.
const MAX_HEALING_PER_HOUR = 5;
// Number of diagnoses consumed in the current window (incremented by diagnoseWithAI()).
let _healingThisHour = 0;
// Timestamp (ms since epoch) at which the current window started.
let _hourReset = Date.now();

/**
 * Tells whether another healing attempt fits in the current hourly budget.
 *
 * When more than one hour (3,600,000 ms) has elapsed since the window started,
 * the counter is cleared and a new window begins at the moment of this call.
 *
 * @returns {boolean} true while fewer than MAX_HEALING_PER_HOUR attempts have
 *   been used in the current window.
 */
function canHeal() {
  const now = Date.now();
  const windowExpired = now - _hourReset > 3_600_000;
  if (windowExpired) {
    _hourReset = now;
    _healingThisHour = 0;
  }
  return MAX_HEALING_PER_HOUR > _healingThisHour;
}
// ── Registrar error ──────────────────────────────────────────────────────────
/**
 * Records a runtime error in the in-memory list of recent errors and persists
 * it through `db.memSet` on a best-effort basis.
 *
 * This runs inside the process-level crash handlers, so it must never throw:
 * thrown values are not always Error instances, which means `message` and
 * `stack` can arrive as undefined, null or a non-string.
 *
 * @param {string} type - Error category, e.g. 'uncaughtException'.
 * @param {*} message - Error message; coerced to a string and cut to 500 chars.
 * @param {*} [stack=''] - Stack trace; coerced to a string and cut to 1000 chars.
 * @param {string} [context=''] - Free-form context, stored as given.
 * @returns {{type: string, message: string, stack: string, context: string, ts: number}}
 *   The entry that was stored.
 */
function recordError(type, message, stack = '', context = '') {
  // null/undefined become '' instead of the literal text "null"/"undefined".
  const toText = (value) => (value == null ? '' : String(value));
  const ts = Date.now();
  const entry = {
    type,
    message: toText(message).slice(0, 500),
    stack: toText(stack).slice(0, 1000),
    context,
    ts,
  };

  _state.errors.push(entry);
  if (_state.errors.length > MAX_ERRORS_STORED) _state.errors.shift();

  // Persist for later diagnosis. Persistence is deliberately best effort: a
  // storage failure (rejected promise or synchronous throw) must not turn the
  // act of recording an error into a second error.
  try {
    Promise.resolve(db.memSet(`runtime.error.${ts}`, entry, 'runtime_errors')).catch(() => {});
  } catch {
    // Intentionally ignored; the entry is still available in memory.
  }
  return entry;
}
// ── Diagnóstico con IA ───────────────────────────────────────────────────────
/**
 * Asks the background AI model to classify an error and propose safe actions.
 *
 * Every call that passes the hourly cap consumes one unit of the healing
 * budget, whether or not the model returns a usable answer.
 *
 * @param {{type: string, message: string, stack?: string}} error - The error
 *   entry to diagnose (callers pass the entry returned by recordError()).
 * @returns {Promise<object|null>} The parsed diagnosis object, or null when the
 *   hourly cap is reached, the AI call fails, or the reply is not a JSON object.
 */
async function diagnoseWithAI(error) {
  if (!canHeal()) {
    console.warn('[Healer] Cap de healing/hora alcanzado — no diagnosticando');
    return null;
  }
  _healingThisHour++;

  // Include the last 3 recorded errors so the model can detect repeated patterns.
  const recentErrors = _state.errors
    .slice(-3)
    .map((e) => `[${new Date(e.ts).toISOString()}] ${e.type}: ${e.message}`)
    .join('\n');

  try {
    const raw = await callAIBackground([
      {
        role: 'system',
        content: `Eres un experto en Node.js y Discord.js diagnosticando errores de Zelin, un bot de Discord.
Analiza el error y propón una solución ESPECÍFICA y SEGURA.
Reglas:
- NO sugieras reiniciar el servidor completo como primera opción
- SÍ sugiere qué módulo específico podría recargarse
- SÍ identifica si es un error transitorio (red, API) o estructural (bug en código)
- SÍ indica si hay un pattern de errores repetidos
Responde SOLO JSON:
{
"type": "transient|structural|resource|unknown",
"root_cause": "causa raíz en 1 frase",
"severity": "low|medium|high|critical",
"is_pattern": true/false,
"safe_actions": ["acción 1 que Zelin puede hacer sola sin riesgo"],
"owner_suggestion": "qué debería hacer tomatitoo manualmente (si aplica)",
"auto_recoverable": true/false
}`,
      },
      {
        role: 'user',
        content: `Error actual:\n${error.type}: ${error.message}\n${error.stack ? `Stack:\n${error.stack}` : ''}\n\nErrores recientes:\n${recentErrors}`,
      },
    ], 'reasoning', 400);

    // The reply may be wrapped in Markdown code fences; strip them before parsing.
    const diagnosis = JSON.parse(String(raw).replace(/```json|```/g, '').trim());

    // Valid JSON is not necessarily an object ("null", "[]", "42" all parse).
    // Callers read named fields, so anything else is treated as no diagnosis.
    if (diagnosis === null || typeof diagnosis !== 'object' || Array.isArray(diagnosis)) {
      console.warn('[Healer] Respuesta de IA descartada: no es un objeto JSON');
      return null;
    }
    return diagnosis;
  } catch (err) {
    // Diagnosis is optional, so failures are reported rather than propagated.
    console.warn('[Healer] Fallo al diagnosticar con IA:', err?.message ?? err);
    return null;
  }
}
// ── Acciones de recuperación automática (solo las seguras) ───────────────────
/**
 * Runs the known-safe recovery actions listed in an AI diagnosis.
 *
 * Only three kinds of action are recognised, by substring match on the
 * lower-cased text: clearing the AI cache, forcing a garbage collection (only
 * when `global.gc` is exposed) and waiting 5 seconds. Anything else is ignored.
 *
 * The diagnosis is model output, so `safe_actions` is validated before use and
 * each action is isolated: a failure in one is logged and does not stop the
 * others or reject into the crash handler that called this function.
 *
 * @param {object|null|undefined} diagnosis - Diagnosis as returned by diagnoseWithAI().
 * @returns {Promise<boolean>} false when the diagnosis is missing or not marked
 *   `auto_recoverable`; true otherwise, even if no action matched.
 */
async function attemptAutoRecovery(diagnosis) {
  if (!diagnosis?.auto_recoverable) return false;

  // A non-array value (number, object, missing field) means "no actions".
  const actions = Array.isArray(diagnosis.safe_actions) ? diagnosis.safe_actions : [];

  for (const action of actions) {
    if (typeof action !== 'string') continue;
    const actionLower = action.toLowerCase();

    try {
      if (actionLower.includes('clear cache') || actionLower.includes('limpiar cache')) {
        const { clearCache } = await import('./ai.js');
        clearCache();
        console.log('[Healer] ✅ Cache limpiada automáticamente');
      } else if (actionLower.includes('gc') || actionLower.includes('garbage')) {
        if (global.gc) {
          global.gc();
          console.log('[Healer] ✅ GC ejecutado');
        }
      } else if (actionLower.includes('reintentar') || actionLower.includes('wait')) {
        // Transient network errors: simply give the remote side time to recover.
        await new Promise((resolve) => setTimeout(resolve, 5000));
        console.log('[Healer] ✅ Esperado 5s para recuperación de error transitorio');
      }
    } catch (err) {
      console.warn(`[Healer] Acción de recuperación fallida (${action}):`, err?.message ?? err);
    }
  }
  return true;
}
// ── Notificar al owner ───────────────────────────────────────────────────────
/**
 * Sends the bot owner a direct message summarising an error and its diagnosis.
 *
 * Does nothing until a client has been stored in `_state.client`. Delivery is
 * best effort: any failure is logged and never propagated, because this runs
 * from the process-level crash handlers.
 *
 * @param {{type: string, message?: string}} error - The recorded error entry.
 * @param {object|null|undefined} diagnosis - Diagnosis from diagnoseWithAI(), if any.
 * @returns {Promise<void>}
 */
async function notifyOwner(error, diagnosis) {
  if (!_state.client) return;
  try {
    const owner = await _state.client.users.fetch(config.admin.userId);
    const dm = await owner.createDM();

    // The diagnosis is model output: coerce before calling string methods, so
    // an unexpected type cannot make the whole notification silently vanish.
    const severity = String(diagnosis?.severity ?? 'unknown');
    // NOTE(review): the `critical` icon could not be recovered from the
    // mis-encoded source text and is left exactly as found; confirm the emoji.
    const icon = { low: '🟡', medium: '🟠', high: '🔴', critical: 'π' }[severity] ?? '⚪';
    const fallbackCause = String(error.message ?? '').slice(0, 200);
    const errorsLastHour = _state.errors.filter((e) => Date.now() - e.ts < 3_600_000).length;

    const msg = [
      `${icon} **Error en Zelin** [${severity.toUpperCase()}]`,
      `**Tipo:** ${error.type}`,
      `**Causa raíz:** ${diagnosis?.root_cause ?? fallbackCause}`,
      diagnosis?.is_pattern ? `⚠️ **Patrón detectado** — este error se repite` : '',
      diagnosis?.owner_suggestion ? `**Sugerencia para ti:** ${diagnosis.owner_suggestion}` : '',
      `*Errores en el último ciclo: ${errorsLastHour}*`,
    ].filter(Boolean).join('\n');

    // Cut to 1900 chars; presumably to stay below the platform's message size limit.
    await dm.send(msg.slice(0, 1900));
  } catch (err) {
    console.warn('[Healer] No se pudo notificar al owner:', err?.message ?? err);
  }
}
// ── Monitor de rendimiento ───────────────────────────────────────────────────
// NOTA: NO usamos heapUsed/heapTotal — esa métrica es engañosa en Node.js con
// módulos nativos (node-llama-cpp carga modelos fuera del heap V8).
// heapTotal puede ser 200MB mientras RSS es 1.2GB → heapPct = 94% FALSO POSITIVO.
// Métrica usada: RAM usada del sistema (total − libre) vs RAM total real del
// sistema; el RSS del proceso solo se reporta como dato informativo.
import { freemem, totalmem } from 'os';
/**
 * Samples memory usage, refreshes `_state.degraded` and returns a snapshot.
 *
 * The decision is based on system-wide RAM usage (total minus free, over
 * total), not on the V8 heap. Above 94% the state is marked degraded and a
 * garbage collection is forced when `global.gc` is exposed; above 88% only a
 * warning is logged; in every non-critical case the degraded flag is cleared.
 *
 * @returns {{heapUsedMB: string, heapTotalMB: string, rssMB: string,
 *   ramUsedMB: string, ramTotalMB: string, ramPct: string, uptime: string,
 *   degraded: boolean}} Snapshot with pre-formatted values.
 */
function checkPerformance() {
  const toMB = (bytes) => bytes / 1024 / 1024;

  const usage = process.memoryUsage();
  const totalRamMB = toMB(totalmem());
  const freeRamMB = toMB(freemem());
  const usedRamMB = totalRamMB - freeRamMB;
  // Fraction of SYSTEM RAM in use (not of the V8 heap).
  const ramPct = usedRamMB / totalRamMB;

  const highWaterMark = 0.88;
  const criticalMark = 0.94;

  if (ramPct > criticalMark) {
    console.warn(`[Healer] π΄ RAM crΓtica: ${usedRamMB.toFixed(0)}MB / ${totalRamMB.toFixed(0)}MB (${(ramPct*100).toFixed(0)}%)`);
    _state.degraded = true;
    if (global.gc) {
      global.gc();
      console.log('[Healer] GC ejecutado');
    }
  } else {
    // High usage is only worth a warning; it does not count as degraded.
    if (ramPct > highWaterMark) {
      console.warn(`[Healer] β οΈ RAM alta: ${usedRamMB.toFixed(0)}MB / ${totalRamMB.toFixed(0)}MB (${(ramPct*100).toFixed(0)}%)`);
    }
    _state.degraded = false;
  }

  return {
    heapUsedMB: toMB(usage.heapUsed).toFixed(1),
    heapTotalMB: toMB(usage.heapTotal).toFixed(1),
    rssMB: toMB(usage.rss).toFixed(1),
    ramUsedMB: usedRamMB.toFixed(1),
    ramTotalMB: totalRamMB.toFixed(1),
    ramPct: `${(ramPct * 100).toFixed(0)}%`,
    uptime: `${Math.round(process.uptime())}s`,
    degraded: _state.degraded,
  };
}
// ── Inicializar el healer ────────────────────────────────────────────────────
// Set once the process listeners and the monitor timer have been installed, so
// that calling initRuntimeHealer() again does not duplicate them.
let _healerInstalled = false;

/**
 * Installs the runtime healer: process-level error listeners plus a periodic
 * memory check every 5 minutes.
 *
 * Safe to call more than once: the client reference is always refreshed, but
 * listeners and the timer are installed only on the first call.
 *
 * Both listeners are async and therefore wrapped in try/catch. A rejection
 * escaping from a listener would itself surface as an 'unhandledRejection'
 * and re-enter the healer.
 *
 * NOTE(review): the process is intentionally kept alive after an
 * 'uncaughtException'. Node's documentation warns that resuming normal
 * operation after this event is unsafe; confirm this trade-off is still wanted.
 *
 * @param {object} client - Client used by notifyOwner() to DM the owner.
 * @returns {void}
 */
export function initRuntimeHealer(client) {
  _state.client = client;
  if (_healerInstalled) return;
  _healerInstalled = true;

  // 1. Uncaught exceptions.
  process.on('uncaughtException', async (err) => {
    try {
      // Anything can be thrown, not only Error instances.
      const message = String(err?.message ?? err);
      const stack = typeof err?.stack === 'string' ? err.stack : '';
      const recorded = recordError('uncaughtException', message, stack);
      console.error('[Healer] 🚨 uncaughtException:', message);

      const diagnosis = await diagnoseWithAI(recorded);
      if (diagnosis) {
        await attemptAutoRecovery(diagnosis);
        // Only bother the owner for medium severity or worse.
        if (['medium', 'high', 'critical'].includes(diagnosis.severity)) {
          await notifyOwner(recorded, diagnosis);
        }
      }
      // Deliberately no process.exit(): try to keep running.
    } catch (handlerErr) {
      console.error('[Healer] Fallo interno procesando uncaughtException:', handlerErr);
    }
  });

  // 2. Unhandled promise rejections.
  process.on('unhandledRejection', async (reason) => {
    try {
      const isError = reason instanceof Error;
      const msg = isError ? String(reason.message) : String(reason);
      const stack = isError ? reason.stack : '';
      const recorded = recordError('unhandledRejection', msg, stack);
      console.error('[Healer] 🚨 unhandledRejection:', msg.slice(0, 200));

      // Diagnose only recurring rejections: 3 or more in the last 5 minutes
      // whose first 50 characters match (the one just recorded is counted).
      const recentSimilar = _state.errors.filter((e) =>
        e.type === 'unhandledRejection' &&
        Date.now() - e.ts < 5 * 60_000 &&
        e.message.slice(0, 50) === msg.slice(0, 50)
      ).length;

      if (recentSimilar >= 3) {
        const diagnosis = await diagnoseWithAI(recorded);
        if (diagnosis) {
          await attemptAutoRecovery(diagnosis);
          await notifyOwner(recorded, diagnosis);
        }
      }
    } catch (handlerErr) {
      console.error('[Healer] Fallo interno procesando unhandledRejection:', handlerErr);
    }
  });

  // 3. Performance monitor, every 5 minutes.
  setInterval(() => {
    const perf = checkPerformance();
    if (_state.degraded) {
      console.warn(`[Healer] ⚠️ Rendimiento degradado: ${perf.ramUsedMB}MB / ${perf.ramTotalMB}MB RAM sistema (${perf.ramPct}), RSS proceso: ${perf.rssMB}MB`);
    }
  }, 5 * 60_000);

  console.log('[Healer] ✅ Runtime healer activo — monitoreando proceso');
}
/**
 * Returns a snapshot of the healer's counters together with a fresh
 * performance reading.
 *
 * Note that this calls checkPerformance(), which refreshes `_state.degraded`
 * and may log warnings or trigger a garbage collection.
 *
 * @returns {{errors_stored: number, errors_last_hour: number,
 *   healing_this_hour: number, healing_cap: number, degraded: boolean,
 *   performance: object}} Current healer statistics.
 */
export function getHealerStats() {
  // Take the reading first: it updates _state.degraded, and `degraded` below
  // must agree with `performance.degraded` within the same snapshot.
  const performance = checkPerformance();
  const now = Date.now();
  const errorsLastHour = _state.errors.filter((e) => now - e.ts < 3_600_000).length;

  return {
    errors_stored: _state.errors.length,
    errors_last_hour: errorsLastHour,
    healing_this_hour: _healingThisHour,
    healing_cap: MAX_HEALING_PER_HOUR,
    degraded: _state.degraded,
    performance,
  };
}
|