/** * ai.js — Router de IA v10.0 — TORNEO DE MODELOS * ================================================ * CAMBIOS v10.0 (basado en torneo de 179 modelos): * - Qwen3-32B (Groq) como proveedor PRIMARIO — mejor español+velocidad+thinking * - Qwen3-235B-A22B (Cerebras) como premium — ultra-rápido, excelente español * - Llama-4-Scout-17B (Groq) como secundario — oficial español * - DeepSeek R1 (OpenRouter) para razonamiento complejo * - GLM mantenido pero degradado en prioridad * - Modelo local (Gemma 4 E4B) solo como fallback final * - Thinking mode optimizado para Qwen3 * - Cascadas reordenadas por puntaje del torneo */ import { readConfig } from './utils.js'; import { semanticCache } from './semantic-cache.js'; import { emergencyFallback, isLocalAIReady } from './local-ai.js'; import { callHuggingFace, callHuggingFaceCascade, isHFAvailable, discoverHFModels, getHFStats } from './hf-provider.js'; import { hiveGenerate, initialize as hiveInit, getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers } from './hive.js'; const config = readConfig(); const ai = config.ai ?? {}; const feats = config.features ?? {}; const glm = config.glm ?? {}; // ── Thinking Mode v3.0 — REMOVED injectThinking() ────────────────────────── // v3.0 CRITICAL FIX: injectThinking() was causing the #1 bug — chain-of-thought // leaking into Discord responses. Models would output their thinking process as // plain text despite instructions not to. The fix: DON'T inject thinking prompts. // Models already think internally. Adding "PIENSA PROFUNDAMENTE" causes them to // output that thinking. Less is more — just give good system prompts. // // For models with native thinking (GLM-5.1 reasoning_content, Qwen3 /think): // - We let the API handle thinking natively (separate from content) // - We NEVER inject /think or thinking instructions manually // - We ALWAYS strip any thinking that leaks into content via cleanThinkingArtifacts() // injectThinking is now a no-op — returns messages unchanged function injectThinking(messages, _providerName = '') { return messages; } // Limpiar artefactos de thinking + patrones AI — v11 ANTI-LEAK // v11: Major overhaul — handles ALL known thinking leak patterns // Root cause: models output their chain-of-thought as plain text // v11 FIX: More aggressive detection — lower thresholds, more patterns function cleanThinkingArtifacts(text) { if (!text || typeof text !== 'string') return text; let t = text; // ═══ v11 NUCLEAR: Strip ALL thinking tags first ═══ // Handle every known tag format t = t.replace(//gi, ''); t = t.replace(//gi, ''); t = t.replace(//gi, ''); t = t.replace(//gi, ''); t = t.replace(//gi, ''); t = t.replace(//gi, ''); t = t.replace(/<\/think>/gi, ''); t = t.replace(/<\/thinking>/gi, ''); t = t.replace(/<\/reasoning>/gi, ''); // ═══ v11 CRÍTICO: Ultra-aggressive thinking leak detection ═══ // Detect multi-line thinking followed by a response // Pattern: First part is analytical (long, detailed), last part is natural Spanish if (t.includes('\n') && t.length > 150) { const lines = t.split('\n').filter(l => l.trim().length > 0); if (lines.length >= 3) { // Check if first lines are thinking (English or structured analysis) const firstLines = lines.slice(0, Math.ceil(lines.length / 2)); const lastLines = lines.slice(Math.ceil(lines.length / 2)); // v11: More thinking indicators — covers more model output patterns const thinkingWords = /^(?:okay|alright|let me|i should|the user|first,|based on|since|so,|well,|now,|also,|however,|but,|actually,|hmm|let's|i need|i think|i'll|going to|in order|therefore|because|this means|that means|it seems|it appears|looking at|considering|analyzing|understanding|evaluating|to respond|to answer|the message|the question|as zelin|respond as|in character|staying in|my role|my persona)/i; // v11: Expanded Spanish words list for better detection const spanishWords = /\b(wey|neta|chido|chale|órale|híjole|zelin|morra|güey|onda|chingón|bueno|claro|sí|no|nah|oye|mira|ósea|pues|nada|simón|sale|va|ok|jaja|qu[eé]|c[oó]mo|d[oó]nde|cu[aá]ndo|por qu[eé]|much[oas]?|tambi[eé]n|aqu[ií]|all[iá]|este|esta|eso|esa|s[ií]|nope|yup|sip|nop|dale|va|holi|ola|bro|crack|xd|gg|ns|ni idea|ni modo|ya ves|ya mero|híjole|a poco|qué onda|no mames|no manches|est[aá] ca[nnñ]ón|padre|madre)/i; let firstHalfThinking = 0; let secondHalfSpanish = 0; for (const line of firstLines) { if (thinkingWords.test(line.trim())) firstHalfThinking++; if (spanishWords.test(line)) firstHalfThinking--; // Not thinking if Spanish } for (const line of lastLines) { if (spanishWords.test(line)) secondHalfSpanish++; if (thinkingWords.test(line.trim())) secondHalfSpanish--; // Not response if thinking } // v11: Lower threshold — even 1 thinking indicator is suspicious if (firstHalfThinking >= 1 && secondHalfSpanish >= 1) { const response = lastLines.join('\n').trim(); if (response.length > 5) { t = response; } } } } // ═══ v11 NEW: Single-paragraph English thinking followed by Spanish ═══ // Pattern: "Okay, I should respond as a Mexican girl. wey no sé" // Extract ONLY the Spanish part after the last English sentence if (t.length > 100) { // Find the LAST transition from English to Spanish const sentences = t.split(/(?<=[.!?])\s+/); if (sentences.length >= 2) { let lastSpanishStart = -1; for (let i = 0; i < sentences.length; i++) { // A sentence is Spanish if it has Spanish-specific words or characters const s = sentences[i]; if (/\b(wey|neta|chido|chale|órale|híjole|morra|güey|onda|chingón|ósea|pues|simón)\b/i.test(s) || /[¿¡]/.test(s) || (/\b(s[ií]|no|nah|oye|mira|bueno|claro|dale|ns)\b/i.test(s) && !/^(?:okay|alright|let me|i should|the user)/i.test(s))) { if (i > 0 && /^(?:okay|alright|let me|i should|the user|first|based on|since|so|well|now|also|however|actually|hmm|let's|i need|i think)/i.test(sentences[0])) { lastSpanishStart = i; } } } if (lastSpanishStart >= 0) { const response = sentences.slice(lastSpanishStart).join(' ').trim(); if (response.length > 5) t = response; } } } // ═══ v9 CRÍTICO: Detectar y eliminar razonamiento interno en texto plano ═══ // El modelo a veces incluye su proceso de pensamiento como texto plano // sin marcadores como . Detectamos estos patrones y los limpiamos. // Patrón 1: Líneas numeradas de thinking recitadas del system prompt // Ej: "1. ¿Cuál es la intención real del mensaje? ... 6. Responde SOLO..." const thinkingRecitationPattern = /^[\d][.)]\s*(?:¿Cu[aá]l|¿Hay|¿Qu[eé]|¿C[oó]mo|Auto-eval|Responde|PIENSA|Pienso|Pensamiento)/m; if (thinkingRecitationPattern.test(t)) { // La respuesta real está DESPUÉS de la última línea numerada const lines = t.split('\n'); let lastNumberedLine = -1; for (let i = 0; i < lines.length; i++) { if (/^\d[.)]\s/.test(lines[i].trim())) lastNumberedLine = i; } if (lastNumberedLine >= 0 && lastNumberedLine < lines.length - 1) { const afterThinking = lines.slice(lastNumberedLine + 1).join('\n').trim(); if (afterThinking.length > 2) t = afterThinking; } } // Patrón 2: Razonamiento en inglés seguido de respuesta en español // Ej: "Okay, the user is asking... I should respond as Zelin..." const englishThinkingPattern = /^(?:Okay|Alright|Let me|I should|The user|First,|Based on|Since)/i; if (englishThinkingPattern.test(t) && t.length > 150) { const lines = t.split('\n'); let foundSpanish = false; const spanishLines = []; for (const line of lines) { // Si la línea tiene español (acentos, ñ, ¿, ¡, o jerga mexicana) if (/[¿¡ñáéíóú]/i.test(line) || /\b(wey|neta|chido|chale|órale|zelin|híjole|morra)\b/i.test(line)) { foundSpanish = true; spanishLines.push(line); } else if (foundSpanish) { spanishLines.push(line); } } if (spanishLines.length > 0 && spanishLines.join('\n').trim().length > 2) { t = spanishLines.join('\n').trim(); } } // Patrón 3: "Zelin would say:" o "Mi respuesta:" seguido de la respuesta const promptPatterns = [ /Zelin (?:would|should|might) (?:say|respond|reply|answer)[:\s]*\n?/i, /(?:Mi respuesta|My response|My answer|Response)[:\s]*\n?/i, /(?:Así respondería|Here's how|Here is what)[:\s]*\n?/i, ]; for (const p of promptPatterns) { const match = t.match(p); if (match && match.index > 0) { const after = t.slice(match.index + match[0].length).trim(); if (after.length > 2) t = after; } } // v8 CRÍTICO: Limpiar "user: ... zelin: ..." pattern (modelo recita ejemplos del prompt) if (/\buser:/i.test(t)) { const beforeUser = t.split(/\buser:/i)[0].trim(); if (beforeUser.length > 2) { t = beforeUser; } else { t = 'ns'; } } if (/\bzelin:\s*/i.test(t) && !/^zelin:/i.test(t)) { const beforeZelin = t.split(/\bzelin:\s*/i)[0].trim(); if (beforeZelin.length > 2) t = beforeZelin; } t = t.replace(/^zelin:\s*/i, ''); // (thinking tags already stripped at the top of this function) // Qwen3 thinking mode: the model sometimes outputs /think content and /no_think markers const noThinkIdx = t.indexOf('/no_think'); if (noThinkIdx !== -1) { const afterNoThink = t.slice(noThinkIdx + 9).trim(); if (afterNoThink.length > 2) t = afterNoThink; } t = t.replace(/^\/think\s*/i, ''); t = t.replace(/^\/no_think\s*/i, ''); t = t.replace(/^Pienso[\s\S]*?\nRespuesta:\s*/i, ''); t = t.replace(/^Pensamiento:[\s\S]*?\nRespuesta:\s*/i, ''); // v9: Más patrones de thinking en texto plano t = t.replace(/^An[aá]lisis:[\s\S]*?(?=Ahora|Bien|Ok|Sí|No|Nah|Wey|Oye|hmm|bueno)/i, ''); t = t.replace(/^Razonamiento:[\s\S]*?(?=Ahora|Bien|Ok|Sí|No|Nah|Wey|Oye|hm|bueno)/i, ''); // Quitar prefijos de "respuesta final" t = t.replace(/^Respuesta final:\s*/i, ''); t = t.replace(/^Final answer:\s*/i, ''); t = t.replace(/^Respuesta:\s*/i, ''); // Limpiar tool call placeholders rotos t = t.replace(/\[(?:mc_status|mc_player|mc_wiki|hora actual|usar\s+\w+\s+para\s+dato\s+real)\]/gi, ''); t = t.replace(/c_status\]/g, ''); t = t.replace(/ora actual\]/g, ''); t = t.replace(/\w+_(?:status|player|wiki|info)\]/g, ''); // Reemplazar patrones formales de rechazo t = t.replace(/no puedo cumplir (esa|este|aquella) (solicitud|request|orden|instrucci[oó]n)/gi, 'eso no va'); t = t.replace(/no puedo procesar esa solicitud/gi, 'eso no va'); t = t.replace(/lo siento,? pero no puedo/gi, 'nah'); t = t.replace(/disculpa,? pero no puedo/gi, 'nah'); t = t.replace(/lamentablemente no puedo/gi, 'nah'); // Corregir "nop" al inicio cuando no es sí/no t = t.replace(/^nop,\s*soy\s+/gi, 'no, soy '); // Correcciones de identidad t = t.replace(/\bzel[eé]n\b/gi, 'zelin'); t = t.replace(/\brezin\b/gi, 'zelin'); t = t.replace(/\bzelen\b/gi, 'zelin'); t = t.replace(/\btomatitoo\b/gi, 'tomatito'); // Quitar patrones de asistente al final t = t.replace(/[,\s]*¿?en qu[eé]\s+(te\s+)?puedo\s+ayud[aeo]r?[¿?]?\.?\s*$/gi, ''); t = t.replace(/[,\s]*¿?(algo\s+)?m[aá]s\s+en\s+(lo\s+que\s+)?pued[ao]\s+ayud[aeo]r?[¿?]?\.?\s*$/gi, ''); t = t.replace(/[,\s]*¿?necesitas\s+(algo\s+)?m[aá]s[¿?]?\.?\s*$/gi, ''); // Garbled text corrections t = t.replace(/\besti con vos/gi, 'suerte con eso'); t = t.replace(/\bwienes/gi, 'bien, y tú'); // Simplificar recitación de identidad if (/^soy zelin,? (la morra |la )?del server minecraft? tomatesmp\.?$/i.test(t)) { t = 'soy zelin del server'; } return t.trim(); } // ── Modelos descubiertos dinámicamente por proveedor ───────────────────────── // Se actualizan al arrancar y cada 6h para que no queden obsoletos const _discoveredModels = {}; async function discoverModels(providerName) { try { switch (providerName) { case 'groq': { if (!ai.groq?.apiKey) return; const r = await fetch('https://api.groq.com/openai/v1/models', { headers: { Authorization: `Bearer ${ai.groq.apiKey}` }, signal: AbortSignal.timeout(5000), }); if (!r.ok) return; const data = await r.json(); const models = (data.data ?? []).filter(m => m.active !== false).map(m => m.id); _discoveredModels.groq = models; console.log(`[AI] Groq models: ${models.length} (${models.slice(0,3).join(', ')}...)`); break; } case 'mistral': { if (!ai.mistral?.apiKey) return; const r = await fetch('https://api.mistral.ai/v1/models', { headers: { Authorization: `Bearer ${ai.mistral.apiKey}` }, signal: AbortSignal.timeout(5000), }); if (!r.ok) return; const data = await r.json(); const models = (data.data ?? []).map(m => m.id); _discoveredModels.mistral = models; console.log(`[AI] Mistral models: ${models.length}`); break; } case 'pollinations': { const r = await fetch('https://text.pollinations.ai/models', { signal: AbortSignal.timeout(5000) }); if (!r.ok) return; const data = await r.json(); _discoveredModels.pollinations = Array.isArray(data) ? data.map(m => m.name ?? m) : []; console.log(`[AI] Pollinations models: ${_discoveredModels.pollinations.length}`); break; } } } catch {} } // Seleccionar el mejor modelo disponible de un proveedor function getBestModel(providerName, fallback, preference = 'large') { const models = _discoveredModels[providerName]; if (!models?.length) return fallback; // Filtrar modelos que NO son de chat/texto const EXCLUDE = /whisper|tts|speech|audio|embed|vision|image|dall|stable|rerank|guard|code-gecko|text-bison/i; const textModels = models.filter(m => !EXCLUDE.test(m)); if (!textModels.length) return fallback; if (preference === 'fast') { return textModels.find(m => /8b|7b|fast|instant|flash|mini/i.test(m)) ?? fallback; } if (preference === 'large') { return textModels.find(m => /70b|72b|large|versatile|plus|pro/i.test(m)) ?? fallback; } return fallback; } // Descubrir en background al arrancar export function startModelDiscovery() { const providers = ['groq', 'mistral', 'pollinations']; providers.forEach(p => discoverModels(p).catch(() => {})); // Descubrir modelos de HuggingFace discoverHFModels().catch(() => {}); // Refrescar cada 6h setInterval(() => { providers.forEach(p => discoverModels(p).catch(() => {})); discoverHFModels().catch(() => {}); }, 6 * 60 * 60 * 1000); } // ── Tiers y cascadas (GLM 5.1 primero, API-first, local fallback) ──────────── const TIERS = { fast : ['glmFlash', 'groqFast', 'cerebras', 'pollinations', 'hfFast'], smart : ['glm51', 'pollinations', 'groq', 'hfSmart', 'mistral', 'gemma4', 'cloudflare'], fallback: ['glmAir', 'openrouter', 'groqKimi', 'openrouterR1', 'mistralCode', 'hfFallback'], }; const CASCADES = { // v11: HIVE — RigoChat-7B cluster como PRIMARIO (usa TODOS los workers, no Promise.any) // Hive: consenso + speculative decoding + parallel batch = máxima potencia chat : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'groqFast', 'pollinations', 'hfSmart', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'local'], spanish : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'hfSpanish', 'pollinations', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'groqKimi', 'local'], fast : ['hive', 'groqFast', 'cerebras', 'glmFlash', 'pollinations', 'hfFast', 'groq', 'mistral', 'cloudflare', 'openrouter', 'local'], reasoning : ['hive', 'groqQwen3', 'cerebras', 'groqKimi', 'hfReasoning', 'openrouterR1', 'glm51', 'groq', 'mistral', 'pollinations', 'gemma4', 'cloudflare', 'local'], code : ['groqQwen3', 'glm51', 'mistralCode', 'hfCode', 'groq', 'pollinations', 'mistral', 'openrouter'], volume : ['hive', 'groqFast', 'glmFlash', 'pollinations', 'cerebras', 'hfFast', 'gemma4', 'cloudflare', 'groqQwen3', 'local'], background: ['hive', 'glmFlash', 'mistral', 'groqFast', 'groq', 'hfFast', 'pollinations', 'cloudflare', 'gemma4', 'openrouter', 'local'], }; const GLM_PROVIDERS = ['glm51', 'glmAir', 'glmFlash']; const ALL_PROVIDERS = [...new Set([...TIERS.fast, ...TIERS.smart, ...TIERS.fallback, 'groqQwen3', 'cerebras', 'local', 'hive'])]; const HF_PROVIDERS = ['hfFast', 'hfSmart', 'hfSpanish', 'hfCode', 'hfReasoning', 'hfFallback']; // ── Límites diarios ─────────────────────────────────────────────────────────── const DAILY_LIMITS = { // Local: sin límites — modelo propio local : 99999, // Qwen3-32B (Groq) — TORNEO: #1 modelo primario groqQwen3 : 1000, // Qwen3-32B via Groq — 60 RPM, 1000 RPD // Cerebras — TORNEO: #2 ultra-fast reasoning cerebras : 14400, // 30 RPM, 1M TPD // GLM: gratis/ilimitado glm51 : 99999, glmAir : 99999, glmFlash : 99999, // Otros proveedores pollinations : 99999, groq : 1000, groqFast : 14400, mistral : 99999, mistralCode : 99999, gemma4 : 86400, openrouter : 1000, cloudflare : 10000, groqKimi : 1000, openrouterR1 : 1000, hfFast : 5000, hfSmart : 2000, hfSpanish : 3000, hfCode : 2000, hfReasoning : 1000, hfFallback : 500, hive : 99999, // HIVE — sin límites, es nuestro cluster propio }; // ── Circuit Breaker ─────────────────────────────────────────────────────────── class CircuitBreaker { constructor(name) { this.name = name; this.state = 'CLOSED'; this.failTimes = []; this.openCount = 0; this.lastFail = 0; this.threshold = 6; this.cooldown = 30000; this.maxCooldown = 600000; } canRequest() { if (this.state === 'CLOSED') return true; const cd = Math.min(this.cooldown * Math.pow(2, this.openCount - 1), this.maxCooldown); if (Date.now() - this.lastFail > cd) { this.state = 'HALF_OPEN'; return true; } return false; } recordSuccess() { this.failTimes = []; this.openCount = 0; this.state = 'CLOSED'; } recordFailure(code, isTimeout = false) { this.lastFail = Date.now(); if (!isTimeout) this.failTimes.push(Date.now()); const weight = isTimeout ? 0.3 : 1.0; if (code === 401 || code === 403) { this.state = 'OPEN'; this.openCount = 99; this.lastFail = Date.now() + 86_400_000; return; } const recent = this.failTimes.filter(t => Date.now() - t < 60000); if (recent.length * weight >= this.threshold || this.state === 'HALF_OPEN') { this.state = 'OPEN'; this.openCount++; } } } const breakers = {}; for (const n of [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS]) breakers[n] = new CircuitBreaker(n); // HIVE breaker — more forgiving (workers can be slow) breakers['hive'] = new CircuitBreaker('hive'); breakers['hive'].threshold = 10; // Needs more failures before opening // ── Estadísticas por proveedor ───────────────────────────────────────────────── const _stats = {}; function getCounter(name) { if (!_stats[name]) _stats[name] = { req: 0, err: 0, totalMs: 0, calls: 0 }; return _stats[name]; } function recordReq(name) { getCounter(name).req++; } function recordError(name){ getCounter(name).err++; } function recordLatency(name, ms) { const s = getCounter(name); s.totalMs += ms; s.calls++; } // ── Daily counter reset ──────────────────────────────────────────────────────── let _lastResetDay = new Date().getUTCDate(); function checkDailyReset() { const today = new Date().getUTCDate(); if (today !== _lastResetDay) { _lastResetDay = today; for (const name of Object.keys(_stats)) { _stats[name].req = 0; _stats[name].err = 0; } console.log('[AI] 🔄 Daily counters reset'); } } // ── Health Score ────────────────────────────────────────────────────────────── function healthScore(name) { const s = getCounter(name); const succ = Math.max(1, s.req - s.err); const rate = succ / Math.max(1, s.req); const avg = s.calls > 0 ? s.totalMs / s.calls : 2000; const quota= DAILY_LIMITS[name] ? 1 - (s.req / DAILY_LIMITS[name]) : 1; return rate * 0.5 + (1 / (avg / 1000)) * 0.3 + quota * 0.2; } // ── Quota ───────────────────────────────────────────────────────────────────── function isQuotaOk(name) { const lim = DAILY_LIMITS[name]; return !lim || getCounter(name).req < lim * 0.95; } function predictExhaustion(name) { const c = getCounter(name), lim = DAILY_LIMITS[name]; if (!lim) return false; const hour = new Date().getHours(); const proj = c.req + (c.req / Math.max(hour, 1)) * (24 - hour); return proj > lim * 0.85; } // ── Adaptive tokens ─────────────────────────────────────────────────────────── function adaptiveTokens(msg, task, req) { if (req) return req; const len = (msg ?? '').length; if (task === 'fast' || len < 50) return 150; if (task === 'code') return 1024; if (task === 'reasoning') return 600; if (len < 100) return 200; if (len < 300) return 400; return 512; } // ── SingleFlight ────────────────────────────────────────────────────────────── const _inflight = new Map(); function singleFlight(key, fn) { if (_inflight.has(key)) return _inflight.get(key); const p = fn().finally(() => _inflight.delete(key)); _inflight.set(key, p); return p; } // ── TTL Cache ───────────────────────────────────────────────────────────────── class TTLCache { constructor() { this.store = new Map(); } _ttl(msg = '') { const m = msg.toLowerCase(); if (/regla|norma|plugin|info|servidor/.test(m)) return 86_400_000; if (/online|jugador|tps|lag/.test(m)) return 30_000; return 3_600_000; } key(msgs, task) { const text = msgs.map(m => m.role + ':' + (m.content ?? '').slice(0, 100)).join('|') + task; let h = 5381; for (let i = 0; i < text.length; i++) h = ((h << 5) + h + text.charCodeAt(i)) | 0; return `${task}:${h >>> 0}`; } get(k) { const e = this.store.get(k); if (!e) return null; if (Date.now() > e.exp) { this.store.delete(k); return null; } return e.value; } set(k, v, msg = '') { this.store.set(k, { value: v, exp: Date.now() + this._ttl(msg) }); if (this.store.size > 1000) this.store.delete(this.store.keys().next().value); } clear() { this.store.clear(); console.log('[AI] Cache limpiada'); } } export const _cache = new TTLCache(); // ── Pools de concurrencia ───────────────────────────────────────────────────── const _userQueue = { active: 0, max: 4 }; const _backgroundQueue = { active: 0, max: 2 }; async function withQueue(queue, fn) { while (queue.active >= queue.max) await new Promise(r => setTimeout(r, 50)); queue.active++; try { return await fn(); } finally { queue.active--; } } // ── Intent classifier (0 tokens) — SIN REGEX, keyword-based ────────────────── // Replaces regex-based INTENT_PATTERNS with a cleaner keyword matching system const INTENT_RULES = [ { keywords: ['hola', 'hey', 'hi', 'buenas', 'qué tal', 'ola', 'saludos', 'wenas'], matchMode: 'startsWith', type: 'fast', intent: 'greeting' }, { keywords: ['regla', 'norma', 'prohibi', 'permit'], matchMode: 'contains', type: 'volume', intent: 'rules' }, { keywords: ['```', '.yml', '.json', '.java', 'config'], matchMode: 'contains', type: 'code', intent: 'code' }, { keywords: ['ban', 'sancion', 'report', 'trampa', 'hack', 'cheat'], matchMode: 'contains', type: 'reasoning', intent: 'moderation' }, { keywords: ['analiza', 'explica', 'compara', 'argumenta'], matchMode: 'contains', type: 'reasoning', intent: 'complex' }, { keywords: ['cómo', 'qué', 'cuál', 'dónde', 'cuándo', 'por qué'], matchMode: 'contains', type: 'spanish', intent: 'question' }, ]; export function classifyIntent(message) { const m = (message ?? '').trim().toLowerCase(); // Short message check (was regex /^.{1,20}$/) if (m.length > 0 && m.length <= 20) { return { intent: 'short', type: 'fast' }; } for (const rule of INTENT_RULES) { for (const kw of rule.keywords) { if (rule.matchMode === 'startsWith' && m.startsWith(kw)) { return { intent: rule.intent, type: rule.type }; } if (rule.matchMode === 'contains' && m.includes(kw)) { return { intent: rule.intent, type: rule.type }; } } } return { intent: 'general', type: 'spanish' }; } // ── Gemini Rotator ──────────────────────────────────────────────────────────── const geminiRotator = (() => { const keys = ai.gemini?.keys ?? [ai.gemini?.apiKey].filter(Boolean); let i = 0; const failed = new Set(); const last = new Map(); return { get() { const available = keys.filter(k => !failed.has(k) && Date.now() - (last.get(k) ?? 0) > 65000); if (!available.length) { failed.clear(); return keys[i++ % keys.length]; } const k = available[i++ % available.length]; last.set(k, Date.now()); return k; }, fail(k) { failed.add(k); }, }; })(); // ── Función base OpenAI-compatible ─────────────────────────────────────────── // v9: Handles GLM-5.1 reasoning_content (separate from content) async function oai(url, key, model, messages, maxTokens, systemOverride) { const msgs = systemOverride ? [{ role: 'system', content: systemOverride }, ...messages.filter(m => m.role !== 'system')] : messages; const res = await fetch(url, { method : 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` }, body : JSON.stringify({ model, messages: msgs, max_tokens: maxTokens, stream: false }), signal : AbortSignal.timeout(15000), }); if (!res.ok) { const body = await res.text().catch(() => ''); throw Object.assign(new Error(`${model} ${res.status}: ${body.slice(0, 100)}`), { status: res.status, code: res.status }); } const data = await res.json(); const msg = data.choices?.[0]?.message; if (!msg) return ''; // v11 CRITICAL FIX: ALWAYS clean thinking artifacts from content // Models (GLM-5.1, Qwen3, etc.) can leak thinking into content field // regardless of whether reasoning_content is present. // We MUST always apply cleanThinkingArtifacts() to every response. let content = msg.content?.trim() ?? ''; // If reasoning_content exists, log it for debugging if (msg.reasoning_content) { console.log(`[AI] ${model}: reasoning_content present (${msg.reasoning_content.length} chars thinking, ${content.length} chars content)`); } // ALWAYS clean thinking artifacts — models can leak thinking in many ways: // 1. Plain-text reasoning before the actual response // 2. Numbered lists of analytical steps // 3. English analysis followed by Spanish response // 4. tags that weren't properly separated content = cleanThinkingArtifacts(content); return content; } function isTO(e) { return e?.name === 'TimeoutError' || e?.name === 'AbortError' || /timeout/i.test(e?.message ?? ''); } // ── callDirect: implementación de cada proveedor ───────────────────────────── async function callDirect(name, messages, maxTokens) { const PERM = [401, 402, 403, 404, 422]; switch (name) { // ── GLM (ZhipuAI) — proveedor primario, gratis/ilimitado ────────────── case 'glm51': { if (!glm.apiKey) throw new Error('glm51: sin key GLM'); return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions', glm.apiKey, 'glm-5.1', messages, maxTokens); } case 'glmAir': { if (!glm.apiKey) throw new Error('glmAir: sin key GLM'); return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions', glm.apiKey, 'glm-4-air', messages, maxTokens); } case 'glmFlash': { if (!glm.apiKey) throw new Error('glmFlash: sin key GLM'); return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions', glm.apiKey, 'glm-4-flash', messages, maxTokens); } case 'pollinations': { // Pollinations — free, unlimited, updated API endpoints const last = messages.filter(m => m.role === 'user').pop()?.content ?? ''; const sys = messages.find(m => m.role === 'system')?.content ?? ''; // FIX: Try models in PARALLEL with Promise.any instead of sequentially const POLL_MODELS = ['openai', 'mistral', 'llama', 'qwen']; try { const result = await Promise.any( POLL_MODELS.map(model => fetch('https://text.pollinations.ai/openai/chat/completions', { method : 'POST', headers: { 'Content-Type': 'application/json' }, body : JSON.stringify({ model, messages, max_tokens: maxTokens, stream : false, seed : Math.floor(Math.random() * 9999), }), signal: AbortSignal.timeout(12000), }).then(async res => { if (!res.ok) throw new Error(`${model} ${res.status}`); const data = await res.json(); const text = data.choices?.[0]?.message?.content?.trim(); if (!text || text.length <= 2) throw new Error(`${model} empty`); return text; }) ) ); if (result) return result; } catch { /* all parallel attempts failed */ } // Fallback: GET endpoint (anonymous, always works) try { const shortMsg = last.slice(0, 800); const shortSys = sys.slice(0, 400); const url = 'https://text.pollinations.ai/' + encodeURIComponent(shortMsg) + '?seed=' + Math.floor(Math.random() * 9999) + (shortSys ? '&system=' + encodeURIComponent(shortSys) : ''); const r2 = await fetch(url, { headers: { 'User-Agent': 'Mozilla/5.0' }, signal : AbortSignal.timeout(12000), }); if (r2.ok) { const t = await r2.text(); if (t?.trim() && t.trim().length > 2) return t.trim(); } } catch {} throw new Error('pollinations all endpoints failed'); } case 'cerebras': if (!ai.cerebras?.apiKey) throw new Error('cerebras: sin key'); // TORNEO #2: Qwen3-235B-A22B — ultra-fast reasoning, excellent Spanish // Cerebras runs at ~2600 tokens/sec, fastest inference available return oai(ai.cerebras.baseUrl, ai.cerebras.apiKey, 'qwen3-235b-a22b', messages, maxTokens); // TORNEO #1: Qwen3-32B — best Spanish + speed + thinking case 'groqQwen3': if (!ai.groq?.apiKey) throw new Error('groqQwen3: sin key'); return oai(ai.groq.baseUrl, ai.groq.apiKey, 'qwen/qwen3-32b', messages, maxTokens); case 'groq': if (!ai.groq?.apiKey) throw new Error('groq: sin key'); // TORNEO #5: Llama-4-Scout — fast multilingual, official Spanish return oai(ai.groq.baseUrl, ai.groq.apiKey, getBestModel('groq', 'llama-4-scout-17b-16e-instruct', 'large'), messages, maxTokens); case 'groqFast': if (!ai.groq?.apiKey) throw new Error('groqFast: sin key'); return oai(ai.groq.baseUrl, ai.groq.apiKey, getBestModel('groq', 'llama-3.1-8b-instant', 'fast'), messages, maxTokens); case 'groqKimi': if (!ai.groq?.apiKey) throw new Error('groqKimi: sin key'); return oai(ai.groq.baseUrl, ai.groq.apiKey, 'moonshotai/kimi-k2-instruct', messages, maxTokens); case 'mistral': if (!ai.mistral?.apiKey) throw new Error('mistral: sin key'); return oai(ai.mistral.baseUrl, ai.mistral.apiKey, getBestModel('mistral', 'mistral-large-latest', 'large'), messages, maxTokens); case 'mistralCode': if (!ai.mistral?.apiKey) throw new Error('mistralCode: sin key'); return oai(ai.mistral.baseUrl, ai.mistral.apiKey, 'codestral-latest', messages, maxTokens); case 'gemma4': { const entry = geminiRotator.get(); if (!entry) throw new Error('gemma4: sin key Gemini'); // Extraer system prompt — Gemini lo necesita como systemInstruction separado const sysMsgs = messages.filter(m => m.role === 'system'); const sysText = sysMsgs.map(m => m.content ?? '').join('\n\n'); const chatMsgs = messages.filter(m => m.role !== 'system'); // Convertir a formato Gemini (assistant → model) const contents = chatMsgs.map(m => ({ role : m.role === 'assistant' ? 'model' : 'user', parts: [{ text: m.content ?? '' }], })); // Si no hay mensajes de chat, añadir uno vacío para que no falle if (!contents.length) contents.push({ role: 'user', parts: [{ text: '.' }] }); const body = { contents, generationConfig: { maxOutputTokens: maxTokens, temperature: 0.8, topP: 0.95, }, }; // Inyectar system prompt como systemInstruction (soportado por Gemini API) if (sysText) { body.systemInstruction = { parts: [{ text: sysText }] }; } // Gemma 4 31B IT — mejor modelo Gemma para chat en español, via Gemini API const res = await fetch( `https://generativelanguage.googleapis.com/v1beta/models/gemma-4-31b-it:generateContent?key=${entry}`, { method : 'POST', headers: { 'Content-Type': 'application/json' }, body : JSON.stringify(body), signal : AbortSignal.timeout(15000), } ); if (!res.ok) { geminiRotator.fail(entry); throw Object.assign(new Error(`gemma4 ${res.status}`), { status: res.status, code: res.status }); } const data = await res.json(); return data.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? ''; } case 'openrouter': { if (!ai.openrouter?.apiKey) throw new Error('openrouter: sin key'); const model = 'meta-llama/llama-3.3-70b-instruct:free'; const res = await fetch(ai.openrouter.baseUrl, { method : 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw', 'X-Title': 'TomateSMP' }, body : JSON.stringify({ model, messages, max_tokens: maxTokens }), signal : AbortSignal.timeout(20000), }); if (!res.ok) throw Object.assign(new Error(`openrouter ${res.status}`), { status: res.status, code: res.status }); const data = await res.json(); if (data.error) throw new Error(`openrouter: ${data.error.message ?? data.error}`); return data.choices?.[0]?.message?.content?.trim() ?? ''; } case 'openrouterR1': { if (!ai.openrouter?.apiKey) throw new Error('openrouterR1: sin key'); const res = await fetch(ai.openrouter.baseUrl, { method : 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw' }, body : JSON.stringify({ model: 'deepseek/deepseek-r1:free', messages, max_tokens: maxTokens }), signal : AbortSignal.timeout(30000), }); if (!res.ok) throw Object.assign(new Error(`openrouterR1 ${res.status}`), { status: res.status, code: res.status }); const data = await res.json(); return data.choices?.[0]?.message?.content?.trim() ?? ''; } case 'cloudflare': { if (!ai.cloudflare?.accountId || !ai.cloudflare?.apiToken) throw new Error('cloudflare: sin config'); const res = await fetch( `https://api.cloudflare.com/client/v4/accounts/${ai.cloudflare.accountId}/ai/run/@cf/meta/llama-3.3-70b-instruct-fp8-fast`, { method : 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.cloudflare.apiToken}` }, body : JSON.stringify({ messages, max_tokens: maxTokens }), signal : AbortSignal.timeout(20000), } ); if (!res.ok) throw Object.assign(new Error(`cloudflare ${res.status}`), { status: res.status, code: res.status }); const data = await res.json(); // Cloudflare AI devuelve distintos formatos según el modelo const r = data.result; if (typeof r?.response === 'string') return r.response.trim(); if (Array.isArray(r) && r[0]?.response) return String(r[0].response).trim(); if (r?.choices?.[0]?.message?.content) return r.choices[0].message.content.trim(); if (typeof r === 'string') return r.trim(); return ''; } // ── HuggingFace Inference API providers ─────────────────────────────── case 'hfFast': if (!isHFAvailable()) throw new Error('hfFast: HF unavailable'); return callHuggingFace(messages, 'fast', maxTokens); case 'hfSmart': if (!isHFAvailable()) throw new Error('hfSmart: HF unavailable'); return callHuggingFaceCascade(messages, 'smart', maxTokens); case 'hfSpanish': if (!isHFAvailable()) throw new Error('hfSpanish: HF unavailable'); return callHuggingFaceCascade(messages, 'spanish', maxTokens); case 'hfCode': if (!isHFAvailable()) throw new Error('hfCode: HF unavailable'); return callHuggingFace(messages, 'code', maxTokens); case 'hfReasoning': if (!isHFAvailable()) throw new Error('hfReasoning: HF unavailable'); return callHuggingFaceCascade(messages, 'reasoning', maxTokens); case 'hfFallback': if (!isHFAvailable()) throw new Error('hfFallback: HF unavailable'); return callHuggingFace(messages, 'fast', maxTokens); // ── HIVE — RigoChat-7B Cluster (ALL workers combined) ──────────────── case 'hive': { try { const userMsg = messages.filter(m => m.role === 'user').pop()?.content ?? ''; const result = await hiveGenerate(messages, maxTokens, userMsg, 'hybrid'); if (result?.text && result.text.trim().length > 2) { return result.text; } throw new Error('hive: empty response'); } catch (err) { throw Object.assign(new Error(`hive: ${err.message}`), { status: 503, code: 503 }); } } // ── Local model — primary local inference ──────────────────────────── case 'local': { if (!isLocalAIReady()) throw new Error('local: model not ready'); const { localChatPrimary } = await import('./local-ai.js'); return localChatPrimary(messages, maxTokens, 0.8); } default: throw new Error(`Proveedor desconocido: ${name}`); } } // ── callProvider: CB + backoff + stats ──────────────────────────────────────── const PERM_ERRORS = [401, 402, 403, 404, 422]; async function callProvider(name, messages, maxTokens, failedFlag) { checkDailyReset(); const cb = breakers[name]; if (!cb?.canRequest()) throw new Error(`${name}: circuit OPEN`); const start = Date.now(); recordReq(name); try { let result; for (let attempt = 0; attempt < 3; attempt++) { try { result = await callDirect(name, messages, maxTokens); break; } catch (e) { // 429: esperar y reintentar if (e.status === 429 && attempt < 2) { await new Promise(r => setTimeout(r, 1500 + Math.random() * 3000)); continue; } // 400 con mensaje de longitud: comprimir contexto y reintentar UNA vez if (e.status === 400 && attempt === 0 && /length|too long|reduce|token/i.test(e.message ?? '')) { console.warn('[AI] ' + name + ': mensaje muy largo, comprimiendo...'); // Quedarse solo con system + últimos 2 mensajes const sys = messages.filter(m => m.role === 'system'); const rest = messages.filter(m => m.role !== 'system').slice(-2); // Recortar system prompt a 1500 chars const shortSys = sys.map(m => ({ ...m, content: m.content.slice(0, 1500) })); messages = [...shortSys, ...rest]; continue; } throw e; } } cb.recordSuccess(); recordLatency(name, Date.now() - start); return result; } catch (err) { const code = err.status ?? err.code ?? null; if (PERM_ERRORS.includes(code)) { cb.state = 'OPEN'; cb.openCount = 99; cb.lastFail = Date.now() + 86_400_000; console.warn(`[AI] ⛔ ${name} deshabilitado 24h (${code})`); } else if (!failedFlag?.v) { failedFlag && (failedFlag.v = true); cb.recordFailure(code, isTO(err)); } recordError(name); if (!PERM_ERRORS.includes(code)) { console.warn(`[AI] ❌ ${name}: ${err.message?.slice(0, 80)}`); } throw err; } } // ── Validación de respuestas — Sistema avanzado SIN regex ──────────────────── // v10: Reemplaza BAD_PATTERNS (regex) con validación semántica determinista // que es más precisa, sin falsos positivos, y más mantenible. // Frases de IA que NUNCA deben aparecer al inicio de una respuesta válida const AI_DISCLAIMER_PREFIXES = [ 'as an ai', 'as a language model', 'como una ia', 'como un modelo', "i'm an ai", 'i am an ai', "i'm a language model", 'i am a language model', 'como inteligencia artificial', 'como modelo de lenguaje', 'as an assistant', 'como asistente', ]; // Respuestas basura que indican fallo del modelo const GARBAGE_RESPONSES = new Set([ 'error', 'null', 'undefined', 'true', 'false', '{}', '[]', 'nan', 'none', 'nil', 'void', ]); function isValidResponse(text) { if (!text || typeof text !== 'string') return false; const t = text.trim(); // Empty or whitespace-only if (t.length < 2) return false; // Only punctuation/whitespace const withoutPunct = t.replace(/[\s.,!?_\-:;'"()]/g, ''); if (withoutPunct.length === 0) return false; // Garbage single values if (GARBAGE_RESPONSES.has(t.toLowerCase())) return false; // Too long = probably prompt recitation if (t.length > 2000) return false; // AI disclaimer prefix check (case-insensitive, no regex) const tLower = t.toLowerCase(); for (const prefix of AI_DISCLAIMER_PREFIXES) { if (tLower.startsWith(prefix)) return false; } // Check for repetitive patterns (same word repeated 10+ times) const words = tLower.split(/\s+/); if (words.length > 10) { const wordCounts = {}; for (const w of words) { wordCounts[w] = (wordCounts[w] || 0) + 1; if (wordCounts[w] > 10 && w.length > 2) return false; // Repetition loop } } return true; } // ── AI-based response quality check (uses local model for fast validation) ── // This replaces regex-based BAD_PATTERNS with semantic understanding async function aiValidateResponse(text, userMsg) { if (!text || text.length < 10) return { valid: true }; // Too short to bother // Fast path: check obvious issues without AI const tLower = text.toLowerCase().trim(); // AI disclosure in the middle of response (not just prefix) const aiPatterns = [ 'como ia,', 'como inteligencia artificial,', 'as an ai,', 'as a language model,', 'i cannot fulfill', 'no puedo cumplir esa solicitud', 'no puedo procesar esa solicitud', ]; for (const p of aiPatterns) { if (tLower.includes(p)) { return { valid: false, reason: 'ai_disclaimer', replacement: 'eso no va' }; } } return { valid: true }; } // ── Router principal ────────────────────────────────────────────────────────── function ensureAvailability() { const allProviders = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS]; const open = allProviders.filter(n => breakers[n]?.state === 'OPEN'); if (open.length === allProviders.length) { console.warn('[AI] Todos los proveedores en OPEN — reseteando'); open.forEach(n => { breakers[n].state = 'CLOSED'; breakers[n].openCount = 0; }); } } async function _route(messages, taskType, maxTokens, userMsg) { ensureAvailability(); const cascade = [...(CASCADES[taskType] ?? CASCADES.spanish)]; cascade.sort((a, b) => { const pa = predictExhaustion(a) ? -0.3 : 0; const pb = predictExhaustion(b) ? -0.3 : 0; return (healthScore(b) + pb) - (healthScore(a) + pa); }); const available = cascade.filter(n => breakers[n]?.canRequest() && isQuotaOk(n)); if (!available.length) { // Fallback al modelo local if (isLocalAIReady()) { const r = await emergencyFallback(messages, userMsg).catch(() => null); if (r) return r; } throw new Error('[AI] Sin proveedores disponibles'); } const failedFlag = { v: false }; // v12 FIX: Usar SOLO el primer proveedor disponible (no hedged requests) // Los hedged requests (Promise.any con top3) causaban respuestas triplicadas // cuando múltiples proveedores respondían y el merge de hive los concatenaba. // Ahora: intentamos el mejor proveedor, si falla pasamos al siguiente. for (const name of available) { try { const r = await callProvider(name, messages, maxTokens, failedFlag); if (isValidResponse(r)) return r; } catch { continue; } } // Último recurso: modelo local if (isLocalAIReady()) { console.log('[AI] 🏠 Usando modelo local como fallback (todos los proveedores fallaron)'); const r = await emergencyFallback(messages, userMsg).catch(e => { console.warn('[AI] Modelo local también falló:', e.message); return null; }); if (r) return r; } else { console.warn('[AI] Modelo local no disponible para fallback (isLocalAIReady=false)'); } throw new Error('[AI] Todos los proveedores fallaron'); } async function _routeOwner(messages, taskType, maxTokens) { ensureAvailability(); const cascade = [...(CASCADES[taskType] ?? CASCADES.spanish)]; cascade.sort((a, b) => (healthScore(b)) - (healthScore(a))); const available = cascade.filter(n => breakers[n]?.canRequest() && isQuotaOk(n)); if (!available.length && isLocalAIReady()) { return emergencyFallback(messages, '') ?? Promise.reject(new Error('Sin proveedores')); } for (const name of available) { try { const r = await callProvider(name, messages, maxTokens, { v: false }); if (r && r.trim().length > 0) return r; } catch { continue; } } if (isLocalAIReady()) return emergencyFallback(messages, ''); throw new Error('[AI] Sin respuesta'); } // ── callAI ──────────────────────────────────────────────────────────────────── export async function callAI(messages, taskType = 'spanish', maxTokens = null, userMsg = '', ownerMode = false) { if (!userMsg) userMsg = messages.filter(m => m.role === 'user').pop()?.content ?? ''; const tokens = adaptiveTokens(userMsg, taskType, maxTokens); // Inyectar thinking mode para mejor coherencia (provider name se detecta en el router) const thinkMsgs = injectThinking(messages); const msgs = await compressContext(thinkMsgs); const cacheKey = _cache.key(msgs, taskType); // SIEMPRE definido if (ownerMode) { return withQueue(_userQueue, async () => { const result = await _routeOwner(msgs, taskType, tokens); return cleanThinkingArtifacts(result); }); } // Cache exacto const cached = _cache.get(cacheKey); if (cached) { console.log('[AI] 💾 cache hit'); return cached; } // Semantic cache if (feats.semanticCache !== false) { try { const semHit = await semanticCache.get(userMsg); if (semHit) { console.log('[AI] 💾 semantic cache hit'); return semHit; } } catch {} } const doCall = () => withQueue(_userQueue, async () => { const result = await _route(msgs, taskType, tokens, userMsg); const cleaned = cleanThinkingArtifacts(result); _cache.set(cacheKey, cleaned, userMsg); semanticCache.set(userMsg, cleaned, semanticCache.getTTL(userMsg)).catch(() => {}); return cleaned; }); return singleFlight(cacheKey, doCall); } // ── callAIBackground ───────────────────────────────────────────────────────── export async function callAIBackground(messages, taskType = 'spanish', maxTokens = null, userMsg = '') { if (!userMsg) userMsg = messages.filter(m => m.role === 'user').pop()?.content ?? ''; const tokens = adaptiveTokens(userMsg, taskType, maxTokens); const thinkMsgs = injectThinking(messages); const msgs = await compressContext(thinkMsgs); return withQueue(_backgroundQueue, async () => { const result = await _route(msgs, 'background', tokens, userMsg); return cleanThinkingArtifacts(result); }); } // ── Mixture of Agents ───────────────────────────────────────────────────────── export async function callAIMoA(messages, maxTokens = 600) { if (!feats.moa) return callAI(messages, 'spanish', maxTokens); const [r1, r2, r3] = await Promise.allSettled([ callProvider('glm51', messages, 300, { v: false }), callProvider('groqFast', messages, 300, { v: false }), callProvider('pollinations', messages, 300, { v: false }), ]); const proposals = [r1, r2, r3] .filter(r => r.status === 'fulfilled' && r.value?.length > 10) .map((r, i) => `Respuesta ${i + 1}: ${r.value}`) .join('\n\n'); if (!proposals) return callAI(messages, 'spanish', maxTokens); return callProvider('glm51', [ { role: 'system', content: 'Sintetiza la mejor respuesta de las siguientes opciones. Sin texto extra.' }, { role: 'user', content: `${proposals}\n\nSíntesis:` }, ], maxTokens, { v: false }); } // ── Self-healing ────────────────────────────────────────────────────────────── export function startSelfHealing() { if (!feats.selfHealing) return; setInterval(() => { const allProviders = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS]; const totalReq = allProviders.reduce((s, n) => s + getCounter(n).req, 0); if (totalReq === 0) return; const avgQuota = allProviders.reduce((s, n) => { const lim = DAILY_LIMITS[n]; return s + (lim ? getCounter(n).req / lim : 0); }, 0) / allProviders.length; if (avgQuota > 0.85) console.warn('[SelfHeal] Cuota global alta — priorizar proveedores con más quota'); }, 5 * 60 * 1000); console.log('[HEAL] Self-healing v7 iniciado'); } // ── Warmup ──────────────────────────────────────────────────────────────────── export async function warmupProviders() { if (!feats.warmup) return; const test = [{ role: 'user', content: 'ok' }]; console.log('[WARMUP] Verificando proveedores...'); await Promise.allSettled( ['glm51', 'glmFlash', 'groqFast', 'cerebras', 'pollinations', 'mistral'].map(async name => { const start = Date.now(); try { await callProvider(name, test, 5, { v: false }); console.log(`[WARMUP] ✅ ${name} ${Date.now() - start}ms`); } catch (e) { console.log(`[WARMUP] ❌ ${name}: ${e.message?.slice(0, 40)}`); } }) ); } // ── Comprimir contexto ─────────────────────────────────────────────────────── async function compressContext(messages) { // FIX: No cortar a ciegas el system prompt — antes se podía cortar en medio de las reglas de seguridad // En vez de eso, reducir el historial de chat primero (menos importante que el system prompt) const sys = messages.filter(m => m.role === 'system'); const chat = messages.filter(m => m.role !== 'system'); // Recortar system prompt SOLO si es extremadamente largo (>4000 chars) // Priorizar: mantener identidad + seguridad + tools intactos const compressedSys = sys.map(m => { if ((m.content ?? '').length > 4000) { // Intentar cortar en una sección completa (después de un \n\n##) const content = m.content; // Mantener primeros 3500 chars (identidad + personalidad + seguridad) // y últimos 1000 chars (tools + reglas finales) return { ...m, content: content.slice(0, 3500) + '\n...(contexto comprimido)\n' + content.slice(-1000) }; } return m; }); // Reducir historial de chat a máximo 4 turnos (antes eran 6) const recentChat = chat.slice(-4); return [...compressedSys, ...recentChat]; } // ── Stats ───────────────────────────────────────────────────────────────────── export function getDailyStats() { const out = {}; for (const n of [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS]) { const s = getCounter(n); const lim = DAILY_LIMITS[n] ?? 99999; out[n] = { requests: s.req, limit: lim, pct: ((s.req / lim) * 100).toFixed(1) + '%', state: breakers[n]?.state ?? '?', score: healthScore(n).toFixed(2) }; } // Append HF-specific stats try { out._hf = getHFStats(); } catch {} return out; } export function getProviderStatus() { return [...ALL_PROVIDERS, ...GLM_PROVIDERS].map(n => ({ name: n, state: breakers[n]?.state ?? 'UNKNOWN', score: healthScore(n).toFixed(2) })); } export function clearCache() { _cache.clear(); } // ── HIVE exports ──────────────────────────────────────────────────────────── export { getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers, hiveInit as initHive }; export function runQualityGate(response, userMessage) { if (!feats.qualityGate) return Promise.resolve({ pass: true }); if (!response || response.trim().length < 10) return Promise.resolve({ pass: false, reason: 'empty' }); return Promise.resolve({ pass: isValidResponse(response) }); }