Spaces:
Paused
Paused
v11: Fix thinking leak - ALWAYS cleanThinkingArtifacts, more aggressive detection, expanded patterns
2bac7f7 verified | /** | |
| * ai.js — Router de IA v10.0 — TORNEO DE MODELOS | |
| * ================================================ | |
| * CAMBIOS v10.0 (basado en torneo de 179 modelos): | |
| * - Qwen3-32B (Groq) como proveedor PRIMARIO — mejor español+velocidad+thinking | |
| * - Qwen3-235B-A22B (Cerebras) como premium — ultra-rápido, excelente español | |
| * - Llama-4-Scout-17B (Groq) como secundario — oficial español | |
| * - DeepSeek R1 (OpenRouter) para razonamiento complejo | |
| * - GLM mantenido pero degradado en prioridad | |
| * - Modelo local (Gemma 4 E4B) solo como fallback final | |
| * - Thinking mode optimizado para Qwen3 | |
| * - Cascadas reordenadas por puntaje del torneo | |
| */ | |
| import { readConfig } from './utils.js'; | |
| import { semanticCache } from './semantic-cache.js'; | |
| import { emergencyFallback, isLocalAIReady } from './local-ai.js'; | |
| import { callHuggingFace, callHuggingFaceCascade, isHFAvailable, discoverHFModels, getHFStats } from './hf-provider.js'; | |
| import { hiveGenerate, initialize as hiveInit, getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers } from './hive.js'; | |
| // Configuration is read once at module load through readConfig(). | |
| // Each section below falls back to an empty object when it is missing, so | |
| // later property reads (e.g. ai.groq?.apiKey, glm.apiKey) never hit undefined. | |
| const config = readConfig(); | |
| // Per-provider settings (groq, mistral, cerebras, gemini are read in this file). | |
| const ai = config.ai ?? {}; | |
| // Feature flags section. NOTE(review): not referenced in the visible part of the file. | |
| const feats = config.features ?? {}; | |
| // GLM settings; apiKey and baseUrl are read from it by the GLM providers. | |
| const glm = config.glm ?? {}; | |
| // ── Thinking Mode v3.0 — REMOVED injectThinking() ────────────────────────── | |
| // v3.0 CRITICAL FIX: injectThinking() was causing the #1 bug — chain-of-thought | |
| // leaking into Discord responses. Models would output their thinking process as | |
| // plain text despite instructions not to. The fix: DON'T inject thinking prompts. | |
| // Models already think internally. Adding "PIENSA PROFUNDAMENTE" causes them to | |
| // output that thinking. Less is more — just give good system prompts. | |
| // | |
| // For models with native thinking (GLM-5.1 reasoning_content, Qwen3 /think): | |
| // - We let the API handle thinking natively (separate from content) | |
| // - We NEVER inject /think or thinking instructions manually | |
| // - We ALWAYS strip any thinking that leaks into content via cleanThinkingArtifacts() | |
| // injectThinking is now a no-op — returns messages unchanged | |
/**
 * Deliberate no-op kept for call-site compatibility.
 *
 * Hands back the very same `messages` reference it was given; nothing is
 * copied, added, or removed.
 *
 * @param {Array<object>} messages - Chat messages to pass through.
 * @param {string} [_providerName=''] - Ignored.
 * @returns {Array<object>} The `messages` argument, untouched.
 */
function injectThinking(messages, _providerName = '') {
  const untouched = messages;
  return untouched;
}
| // Limpiar artefactos de thinking + patrones AI β v11 ANTI-LEAK | |
| // v11: Major overhaul β handles ALL known thinking leak patterns | |
| // Root cause: models output their chain-of-thought as plain text | |
| // v11 FIX: More aggressive detection β lower thresholds, more patterns | |
/**
 * Strip leaked chain-of-thought and assistant-style boilerplate from a model
 * reply so that only the in-character answer remains.
 *
 * The steps run in a fixed order and each one works on the output of the
 * previous one:
 *   1. remove tagged reasoning (<think>, <thinking>, <reasoning>, ...),
 *   2. multi-line heuristic: analytical first half + Spanish second half,
 *   3. single-paragraph heuristic: English opener followed by Spanish,
 *   4. numbered recitation of the system prompt,
 *   5. English reasoning followed by Spanish lines,
 *   6. "Zelin would say:" style lead-ins,
 *   7. recited "user: ... zelin: ..." examples,
 *   8. /think, /no_think and "Respuesta:" style prefixes,
 *   9. broken tool placeholders, formal refusals, name typos, assistant
 *      sign-offs and a couple of known garbled phrases.
 *
 * @param {*} text - Raw model output.
 * @returns {*} The cleaned, trimmed string. Falsy or non-string input is
 *   returned unchanged.
 */
function cleanThinkingArtifacts(text) {
  if (!text || typeof text !== 'string') return text;
  let t = text;

  // JavaScript's \b only knows ASCII word characters, so it never matches
  // next to "ó", "í", ... . This builds a whole-word matcher with
  // Unicode-aware boundaries instead.
  const wholeWord = (alternatives) =>
    new RegExp(`(?<![\\p{L}\\p{N}_])(?:${alternatives})(?![\\p{L}\\p{N}_])`, 'iu');

  // --- 1. Tagged reasoning -------------------------------------------------
  t = t.replace(/<think[\s\S]*?<\/think>/gi, '');
  t = t.replace(/<thinking[\s\S]*?<\/thinking>/gi, '');
  t = t.replace(/<reasoning[\s\S]*?<\/reasoning>/gi, '');
  t = t.replace(/<scratchpad[\s\S]*?<\/scratchpad>/gi, '');
  t = t.replace(/<internal[\s\S]*?<\/internal>/gi, '');
  // Brackets are escaped so only real CDATA sections are removed.
  t = t.replace(/<!\[CDATA\[[\s\S]*?\]\]>/gi, '');
  // A closing tag without its opener means everything before it was
  // reasoning (the opener was consumed by the chat template). Keep only what
  // follows the last closing tag; if nothing useful follows, just drop the tags.
  const orphanClose = /<\/(?:think|thinking|reasoning)>/gi;
  let lastClose = null;
  for (const m of t.matchAll(orphanClose)) lastClose = m;
  if (lastClose) {
    const tail = t.slice(lastClose.index + lastClose[0].length).trim();
    t = tail.length > 2 ? tail : t.replace(orphanClose, '');
  }

  // --- 2. Multi-line: analytical first half, Spanish second half ------------
  if (t.includes('\n') && t.length > 150) {
    const lines = t.split('\n').filter((l) => l.trim().length > 0);
    if (lines.length >= 3) {
      const mid = Math.ceil(lines.length / 2);
      const firstLines = lines.slice(0, mid);
      const lastLines = lines.slice(mid);
      const thinkingWords = /^(?:okay|alright|let me|i should|the user|first,|based on|since|so,|well,|now,|also,|however,|but,|actually,|hmm|let's|i need|i think|i'll|going to|in order|therefore|because|this means|that means|it seems|it appears|looking at|considering|analyzing|understanding|evaluating|to respond|to answer|the message|the question|as zelin|respond as|in character|staying in|my role|my persona)/i;
      // Whole words only: without the trailing boundary "ok" matched "Okay"
      // and "no" matched "now"/"not", which cancelled the thinking score.
      const spanishWords = wholeWord(
        'wey|neta|chido|chale|órale|híjole|zelin|morra|güey|onda|chingón|bueno|claro|sí|no|nah|oye|mira|ósea|pues|nada|simón|sale|va|ok|(?:ja){2,}|qu[eé]|c[oó]mo|d[oó]nde|cu[aá]ndo|por qu[eé]|much[oa]s?|tambi[eé]n|aqu[ií]|all[iá]|este|esta|eso|esa|s[ií]|nope|yup|sip|nop|dale|holi|ola|bro|crack|xd|gg|ns|ni idea|ni modo|ya ves|ya mero|a poco|qué onda|no mames|no manches|est[aá] ca[nñ]ón|padre|madre',
      );
      let firstHalfThinking = 0;
      let secondHalfSpanish = 0;
      for (const line of firstLines) {
        if (thinkingWords.test(line.trim())) firstHalfThinking++;
        if (spanishWords.test(line)) firstHalfThinking--; // Spanish line: not thinking
      }
      for (const line of lastLines) {
        if (spanishWords.test(line)) secondHalfSpanish++;
        if (thinkingWords.test(line.trim())) secondHalfSpanish--; // thinking line: not the reply
      }
      if (firstHalfThinking >= 1 && secondHalfSpanish >= 1) {
        const response = lastLines.join('\n').trim();
        if (response.length > 5) t = response;
      }
    }
  }

  // --- 3. Single paragraph: English opener followed by Spanish --------------
  if (t.length > 100) {
    const sentences = t.split(/(?<=[.!?])\s+/);
    if (sentences.length >= 2) {
      const slang = wholeWord('wey|neta|chido|chale|órale|híjole|morra|güey|onda|chingón|ósea|pues|simón');
      const commonSpanish = wholeWord('s[ií]|no|nah|oye|mira|bueno|claro|dale|ns');
      const startsWithEnglishOpener = /^(?:okay|alright|let me|i should|the user|first|based on|since|so|well|now|also|however|actually|hmm|let's|i need|i think)/i.test(sentences[0]);
      let lastSpanishStart = -1;
      for (let i = 0; i < sentences.length; i++) {
        const s = sentences[i];
        const looksSpanish =
          slang.test(s) ||
          /[¿¡]/.test(s) ||
          (commonSpanish.test(s) && !/^(?:okay|alright|let me|i should|the user)/i.test(s));
        // Keeps the index of the LAST Spanish-looking sentence (same as before).
        if (looksSpanish && i > 0 && startsWithEnglishOpener) lastSpanishStart = i;
      }
      if (lastSpanishStart >= 0) {
        const response = sentences.slice(lastSpanishStart).join(' ').trim();
        if (response.length > 5) t = response;
      }
    }
  }

  // --- 4. Numbered recitation of the system prompt --------------------------
  // The real answer is whatever follows the last numbered line.
  const thinkingRecitationPattern = /^[\d][.)]\s*(?:¿Cu[aá]l|¿Hay|¿Qu[eé]|¿C[oó]mo|Auto-eval|Responde|PIENSA|Pienso|Pensamiento)/m;
  if (thinkingRecitationPattern.test(t)) {
    const lines = t.split('\n');
    let lastNumberedLine = -1;
    for (let i = 0; i < lines.length; i++) {
      if (/^\d[.)]\s/.test(lines[i].trim())) lastNumberedLine = i;
    }
    if (lastNumberedLine >= 0 && lastNumberedLine < lines.length - 1) {
      const afterThinking = lines.slice(lastNumberedLine + 1).join('\n').trim();
      if (afterThinking.length > 2) t = afterThinking;
    }
  }

  // --- 5. English reasoning followed by Spanish lines -----------------------
  const englishThinkingPattern = /^(?:Okay|Alright|Let me|I should|The user|First,|Based on|Since)/i;
  if (englishThinkingPattern.test(t) && t.length > 150) {
    const spanishMarker = wholeWord('wey|neta|chido|chale|órale|zelin|híjole|morra');
    let foundSpanish = false;
    const spanishLines = [];
    for (const line of t.split('\n')) {
      // A line counts as Spanish when it has Spanish punctuation/accents or slang;
      // once one is found, every following line is kept too.
      if (/[¿¡ñáéíóú]/i.test(line) || spanishMarker.test(line)) {
        foundSpanish = true;
        spanishLines.push(line);
      } else if (foundSpanish) {
        spanishLines.push(line);
      }
    }
    const joined = spanishLines.join('\n').trim();
    if (spanishLines.length > 0 && joined.length > 2) t = joined;
  }

  // --- 6. "Zelin would say:" / "Mi respuesta:" lead-ins ---------------------
  const promptPatterns = [
    /Zelin (?:would|should|might) (?:say|respond|reply|answer)[:\s]*\n?/i,
    /(?:Mi respuesta|My response|My answer|Response)[:\s]*\n?/i,
    /(?:Así respondería|Here's how|Here is what)[:\s]*\n?/i,
  ];
  for (const p of promptPatterns) {
    const match = t.match(p);
    if (match && match.index > 0) {
      const after = t.slice(match.index + match[0].length).trim();
      if (after.length > 2) t = after;
    }
  }

  // --- 7. Recited "user: ... zelin: ..." examples ---------------------------
  if (/\buser:/i.test(t)) {
    const beforeUser = t.split(/\buser:/i)[0].trim();
    t = beforeUser.length > 2 ? beforeUser : 'ns';
  }
  if (/\bzelin:\s*/i.test(t) && !/^zelin:/i.test(t)) {
    const beforeZelin = t.split(/\bzelin:\s*/i)[0].trim();
    if (beforeZelin.length > 2) t = beforeZelin;
  }
  t = t.replace(/^zelin:\s*/i, '');

  // --- 8. /think, /no_think and answer prefixes -----------------------------
  const noThinkIdx = t.indexOf('/no_think');
  if (noThinkIdx !== -1) {
    const afterNoThink = t.slice(noThinkIdx + '/no_think'.length).trim();
    if (afterNoThink.length > 2) t = afterNoThink;
  }
  t = t.replace(/^\/think\s*/i, '');
  t = t.replace(/^\/no_think\s*/i, '');
  t = t.replace(/^Pienso[\s\S]*?\nRespuesta:\s*/i, '');
  t = t.replace(/^Pensamiento:[\s\S]*?\nRespuesta:\s*/i, '');
  t = t.replace(/^An[aá]lisis:[\s\S]*?(?=Ahora|Bien|Ok|Sí|No|Nah|Wey|Oye|hmm|bueno)/i, '');
  t = t.replace(/^Razonamiento:[\s\S]*?(?=Ahora|Bien|Ok|Sí|No|Nah|Wey|Oye|hm|bueno)/i, '');
  t = t.replace(/^Respuesta final:\s*/i, '');
  t = t.replace(/^Final answer:\s*/i, '');
  t = t.replace(/^Respuesta:\s*/i, '');

  // --- 9. Cosmetic clean-up -------------------------------------------------
  // Broken tool-call placeholders.
  t = t.replace(/\[(?:mc_status|mc_player|mc_wiki|hora actual|usar\s+\w+\s+para\s+dato\s+real)\]/gi, '');
  t = t.replace(/c_status\]/g, '');
  t = t.replace(/ora actual\]/g, '');
  t = t.replace(/\w+_(?:status|player|wiki|info)\]/g, '');
  // Formal refusals become casual ones.
  t = t.replace(/no puedo cumplir (esa|este|aquella) (solicitud|request|orden|instrucci[oó]n)/gi, 'eso no va');
  t = t.replace(/no puedo procesar esa solicitud/gi, 'eso no va');
  t = t.replace(/lo siento,? pero no puedo/gi, 'nah');
  t = t.replace(/disculpa,? pero no puedo/gi, 'nah');
  t = t.replace(/lamentablemente no puedo/gi, 'nah');
  t = t.replace(/^nop,\s*soy\s+/gi, 'no, soy ');
  // Name typos.
  t = t.replace(/\bzel[eé]n\b/gi, 'zelin');
  t = t.replace(/\brezin\b/gi, 'zelin');
  t = t.replace(/\bzelen\b/gi, 'zelin');
  t = t.replace(/\btomatitoo\b/gi, 'tomatito');
  // Assistant-style sign-offs at the end of the reply.
  t = t.replace(/[,\s]*¿?en qu[eé]\s+(te\s+)?puedo\s+ayud[aeo]r?[¿?]?\.?\s*$/gi, '');
  t = t.replace(/[,\s]*¿?(algo\s+)?m[aá]s\s+en\s+(lo\s+que\s+)?pued[ao]\s+ayud[aeo]r?[¿?]?\.?\s*$/gi, '');
  t = t.replace(/[,\s]*¿?necesitas\s+(algo\s+)?m[aá]s[¿?]?\.?\s*$/gi, '');
  // Known garbled phrases.
  t = t.replace(/\besti con vos/gi, 'suerte con eso');
  t = t.replace(/\bwienes/gi, 'bien, y tú');
  // Shorten the full identity recitation.
  if (/^soy zelin,? (la morra |la )?del server minecraft? tomatesmp\.?$/i.test(t)) {
    t = 'soy zelin del server';
  }
  return t.trim();
}
| // ββ Modelos descubiertos dinΓ‘micamente por proveedor βββββββββββββββββββββββββ | |
| // Se actualizan al arrancar y cada 6h para que no queden obsoletos | |
// Model ids discovered per provider, keyed by provider name.
const _discoveredModels = {};

/**
 * Query one provider's model-listing endpoint and store the resulting ids in
 * `_discoveredModels[providerName]`.
 *
 * Discovery is best-effort: a missing API key, an unknown provider name, a
 * non-OK response or a network/timeout error never throws. Failures are
 * logged with console.warn instead of being silently dropped, and the
 * previously stored list (if any) is left untouched.
 *
 * @param {string} providerName - 'groq', 'mistral' or 'pollinations'.
 * @returns {Promise<void>}
 */
async function discoverModels(providerName) {
  // GET a JSON document with a 5 s timeout; resolves to null on a non-OK status.
  const fetchJson = async (url, apiKey) => {
    const init = { signal: AbortSignal.timeout(5000) };
    if (apiKey) init.headers = { Authorization: `Bearer ${apiKey}` };
    const r = await fetch(url, init);
    if (!r.ok) {
      console.warn(`[AI] Model discovery for ${providerName} got HTTP ${r.status}`);
      return null;
    }
    return r.json();
  };

  try {
    switch (providerName) {
      case 'groq': {
        if (!ai.groq?.apiKey) return;
        const data = await fetchJson('https://api.groq.com/openai/v1/models', ai.groq.apiKey);
        if (data === null) return;
        // Entries explicitly flagged active === false are skipped.
        const models = (data.data ?? []).filter((m) => m.active !== false).map((m) => m.id);
        _discoveredModels.groq = models;
        console.log(`[AI] Groq models: ${models.length} (${models.slice(0,3).join(', ')}...)`);
        break;
      }
      case 'mistral': {
        if (!ai.mistral?.apiKey) return;
        const data = await fetchJson('https://api.mistral.ai/v1/models', ai.mistral.apiKey);
        if (data === null) return;
        const models = (data.data ?? []).map((m) => m.id);
        _discoveredModels.mistral = models;
        console.log(`[AI] Mistral models: ${models.length}`);
        break;
      }
      case 'pollinations': {
        const data = await fetchJson('https://text.pollinations.ai/models');
        if (data === null) return;
        // Entries may be objects with a `name` or plain values.
        _discoveredModels.pollinations = Array.isArray(data) ? data.map((m) => m.name ?? m) : [];
        console.log(`[AI] Pollinations models: ${_discoveredModels.pollinations.length}`);
        break;
      }
    }
  } catch (err) {
    console.warn(`[AI] Model discovery failed for ${providerName}: ${err?.message ?? err}`);
  }
}
| // Seleccionar el mejor modelo disponible de un proveedor | |
/**
 * Pick a discovered chat/text model for a provider.
 *
 * Size/tier tokens are matched as whole tokens of the model id (delimited by
 * non-alphanumeric characters). Plain substring matching used to treat
 * "…-17b-…" as a 7B model and "gemini-…" as a "mini" model.
 *
 * @param {string} providerName - Key into `_discoveredModels`.
 * @param {string} fallback - Returned when nothing was discovered or nothing matches.
 * @param {string} [preference='large'] - 'fast' or 'large'; any other value yields `fallback`.
 * @returns {string} The first discovered model id matching the preference, or `fallback`.
 */
function getBestModel(providerName, fallback, preference = 'large') {
  const models = _discoveredModels[providerName];
  if (!models?.length) return fallback;

  // Drop models that are not chat/text models.
  const EXCLUDE = /whisper|tts|speech|audio|embed|vision|image|dall|stable|rerank|guard|code-gecko|text-bison/i;
  const textModels = models.filter((m) => !EXCLUDE.test(m));
  if (!textModels.length) return fallback;

  const TOKEN_PATTERNS = {
    fast: /(?<![a-z0-9])(?:8b|7b|fast|instant|flash|mini)(?![a-z0-9])/i,
    large: /(?<![a-z0-9])(?:70b|72b|large|versatile|plus|pro)(?![a-z0-9])/i,
  };
  const pattern = Object.hasOwn(TOKEN_PATTERNS, preference) ? TOKEN_PATTERNS[preference] : null;
  if (!pattern) return fallback;
  return textModels.find((m) => pattern.test(m)) ?? fallback;
}
| // Descubrir en background al arrancar | |
/**
 * Kick off model discovery for every provider right away and schedule a
 * refresh every six hours. Discovery runs in the background: promises are
 * not awaited and their rejections are discarded.
 */
export function startModelDiscovery() {
  const providers = ['groq', 'mistral', 'pollinations'];
  const SIX_HOURS_MS = 6 * 60 * 60 * 1000;

  const refreshAll = () => {
    for (const provider of providers) {
      discoverModels(provider).catch(() => {});
    }
    // HuggingFace has its own discovery routine.
    discoverHFModels().catch(() => {});
  };

  refreshAll();
  setInterval(refreshAll, SIX_HOURS_MS);
}
| // ββ Tiers y cascadas (GLM 5.1 primero, API-first, local fallback) ββββββββββββ | |
| // Provider names grouped into three tiers. In this part of the file the | |
| // tiers are only used to build ALL_PROVIDERS below. | |
| const TIERS = { | |
| fast : ['glmFlash', 'groqFast', 'cerebras', 'pollinations', 'hfFast'], | |
| smart : ['glm51', 'pollinations', 'groq', 'hfSmart', 'mistral', 'gemma4', 'cloudflare'], | |
| fallback: ['glmAir', 'openrouter', 'groqKimi', 'openrouterR1', 'mistralCode', 'hfFallback'], | |
| }; | |
| // Ordered provider lists per task type. NOTE(review): presumably tried left | |
| // to right by the router; the consuming code is not visible here. | |
| const CASCADES = { | |
| // v11: HIVE - RigoChat-7B cluster as PRIMARY (uses ALL workers, not Promise.any) | |
| // Hive: consensus + speculative decoding + parallel batch = maximum power | |
| chat : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'groqFast', 'pollinations', 'hfSmart', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'local'], | |
| spanish : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'hfSpanish', 'pollinations', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'groqKimi', 'local'], | |
| fast : ['hive', 'groqFast', 'cerebras', 'glmFlash', 'pollinations', 'hfFast', 'groq', 'mistral', 'cloudflare', 'openrouter', 'local'], | |
| reasoning : ['hive', 'groqQwen3', 'cerebras', 'groqKimi', 'hfReasoning', 'openrouterR1', 'glm51', 'groq', 'mistral', 'pollinations', 'gemma4', 'cloudflare', 'local'], | |
| // The code cascade is the only one without 'hive' and without 'local'. | |
| code : ['groqQwen3', 'glm51', 'mistralCode', 'hfCode', 'groq', 'pollinations', 'mistral', 'openrouter'], | |
| volume : ['hive', 'groqFast', 'glmFlash', 'pollinations', 'cerebras', 'hfFast', 'gemma4', 'cloudflare', 'groqQwen3', 'local'], | |
| background: ['hive', 'glmFlash', 'mistral', 'groqFast', 'groq', 'hfFast', 'pollinations', 'cloudflare', 'gemma4', 'openrouter', 'local'], | |
| }; | |
| // The three GLM provider names (all of them also appear in TIERS). | |
| const GLM_PROVIDERS = ['glm51', 'glmAir', 'glmFlash']; | |
| // De-duplicated union of every tier plus providers that only appear in cascades. | |
| const ALL_PROVIDERS = [...new Set([...TIERS.fast, ...TIERS.smart, ...TIERS.fallback, 'groqQwen3', 'cerebras', 'local', 'hive'])]; | |
| // All HuggingFace provider names; some (hfSpanish, hfCode, hfReasoning) are not in TIERS. | |
| const HF_PROVIDERS = ['hfFast', 'hfSmart', 'hfSpanish', 'hfCode', 'hfReasoning', 'hfFallback']; | |
| // ββ LΓmites diarios βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| // Per-provider daily request budgets. They are compared against the `req` | |
| // counter by healthScore(), isQuotaOk() and predictExhaustion(). | |
| const DAILY_LIMITS = { | |
| // Local: no limit - our own model | |
| local : 99999, | |
| // Qwen3-32B (Groq) - TOURNAMENT: #1 primary model | |
| groqQwen3 : 1000, // Qwen3-32B via Groq - 60 RPM, 1000 RPD | |
| // Cerebras - TOURNAMENT: #2 ultra-fast reasoning | |
| cerebras : 14400, // 30 RPM, 1M TPD | |
| // GLM: free/unlimited | |
| glm51 : 99999, | |
| glmAir : 99999, | |
| glmFlash : 99999, | |
| // Other providers | |
| pollinations : 99999, | |
| groq : 1000, | |
| groqFast : 14400, | |
| mistral : 99999, | |
| mistralCode : 99999, | |
| gemma4 : 86400, | |
| openrouter : 1000, | |
| cloudflare : 10000, | |
| groqKimi : 1000, | |
| openrouterR1 : 1000, | |
| hfFast : 5000, | |
| hfSmart : 2000, | |
| hfSpanish : 3000, | |
| hfCode : 2000, | |
| hfReasoning : 1000, | |
| hfFallback : 500, | |
| hive : 99999, // HIVE - no limit, it is our own cluster | |
| }; | |
| // ββ Circuit Breaker βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Per-provider circuit breaker with exponential cooldown.
 *
 * States: 'CLOSED' (requests allowed), 'OPEN' (requests blocked until the
 * cooldown has elapsed) and 'HALF_OPEN' (cooldown elapsed, probing).
 *
 * Failures are scored over a sliding 60 s window: a hard failure counts 1.0
 * and a timeout counts 0.3. Timeouts are recorded in their own list so they
 * actually contribute to the score; before, they were weighted but never
 * stored, so a provider that only timed out could never trip the breaker.
 * Both lists are pruned to the window on every failure so they cannot grow
 * without bound.
 */
class CircuitBreaker {
  /** @param {string} name - Provider name this breaker guards. */
  constructor(name) {
    this.name = name;
    this.state = 'CLOSED';
    this.failTimes = [];     // timestamps (ms) of recent hard failures
    this.timeoutTimes = [];  // timestamps (ms) of recent timeouts
    this.openCount = 0;      // number of times the breaker has opened; drives the backoff
    this.lastFail = 0;
    this.threshold = 6;      // window score needed to open
    this.cooldown = 30000;   // base cooldown in ms, doubled per extra open
    this.maxCooldown = 600000;
  }

  /**
   * @returns {boolean} true when a request may be attempted. Moves an OPEN
   *   breaker to HALF_OPEN once its cooldown has elapsed.
   */
  canRequest() {
    if (this.state === 'CLOSED') return true;
    const cd = Math.min(this.cooldown * Math.pow(2, this.openCount - 1), this.maxCooldown);
    if (Date.now() - this.lastFail > cd) {
      this.state = 'HALF_OPEN';
      return true;
    }
    return false;
  }

  /** Close the breaker and forget every recorded failure. */
  recordSuccess() {
    this.failTimes = [];
    this.timeoutTimes = [];
    this.openCount = 0;
    this.state = 'CLOSED';
  }

  /**
   * Record a failed request.
   *
   * @param {number} [code] - HTTP status, if any. 401/403 open the breaker
   *   immediately and push `lastFail` one day into the future.
   * @param {boolean} [isTimeout=false] - Whether the failure was a timeout.
   */
  recordFailure(code, isTimeout = false) {
    const now = Date.now();
    this.lastFail = now;
    (isTimeout ? this.timeoutTimes : this.failTimes).push(now);

    if (code === 401 || code === 403) {
      // Auth failures will not heal on their own: block for a long time.
      this.state = 'OPEN';
      this.openCount = 99;
      this.lastFail = now + 86_400_000;
      return;
    }

    const inWindow = (at) => now - at < 60000;
    this.failTimes = this.failTimes.filter(inWindow);
    this.timeoutTimes = this.timeoutTimes.filter(inWindow);

    const score = this.failTimes.length + this.timeoutTimes.length * 0.3;
    // Any failure while probing (HALF_OPEN) re-opens immediately.
    if (score >= this.threshold || this.state === 'HALF_OPEN') {
      this.state = 'OPEN';
      this.openCount++;
    }
  }
}
// One circuit breaker per known provider name. Names that appear in more than
// one list end up with a single breaker.
const breakers = Object.fromEntries(
  [...new Set([...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS])]
    .map((providerId) => [providerId, new CircuitBreaker(providerId)]),
);
// HIVE gets a fresh, more forgiving breaker (workers can be slow): it needs
// more failures before opening.
breakers['hive'] = Object.assign(new CircuitBreaker('hive'), { threshold: 10 });
| // ββ EstadΓsticas por proveedor βββββββββββββββββββββββββββββββββββββββββββββββββ | |
// Per-provider counters, created lazily on first access.
const _stats = {};

/**
 * Fetch (creating it on first use) the mutable counter record for a provider.
 * @param {string} name - Provider name.
 * @returns {{req: number, err: number, totalMs: number, calls: number}}
 */
function getCounter(name) {
  _stats[name] ||= { req: 0, err: 0, totalMs: 0, calls: 0 };
  return _stats[name];
}

/** Count one request for `name`. */
function recordReq(name) {
  getCounter(name).req += 1;
}

/** Count one error for `name`. */
function recordError(name) {
  getCounter(name).err += 1;
}

/** Add one latency sample (`ms`) for `name`. */
function recordLatency(name, ms) {
  const counter = getCounter(name);
  counter.totalMs += ms;
  counter.calls += 1;
}
| // ββ Daily counter reset ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
// UTC day-of-month observed at the last reset (initially: at module load).
let _lastResetDay = new Date().getUTCDate();

/**
 * Zero the `req` and `err` counters of every provider once the UTC
 * day-of-month differs from the one seen at the previous reset. Latency
 * totals (`totalMs`, `calls`) are left alone.
 */
function checkDailyReset() {
  const utcDay = new Date().getUTCDate();
  if (utcDay === _lastResetDay) return;

  _lastResetDay = utcDay;
  Object.keys(_stats).forEach((provider) => {
    _stats[provider].req = 0;
    _stats[provider].err = 0;
  });
  console.log('[AI] π Daily counters reset');
}
| // ββ Health Score ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Heuristic health score for a provider; higher is better.
 *
 * score = successRate * 0.5 + (1 / avgLatencySeconds) * 0.3 + remainingQuota * 0.2
 *
 * - successRate: 1 when no request has been made yet, otherwise
 *   (req - err) / req clamped to >= 0. The previous `Math.max(1, req - err)`
 *   gave a provider that had failed its only request a perfect rate.
 * - avgLatency: mean of the recorded samples, floored at 1 ms so a zero
 *   average cannot produce Infinity; 2000 ms when there are no samples.
 * - remainingQuota: share of the daily limit still unused, clamped to [0, 1];
 *   1 when the provider has no limit.
 *
 * @param {string} name - Provider name.
 * @returns {number} Finite score.
 */
function healthScore(name) {
  const s = getCounter(name);
  const rate = s.req > 0 ? Math.max(0, s.req - s.err) / s.req : 1;
  const avg = s.calls > 0 ? Math.max(s.totalMs / s.calls, 1) : 2000;
  const limit = DAILY_LIMITS[name];
  const quota = limit ? Math.min(1, Math.max(0, 1 - s.req / limit)) : 1;
  return rate * 0.5 + (1 / (avg / 1000)) * 0.3 + quota * 0.2;
}
| // ββ Quota βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Tell whether a provider still has daily quota headroom.
 * @param {string} name - Provider name.
 * @returns {boolean} true when the provider has no (truthy) daily limit, or
 *   when its request count is below 95% of that limit.
 */
function isQuotaOk(name) {
  const dailyCap = DAILY_LIMITS[name];
  if (!dailyCap) return true;
  const { req: used } = getCounter(name);
  return used < dailyCap * 0.95;
}
/**
 * Project today's request count to the end of the day and report whether it
 * would exceed 85% of the provider's daily limit.
 *
 * The projection uses the UTC hour, because the counters it extrapolates are
 * reset on the UTC day boundary (see checkDailyReset). With the local hour
 * the estimate was skewed on any host not running in UTC.
 *
 * @param {string} name - Provider name.
 * @returns {boolean} false when the provider has no limit; otherwise whether
 *   the linear projection exceeds 85% of the limit.
 */
function predictExhaustion(name) {
  const c = getCounter(name);
  const lim = DAILY_LIMITS[name];
  if (!lim) return false;
  const hour = new Date().getUTCHours();
  // During hour 0 the rate is computed as if one hour had already passed.
  const ratePerHour = c.req / Math.max(hour, 1);
  const proj = c.req + ratePerHour * (24 - hour);
  return proj > lim * 0.85;
}
| // ββ Adaptive tokens βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Choose a max-token budget from the message length and task type.
 *
 * @param {string|null|undefined} msg - User message (nullish counts as empty).
 * @param {string} task - Task type ('fast', 'code', 'reasoning', ...).
 * @param {number} [req] - Explicit budget; returned as-is when truthy.
 * @returns {number} Token budget.
 */
function adaptiveTokens(msg, task, req) {
  if (req) return req;

  const size = (msg ?? '').length;
  const isTiny = size < 50;
  if (task === 'fast' || isTiny) return 150;

  switch (task) {
    case 'code':
      return 1024;
    case 'reasoning':
      return 600;
    default:
      break;
  }

  // Remaining tasks scale with message length.
  const byLength = [
    [100, 200],
    [300, 400],
  ];
  for (const [below, budget] of byLength) {
    if (size < below) return budget;
  }
  return 512;
}
| // ββ SingleFlight ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
// Promises currently in flight, keyed by request key.
const _inflight = new Map();

/**
 * Collapse concurrent calls that share a key into one underlying call.
 *
 * While a promise for `key` is pending, callers get that same promise and
 * `fn` is not invoked again. The entry is removed once the promise settles.
 *
 * @param {*} key - De-duplication key.
 * @param {() => Promise<*>} fn - Starts the work; must return a promise.
 * @returns {Promise<*>} The shared in-flight promise.
 */
function singleFlight(key, fn) {
  if (!_inflight.has(key)) {
    const release = () => _inflight.delete(key);
    _inflight.set(key, fn().finally(release));
  }
  return _inflight.get(key);
}
| // ββ TTL Cache βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Small in-memory response cache with a per-entry TTL and a 1000-entry cap.
 */
class TTLCache {
  constructor() {
    this.store = new Map();
  }

  /**
   * Pick a TTL (ms) from keywords in the message: 24 h for rules/info style
   * questions, 30 s for live-status questions, 1 h otherwise.
   * @param {string} [msg='']
   * @returns {number}
   */
  _ttl(msg = '') {
    const m = msg.toLowerCase();
    if (/regla|norma|plugin|info|servidor/.test(m)) return 86_400_000;
    if (/online|jugador|tps|lag/.test(m)) return 30_000;
    return 3_600_000;
  }

  /**
   * Build a cache key from the whole conversation plus the task type.
   *
   * The full content of every message is hashed (djb2). Hashing only the
   * first 100 characters of each message made long prompts that share a
   * prefix collide and return each other's cached answers.
   *
   * @param {Array<{role: string, content?: *}>} msgs
   * @param {string} task
   * @returns {string} `${task}:${unsigned 32-bit hash}`
   */
  key(msgs, task) {
    const text = msgs.map((m) => `${m.role}:${m.content ?? ''}`).join('|') + task;
    let h = 5381;
    for (let i = 0; i < text.length; i++) h = ((h << 5) + h + text.charCodeAt(i)) | 0;
    return `${task}:${h >>> 0}`;
  }

  /**
   * @param {string} k
   * @returns {*} The cached value, or null when missing or expired (expired
   *   entries are deleted on access).
   */
  get(k) {
    const e = this.store.get(k);
    if (!e) return null;
    if (Date.now() > e.exp) {
      this.store.delete(k);
      return null;
    }
    return e.value;
  }

  /**
   * Store a value; its TTL is derived from `msg`.
   * @param {string} k
   * @param {*} v
   * @param {string} [msg='']
   */
  set(k, v, msg = '') {
    // Re-insert so a refreshed key moves to the back of the eviction order.
    this.store.delete(k);
    this.store.set(k, { value: v, exp: Date.now() + this._ttl(msg) });
    // Over the cap: evict the oldest inserted entry.
    if (this.store.size > 1000) this.store.delete(this.store.keys().next().value);
  }

  /** Drop every entry. */
  clear() {
    this.store.clear();
    console.log('[AI] Cache limpiada');
  }
}
export const _cache = new TTLCache();
| // ββ Pools de concurrencia βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
// Concurrency pools: `active` is the number of running jobs, `max` the cap.
const _userQueue = { active: 0, max: 4 };
const _backgroundQueue = { active: 0, max: 2 };

/**
 * Run `fn` once the pool has a free slot.
 *
 * While the pool is full the caller polls every 50 ms. The slot is released
 * whether `fn` resolves or rejects.
 *
 * @param {{active: number, max: number}} queue - Pool to run in.
 * @param {() => *} fn - Work to execute; its result is awaited.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withQueue(queue, fn) {
  const pause = () => new Promise((resume) => setTimeout(resume, 50));
  while (queue.active >= queue.max) {
    await pause();
  }
  queue.active += 1;
  try {
    return await fn();
  } finally {
    queue.active -= 1;
  }
}
| // ββ Intent classifier (0 tokens) β SIN REGEX, keyword-based ββββββββββββββββββ | |
| // Replaces regex-based INTENT_PATTERNS with a cleaner keyword matching system | |
// Keyword rules, checked in order; the first keyword that matches wins.
// matchMode 'startsWith' tests the beginning of the message, 'contains' tests
// anywhere in it. Accented keywords are written as real characters: the
// mis-encoded versions could never match user input.
const INTENT_RULES = [
  { keywords: ['hola', 'hey', 'hi', 'buenas', 'qué tal', 'ola', 'saludos', 'wenas'], matchMode: 'startsWith', type: 'fast', intent: 'greeting' },
  { keywords: ['regla', 'norma', 'prohibi', 'permit'], matchMode: 'contains', type: 'volume', intent: 'rules' },
  { keywords: ['```', '.yml', '.json', '.java', 'config'], matchMode: 'contains', type: 'code', intent: 'code' },
  { keywords: ['ban', 'sancion', 'report', 'trampa', 'hack', 'cheat'], matchMode: 'contains', type: 'reasoning', intent: 'moderation' },
  { keywords: ['analiza', 'explica', 'compara', 'argumenta'], matchMode: 'contains', type: 'reasoning', intent: 'complex' },
  { keywords: ['cómo', 'qué', 'cuál', 'dónde', 'cuándo', 'por qué'], matchMode: 'contains', type: 'spanish', intent: 'question' },
];

/**
 * Classify a message without calling any model.
 *
 * @param {string|null|undefined} message - Raw user message.
 * @returns {{intent: string, type: string}} `{intent: 'short', type: 'fast'}`
 *   for non-empty messages of at most 20 characters, otherwise the first
 *   matching rule, otherwise `{intent: 'general', type: 'spanish'}`.
 */
export function classifyIntent(message) {
  // NFC so that decomposed accents ("o" + combining acute) match the keywords.
  const m = (message ?? '').trim().toLowerCase().normalize('NFC');
  if (m.length > 0 && m.length <= 20) {
    return { intent: 'short', type: 'fast' };
  }
  for (const rule of INTENT_RULES) {
    const matches = rule.matchMode === 'startsWith'
      ? (kw) => m.startsWith(kw)
      : (kw) => rule.matchMode === 'contains' && m.includes(kw);
    if (rule.keywords.some(matches)) {
      return { intent: rule.intent, type: rule.type };
    }
  }
  return { intent: 'general', type: 'spanish' };
}
| // ββ Gemini Rotator ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Round-robin rotator over the configured Gemini API keys.
 *
 * get()  - returns a key that is not marked as failed and has not been handed
 *          out within the last 65 s. When no key qualifies, the failed marks
 *          are cleared and the next key in plain rotation is returned
 *          (undefined when no keys are configured).
 * fail() - marks a key as failed until the next clear.
 */
const geminiRotator = (() => {
  const keys = ai.gemini?.keys ?? [ai.gemini?.apiKey].filter(Boolean);
  let cursor = 0;
  const burned = new Set();
  const lastHandedOut = new Map();

  const isUsable = (k) => !burned.has(k) && Date.now() - (lastHandedOut.get(k) ?? 0) > 65000;

  function get() {
    const usable = keys.filter(isUsable);
    if (usable.length === 0) {
      burned.clear();
      return keys[cursor++ % keys.length];
    }
    const chosen = usable[cursor++ % usable.length];
    lastHandedOut.set(chosen, Date.now());
    return chosen;
  }

  function fail(k) {
    burned.add(k);
  }

  return { get, fail };
})();
| // ββ FunciΓ³n base OpenAI-compatible βββββββββββββββββββββββββββββββββββββββββββ | |
| // v9: Handles GLM-5.1 reasoning_content (separate from content) | |
/**
 * POST a chat-completion request to an OpenAI-compatible endpoint and return
 * the cleaned text of the first choice.
 *
 * @param {string} url - Chat-completions endpoint.
 * @param {string} key - Bearer token.
 * @param {string} model - Model id sent in the request body.
 * @param {Array<{role: string, content: string}>} messages - Conversation.
 * @param {number} maxTokens - Sent as `max_tokens`.
 * @param {string} [systemOverride] - When truthy, replaces every system
 *   message with this single one placed first.
 * @returns {Promise<string>} The first choice's content after
 *   cleanThinkingArtifacts(); '' when the response carries no message.
 * @throws {Error} On a non-OK status; the error carries `status` and `code`
 *   (both the HTTP status). The request is aborted after 15 s.
 */
async function oai(url, key, model, messages, maxTokens, systemOverride) {
  let outgoing = messages;
  if (systemOverride) {
    const nonSystem = messages.filter((m) => m.role !== 'system');
    outgoing = [{ role: 'system', content: systemOverride }, ...nonSystem];
  }

  const requestInit = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` },
    body: JSON.stringify({ model, messages: outgoing, max_tokens: maxTokens, stream: false }),
    signal: AbortSignal.timeout(15000),
  };
  const res = await fetch(url, requestInit);

  if (!res.ok) {
    const errBody = await res.text().catch(() => '');
    const failure = new Error(`${model} ${res.status}: ${errBody.slice(0, 100)}`);
    failure.status = res.status;
    failure.code = res.status;
    throw failure;
  }

  const data = await res.json();
  const msg = data.choices?.[0]?.message;
  if (!msg) return '';

  const rawContent = msg.content?.trim() ?? '';
  // Native reasoning arrives in a separate field; it is only logged.
  if (msg.reasoning_content) {
    console.log(`[AI] ${model}: reasoning_content present (${msg.reasoning_content.length} chars thinking, ${rawContent.length} chars content)`);
  }
  // Content is always cleaned, whether or not reasoning_content is present,
  // because reasoning can still leak into the content field.
  return cleanThinkingArtifacts(rawContent);
}
/**
 * Tell whether an error looks like a timeout/abort.
 * @param {*} e - Error-like value (may be nullish).
 * @returns {boolean} true when `e.name` is 'TimeoutError' or 'AbortError', or
 *   when `e.message` contains "timeout" (case-insensitive).
 */
function isTO(e) {
  const kind = e?.name;
  if (kind === 'TimeoutError' || kind === 'AbortError') return true;
  return /timeout/i.test(e?.message ?? '');
}
| // ββ callDirect: implementaciΓ³n de cada proveedor βββββββββββββββββββββββββββββ | |
| async function callDirect(name, messages, maxTokens) { | |
| const PERM = [401, 402, 403, 404, 422]; | |
| switch (name) { | |
| // ββ GLM (ZhipuAI) β proveedor primario, gratis/ilimitado ββββββββββββββ | |
| case 'glm51': { | |
| if (!glm.apiKey) throw new Error('glm51: sin key GLM'); | |
| return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions', | |
| glm.apiKey, 'glm-5.1', messages, maxTokens); | |
| } | |
| case 'glmAir': { | |
| if (!glm.apiKey) throw new Error('glmAir: sin key GLM'); | |
| return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions', | |
| glm.apiKey, 'glm-4-air', messages, maxTokens); | |
| } | |
| case 'glmFlash': { | |
| if (!glm.apiKey) throw new Error('glmFlash: sin key GLM'); | |
| return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions', | |
| glm.apiKey, 'glm-4-flash', messages, maxTokens); | |
| } | |
| case 'pollinations': { | |
| // Pollinations β free, unlimited, updated API endpoints | |
| const last = messages.filter(m => m.role === 'user').pop()?.content ?? ''; | |
| const sys = messages.find(m => m.role === 'system')?.content ?? ''; | |
| // FIX: Try models in PARALLEL with Promise.any instead of sequentially | |
| const POLL_MODELS = ['openai', 'mistral', 'llama', 'qwen']; | |
| try { | |
| const result = await Promise.any( | |
| POLL_MODELS.map(model => | |
| fetch('https://text.pollinations.ai/openai/chat/completions', { | |
| method : 'POST', | |
| headers: { 'Content-Type': 'application/json' }, | |
| body : JSON.stringify({ | |
| model, | |
| messages, | |
| max_tokens: maxTokens, | |
| stream : false, | |
| seed : Math.floor(Math.random() * 9999), | |
| }), | |
| signal: AbortSignal.timeout(12000), | |
| }).then(async res => { | |
| if (!res.ok) throw new Error(`${model} ${res.status}`); | |
| const data = await res.json(); | |
| const text = data.choices?.[0]?.message?.content?.trim(); | |
| if (!text || text.length <= 2) throw new Error(`${model} empty`); | |
| return text; | |
| }) | |
| ) | |
| ); | |
| if (result) return result; | |
| } catch { /* all parallel attempts failed */ } | |
| // Fallback: GET endpoint (anonymous, always works) | |
| try { | |
| const shortMsg = last.slice(0, 800); | |
| const shortSys = sys.slice(0, 400); | |
| const url = 'https://text.pollinations.ai/' + | |
| encodeURIComponent(shortMsg) + | |
| '?seed=' + Math.floor(Math.random() * 9999) + | |
| (shortSys ? '&system=' + encodeURIComponent(shortSys) : ''); | |
| const r2 = await fetch(url, { | |
| headers: { 'User-Agent': 'Mozilla/5.0' }, | |
| signal : AbortSignal.timeout(12000), | |
| }); | |
| if (r2.ok) { | |
| const t = await r2.text(); | |
| if (t?.trim() && t.trim().length > 2) return t.trim(); | |
| } | |
| } catch {} | |
| throw new Error('pollinations all endpoints failed'); | |
| } | |
| case 'cerebras': | |
| if (!ai.cerebras?.apiKey) throw new Error('cerebras: sin key'); | |
| // TORNEO #2: Qwen3-235B-A22B β ultra-fast reasoning, excellent Spanish | |
| // Cerebras runs at ~2600 tokens/sec, fastest inference available | |
| return oai(ai.cerebras.baseUrl, ai.cerebras.apiKey, | |
| 'qwen3-235b-a22b', messages, maxTokens); | |
| // TORNEO #1: Qwen3-32B β best Spanish + speed + thinking | |
| case 'groqQwen3': | |
| if (!ai.groq?.apiKey) throw new Error('groqQwen3: sin key'); | |
| return oai(ai.groq.baseUrl, ai.groq.apiKey, | |
| 'qwen/qwen3-32b', messages, maxTokens); | |
| case 'groq': | |
| if (!ai.groq?.apiKey) throw new Error('groq: sin key'); | |
| // TORNEO #5: Llama-4-Scout β fast multilingual, official Spanish | |
| return oai(ai.groq.baseUrl, ai.groq.apiKey, | |
| getBestModel('groq', 'llama-4-scout-17b-16e-instruct', 'large'), messages, maxTokens); | |
| case 'groqFast': | |
| if (!ai.groq?.apiKey) throw new Error('groqFast: sin key'); | |
| return oai(ai.groq.baseUrl, ai.groq.apiKey, | |
| getBestModel('groq', 'llama-3.1-8b-instant', 'fast'), messages, maxTokens); | |
| case 'groqKimi': | |
| if (!ai.groq?.apiKey) throw new Error('groqKimi: sin key'); | |
| return oai(ai.groq.baseUrl, ai.groq.apiKey, 'moonshotai/kimi-k2-instruct', messages, maxTokens); | |
| case 'mistral': | |
| if (!ai.mistral?.apiKey) throw new Error('mistral: sin key'); | |
| return oai(ai.mistral.baseUrl, ai.mistral.apiKey, | |
| getBestModel('mistral', 'mistral-large-latest', 'large'), messages, maxTokens); | |
| case 'mistralCode': | |
| if (!ai.mistral?.apiKey) throw new Error('mistralCode: sin key'); | |
| return oai(ai.mistral.baseUrl, ai.mistral.apiKey, 'codestral-latest', messages, maxTokens); | |
| case 'gemma4': { | |
| const entry = geminiRotator.get(); | |
| if (!entry) throw new Error('gemma4: sin key Gemini'); | |
| // Extraer system prompt β Gemini lo necesita como systemInstruction separado | |
| const sysMsgs = messages.filter(m => m.role === 'system'); | |
| const sysText = sysMsgs.map(m => m.content ?? '').join('\n\n'); | |
| const chatMsgs = messages.filter(m => m.role !== 'system'); | |
| // Convertir a formato Gemini (assistant β model) | |
| const contents = chatMsgs.map(m => ({ | |
| role : m.role === 'assistant' ? 'model' : 'user', | |
| parts: [{ text: m.content ?? '' }], | |
| })); | |
| // Si no hay mensajes de chat, aΓ±adir uno vacΓo para que no falle | |
| if (!contents.length) contents.push({ role: 'user', parts: [{ text: '.' }] }); | |
| const body = { | |
| contents, | |
| generationConfig: { | |
| maxOutputTokens: maxTokens, | |
| temperature: 0.8, | |
| topP: 0.95, | |
| }, | |
| }; | |
| // Inyectar system prompt como systemInstruction (soportado por Gemini API) | |
| if (sysText) { | |
| body.systemInstruction = { parts: [{ text: sysText }] }; | |
| } | |
| // Gemma 4 31B IT β mejor modelo Gemma para chat en espaΓ±ol, via Gemini API | |
| const res = await fetch( | |
| `https://generativelanguage.googleapis.com/v1beta/models/gemma-4-31b-it:generateContent?key=${entry}`, | |
| { | |
| method : 'POST', | |
| headers: { 'Content-Type': 'application/json' }, | |
| body : JSON.stringify(body), | |
| signal : AbortSignal.timeout(15000), | |
| } | |
| ); | |
| if (!res.ok) { geminiRotator.fail(entry); throw Object.assign(new Error(`gemma4 ${res.status}`), { status: res.status, code: res.status }); } | |
| const data = await res.json(); | |
| return data.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? ''; | |
| } | |
| case 'openrouter': { | |
| if (!ai.openrouter?.apiKey) throw new Error('openrouter: sin key'); | |
| const model = 'meta-llama/llama-3.3-70b-instruct:free'; | |
| const res = await fetch(ai.openrouter.baseUrl, { | |
| method : 'POST', | |
| headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw', 'X-Title': 'TomateSMP' }, | |
| body : JSON.stringify({ model, messages, max_tokens: maxTokens }), | |
| signal : AbortSignal.timeout(20000), | |
| }); | |
| if (!res.ok) throw Object.assign(new Error(`openrouter ${res.status}`), { status: res.status, code: res.status }); | |
| const data = await res.json(); | |
| if (data.error) throw new Error(`openrouter: ${data.error.message ?? data.error}`); | |
| return data.choices?.[0]?.message?.content?.trim() ?? ''; | |
| } | |
| case 'openrouterR1': { | |
| if (!ai.openrouter?.apiKey) throw new Error('openrouterR1: sin key'); | |
| const res = await fetch(ai.openrouter.baseUrl, { | |
| method : 'POST', | |
| headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw' }, | |
| body : JSON.stringify({ model: 'deepseek/deepseek-r1:free', messages, max_tokens: maxTokens }), | |
| signal : AbortSignal.timeout(30000), | |
| }); | |
| if (!res.ok) throw Object.assign(new Error(`openrouterR1 ${res.status}`), { status: res.status, code: res.status }); | |
| const data = await res.json(); | |
| return data.choices?.[0]?.message?.content?.trim() ?? ''; | |
| } | |
| case 'cloudflare': { | |
| if (!ai.cloudflare?.accountId || !ai.cloudflare?.apiToken) throw new Error('cloudflare: sin config'); | |
| const res = await fetch( | |
| `https://api.cloudflare.com/client/v4/accounts/${ai.cloudflare.accountId}/ai/run/@cf/meta/llama-3.3-70b-instruct-fp8-fast`, | |
| { | |
| method : 'POST', | |
| headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.cloudflare.apiToken}` }, | |
| body : JSON.stringify({ messages, max_tokens: maxTokens }), | |
| signal : AbortSignal.timeout(20000), | |
| } | |
| ); | |
| if (!res.ok) throw Object.assign(new Error(`cloudflare ${res.status}`), { status: res.status, code: res.status }); | |
| const data = await res.json(); | |
| // Cloudflare AI devuelve distintos formatos segΓΊn el modelo | |
| const r = data.result; | |
| if (typeof r?.response === 'string') return r.response.trim(); | |
| if (Array.isArray(r) && r[0]?.response) return String(r[0].response).trim(); | |
| if (r?.choices?.[0]?.message?.content) return r.choices[0].message.content.trim(); | |
| if (typeof r === 'string') return r.trim(); | |
| return ''; | |
| } | |
| // ββ HuggingFace Inference API providers βββββββββββββββββββββββββββββββ | |
| case 'hfFast': | |
| if (!isHFAvailable()) throw new Error('hfFast: HF unavailable'); | |
| return callHuggingFace(messages, 'fast', maxTokens); | |
| case 'hfSmart': | |
| if (!isHFAvailable()) throw new Error('hfSmart: HF unavailable'); | |
| return callHuggingFaceCascade(messages, 'smart', maxTokens); | |
| case 'hfSpanish': | |
| if (!isHFAvailable()) throw new Error('hfSpanish: HF unavailable'); | |
| return callHuggingFaceCascade(messages, 'spanish', maxTokens); | |
| case 'hfCode': | |
| if (!isHFAvailable()) throw new Error('hfCode: HF unavailable'); | |
| return callHuggingFace(messages, 'code', maxTokens); | |
| case 'hfReasoning': | |
| if (!isHFAvailable()) throw new Error('hfReasoning: HF unavailable'); | |
| return callHuggingFaceCascade(messages, 'reasoning', maxTokens); | |
| case 'hfFallback': | |
| if (!isHFAvailable()) throw new Error('hfFallback: HF unavailable'); | |
| return callHuggingFace(messages, 'fast', maxTokens); | |
| // ββ HIVE β RigoChat-7B Cluster (ALL workers combined) ββββββββββββββββ | |
| case 'hive': { | |
| try { | |
| const userMsg = messages.filter(m => m.role === 'user').pop()?.content ?? ''; | |
| const result = await hiveGenerate(messages, maxTokens, userMsg, 'hybrid'); | |
| if (result?.text && result.text.trim().length > 2) { | |
| return result.text; | |
| } | |
| throw new Error('hive: empty response'); | |
| } catch (err) { | |
| throw Object.assign(new Error(`hive: ${err.message}`), { status: 503, code: 503 }); | |
| } | |
| } | |
| // ββ Local model β primary local inference ββββββββββββββββββββββββββββ | |
| case 'local': { | |
| if (!isLocalAIReady()) throw new Error('local: model not ready'); | |
| const { localChatPrimary } = await import('./local-ai.js'); | |
| return localChatPrimary(messages, maxTokens, 0.8); | |
| } | |
| default: | |
| throw new Error(`Proveedor desconocido: ${name}`); | |
| } | |
| } | |
| // ββ callProvider: CB + backoff + stats ββββββββββββββββββββββββββββββββββββββββ | |
// HTTP status codes treated as permanent failures (bad key, no credit, unknown
// model, ...): retrying cannot help, so the provider is disabled instead.
const PERM_ERRORS = [401, 402, 403, 404, 422];

/**
 * Calls a single provider through its circuit breaker, with retry and stats.
 *
 * Retry policy (at most 3 attempts):
 *  - HTTP 429: wait 1.5-4.5 s (randomised) and retry.
 *  - HTTP 400 whose message mentions length/tokens, first attempt only: retry
 *    once with a reduced context (system prompts cut to 1500 chars plus the
 *    last two non-system messages).
 *  - anything else is rethrown immediately.
 *
 * @param {string} name - Provider key; must have an entry in `breakers`.
 * @param {Array<{role: string, content: *}>} messages - Chat messages. The
 *   caller's array is never modified.
 * @param {number} maxTokens - Completion token budget passed to the provider.
 * @param {{v: boolean}} [failedFlag] - When `v` is already true, a
 *   non-permanent failure is not reported to the breaker; otherwise `v` is set
 *   to true and the failure is reported.
 * @returns {Promise<string>} Whatever `callDirect` resolves with.
 * @throws {Error} When the breaker refuses the request or the provider fails.
 */
async function callProvider(name, messages, maxTokens, failedFlag) {
  checkDailyReset();
  const cb = breakers[name];
  if (!cb?.canRequest()) throw new Error(`${name}: circuit OPEN`);
  const start = Date.now();
  recordReq(name);
  try {
    // Work on a local reference so the context reduction below never rebinds
    // the parameter.
    let payload = messages;
    let result;
    for (let attempt = 0; attempt < 3; attempt++) {
      try {
        result = await callDirect(name, payload, maxTokens);
        break;
      } catch (e) {
        // 429: back off and retry.
        if (e.status === 429 && attempt < 2) {
          await new Promise(r => setTimeout(r, 1500 + Math.random() * 3000));
          continue;
        }
        // 400 complaining about length: shrink the context and retry ONCE.
        if (e.status === 400 && attempt === 0 &&
            /length|too long|reduce|token/i.test(e.message ?? '')) {
          console.warn('[AI] ' + name + ': mensaje muy largo, comprimiendo...');
          const sys = payload.filter(m => m.role === 'system');
          const rest = payload.filter(m => m.role !== 'system').slice(-2);
          // Only string contents are truncated. A missing or non-string
          // content is passed through untouched instead of throwing a
          // TypeError that would hide the provider's own error.
          const shortSys = sys.map(m => ({
            ...m,
            content: typeof m.content === 'string' ? m.content.slice(0, 1500) : m.content,
          }));
          payload = [...shortSys, ...rest];
          continue;
        }
        throw e;
      }
    }
    cb.recordSuccess();
    recordLatency(name, Date.now() - start);
    return result;
  } catch (err) {
    const code = err.status ?? err.code ?? null;
    const isPermanent = PERM_ERRORS.includes(code);
    if (isPermanent) {
      // Force the breaker open and push lastFail 24 h into the future.
      // NOTE(review): presumably this delays the breaker's own recovery
      // timer; the breaker implementation is outside this chunk.
      cb.state = 'OPEN'; cb.openCount = 99; cb.lastFail = Date.now() + 86_400_000;
      console.warn(`[AI] β ${name} deshabilitado 24h (${code})`);
    } else if (!failedFlag?.v) {
      if (failedFlag) failedFlag.v = true;
      cb.recordFailure(code, isTO(err));
    }
    recordError(name);
    if (!isPermanent) {
      console.warn(`[AI] β ${name}: ${err.message?.slice(0, 80)}`);
    }
    throw err;
  }
}
| // ββ ValidaciΓ³n de respuestas β Sistema avanzado SIN regex ββββββββββββββββββββ | |
| // v10: Reemplaza BAD_PATTERNS (regex) con validaciΓ³n semΓ‘ntica determinista | |
| // que es mΓ‘s precisa, sin falsos positivos, y mΓ‘s mantenible. | |
| // Frases de IA que NUNCA deben aparecer al inicio de una respuesta vΓ‘lida | |
// Lower-case openings that mark a reply as an AI disclaimer; a valid reply
// must never start with one of them.
const AI_DISCLAIMER_PREFIXES = [
  'as an ai',
  'as a language model',
  'como una ia',
  'como un modelo',
  "i'm an ai",
  'i am an ai',
  "i'm a language model",
  'i am a language model',
  'como inteligencia artificial',
  'como modelo de lenguaje',
  'as an assistant',
  'como asistente',
];

// Whole-reply values (lower-case) that indicate the model produced garbage.
const GARBAGE_RESPONSES = new Set([
  'error', 'null', 'undefined', 'true', 'false', '{}', '[]',
  'nan', 'none', 'nil', 'void',
]);

/**
 * Deterministic sanity check for a provider reply.
 *
 * A reply is rejected when it is not a non-empty string, is shorter than 2
 * characters after trimming, consists only of whitespace/basic punctuation,
 * equals a known garbage value, is longer than 2000 characters, starts with an
 * AI disclaimer, or repeats one word (longer than 2 characters) more than 10
 * times.
 *
 * @param {*} text - Candidate reply.
 * @returns {boolean} True when the reply passes every check.
 */
function isValidResponse(text) {
  if (!text || typeof text !== 'string') return false;
  const t = text.trim();
  // Empty or effectively empty.
  if (t.length < 2) return false;
  // Nothing left once whitespace and basic punctuation are removed.
  const withoutPunct = t.replace(/[\s.,!?_\-:;'"()]/g, '');
  if (withoutPunct.length === 0) return false;
  const tLower = t.toLowerCase();
  // Single garbage value.
  if (GARBAGE_RESPONSES.has(tLower)) return false;
  // Too long: probably the model reciting its prompt.
  if (t.length > 2000) return false;
  // AI disclaimer at the very start (case-insensitive).
  if (AI_DISCLAIMER_PREFIXES.some(prefix => tLower.startsWith(prefix))) return false;
  // Repetition loop: the same word more than 10 times. A Map is used because
  // a plain object resolves words such as "constructor" or "__proto__" to
  // inherited properties, so they would never be counted.
  const words = tLower.split(/\s+/);
  if (words.length > 10) {
    const wordCounts = new Map();
    for (const w of words) {
      const seen = (wordCounts.get(w) ?? 0) + 1;
      wordCounts.set(w, seen);
      if (seen > 10 && w.length > 2) return false;
    }
  }
  return true;
}
| // ββ AI-based response quality check (uses local model for fast validation) ββ | |
| // This replaces regex-based BAD_PATTERNS with semantic understanding | |
/**
 * Looks for AI-disclaimer or refusal phrases anywhere in a reply.
 *
 * Despite its name this performs plain substring matching only; no model is
 * called and `userMsg` is not consulted.
 *
 * @param {string} text - Reply to inspect.
 * @param {string} userMsg - Unused.
 * @returns {Promise<{valid: boolean, reason?: string, replacement?: string}>}
 *   `{ valid: true }` for short or clean replies, otherwise a rejection record
 *   carrying a canned replacement.
 */
async function aiValidateResponse(text, userMsg) {
  // Replies under 10 characters are accepted without inspection.
  if (!text || text.length < 10) return { valid: true };

  const haystack = text.toLowerCase().trim();
  const disclaimerMarkers = [
    'como ia,',
    'como inteligencia artificial,',
    'as an ai,',
    'as a language model,',
    'i cannot fulfill',
    'no puedo cumplir esa solicitud',
    'no puedo procesar esa solicitud',
  ];

  const flagged = disclaimerMarkers.some(marker => haystack.includes(marker));
  return flagged
    ? { valid: false, reason: 'ai_disclaimer', replacement: 'eso no va' }
    : { valid: true };
}
| // ββ Router principal ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Safety valve: when every known provider's breaker is OPEN, close them all
 * again so routing always has something to try.
 */
function ensureAvailability() {
  const everyProvider = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];
  const tripped = [];
  for (const providerName of everyProvider) {
    if (breakers[providerName]?.state === 'OPEN') tripped.push(providerName);
  }
  // At least one provider is still usable: nothing to do.
  if (tripped.length !== everyProvider.length) return;

  console.warn('[AI] Todos los proveedores en OPEN β reseteando');
  for (const providerName of tripped) {
    const breaker = breakers[providerName];
    breaker.state = 'CLOSED';
    breaker.openCount = 0;
  }
}
/**
 * Main router: tries the providers of a task cascade one at a time, best
 * health score first, and falls back to the local model when none succeeds.
 *
 * @param {Array<{role: string, content: *}>} messages - Prepared chat messages.
 * @param {string} taskType - Key into `CASCADES`; unknown keys use
 *   `CASCADES.spanish`.
 * @param {number} maxTokens - Completion token budget.
 * @param {string} userMsg - Latest user message, forwarded to the local
 *   fallback.
 * @returns {Promise<string>} First reply accepted by `isValidResponse`, or the
 *   local fallback's reply.
 * @throws {Error} When no provider is available or every attempt failed.
 */
async function _route(messages, taskType, maxTokens, userMsg) {
  ensureAvailability();
  const cascade = [...(CASCADES[taskType] ?? CASCADES.spanish)];
  // Healthiest first; providers predicted to exhaust their quota lose 0.3.
  cascade.sort((a, b) => {
    const pa = predictExhaustion(a) ? -0.3 : 0;
    const pb = predictExhaustion(b) ? -0.3 : 0;
    return (healthScore(b) + pb) - (healthScore(a) + pa);
  });
  const available = cascade.filter(n => breakers[n]?.canRequest() && isQuotaOk(n));
  if (!available.length) {
    // Nothing usable remotely: go straight to the local model.
    if (isLocalAIReady()) {
      const r = await emergencyFallback(messages, userMsg).catch(() => null);
      if (r) return r;
    }
    throw new Error('[AI] Sin proveedores disponibles');
  }
  // Strictly sequential: one provider at a time, next one only on failure or
  // on a reply rejected by isValidResponse.
  for (const name of available) {
    try {
      // Fresh failure flag per attempt. callProvider skips cb.recordFailure
      // once the flag is set, so a flag shared across this sequential loop
      // would hide every failure after the first from the later providers'
      // circuit breakers.
      const r = await callProvider(name, messages, maxTokens, { v: false });
      if (isValidResponse(r)) return r;
    } catch {
      // callProvider has already logged and recorded the failure.
      continue;
    }
  }
  // Last resort: local model.
  if (isLocalAIReady()) {
    console.log('[AI] π  Usando modelo local como fallback (todos los proveedores fallaron)');
    const r = await emergencyFallback(messages, userMsg).catch(e => {
      console.warn('[AI] Modelo local tambiΓ©n fallΓ³:', e.message);
      return null;
    });
    if (r) return r;
  } else {
    console.warn('[AI] Modelo local no disponible para fallback (isLocalAIReady=false)');
  }
  throw new Error('[AI] Todos los proveedores fallaron');
}
/**
 * Owner-mode router: same cascade as `_route`, but ordered by health score
 * alone and accepting any non-blank reply (no `isValidResponse` filter).
 *
 * @param {Array<{role: string, content: *}>} messages - Prepared chat messages.
 * @param {string} taskType - Key into `CASCADES`; unknown keys use
 *   `CASCADES.spanish`.
 * @param {number} maxTokens - Completion token budget.
 * @returns {Promise<string>} First non-blank provider reply, or the local
 *   fallback's reply.
 * @throws {Error} When nothing produced a reply.
 */
async function _routeOwner(messages, taskType, maxTokens) {
  ensureAvailability();
  const cascade = [...(CASCADES[taskType] ?? CASCADES.spanish)];
  cascade.sort((a, b) => healthScore(b) - healthScore(a));
  const available = cascade.filter(n => breakers[n]?.canRequest() && isQuotaOk(n));
  if (!available.length && isLocalAIReady()) {
    // Await before the null check: applying `??` to the pending promise can
    // never take the right-hand side, so a null fallback result used to be
    // returned to the caller instead of rejecting.
    const local = await emergencyFallback(messages, '');
    if (local == null) throw new Error('Sin proveedores');
    return local;
  }
  for (const name of available) {
    try {
      const r = await callProvider(name, messages, maxTokens, { v: false });
      if (r && r.trim().length > 0) return r;
    } catch {
      // callProvider has already logged and recorded the failure.
      continue;
    }
  }
  if (isLocalAIReady()) return emergencyFallback(messages, '');
  throw new Error('[AI] Sin respuesta');
}
| // ββ callAI ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Public entry point for foreground requests.
 *
 * Owner mode bypasses both caches and routes through `_routeOwner`. Otherwise
 * the exact cache, then the semantic cache (unless `feats.semanticCache` is
 * `false`), are consulted before a single-flighted, queued call to `_route`;
 * the cleaned result is written to both caches.
 *
 * NOTE(review): the file header states injectThinking() was removed in v3.0,
 * yet it is still invoked here — confirm it survives as a pass-through.
 *
 * @param {Array<{role: string, content: *}>} messages - Chat messages.
 * @param {string} [taskType='spanish'] - Cascade / cache namespace.
 * @param {?number} [maxTokens=null] - Token budget hint for `adaptiveTokens`.
 * @param {string} [userMsg=''] - Latest user text; derived from `messages`
 *   when empty.
 * @param {boolean} [ownerMode=false] - Use the owner router and skip caches.
 * @returns {Promise<string>} Reply after `cleanThinkingArtifacts`, or a cached
 *   reply.
 */
export async function callAI(messages, taskType = 'spanish', maxTokens = null, userMsg = '', ownerMode = false) {
  const prompt = userMsg || (messages.filter(m => m.role === 'user').pop()?.content ?? '');
  const tokenBudget = adaptiveTokens(prompt, taskType, maxTokens);
  const prepared = await compressContext(injectThinking(messages));
  // Computed unconditionally so it is always defined below.
  const cacheKey = _cache.key(prepared, taskType);

  if (ownerMode) {
    return withQueue(_userQueue, async () => {
      const ownerReply = await _routeOwner(prepared, taskType, tokenBudget);
      return cleanThinkingArtifacts(ownerReply);
    });
  }

  // Exact-match cache.
  const exactHit = _cache.get(cacheKey);
  if (exactHit) {
    console.log('[AI] πΎ cache hit');
    return exactHit;
  }

  // Semantic cache (best effort: lookup errors are ignored).
  if (feats.semanticCache !== false) {
    try {
      const semanticHit = await semanticCache.get(prompt);
      if (semanticHit) {
        console.log('[AI] πΎ semantic cache hit');
        return semanticHit;
      }
    } catch {}
  }

  const routeAndStore = async () => {
    const raw = await _route(prepared, taskType, tokenBudget, prompt);
    const reply = cleanThinkingArtifacts(raw);
    _cache.set(cacheKey, reply, prompt);
    semanticCache.set(prompt, reply, semanticCache.getTTL(prompt)).catch(() => {});
    return reply;
  };
  return singleFlight(cacheKey, () => withQueue(_userQueue, routeAndStore));
}
| // ββ callAIBackground βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Entry point for background work: no caching, runs on the background queue
 * and always routes with the 'background' cascade. `taskType` only feeds
 * `adaptiveTokens`.
 *
 * @param {Array<{role: string, content: *}>} messages - Chat messages.
 * @param {string} [taskType='spanish'] - Passed to `adaptiveTokens` only.
 * @param {?number} [maxTokens=null] - Token budget hint.
 * @param {string} [userMsg=''] - Latest user text; derived from `messages`
 *   when empty.
 * @returns {Promise<string>} Reply after `cleanThinkingArtifacts`.
 */
export async function callAIBackground(messages, taskType = 'spanish', maxTokens = null, userMsg = '') {
  const prompt = userMsg || (messages.filter(m => m.role === 'user').pop()?.content ?? '');
  const tokenBudget = adaptiveTokens(prompt, taskType, maxTokens);
  const prepared = await compressContext(injectThinking(messages));

  const job = async () => cleanThinkingArtifacts(
    await _route(prepared, 'background', tokenBudget, prompt)
  );
  return withQueue(_backgroundQueue, job);
}
| // ββ Mixture of Agents βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Mixture of Agents: asks three providers in parallel for short proposals
 * (300 tokens each) and has 'glm51' synthesise a final answer from them.
 *
 * Falls back to `callAI` when `feats.moa` is off or when no proposal longer
 * than 10 characters came back.
 *
 * @param {Array<{role: string, content: *}>} messages - Chat messages.
 * @param {number} [maxTokens=600] - Token budget for the synthesis call.
 * @returns {Promise<string>} Synthesised reply after `cleanThinkingArtifacts`.
 * @throws {Error} When the synthesis call itself fails.
 */
export async function callAIMoA(messages, maxTokens = 600) {
  if (!feats.moa) return callAI(messages, 'spanish', maxTokens);
  const [r1, r2, r3] = await Promise.allSettled([
    callProvider('glm51', messages, 300, { v: false }),
    callProvider('groqFast', messages, 300, { v: false }),
    callProvider('pollinations', messages, 300, { v: false }),
  ]);
  const proposals = [r1, r2, r3]
    .filter(r => r.status === 'fulfilled' && r.value?.length > 10)
    .map((r, i) => `Respuesta ${i + 1}: ${r.value}`)
    .join('\n\n');
  if (!proposals) return callAI(messages, 'spanish', maxTokens);
  const synthesis = await callProvider('glm51', [
    { role: 'system', content: 'Sintetiza la mejor respuesta de las siguientes opciones. Sin texto extra.' },
    { role: 'user', content: `${proposals}\n\nSΓ­ntesis:` },
  ], maxTokens, { v: false });
  // callAI and callAIBackground both clean their output; this path returned
  // the raw provider text, so leaked reasoning could reach the caller.
  return cleanThinkingArtifacts(synthesis);
}
| // ββ Self-healing ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Starts a 5-minute timer that warns when average daily quota usage across
 * all providers exceeds 85 %. Does nothing unless `feats.selfHealing` is set.
 * The monitor only logs; it does not change routing.
 */
export function startSelfHealing() {
  if (!feats.selfHealing) return;

  const FIVE_MINUTES_MS = 5 * 60 * 1000;

  const inspectQuota = () => {
    const providerNames = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];

    let totalRequests = 0;
    for (const providerName of providerNames) totalRequests += getCounter(providerName).req;
    if (totalRequests === 0) return;

    // Providers without a daily limit contribute 0 but still count in the
    // divisor.
    let usageSum = 0;
    for (const providerName of providerNames) {
      const limit = DAILY_LIMITS[providerName];
      usageSum += limit ? getCounter(providerName).req / limit : 0;
    }
    const averageUsage = usageSum / providerNames.length;

    if (averageUsage > 0.85) {
      console.warn('[SelfHeal] Cuota global alta β priorizar proveedores con mΓ‘s quota');
    }
  };

  setInterval(inspectQuota, FIVE_MINUTES_MS);
  console.log('[HEAL] Self-healing v7 iniciado');
}
| // ββ Warmup ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Sends a tiny probe request to a fixed set of providers in parallel and logs
 * each latency or error. Does nothing unless `feats.warmup` is set. Never
 * rejects: individual failures are only logged.
 *
 * @returns {Promise<void>}
 */
export async function warmupProviders() {
  if (!feats.warmup) return;

  const probeMessages = [{ role: 'user', content: 'ok' }];
  const targets = ['glm51', 'glmFlash', 'groqFast', 'cerebras', 'pollinations', 'mistral'];

  const probe = async providerName => {
    const startedAt = Date.now();
    try {
      await callProvider(providerName, probeMessages, 5, { v: false });
      console.log(`[WARMUP] β ${providerName} ${Date.now() - startedAt}ms`);
    } catch (e) {
      console.log(`[WARMUP] β ${providerName}: ${e.message?.slice(0, 40)}`);
    }
  };

  console.log('[WARMUP] Verificando proveedores...');
  await Promise.allSettled(targets.map(providerName => probe(providerName)));
}
| // ββ Comprimir contexto βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Shrinks a conversation before it is sent to a provider.
 *
 * System messages come first in the result, followed by the last 4 non-system
 * messages. A system prompt is shortened only when doing so makes it smaller:
 * its first 3500 and last 1000 characters are kept around a marker. Prompts
 * too short for that (head + marker + tail would be at least as long) are left
 * untouched; previously prompts of 4001-4526 characters came out longer, with
 * the overlapping part duplicated.
 *
 * @param {Array<{role: string, content: *}>} messages - Full conversation.
 * @returns {Promise<Array<{role: string, content: *}>>} New array; input
 *   messages are never mutated.
 */
async function compressContext(messages) {
  const HEAD_CHARS = 3500;          // opening of the system prompt to keep
  const TAIL_CHARS = 1000;          // closing part of the system prompt to keep
  const MARKER = '\n...(contexto comprimido)\n';
  const MIN_COMPRESSIBLE = HEAD_CHARS + TAIL_CHARS + MARKER.length;
  const MAX_CHAT_MESSAGES = 4;

  const sys = messages.filter(m => m.role === 'system');
  const chat = messages.filter(m => m.role !== 'system');

  const compressedSys = sys.map(m => {
    const content = m.content;
    // Non-string contents and prompts that would not get shorter pass through.
    if (typeof content !== 'string' || content.length <= MIN_COMPRESSIBLE) return m;
    return { ...m, content: content.slice(0, HEAD_CHARS) + MARKER + content.slice(-TAIL_CHARS) };
  });

  // Chat history is trimmed first: it matters less than the system prompt.
  return [...compressedSys, ...chat.slice(-MAX_CHAT_MESSAGES)];
}
| // ββ Stats βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
/**
 * Builds a per-provider snapshot of today's usage.
 *
 * @returns {Object} Map of provider name to
 *   `{ requests, limit, pct, state, score }`, plus `_hf` with the HuggingFace
 *   stats when `getHFStats()` does not throw. Providers without a configured
 *   daily limit report a limit of 99999.
 */
export function getDailyStats() {
  const snapshot = {};
  const providerNames = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];

  for (const providerName of providerNames) {
    const counter = getCounter(providerName);
    const limit = DAILY_LIMITS[providerName] ?? 99999;
    const usedPercent = ((counter.req / limit) * 100).toFixed(1);
    snapshot[providerName] = {
      requests: counter.req,
      limit,
      pct: `${usedPercent}%`,
      state: breakers[providerName]?.state ?? '?',
      score: healthScore(providerName).toFixed(2),
    };
  }

  // HuggingFace-specific stats are optional; failures are ignored.
  try {
    snapshot._hf = getHFStats();
  } catch {}

  return snapshot;
}
/**
 * Lists breaker state and health score for the core and GLM providers
 * (HuggingFace providers are not included).
 *
 * @returns {Array<{name: string, state: string, score: string}>}
 */
export function getProviderStatus() {
  const statuses = [];
  for (const providerName of [...ALL_PROVIDERS, ...GLM_PROVIDERS]) {
    statuses.push({
      name: providerName,
      state: breakers[providerName]?.state ?? 'UNKNOWN',
      score: healthScore(providerName).toFixed(2),
    });
  }
  return statuses;
}
/**
 * Empties the exact-match response cache.
 */
export function clearCache() {
  _cache.clear();
}
| // ββ HIVE exports ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| export { getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers, hiveInit as initHive }; | |
/**
 * Optional quality gate over a finished reply.
 *
 * @param {string} response - Reply to check.
 * @param {string} userMessage - Unused.
 * @returns {Promise<{pass: boolean, reason?: string}>} Always passes when
 *   `feats.qualityGate` is off; fails with reason 'empty' for replies under 10
 *   trimmed characters; otherwise reports the `isValidResponse` verdict.
 */
export function runQualityGate(response, userMessage) {
  let verdict;
  if (!feats.qualityGate) {
    verdict = { pass: true };
  } else if (!response || response.trim().length < 10) {
    verdict = { pass: false, reason: 'empty' };
  } else {
    verdict = { pass: isValidResponse(response) };
  }
  return Promise.resolve(verdict);
}