zelin-bot / src /ai.js
TomatitoToho's picture
v11: Fix thinking leak - ALWAYS cleanThinkingArtifacts, more aggressive detection, expanded patterns
2bac7f7 verified
/**
* ai.js — Router de IA v10.0 — TORNEO DE MODELOS
* ================================================
* CAMBIOS v10.0 (basado en torneo de 179 modelos):
* - Qwen3-32B (Groq) como proveedor PRIMARIO — mejor español+velocidad+thinking
* - Qwen3-235B-A22B (Cerebras) como premium — ultra-rápido, excelente español
* - Llama-4-Scout-17B (Groq) como secundario — oficial español
* - DeepSeek R1 (OpenRouter) para razonamiento complejo
* - GLM mantenido pero degradado en prioridad
* - Modelo local (Gemma 4 E4B) solo como fallback final
* - Thinking mode optimizado para Qwen3
* - Cascadas reordenadas por puntaje del torneo
*/
import { readConfig } from './utils.js';
import { semanticCache } from './semantic-cache.js';
import { emergencyFallback, isLocalAIReady } from './local-ai.js';
import { callHuggingFace, callHuggingFaceCascade, isHFAvailable, discoverHFModels, getHFStats } from './hf-provider.js';
import { hiveGenerate, initialize as hiveInit, getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers } from './hive.js';
// Configuration is read once, at module load, through readConfig().
// Each section falls back to an empty object when it is null/undefined so the
// optional lookups further down (ai.groq?.apiKey, glm.apiKey, ...) never throw.
const config = readConfig();
// Per-provider settings; read below as ai.groq, ai.mistral, ai.gemini, etc.
const ai = config.ai ?? {};
// Feature flags section; not referenced in the visible part of this file.
const feats = config.features ?? {};
// GLM settings; apiKey and baseUrl are read by the glm* providers.
const glm = config.glm ?? {};
// ── Thinking Mode v3.0 β€” REMOVED injectThinking() ──────────────────────────
// v3.0 CRITICAL FIX: injectThinking() was causing the #1 bug β€” chain-of-thought
// leaking into Discord responses. Models would output their thinking process as
// plain text despite instructions not to. The fix: DON'T inject thinking prompts.
// Models already think internally. Adding "PIENSA PROFUNDAMENTE" causes them to
// output that thinking. Less is more β€” just give good system prompts.
//
// For models with native thinking (GLM-5.1 reasoning_content, Qwen3 /think):
// - We let the API handle thinking natively (separate from content)
// - We NEVER inject /think or thinking instructions manually
// - We ALWAYS strip any thinking that leaks into content via cleanThinkingArtifacts()
// injectThinking is now a no-op β€” returns messages unchanged
/**
 * Intentional no-op: returns the message list exactly as received.
 *
 * @param {Array} messages - Chat messages to pass through.
 * @param {string} [_providerName=''] - Ignored.
 * @returns {Array} The same `messages` reference, unmodified.
 */
function injectThinking(messages, _providerName = '') {
// No prompt is injected and no copy is made; the input is handed back as-is.
return messages;
}
// Limpiar artefactos de thinking + patrones AI β€” v11 ANTI-LEAK
// v11: Major overhaul β€” handles ALL known thinking leak patterns
// Root cause: models output their chain-of-thought as plain text
// v11 FIX: More aggressive detection β€” lower thresholds, more patterns
/**
 * Remove leaked chain-of-thought and assistant boilerplate from a model reply.
 *
 * The function applies a fixed sequence of heuristics, each one operating on
 * the output of the previous one:
 *   1. strip reasoning tags (<think>, <thinking>, ...) and CDATA sections;
 *   2. drop an analytical first half when the second half looks like the
 *      actual (Spanish) reply;
 *   3. drop leading English "thinking" sentences before a Spanish reply;
 *   4. drop recited numbered steps, "My response:" style lead-ins and
 *      "user:/zelin:" transcript recitations;
 *   5. strip Qwen-style /think and /no_think markers and "answer" prefixes;
 *   6. remove broken tool placeholders, soften formal refusals, fix name
 *      typos and trim assistant-style closing questions.
 *
 * NOTE(review): several patterns below contain non-ASCII characters that look
 * mis-encoded (UTF-8 decoded with a legacy code page). They are reproduced
 * exactly as found; confirm the encoding of the file on disk.
 *
 * @param {*} text - Raw model output.
 * @returns {*} The cleaned, trimmed string; non-string or empty input is
 *   returned unchanged.
 */
function cleanThinkingArtifacts(text) {
  if (!text || typeof text !== 'string') return text;
  let t = text;

  // --- 1. Reasoning tags -----------------------------------------------------
  // Well-formed open/close pairs first, for every known tag name.
  t = t.replace(/<think[\s\S]*?<\/think>/gi, '');
  t = t.replace(/<thinking[\s\S]*?<\/thinking>/gi, '');
  t = t.replace(/<reasoning[\s\S]*?<\/reasoning>/gi, '');
  t = t.replace(/<scratchpad[\s\S]*?<\/scratchpad>/gi, '');
  t = t.replace(/<internal[\s\S]*?<\/internal>/gi, '');
  // CDATA sections. The brackets must be escaped: unescaped, "[CDATA[\s\S]" is
  // a character class and the pattern would delete everything from any "<!"
  // up to the next "]]>".
  t = t.replace(/<!\[CDATA\[[\s\S]*?\]\]>/gi, '');
  // Orphan closing tags left behind when the opening tag was cut off.
  t = t.replace(/<\/think>/gi, '');
  t = t.replace(/<\/thinking>/gi, '');
  t = t.replace(/<\/reasoning>/gi, '');

  // --- 2. Multi-line thinking followed by the reply ---------------------------
  // Split the non-empty lines in two halves; if the first half scores as
  // "thinking" and the second half as Spanish, keep only the second half.
  if (t.includes('\n') && t.length > 150) {
    const lines = t.split('\n').filter(l => l.trim().length > 0);
    if (lines.length >= 3) {
      const firstLines = lines.slice(0, Math.ceil(lines.length / 2));
      const lastLines = lines.slice(Math.ceil(lines.length / 2));
      // Line openers typical of model self-talk.
      const thinkingWords = /^(?:okay|alright|let me|i should|the user|first,|based on|since|so,|well,|now,|also,|however,|but,|actually,|hmm|let's|i need|i think|i'll|going to|in order|therefore|because|this means|that means|it seems|it appears|looking at|considering|analyzing|understanding|evaluating|to respond|to answer|the message|the question|as zelin|respond as|in character|staying in|my role|my persona)/i;
      // Words that mark a line as conversational Spanish.
      const spanishWords = /\b(wey|neta|chido|chale|Γ³rale|hΓ­jole|zelin|morra|gΓΌey|onda|chingΓ³n|bueno|claro|sΓ­|no|nah|oye|mira|Γ³sea|pues|nada|simΓ³n|sale|va|ok|jaja|qu[eΓ©]|c[oΓ³]mo|d[oΓ³]nde|cu[aΓ‘]ndo|por qu[eΓ©]|much[oas]?|tambi[eΓ©]n|aqu[iΓ­]|all[iΓ‘]|este|esta|eso|esa|s[iΓ­]|nope|yup|sip|nop|dale|va|holi|ola|bro|crack|xd|gg|ns|ni idea|ni modo|ya ves|ya mero|hΓ­jole|a poco|quΓ© onda|no mames|no manches|est[aΓ‘] ca[nnΓ±]Γ³n|padre|madre)/i;
      let firstHalfThinking = 0;
      let secondHalfSpanish = 0;
      for (const line of firstLines) {
        if (thinkingWords.test(line.trim())) firstHalfThinking++;
        if (spanishWords.test(line)) firstHalfThinking--; // Spanish cancels a thinking hit
      }
      for (const line of lastLines) {
        if (spanishWords.test(line)) secondHalfSpanish++;
        if (thinkingWords.test(line.trim())) secondHalfSpanish--; // thinking cancels a Spanish hit
      }
      // A net score of one on each side is enough to cut.
      if (firstHalfThinking >= 1 && secondHalfSpanish >= 1) {
        const response = lastLines.join('\n').trim();
        if (response.length > 5) {
          t = response;
        }
      }
    }
  }

  // --- 3. Single-paragraph English thinking followed by Spanish ---------------
  // Only applies when the very first sentence looks like thinking; the reply
  // is taken from the LAST sentence that looks Spanish onwards.
  if (t.length > 100) {
    const sentences = t.split(/(?<=[.!?])\s+/);
    if (sentences.length >= 2) {
      let lastSpanishStart = -1;
      for (let i = 0; i < sentences.length; i++) {
        const s = sentences[i];
        if (/\b(wey|neta|chido|chale|Γ³rale|hΓ­jole|morra|gΓΌey|onda|chingΓ³n|Γ³sea|pues|simΓ³n)\b/i.test(s) ||
          /[ΒΏΒ‘]/.test(s) ||
          (/\b(s[iΓ­]|no|nah|oye|mira|bueno|claro|dale|ns)\b/i.test(s) && !/^(?:okay|alright|let me|i should|the user)/i.test(s))) {
          if (i > 0 && /^(?:okay|alright|let me|i should|the user|first|based on|since|so|well|now|also|however|actually|hmm|let's|i need|i think)/i.test(sentences[0])) {
            lastSpanishStart = i;
          }
        }
      }
      if (lastSpanishStart >= 0) {
        const response = sentences.slice(lastSpanishStart).join(' ').trim();
        if (response.length > 5) t = response;
      }
    }
  }

  // --- 4a. Numbered reasoning steps recited from the system prompt ------------
  // When such a list is present, the reply is whatever follows the last
  // numbered line.
  const thinkingRecitationPattern = /^[\d][.)]\s*(?:ΒΏCu[aΓ‘]l|ΒΏHay|ΒΏQu[eΓ©]|ΒΏC[oΓ³]mo|Auto-eval|Responde|PIENSA|Pienso|Pensamiento)/m;
  if (thinkingRecitationPattern.test(t)) {
    const lines = t.split('\n');
    let lastNumberedLine = -1;
    for (let i = 0; i < lines.length; i++) {
      if (/^\d[.)]\s/.test(lines[i].trim())) lastNumberedLine = i;
    }
    if (lastNumberedLine >= 0 && lastNumberedLine < lines.length - 1) {
      const afterThinking = lines.slice(lastNumberedLine + 1).join('\n').trim();
      if (afterThinking.length > 2) t = afterThinking;
    }
  }

  // --- 4b. English reasoning at the start, Spanish reply afterwards -----------
  // Keep everything from the first line that shows Spanish markers.
  const englishThinkingPattern = /^(?:Okay|Alright|Let me|I should|The user|First,|Based on|Since)/i;
  if (englishThinkingPattern.test(t) && t.length > 150) {
    const lines = t.split('\n');
    let foundSpanish = false;
    const spanishLines = [];
    for (const line of lines) {
      if (/[¿‘ñÑéíóú]/i.test(line) || /\b(wey|neta|chido|chale|órale|zelin|híjole|morra)\b/i.test(line)) {
        foundSpanish = true;
        spanishLines.push(line);
      } else if (foundSpanish) {
        spanishLines.push(line);
      }
    }
    if (spanishLines.length > 0 && spanishLines.join('\n').trim().length > 2) {
      t = spanishLines.join('\n').trim();
    }
  }

  // --- 4c. "Zelin would say:" / "My response:" lead-ins -----------------------
  // Only cut when the marker is not at position 0 and something follows it.
  const promptPatterns = [
    /Zelin (?:would|should|might) (?:say|respond|reply|answer)[:\s]*\n?/i,
    /(?:Mi respuesta|My response|My answer|Response)[:\s]*\n?/i,
    /(?:AsΓ­ responderΓ­a|Here's how|Here is what)[:\s]*\n?/i,
  ];
  for (const p of promptPatterns) {
    const match = t.match(p);
    if (match && match.index > 0) {
      const after = t.slice(match.index + match[0].length).trim();
      if (after.length > 2) t = after;
    }
  }

  // --- 4d. Recited "user: ... zelin: ..." transcripts -------------------------
  // Keep only what precedes the first "user:"; if nothing usable precedes it,
  // fall back to the short filler reply 'ns'.
  if (/\buser:/i.test(t)) {
    const beforeUser = t.split(/\buser:/i)[0].trim();
    if (beforeUser.length > 2) {
      t = beforeUser;
    } else {
      t = 'ns';
    }
  }
  if (/\bzelin:\s*/i.test(t) && !/^zelin:/i.test(t)) {
    const beforeZelin = t.split(/\bzelin:\s*/i)[0].trim();
    if (beforeZelin.length > 2) t = beforeZelin;
  }
  t = t.replace(/^zelin:\s*/i, '');

  // --- 5. /think and /no_think markers, labelled reasoning --------------------
  const noThinkIdx = t.indexOf('/no_think');
  if (noThinkIdx !== -1) {
    // 9 === '/no_think'.length
    const afterNoThink = t.slice(noThinkIdx + 9).trim();
    if (afterNoThink.length > 2) t = afterNoThink;
  }
  t = t.replace(/^\/think\s*/i, '');
  t = t.replace(/^\/no_think\s*/i, '');
  t = t.replace(/^Pienso[\s\S]*?\nRespuesta:\s*/i, '');
  t = t.replace(/^Pensamiento:[\s\S]*?\nRespuesta:\s*/i, '');
  // Labelled analysis blocks: cut up to the first word that usually opens the reply.
  t = t.replace(/^An[aΓ‘]lisis:[\s\S]*?(?=Ahora|Bien|Ok|SΓ­|No|Nah|Wey|Oye|hmm|bueno)/i, '');
  t = t.replace(/^Razonamiento:[\s\S]*?(?=Ahora|Bien|Ok|SΓ­|No|Nah|Wey|Oye|hm|bueno)/i, '');
  // "Final answer" style prefixes.
  t = t.replace(/^Respuesta final:\s*/i, '');
  t = t.replace(/^Final answer:\s*/i, '');
  t = t.replace(/^Respuesta:\s*/i, '');

  // --- 6. Cosmetic clean-up ---------------------------------------------------
  // Tool-call placeholders, complete or truncated at the front.
  t = t.replace(/\[(?:mc_status|mc_player|mc_wiki|hora actual|usar\s+\w+\s+para\s+dato\s+real)\]/gi, '');
  t = t.replace(/c_status\]/g, '');
  t = t.replace(/ora actual\]/g, '');
  t = t.replace(/\w+_(?:status|player|wiki|info)\]/g, '');
  // Formal refusals are replaced by casual ones.
  t = t.replace(/no puedo cumplir (esa|este|aquella) (solicitud|request|orden|instrucci[oΓ³]n)/gi, 'eso no va');
  t = t.replace(/no puedo procesar esa solicitud/gi, 'eso no va');
  t = t.replace(/lo siento,? pero no puedo/gi, 'nah');
  t = t.replace(/disculpa,? pero no puedo/gi, 'nah');
  t = t.replace(/lamentablemente no puedo/gi, 'nah');
  // "nop, soy ..." at the start reads wrong; normalise to "no, soy ...".
  t = t.replace(/^nop,\s*soy\s+/gi, 'no, soy ');
  // Name typos.
  t = t.replace(/\bzel[eΓ©]n\b/gi, 'zelin');
  t = t.replace(/\brezin\b/gi, 'zelin');
  t = t.replace(/\bzelen\b/gi, 'zelin');
  t = t.replace(/\btomatitoo\b/gi, 'tomatito');
  // Assistant-style closing questions at the very end of the reply.
  t = t.replace(/[,\s]*ΒΏ?en qu[eΓ©]\s+(te\s+)?puedo\s+ayud[aeo]r?[ΒΏ?]?\.?\s*$/gi, '');
  t = t.replace(/[,\s]*ΒΏ?(algo\s+)?m[aΓ‘]s\s+en\s+(lo\s+que\s+)?pued[ao]\s+ayud[aeo]r?[ΒΏ?]?\.?\s*$/gi, '');
  t = t.replace(/[,\s]*ΒΏ?necesitas\s+(algo\s+)?m[aΓ‘]s[ΒΏ?]?\.?\s*$/gi, '');
  // Known garbled phrases.
  t = t.replace(/\besti con vos/gi, 'suerte con eso');
  t = t.replace(/\bwienes/gi, 'bien, y tΓΊ');
  // A full identity recitation is shortened.
  if (/^soy zelin,? (la morra |la )?del server minecraft? tomatesmp\.?$/i.test(t)) {
    t = 'soy zelin del server';
  }
  return t.trim();
}
// ── Modelos descubiertos dinΓ‘micamente por proveedor ─────────────────────────
// Se actualizan al arrancar y cada 6h para que no queden obsoletos
// Model ids discovered per provider name (e.g. _discoveredModels.groq = ['...']).
// Filled by discoverModels() and read by getBestModel().
const _discoveredModels = {};

/**
 * Query a provider's model-listing endpoint and store the returned ids in
 * `_discoveredModels[providerName]`.
 *
 * Discovery is best-effort: a missing API key, a non-2xx response or an
 * unknown provider name leaves the previous list untouched. Network and
 * parsing errors are caught and logged as a warning instead of being thrown,
 * so callers never need to handle a rejection.
 *
 * @param {string} providerName - 'groq', 'mistral' or 'pollinations'; any
 *   other value is ignored.
 * @returns {Promise<void>}
 */
async function discoverModels(providerName) {
  try {
    switch (providerName) {
      case 'groq': {
        if (!ai.groq?.apiKey) return;
        const r = await fetch('https://api.groq.com/openai/v1/models', {
          headers: { Authorization: `Bearer ${ai.groq.apiKey}` },
          signal: AbortSignal.timeout(5000),
        });
        if (!r.ok) return;
        const data = await r.json();
        // Entries explicitly flagged as inactive are dropped.
        const models = (data.data ?? []).filter(m => m.active !== false).map(m => m.id);
        _discoveredModels.groq = models;
        console.log(`[AI] Groq models: ${models.length} (${models.slice(0,3).join(', ')}...)`);
        break;
      }
      case 'mistral': {
        if (!ai.mistral?.apiKey) return;
        const r = await fetch('https://api.mistral.ai/v1/models', {
          headers: { Authorization: `Bearer ${ai.mistral.apiKey}` },
          signal: AbortSignal.timeout(5000),
        });
        if (!r.ok) return;
        const data = await r.json();
        const models = (data.data ?? []).map(m => m.id);
        _discoveredModels.mistral = models;
        console.log(`[AI] Mistral models: ${models.length}`);
        break;
      }
      case 'pollinations': {
        // No API key is sent for this endpoint.
        const r = await fetch('https://text.pollinations.ai/models', { signal: AbortSignal.timeout(5000) });
        if (!r.ok) return;
        const data = await r.json();
        // Entries may be objects with a `name` or plain strings.
        _discoveredModels.pollinations = Array.isArray(data) ? data.map(m => m.name ?? m) : [];
        console.log(`[AI] Pollinations models: ${_discoveredModels.pollinations.length}`);
        break;
      }
    }
  } catch (err) {
    // Still best-effort (never rethrown), but no longer silent: without a
    // trace, a permanently failing discovery is indistinguishable from a
    // provider that simply has no key configured.
    console.warn(`[AI] Model discovery failed for ${providerName}: ${err?.message ?? err}`);
  }
}
// Seleccionar el mejor modelo disponible de un proveedor
/**
 * Pick a discovered model id for a provider, or fall back to a fixed id.
 *
 * @param {string} providerName - Key into `_discoveredModels`.
 * @param {string} fallback - Returned when nothing suitable was discovered.
 * @param {string} [preference='large'] - 'fast' or 'large'; any other value
 *   yields `fallback`.
 * @returns {string} The first discovered text model whose id matches the
 *   preference pattern, otherwise `fallback`.
 */
function getBestModel(providerName, fallback, preference = 'large') {
  // Ids containing any of these fragments are treated as non-chat models.
  const nonTextModel = /whisper|tts|speech|audio|embed|vision|image|dall|stable|rerank|guard|code-gecko|text-bison/i;

  let sizeHint;
  if (preference === 'fast') {
    sizeHint = /8b|7b|fast|instant|flash|mini/i;
  } else if (preference === 'large') {
    sizeHint = /70b|72b|large|versatile|plus|pro/i;
  } else {
    return fallback;
  }

  const discovered = _discoveredModels[providerName];
  if (!discovered?.length) {
    return fallback;
  }

  const candidates = discovered.filter((id) => !nonTextModel.test(id));
  const chosen = candidates.find((id) => sizeHint.test(id));
  return chosen ?? fallback;
}
// Descubrir en background al arrancar
/**
 * Kick off model discovery for every provider now and again every six hours.
 *
 * Nothing is awaited: each discovery runs in the background and its rejection
 * is ignored. The interval is never cleared by this function.
 */
export function startModelDiscovery() {
  const providers = ['groq', 'mistral', 'pollinations'];
  const ignoreFailure = () => {};
  const sixHoursMs = 6 * 60 * 60 * 1000;

  // One refresh pass: the three listed providers, then HuggingFace.
  const refreshAll = () => {
    for (const provider of providers) {
      discoverModels(provider).catch(ignoreFailure);
    }
    discoverHFModels().catch(ignoreFailure);
  };

  refreshAll();
  setInterval(refreshAll, sixHoursMs);
}
// ── Tiers y cascadas (GLM 5.1 primero, API-first, local fallback) ────────────
// Provider names grouped by tier. In the visible code TIERS is only used to
// build ALL_PROVIDERS below.
const TIERS = {
fast : ['glmFlash', 'groqFast', 'cerebras', 'pollinations', 'hfFast'],
smart : ['glm51', 'pollinations', 'groq', 'hfSmart', 'mistral', 'gemma4', 'cloudflare'],
fallback: ['glmAir', 'openrouter', 'groqKimi', 'openrouterR1', 'mistralCode', 'hfFallback'],
};
// Ordered provider lists per task type.
// NOTE(review): presumably tried left to right until one succeeds; the
// consumer is outside the visible part of the file - confirm.
const CASCADES = {
// v11: 'hive' (described by the author as a RigoChat-7B worker cluster) is
// listed first in every cascade except 'code', which also has no 'local' entry.
chat : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'groqFast', 'pollinations', 'hfSmart', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'local'],
spanish : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'hfSpanish', 'pollinations', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'groqKimi', 'local'],
fast : ['hive', 'groqFast', 'cerebras', 'glmFlash', 'pollinations', 'hfFast', 'groq', 'mistral', 'cloudflare', 'openrouter', 'local'],
reasoning : ['hive', 'groqQwen3', 'cerebras', 'groqKimi', 'hfReasoning', 'openrouterR1', 'glm51', 'groq', 'mistral', 'pollinations', 'gemma4', 'cloudflare', 'local'],
code : ['groqQwen3', 'glm51', 'mistralCode', 'hfCode', 'groq', 'pollinations', 'mistral', 'openrouter'],
volume : ['hive', 'groqFast', 'glmFlash', 'pollinations', 'cerebras', 'hfFast', 'gemma4', 'cloudflare', 'groqQwen3', 'local'],
background: ['hive', 'glmFlash', 'mistral', 'groqFast', 'groq', 'hfFast', 'pollinations', 'cloudflare', 'gemma4', 'openrouter', 'local'],
};
// Names of the GLM-backed providers.
const GLM_PROVIDERS = ['glm51', 'glmAir', 'glmFlash'];
// De-duplicated union of every tier plus four names that appear in no tier.
// It does not contain hfSpanish, hfCode or hfReasoning; those are only listed
// in HF_PROVIDERS.
const ALL_PROVIDERS = [...new Set([...TIERS.fast, ...TIERS.smart, ...TIERS.fallback, 'groqQwen3', 'cerebras', 'local', 'hive'])];
// Names of the HuggingFace-backed providers.
const HF_PROVIDERS = ['hfFast', 'hfSmart', 'hfSpanish', 'hfCode', 'hfReasoning', 'hfFallback'];
// ── LΓ­mites diarios ───────────────────────────────────────────────────────────
// Requests-per-day budget for each provider name. Read by healthScore(),
// isQuotaOk() and predictExhaustion(); a provider without an entry is treated
// as unlimited by those functions. The rate figures in the comments below are
// the original author's notes and are not verified by this file.
const DAILY_LIMITS = {
// Local: no limit - own model
local : 99999,
// Qwen3-32B (Groq) - tournament #1, primary model
groqQwen3 : 1000, // Qwen3-32B via Groq - 60 RPM, 1000 RPD
// Cerebras - tournament #2, ultra-fast reasoning
cerebras : 14400, // 30 RPM, 1M TPD
// GLM: free/unlimited
glm51 : 99999,
glmAir : 99999,
glmFlash : 99999,
// Other providers
pollinations : 99999,
groq : 1000,
groqFast : 14400,
mistral : 99999,
mistralCode : 99999,
gemma4 : 86400,
openrouter : 1000,
cloudflare : 10000,
groqKimi : 1000,
openrouterR1 : 1000,
hfFast : 5000,
hfSmart : 2000,
hfSpanish : 3000,
hfCode : 2000,
hfReasoning : 1000,
hfFallback : 500,
hive : 99999, // HIVE - no limit, own cluster
};
// ── Circuit Breaker ───────────────────────────────────────────────────────────
/**
 * Per-provider circuit breaker with three states:
 *   CLOSED    - requests allowed;
 *   OPEN      - requests refused until the cooldown has elapsed;
 *   HALF_OPEN - cooldown elapsed, requests allowed again; the next failure
 *               re-opens the circuit, the next success closes it.
 *
 * The cooldown doubles each time the circuit opens (30 s, 60 s, ...) and is
 * capped at 10 minutes. Authentication failures (401/403) lock the circuit
 * for a little over 24 hours.
 */
class CircuitBreaker {
  /**
   * @param {string} name - Provider name, kept for identification only.
   */
  constructor(name) {
    this.name = name;
    this.state = 'CLOSED';
    this.failTimes = [];       // timestamps (ms) of recent non-timeout failures
    this.openCount = 0;        // how many times the circuit has opened since the last success
    this.lastFail = 0;         // timestamp (ms) the cooldown is measured from
    this.threshold = 6;        // weighted failures within 60 s that open the circuit
    this.cooldown = 30000;     // base cooldown in ms
    this.maxCooldown = 600000; // cooldown cap in ms
  }

  /**
   * @returns {boolean} Whether a request may be attempted now. Moves the
   *   breaker to HALF_OPEN when the cooldown of an open circuit has elapsed.
   */
  canRequest() {
    if (this.state === 'CLOSED') return true;
    const cd = Math.min(this.cooldown * Math.pow(2, this.openCount - 1), this.maxCooldown);
    if (Date.now() - this.lastFail > cd) { this.state = 'HALF_OPEN'; return true; }
    return false;
  }

  /** Reset the breaker to a clean CLOSED state. */
  recordSuccess() {
    this.failTimes = [];
    this.openCount = 0;
    this.state = 'CLOSED';
  }

  /**
   * Register a failed request.
   *
   * @param {number} code - HTTP status (or similar) of the failure.
   * @param {boolean} [isTimeout=false] - Whether the failure was a timeout.
   */
  recordFailure(code, isTimeout = false) {
    const now = Date.now();
    this.lastFail = now;
    if (!isTimeout) this.failTimes.push(now);

    if (code === 401 || code === 403) {
      // Bad credentials will not fix themselves: open the circuit and push
      // the cooldown reference 24 h into the future.
      this.state = 'OPEN';
      this.openCount = 99;
      this.lastFail = now + 86_400_000;
      return;
    }

    // Keep only the failures inside the 60 s window. Storing the pruned list
    // prevents failTimes from growing without bound while a provider keeps
    // failing and never reports a success.
    this.failTimes = this.failTimes.filter(t => now - t < 60000);

    // NOTE(review): timeouts are never added to failTimes, so this weight only
    // scales the count of non-timeout failures when the current failure is a
    // timeout. Confirm that timeouts are not meant to accumulate.
    const weight = isTimeout ? 0.3 : 1.0;
    if (this.failTimes.length * weight >= this.threshold || this.state === 'HALF_OPEN') {
      this.state = 'OPEN';
      this.openCount++;
    }
  }
}
// One circuit breaker per provider name, keyed by that name. Names that appear
// in more than one list simply get their breaker re-created.
const breakers = {};
[...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS].forEach((providerName) => {
  breakers[providerName] = new CircuitBreaker(providerName);
});
// The hive breaker is replaced by a more tolerant one: it needs 10 failures
// instead of the default before it opens.
breakers['hive'] = Object.assign(new CircuitBreaker('hive'), { threshold: 10 });
// ── EstadΓ­sticas por proveedor ─────────────────────────────────────────────────
// Per-provider counters, keyed by provider name.
const _stats = {};

/**
 * Return the counter record for a provider, creating a zeroed one on first use.
 * @param {string} name - Provider name.
 * @returns {{req: number, err: number, totalMs: number, calls: number}}
 */
function getCounter(name) {
  _stats[name] ||= { req: 0, err: 0, totalMs: 0, calls: 0 };
  return _stats[name];
}

/** Count one request for `name`. */
function recordReq(name) {
  getCounter(name).req += 1;
}

/** Count one error for `name`. */
function recordError(name) {
  getCounter(name).err += 1;
}

/** Add one latency sample of `ms` milliseconds for `name`. */
function recordLatency(name, ms) {
  const counter = getCounter(name);
  counter.calls += 1;
  counter.totalMs += ms;
}
// ── Daily counter reset ────────────────────────────────────────────────────────
// UTC day-of-month on which the counters were last reset.
let _lastResetDay = new Date().getUTCDate();

/**
 * Zero the request and error counters of every provider when the UTC
 * day-of-month has changed since the previous reset. Latency totals are left
 * untouched.
 */
function checkDailyReset() {
  const currentDay = new Date().getUTCDate();
  if (currentDay === _lastResetDay) {
    return;
  }
  _lastResetDay = currentDay;
  for (const counter of Object.values(_stats)) {
    counter.req = 0;
    counter.err = 0;
  }
  console.log('[AI] πŸ”„ Daily counters reset');
}
// ── Health Score ──────────────────────────────────────────────────────────────
/**
 * Compute a score for a provider from its counters; higher is better.
 *
 * The score is the sum of three weighted parts:
 *   - success rate (weight 0.5), with the success count floored at 1;
 *   - inverse of the average latency in seconds (weight 0.3), assuming
 *     2000 ms when no latency has been recorded yet;
 *   - remaining share of the daily quota (weight 0.2), 1 when the provider
 *     has no configured limit.
 *
 * @param {string} name - Provider name.
 * @returns {number}
 */
function healthScore(name) {
  const counter = getCounter(name);

  const successes = Math.max(1, counter.req - counter.err);
  const successRate = successes / Math.max(1, counter.req);

  let avgLatencyMs = 2000;
  if (counter.calls > 0) {
    avgLatencyMs = counter.totalMs / counter.calls;
  }

  let quotaLeft = 1;
  if (DAILY_LIMITS[name]) {
    quotaLeft = 1 - (counter.req / DAILY_LIMITS[name]);
  }

  const ratePart = successRate * 0.5;
  const speedPart = (1 / (avgLatencyMs / 1000)) * 0.3;
  const quotaPart = quotaLeft * 0.2;
  return ratePart + speedPart + quotaPart;
}
// ── Quota ─────────────────────────────────────────────────────────────────────
/**
 * Tell whether a provider is still below 95% of its daily request limit.
 *
 * @param {string} name - Provider name.
 * @returns {boolean} `true` when the provider has no configured limit or its
 *   request count is under 95% of it.
 */
function isQuotaOk(name) {
  const dailyLimit = DAILY_LIMITS[name];
  if (!dailyLimit) {
    return true;
  }
  const used = getCounter(name).req;
  return used < dailyLimit * 0.95;
}
/**
 * Project today's request count to the end of the day and report whether it
 * would exceed 85% of the provider's daily limit.
 *
 * The projection assumes the request rate observed so far today stays
 * constant. Hours are taken in UTC because the counters are reset when the
 * UTC date changes (see checkDailyReset); using local time would measure the
 * elapsed part of the day from the wrong starting point on any host whose
 * time zone is not UTC.
 *
 * @param {string} name - Provider name.
 * @returns {boolean} `false` when the provider has no configured limit.
 */
function predictExhaustion(name) {
  const counter = getCounter(name);
  const limit = DAILY_LIMITS[name];
  if (!limit) return false;

  const hour = new Date().getUTCHours();
  // During the first hour, treat one hour as elapsed to avoid dividing by zero.
  const ratePerHour = counter.req / Math.max(hour, 1);
  const projected = counter.req + ratePerHour * (24 - hour);
  return projected > limit * 0.85;
}
// ── Adaptive tokens ───────────────────────────────────────────────────────────
/**
 * Choose a max-token budget from the task type and the message length.
 *
 * @param {string|null|undefined} msg - User message; only its length is used.
 * @param {string} task - Task type ('fast', 'code', 'reasoning', ...).
 * @param {number} [req] - Explicit budget; returned as-is when truthy.
 * @returns {number}
 */
function adaptiveTokens(msg, task, req) {
  if (req) {
    return req;
  }

  const length = (msg ?? '').length;

  // Fast tasks and very short messages get the smallest budget.
  if (task === 'fast' || length < 50) {
    return 150;
  }

  switch (task) {
    case 'code':
      return 1024;
    case 'reasoning':
      return 600;
    default:
      break;
  }

  // Otherwise scale with the message length.
  if (length < 100) {
    return 200;
  }
  return length < 300 ? 400 : 512;
}
// ── SingleFlight ──────────────────────────────────────────────────────────────
// Promises currently in flight, keyed by request key.
const _inflight = new Map();

/**
 * Share one in-flight promise between concurrent callers using the same key.
 *
 * @param {*} key - Identity of the operation.
 * @param {function(): Promise} fn - Starts the operation; only invoked when
 *   no promise is registered for `key`.
 * @returns {Promise} The registered promise; its entry is removed once it
 *   settles, whether it fulfils or rejects.
 */
function singleFlight(key, fn) {
  if (!_inflight.has(key)) {
    const tracked = fn().finally(() => {
      _inflight.delete(key);
    });
    _inflight.set(key, tracked);
    return tracked;
  }
  return _inflight.get(key);
}
// ── TTL Cache ─────────────────────────────────────────────────────────────────
/**
 * In-memory cache whose entries expire after a time-to-live chosen from the
 * text of the message they belong to. Holds at most 1000 entries; the oldest
 * inserted entry is evicted when that size is exceeded.
 */
class TTLCache {
  constructor() {
    // key -> { value, exp } where exp is an absolute timestamp in ms
    this.store = new Map();
  }

  /**
   * Time-to-live in milliseconds for a message.
   * @param {string} [msg=''] - Message text; matched case-insensitively.
   * @returns {number} 24 h for rules/info topics, 30 s for live server
   *   status topics, 1 h otherwise. The first matching rule wins.
   */
  _ttl(msg = '') {
    const lowered = msg.toLowerCase();
    const ttlRules = [
      [/regla|norma|plugin|info|servidor/, 86_400_000],
      [/online|jugador|tps|lag/, 30_000],
    ];
    const matched = ttlRules.find(([pattern]) => pattern.test(lowered));
    return matched ? matched[1] : 3_600_000;
  }

  /**
   * Build a cache key from a conversation and a task type.
   * Only the role and the first 100 characters of each message contribute.
   * @param {Array<{role: string, content: ?string}>} msgs
   * @param {string} task
   * @returns {string} `<task>:<unsigned 32-bit hash>`
   */
  key(msgs, task) {
    const fingerprint = msgs.map((m) => `${m.role}:${(m.content ?? '').slice(0, 100)}`);
    const text = `${fingerprint.join('|')}${task}`;
    // djb2-style rolling hash kept in the signed 32-bit range.
    let hash = 5381;
    for (let pos = 0; pos < text.length; pos += 1) {
      hash = ((hash << 5) + hash + text.charCodeAt(pos)) | 0;
    }
    return `${task}:${hash >>> 0}`;
  }

  /**
   * @param {string} k - Cache key.
   * @returns {*} The stored value, or `null` when absent or expired
   *   (expired entries are removed on access).
   */
  get(k) {
    const entry = this.store.get(k);
    if (!entry) {
      return null;
    }
    const expired = Date.now() > entry.exp;
    if (expired) {
      this.store.delete(k);
      return null;
    }
    return entry.value;
  }

  /**
   * Store a value; `msg` only determines the time-to-live.
   * @param {string} k - Cache key.
   * @param {*} v - Value to store.
   * @param {string} [msg=''] - Message text used to pick the TTL.
   */
  set(k, v, msg = '') {
    const exp = Date.now() + this._ttl(msg);
    this.store.set(k, { value: v, exp });
    if (this.store.size > 1000) {
      // Map iteration follows insertion order, so the first key is the oldest.
      const oldestKey = this.store.keys().next().value;
      this.store.delete(oldestKey);
    }
  }

  /** Remove every entry and log that the cache was cleared. */
  clear() {
    this.store.clear();
    console.log('[AI] Cache limpiada');
  }
}
export const _cache = new TTLCache();
// ── Pools de concurrencia ─────────────────────────────────────────────────────
// Concurrency pools: at most 4 simultaneous user jobs and 2 background jobs.
const _userQueue = { active: 0, max: 4 };
const _backgroundQueue = { active: 0, max: 2 };

/**
 * Run `fn` once the given pool has a free slot.
 *
 * While the pool is full the call polls every 50 ms. The slot is released
 * when `fn` settles, whether it fulfils or rejects.
 *
 * @param {{active: number, max: number}} queue - Pool state, mutated in place.
 * @param {function(): Promise} fn - Job to run.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withQueue(queue, fn) {
  const pause = () => new Promise((resolve) => setTimeout(resolve, 50));
  while (queue.active >= queue.max) {
    await pause();
  }
  queue.active += 1;
  try {
    return await fn();
  } finally {
    queue.active -= 1;
  }
}
// ── Intent classifier (0 tokens) β€” SIN REGEX, keyword-based ──────────────────
// Replaces regex-based INTENT_PATTERNS with a cleaner keyword matching system
// Keyword rules, evaluated top to bottom; the first rule with a matching
// keyword decides the result. `matchMode` is 'startsWith' or 'contains'.
const INTENT_RULES = [
  {
    intent: 'greeting',
    type: 'fast',
    matchMode: 'startsWith',
    keywords: ['hola', 'hey', 'hi', 'buenas', 'quΓ© tal', 'ola', 'saludos', 'wenas'],
  },
  {
    intent: 'rules',
    type: 'volume',
    matchMode: 'contains',
    keywords: ['regla', 'norma', 'prohibi', 'permit'],
  },
  {
    intent: 'code',
    type: 'code',
    matchMode: 'contains',
    keywords: ['```', '.yml', '.json', '.java', 'config'],
  },
  {
    intent: 'moderation',
    type: 'reasoning',
    matchMode: 'contains',
    keywords: ['ban', 'sancion', 'report', 'trampa', 'hack', 'cheat'],
  },
  {
    intent: 'complex',
    type: 'reasoning',
    matchMode: 'contains',
    keywords: ['analiza', 'explica', 'compara', 'argumenta'],
  },
  {
    intent: 'question',
    type: 'spanish',
    matchMode: 'contains',
    keywords: ['cΓ³mo', 'quΓ©', 'cuΓ‘l', 'dΓ³nde', 'cuΓ‘ndo', 'por quΓ©'],
  },
];

/**
 * Classify a message by plain substring matching.
 *
 * The message is trimmed and lower-cased first. Messages of 1 to 20
 * characters are classified as 'short' before any rule is consulted.
 *
 * @param {string|null|undefined} message
 * @returns {{intent: string, type: string}} The matched intent and its task
 *   type, or `{ intent: 'general', type: 'spanish' }` when no rule matches.
 */
export function classifyIntent(message) {
  const normalized = (message ?? '').trim().toLowerCase();

  const isShort = normalized.length > 0 && normalized.length <= 20;
  if (isShort) {
    return { intent: 'short', type: 'fast' };
  }

  const matchers = {
    startsWith: (keyword) => normalized.startsWith(keyword),
    contains: (keyword) => normalized.includes(keyword),
  };

  const winner = INTENT_RULES.find((rule) => {
    const matches = Object.hasOwn(matchers, rule.matchMode) ? matchers[rule.matchMode] : null;
    return matches !== null && rule.keywords.some(matches);
  });

  if (winner) {
    return { intent: winner.intent, type: winner.type };
  }
  return { intent: 'general', type: 'spanish' };
}
// ── Gemini Rotator ────────────────────────────────────────────────────────────
/**
 * Round-robin selector over the configured Gemini API keys.
 *
 * `get()` prefers keys that have not been marked as failed and were last
 * handed out more than 65 s ago. When no key qualifies, the failed set is
 * cleared and the rotation continues over all keys (yielding `undefined`
 * when none is configured). `fail(key)` marks a key as failed.
 */
const geminiRotator = (() => {
  const keys = ai.gemini?.keys ?? [ai.gemini?.apiKey].filter(Boolean);
  const failedKeys = new Set();
  const lastUsedAt = new Map();
  let cursor = 0;

  const isUsable = (candidate) =>
    !failedKeys.has(candidate) && Date.now() - (lastUsedAt.get(candidate) ?? 0) > 65000;

  function get() {
    const usable = keys.filter(isUsable);
    if (usable.length) {
      const picked = usable[cursor++ % usable.length];
      lastUsedAt.set(picked, Date.now());
      return picked;
    }
    // Nothing usable: forgive earlier failures and keep rotating over all
    // keys. The last-used time is not updated on this path.
    failedKeys.clear();
    return keys[cursor++ % keys.length];
  }

  function fail(k) {
    failedKeys.add(k);
  }

  return { get, fail };
})();
// ── FunciΓ³n base OpenAI-compatible ───────────────────────────────────────────
// v9: Handles GLM-5.1 reasoning_content (separate from content)
/**
 * Send a non-streaming chat completion request to an OpenAI-compatible
 * endpoint and return the cleaned text of the first choice.
 *
 * @param {string} url - Full chat-completions endpoint URL.
 * @param {string} key - Bearer token.
 * @param {string} model - Model id sent in the request body.
 * @param {Array<{role: string, content: string}>} messages - Conversation.
 * @param {number} maxTokens - Value sent as `max_tokens`.
 * @param {string} [systemOverride] - When truthy, replaces every system
 *   message with a single one carrying this text.
 * @returns {Promise<string>} The reply after cleanThinkingArtifacts(), or ''
 *   when the response carries no message.
 * @throws {Error} On a non-2xx response; the error has `status` and `code`
 *   set to the HTTP status. The request is aborted after 15 s.
 */
async function oai(url, key, model, messages, maxTokens, systemOverride) {
  let outgoing = messages;
  if (systemOverride) {
    const withoutSystem = messages.filter((m) => m.role !== 'system');
    outgoing = [{ role: 'system', content: systemOverride }, ...withoutSystem];
  }

  const response = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` },
    body: JSON.stringify({ model, messages: outgoing, max_tokens: maxTokens, stream: false }),
    signal: AbortSignal.timeout(15000),
  });

  if (!response.ok) {
    // Include the start of the error body for diagnosis; ignore read failures.
    const errorBody = await response.text().catch(() => '');
    const failure = new Error(`${model} ${response.status}: ${errorBody.slice(0, 100)}`);
    failure.status = response.status;
    failure.code = response.status;
    throw failure;
  }

  const payload = await response.json();
  const reply = payload.choices?.[0]?.message;
  if (!reply) {
    return '';
  }

  const rawContent = reply.content?.trim() ?? '';

  // Separate reasoning output is only logged, never returned.
  if (reply.reasoning_content) {
    console.log(`[AI] ${model}: reasoning_content present (${reply.reasoning_content.length} chars thinking, ${rawContent.length} chars content)`);
  }

  // The content is always cleaned, whether or not reasoning_content exists,
  // because reasoning can still leak into the content field.
  return cleanThinkingArtifacts(rawContent);
}
/**
 * Tell whether an error represents a timeout or an aborted request.
 *
 * @param {*} e - Error-like value; may be null or undefined.
 * @returns {boolean} `true` when the name is 'TimeoutError' or 'AbortError',
 *   or the message contains "timeout" (case-insensitive).
 */
function isTO(e) {
  const timeoutNames = ['TimeoutError', 'AbortError'];
  if (timeoutNames.some((known) => e?.name === known)) {
    return true;
  }
  return /timeout/i.test(e?.message ?? '');
}
// ── callDirect: implementaciΓ³n de cada proveedor ─────────────────────────────
async function callDirect(name, messages, maxTokens) {
const PERM = [401, 402, 403, 404, 422];
switch (name) {
// ── GLM (ZhipuAI) β€” proveedor primario, gratis/ilimitado ──────────────
case 'glm51': {
if (!glm.apiKey) throw new Error('glm51: sin key GLM');
return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
glm.apiKey, 'glm-5.1', messages, maxTokens);
}
case 'glmAir': {
if (!glm.apiKey) throw new Error('glmAir: sin key GLM');
return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
glm.apiKey, 'glm-4-air', messages, maxTokens);
}
case 'glmFlash': {
if (!glm.apiKey) throw new Error('glmFlash: sin key GLM');
return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
glm.apiKey, 'glm-4-flash', messages, maxTokens);
}
case 'pollinations': {
// Pollinations β€” free, unlimited, updated API endpoints
const last = messages.filter(m => m.role === 'user').pop()?.content ?? '';
const sys = messages.find(m => m.role === 'system')?.content ?? '';
// FIX: Try models in PARALLEL with Promise.any instead of sequentially
const POLL_MODELS = ['openai', 'mistral', 'llama', 'qwen'];
try {
const result = await Promise.any(
POLL_MODELS.map(model =>
fetch('https://text.pollinations.ai/openai/chat/completions', {
method : 'POST',
headers: { 'Content-Type': 'application/json' },
body : JSON.stringify({
model,
messages,
max_tokens: maxTokens,
stream : false,
seed : Math.floor(Math.random() * 9999),
}),
signal: AbortSignal.timeout(12000),
}).then(async res => {
if (!res.ok) throw new Error(`${model} ${res.status}`);
const data = await res.json();
const text = data.choices?.[0]?.message?.content?.trim();
if (!text || text.length <= 2) throw new Error(`${model} empty`);
return text;
})
)
);
if (result) return result;
} catch { /* all parallel attempts failed */ }
// Fallback: GET endpoint (anonymous, always works)
try {
const shortMsg = last.slice(0, 800);
const shortSys = sys.slice(0, 400);
const url = 'https://text.pollinations.ai/' +
encodeURIComponent(shortMsg) +
'?seed=' + Math.floor(Math.random() * 9999) +
(shortSys ? '&system=' + encodeURIComponent(shortSys) : '');
const r2 = await fetch(url, {
headers: { 'User-Agent': 'Mozilla/5.0' },
signal : AbortSignal.timeout(12000),
});
if (r2.ok) {
const t = await r2.text();
if (t?.trim() && t.trim().length > 2) return t.trim();
}
} catch {}
throw new Error('pollinations all endpoints failed');
}
case 'cerebras':
if (!ai.cerebras?.apiKey) throw new Error('cerebras: sin key');
// TORNEO #2: Qwen3-235B-A22B β€” ultra-fast reasoning, excellent Spanish
// Cerebras runs at ~2600 tokens/sec, fastest inference available
return oai(ai.cerebras.baseUrl, ai.cerebras.apiKey,
'qwen3-235b-a22b', messages, maxTokens);
// TORNEO #1: Qwen3-32B β€” best Spanish + speed + thinking
case 'groqQwen3':
if (!ai.groq?.apiKey) throw new Error('groqQwen3: sin key');
return oai(ai.groq.baseUrl, ai.groq.apiKey,
'qwen/qwen3-32b', messages, maxTokens);
case 'groq':
if (!ai.groq?.apiKey) throw new Error('groq: sin key');
// TORNEO #5: Llama-4-Scout β€” fast multilingual, official Spanish
return oai(ai.groq.baseUrl, ai.groq.apiKey,
getBestModel('groq', 'llama-4-scout-17b-16e-instruct', 'large'), messages, maxTokens);
case 'groqFast':
if (!ai.groq?.apiKey) throw new Error('groqFast: sin key');
return oai(ai.groq.baseUrl, ai.groq.apiKey,
getBestModel('groq', 'llama-3.1-8b-instant', 'fast'), messages, maxTokens);
case 'groqKimi':
if (!ai.groq?.apiKey) throw new Error('groqKimi: sin key');
return oai(ai.groq.baseUrl, ai.groq.apiKey, 'moonshotai/kimi-k2-instruct', messages, maxTokens);
case 'mistral':
if (!ai.mistral?.apiKey) throw new Error('mistral: sin key');
return oai(ai.mistral.baseUrl, ai.mistral.apiKey,
getBestModel('mistral', 'mistral-large-latest', 'large'), messages, maxTokens);
case 'mistralCode':
if (!ai.mistral?.apiKey) throw new Error('mistralCode: sin key');
return oai(ai.mistral.baseUrl, ai.mistral.apiKey, 'codestral-latest', messages, maxTokens);
case 'gemma4': {
const entry = geminiRotator.get();
if (!entry) throw new Error('gemma4: sin key Gemini');
// Extraer system prompt β€” Gemini lo necesita como systemInstruction separado
const sysMsgs = messages.filter(m => m.role === 'system');
const sysText = sysMsgs.map(m => m.content ?? '').join('\n\n');
const chatMsgs = messages.filter(m => m.role !== 'system');
// Convertir a formato Gemini (assistant β†’ model)
const contents = chatMsgs.map(m => ({
role : m.role === 'assistant' ? 'model' : 'user',
parts: [{ text: m.content ?? '' }],
}));
// Si no hay mensajes de chat, aΓ±adir uno vacΓ­o para que no falle
if (!contents.length) contents.push({ role: 'user', parts: [{ text: '.' }] });
const body = {
contents,
generationConfig: {
maxOutputTokens: maxTokens,
temperature: 0.8,
topP: 0.95,
},
};
// Inyectar system prompt como systemInstruction (soportado por Gemini API)
if (sysText) {
body.systemInstruction = { parts: [{ text: sysText }] };
}
// Gemma 4 31B IT β€” mejor modelo Gemma para chat en espaΓ±ol, via Gemini API
const res = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemma-4-31b-it:generateContent?key=${entry}`,
{
method : 'POST',
headers: { 'Content-Type': 'application/json' },
body : JSON.stringify(body),
signal : AbortSignal.timeout(15000),
}
);
if (!res.ok) { geminiRotator.fail(entry); throw Object.assign(new Error(`gemma4 ${res.status}`), { status: res.status, code: res.status }); }
const data = await res.json();
return data.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? '';
}
case 'openrouter': {
if (!ai.openrouter?.apiKey) throw new Error('openrouter: sin key');
const model = 'meta-llama/llama-3.3-70b-instruct:free';
const res = await fetch(ai.openrouter.baseUrl, {
method : 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw', 'X-Title': 'TomateSMP' },
body : JSON.stringify({ model, messages, max_tokens: maxTokens }),
signal : AbortSignal.timeout(20000),
});
if (!res.ok) throw Object.assign(new Error(`openrouter ${res.status}`), { status: res.status, code: res.status });
const data = await res.json();
if (data.error) throw new Error(`openrouter: ${data.error.message ?? data.error}`);
return data.choices?.[0]?.message?.content?.trim() ?? '';
}
case 'openrouterR1': {
if (!ai.openrouter?.apiKey) throw new Error('openrouterR1: sin key');
const res = await fetch(ai.openrouter.baseUrl, {
method : 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw' },
body : JSON.stringify({ model: 'deepseek/deepseek-r1:free', messages, max_tokens: maxTokens }),
signal : AbortSignal.timeout(30000),
});
if (!res.ok) throw Object.assign(new Error(`openrouterR1 ${res.status}`), { status: res.status, code: res.status });
const data = await res.json();
return data.choices?.[0]?.message?.content?.trim() ?? '';
}
case 'cloudflare': {
if (!ai.cloudflare?.accountId || !ai.cloudflare?.apiToken) throw new Error('cloudflare: sin config');
const res = await fetch(
`https://api.cloudflare.com/client/v4/accounts/${ai.cloudflare.accountId}/ai/run/@cf/meta/llama-3.3-70b-instruct-fp8-fast`,
{
method : 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.cloudflare.apiToken}` },
body : JSON.stringify({ messages, max_tokens: maxTokens }),
signal : AbortSignal.timeout(20000),
}
);
if (!res.ok) throw Object.assign(new Error(`cloudflare ${res.status}`), { status: res.status, code: res.status });
const data = await res.json();
// Cloudflare AI devuelve distintos formatos segΓΊn el modelo
const r = data.result;
if (typeof r?.response === 'string') return r.response.trim();
if (Array.isArray(r) && r[0]?.response) return String(r[0].response).trim();
if (r?.choices?.[0]?.message?.content) return r.choices[0].message.content.trim();
if (typeof r === 'string') return r.trim();
return '';
}
// ── HuggingFace Inference API providers ───────────────────────────────
case 'hfFast':
if (!isHFAvailable()) throw new Error('hfFast: HF unavailable');
return callHuggingFace(messages, 'fast', maxTokens);
case 'hfSmart':
if (!isHFAvailable()) throw new Error('hfSmart: HF unavailable');
return callHuggingFaceCascade(messages, 'smart', maxTokens);
case 'hfSpanish':
if (!isHFAvailable()) throw new Error('hfSpanish: HF unavailable');
return callHuggingFaceCascade(messages, 'spanish', maxTokens);
case 'hfCode':
if (!isHFAvailable()) throw new Error('hfCode: HF unavailable');
return callHuggingFace(messages, 'code', maxTokens);
case 'hfReasoning':
if (!isHFAvailable()) throw new Error('hfReasoning: HF unavailable');
return callHuggingFaceCascade(messages, 'reasoning', maxTokens);
case 'hfFallback':
if (!isHFAvailable()) throw new Error('hfFallback: HF unavailable');
return callHuggingFace(messages, 'fast', maxTokens);
// ── HIVE β€” RigoChat-7B Cluster (ALL workers combined) ────────────────
case 'hive': {
try {
const userMsg = messages.filter(m => m.role === 'user').pop()?.content ?? '';
const result = await hiveGenerate(messages, maxTokens, userMsg, 'hybrid');
if (result?.text && result.text.trim().length > 2) {
return result.text;
}
throw new Error('hive: empty response');
} catch (err) {
throw Object.assign(new Error(`hive: ${err.message}`), { status: 503, code: 503 });
}
}
// ── Local model β€” primary local inference ────────────────────────────
case 'local': {
if (!isLocalAIReady()) throw new Error('local: model not ready');
const { localChatPrimary } = await import('./local-ai.js');
return localChatPrimary(messages, maxTokens, 0.8);
}
default:
throw new Error(`Proveedor desconocido: ${name}`);
}
}
// ── callProvider: CB + backoff + stats ────────────────────────────────────────
// HTTP status codes treated as permanent failures: the provider is taken out of
// rotation instead of being retried.
const PERM_ERRORS = [401, 402, 403, 404, 422];

/**
 * Calls a single provider behind its circuit breaker, with retries and stats.
 *
 * Retry policy (at most 3 attempts):
 *  - 429: wait 1.5-4.5 s (random jitter) and retry.
 *  - 400 on the first attempt whose message mentions length/tokens: shrink the
 *    conversation (system prompts cut to 1500 chars + last 2 non-system
 *    messages) and retry.
 *  - anything else: fail immediately.
 *
 * @param {string} name - Provider key; needs an entry in `breakers`.
 * @param {Array<{role: string, content: *}>} messages - Chat messages (not mutated).
 * @param {number} maxTokens - Forwarded to callDirect.
 * @param {{v: boolean}} [failedFlag] - Once `v` is true, further non-permanent
 *   failures carrying the same flag object are not reported to the breaker.
 * @returns {Promise<*>} Whatever callDirect resolves with.
 * @throws {Error} If the circuit is open, or the last provider error.
 */
async function callProvider(name, messages, maxTokens, failedFlag) {
  checkDailyReset();
  const cb = breakers[name];
  // A missing breaker is reported the same way as an open one.
  if (!cb?.canRequest()) throw new Error(`${name}: circuit OPEN`);
  const start = Date.now();
  recordReq(name);
  // Local working copy of the conversation so the parameter is never reassigned.
  let payload = messages;
  try {
    let result;
    for (let attempt = 0; attempt < 3; attempt++) {
      try {
        result = await callDirect(name, payload, maxTokens);
        break;
      } catch (e) {
        // 429: back off and retry
        if (e.status === 429 && attempt < 2) {
          await new Promise(r => setTimeout(r, 1500 + Math.random() * 3000));
          continue;
        }
        // 400 complaining about length: compress the context and retry ONCE
        if (e.status === 400 && attempt === 0 &&
            /length|too long|reduce|token/i.test(e.message ?? '')) {
          console.warn(`[AI] ${name}: mensaje muy largo, comprimiendo...`);
          // Keep only system messages + the last 2 non-system messages
          const sys = payload.filter(m => m.role === 'system');
          const rest = payload.filter(m => m.role !== 'system').slice(-2);
          // Trim system prompts to 1500 chars. Non-string content (e.g. null) is
          // passed through untouched instead of crashing on `.slice`, which would
          // otherwise replace the provider error with a TypeError.
          const shortSys = sys.map(m => (
            typeof m.content === 'string' ? { ...m, content: m.content.slice(0, 1500) } : m
          ));
          payload = [...shortSys, ...rest];
          continue;
        }
        throw e;
      }
    }
    cb.recordSuccess();
    // Latency covers every attempt, including back-off waits.
    recordLatency(name, Date.now() - start);
    return result;
  } catch (err) {
    const code = err.status ?? err.code ?? null;
    const isPermanent = PERM_ERRORS.includes(code);
    if (isPermanent) {
      // Force the breaker open with lastFail pushed 24 h into the future.
      // NOTE(review): presumably the breaker measures its cooldown from lastFail — confirm.
      cb.state = 'OPEN'; cb.openCount = 99; cb.lastFail = Date.now() + 86_400_000;
      console.warn(`[AI] ⛔ ${name} deshabilitado 24h (${code})`);
    } else if (!failedFlag?.v) {
      if (failedFlag) failedFlag.v = true;
      cb.recordFailure(code, isTO(err));
    }
    recordError(name);
    if (!isPermanent) {
      console.warn(`[AI] ❌ ${name}: ${err.message?.slice(0, 80)}`);
    }
    throw err;
  }
}
// ── Validación de respuestas — Sistema avanzado SIN regex ────────────────────
// v10: Reemplaza BAD_PATTERNS (regex) con validación semántica determinista
// que es más precisa, sin falsos positivos, y más mantenible.
// Frases de IA que NUNCA deben aparecer al inicio de una respuesta válida
// Lower-case prefixes that mark a response as an "I'm an AI" disclaimer.
const AI_DISCLAIMER_PREFIXES = [
  'as an ai',
  'as a language model',
  'como una ia',
  'como un modelo',
  "i'm an ai",
  'i am an ai',
  "i'm a language model",
  'i am a language model',
  'como inteligencia artificial',
  'como modelo de lenguaje',
  'as an assistant',
  'como asistente',
];

// Junk responses that indicate the model failed (compared lower-cased, whole string).
const GARBAGE_RESPONSES = new Set([
  'error', 'null', 'undefined', 'true', 'false', '{}', '[]',
  'nan', 'none', 'nil', 'void',
]);

/**
 * Cheap deterministic sanity check for a model response.
 *
 * Rejects: non-strings, fewer than 2 chars after trimming, punctuation-only
 * text, known garbage tokens, text longer than 2000 chars, text starting with
 * an AI disclaimer, and text where one word longer than 2 chars appears more
 * than 10 times.
 *
 * @param {*} text - Candidate response.
 * @returns {boolean} true when the response looks usable.
 */
function isValidResponse(text) {
  if (!text || typeof text !== 'string') return false;
  const t = text.trim();
  // Empty or whitespace-only
  if (t.length < 2) return false;
  // Only punctuation/whitespace
  const withoutPunct = t.replace(/[\s.,!?_\-:;'"()]/g, '');
  if (withoutPunct.length === 0) return false;
  const tLower = t.toLowerCase();
  // Garbage single values
  if (GARBAGE_RESPONSES.has(tLower)) return false;
  // Too long = probably prompt recitation
  if (t.length > 2000) return false;
  // AI disclaimer prefix check (case-insensitive)
  if (AI_DISCLAIMER_PREFIXES.some(prefix => tLower.startsWith(prefix))) return false;
  // Repetition-loop check: the same word (longer than 2 chars) more than 10 times.
  // A Map is used instead of a plain object so that words such as "constructor"
  // or "__proto__" cannot collide with inherited Object.prototype properties,
  // which previously corrupted the counter and let such loops through.
  // NOTE(review): this counts total occurrences, not consecutive ones, so a long
  // answer that legitimately reuses a common word 11+ times is also rejected.
  const words = tLower.split(/\s+/);
  if (words.length > 10) {
    const wordCounts = new Map();
    for (const w of words) {
      const count = (wordCounts.get(w) ?? 0) + 1;
      wordCounts.set(w, count);
      if (count > 10 && w.length > 2) return false;
    }
  }
  return true;
}
// ── AI-based response quality check (uses local model for fast validation) ──
// This replaces regex-based BAD_PATTERNS with semantic understanding
/**
 * Fast-path quality check: flags responses that contain an AI disclaimer or a
 * refusal phrase anywhere in the text (not only as a prefix).
 *
 * Only substring checks are performed here; no model is called.
 *
 * @param {string} text - Response to inspect.
 * @param {string} userMsg - Original user message (currently unused).
 * @returns {Promise<{valid: boolean, reason?: string, replacement?: string}>}
 *   `{ valid: true }`, or an invalid verdict with a canned replacement.
 */
async function aiValidateResponse(text, userMsg) {
  // Anything missing or shorter than 10 chars is accepted without inspection.
  const tooShortToJudge = !text || text.length < 10;
  if (tooShortToJudge) return { valid: true };

  const haystack = text.toLowerCase().trim();
  const disclaimerMarkers = [
    'como ia,',
    'como inteligencia artificial,',
    'as an ai,',
    'as a language model,',
    'i cannot fulfill',
    'no puedo cumplir esa solicitud',
    'no puedo procesar esa solicitud',
  ];
  const leaked = disclaimerMarkers.some(marker => haystack.includes(marker));

  return leaked
    ? { valid: false, reason: 'ai_disclaimer', replacement: 'eso no va' }
    : { valid: true };
}
// ── Router principal ──────────────────────────────────────────────────────────
/**
 * Safety valve: when every known provider's breaker is OPEN, force them all
 * back to CLOSED so the router has something to try.
 *
 * Only `state` and `openCount` are reset.
 * NOTE(review): this also re-closes breakers that callProvider opened for a
 * permanent error (401/403/...); confirm that is intended.
 */
function ensureAvailability() {
  const allProviders = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];
  // With no providers configured, "all are open" is vacuously true; skip the
  // misleading warning instead of logging a reset that resets nothing.
  if (allProviders.length === 0) return;
  const open = allProviders.filter(n => breakers[n]?.state === 'OPEN');
  if (open.length !== allProviders.length) return;
  console.warn('[AI] Todos los proveedores en OPEN — reseteando');
  for (const n of open) {
    breakers[n].state = 'CLOSED';
    breakers[n].openCount = 0;
  }
}
/**
 * Main router: walks the cascade for `taskType` sequentially, best-ranked
 * provider first, and returns the first response that passes isValidResponse.
 * Falls back to the local model when no provider is available or all fail.
 *
 * @param {Array<{role: string, content: *}>} messages - Prepared conversation.
 * @param {string} taskType - Key into CASCADES; unknown keys use CASCADES.spanish.
 * @param {number} maxTokens - Token budget forwarded to the provider.
 * @param {string} userMsg - Last user message, forwarded to emergencyFallback.
 * @returns {Promise<string>} The accepted response.
 * @throws {Error} When neither a remote provider nor the local model answers.
 */
async function _route(messages, taskType, maxTokens, userMsg) {
  ensureAvailability();
  const cascade = [...(CASCADES[taskType] ?? CASCADES.spanish)];
  // Rank by health score; providers predicted to exhaust their quota lose 0.3.
  const rank = n => healthScore(n) + (predictExhaustion(n) ? -0.3 : 0);
  cascade.sort((a, b) => rank(b) - rank(a));
  const available = cascade.filter(n => breakers[n]?.canRequest() && isQuotaOk(n));
  if (!available.length) {
    // Nothing usable remotely: fall back to the local model
    if (isLocalAIReady()) {
      const r = await emergencyFallback(messages, userMsg).catch(() => null);
      if (r) return r;
    }
    throw new Error('[AI] Sin proveedores disponibles');
  }
  // v12: providers are tried one at a time (no hedged Promise.any requests),
  // because parallel answers ended up concatenated into triplicated responses.
  for (const name of available) {
    try {
      // A fresh failure flag per provider: callProvider skips breaker.recordFailure
      // once the flag it receives is set, so sharing one flag across this
      // sequential loop meant only the FIRST failing provider was ever reported
      // to its circuit breaker.
      const r = await callProvider(name, messages, maxTokens, { v: false });
      if (isValidResponse(r)) return r;
    } catch {
      // callProvider already logged and recorded the failure; try the next one.
    }
  }
  // Last resort: local model
  if (isLocalAIReady()) {
    console.log('[AI] 🏠 Usando modelo local como fallback (todos los proveedores fallaron)');
    const r = await emergencyFallback(messages, userMsg).catch(e => {
      console.warn('[AI] Modelo local también falló:', e.message);
      return null;
    });
    if (r) return r;
  } else {
    console.warn('[AI] Modelo local no disponible para fallback (isLocalAIReady=false)');
  }
  throw new Error('[AI] Todos los proveedores fallaron');
}
/**
 * Owner-mode router: like _route but ranks purely by health score and accepts
 * any non-blank response (no isValidResponse filtering).
 *
 * @param {Array<{role: string, content: *}>} messages - Prepared conversation.
 * @param {string} taskType - Key into CASCADES; unknown keys use CASCADES.spanish.
 * @param {number} maxTokens - Token budget forwarded to the provider.
 * @returns {Promise<string>} The first non-blank response.
 * @throws {Error} When no provider and no local fallback produced a response.
 */
async function _routeOwner(messages, taskType, maxTokens) {
  ensureAvailability();
  const cascade = [...(CASCADES[taskType] ?? CASCADES.spanish)];
  cascade.sort((a, b) => healthScore(b) - healthScore(a));
  const available = cascade.filter(n => breakers[n]?.canRequest() && isQuotaOk(n));
  for (const name of available) {
    try {
      const r = await callProvider(name, messages, maxTokens, { v: false });
      if (r && r.trim().length > 0) return r;
    } catch {
      // callProvider already logged the failure; try the next provider.
    }
  }
  if (isLocalAIReady()) {
    // emergencyFallback returns a Promise, so the previous
    // `emergencyFallback(...) ?? Promise.reject(...)` could never reach its
    // right-hand side. Await the result and check it explicitly so a null
    // fallback answer becomes an error instead of leaking to the caller.
    const local = await emergencyFallback(messages, '');
    if (local) return local;
    if (!available.length) throw new Error('Sin proveedores');
  }
  throw new Error('[AI] Sin respuesta');
}
// ── callAI ────────────────────────────────────────────────────────────────────
/**
 * Public entry point for foreground AI calls.
 *
 * Flow: derive the user message -> compute the token budget -> prepare and
 * compress the context -> (owner mode: route directly, no caches) -> exact
 * cache -> semantic cache -> routed call, de-duplicated through singleFlight.
 * Every routed result goes through cleanThinkingArtifacts before it is
 * returned or cached.
 *
 * @param {Array<{role: string, content: *}>} messages - Full conversation.
 * @param {string} [taskType='spanish'] - Cascade to use.
 * @param {?number} [maxTokens=null] - Explicit budget passed to adaptiveTokens.
 * @param {string} [userMsg=''] - User message; defaults to the last `user` turn.
 * @param {boolean} [ownerMode=false] - Bypass caches and use _routeOwner.
 * @returns {Promise<string>} The cleaned response (or a cached one).
 */
export async function callAI(messages, taskType = 'spanish', maxTokens = null, userMsg = '', ownerMode = false) {
  const prompt = userMsg || (messages.filter(m => m.role === 'user').pop()?.content ?? '');
  const tokens = adaptiveTokens(prompt, taskType, maxTokens);
  // NOTE(review): the file header says injectThinking() was removed in v3.0;
  // confirm this helper is now a pass-through.
  const thinkMsgs = injectThinking(messages);
  const msgs = await compressContext(thinkMsgs);
  const cacheKey = _cache.key(msgs, taskType); // always defined
  if (ownerMode) {
    return withQueue(_userQueue, async () => {
      const result = await _routeOwner(msgs, taskType, tokens);
      return cleanThinkingArtifacts(result);
    });
  }
  // Exact cache
  const cached = _cache.get(cacheKey);
  if (cached) { console.log('[AI] 💾 cache hit'); return cached; }
  // Semantic cache (enabled unless explicitly set to false)
  if (feats.semanticCache !== false) {
    try {
      const semHit = await semanticCache.get(prompt);
      if (semHit) { console.log('[AI] 💾 semantic cache hit'); return semHit; }
    } catch (err) {
      // Best effort: a broken semantic cache must not block the request, but
      // the failure should be visible instead of silently swallowed.
      console.warn('[AI] semantic cache lookup failed:', err?.message ?? err);
    }
  }
  const doCall = () => withQueue(_userQueue, async () => {
    const result = await _route(msgs, taskType, tokens, prompt);
    const cleaned = cleanThinkingArtifacts(result);
    // Do not store a response that became empty after cleanup; such an entry
    // can never be served as a hit and only occupies a cache slot.
    if (cleaned) {
      _cache.set(cacheKey, cleaned, prompt);
      semanticCache.set(prompt, cleaned, semanticCache.getTTL(prompt)).catch(() => {});
    }
    return cleaned;
  });
  return singleFlight(cacheKey, doCall);
}
// ── callAIBackground ─────────────────────────────────────────────────────────
/**
 * Background variant of callAI: no caches, runs on the background queue and
 * always routes through the 'background' cascade.
 *
 * @param {Array<{role: string, content: *}>} messages - Full conversation.
 * @param {string} [taskType='spanish'] - Only used to size the token budget.
 * @param {?number} [maxTokens=null] - Explicit budget passed to adaptiveTokens.
 * @param {string} [userMsg=''] - User message; defaults to the last `user` turn.
 * @returns {Promise<string>} The routed response after cleanThinkingArtifacts.
 */
export async function callAIBackground(messages, taskType = 'spanish', maxTokens = null, userMsg = '') {
  if (!userMsg) {
    const userTurns = messages.filter(m => m.role === 'user');
    userMsg = userTurns.pop()?.content ?? '';
  }
  const tokenBudget = adaptiveTokens(userMsg, taskType, maxTokens);
  const prepared = await compressContext(injectThinking(messages));

  const runInBackground = async () => {
    const raw = await _route(prepared, 'background', tokenBudget, userMsg);
    return cleanThinkingArtifacts(raw);
  };
  return withQueue(_backgroundQueue, runInBackground);
}
// ── Mixture of Agents ─────────────────────────────────────────────────────────
/**
 * Mixture of Agents: asks three providers in parallel for short proposals and
 * has 'glm51' synthesise them into one answer. Falls back to a plain callAI
 * when the feature is off, when no usable proposal arrives, or when the
 * synthesis step fails or yields nothing.
 *
 * @param {Array<{role: string, content: *}>} messages - Full conversation.
 * @param {number} [maxTokens=600] - Budget for the synthesis / fallback call.
 * @returns {Promise<string>} The synthesised (or fallback) response.
 */
export async function callAIMoA(messages, maxTokens = 600) {
  if (!feats.moa) return callAI(messages, 'spanish', maxTokens);
  const settled = await Promise.allSettled([
    callProvider('glm51', messages, 300, { v: false }),
    callProvider('groqFast', messages, 300, { v: false }),
    callProvider('pollinations', messages, 300, { v: false }),
  ]);
  // Keep fulfilled proposals longer than 10 chars, numbered in arrival order.
  const proposals = settled
    .filter(r => r.status === 'fulfilled' && r.value?.length > 10)
    .map((r, i) => `Respuesta ${i + 1}: ${r.value}`)
    .join('\n\n');
  if (!proposals) return callAI(messages, 'spanish', maxTokens);
  try {
    const synthesis = await callProvider('glm51', [
      { role: 'system', content: 'Sintetiza la mejor respuesta de las siguientes opciones. Sin texto extra.' },
      { role: 'user', content: `${proposals}\n\nSíntesis:` },
    ], maxTokens, { v: false });
    // Same post-processing as every callAI path; without it the synthesis
    // could leak chain-of-thought artifacts to the user.
    const cleaned = cleanThinkingArtifacts(synthesis);
    if (cleaned) return cleaned;
  } catch {
    // callProvider already logged the failure; fall back below like the other
    // "MoA not possible" branches instead of failing the whole request.
  }
  return callAI(messages, 'spanish', maxTokens);
}
// ── Self-healing ──────────────────────────────────────────────────────────────
/**
 * Starts a 5-minute monitor that warns when average daily-quota usage exceeds
 * 85 %. Does nothing unless `features.selfHealing` is enabled.
 *
 * The monitor only logs; it does not change routing by itself.
 */
export function startSelfHealing() {
  if (!feats.selfHealing) return;
  setInterval(() => {
    const allProviders = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];
    const totalReq = allProviders.reduce((s, n) => s + getCounter(n).req, 0);
    if (totalReq === 0) return;
    // Average only over providers that actually have a daily limit. Dividing by
    // ALL providers (as before) let limit-less ones contribute 0 to the sum
    // while still inflating the divisor, diluting the ratio below the threshold.
    const limited = allProviders.filter(n => DAILY_LIMITS[n]);
    if (limited.length === 0) return;
    const avgQuota = limited.reduce((s, n) => s + getCounter(n).req / DAILY_LIMITS[n], 0) / limited.length;
    if (avgQuota > 0.85) console.warn('[SelfHeal] Cuota global alta — priorizar proveedores con más quota');
  }, 5 * 60 * 1000);
  console.log('[HEAL] Self-healing v7 iniciado');
}
// ── Warmup ────────────────────────────────────────────────────────────────────
/**
 * Sends a tiny test request to a fixed set of providers in parallel and logs
 * each outcome with its latency. Does nothing unless `features.warmup` is on.
 *
 * Requests go through callProvider, so they pass through the same breaker and
 * stats recording as real traffic. Individual failures are logged, never thrown.
 *
 * @returns {Promise<void>}
 */
export async function warmupProviders() {
  if (!feats.warmup) return;
  const test = [{ role: 'user', content: 'ok' }];
  console.log('[WARMUP] Verificando proveedores...');
  const targets = ['glm51', 'glmFlash', 'groqFast', 'cerebras', 'pollinations', 'mistral'];
  await Promise.allSettled(
    targets.map(async name => {
      const start = Date.now();
      try {
        await callProvider(name, test, 5, { v: false });
        console.log(`[WARMUP] ✅ ${name} ${Date.now() - start}ms`);
      } catch (e) {
        console.log(`[WARMUP] ❌ ${name}: ${e.message?.slice(0, 40)}`);
      }
    })
  );
}
// ── Comprimir contexto ───────────────────────────────────────────────────────
/**
 * Shrinks a conversation before it is sent to a provider.
 *
 * - System messages are kept and placed first. A very long system prompt is
 *   reduced to its first 3500 chars + a marker + its last 1000 chars.
 * - Chat history is cut to the 4 most recent non-system messages, since it
 *   matters less than the system prompt.
 *
 * The input array and its message objects are not mutated.
 *
 * @param {Array<{role: string, content: *}>} messages - Full conversation.
 * @returns {Promise<Array<{role: string, content: *}>>} Compressed conversation.
 */
async function compressContext(messages) {
  const HEAD_CHARS = 3500; // identity + personality + safety rules
  const TAIL_CHARS = 1000; // tools + closing rules
  const MARKER = '\n...(contexto comprimido)\n';
  // Compress only when the result is strictly shorter than the input. The old
  // fixed 4000-char threshold made prompts of 4001-4525 chars LONGER, because
  // the head and tail slices overlapped and the marker was added on top.
  const minLengthToCompress = HEAD_CHARS + MARKER.length + TAIL_CHARS;

  const sys = messages.filter(m => m.role === 'system');
  const chat = messages.filter(m => m.role !== 'system');

  const compressedSys = sys.map(m => {
    const { content } = m;
    // Non-string content (null, structured parts) is passed through untouched.
    if (typeof content !== 'string' || content.length <= minLengthToCompress) return m;
    return { ...m, content: content.slice(0, HEAD_CHARS) + MARKER + content.slice(-TAIL_CHARS) };
  });

  // Keep at most the last 4 chat messages
  const recentChat = chat.slice(-4);
  return [...compressedSys, ...recentChat];
}
// ── Stats ─────────────────────────────────────────────────────────────────────
/**
 * Builds a per-provider usage snapshot: request count, daily limit (99999 when
 * none is configured), percentage used, breaker state and health score.
 * HuggingFace-specific stats are attached under `_hf` when available.
 *
 * @returns {Object<string, *>} Map of provider name -> stats, plus optional `_hf`.
 */
export function getDailyStats() {
  const providers = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];
  const snapshot = Object.fromEntries(
    providers.map(provider => {
      const counter = getCounter(provider);
      const limit = DAILY_LIMITS[provider] ?? 99999;
      const usedPct = ((counter.req / limit) * 100).toFixed(1) + '%';
      return [provider, {
        requests: counter.req,
        limit,
        pct: usedPct,
        state: breakers[provider]?.state ?? '?',
        score: healthScore(provider).toFixed(2),
      }];
    })
  );
  // HF stats are optional: a failure here just leaves `_hf` unset.
  try {
    snapshot._hf = getHFStats();
  } catch {}
  return snapshot;
}
/**
 * Lists breaker state and health score for the standard and GLM providers.
 *
 * @returns {Array<{name: string, state: string, score: string}>} One entry per
 *   provider; `state` is 'UNKNOWN' when the provider has no breaker.
 */
export function getProviderStatus() {
  const report = [];
  for (const name of [...ALL_PROVIDERS, ...GLM_PROVIDERS]) {
    const state = breakers[name]?.state ?? 'UNKNOWN';
    const score = healthScore(name).toFixed(2);
    report.push({ name, state, score });
  }
  return report;
}
/**
 * Empties the exact-match response cache.
 */
export function clearCache() {
  _cache.clear();
}
// ── HIVE exports ────────────────────────────────────────────────────────────
export { getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers, hiveInit as initHive };
/**
 * Optional quality gate for a finished response.
 *
 * @param {string} response - Response to check.
 * @param {string} userMessage - Original user message (currently unused).
 * @returns {Promise<{pass: boolean, reason?: string}>} Always passes when the
 *   `qualityGate` feature is off; fails with reason 'empty' for missing or
 *   under-10-char responses; otherwise mirrors isValidResponse.
 */
export function runQualityGate(response, userMessage) {
  let verdict;
  if (!feats.qualityGate) {
    verdict = { pass: true };
  } else if (!response || response.trim().length < 10) {
    verdict = { pass: false, reason: 'empty' };
  } else {
    verdict = { pass: isValidResponse(response) };
  }
  return Promise.resolve(verdict);
}