/**
 * ai.js — Router de IA v10.0 — TORNEO DE MODELOS
 * ================================================
 * CAMBIOS v10.0 (basado en torneo de 179 modelos):
 * - Qwen3-32B (Groq) como proveedor PRIMARIO — mejor español+velocidad+thinking
 * - Qwen3-235B-A22B (Cerebras) como premium — ultra-rápido, excelente español
 * - Llama-4-Scout-17B (Groq) como secundario — oficial español
 * - DeepSeek R1 (OpenRouter) para razonamiento complejo
 * - GLM mantenido pero degradado en prioridad
 * - Modelo local (Gemma 4 E4B) solo como fallback final
 * - Thinking mode optimizado para Qwen3
 * - Cascadas reordenadas por puntaje del torneo
 */

import { readConfig }   from './utils.js';
import { semanticCache } from './semantic-cache.js';
import { emergencyFallback, isLocalAIReady } from './local-ai.js';
import { callHuggingFace, callHuggingFaceCascade, isHFAvailable, discoverHFModels, getHFStats } from './hf-provider.js';
import { hiveGenerate, initialize as hiveInit, getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers } from './hive.js';

// Configuration is read once, at module load, through readConfig() from ./utils.js.
const config = readConfig();
// Per-section views of the configuration. Each one falls back to an empty
// object when the section is null/undefined, so later property reads are safe.
const ai     = config.ai ?? {};
const feats  = config.features ?? {};
const glm    = config.glm ?? {};

// ── Thinking Mode v3.0 — REMOVED injectThinking() ──────────────────────────
// v3.0 CRITICAL FIX: injectThinking() was causing the #1 bug — chain-of-thought
// leaking into Discord responses. Models would output their thinking process as
// plain text despite instructions not to. The fix: DON'T inject thinking prompts.
// Models already think internally. Adding "PIENSA PROFUNDAMENTE" causes them to
// output that thinking. Less is more — just give good system prompts.
//
// For models with native thinking (GLM-5.1 reasoning_content, Qwen3 /think):
// - We let the API handle thinking natively (separate from content)
// - We NEVER inject /think or thinking instructions manually
// - We ALWAYS strip any thinking that leaks into content via cleanThinkingArtifacts()

// injectThinking is now a no-op — returns messages unchanged
/**
 * Intentional no-op: hands back the message list it received, untouched.
 *
 * @param {Array} messages - Chat messages; returned as-is (same reference).
 * @param {string} [_providerName=''] - Unused.
 * @returns {Array} The `messages` argument itself.
 */
function injectThinking(messages, _providerName = '') {
  return messages;
}

// Limpiar artefactos de thinking + patrones AI — v11 ANTI-LEAK
// v11: Major overhaul — handles ALL known thinking leak patterns
// Root cause: models output their chain-of-thought as plain text
// v11 FIX: More aggressive detection — lower thresholds, more patterns
/**
 * Removes leaked chain-of-thought and assistant-style boilerplate from a model reply.
 *
 * The passes run in a fixed order, each one operating on the output of the previous:
 *   1. strip thinking tags (<think>, <thinking>, <reasoning>, <scratchpad>, <internal>, CDATA);
 *   2. multi-line heuristic: "thinking" first half + Spanish second half -> keep the second half;
 *   3. sentence heuristic: English opener followed by Spanish sentences -> keep the Spanish tail;
 *   4. numbered recitation of the prompt, English-reasoning preamble, "My response:" style markers;
 *   5. "user:" / "zelin:" transcript recitation;
 *   6. /think and /no_think markers, "Pienso... Respuesta:" prefixes, broken tool placeholders;
 *   7. phrase rewrites (formal refusals, identity misspellings, trailing "can I help?" questions).
 *
 * All heuristics are regex based and deliberately aggressive; they can also trim legitimate text.
 *
 * @param {*} text - Raw model output.
 * @returns {*} The cleaned, trimmed string; `text` itself when it is falsy or not a string.
 */
function cleanThinkingArtifacts(text) {
  if (!text || typeof text !== 'string') return text;
  let t = text;

  // ── Pass 1: strip every known thinking-tag format ──
  t = t.replace(/<think[\s\S]*?<\/think>/gi, '');
  t = t.replace(/<thinking[\s\S]*?<\/thinking>/gi, '');
  t = t.replace(/<reasoning[\s\S]*?<\/reasoning>/gi, '');
  t = t.replace(/<scratchpad[\s\S]*?<\/scratchpad>/gi, '');
  t = t.replace(/<internal[\s\S]*?<\/internal>/gi, '');
  // CDATA sections only. The brackets must be escaped: left bare, `[CDATA[\s\S]`
  // parses as a character class and the pattern would delete everything from
  // any "<!" up to the next "]]>".
  t = t.replace(/<!\[CDATA\[[\s\S]*?\]\]>/gi, '');
  // Orphan closing tags whose opening tag never appeared.
  t = t.replace(/<\/think>/gi, '');
  t = t.replace(/<\/thinking>/gi, '');
  t = t.replace(/<\/reasoning>/gi, '');

  // ── Pass 2: multi-line thinking followed by a response ──
  // Split the non-empty lines in two halves; if the first half scores as
  // "thinking" and the second half scores as "Spanish", keep only the second half.
  if (t.includes('\n') && t.length > 150) {
    const lines = t.split('\n').filter(l => l.trim().length > 0);
    if (lines.length >= 3) {
      const firstLines = lines.slice(0, Math.ceil(lines.length / 2));
      const lastLines = lines.slice(Math.ceil(lines.length / 2));

      // Openers that typically start a line of model reasoning (anchored at line start).
      const thinkingWords = /^(?:okay|alright|let me|i should|the user|first,|based on|since|so,|well,|now,|also,|however,|but,|actually,|hmm|let's|i need|i think|i'll|going to|in order|therefore|because|this means|that means|it seems|it appears|looking at|considering|analyzing|understanding|evaluating|to respond|to answer|the message|the question|as zelin|respond as|in character|staying in|my role|my persona)/i;
      // Spanish / Mexican-slang markers.
      // NOTE(review): there is no trailing word boundary, so short entries also
      // match English prefixes ("ok" in "okay", "no" in "now"); such lines cancel
      // their own thinking score below. Left as-is to avoid changing the heuristic.
      const spanishWords = /\b(wey|neta|chido|chale|órale|híjole|zelin|morra|güey|onda|chingón|bueno|claro|sí|no|nah|oye|mira|ósea|pues|nada|simón|sale|va|ok|jaja|qu[eé]|c[oó]mo|d[oó]nde|cu[aá]ndo|por qu[eé]|much[oas]?|tambi[eé]n|aqu[ií]|all[iá]|este|esta|eso|esa|s[ií]|nope|yup|sip|nop|dale|va|holi|ola|bro|crack|xd|gg|ns|ni idea|ni modo|ya ves|ya mero|híjole|a poco|qué onda|no mames|no manches|est[aá] ca[nnñ]ón|padre|madre)/i;

      let firstHalfThinking = 0;
      let secondHalfSpanish = 0;

      for (const line of firstLines) {
        if (thinkingWords.test(line.trim())) firstHalfThinking++;
        if (spanishWords.test(line)) firstHalfThinking--; // Spanish line: counts against "thinking"
      }
      for (const line of lastLines) {
        if (spanishWords.test(line)) secondHalfSpanish++;
        if (thinkingWords.test(line.trim())) secondHalfSpanish--; // thinking line: counts against "response"
      }

      // A single net indicator on each side is enough to cut.
      if (firstHalfThinking >= 1 && secondHalfSpanish >= 1) {
        const response = lastLines.join('\n').trim();
        if (response.length > 5) {
          t = response;
        }
      }
    }
  }

  // ── Pass 3: single-paragraph English thinking followed by Spanish ──
  // Example: "Okay, I should respond as a Mexican girl. wey no sé"
  // Only applies when the FIRST sentence starts with an English opener; the
  // result starts at the last sentence that looks Spanish.
  if (t.length > 100) {
    const sentences = t.split(/(?<=[.!?])\s+/);
    if (sentences.length >= 2) {
      let lastSpanishStart = -1;
      for (let i = 0; i < sentences.length; i++) {
        // A sentence counts as Spanish when it has slang, inverted punctuation,
        // or a common Spanish filler word without an English opener.
        const s = sentences[i];
        if (/\b(wey|neta|chido|chale|órale|híjole|morra|güey|onda|chingón|ósea|pues|simón)\b/i.test(s) ||
            /[¿¡]/.test(s) ||
            (/\b(s[ií]|no|nah|oye|mira|bueno|claro|dale|ns)\b/i.test(s) && !/^(?:okay|alright|let me|i should|the user)/i.test(s))) {
          if (i > 0 && /^(?:okay|alright|let me|i should|the user|first|based on|since|so|well|now|also|however|actually|hmm|let's|i need|i think)/i.test(sentences[0])) {
            lastSpanishStart = i;
          }
        }
      }
      if (lastSpanishStart >= 0) {
        const response = sentences.slice(lastSpanishStart).join(' ').trim();
        if (response.length > 5) t = response;
      }
    }
  }

  // ── Pass 4: plain-text internal reasoning without any tag markers ──

  // Pattern 1: numbered thinking steps recited from the system prompt,
  // e.g. "1. ¿Cuál es la intención real del mensaje? ... 6. Responde SOLO..."
  const thinkingRecitationPattern = /^[\d][.)]\s*(?:¿Cu[aá]l|¿Hay|¿Qu[eé]|¿C[oó]mo|Auto-eval|Responde|PIENSA|Pienso|Pensamiento)/m;
  if (thinkingRecitationPattern.test(t)) {
    // The real answer is whatever follows the LAST numbered line.
    const lines = t.split('\n');
    let lastNumberedLine = -1;
    for (let i = 0; i < lines.length; i++) {
      if (/^\d[.)]\s/.test(lines[i].trim())) lastNumberedLine = i;
    }
    if (lastNumberedLine >= 0 && lastNumberedLine < lines.length - 1) {
      const afterThinking = lines.slice(lastNumberedLine + 1).join('\n').trim();
      if (afterThinking.length > 2) t = afterThinking;
    }
  }

  // Pattern 2: English reasoning followed by a Spanish answer,
  // e.g. "Okay, the user is asking... I should respond as Zelin..."
  const englishThinkingPattern = /^(?:Okay|Alright|Let me|I should|The user|First,|Based on|Since)/i;
  if (englishThinkingPattern.test(t) && t.length > 150) {
    const lines = t.split('\n');
    let foundSpanish = false;
    const spanishLines = [];
    for (const line of lines) {
      // Keep everything from the first line that contains Spanish characters
      // (accents, ñ, ¿, ¡) or Mexican slang onwards.
      if (/[¿¡ñáéíóú]/i.test(line) || /\b(wey|neta|chido|chale|órale|zelin|híjole|morra)\b/i.test(line)) {
        foundSpanish = true;
        spanishLines.push(line);
      } else if (foundSpanish) {
        spanishLines.push(line);
      }
    }
    if (spanishLines.length > 0 && spanishLines.join('\n').trim().length > 2) {
      t = spanishLines.join('\n').trim();
    }
  }

  // Pattern 3: "Zelin would say:" / "Mi respuesta:" style marker; keep what follows it.
  // Only applied when the marker is not at the very start (match.index > 0).
  const promptPatterns = [
    /Zelin (?:would|should|might) (?:say|respond|reply|answer)[:\s]*\n?/i,
    /(?:Mi respuesta|My response|My answer|Response)[:\s]*\n?/i,
    /(?:Así respondería|Here's how|Here is what)[:\s]*\n?/i,
  ];
  for (const p of promptPatterns) {
    const match = t.match(p);
    if (match && match.index > 0) {
      const after = t.slice(match.index + match[0].length).trim();
      if (after.length > 2) t = after;
    }
  }

  // ── Pass 5: "user: ... zelin: ..." recitation of prompt examples ──
  if (/\buser:/i.test(t)) {
    const beforeUser = t.split(/\buser:/i)[0].trim();
    if (beforeUser.length > 2) {
      t = beforeUser;
    } else {
      // Nothing usable precedes "user:" — fall back to the short reply 'ns'.
      t = 'ns';
    }
  }
  if (/\bzelin:\s*/i.test(t) && !/^zelin:/i.test(t)) {
    const beforeZelin = t.split(/\bzelin:\s*/i)[0].trim();
    if (beforeZelin.length > 2) t = beforeZelin;
  }
  t = t.replace(/^zelin:\s*/i, '');

  // ── Pass 6: explicit markers and prefixes ──
  // Qwen3-style markers: keep only what follows "/no_think" (9 characters long).
  const noThinkIdx = t.indexOf('/no_think');
  if (noThinkIdx !== -1) {
    const afterNoThink = t.slice(noThinkIdx + 9).trim();
    if (afterNoThink.length > 2) t = afterNoThink;
  }
  t = t.replace(/^\/think\s*/i, '');
  t = t.replace(/^\/no_think\s*/i, '');
  t = t.replace(/^Pienso[\s\S]*?\nRespuesta:\s*/i, '');
  t = t.replace(/^Pensamiento:[\s\S]*?\nRespuesta:\s*/i, '');
  // "Análisis:" / "Razonamiento:" blocks, cut up to the first conversational opener.
  t = t.replace(/^An[aá]lisis:[\s\S]*?(?=Ahora|Bien|Ok|Sí|No|Nah|Wey|Oye|hmm|bueno)/i, '');
  t = t.replace(/^Razonamiento:[\s\S]*?(?=Ahora|Bien|Ok|Sí|No|Nah|Wey|Oye|hm|bueno)/i, '');
  // "Final answer" style prefixes.
  t = t.replace(/^Respuesta final:\s*/i, '');
  t = t.replace(/^Final answer:\s*/i, '');
  t = t.replace(/^Respuesta:\s*/i, '');
  // Broken tool-call placeholders (complete, or with the opening part cut off).
  t = t.replace(/\[(?:mc_status|mc_player|mc_wiki|hora actual|usar\s+\w+\s+para\s+dato\s+real)\]/gi, '');
  t = t.replace(/c_status\]/g, '');
  t = t.replace(/ora actual\]/g, '');
  t = t.replace(/\w+_(?:status|player|wiki|info)\]/g, '');

  // ── Pass 7: phrase-level rewrites ──
  // Formal refusal phrases -> casual equivalents.
  t = t.replace(/no puedo cumplir (esa|este|aquella) (solicitud|request|orden|instrucci[oó]n)/gi, 'eso no va');
  t = t.replace(/no puedo procesar esa solicitud/gi, 'eso no va');
  t = t.replace(/lo siento,? pero no puedo/gi, 'nah');
  t = t.replace(/disculpa,? pero no puedo/gi, 'nah');
  t = t.replace(/lamentablemente no puedo/gi, 'nah');
  // "nop, soy ..." at the start -> "no, soy ...".
  t = t.replace(/^nop,\s*soy\s+/gi, 'no, soy ');
  // Identity misspellings.
  t = t.replace(/\bzel[eé]n\b/gi, 'zelin');
  t = t.replace(/\brezin\b/gi, 'zelin');
  t = t.replace(/\bzelen\b/gi, 'zelin');
  t = t.replace(/\btomatitoo\b/gi, 'tomatito');
  // Trailing assistant-style questions ("can I help with anything else?").
  t = t.replace(/[,\s]*¿?en qu[eé]\s+(te\s+)?puedo\s+ayud[aeo]r?[¿?]?\.?\s*$/gi, '');
  t = t.replace(/[,\s]*¿?(algo\s+)?m[aá]s\s+en\s+(lo\s+que\s+)?pued[ao]\s+ayud[aeo]r?[¿?]?\.?\s*$/gi, '');
  t = t.replace(/[,\s]*¿?necesitas\s+(algo\s+)?m[aá]s[¿?]?\.?\s*$/gi, '');
  // Known garbled outputs.
  t = t.replace(/\besti con vos/gi, 'suerte con eso');
  t = t.replace(/\bwienes/gi, 'bien, y tú');
  // Collapse a full identity recitation into a short one.
  if (/^soy zelin,? (la morra |la )?del server minecraft? tomatesmp\.?$/i.test(t)) {
    t = 'soy zelin del server';
  }
  return t.trim();
}

// ── Modelos descubiertos dinámicamente por proveedor ─────────────────────────
// Se actualizan al arrancar y cada 6h para que no queden obsoletos
// Registry of model ids per provider: written by discoverModels(), read by getBestModel().
const _discoveredModels = {};

/**
 * Fetches the model list of one provider and stores the ids in
 * `_discoveredModels[providerName]`.
 *
 * Supported providers: 'groq', 'mistral' (both need an API key in `ai.<provider>.apiKey`)
 * and 'pollinations' (no key). Any other name is ignored.
 *
 * Discovery is best-effort: a missing key or a non-OK HTTP status returns
 * silently, and network/parse errors are logged as a warning instead of being
 * thrown, so this function never rejects.
 *
 * @param {string} providerName - Provider whose models should be discovered.
 * @returns {Promise<void>}
 */
async function discoverModels(providerName) {
  try {
    switch (providerName) {
      case 'groq': {
        if (!ai.groq?.apiKey) return;
        const r = await fetch('https://api.groq.com/openai/v1/models', {
          headers: { Authorization: `Bearer ${ai.groq.apiKey}` },
          signal: AbortSignal.timeout(5000),
        });
        if (!r.ok) return;
        const data = await r.json();
        // Entries explicitly flagged `active: false` are dropped.
        const models = (data.data ?? []).filter(m => m.active !== false).map(m => m.id);
        _discoveredModels.groq = models;
        console.log(`[AI] Groq models: ${models.length} (${models.slice(0,3).join(', ')}...)`);
        break;
      }
      case 'mistral': {
        if (!ai.mistral?.apiKey) return;
        const r = await fetch('https://api.mistral.ai/v1/models', {
          headers: { Authorization: `Bearer ${ai.mistral.apiKey}` },
          signal: AbortSignal.timeout(5000),
        });
        if (!r.ok) return;
        const data = await r.json();
        const models = (data.data ?? []).map(m => m.id);
        _discoveredModels.mistral = models;
        console.log(`[AI] Mistral models: ${models.length}`);
        break;
      }
      case 'pollinations': {
        const r = await fetch('https://text.pollinations.ai/models', { signal: AbortSignal.timeout(5000) });
        if (!r.ok) return;
        const data = await r.json();
        // The endpoint may return objects with a `name` or bare strings.
        _discoveredModels.pollinations = Array.isArray(data) ? data.map(m => m.name ?? m) : [];
        console.log(`[AI] Pollinations models: ${_discoveredModels.pollinations.length}`);
        break;
      }
      default:
        // Unknown provider: nothing to discover.
        break;
    }
  } catch (err) {
    // Keep discovery non-fatal, but leave a trace so outages and timeouts are diagnosable.
    console.warn(`[AI] Model discovery failed for ${providerName}: ${err?.message ?? err}`);
  }
}

// Seleccionar el mejor modelo disponible de un proveedor
/**
 * Picks a chat/text model id for a provider from the discovered-models registry.
 *
 * @param {string} providerName - Key into `_discoveredModels`.
 * @param {string} fallback - Returned when nothing suitable was discovered.
 * @param {string} [preference='large'] - 'fast' or 'large'; any other value yields `fallback`.
 * @returns {string} First discovered text model matching the preference, else `fallback`.
 */
function getBestModel(providerName, fallback, preference = 'large') {
  const discovered = _discoveredModels[providerName];
  if (!discovered?.length) return fallback;

  // Drop ids that are clearly not chat/text models.
  const NON_TEXT = /whisper|tts|speech|audio|embed|vision|image|dall|stable|rerank|guard|code-gecko|text-bison/i;
  const candidates = discovered.filter((id) => !NON_TEXT.test(id));
  if (candidates.length === 0) return fallback;

  let wanted;
  switch (preference) {
    case 'fast':
      wanted = /8b|7b|fast|instant|flash|mini/i;
      break;
    case 'large':
      wanted = /70b|72b|large|versatile|plus|pro/i;
      break;
    default:
      return fallback;
  }
  return candidates.find((id) => wanted.test(id)) ?? fallback;
}

// Descubrir en background al arrancar
/**
 * Kicks off model discovery for every provider (plus HuggingFace) right away
 * and schedules the same refresh every six hours. Failures are ignored.
 */
export function startModelDiscovery() {
  const SIX_HOURS_MS = 6 * 60 * 60 * 1000;
  const providers = ['groq', 'mistral', 'pollinations'];

  const refreshAll = () => {
    for (const provider of providers) {
      discoverModels(provider).catch(() => {});
    }
    discoverHFModels().catch(() => {});
  };

  refreshAll();
  setInterval(refreshAll, SIX_HOURS_MS);
}

// ── Tiers and cascades (TIERS lead with GLM providers; CASCADES lead with hive/Groq and end with local where present) ──
// Provider names grouped by tier. These lists only feed ALL_PROVIDERS below
// within this part of the file.
const TIERS = {
  fast    : ['glmFlash', 'groqFast', 'cerebras', 'pollinations', 'hfFast'],
  smart   : ['glm51', 'pollinations', 'groq', 'hfSmart', 'mistral', 'gemma4', 'cloudflare'],
  fallback: ['glmAir', 'openrouter', 'groqKimi', 'openrouterR1', 'mistralCode', 'hfFallback'],
};

// Ordered provider lists per task type (first entry is tried first —
// presumably by the router further down the file; confirm against the caller).
const CASCADES = {
  // v11: HIVE — RigoChat-7B cluster as PRIMARY (uses ALL workers, not Promise.any)
  // Hive: consensus + speculative decoding + parallel batch = maximum power
  chat      : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'groqFast', 'pollinations', 'hfSmart', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'local'],
  spanish   : ['hive', 'groqQwen3', 'cerebras', 'groq', 'glm51', 'hfSpanish', 'pollinations', 'mistral', 'gemma4', 'cloudflare', 'openrouter', 'groqKimi', 'local'],
  fast      : ['hive', 'groqFast', 'cerebras', 'glmFlash', 'pollinations', 'hfFast', 'groq', 'mistral', 'cloudflare', 'openrouter', 'local'],
  reasoning : ['hive', 'groqQwen3', 'cerebras', 'groqKimi', 'hfReasoning', 'openrouterR1', 'glm51', 'groq', 'mistral', 'pollinations', 'gemma4', 'cloudflare', 'local'],
  // Note: `code` is the only cascade without 'hive' and without a 'local' fallback.
  code      : ['groqQwen3', 'glm51', 'mistralCode', 'hfCode', 'groq', 'pollinations', 'mistral', 'openrouter'],
  volume    : ['hive', 'groqFast', 'glmFlash', 'pollinations', 'cerebras', 'hfFast', 'gemma4', 'cloudflare', 'groqQwen3', 'local'],
  background: ['hive', 'glmFlash', 'mistral', 'groqFast', 'groq', 'hfFast', 'pollinations', 'cloudflare', 'gemma4', 'openrouter', 'local'],
};

// GLM-backed provider names.
const GLM_PROVIDERS = ['glm51', 'glmAir', 'glmFlash'];
// De-duplicated union of every tier plus four names that appear in no tier.
// hfSpanish, hfCode and hfReasoning are NOT included here; they only appear in HF_PROVIDERS.
const ALL_PROVIDERS = [...new Set([...TIERS.fast, ...TIERS.smart, ...TIERS.fallback, 'groqQwen3', 'cerebras', 'local', 'hive'])];
// HuggingFace-backed provider names.
const HF_PROVIDERS = ['hfFast', 'hfSmart', 'hfSpanish', 'hfCode', 'hfReasoning', 'hfFallback'];

// ── Límites diarios ───────────────────────────────────────────────────────────
// Maximum requests per day for each provider name. Read by healthScore(),
// isQuotaOk() and predictExhaustion(); 99999 is used as "effectively unlimited".
const DAILY_LIMITS = {
  // Local: no limits — our own model
  local         : 99999,
  // Qwen3-32B (Groq) — TOURNAMENT: #1 primary model
  groqQwen3    : 1000,   // Qwen3-32B via Groq — 60 RPM, 1000 RPD
  // Cerebras — TOURNAMENT: #2 ultra-fast reasoning
  cerebras     : 14400,  // 30 RPM, 1M TPD
  // GLM: free/unlimited
  glm51        : 99999,
  glmAir       : 99999,
  glmFlash     : 99999,
  // Other providers
  pollinations : 99999,
  groq         : 1000,
  groqFast     : 14400,
  mistral      : 99999,
  mistralCode  : 99999,
  gemma4       : 86400,
  openrouter   : 1000,
  cloudflare   : 10000,
  groqKimi     : 1000,
  openrouterR1 : 1000,
  hfFast       : 5000,
  hfSmart      : 2000,
  hfSpanish    : 3000,
  hfCode       : 2000,
  hfReasoning  : 1000,
  hfFallback   : 500,
  hive          : 99999, // HIVE — no limits, it is our own cluster
};

// ── Circuit Breaker ───────────────────────────────────────────────────────────
/**
 * Per-provider circuit breaker with exponential cooldown.
 *
 * States:
 *   CLOSED    – requests allowed.
 *   OPEN      – requests rejected until the cooldown has elapsed.
 *   HALF_OPEN – cooldown elapsed; requests allowed again, but the next failure re-opens.
 *
 * Cooldown = cooldown * 2^(openCount - 1), capped at maxCooldown.
 */
class CircuitBreaker {
  /** @param {string} name - Provider name (informational only). */
  constructor(name) {
    this.name      = name;
    this.state     = 'CLOSED';
    this.failTimes = [];      // timestamps (ms) of non-timeout failures in the last 60 s
    this.openCount = 0;       // consecutive times the breaker has opened
    this.lastFail  = 0;       // timestamp (ms) the cooldown is measured from
    this.threshold = 6;       // recent failures needed to open
    this.cooldown  = 30000;   // base cooldown, ms
    this.maxCooldown = 600000; // cooldown cap, ms
  }

  /**
   * @returns {boolean} Whether a request may be attempted now. Moves the
   *   breaker to HALF_OPEN once the cooldown has elapsed.
   */
  canRequest() {
    if (this.state === 'CLOSED') return true;
    const cd = Math.min(this.cooldown * Math.pow(2, this.openCount - 1), this.maxCooldown);
    if (Date.now() - this.lastFail > cd) { this.state = 'HALF_OPEN'; return true; }
    return false;
  }

  /** Resets the breaker to CLOSED and forgets all failure history. */
  recordSuccess() {
    this.failTimes = []; this.openCount = 0; this.state = 'CLOSED';
  }

  /**
   * Registers a failed request.
   *
   * @param {number} code - HTTP-like status code; 401/403 open the breaker for ~24 h.
   * @param {boolean} [isTimeout=false] - Timeouts are not added to the failure
   *   window and scale the window count by 0.3 for this call.
   *   NOTE(review): as written, timeouts alone can never open a CLOSED breaker
   *   because they are never recorded — confirm this is intended.
   */
  recordFailure(code, isTimeout = false) {
    const now = Date.now();
    this.lastFail = now;
    if (!isTimeout) this.failTimes.push(now);
    // Drop entries older than the 60 s window so the array stays bounded while
    // a provider keeps failing. Only in-window entries ever affect the decision,
    // so pruning does not change when the breaker opens.
    this.failTimes = this.failTimes.filter(t => now - t < 60000);

    if (code === 401 || code === 403) {
      // Auth errors will not heal by themselves: push lastFail 24 h into the
      // future so the (capped) cooldown only starts counting after a day.
      this.state = 'OPEN'; this.openCount = 99; this.lastFail = now + 86_400_000;
      return;
    }

    const weight = isTimeout ? 0.3 : 1.0;
    if (this.failTimes.length * weight >= this.threshold || this.state === 'HALF_OPEN') {
      this.state = 'OPEN'; this.openCount++;
    }
  }
}

// One circuit breaker per known provider name, keyed by that name.
const breakers = {};
[...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS].forEach((provider) => {
  breakers[provider] = new CircuitBreaker(provider);
});
// The hive breaker is more forgiving (workers can be slow): it needs more
// failures before opening.
breakers.hive = Object.assign(new CircuitBreaker('hive'), { threshold: 10 });

// ── Estadísticas por proveedor ─────────────────────────────────────────────────
// Per-provider counters, created lazily on first access.
const _stats = {};

/**
 * @param {string} name - Provider name.
 * @returns {{req: number, err: number, totalMs: number, calls: number}} The live counter object.
 */
function getCounter(name) {
  return (_stats[name] ||= { req: 0, err: 0, totalMs: 0, calls: 0 });
}

/** Counts one request for `name`. */
function recordReq(name) {
  getCounter(name).req += 1;
}

/** Counts one error for `name`. */
function recordError(name) {
  getCounter(name).err += 1;
}

/** Adds one latency sample of `ms` milliseconds for `name`. */
function recordLatency(name, ms) {
  const counter = getCounter(name);
  counter.totalMs += ms;
  counter.calls += 1;
}

// ── Daily counter reset ────────────────────────────────────────────────────────
// UTC day-of-month on which the counters were last reset.
let _lastResetDay = new Date().getUTCDate();

/**
 * Zeroes the request and error counters of every provider when the UTC day
 * has changed since the previous reset. Latency totals are left untouched.
 */
function checkDailyReset() {
  const today = new Date().getUTCDate();
  if (today === _lastResetDay) return;

  _lastResetDay = today;
  Object.values(_stats).forEach((counter) => {
    counter.req = 0;
    counter.err = 0;
  });
  console.log('[AI] 🔄 Daily counters reset');
}

// ── Health Score ──────────────────────────────────────────────────────────────
/**
 * Scores a provider from its counters; higher is healthier.
 *
 *   score = successRate * 0.5 + (1 / avgLatencySeconds) * 0.3 + remainingQuota * 0.2
 *
 * - successRate is (req - err) / req, or 1 when no request has been recorded yet.
 *   A provider whose every request failed scores 0 here (not 1/req).
 * - avgLatency defaults to 2000 ms without samples and is floored at 1 ms so a
 *   zero average cannot produce an infinite score.
 * - remainingQuota is 1 - req / dailyLimit, or 1 when the provider has no limit entry.
 *
 * @param {string} name - Provider name.
 * @returns {number} Health score.
 */
function healthScore(name) {
  const s     = getCounter(name);
  const rate  = s.req > 0 ? Math.max(0, s.req - s.err) / s.req : 1;
  const avg   = s.calls > 0 ? Math.max(1, s.totalMs / s.calls) : 2000;
  const quota = DAILY_LIMITS[name] ? 1 - (s.req / DAILY_LIMITS[name]) : 1;
  return rate * 0.5 + (1 / (avg / 1000)) * 0.3 + quota * 0.2;
}

// ── Quota ─────────────────────────────────────────────────────────────────────
/**
 * @param {string} name - Provider name.
 * @returns {boolean} True when the provider has no daily limit entry, or has
 *   used less than 95% of it.
 */
function isQuotaOk(name) {
  const dailyCap = DAILY_LIMITS[name];
  if (!dailyCap) return true;
  return getCounter(name).req < dailyCap * 0.95;
}

/**
 * Linearly projects today's request count to the end of the day and reports
 * whether it would exceed 85% of the provider's daily limit.
 *
 * The hour is taken in UTC because the counters are reset on the UTC day
 * boundary (see checkDailyReset); using local time would skew the projection
 * by the host's UTC offset.
 *
 * @param {string} name - Provider name.
 * @returns {boolean} True when the projection exceeds 85% of the limit;
 *   false when the provider has no limit entry.
 */
function predictExhaustion(name) {
  const c = getCounter(name), lim = DAILY_LIMITS[name]; if (!lim) return false;
  const hour = new Date().getUTCHours();
  // Requests per elapsed hour (at least 1 h to avoid dividing by zero) times the hours left.
  const proj = c.req + (c.req / Math.max(hour, 1)) * (24 - hour);
  return proj > lim * 0.85;
}

// ── Adaptive tokens ───────────────────────────────────────────────────────────
/**
 * Chooses a max-token budget for a reply.
 *
 * @param {string} [msg] - User message; only its length is used.
 * @param {string} [task] - Task type ('fast', 'code', 'reasoning', ...).
 * @param {number} [req] - Explicit budget; returned as-is when truthy.
 * @returns {number} Token budget.
 */
function adaptiveTokens(msg, task, req) {
  if (req) return req;

  const size = (msg ?? '').length;
  // Fast tasks and very short messages always get the smallest budget.
  if (task === 'fast' || size < 50) return 150;

  switch (task) {
    case 'code':
      return 1024;
    case 'reasoning':
      return 600;
    default:
      break;
  }

  // Otherwise scale with message length.
  return size < 100 ? 200 : size < 300 ? 400 : 512;
}

// ── SingleFlight ──────────────────────────────────────────────────────────────
// Promises currently running, keyed by request key.
const _inflight = new Map();

/**
 * De-duplicates concurrent work: while a call for `key` is pending, every
 * further call with the same key receives that same promise.
 *
 * @param {*} key - Identity of the work.
 * @param {function(): Promise} fn - Starts the work; only invoked when nothing is pending for `key`.
 * @returns {Promise} The shared in-flight promise.
 */
function singleFlight(key, fn) {
  if (!_inflight.has(key)) {
    const pending = fn().finally(() => {
      _inflight.delete(key);
    });
    _inflight.set(key, pending);
  }
  return _inflight.get(key);
}

// ── TTL Cache ─────────────────────────────────────────────────────────────────
/**
 * In-memory cache whose entries expire after a TTL derived from the message
 * text, capped at 1000 entries (the oldest inserted entry is evicted first).
 */
class TTLCache {
  constructor() {
    this.store = new Map();
  }

  /**
   * @param {string} [msg=''] - Message text used to pick the TTL.
   * @returns {number} TTL in ms: 24 h for rules/server info, 30 s for live
   *   status topics, 1 h otherwise.
   */
  _ttl(msg = '') {
    const lowered = msg.toLowerCase();
    const ttlRules = [
      [/regla|norma|plugin|info|servidor/, 86_400_000],
      [/online|jugador|tps|lag/, 30_000],
    ];
    const hit = ttlRules.find(([pattern]) => pattern.test(lowered));
    return hit ? hit[1] : 3_600_000;
  }

  /**
   * Builds a cache key from the conversation and the task.
   *
   * @param {Array<{role: string, content?: string}>} msgs - Messages; only the
   *   first 100 characters of each content take part in the key.
   * @param {string} task - Task type.
   * @returns {string} `<task>:<unsigned 32-bit hash>`.
   */
  key(msgs, task) {
    const fingerprint = msgs
      .map(({ role, content }) => `${role}:${(content ?? '').slice(0, 100)}`)
      .join('|');
    const text = `${fingerprint}${task}`;

    // hash = hash * 33 + charCode, truncated to 32 bits on every step.
    let hash = 5381;
    let idx = 0;
    while (idx < text.length) {
      hash = ((hash << 5) + hash + text.charCodeAt(idx)) | 0;
      idx += 1;
    }
    return `${task}:${hash >>> 0}`;
  }

  /**
   * @param {string} k - Cache key.
   * @returns {*} Cached value, or null when missing or expired (expired entries are removed).
   */
  get(k) {
    const entry = this.store.get(k);
    if (!entry) return null;

    const expired = Date.now() > entry.exp;
    if (expired) {
      this.store.delete(k);
      return null;
    }
    return entry.value;
  }

  /**
   * @param {string} k - Cache key.
   * @param {*} v - Value to store.
   * @param {string} [msg=''] - Message text used to choose the TTL.
   */
  set(k, v, msg = '') {
    const exp = Date.now() + this._ttl(msg);
    this.store.set(k, { value: v, exp });
    if (this.store.size > 1000) {
      const [oldestKey] = this.store.keys();
      this.store.delete(oldestKey);
    }
  }

  /** Empties the cache and logs it. */
  clear() {
    this.store.clear();
    console.log('[AI] Cache limpiada');
  }
}
export const _cache = new TTLCache();

// ── Pools de concurrencia ─────────────────────────────────────────────────────
// Concurrency pools: `active` is the number of running tasks, `max` the cap.
const _userQueue       = { active: 0, max: 4 };
const _backgroundQueue = { active: 0, max: 2 };

/**
 * Runs `fn` once the pool has a free slot, polling every 50 ms while it is full.
 * The slot is released whether `fn` resolves or rejects.
 *
 * @param {{active: number, max: number}} queue - Pool to run in.
 * @param {function(): Promise} fn - Work to execute.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withQueue(queue, fn) {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  while (queue.active >= queue.max) {
    await pause(50);
  }

  queue.active += 1;
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    queue.active -= 1;
  }
}

// ── Intent classifier (0 tokens) — SIN REGEX, keyword-based ──────────────────
// Replaces regex-based INTENT_PATTERNS with a cleaner keyword matching system
// Keyword rules, evaluated top to bottom; the first rule with a matching keyword wins.
const INTENT_RULES = [
  {
    keywords : ['hola', 'hey', 'hi', 'buenas', 'qué tal', 'ola', 'saludos', 'wenas'],
    matchMode: 'startsWith',
    type     : 'fast',
    intent   : 'greeting',
  },
  {
    keywords : ['regla', 'norma', 'prohibi', 'permit'],
    matchMode: 'contains',
    type     : 'volume',
    intent   : 'rules',
  },
  {
    keywords : ['```', '.yml', '.json', '.java', 'config'],
    matchMode: 'contains',
    type     : 'code',
    intent   : 'code',
  },
  {
    keywords : ['ban', 'sancion', 'report', 'trampa', 'hack', 'cheat'],
    matchMode: 'contains',
    type     : 'reasoning',
    intent   : 'moderation',
  },
  {
    keywords : ['analiza', 'explica', 'compara', 'argumenta'],
    matchMode: 'contains',
    type     : 'reasoning',
    intent   : 'complex',
  },
  {
    keywords : ['cómo', 'qué', 'cuál', 'dónde', 'cuándo', 'por qué'],
    matchMode: 'contains',
    type     : 'spanish',
    intent   : 'question',
  },
];

/**
 * Classifies a message without calling any model.
 *
 * @param {string} [message] - Raw user message (null/undefined is treated as empty).
 * @returns {{intent: string, type: string}} Messages of 1–20 characters (after
 *   trimming) are 'short'/'fast'; otherwise the first matching rule decides,
 *   falling back to 'general'/'spanish'.
 */
export function classifyIntent(message) {
  const normalized = (message ?? '').trim().toLowerCase();
  const size = normalized.length;
  if (size > 0 && size <= 20) {
    return { intent: 'short', type: 'fast' };
  }

  const matchedRule = INTENT_RULES.find(({ keywords, matchMode }) =>
    keywords.some((kw) => {
      if (matchMode === 'startsWith') return normalized.startsWith(kw);
      if (matchMode === 'contains') return normalized.includes(kw);
      return false;
    }));

  if (matchedRule) {
    return { intent: matchedRule.intent, type: matchedRule.type };
  }
  return { intent: 'general', type: 'spanish' };
}

// ── Gemini Rotator ────────────────────────────────────────────────────────────
/**
 * Round-robin rotator over the configured Gemini API keys.
 *
 * `get()` prefers keys that are not marked failed and were not handed out in
 * the last 65 s. When no such key exists it clears the failed set and falls
 * back to plain round-robin over all keys (undefined when none are configured).
 * `fail(key)` marks a key as failed.
 */
const geminiRotator = (() => {
  const COOLDOWN_MS = 65000;
  const keys = ai.gemini?.keys ?? [ai.gemini?.apiKey].filter(Boolean);
  const failedKeys = new Set();
  const lastUsedAt = new Map();
  let cursor = 0;

  const isUsable = (key) =>
    !failedKeys.has(key) && Date.now() - (lastUsedAt.get(key) ?? 0) > COOLDOWN_MS;

  function get() {
    const usable = keys.filter(isUsable);
    if (usable.length === 0) {
      failedKeys.clear();
      const fallbackKey = keys[cursor % keys.length];
      cursor += 1;
      return fallbackKey;
    }
    const chosen = usable[cursor % usable.length];
    cursor += 1;
    lastUsedAt.set(chosen, Date.now());
    return chosen;
  }

  function fail(key) {
    failedKeys.add(key);
  }

  return { get, fail };
})();

// ── Función base OpenAI-compatible ───────────────────────────────────────────
// v9: Handles GLM-5.1 reasoning_content (separate from content)
/**
 * Sends one non-streaming chat-completion request to an OpenAI-compatible
 * endpoint and returns the cleaned reply text.
 *
 * @param {string} url - Chat-completions endpoint.
 * @param {string} key - Bearer token.
 * @param {string} model - Model id.
 * @param {Array<{role: string, content: string}>} messages - Conversation.
 * @param {number} maxTokens - Sent as `max_tokens`.
 * @param {string} [systemOverride] - When truthy, replaces every system message
 *   with a single system message carrying this text.
 * @returns {Promise<string>} Reply content after cleanThinkingArtifacts(); ''
 *   when the response carries no message.
 * @throws {Error} On a non-OK status, with `status` and `code` set to the HTTP
 *   status; also whatever fetch() throws (including the 15 s timeout).
 */
async function oai(url, key, model, messages, maxTokens, systemOverride) {
  let outgoing = messages;
  if (systemOverride) {
    const withoutSystem = messages.filter((m) => m.role !== 'system');
    outgoing = [{ role: 'system', content: systemOverride }, ...withoutSystem];
  }

  const response = await fetch(url, {
    method : 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` },
    body   : JSON.stringify({ model, messages: outgoing, max_tokens: maxTokens, stream: false }),
    signal : AbortSignal.timeout(15000),
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => '');
    const failure = new Error(`${model} ${response.status}: ${errorText.slice(0, 100)}`);
    failure.status = response.status;
    failure.code = response.status;
    throw failure;
  }

  const payload = await response.json();
  const reply = payload.choices?.[0]?.message;
  if (!reply) return '';

  const rawContent = reply.content?.trim() ?? '';

  // Some models return their reasoning in a separate field; it is only logged.
  if (reply.reasoning_content) {
    console.log(`[AI] ${model}: reasoning_content present (${reply.reasoning_content.length} chars thinking, ${rawContent.length} chars content)`);
  }

  // Thinking can leak into `content` whether or not reasoning_content exists,
  // so the reply is always cleaned.
  return cleanThinkingArtifacts(rawContent);
}

/**
 * @param {*} e - Error-like value (may be null/undefined).
 * @returns {boolean} True for TimeoutError / AbortError, or when the message mentions "timeout".
 */
function isTO(e) {
  const TIMEOUT_NAMES = ['TimeoutError', 'AbortError'];
  if (TIMEOUT_NAMES.includes(e?.name)) return true;
  return /timeout/i.test(e?.message ?? '');
}

// ── callDirect: per-provider implementation ──────────────────────────────────
/**
 * Sends one chat request to the named provider and returns the reply text.
 * This function does no retrying, circuit breaking or stats recording.
 *
 * @param {string} name - Provider id (one of the `case` labels below).
 * @param {Array<{role: string, content: string}>} messages - Conversation to send.
 * @param {number} maxTokens - Maximum tokens to request from the provider.
 * @returns {Promise<string>} Reply text; some providers may yield '' when the
 *   response carries no text.
 * @throws {Error} When the provider is unknown, not configured, or the request
 *   fails. HTTP failures carry `status` and `code` set to the HTTP status.
 */
async function callDirect(name, messages, maxTokens) {
  switch (name) {
    // ── GLM (ZhipuAI) ─────────────────────────────────────────────────────
    case 'glm51': {
      if (!glm.apiKey) throw new Error('glm51: sin key GLM');
      return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
        glm.apiKey, 'glm-5.1', messages, maxTokens);
    }

    case 'glmAir': {
      if (!glm.apiKey) throw new Error('glmAir: sin key GLM');
      return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
        glm.apiKey, 'glm-4-air', messages, maxTokens);
    }

    case 'glmFlash': {
      if (!glm.apiKey) throw new Error('glmFlash: sin key GLM');
      return oai(glm.baseUrl ?? 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
        glm.apiKey, 'glm-4-flash', messages, maxTokens);
    }

    case 'pollinations': {
      // Pollinations needs no API key. Only the last user message and the first
      // system message are used by the GET fallback further down.
      const last = messages.filter(m => m.role === 'user').pop()?.content ?? '';
      const sys  = messages.find(m => m.role === 'system')?.content ?? '';

      // Race all models in parallel; the first non-trivial reply wins.
      const POLL_MODELS = ['openai', 'mistral', 'llama', 'qwen'];
      try {
        const result = await Promise.any(
          POLL_MODELS.map(model =>
            fetch('https://text.pollinations.ai/openai/chat/completions', {
              method : 'POST',
              headers: { 'Content-Type': 'application/json' },
              body   : JSON.stringify({
                model,
                messages,
                max_tokens: maxTokens,
                stream   : false,
                seed     : Math.floor(Math.random() * 9999),
              }),
              signal: AbortSignal.timeout(12000),
            }).then(async res => {
              if (!res.ok) throw new Error(`${model} ${res.status}`);
              const data = await res.json();
              const text = data.choices?.[0]?.message?.content?.trim();
              // Replies of 2 characters or fewer count as failures.
              if (!text || text.length <= 2) throw new Error(`${model} empty`);
              return text;
            })
          )
        );
        if (result) return result;
      } catch { /* every parallel attempt failed — fall through to the GET endpoint */ }

      // Fallback: anonymous GET endpoint with the prompt truncated to fit in the URL.
      try {
        const shortMsg = last.slice(0, 800);
        const shortSys = sys.slice(0, 400);
        const url = 'https://text.pollinations.ai/' +
          encodeURIComponent(shortMsg) +
          '?seed=' + Math.floor(Math.random() * 9999) +
          (shortSys ? '&system=' + encodeURIComponent(shortSys) : '');
        const r2 = await fetch(url, {
          headers: { 'User-Agent': 'Mozilla/5.0' },
          signal : AbortSignal.timeout(12000),
        });
        if (r2.ok) {
          const t = await r2.text();
          if (t?.trim() && t.trim().length > 2) return t.trim();
        }
      } catch { /* deliberate best effort: reported by the single error below */ }

      throw new Error('pollinations all endpoints failed');
    }

    case 'cerebras':
      if (!ai.cerebras?.apiKey) throw new Error('cerebras: sin key');
      // Qwen3-235B-A22B on Cerebras (ranked #2 in the file's tournament notes).
      return oai(ai.cerebras.baseUrl, ai.cerebras.apiKey,
        'qwen3-235b-a22b', messages, maxTokens);

    // Qwen3-32B on Groq (ranked #1 in the file's tournament notes).
    case 'groqQwen3':
      if (!ai.groq?.apiKey) throw new Error('groqQwen3: sin key');
      return oai(ai.groq.baseUrl, ai.groq.apiKey,
        'qwen/qwen3-32b', messages, maxTokens);

    case 'groq':
      if (!ai.groq?.apiKey) throw new Error('groq: sin key');
      // Llama-4-Scout is the default passed to getBestModel (defined elsewhere).
      return oai(ai.groq.baseUrl, ai.groq.apiKey,
        getBestModel('groq', 'llama-4-scout-17b-16e-instruct', 'large'), messages, maxTokens);

    case 'groqFast':
      if (!ai.groq?.apiKey) throw new Error('groqFast: sin key');
      return oai(ai.groq.baseUrl, ai.groq.apiKey,
        getBestModel('groq', 'llama-3.1-8b-instant', 'fast'), messages, maxTokens);

    case 'groqKimi':
      if (!ai.groq?.apiKey) throw new Error('groqKimi: sin key');
      return oai(ai.groq.baseUrl, ai.groq.apiKey, 'moonshotai/kimi-k2-instruct', messages, maxTokens);

    case 'mistral':
      if (!ai.mistral?.apiKey) throw new Error('mistral: sin key');
      return oai(ai.mistral.baseUrl, ai.mistral.apiKey,
        getBestModel('mistral', 'mistral-large-latest', 'large'), messages, maxTokens);

    case 'mistralCode':
      if (!ai.mistral?.apiKey) throw new Error('mistralCode: sin key');
      return oai(ai.mistral.baseUrl, ai.mistral.apiKey, 'codestral-latest', messages, maxTokens);

    case 'gemma4': {
      const entry = geminiRotator.get();
      if (!entry) throw new Error('gemma4: sin key Gemini');

      // The Gemini API takes the system prompt as a separate systemInstruction,
      // so all system messages are merged into one text and removed from the chat.
      const sysMsgs = messages.filter(m => m.role === 'system');
      const sysText = sysMsgs.map(m => m.content ?? '').join('\n\n');
      const chatMsgs = messages.filter(m => m.role !== 'system');

      // Convert to the Gemini format: 'assistant' becomes 'model', anything else 'user'.
      const contents = chatMsgs.map(m => ({
        role : m.role === 'assistant' ? 'model' : 'user',
        parts: [{ text: m.content ?? '' }],
      }));

      // With no chat messages, send a placeholder turn so the request is not empty.
      if (!contents.length) contents.push({ role: 'user', parts: [{ text: '.' }] });

      const body = {
        contents,
        generationConfig: {
          maxOutputTokens: maxTokens,
          temperature: 0.8,
          topP: 0.95,
        },
      };
      if (sysText) {
        body.systemInstruction = { parts: [{ text: sysText }] };
      }

      // Model gemma-4-31b-it through the Gemini generateContent endpoint.
      // The key from the rotator is passed as a query parameter.
      const res = await fetch(
        `https://generativelanguage.googleapis.com/v1beta/models/gemma-4-31b-it:generateContent?key=${entry}`,
        {
          method : 'POST',
          headers: { 'Content-Type': 'application/json' },
          body   : JSON.stringify(body),
          signal : AbortSignal.timeout(15000),
        }
      );
      // Any non-2xx response marks this key as failed in the rotator.
      if (!res.ok) { geminiRotator.fail(entry); throw Object.assign(new Error(`gemma4 ${res.status}`), { status: res.status, code: res.status }); }
      const data = await res.json();
      return data.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? '';
    }

    case 'openrouter': {
      if (!ai.openrouter?.apiKey) throw new Error('openrouter: sin key');
      const model = 'meta-llama/llama-3.3-70b-instruct:free';
      const res = await fetch(ai.openrouter.baseUrl, {
        method : 'POST',
        headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw', 'X-Title': 'TomateSMP' },
        body   : JSON.stringify({ model, messages, max_tokens: maxTokens }),
        signal : AbortSignal.timeout(20000),
      });
      if (!res.ok) throw Object.assign(new Error(`openrouter ${res.status}`), { status: res.status, code: res.status });
      const data = await res.json();
      // An error can arrive inside a 2xx response body.
      if (data.error) throw new Error(`openrouter: ${data.error.message ?? data.error}`);
      return data.choices?.[0]?.message?.content?.trim() ?? '';
    }

    case 'openrouterR1': {
      if (!ai.openrouter?.apiKey) throw new Error('openrouterR1: sin key');
      const res = await fetch(ai.openrouter.baseUrl, {
        method : 'POST',
        headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.openrouter.apiKey}`, 'HTTP-Referer': 'https://tomatesmp.pw' },
        body   : JSON.stringify({ model: 'deepseek/deepseek-r1:free', messages, max_tokens: maxTokens }),
        signal : AbortSignal.timeout(30000),
      });
      if (!res.ok) throw Object.assign(new Error(`openrouterR1 ${res.status}`), { status: res.status, code: res.status });
      const data = await res.json();
      // Same check as the 'openrouter' case: report an error body as a failure
      // instead of returning an empty string.
      if (data.error) throw new Error(`openrouterR1: ${data.error.message ?? data.error}`);
      return data.choices?.[0]?.message?.content?.trim() ?? '';
    }

    case 'cloudflare': {
      if (!ai.cloudflare?.accountId || !ai.cloudflare?.apiToken) throw new Error('cloudflare: sin config');
      const res = await fetch(
        `https://api.cloudflare.com/client/v4/accounts/${ai.cloudflare.accountId}/ai/run/@cf/meta/llama-3.3-70b-instruct-fp8-fast`,
        {
          method : 'POST',
          headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${ai.cloudflare.apiToken}` },
          body   : JSON.stringify({ messages, max_tokens: maxTokens }),
          signal : AbortSignal.timeout(20000),
        }
      );
      if (!res.ok) throw Object.assign(new Error(`cloudflare ${res.status}`), { status: res.status, code: res.status });
      const data = await res.json();
      // Several result shapes are accepted; the first one that matches wins.
      const r = data.result;
      if (typeof r?.response === 'string') return r.response.trim();
      if (Array.isArray(r) && r[0]?.response) return String(r[0].response).trim();
      if (r?.choices?.[0]?.message?.content) return r.choices[0].message.content.trim();
      if (typeof r === 'string') return r.trim();
      return '';
    }

    // ── HuggingFace Inference API providers ───────────────────────────────
    case 'hfFast':
      if (!isHFAvailable()) throw new Error('hfFast: HF unavailable');
      return callHuggingFace(messages, 'fast', maxTokens);

    case 'hfSmart':
      if (!isHFAvailable()) throw new Error('hfSmart: HF unavailable');
      return callHuggingFaceCascade(messages, 'smart', maxTokens);

    case 'hfSpanish':
      if (!isHFAvailable()) throw new Error('hfSpanish: HF unavailable');
      return callHuggingFaceCascade(messages, 'spanish', maxTokens);

    case 'hfCode':
      if (!isHFAvailable()) throw new Error('hfCode: HF unavailable');
      return callHuggingFace(messages, 'code', maxTokens);

    case 'hfReasoning':
      if (!isHFAvailable()) throw new Error('hfReasoning: HF unavailable');
      return callHuggingFaceCascade(messages, 'reasoning', maxTokens);

    case 'hfFallback':
      if (!isHFAvailable()) throw new Error('hfFallback: HF unavailable');
      return callHuggingFace(messages, 'fast', maxTokens);

    // ── HIVE cluster ──────────────────────────────────────────────────────
    case 'hive': {
      try {
        const userMsg = messages.filter(m => m.role === 'user').pop()?.content ?? '';
        const result = await hiveGenerate(messages, maxTokens, userMsg, 'hybrid');
        if (result?.text && result.text.trim().length > 2) {
          return result.text;
        }
        // No 'hive:' prefix here: the catch below adds it exactly once.
        throw new Error('empty response');
      } catch (err) {
        // Every hive failure is reported as a 503.
        throw Object.assign(new Error(`hive: ${err.message}`), { status: 503, code: 503 });
      }
    }

    // ── Local model ───────────────────────────────────────────────────────
    case 'local': {
      if (!isLocalAIReady()) throw new Error('local: model not ready');
      const { localChatPrimary } = await import('./local-ai.js');
      return localChatPrimary(messages, maxTokens, 0.8);
    }

    default:
      throw new Error(`Proveedor desconocido: ${name}`);
  }
}

// ── callProvider: circuit breaker + backoff + stats ──────────────────────────
// HTTP statuses treated as permanent failures: the provider's breaker is forced
// open for 24 hours when one of these is seen.
const PERM_ERRORS = [401, 402, 403, 404, 422];

/**
 * Calls a provider through its circuit breaker, with limited retries, and
 * records request, latency and error stats.
 *
 * Retry policy (at most 3 attempts):
 * - HTTP 429: wait 1.5–4.5 s (randomised) and retry, on the first two attempts.
 * - HTTP 400 whose message mentions length/tokens, first attempt only: shrink
 *   the conversation to system messages (each cut to 1500 chars) plus the last
 *   2 non-system messages, then retry.
 *
 * @param {string} name - Provider id; must have an entry in `breakers`.
 * @param {Array<{role: string, content: string}>} messages - Conversation to send.
 * @param {number} maxTokens - Maximum tokens to request.
 * @param {{v: boolean}} [failedFlag] - Flag shared across one routing pass. Only
 *   the first non-permanent failure seen with a given flag is reported to the breaker.
 * @returns {Promise<string>} The provider reply.
 * @throws {Error} When the breaker refuses the request or the provider call fails.
 */
async function callProvider(name, messages, maxTokens, failedFlag) {
  checkDailyReset();
  const breaker = breakers[name];
  if (!breaker?.canRequest()) throw new Error(`${name}: circuit OPEN`);

  const startedAt = Date.now();
  recordReq(name);

  try {
    let reply;
    let attempt = 0;
    while (attempt < 3) {
      try {
        reply = await callDirect(name, messages, maxTokens);
        break;
      } catch (e) {
        const rateLimited = e.status === 429 && attempt < 2;
        const tooLong = e.status === 400 && attempt === 0 &&
          /length|too long|reduce|token/i.test(e.message ?? '');

        if (rateLimited) {
          // Randomised backoff before the next attempt.
          await new Promise(resolve => setTimeout(resolve, 1500 + Math.random() * 3000));
        } else if (tooLong) {
          console.warn(`[AI] ${name}: mensaje muy largo, comprimiendo...`);
          const systemMsgs = messages.filter(m => m.role === 'system');
          const lastTwo = messages.filter(m => m.role !== 'system').slice(-2);
          const trimmedSystem = systemMsgs.map(m => ({ ...m, content: m.content.slice(0, 1500) }));
          // Rebinds the local parameter only; the caller's array is untouched.
          messages = [...trimmedSystem, ...lastTwo];
        } else {
          throw e;
        }
      }
      attempt += 1;
    }
    breaker.recordSuccess();
    recordLatency(name, Date.now() - startedAt);
    return reply;
  } catch (err) {
    const code = err.status ?? err.code ?? null;
    const permanent = PERM_ERRORS.includes(code);

    if (permanent) {
      // Force the breaker open and push lastFail 24 h into the future.
      breaker.state = 'OPEN';
      breaker.openCount = 99;
      breaker.lastFail = Date.now() + 86_400_000;
      console.warn(`[AI] ⛔ ${name} deshabilitado 24h (${code})`);
    } else if (!failedFlag?.v) {
      if (failedFlag) failedFlag.v = true;
      breaker.recordFailure(code, isTO(err));
    }

    recordError(name);
    if (!permanent) {
      console.warn(`[AI] ❌ ${name}: ${err.message?.slice(0, 80)}`);
    }
    throw err;
  }
}

// ── Response validation ──────────────────────────────────────────────────────
// Deterministic checks only: fixed phrase lists, length limits and a word-count
// repetition detector. Two small character-class regexes are used, one for
// stripping punctuation and one for splitting on whitespace.

// Phrases that must never open a valid response (compared in lowercase).
const AI_DISCLAIMER_PREFIXES = [
  'as an ai',
  'as a language model',
  'como una ia',
  'como un modelo',
  "i'm an ai",
  'i am an ai',
  "i'm a language model",
  'i am a language model',
  'como inteligencia artificial',
  'como modelo de lenguaje',
  'as an assistant',
  'como asistente',
];

// Whole-response values that indicate the model produced garbage.
const GARBAGE_RESPONSES = new Set([
  'error', 'null', 'undefined', 'true', 'false', '{}', '[]',
  'nan', 'none', 'nil', 'void',
]);

/**
 * Decides whether a provider reply is usable.
 *
 * A reply is rejected when it is not a non-empty string, is shorter than
 * 2 characters after trimming, consists only of whitespace or punctuation,
 * equals one of GARBAGE_RESPONSES, is longer than 2000 characters, starts with
 * one of AI_DISCLAIMER_PREFIXES, or — for replies of more than 10 words —
 * repeats any word longer than 2 characters more than 10 times.
 *
 * @param {*} text - Candidate reply.
 * @returns {boolean} True when the reply passes every check.
 */
function isValidResponse(text) {
  if (!text || typeof text !== 'string') return false;
  const t = text.trim();

  if (t.length < 2) return false;

  // Nothing left once whitespace and punctuation are removed.
  const withoutPunct = t.replace(/[\s.,!?_\-:;'"()]/g, '');
  if (withoutPunct.length === 0) return false;

  const tLower = t.toLowerCase();
  if (GARBAGE_RESPONSES.has(tLower)) return false;

  // Over-long replies are rejected (treated as likely prompt recitation).
  if (t.length > 2000) return false;

  if (AI_DISCLAIMER_PREFIXES.some(prefix => tLower.startsWith(prefix))) return false;

  // Repetition-loop detection. A Map is used instead of a plain object so that
  // words such as "constructor" or "__proto__" are counted like any other word
  // rather than colliding with inherited Object.prototype properties.
  const words = tLower.split(/\s+/);
  if (words.length > 10) {
    const wordCounts = new Map();
    for (const w of words) {
      const count = (wordCounts.get(w) ?? 0) + 1;
      wordCounts.set(w, count);
      if (count > 10 && w.length > 2) return false;
    }
  }

  return true;
}

// ── Response disclaimer check ────────────────────────────────────────────────
/**
 * Looks for AI-disclaimer or refusal phrases anywhere in a reply.
 *
 * Despite the name, no model is called: this is a plain case-insensitive
 * substring search. The function is async, so it always returns a Promise.
 *
 * @param {string} text - Reply to inspect. Falsy values and replies shorter
 *   than 10 characters are accepted without inspection.
 * @param {string} [userMsg] - Not used by the current implementation.
 * @returns {Promise<{valid: boolean, reason?: string, replacement?: string}>}
 *   `{ valid: true }`, or `{ valid: false, reason: 'ai_disclaimer', replacement }`
 *   when one of the phrases is found.
 */
async function aiValidateResponse(text, userMsg) {
  if (!text || text.length < 10) return { valid: true };

  const haystack = text.toLowerCase().trim();

  // Phrases that give away the assistant; matched anywhere, not only as a prefix.
  const disclaimerMarkers = [
    'como ia,',
    'como inteligencia artificial,',
    'as an ai,',
    'as a language model,',
    'i cannot fulfill',
    'no puedo cumplir esa solicitud',
    'no puedo procesar esa solicitud',
  ];

  const leaked = disclaimerMarkers.some(marker => haystack.includes(marker));
  if (leaked) {
    return { valid: false, reason: 'ai_disclaimer', replacement: 'eso no va' };
  }
  return { valid: true };
}

// ── Main router ──────────────────────────────────────────────────────────────
/**
 * Safety valve: when every known provider's circuit breaker is OPEN, closes
 * them all again (state 'CLOSED', openCount 0) so routing can still try
 * something. Does nothing when at least one provider is not OPEN.
 *
 * @returns {void}
 */
function ensureAvailability() {
  const everyProvider = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];

  const tripped = [];
  for (const provider of everyProvider) {
    if (breakers[provider]?.state === 'OPEN') tripped.push(provider);
  }
  if (tripped.length !== everyProvider.length) return;

  console.warn('[AI] Todos los proveedores en OPEN — reseteando');
  for (const provider of tripped) {
    const breaker = breakers[provider];
    breaker.state = 'CLOSED';
    breaker.openCount = 0;
  }
}

/**
 * Routes a request through the cascade for `taskType`, trying one provider at
 * a time, best health score first.
 *
 * Providers are ordered by healthScore(), minus 0.3 when predictExhaustion()
 * flags them, then filtered to those whose breaker allows requests and whose
 * quota is OK. The first reply that passes isValidResponse() wins. When no
 * provider is available, or all fail, the local model is tried as a last resort.
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation to send.
 * @param {string} taskType - Key into CASCADES; unknown keys use CASCADES.spanish.
 * @param {number} maxTokens - Maximum tokens to request.
 * @param {string} userMsg - Latest user message, forwarded to the local fallback.
 * @returns {Promise<string>} The first valid reply.
 * @throws {Error} When no provider is available or every provider (and the
 *   local fallback) failed.
 */
async function _route(messages, taskType, maxTokens, userMsg) {
  ensureAvailability();

  // Health score with a penalty for providers predicted to run out of quota.
  const adjustedScore = (provider) =>
    healthScore(provider) + (predictExhaustion(provider) ? -0.3 : 0);

  const ranked = [...(CASCADES[taskType] ?? CASCADES.spanish)]
    .sort((a, b) => adjustedScore(b) - adjustedScore(a));
  const candidates = ranked.filter(n => breakers[n]?.canRequest() && isQuotaOk(n));

  if (candidates.length === 0) {
    // Nothing remote is usable: go straight to the local model.
    if (isLocalAIReady()) {
      const localReply = await emergencyFallback(messages, userMsg).catch(() => null);
      if (localReply) return localReply;
    }
    throw new Error('[AI] Sin proveedores disponibles');
  }

  // Shared across the whole pass, so callProvider reports only the first
  // non-permanent failure of this pass to a breaker.
  const failedFlag = { v: false };

  // Strictly sequential on purpose (no hedged/parallel requests): move on to
  // the next provider only after the current one failed or gave an invalid reply.
  for (const provider of candidates) {
    try {
      const reply = await callProvider(provider, messages, maxTokens, failedFlag);
      if (isValidResponse(reply)) return reply;
    } catch {
      continue;
    }
  }

  // Last resort: local model.
  if (!isLocalAIReady()) {
    console.warn('[AI] Modelo local no disponible para fallback (isLocalAIReady=false)');
  } else {
    console.log('[AI] 🏠 Usando modelo local como fallback (todos los proveedores fallaron)');
    const localReply = await emergencyFallback(messages, userMsg).catch(e => {
      console.warn('[AI] Modelo local también falló:', e.message);
      return null;
    });
    if (localReply) return localReply;
  }
  throw new Error('[AI] Todos los proveedores fallaron');
}

/**
 * Owner-mode routing: same cascade idea as _route(), but ordered by plain
 * healthScore() (no exhaustion penalty) and accepting any non-blank reply
 * without running isValidResponse().
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation to send.
 * @param {string} taskType - Key into CASCADES; unknown keys use CASCADES.spanish.
 * @param {number} maxTokens - Maximum tokens to request.
 * @returns {Promise<string>} The first non-blank reply.
 * @throws {Error} 'Sin proveedores' when nothing is available and the local
 *   fallback yields nothing; '[AI] Sin respuesta' when every attempt failed.
 *   Errors thrown by the local fallback itself propagate unchanged.
 */
async function _routeOwner(messages, taskType, maxTokens) {
  ensureAvailability();
  const cascade  = [...(CASCADES[taskType] ?? CASCADES.spanish)];
  cascade.sort((a, b) => healthScore(b) - healthScore(a));
  const available = cascade.filter(n => breakers[n]?.canRequest() && isQuotaOk(n));

  if (!available.length && isLocalAIReady()) {
    // emergencyFallback returns a Promise, so its result must be awaited before
    // it can be checked; `??` on the Promise itself never reaches the fallback branch.
    const localReply = await emergencyFallback(messages, '');
    if (localReply) return localReply;
    throw new Error('Sin proveedores');
  }

  for (const name of available) {
    try {
      // A fresh flag per call: each provider's failure is reported independently.
      const r = await callProvider(name, messages, maxTokens, { v: false });
      if (r && r.trim().length > 0) return r;
    } catch { continue; }
  }

  if (isLocalAIReady()) {
    const localReply = await emergencyFallback(messages, '');
    if (localReply) return localReply;
  }
  throw new Error('[AI] Sin respuesta');
}

// ── callAI ───────────────────────────────────────────────────────────────────
/**
 * Main entry point: answers a conversation using caches, the user queue and
 * the provider router.
 *
 * Flow: derive the token budget, pass the messages through injectThinking()
 * (described as a no-op in the file header) and compressContext(), then
 * - owner mode: route via _routeOwner() inside the user queue, bypassing both caches;
 * - otherwise: exact cache, then semantic cache (unless `feats.semanticCache`
 *   is false), then _route() inside the user queue, de-duplicated with
 *   singleFlight(). Fresh replies are stored in both caches.
 * Every routed reply goes through cleanThinkingArtifacts().
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation.
 * @param {string} [taskType='spanish'] - Cascade / token-budget selector.
 * @param {?number} [maxTokens=null] - Explicit token budget hint for adaptiveTokens().
 * @param {string} [userMsg=''] - Latest user message; defaults to the last
 *   message with role 'user'.
 * @param {boolean} [ownerMode=false] - Use the owner routing path.
 * @returns {Promise<string>} The reply text.
 */
export async function callAI(messages, taskType = 'spanish', maxTokens = null, userMsg = '', ownerMode = false) {
  const question = userMsg || (messages.filter(m => m.role === 'user').pop()?.content ?? '');
  const tokenBudget = adaptiveTokens(question, taskType, maxTokens);
  const prepared = await compressContext(injectThinking(messages));
  const cacheKey = _cache.key(prepared, taskType); // computed on every path

  if (ownerMode) {
    return withQueue(_userQueue, async () =>
      cleanThinkingArtifacts(await _routeOwner(prepared, taskType, tokenBudget)));
  }

  // Exact-match cache
  const exactHit = _cache.get(cacheKey);
  if (exactHit) {
    console.log('[AI] 💾 cache hit');
    return exactHit;
  }

  // Semantic cache: best effort, lookup errors are ignored on purpose.
  if (feats.semanticCache !== false) {
    try {
      const semanticHit = await semanticCache.get(question);
      if (semanticHit) {
        console.log('[AI] 💾 semantic cache hit');
        return semanticHit;
      }
    } catch {}
  }

  const routeAndStore = async () => {
    const cleaned = cleanThinkingArtifacts(await _route(prepared, taskType, tokenBudget, question));
    _cache.set(cacheKey, cleaned, question);
    // Not awaited: a failed semantic-cache write must not fail the request.
    semanticCache.set(question, cleaned, semanticCache.getTTL(question)).catch(() => {});
    return cleaned;
  };
  return singleFlight(cacheKey, () => withQueue(_userQueue, routeAndStore));
}

// ── callAIBackground ─────────────────────────────────────────────────────────
/**
 * Background variant of callAI(): no caches, runs on the background queue and
 * always routes with the 'background' cascade. `taskType` only influences the
 * token budget computed by adaptiveTokens().
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation.
 * @param {string} [taskType='spanish'] - Passed to adaptiveTokens() only.
 * @param {?number} [maxTokens=null] - Explicit token budget hint.
 * @param {string} [userMsg=''] - Latest user message; defaults to the last
 *   message with role 'user'.
 * @returns {Promise<string>} Reply text after cleanThinkingArtifacts().
 */
export async function callAIBackground(messages, taskType = 'spanish', maxTokens = null, userMsg = '') {
  const question = userMsg || (messages.filter(m => m.role === 'user').pop()?.content ?? '');
  const tokenBudget = adaptiveTokens(question, taskType, maxTokens);
  const prepared = await compressContext(injectThinking(messages));

  const job = async () => {
    const raw = await _route(prepared, 'background', tokenBudget, question);
    return cleanThinkingArtifacts(raw);
  };
  return withQueue(_backgroundQueue, job);
}

// ── Mixture of Agents ────────────────────────────────────────────────────────
/**
 * Mixture-of-Agents: asks three providers for short proposals in parallel and
 * has 'glm51' synthesise a final answer from them.
 *
 * Falls back to the regular callAI() cascade when the feature is disabled
 * (`feats.moa` falsy), when no provider returns a usable proposal (longer than
 * 10 characters), or when the synthesis call fails or returns a blank reply.
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation.
 * @param {number} [maxTokens=600] - Token budget for the synthesis and fallback calls.
 * @returns {Promise<string>} Synthesised reply, or the callAI() reply on fallback.
 */
export async function callAIMoA(messages, maxTokens = 600) {
  if (!feats.moa) return callAI(messages, 'spanish', maxTokens);

  // Proposals are capped at 300 tokens each; one failing provider does not abort the rest.
  const settled = await Promise.allSettled(
    ['glm51', 'groqFast', 'pollinations'].map(name =>
      callProvider(name, messages, 300, { v: false })),
  );
  const proposals = settled
    .filter(r => r.status === 'fulfilled' && r.value?.length > 10)
    .map((r, i) => `Respuesta ${i + 1}: ${r.value}`)
    .join('\n\n');
  if (!proposals) return callAI(messages, 'spanish', maxTokens);

  try {
    const synthesis = await callProvider('glm51', [
      { role: 'system', content: 'Sintetiza la mejor respuesta de las siguientes opciones. Sin texto extra.' },
      { role: 'user', content: `${proposals}\n\nSíntesis:` },
    ], maxTokens, { v: false });
    if (typeof synthesis === 'string' && synthesis.trim().length > 0) return synthesis;
    console.warn('[AI] MoA: síntesis vacía — usando cascada normal');
  } catch (err) {
    // The synthesiser is a single provider; its failure must not fail the whole
    // request when the normal cascade can still answer.
    console.warn(`[AI] MoA: síntesis falló (${err.message?.slice(0, 80)}) — usando cascada normal`);
  }
  return callAI(messages, 'spanish', maxTokens);
}

// ── Self-healing ─────────────────────────────────────────────────────────────
/**
 * Starts a periodic quota monitor (every 5 minutes) when `feats.selfHealing`
 * is enabled. Each tick computes the average used fraction of the daily limit
 * across all providers and logs a warning when it exceeds 85 %. Providers
 * without a daily limit count as 0 in that average.
 *
 * The interval handle is not kept, so the monitor cannot be stopped and each
 * call starts another timer.
 *
 * @returns {void}
 */
export function startSelfHealing() {
  if (!feats.selfHealing) return;

  const checkQuota = () => {
    const providers = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];

    let totalRequests = 0;
    for (const provider of providers) totalRequests += getCounter(provider).req;
    if (totalRequests === 0) return; // nothing used yet today

    let usedFractionSum = 0;
    for (const provider of providers) {
      const limit = DAILY_LIMITS[provider];
      usedFractionSum += limit ? getCounter(provider).req / limit : 0;
    }

    if (usedFractionSum / providers.length > 0.85) {
      console.warn('[SelfHeal] Cuota global alta — priorizar proveedores con más quota');
    }
  };

  setInterval(checkQuota, 5 * 60 * 1000);
  console.log('[HEAL] Self-healing v7 iniciado');
}

// ── Warmup ───────────────────────────────────────────────────────────────────
/**
 * Pings a fixed set of providers in parallel with a tiny request ("ok",
 * 5 tokens) and logs per-provider success with latency, or the failure.
 * Does nothing unless `feats.warmup` is enabled. Never rejects because of a
 * provider failure.
 *
 * @returns {Promise<void>}
 */
export async function warmupProviders() {
  if (!feats.warmup) return;

  const probe = [{ role: 'user', content: 'ok' }];
  const targets = ['glm51', 'glmFlash', 'groqFast', 'cerebras', 'pollinations', 'mistral'];
  console.log('[WARMUP] Verificando proveedores...');

  const ping = async (name) => {
    const startedAt = Date.now();
    try {
      await callProvider(name, probe, 5, { v: false });
      console.log(`[WARMUP] ✅ ${name} ${Date.now() - startedAt}ms`);
    } catch (e) {
      console.log(`[WARMUP] ❌ ${name}: ${e.message?.slice(0, 40)}`);
    }
  };

  await Promise.allSettled(targets.map(name => ping(name)));
}

// ── Context compression ──────────────────────────────────────────────────────
/**
 * Shrinks a conversation before it is sent to a provider.
 *
 * - Chat history (non-system messages) is reduced to the last 4 messages.
 * - System prompts are kept intact unless they are very long, in which case
 *   the first 3500 characters and the last 1000 characters are kept with a
 *   marker in between. The cut is purely positional; it does not look for
 *   section boundaries.
 *
 * A system prompt is only rewritten when that actually makes it shorter, i.e.
 * when it is longer than head + marker + tail. Shorter prompts would have
 * overlapping head and tail slices, producing duplicated text and a longer
 * result than the input.
 *
 * @param {Array<{role: string, content: string}>} messages - Conversation.
 * @returns {Promise<Array<{role: string, content: string}>>} System messages
 *   first (possibly compressed), then the most recent chat messages. The input
 *   array and its message objects are not mutated.
 */
async function compressContext(messages) {
  const HEAD_CHARS = 3500;
  const TAIL_CHARS = 1000;
  const MARKER = '\n...(contexto comprimido)\n';
  const MAX_CHAT_MESSAGES = 4;
  // Smallest length for which head + marker + tail is shorter than the original.
  const minCompressibleLength = HEAD_CHARS + TAIL_CHARS + MARKER.length;

  const sys  = messages.filter(m => m.role === 'system');
  const chat = messages.filter(m => m.role !== 'system');

  const compressedSys = sys.map(m => {
    const content = m.content;
    if (typeof content !== 'string' || content.length <= minCompressibleLength) return m;
    return { ...m, content: content.slice(0, HEAD_CHARS) + MARKER + content.slice(-TAIL_CHARS) };
  });

  return [...compressedSys, ...chat.slice(-MAX_CHAT_MESSAGES)];
}

// ── Stats ────────────────────────────────────────────────────────────────────
/**
 * Builds a per-provider usage report for the current day.
 *
 * @returns {Object<string, {requests: number, limit: number, pct: string, state: string, score: string}>}
 *   One entry per provider (limit defaults to 99999 when none is configured,
 *   state is '?' when the provider has no breaker), plus an `_hf` entry with
 *   getHFStats() output when that call succeeds.
 */
export function getDailyStats() {
  const report = {};
  const providers = [...ALL_PROVIDERS, ...HF_PROVIDERS, ...GLM_PROVIDERS];

  providers.forEach(provider => {
    const counter = getCounter(provider);
    const limit = DAILY_LIMITS[provider] ?? 99999;
    report[provider] = {
      requests: counter.req,
      limit,
      pct     : `${((counter.req / limit) * 100).toFixed(1)}%`,
      state   : breakers[provider]?.state ?? '?',
      score   : healthScore(provider).toFixed(2),
    };
  });

  // HuggingFace-specific stats are optional; a failure here must not break the report.
  try {
    report._hf = getHFStats();
  } catch {}

  return report;
}

/**
 * Lists breaker state and health score for the main and GLM providers.
 * HuggingFace providers are not part of this list.
 *
 * @returns {Array<{name: string, state: string, score: string}>} One entry per
 *   provider; `state` is 'UNKNOWN' when the provider has no breaker and
 *   `score` is the health score with two decimals.
 */
export function getProviderStatus() {
  const providers = [...ALL_PROVIDERS, ...GLM_PROVIDERS];
  return providers.map(name => {
    const state = breakers[name]?.state ?? 'UNKNOWN';
    const score = healthScore(name).toFixed(2);
    return { name, state, score };
  });
}

/**
 * Empties the exact-match response cache by calling `_cache.clear()`.
 *
 * @returns {void}
 */
export function clearCache() {
  _cache.clear();
}

// ── HIVE exports ────────────────────────────────────────────────────────────
export { getHiveStatus, getHiveStats, benchmarkHive, warmUpWorkers, hiveInit as initHive };

/**
 * Quality gate for a finished reply.
 *
 * @param {string} response - Reply to check.
 * @param {string} [userMessage] - Not used by the current implementation.
 * @returns {Promise<{pass: boolean, reason?: string}>} Always passes when
 *   `feats.qualityGate` is disabled; fails with reason 'empty' for missing
 *   replies or replies shorter than 10 characters after trimming; otherwise
 *   `pass` is the result of isValidResponse().
 */
export function runQualityGate(response, userMessage) {
  let verdict;
  if (!feats.qualityGate) {
    verdict = { pass: true };
  } else if (!response || response.trim().length < 10) {
    verdict = { pass: false, reason: 'empty' };
  } else {
    verdict = { pass: isValidResponse(response) };
  }
  return Promise.resolve(verdict);
}