// Spaces: TomatitoToho / zelin-bot (Paused) — File size: 8,691 Bytes — ee826ee

/**
 * context-engine.js — Context Engineering para Zelin
 * ====================================================
 * Basado en investigación real:
 * - Google ADK (Dic 2025): "Context as a compiled view, not a string buffer"
 * - Liu et al. "Lost in the Middle" (2023): info en el medio se ignora 30%+ más
 * - Manus AI architecture: event stream tipado + aggressive pruning
 * - Context Rot research (2025): degradación empieza a 50% del context window
 *
 * TRES MEJORAS CONCRETAS:
 * 1. Event Stream — historial tipado en vez de texto crudo
 * 2. Strategic Positioning — info crítica al INICIO y al FINAL (anti-lost-in-middle)
 * 3. Auto-compaction jerárquica — trigger a 50%, no cuando se llena
 */

import { callAIBackground } from './ai.js';
import { isLocalAIReady, ollamaChatDirect } from './local-ai.js';

// ── 1. EVENT STREAM tipado ────────────────────────────────────────────────────
// En vez de guardar raw text, cada evento tiene tipo y metadatos
// Así el compilador sabe qué es importante y qué puede comprimir

/**
 * Build a typed event record for the event stream.
 *
 * @param {string} type - Event kind (presumably one of the `EventTypes` values).
 * @param {*} content - Payload carried by the event.
 * @param {object} [metadata={}] - Extra fields copied onto the event. They are
 *   applied last, so a `type`, `content` or `ts` key here overrides the base field.
 * @returns {object} `{ type, content, ts, ...metadata }`; `ts` is `Date.now()`
 *   unless overridden by `metadata`.
 */
export function createEvent(type, content, metadata = {}) {
  const base = { type, content, ts: Date.now() };
  return { ...base, ...metadata };
}

/**
 * Event kinds used to tag entries of the event stream.
 * Frozen so this shared lookup table cannot be mutated by accident at runtime.
 */
export const EventTypes = Object.freeze({
  USER_MESSAGE: 'user_message',
  BOT_RESPONSE: 'bot_response',
  TOOL_CALL: 'tool_call',
  TOOL_RESULT: 'tool_result', // can be pruned once it has been observed
  SYSTEM_NOTE: 'system_note',
  SUMMARY: 'summary', // produced by compaction
});

// Convertir events tipados a messages para la API
/**
 * Compile typed events into the chat-message array sent to the model API.
 *
 * Mapping:
 * - USER_MESSAGE -> `{ role: 'user' }`
 * - BOT_RESPONSE -> `{ role: 'assistant' }`
 * - SUMMARY      -> folded into the system message at index 0; when several
 *                   summaries are present the last one wins
 * - TOOL_RESULT  -> user message with the content capped at 300 characters,
 *                   skipped when the event is flagged `pruned`
 * - any other type (tool calls, system notes) is not forwarded to the model.
 *
 * @param {Array<object>|null|undefined} events - Typed events; a nullish value
 *   is treated as an empty list.
 * @param {string} systemPrompt - Base system prompt.
 * @returns {Array<{role: string, content: *}>} Messages, system message first.
 */
export function compileEventsToMessages(events, systemPrompt) {
  const messages = [{ role: 'system', content: systemPrompt }];

  for (const evt of events ?? []) {
    switch (evt.type) {
      case EventTypes.USER_MESSAGE:
        messages.push({ role: 'user', content: evt.content });
        break;
      case EventTypes.BOT_RESPONSE:
        messages.push({ role: 'assistant', content: evt.content });
        break;
      case EventTypes.SUMMARY:
        // Summaries travel as system context, not as conversation turns.
        messages[0] = {
          role: 'system',
          content: `${systemPrompt}\n\n[Resumen de conversación anterior]: ${evt.content}`,
        };
        break;
      case EventTypes.TOOL_RESULT: {
        if (evt.pruned) break;
        // Tool output is not guaranteed to be a string; normalise it before
        // truncating so a structured or missing result cannot throw here.
        const text = stringifyToolContent(evt.content);
        messages.push({ role: 'user', content: `[Resultado de herramienta]: ${text.slice(0, 300)}` });
        break;
      }
      default:
        // tool_call, system_note and unknown types are intentionally dropped.
        break;
    }
  }
  return messages;
}

/** Turn an arbitrary tool-result payload into text: strings pass through, nullish becomes '', everything else is JSON-encoded when possible. */
function stringifyToolContent(content) {
  if (typeof content === 'string') return content;
  if (content == null) return '';
  try {
    return JSON.stringify(content) ?? String(content);
  } catch {
    // Cycles / BigInt make JSON.stringify throw; fall back to plain coercion.
    return String(content);
  }
}

// ── 2. STRATEGIC POSITIONING (anti-lost-in-middle) ─────────────────────────────
// Investigación: LLMs atienden al INICIO y al FINAL, ignoran el MEDIO
// Solución: poner la info más importante al inicio y fin del system prompt
// NUNCA en el medio

/**
 * Assemble a system prompt with the most important material at the start and
 * at the end, and supporting context in the middle.
 *
 * Order of sections (each one is skipped when its value is falsy):
 *   1. coreIdentity          - start
 *   2. securityRules         - start
 *   3. ragContext            - start, under "## CONTEXTO RELEVANTE DEL SERVIDOR"
 *   4. conversationSummary   - middle, under "## RESUMEN PREVIO"
 *   5. userProfile           - end, under "## CON QUIÉN HABLAS"
 *   6. currentTime           - end, as "Hora actual: ..."
 *   7. instructions          - end, under "## INSTRUCCIONES DE RESPUESTA"
 *
 * A missing `coreIdentity` (or a missing `parts` object) is simply omitted
 * instead of leaking the text "undefined" into the prompt or throwing.
 *
 * @param {object} [parts] - Prompt fragments keyed by the names listed above.
 * @returns {string} Sections joined by blank lines, trimmed at both ends.
 */
export function buildStrategicPrompt(parts) {
  const {
    coreIdentity,
    securityRules,
    ragContext,
    conversationSummary,
    userProfile,
    currentTime,
    instructions,
  } = parts ?? {};

  const sections = [
    // START: identity, safety rules and retrieved context.
    coreIdentity,
    securityRules,
    ragContext && `## CONTEXTO RELEVANTE DEL SERVIDOR\n${ragContext}`,
    // MIDDLE: supporting context.
    conversationSummary && `## RESUMEN PREVIO\n${conversationSummary}`,
    // END: who we are talking to, current time and response instructions.
    userProfile && `## CON QUIÉN HABLAS\n${userProfile}`,
    currentTime && `Hora actual: ${currentTime}`,
    instructions && `## INSTRUCCIONES DE RESPUESTA\n${instructions}`,
  ];

  return sections.filter(Boolean).join('\n\n').trim();
}

// ── 3. AUTO-COMPACTION JERÁRQUICA ────────────────────────────────────────────
// Trigger a 50% del context window estimado (no esperar a que se llene)
// Manus: "Compress immediately after acknowledging tool outputs"
// Google ADK: sliding window con overlap para no perder contexto

// Estimación tokens: ~1 token por 3.5 chars en español
/**
 * Rough token estimate for a list of chat messages.
 *
 * Sums the `length` of every message's `content` (messages without content
 * count as 0) and divides by 3.5 characters per token, rounding up.
 *
 * @param {Array<{content?: string}>} messages
 * @returns {number} Estimated token count.
 */
export function estimateTokens(messages) {
  const charsPerToken = 3.5;
  let totalChars = 0;
  messages.forEach((msg) => {
    totalChars += msg.content?.length ?? 0;
  });
  return Math.ceil(totalChars / charsPerToken);
}

// Estimated context-window size, in tokens, per task type.
// `default` is the fallback entry for task types not listed here.
const CONTEXT_WINDOW = {
  fast: 4 * 1024,
  chat: 8 * 1024,
  spanish: 8 * 1024,
  reasoning: 16 * 1024,
  code: 16 * 1024,
  default: 8 * 1024,
};

// Fill ratio (estimated tokens / window) at which compaction kicks in: 50%.
const COMPACTION_THRESHOLD = 0.5;

/**
 * Decide whether the conversation is large enough to be compacted.
 *
 * Compares the estimated token count of `messages` with the window size for
 * `taskType` (falling back to the `default` window when the type has no entry)
 * and reports whether the fill ratio has reached `COMPACTION_THRESHOLD`.
 *
 * Declared `async` although nothing is awaited, so callers receive a Promise.
 *
 * @param {Array<{content?: string}>} messages
 * @param {string} [taskType='default']
 * @returns {Promise<boolean>} Resolves to `true` when compaction should run.
 */
export async function shouldCompact(messages, taskType = 'default') {
  const usedTokens = estimateTokens(messages);
  const budget = CONTEXT_WINDOW[taskType] ?? CONTEXT_WINDOW.default;
  return usedTokens / budget >= COMPACTION_THRESHOLD;
}

// Compaction jerárquica: mantener últimos N verbatim, resumir el resto
/**
 * Compact a conversation: keep the most recent turns verbatim and replace the
 * older ones with a short model-generated summary.
 *
 * - System messages are always kept, in front.
 * - If there are 6 or fewer non-system messages, `messages` is returned as is.
 * - Older turns are rendered as "Usuario: ..." / "Zelin: ..." lines (each
 *   capped at 400 chars, whole transcript capped at 3000 chars) and summarised
 *   with the local model when `isLocalAIReady()` is true, otherwise through
 *   `callAIBackground` with the 'fast' task type.
 * - On success the result is
 *   `[...system, summary-as-user, 'Entendido.' as assistant, ...recent]`.
 * - If the summariser throws, or returns something that is not a non-blank
 *   string, the older turns are dropped and `[...system, ...recent]` is
 *   returned, so the text "undefined" is never injected into the context.
 *
 * @param {Array<{role: string, content?: string}>} messages
 * @param {string} [taskType='default'] - Currently unused; kept for interface
 *   compatibility with existing callers.
 * @returns {Promise<Array<{role: string, content?: string}>>}
 */
export async function hierarchicalCompact(messages, taskType = 'default') {
  const KEEP_RECENT = 6; // last 6 messages always stay verbatim
  const MAX_CHARS_PER_MESSAGE = 400;
  const MAX_TRANSCRIPT_CHARS = 3000;
  const MAX_SUMMARY_TOKENS = 200;

  const sys = messages.filter((m) => m.role === 'system');
  const convo = messages.filter((m) => m.role !== 'system');

  if (convo.length <= KEEP_RECENT) return messages; // nothing to compact

  const toCompress = convo.slice(0, convo.length - KEEP_RECENT);
  const recent = convo.slice(convo.length - KEEP_RECENT);

  const transcript = toCompress
    .map((m) => `${m.role === 'user' ? 'Usuario' : 'Zelin'}: ${m.content?.slice(0, MAX_CHARS_PER_MESSAGE) ?? ''}`)
    .join('\n')
    .slice(0, MAX_TRANSCRIPT_CHARS);

  let summary;
  try {
    // Prefer the local model so compaction does not spend API quota.
    if (isLocalAIReady()) {
      summary = await ollamaChatDirect([
        { role: 'system', content: 'Resume en 3-4 frases en español los puntos clave de esta conversación. Solo hechos importantes, sin detalles redundantes.' },
        { role: 'user', content: transcript },
      ], MAX_SUMMARY_TOKENS, 10000);
    } else {
      summary = await callAIBackground([
        { role: 'system', content: 'Resume en 3-4 frases los puntos clave. Solo hechos, sin detalles redundantes.' },
        { role: 'user', content: transcript },
      ], 'fast', MAX_SUMMARY_TOKENS);
    }
  } catch (err) {
    // Best effort: losing the old turns is preferable to failing the request,
    // but the failure should still be visible in the logs.
    console.warn('[ContextEngine] compaction summary failed:', err?.message ?? err);
    return [...sys, ...recent];
  }

  // Guard against empty / non-string summaries so the model never sees
  // "[Resumen de conversación anterior]: undefined".
  const cleanSummary = typeof summary === 'string' ? summary.trim() : '';
  if (!cleanSummary) return [...sys, ...recent];

  const summaryMsg = { role: 'user', content: `[Resumen de conversación anterior]: ${cleanSummary}` };
  return [...sys, summaryMsg, { role: 'assistant', content: 'Entendido.' }, ...recent];
}

// Wrapper completo: compactar si necesario, con strategic positioning
/**
 * Full pipeline: drop incoming system messages, compact the history when
 * `shouldCompact` says so, and rebuild the system message.
 *
 * System prompt handling when `ragContext` is truthy: the first
 * "## INFORMACIÓN RELEVANTE DEL SERVIDOR (RAG)" section found in
 * `systemPrompt` (up to the next "\n\n##" or the end of the string) is
 * removed, and `ragContext` is appended at the END of the prompt under a
 * "## CONTEXTO RELEVANTE ..." heading. NOTE(review): the heading text says
 * "posicionado al inicio" although the block is appended last.
 *
 * The compaction decision is made with `systemPrompt` as given, before the
 * RAG rewrite.
 *
 * @param {Array<{role: string, content?: string}>} messages - Incoming messages;
 *   any with role 'system' are discarded in favour of `systemPrompt`.
 * @param {string} systemPrompt
 * @param {string} [taskType] - Forwarded to `shouldCompact` / `hierarchicalCompact`.
 * @param {string} [ragContext=''] - Retrieved context to append; skipped when falsy.
 * @returns {Promise<Array<{role: string, content?: string}>>} One system
 *   message followed by the (possibly compacted) history.
 */
export async function engineerContext(messages, systemPrompt, taskType, ragContext = '') {
  const isNotSystem = (msg) => msg.role !== 'system';

  // 1. Split the history from any system messages that came in.
  const history = messages.filter(isNotSystem);
  const withSystemPrompt = () => [{ role: 'system', content: systemPrompt }, ...history];

  // 2. Compact once the estimated fill ratio crosses the threshold.
  let optimizedHistory = history;
  const needsCompaction = await shouldCompact(withSystemPrompt(), taskType);
  if (needsCompaction) {
    console.log('[ContextEngine] 📦 Auto-compaction activada (>50% window)');
    const compacted = await hierarchicalCompact(withSystemPrompt(), taskType);
    optimizedHistory = compacted.filter(isNotSystem);
  }

  // 3. Rebuild the system prompt: strip an embedded RAG section, if any, and
  //    append the fresh RAG context after everything else.
  let optimizedSystem = systemPrompt;
  if (ragContext) {
    const embeddedRagSection = /## INFORMACIÓN RELEVANTE DEL SERVIDOR \(RAG\)\n[\s\S]*?(?=\n\n##|$)/;
    const promptWithoutRag = systemPrompt.replace(embeddedRagSection, '');
    optimizedSystem = `${promptWithoutRag}\n\n## CONTEXTO RELEVANTE (posicionado al inicio para máxima atención)\n${ragContext}`;
  }

  return [{ role: 'system', content: optimizedSystem }, ...optimizedHistory];
}

// ── Pruning de tool results (como hace Manus) ─────────────────────────────────
// Después de que el agente usa un resultado de herramienta, marcarlo como "observado"
// Para que no siga ocupando context en futuras llamadas
/**
 * Flag every tool-result event belonging to `toolCallId` as pruned.
 *
 * Mutates the matching event objects in place by setting `pruned = true`;
 * all other events are left untouched.
 *
 * @param {Array<object>} events - Event stream to scan.
 * @param {*} toolCallId - Compared with each event's `toolCallId` using `===`.
 * @returns {void}
 */
export function markToolResultPruned(events, toolCallId) {
  for (const event of events) {
    if (event.type !== EventTypes.TOOL_RESULT) continue;
    if (event.toolCallId !== toolCallId) continue;
    event.pruned = true;
  }
}