zelin-bot / src /rag.js
Z User
v5.8.5: Gemma 4, MC Wiki, MC Player, anti-hallucination, CPU optimizations
ee826ee
/**
 * rag.js — RAG for TomateSMP
 * ============================
 * Retrieval-Augmented Generation: indexes the server knowledge
 * and retrieves the relevant chunks for each question.
 * No vectra or Docker — in-memory array + Gemini embeddings.
 * Initialized at startup from server_knowledge.js.
 */
import { embed } from './semantic-cache.js';
import { SERVER_KNOWLEDGE } from '../server_knowledge.js';
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Similarity in [-1, 1]; 0 when either vector is missing,
 *   empty, all zeros, or when the two vectors have different lengths.
 */
function cosineSim(a, b) {
  if (!a?.length || !b?.length) return 0;
  // Vectors of different dimensionality are not comparable. Without this
  // guard a shorter `b` yields NaN (undefined * number) and a shorter `a`
  // silently scores only a prefix of `b`.
  if (a.length !== b.length) return 0;

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] ** 2;
    normB += b[i] ** 2;
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
// ── Chunker: splits the knowledge text into manageable fragments ─────────────
/**
 * Splits markdown-like text into chunks.
 *
 * The text is first split on level-2 headings ("\n## "). A section that fits
 * in `maxChars` becomes one chunk. A longer section is split on blank lines
 * and its paragraphs are packed greedily; each new chunk starts with the last
 * `overlap` characters of the previous one. A single paragraph longer than
 * `maxChars` is NOT split further, so a chunk can exceed `maxChars`.
 *
 * @param {string} text - Source text. Non-string or empty input yields [].
 * @param {number} [maxChars=400] - Target maximum chunk length.
 * @param {number} [overlap=50] - Characters carried over from the previous
 *   chunk; 0 or a negative value disables the overlap.
 * @returns {string[]} Trimmed chunks; chunks of 30 characters or fewer are dropped.
 */
function chunkText(text, maxChars = 400, overlap = 50) {
  if (typeof text !== 'string' || text.length === 0) return [];

  const chunks = [];
  const sections = text.split(/\n## /);

  for (const [idx, section] of sections.entries()) {
    if (!section.trim()) continue;

    // The split removes the "## " marker only from sections AFTER the first
    // one, so only those get it restored. Text before the first heading is
    // kept as-is instead of being given a made-up heading.
    const sectionText =
      idx === 0 || section.startsWith('##') ? section : `## ${section}`;

    // If the section fits as a whole, add it unchanged.
    if (sectionText.length <= maxChars) {
      chunks.push(sectionText.trim());
      continue;
    }

    // Otherwise split it by paragraphs and pack them greedily.
    let current = '';
    for (const para of sectionText.split('\n\n')) {
      if (current.length > 0 && current.length + para.length > maxChars) {
        chunks.push(current.trim());
        // Overlap: carry the tail of the previous chunk. `slice(-0)` equals
        // `slice(0)` (the whole string), so overlap <= 0 must be special-cased
        // or the entire previous chunk would be duplicated.
        const tail = overlap > 0 ? current.slice(-overlap) : '';
        current = tail ? `${tail}\n${para}` : para;
      } else {
        current += `\n\n${para}`;
      }
    }
    if (current.trim()) chunks.push(current.trim());
  }

  return chunks.filter((chunk) => chunk.length > 30);
}
// ── In-memory index ───────────────────────────────────────────────────────────
// Lifecycle flags: `_indexing` is set while initRAG() is running, `_indexed`
// once it has finished.
let _indexing = false;
let _indexed = false;
// Entries pushed by initRAG() have the shape { text, embedding }, where
// `embedding` is null when no embedding could be computed for the chunk.
const _index = [];
/**
 * Builds the in-memory index from SERVER_KNOWLEDGE.
 *
 * Chunks the knowledge text, probes `embed()` once, then indexes every chunk
 * in batches of 5. A chunk is stored with `embedding: null` when the probe
 * failed or when embedding that particular chunk fails. Does nothing if the
 * index is already built, a build is in progress, or SERVER_KNOWLEDGE is falsy.
 *
 * @returns {Promise<void>} Rejects if chunking throws; `_indexing` is reset
 *   in that case so a later call can retry.
 */
export async function initRAG() {
  if (_indexed || _indexing) return;
  if (!SERVER_KNOWLEDGE) {
    console.log('[RAG] server_knowledge.js vacío — RAG desactivado');
    return;
  }

  _indexing = true;
  try {
    console.log('[RAG] Indexando conocimiento de TomateSMP...');
    // Drop anything left behind by an earlier attempt that did not finish.
    _index.length = 0;

    const chunks = chunkText(SERVER_KNOWLEDGE);
    console.log(`[RAG] ${chunks.length} chunks a indexar...`);

    // Probe the embedding backend with a short sample before indexing.
    let embeddingsWorking = false;
    if (chunks.length > 0) {
      try {
        await embed(chunks[0].slice(0, 50));
        embeddingsWorking = true;
        console.log('[RAG] ✅ Embeddings disponibles (local o Gemini)');
      } catch (err) {
        console.warn(
          '[RAG] Embeddings no disponibles aún (modelo cargando), usando búsqueda textual',
          err?.message ?? err,
        );
      }
    }

    // Index the chunks (with or without embeddings).
    const BATCH = 5;
    for (let i = 0; i < chunks.length; i += BATCH) {
      const batch = chunks.slice(i, i + BATCH);
      // Each mapper handles its own failure, so Promise.all never rejects here.
      // Collecting the results first keeps the index in chunk order no matter
      // which embedding request finishes first.
      const entries = await Promise.all(
        batch.map(async (text) => {
          if (!embeddingsWorking) return { text, embedding: null }; // text search only
          try {
            return { text, embedding: await embed(text) };
          } catch {
            return { text, embedding: null }; // per-chunk fallback to text search
          }
        }),
      );
      _index.push(...entries);

      // Short pause between batches while the embedding backend is in use.
      if (embeddingsWorking && i + BATCH < chunks.length) {
        await new Promise((resolve) => setTimeout(resolve, 200));
      }
    }

    _indexed = true;
    const withEmbedding = _index.filter((entry) => entry.embedding).length;
    console.log(`[RAG] ✅ Indexado: ${withEmbedding}/${_index.length} chunks con embedding`);
  } finally {
    // Always clear the in-progress flag; otherwise one unexpected error would
    // make every later initRAG() call return immediately.
    _indexing = false;
  }
}
// ── Retrieval: get the most relevant chunks for a query ──────────────────────

/**
 * Keyword fallback: ranks indexed chunks by how many query words (longer than
 * 3 characters) they contain, case-insensitively.
 *
 * @param {string} query - User query.
 * @param {number} topK - Maximum number of chunks to return.
 * @returns {string} Matching chunk texts joined by "\n\n---\n\n", or ''.
 */
function _keywordFallback(query, topK) {
  const words = String(query ?? '')
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length > 3);

  return _index
    .map((entry) => {
      const haystack = entry.text.toLowerCase();
      const score = words.reduce((sum, word) => sum + (haystack.includes(word) ? 1 : 0), 0);
      return { text: entry.text, score };
    })
    .filter((hit) => hit.score > 0)
    .sort((x, y) => y.score - x.score)
    .slice(0, topK)
    .map((hit) => hit.text)
    .join('\n\n---\n\n');
}

/**
 * Returns the indexed chunks most relevant to `query`.
 *
 * Uses cosine similarity against the stored embeddings (score must exceed
 * 0.5). Falls back to keyword matching when the index holds no embeddings,
 * when `embed()` throws, or when it returns no usable vector.
 *
 * @param {string} query - User query.
 * @param {number} [topK=3] - Maximum number of chunks to return.
 * @returns {Promise<string>} Chunk texts joined by "\n\n---\n\n"; '' when the
 *   index is not ready or nothing matches.
 */
export async function retrieveContext(query, topK = 3) {
  if (!_indexed || _index.length === 0) return '';

  // If the index was built while embeddings were unavailable, every entry has
  // `embedding: null`. Vector search can then never match, so go straight to
  // the keyword fallback instead of returning '' whenever embed() works.
  const embedded = _index.filter((entry) => entry.embedding);
  if (embedded.length > 0) {
    try {
      const queryEmb = await embed(query);
      if (queryEmb?.length) {
        return embedded
          .map((entry) => ({ text: entry.text, score: cosineSim(queryEmb, entry.embedding) }))
          .filter((hit) => hit.score > 0.5)
          .sort((x, y) => y.score - x.score)
          .slice(0, topK)
          .map((hit) => hit.text)
          .join('\n\n---\n\n');
      }
    } catch (err) {
      // Best effort: embedding failures degrade to keyword search.
      console.warn('[RAG] embed() failed, using keyword search:', err?.message ?? err);
    }
  }

  return _keywordFallback(query, topK);
}
// ── Inject RAG context into the system prompt ────────────────────────────────

// Words suggesting the message is about the server; RAG is skipped otherwise.
// Accented letters are written as \u escapes and the unaccented spelling is
// accepted too: the previous pattern held mis-decoded bytes in place of "í"
// and "ó", so "economía" / "protección" could never match.
const _SERVER_KEYWORDS_RE = /regla|comando|rango|plugin|crate|job|skill|habilidad|warp|server|servidor|pvp|koth|dungeon|trade|battlepass|tag|badge|insignia|econom[i\u00ed]a|dinero|mobcoin|claim|protecci[o\u00f3]n|terreno|matrimonio|mascota|voice|voz/i;

// Heading placed before the retrieved context ("INFORMACIÓN RELEVANTE ...").
const _RAG_HEADER = '\n\n## INFORMACI\u00d3N RELEVANTE DEL SERVIDOR (RAG)\n';

/**
 * Returns only the RAG context string (without the base system prompt), so
 * callers can compute it in parallel with other work.
 *
 * @param {string} userMessage - The user's message.
 * @returns {Promise<string>} The heading plus retrieved context, or '' when
 *   the index is not ready, the message matches no server keyword, or nothing
 *   relevant was retrieved.
 */
export async function getRAGSuffix(userMessage) {
  if (!_indexed) return '';
  if (!_SERVER_KEYWORDS_RE.test(userMessage)) return '';

  const context = await retrieveContext(userMessage, 3);
  return context ? `${_RAG_HEADER}${context}` : '';
}
/**
 * Appends the RAG context for `userMessage` to `systemPrompt`.
 *
 * Delegates to getRAGSuffix() so the readiness check, the keyword gate and the
 * heading exist in one place only and the two entry points cannot drift apart.
 *
 * @param {string} systemPrompt - Base system prompt.
 * @param {string} userMessage - The user's message.
 * @returns {Promise<string>} `systemPrompt` with the RAG section appended, or
 *   `systemPrompt` unchanged when there is no context to add.
 */
export async function injectRAGContext(systemPrompt, userMessage) {
  const suffix = await getRAGSuffix(userMessage);
  // Return the prompt untouched (not concatenated with '') when there is no
  // suffix, so a non-string prompt is passed through exactly as before.
  return suffix ? systemPrompt + suffix : systemPrompt;
}
/**
 * Snapshot of the index state.
 *
 * @returns {{indexed: boolean, chunks: number, withEmbedding: number}}
 *   `indexed` is the current `_indexed` flag, `chunks` the number of index
 *   entries, `withEmbedding` how many of them have a truthy `embedding`.
 */
export function ragStats() {
  let embeddedCount = 0;
  for (const entry of _index) {
    if (entry.embedding) embeddedCount += 1;
  }
  const total = _index.length;
  return { indexed: _indexed, chunks: total, withEmbedding: embeddedCount };
}