Spaces:

TomatitoToho
/

zelin-bot

Paused

App Files Files Community

zelin-bot / src /rag.js

Z User

v5.8.5: Gemma 4, MC Wiki, MC Player, anti-hallucination, CPU optimizations

ee826ee 18 days ago

history blame contribute delete

6.36 kB

	/**
	* rag.js — RAG para TomateSMP
	* ============================
	* Retrieval-Augmented Generation: indexa el conocimiento del servidor
	* y recupera chunks relevantes para cada pregunta.
	* Sin vectra ni Docker — array en memoria + Gemini embeddings.
	* Se inicializa al arrancar con server_knowledge.js.
	*/

	import { embed } from './semantic-cache.js';
	import { SERVER_KNOWLEDGE } from '../server_knowledge.js';

	/**
	 * Cosine similarity between two numeric vectors.
	 *
	 * @param {ArrayLike<number>} a - First vector.
	 * @param {ArrayLike<number>} b - Second vector.
	 * @returns {number} Value in [-1, 1]; 0 when either vector is missing or
	 *   empty, when the lengths differ, or when either vector has zero norm.
	 */
	function cosineSim(a, b) {
	  if (!a?.length || !b?.length) return 0;
	  // Vectors of different dimensionality are not comparable; returning 0
	  // avoids the NaN that reading past the end of the shorter one would give.
	  if (a.length !== b.length) return 0;

	  let dot = 0;
	  let normA = 0;
	  let normB = 0;
	  for (let i = 0; i < a.length; i++) {
	    dot += a[i] * b[i];
	    normA += a[i] ** 2;
	    normB += b[i] ** 2;
	  }

	  const denom = Math.sqrt(normA) * Math.sqrt(normB);
	  return denom === 0 ? 0 : dot / denom;
	}

	// ── Chunker: splits the knowledge text into manageable fragments ─────────────
	/**
	 * Splits markdown-like text into chunks for indexing.
	 *
	 * The text is first split on "\n## " headings. A section that fits within
	 * `maxChars` becomes one chunk. A longer section is split on blank lines and
	 * its paragraphs are packed greedily; a single paragraph longer than
	 * `maxChars` is kept whole rather than cut mid-paragraph.
	 *
	 * @param {string} text - Source text.
	 * @param {number} [maxChars=400] - Target maximum chunk size in characters.
	 * @param {number} [overlap=50] - Characters from the end of the previous
	 *   chunk carried over to the start of the next one; 0 or less disables it.
	 * @returns {string[]} Trimmed chunks; chunks of 30 characters or fewer are dropped.
	 */
	function chunkText(text, maxChars = 400, overlap = 50) {
	  const chunks = [];
	  for (const section of text.split(/\n## /)) {
	    if (!section.trim()) continue;
	    // The split consumes the "## " marker of every section but the first.
	    const sectionText = section.startsWith('##') ? section : '## ' + section;

	    // A section that fits is kept as-is.
	    if (sectionText.length <= maxChars) {
	      chunks.push(sectionText.trim());
	      continue;
	    }

	    // A long section is split by paragraphs.
	    let current = '';
	    for (const para of sectionText.split('\n\n')) {
	      if (current.length > 0 && current.length + para.length > maxChars) {
	        chunks.push(current.trim());
	        // slice(-0) is slice(0) and would copy the whole previous chunk, so
	        // a non-positive overlap must be handled explicitly.
	        const tail = overlap > 0 ? current.slice(-overlap) : '';
	        current = tail + '\n' + para;
	      } else {
	        current += '\n\n' + para;
	      }
	    }
	    if (current.trim()) chunks.push(current.trim());
	  }
	  return chunks.filter((chunk) => chunk.length > 30);
	}

	// ── In-memory index ───────────────────────────────────────────────────────────
	// initRAG() pushes one entry per chunk; `embedding` is null for chunks that
	// were indexed without an embedding.
	const _index = []; // { text, embedding }
	// Set to true once initRAG() has finished building the index.
	let _indexed = false;
	// True while initRAG() is running; initRAG() returns early when it is set.
	let _indexing = false;

	/**
	 * Builds the in-memory index from SERVER_KNOWLEDGE.
	 *
	 * Does nothing when the index is already built, when another call is in
	 * progress, or when SERVER_KNOWLEDGE is empty. Embeddings are probed once
	 * with the first chunk; if the probe fails every chunk is stored with
	 * `embedding: null`. If the probe succeeds, chunks are embedded in batches
	 * and a chunk whose embedding fails is stored with `embedding: null`.
	 *
	 * @returns {Promise<void>} Resolves when indexing has finished. Rejects if
	 *   chunking throws; the in-progress flag is cleared so a later call can retry.
	 */
	export async function initRAG() {
	  if (_indexed || _indexing) return;
	  if (!SERVER_KNOWLEDGE) { console.log('[RAG] server_knowledge.js vacío — RAG desactivado'); return; }

	  _indexing = true;
	  try {
	    console.log('[RAG] Indexando conocimiento de TomateSMP...');

	    const chunks = chunkText(SERVER_KNOWLEDGE);
	    console.log(`[RAG] ${chunks.length} chunks a indexar...`);

	    // Probe the embedding backend with a short sample. Skipped when there is
	    // nothing to index, so an empty chunk list is not reported as a failure.
	    let embeddingsWorking = false;
	    if (chunks.length > 0) {
	      try {
	        await embed(chunks[0].slice(0, 50));
	        embeddingsWorking = true;
	        console.log('[RAG] ✅ Embeddings disponibles (local o Gemini)');
	      } catch {
	        console.warn('[RAG] Embeddings no disponibles aún (modelo cargando), usando búsqueda textual');
	      }
	    }

	    // Index chunks, with or without embeddings.
	    const BATCH = 5;
	    for (let i = 0; i < chunks.length; i += BATCH) {
	      const batch = chunks.slice(i, i + BATCH);
	      // Each task handles its own failure, so Promise.all never rejects and
	      // the entries come back in chunk order regardless of embed timing.
	      const entries = await Promise.all(batch.map(async (text) => {
	        if (!embeddingsWorking) return { text, embedding: null }; // text-only
	        try {
	          return { text, embedding: await embed(text) };
	        } catch {
	          return { text, embedding: null }; // text-only fallback for this chunk
	        }
	      }));
	      _index.push(...entries);

	      // Short pause between batches while embeddings are in use.
	      if (embeddingsWorking && i + BATCH < chunks.length) {
	        await new Promise((resolve) => setTimeout(resolve, 200));
	      }
	    }

	    _indexed = true;
	    const indexed = _index.filter((e) => e.embedding).length;
	    console.log(`[RAG] ✅ Indexado: ${indexed}/${_index.length} chunks con embedding`);
	  } finally {
	    // Always clear the flag, otherwise a failure would block every later call.
	    _indexing = false;
	  }
	}

	// ── Retrieval: get the most relevant chunks for a query ──────────────────────
	/**
	 * Returns the indexed chunks most relevant to `query`, joined by a
	 * "---" separator line.
	 *
	 * When the index holds embeddings, chunks are ranked by cosine similarity to
	 * the query embedding and only scores above 0.5 are kept. When the index
	 * holds no embeddings at all, or embedding the query fails, chunks are ranked
	 * by how many query words longer than 3 characters they contain.
	 *
	 * @param {string} query - User question.
	 * @param {number} [topK=3] - Maximum number of chunks to return.
	 * @returns {Promise<string>} Joined chunk texts, or '' when the index is not
	 *   ready, the query is empty or not a string, or nothing matches.
	 */
	export async function retrieveContext(query, topK = 3) {
	  if (!_indexed || _index.length === 0) return '';
	  if (typeof query !== 'string' || query.trim() === '') return '';

	  const SEPARATOR = '\n\n---\n\n';

	  const embedded = _index.filter((e) => e.embedding);
	  // Semantic search only makes sense when something was indexed with an
	  // embedding; otherwise go straight to the keyword search below instead of
	  // returning nothing.
	  if (embedded.length > 0) {
	    try {
	      const queryEmb = await embed(query);
	      const scored = embedded
	        .map((e) => ({ text: e.text, score: cosineSim(queryEmb, e.embedding) }))
	        .filter((e) => e.score > 0.5)
	        .sort((a, b) => b.score - a.score)
	        .slice(0, topK);
	      return scored.map((e) => e.text).join(SEPARATOR);
	    } catch (err) {
	      // Best effort: report the failure and fall through to keyword search.
	      console.warn('[RAG] Fallo al generar el embedding de la consulta, usando búsqueda textual:', err?.message ?? err);
	    }
	  }

	  // Keyword fallback: count how many distinct query words each chunk contains.
	  const words = query.toLowerCase().split(/\s+/).filter((w) => w.length > 3);
	  const scored = _index
	    .map((e) => {
	      const haystack = e.text.toLowerCase();
	      const score = words.reduce((sum, w) => sum + (haystack.includes(w) ? 1 : 0), 0);
	      return { text: e.text, score };
	    })
	    .filter((e) => e.score > 0)
	    .sort((a, b) => b.score - a.score)
	    .slice(0, topK);
	  return scored.map((e) => e.text).join(SEPARATOR);
	}

	// ── Inject RAG context into the system prompt ────────────────────────────────
	/**
	 * Case-insensitive list of substrings that mark a message as being about the
	 * server. RAG retrieval is only attempted for messages that match it.
	 */
	const SERVER_KEYWORDS_RE = /regla|comando|rango|plugin|crate|job|skill|habilidad|warp|server|servidor|pvp|koth|dungeon|trade|battlepass|tag|badge|insignia|economía|dinero|mobcoin|claim|protección|terreno|matrimonio|mascota|voice|voz/i;

	/**
	 * Returns only the RAG context section (without the base system prompt), so
	 * callers can compute it in parallel with other work.
	 *
	 * @param {string} userMessage - Message to retrieve context for.
	 * @returns {Promise<string>} The section to append, or '' when the index is
	 *   not ready, the message has no server keyword, or nothing was retrieved.
	 */
	export async function getRAGSuffix(userMessage) {
	  if (!_indexed) return '';
	  // Only use RAG when the question looks like it is about the server.
	  if (!SERVER_KEYWORDS_RE.test(userMessage)) return '';
	  const context = await retrieveContext(userMessage, 3);
	  if (!context) return '';
	  return `\n\n## INFORMACIÓN RELEVANTE DEL SERVIDOR (RAG)\n${context}`;
	}

	/**
	 * Appends the RAG context section for `userMessage` to `systemPrompt`.
	 *
	 * @param {string} systemPrompt - Base system prompt.
	 * @param {string} userMessage - Message to retrieve context for.
	 * @returns {Promise<string>} The prompt with the section appended, or
	 *   `systemPrompt` unchanged when there is no context to add.
	 */
	export async function injectRAGContext(systemPrompt, userMessage) {
	  const suffix = await getRAGSuffix(userMessage);
	  // Return the original value untouched rather than concatenating ''.
	  return suffix ? systemPrompt + suffix : systemPrompt;
	}

	/**
	 * Reports the current state of the in-memory index.
	 *
	 * @returns {{indexed: boolean, chunks: number, withEmbedding: number}}
	 *   Whether indexing has finished, how many chunks are stored, and how many
	 *   of them carry an embedding.
	 */
	export function ragStats() {
	  let embeddedCount = 0;
	  for (const entry of _index) {
	    if (entry.embedding) embeddedCount += 1;
	  }
	  return {
	    indexed: _indexed,
	    chunks: _index.length,
	    withEmbedding: embeddedCount,
	  };
	}