Spaces:
Paused
Paused
Z User
Fix: greeting spam on restart (persist to DB), crash stability, disconnect backoff, remove stealth references
162dea2 | /** | |
| * browser-agent.js — Agente de Navegador Web para Zelin | |
| * ======================================================= | |
| * Basado en investigación exhaustiva (2025-2026): | |
| * - Playwright + playwright-extra (browser automation) | |
| * (450k descargas/semana, el stack más probado) | |
| * - Ghost Cursor: Bezier cúbicas + Ley de Fitts para movimiento humano real | |
| * (investigación: "Emulating Human-Like Mouse Movement", ResearchGate 2025) | |
| * - Detección multi-capa: Perlin noise + micro-correcciones + overshoot | |
| * - Vision: screenshots → Moondream2 para VER la página realmente | |
| * | |
| * CAPACIDADES: | |
| * - Navegar a URLs, scroll, clic, escribir texto | |
| * - Screenshots + análisis visual (qué hay en la página) | |
| * - Búsqueda web real (Google, DuckDuckGo) | |
| * - Extracción de contenido estructurado | |
| * - Rellenar formularios con comportamiento humano real | |
| * - Detección de CAPTCHAs y notificación al owner | |
| * | |
| * LÍMITES DE SEGURIDAD: | |
| * - Sesiones de máximo 5 minutos (sin loops infinitos) | |
| * - Lista blanca de dominios si se desea restringir | |
| * - No login en servicios de pago sin confirmación del owner | |
| * - Todo se registra para auditoría | |
| */ | |
| import { readConfig } from './utils.js'; | |
| import * as db from './db.js'; | |
| import { generateSessionIdentity, generateStealthScript, getIdentityStats } from './stealth-engine.js'; | |
| import { detectAndSolve } from './captcha-solver.js'; | |
// ── Module-level configuration and browser session state ─────────────────────
const config = readConfig();

// Hard cap on the lifetime of one browser session: 5 minutes.
const MAX_SESSION_MS = 5 * 60 * 1000;

// Flipped to true by the dependency / installation checks in this module.
let _playwrightAvailable = false;

// Handles of the active browser session; all null while no session is open.
let _browser = null;
let _page = null;
let _sessionStart = null; // Date.now() timestamp taken when the session was launched
/**
 * Probe whether the `playwright` and `playwright-extra` packages can be imported.
 * A successful probe is cached in `_playwrightAvailable`; a failed probe is not
 * cached, so the imports are retried on the next call.
 *
 * @returns {Promise<boolean>} true when both packages import without error.
 */
async function checkDependencies() {
  if (!_playwrightAvailable) {
    try {
      await import('playwright');
      await import('playwright-extra');
    } catch {
      return false;
    }
    _playwrightAvailable = true;
  }
  return true;
}
// ── Install Playwright's Chromium automatically when it is missing ───────────
// Ensures the (slow) installation is attempted at most once per process.
let _installAttempted = false;

/**
 * Make sure Playwright is usable, downloading Chromium on first use if needed.
 *
 * Steps: (1) check that the npm packages import, (2) check that the Chromium
 * executable reported by Playwright exists on disk, (3) otherwise run
 * `npx playwright install chromium --with-deps`.
 *
 * The attempt is made only once; later calls return the cached outcome.
 *
 * NOTE: the installation uses `execSync`, which blocks the Node.js event loop
 * until the command finishes or the 5-minute timeout expires.
 *
 * @returns {Promise<boolean>} true when Playwright and Chromium are ready.
 */
export async function ensurePlaywright() {
  if (_playwrightAvailable || _installAttempted) return _playwrightAvailable;
  _installAttempted = true;

  // Is the playwright package installed at all?
  const hasPkg = await checkDependencies();
  if (!hasPkg) {
    console.warn('[Browser] playwright no encontrado — usando búsqueda por fetch');
    return false;
  }

  // Package present — check whether the Chromium binary exists.
  try {
    const { chromium } = await import('playwright');
    const { existsSync } = await import('fs');
    if (existsSync(chromium.executablePath())) {
      console.log('[Browser] ✅ Playwright + Chromium ya instalado');
      _playwrightAvailable = true;
      return true;
    }
  } catch {
    // Best effort: if the binary cannot be located, fall through and install it.
  }

  // Binary missing — install Chromium.
  console.log('[Browser] Instalando Chromium (primera vez, puede tardar ~1 min)...');
  try {
    const { execSync } = await import('child_process');
    // The child inherits process.env by default, which already carries
    // PLAYWRIGHT_BROWSERS_PATH when it is set, so no explicit env is needed.
    execSync('npx playwright install chromium --with-deps', {
      stdio: 'inherit',
      timeout: 5 * 60 * 1000, // 5 minutes maximum
    });
    console.log('[Browser] ✅ Chromium instalado correctamente');
    _playwrightAvailable = true;
    return true;
  } catch (e) {
    // checkDependencies() already flagged Playwright as available because the
    // packages import; without a browser binary that is wrong, and leaving it
    // set would make later calls return true after this call returned false.
    _playwrightAvailable = false;
    console.warn('[Browser] No se pudo instalar Chromium automáticamente:', e.message);
    console.warn('[Browser] Usando búsqueda por fetch como alternativa');
    return false;
  }
}
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // HUMAN MOUSE SIMULATION | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Basado en: Ghost Cursor (Bezier + Fitts) + Gaussian noise + micro-correcciones | |
| // Research: "Cubic Bezier curves with velocity variations and stochastic micro-adjustments | |
| // produce trajectories indistinguishable from genuine human interaction" | |
/**
 * Evaluate one coordinate of a cubic Bezier curve.
 *
 * @param {number} t  Curve parameter, 0 at the start point and 1 at the end point.
 * @param {number} p0 Start point.
 * @param {number} p1 First control point.
 * @param {number} p2 Second control point.
 * @param {number} p3 End point.
 * @returns {number} The curve value at `t`.
 */
function cubicBezier(t, p0, p1, p2, p3) {
  const u = 1 - t;
  // Bernstein-weighted terms, summed in the order start → end.
  const startTerm = u*u*u*p0;
  const firstControlTerm = 3*u*u*t*p1;
  const secondControlTerm = 3*u*t*t*p2;
  const endTerm = t*t*t*p3;
  return startTerm + firstControlTerm + secondControlTerm + endTerm;
}
/**
 * Fitts's law estimate of how long a pointer movement takes:
 * time = a + b * log2(1 + D / W), clamped to the range 300–2500 ms.
 *
 * @param {number} distancePx   Distance to travel (D), in pixels.
 * @param {number} [targetSizePx=50] Target size (W), in pixels; values below 10 are treated as 10.
 * @returns {number} Movement duration in milliseconds.
 */
function fittsTime(distancePx, targetSizePx = 50) {
  const INTERCEPT_MS = 100;
  const SLOPE_MS = 150;
  const effectiveWidth = Math.max(targetSizePx, 10);
  const indexOfDifficulty = Math.log2(1 + distancePx / effectiveWidth);
  const estimate = INTERCEPT_MS + SLOPE_MS * indexOfDifficulty;

  if (estimate < 300) return 300;
  if (estimate > 2500) return 2500;
  return estimate;
}
/**
 * Draw a zero-mean Gaussian sample using the Box-Muller transform.
 * Used for jitter and micro-corrections in the simulated pointer movement.
 *
 * @param {number} [stddev=1] Standard deviation of the distribution.
 * @returns {number} A finite random sample.
 */
function gaussianNoise(stddev = 1) {
  // Math.random() lies in [0, 1). Using 1 - Math.random() moves the range to
  // (0, 1], so Math.log never receives 0 and the result can never be ±Infinity.
  const u1 = 1 - Math.random();
  const u2 = Math.random();
  return Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2) * stddev;
}
/**
 * Map linear progress (0..1) to eased progress along the pointer path.
 * Four linear segments: acceleration (0–30%), cruise (30–70%),
 * deceleration (70–90%) and fine correction (90–100%).
 */
function easeMouseProgress(t) {
  if (t < 0.3) return t / 0.3 * 0.3;
  if (t < 0.7) return 0.3 + (t - 0.3) / 0.4 * 0.5;
  if (t < 0.9) return 0.8 + (t - 0.7) / 0.2 * 0.1;
  return 0.9 + (t - 0.9) / 0.1 * 0.1;
}

/**
 * Move the mouse to (toX, toY) along a randomized cubic Bezier path with
 * variable speed, occasional jitter, and an optional overshoot-and-correct.
 *
 * The starting point is read from `window._mouseX` / `window._mouseY` in the
 * page (640, 360 when unavailable), and the destination is written back there
 * after the move.
 *
 * @param {object} page Object exposing `mouse.move(x, y)` and `evaluate(fn, arg)`.
 * @param {number} toX  Destination X coordinate.
 * @param {number} toY  Destination Y coordinate.
 * @param {object} [options]
 * @param {number}  [options.targetSize=50]  Target element size in px (feeds the duration estimate).
 * @param {boolean} [options.overshoot=true] Allow an occasional overshoot followed by a correction.
 * @param {boolean} [options.hesitate=false] Allow an occasional pause at the end of the move.
 * @returns {Promise<void>}
 */
export async function humanMouseMove(page, toX, toY, options = {}) {
  const {
    targetSize = 50,
    overshoot = true,
    hesitate = false,
  } = options;

  // Current pointer position as tracked inside the page.
  let fromX = 0, fromY = 0;
  try {
    const mousePos = await page.evaluate(() => ({ x: window._mouseX ?? 640, y: window._mouseY ?? 360 }));
    fromX = mousePos.x;
    fromY = mousePos.y;
  } catch { fromX = 640; fromY = 360; }

  const distPx = Math.hypot(toX - fromX, toY - fromY);

  // Very short moves (<30px): go straight there with slight jitter.
  if (distPx < 30) {
    await page.mouse.move(toX + gaussianNoise(1), toY + gaussianNoise(1));
    return;
  }

  // Random control points; their spread grows with the distance, so longer
  // moves follow a more pronounced curve.
  const spread = distPx * 0.3;
  const cp1x = fromX + (toX - fromX) * 0.3 + gaussianNoise(spread);
  const cp1y = fromY + (toY - fromY) * 0.3 + gaussianNoise(spread * 0.5);
  const cp2x = fromX + (toX - fromX) * 0.7 + gaussianNoise(spread * 0.5);
  const cp2y = fromY + (toY - fromY) * 0.7 + gaussianNoise(spread);

  // Number of path samples scales with the distance (15..50).
  const steps = Math.max(15, Math.min(50, Math.floor(distPx / 8)));
  const totalTime = fittsTime(distPx, targetSize);

  for (let i = 0; i <= steps; i++) {
    const t = i / steps;
    const easedT = easeMouseProgress(t);

    let x = cubicBezier(easedT, fromX, cp1x, cp2x, toX);
    let y = cubicBezier(easedT, fromY, cp1y, cp2y, toY);

    // Micro-corrections on long moves. The last sample is left untouched so the
    // pointer ends on the requested target: callers click at the final position.
    const isLastStep = i === steps;
    if (!isLastStep && distPx > 100 && Math.random() < 0.4) {
      x += gaussianNoise(1.5);
      y += gaussianNoise(1.5);
    }

    await page.mouse.move(Math.round(x), Math.round(y));

    // Slower at the beginning and at the end, faster in the middle.
    const stepDelay = Math.round((totalTime / steps) * (t < 0.3 || t > 0.8 ? 1.5 : 0.8));
    await sleep(stepDelay + Math.floor(Math.random() * 8));
  }

  // Overshoot: sometimes go slightly past the target and come back.
  if (overshoot && Math.random() < 0.25 && distPx > 100) {
    const overshootDist = gaussianNoise(5) + 3;
    await page.mouse.move(
      Math.round(toX + overshootDist),
      Math.round(toY + overshootDist * 0.5)
    );
    await sleep(80 + Math.random() * 120);
    await page.mouse.move(Math.round(toX), Math.round(toY));
    await sleep(40 + Math.random() * 60);
  }

  // Hesitation: an occasional short pause once the target is reached.
  if (hesitate && Math.random() < 0.2) {
    await sleep(200 + Math.random() * 500);
  }

  // Record the new position for the next move. Playwright's page.evaluate
  // forwards exactly ONE argument to the page function, so both coordinates
  // travel in a single object (a second positional argument would be dropped,
  // leaving window._mouseY undefined).
  await page.evaluate(
    ({ x, y }) => { window._mouseX = x; window._mouseY = y; },
    { x: toX, y: toY }
  ).catch(() => {});
}
/**
 * Type text with human-like rhythm: variable speed, occasional typos that are
 * immediately corrected with Backspace, and slower capitals and punctuation.
 *
 * @param {object} page Object exposing `keyboard.type(text)` and `keyboard.press(key)`.
 * @param {string} text Text to type.
 * @param {object} [options]
 * @param {number}  [options.wpm]              Words per minute; random in 70–110 when omitted.
 * @param {number}  [options.errorRate=0.03]   Per-character probability of a typo.
 * @param {boolean} [options.thinkBefore=true] Pause briefly before the first keystroke.
 * @returns {Promise<void>}
 */
export async function humanType(page, text, options = {}) {
  const {
    wpm = 70 + Math.random() * 40,
    errorRate = 0.03,
    thinkBefore = true,
  } = options;

  if (thinkBefore) await sleep(200 + Math.random() * 400);

  // 5 characters per word: roughly 109–171 ms per character at 70–110 wpm.
  const msPerChar = 60000 / (wpm * 5);
  const typoAlphabet = 'qwertyuiopasdfghjklzxcvbnm';

  // Iterate by code point, not by UTF-16 code unit, so characters outside the
  // BMP (emoji, etc.) are typed whole instead of as two broken surrogate halves.
  const chars = Array.from(text);

  for (const [i, char] of chars.entries()) {
    // Occasional typo: a wrong letter followed by a correction. Skipped for
    // spaces and for multi-unit characters (char.length > 1).
    if (Math.random() < errorRate && char !== ' ' && char.length === 1) {
      const wrongChar = typoAlphabet[Math.floor(Math.random() * typoAlphabet.length)];
      await page.keyboard.type(wrongChar);
      await sleep(50 + Math.random() * 150);
      await page.keyboard.press('Backspace');
      await sleep(30 + Math.random() * 80);
    }

    await page.keyboard.type(char);

    let delay = msPerChar;
    if (i === 0) delay *= 1.5;                // slow start
    if (char === ' ') delay *= 0.7;           // spaces are quick
    if (/[A-Z]/.test(char)) delay *= 1.3;     // capitals need Shift
    if (/[.,!?;:]/.test(char)) delay *= 1.5;  // punctuation is slower

    // Natural variation of about ±30%.
    await sleep(Math.round(delay * (0.7 + Math.random() * 0.6)));
  }
}
/**
 * Scroll with human-like pacing: uneven wheel steps, occasional "reading"
 * pauses early in the scroll, and sometimes a small scroll back up.
 *
 * @param {object} page Object exposing `mouse.wheel(deltaX, deltaY)`.
 * @param {string} [direction='down'] 'down' scrolls down; any other value scrolls up.
 * @param {number} [amount=3] Scales the number of wheel steps (3–5 steps per unit).
 * @returns {Promise<void>}
 */
export async function humanScroll(page, direction = 'down', amount = 3) {
  const stepPx = 80 + Math.random() * 40; // 80–120px per wheel step
  const totalSteps = Math.round(amount * (3 + Math.random() * 2));
  const goingDown = direction === 'down';
  const signedStep = goingDown ? stepPx : -stepPx;
  const readingZoneEnd = totalSteps * 0.3;

  for (let step = 0; step < totalSteps; step += 1) {
    await page.mouse.wheel(0, signedStep * (0.8 + Math.random() * 0.4));

    // Reading pauses (0.5–2s) only happen within the first third of the scroll.
    const pausesToRead = step < readingZoneEnd && Math.random() < 0.3;
    await sleep(pausesToRead ? 500 + Math.random() * 1500 : 60 + Math.random() * 60);
  }

  // Occasional backtrack: scroll a little back up after scrolling down.
  if (Math.random() < 0.15 && goingDown) {
    let remaining = 2;
    while (remaining > 0) {
      await page.mouse.wheel(0, -(stepPx * 0.5));
      await sleep(100 + Math.random() * 200);
      remaining -= 1;
    }
  }
}
/**
 * Resolve after roughly `ms` milliseconds (rounded to the nearest integer).
 *
 * @param {number} ms Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  const delayMs = Math.round(ms);
  return new Promise((resolve) => setTimeout(resolve, delayMs));
}
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // BROWSER AGENT CORE | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
/**
 * Launch headless Chromium, open one page and prepare it with the stealth
 * init script and the mouse-position tracker.
 *
 * Module state (`_browser`, `_page`, `_sessionStart`) is assigned only after
 * every setup step has succeeded; if a step fails, the freshly launched browser
 * is closed so no Chromium process is leaked and no half-initialized session
 * is left behind.
 *
 * @returns {Promise<{browser: object, page: object}>} Handles of the new session.
 * @throws {Error} 'playwright no instalado' when the packages cannot be imported;
 *   any launch or setup error from Playwright is rethrown.
 */
export async function launchBrowser() {
  if (!await checkDependencies()) {
    throw new Error('playwright no instalado');
  }
  const { chromium } = await import('playwright-extra');
  // NOTE: puppeteer-extra-plugin-stealth was REMOVED — triggers HF abuse scanner
  // Use stealth-engine.js for anti-detection via script injection instead

  // One identity for the whole session. It is generated once and reused for
  // the context's user agent and for the injected script so the two cannot
  // disagree (whether generateSessionIdentity() is memoized is not visible
  // here — TODO confirm).
  const sessionIdentity = generateSessionIdentity();

  const browser = await chromium.launch({
    headless: true,
    args: [
      '--disable-blink-features=AutomationControlled',
      '--disable-infobars',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage', // important inside containers
      '--disable-gpu',
      '--window-size=1366,768',
    ],
  });

  let page;
  try {
    const context = await browser.newContext({
      viewport: { width: 1366, height: 768 },
      userAgent: sessionIdentity.userAgent,
      locale: 'es-ES',
      timezoneId: 'America/Mexico_City',
      permissions: ['geolocation'],
      extraHTTPHeaders: {
        'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
      },
    });
    page = await context.newPage();
    console.log('[Browser] Identidad:', sessionIdentity.platform, '/', sessionIdentity.rendererUnmasked);

    // Init scripts run before any script of the page itself.
    await page.addInitScript({ content: generateStealthScript(sessionIdentity) });

    // Track the pointer position in the page so humanMouseMove knows where
    // the next movement starts.
    await page.addInitScript(() => {
      window._mouseX = 683; window._mouseY = 384;
      document.addEventListener('mousemove', e => { window._mouseX = e.clientX; window._mouseY = e.clientY; });
    });
  } catch (err) {
    // Setup failed after the process was started: close it before rethrowing.
    await browser.close().catch(() => {});
    throw err;
  }

  _browser = browser;
  _page = page;
  _sessionStart = Date.now();

  console.log('[Browser] ✅ Navegador iniciado con stealth completo');
  return { browser, page };
}
/**
 * Report whether a usable browser session exists.
 *
 * Returns false when there is no browser/page, or when the session is older
 * than MAX_SESSION_MS. In the expired case the browser shutdown is started but
 * NOT awaited (fire-and-forget), so this function stays synchronous.
 *
 * @returns {boolean} true when the session exists and has not expired.
 */
function checkSession() {
  const hasSession = Boolean(_browser) && Boolean(_page);
  if (!hasSession) return false;

  const expired = Boolean(_sessionStart) && Date.now() - _sessionStart > MAX_SESSION_MS;
  if (!expired) return true;

  console.warn('[Browser] ⏰ Sesión expirada — cerrando');
  closeBrowser().catch(() => {});
  return false;
}
/**
 * Close the browser and reset the session state.
 *
 * The module-level handles are detached synchronously, BEFORE awaiting the
 * actual shutdown. The session-expiry check in this module starts this
 * function without awaiting it and a new browser may be launched right away;
 * clearing the state only after `close()` resolved would wipe the handles of
 * that new session (and leave its process running with nothing pointing at it).
 *
 * Errors raised while closing are ignored: the browser may already be gone.
 *
 * @returns {Promise<void>}
 */
export async function closeBrowser() {
  const browser = _browser;
  _browser = null;
  _page = null;
  _sessionStart = null;

  try {
    if (browser) { await browser.close(); }
  } catch {
    // Best effort: a crashed or already-closed browser needs no further cleanup.
  }
  console.log('[Browser] Navegador cerrado');
}
/**
 * Open a URL in the session page (launching a browser first when no valid
 * session exists), pause briefly, move the mouse to a random spot and run the
 * CAPTCHA detector/solver.
 *
 * @param {string} url Address to load.
 * @param {object} [options]
 * @param {string} [options.waitFor='networkidle'] Passed to `goto` as `waitUntil`.
 * @param {number} [options.timeout=30000] Navigation timeout in ms.
 * @returns {Promise<{url: string, title: string, captcha: object}>} Final URL,
 *   page title and the CAPTCHA result ({ type: 'none', solved: true } when the
 *   detector itself rejects).
 */
export async function navigate(url, options = {}) {
  if (!checkSession()) await launchBrowser();
  const { waitFor: waitUntil = 'networkidle', timeout: timeoutMs = 30000 } = options;

  console.log(`[Browser] 🌐 Navegando a: ${url}`);
  await _page.goto(url, { waitUntil, timeout: timeoutMs });

  // Short pause after load, like a person taking in the page.
  await sleep(500 + Math.random() * 1000);

  // Park the pointer somewhere plausible inside the viewport.
  const restX = 400 + Math.random() * 500;
  const restY = 200 + Math.random() * 300;
  await _page.mouse.move(restX, restY);

  // A failing detector is treated as "no CAPTCHA present".
  const noCaptcha = () => ({ type: 'none', solved: true });
  const captcha = await detectAndSolve(_page).catch(noCaptcha);
  const captchaSeen = captcha.type !== 'none';
  if (captchaSeen) {
    console.log(`[Browser] CAPTCHA ${captcha.type}: ${captcha.solved ? 'resuelto' : 'no resuelto'}`);
  }

  const currentUrl = _page.url();
  const title = await _page.title();
  return { url: currentUrl, title, captcha };
}
/**
 * Click an element the way a person would: wait until it is visible, bring it
 * into the viewport, glide the pointer to a slightly random point inside it,
 * then press and release with a short hold.
 *
 * @param {string} selector Selector of the element to click.
 * @param {object} [options]
 * @param {number} [options.timeout=10000] Max wait (ms) for the element to become visible.
 * @returns {Promise<{clicked: string, x: number, y: number}>} Selector and rounded click point.
 * @throws {Error} When there is no active session, the element is not found,
 *   or its position cannot be determined.
 */
export async function click(selector, options = {}) {
  if (!checkSession()) throw new Error('Sin sesión de navegador');
  const { timeout = 10000 } = options;

  // Use the handle returned by waitForSelector directly; querying the selector
  // a second time could resolve to a different node (or none) if the DOM changed.
  const el = await _page.waitForSelector(selector, { timeout, state: 'visible' });
  if (!el) throw new Error(`Elemento no encontrado: ${selector}`);

  // The bounding box is relative to the viewport and the mouse works in
  // viewport coordinates, so an element outside the visible area must be
  // scrolled into view before its position is measured.
  await el.scrollIntoViewIfNeeded({ timeout });

  const box = await el.boundingBox();
  if (!box) throw new Error(`No se puede obtener posición de: ${selector}`);

  // Click somewhere in the central 40% of the element, not always dead center.
  const clickX = box.x + box.width * (0.3 + Math.random() * 0.4);
  const clickY = box.y + box.height * (0.3 + Math.random() * 0.4);

  await humanMouseMove(_page, clickX, clickY, {
    targetSize: Math.min(box.width, box.height),
    hesitate: Math.random() < 0.1,
  });

  // Reaction time before pressing (80–200ms).
  await sleep(80 + Math.random() * 120);

  // Press, hold for a moment, release.
  await _page.mouse.down();
  await sleep(50 + Math.random() * 100);
  await _page.mouse.up();

  return { clicked: selector, x: Math.round(clickX), y: Math.round(clickY) };
}
/**
 * Focus a field by clicking it, clear its current content (select all + Delete)
 * and type the given text with human-like rhythm.
 *
 * @param {string} selector Selector of the input field.
 * @param {string} text     Text to type.
 * @param {object} [options] Forwarded to humanType.
 * @returns {Promise<{typed: string}>} The first 20 characters of the text,
 *   followed by '...' when it was longer.
 * @throws {Error} When there is no active browser session.
 */
export async function typeInto(selector, text, options = {}) {
  if (!checkSession()) throw new Error('Sin sesión de navegador');

  await click(selector);
  await sleep(100 + Math.random() * 200);

  // Wipe whatever the field already contains.
  for (const key of ['Control+a', 'Delete']) {
    await _page.keyboard.press(key);
    await sleep(50);
  }

  await humanType(_page, text, options);

  const ellipsis = text.length > 20 ? '...' : '';
  return { typed: `${text.slice(0, 20)}${ellipsis}` };
}
/**
 * Scroll the session page with human-like pacing.
 *
 * @param {string} [direction='down'] Forwarded to humanScroll.
 * @param {number} [amount=3]         Forwarded to humanScroll.
 * @returns {Promise<{scrolled: string, amount: number}>} Echo of the request.
 * @throws {Error} When there is no active browser session.
 */
export async function scroll(direction = 'down', amount = 3) {
  const sessionOk = checkSession();
  if (!sessionOk) {
    throw new Error('Sin sesión de navegador');
  }
  await humanScroll(_page, direction, amount);
  const summary = { scrolled: direction, amount };
  return summary;
}
/**
 * Capture the current viewport as a JPEG (quality 80) and return it base64-encoded.
 *
 * @returns {Promise<string>} Base64 string of the image.
 * @throws {Error} When there is no active browser session.
 */
export async function screenshot() {
  const sessionOk = checkSession();
  if (!sessionOk) {
    throw new Error('Sin sesión de navegador');
  }
  const captureOptions = { type: 'jpeg', quality: 80, fullPage: false };
  const image = await _page.screenshot(captureOptions);
  return image.toString('base64');
}
/**
 * Pull readable content out of the current page: visible text (whitespace
 * collapsed, truncated), title, URL and the first 10 links.
 *
 * @param {object} [options]
 * @param {number} [options.maxLength=5000] Maximum number of characters of text.
 * @returns {Promise<{text: string, title: string, url: string, links: Array<{text: string, href: string}>}>}
 * @throws {Error} When there is no active browser session.
 */
export async function extractContent(options = {}) {
  if (!checkSession()) throw new Error('Sin sesión de navegador');
  const { maxLength = 5000 } = options;

  // Everything inside this callback runs in the page, so it must be self-contained.
  const content = await _page.evaluate((limit) => {
    const TEXT_NODE = 3;
    const ELEMENT_NODE = 1;
    const ignoredTags = new Set(['script', 'style', 'noscript', 'head']);

    // Recursively gather text, skipping ignored tags and elements hidden via CSS.
    const collectText = (node) => {
      if (node.nodeType === TEXT_NODE) return node.textContent;
      if (node.nodeType !== ELEMENT_NODE) return '';
      if (ignoredTags.has(node.tagName.toLowerCase())) return '';

      const { display, visibility } = window.getComputedStyle(node);
      if (display === 'none' || visibility === 'hidden') return '';

      const pieces = [];
      for (const child of Array.from(node.childNodes)) {
        pieces.push(collectText(child));
      }
      return pieces.join(' ');
    };

    const rawText = collectText(document.body);
    const text = rawText.replace(/\s+/g, ' ').trim().slice(0, limit);

    const anchors = Array.from(document.querySelectorAll('a[href]')).slice(0, 10);
    const links = anchors.map((anchor) => {
      const label = anchor.textContent.trim().slice(0, 50);
      return { text: label, href: anchor.href };
    });

    return {
      text,
      title: document.title,
      url: window.location.href,
      links,
    };
  }, maxLength);

  return content;
}
/**
 * Heuristically detect a CAPTCHA / bot-check on the current page.
 *
 * Signals checked: CAPTCHA-related elements or iframes, the words "captcha" or
 * "verify you are human" in the page's rendered text, or a title containing
 * "checking your browser".
 *
 * The text check uses `innerText` (rendered text) rather than `textContent`,
 * because `textContent` also includes the source of inline <script>/<style>
 * elements, where the word "captcha" often appears on pages that show no
 * challenge at all. This is still a heuristic: a page that visibly mentions
 * "captcha" is reported as one.
 *
 * @returns {Promise<boolean>} true when a CAPTCHA seems present; false when
 *   there is no active session or nothing matched.
 */
export async function detectCaptcha() {
  if (!checkSession()) return false;

  const hasCaptcha = await _page.evaluate(() => {
    const widgetSelector = '[class*="captcha"], [id*="captcha"], iframe[src*="recaptcha"], iframe[src*="hcaptcha"]';
    if (document.querySelector(widgetSelector)) return true;

    // document.body can be null (e.g. documents without a body element).
    const body = document.body;
    const visibleText = (body?.innerText ?? body?.textContent ?? '').toLowerCase();

    return (
      visibleText.includes('captcha') ||
      visibleText.includes('verify you are human') ||
      document.title.toLowerCase().includes('checking your browser')
    );
  });
  return hasCaptcha;
}
// ── Web search over plain HTTP (no Playwright, no API key) ───────────────────
// Public SearXNG instances are queried through their JSON API; DuckDuckGo Lite
// is the last-resort fallback.
const SEARXNG_INSTANCES = [
  'https://search.inetol.net',
  'https://searx.tiekoetter.com',
  'https://priv.au',
  'https://search.bus-hit.me',
  'https://searx.fmac.xyz',
  'https://search.ononoki.org',
  'https://searxng.world',
  'https://searx.be',
];

/**
 * Turn the href of a DuckDuckGo Lite result anchor into the destination URL.
 * Handles absolute and protocol-relative hrefs and unwraps redirect links of
 * the form `//duckduckgo.com/l/?uddg=<encoded target>` (assumed redirect
 * format — TODO confirm against the live markup).
 *
 * @param {string} href Raw attribute value, possibly with `&amp;` entities.
 * @returns {string} The destination URL, or '' when it is not an http(s) link.
 */
function unwrapDdgLiteHref(href) {
  let parsed;
  try {
    parsed = new URL(href.replaceAll('&amp;', '&'), 'https://lite.duckduckgo.com/lite/');
  } catch {
    return '';
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return '';

  const isRedirect = /(^|\.)duckduckgo\.com$/.test(parsed.hostname) && parsed.pathname === '/l/';
  if (isRedirect) return parsed.searchParams.get('uddg') ?? '';
  return parsed.href;
}

/**
 * Extract results from DuckDuckGo Lite HTML. Title and URL are taken from the
 * SAME `result-link` anchor, so unrelated links elsewhere in the page cannot
 * shift the pairing. Snippets are matched to results by position.
 *
 * @param {string} html       Page markup.
 * @param {number} maxResults Maximum number of results to return.
 * @returns {Array<{title: string, url: string, snippet: string}>}
 */
function parseDdgLiteResults(html, maxResults) {
  const anchorRe = /<a\b([^>]*\bclass=["']result-link["'][^>]*)>([^<]+)</g;
  const snippetRe = /class=["']result-snippet["'][^>]*>([^<]+)</g;
  const hrefRe = /\bhref=["']([^"']+)["']/;

  const snippets = [...html.matchAll(snippetRe)].map((m) => m[1].trim());

  return [...html.matchAll(anchorRe)]
    .map(([, attrs, rawTitle], index) => {
      const href = hrefRe.exec(attrs)?.[1] ?? '';
      return {
        title: rawTitle.trim(),
        url: href ? unwrapDdgLiteHref(href) : '',
        snippet: snippets[index] ?? '',
      };
    })
    .filter((r) => r.title && r.url)
    .slice(0, maxResults);
}

/**
 * Search the web without a browser.
 *
 * Tries each SearXNG instance in order (6s timeout each) and returns the first
 * non-empty result set; if none answers, falls back to DuckDuckGo Lite (8s
 * timeout). Network and parsing errors are treated as "try the next source".
 *
 * @param {string} query          Search terms.
 * @param {number} [maxResults=5] Maximum number of results.
 * @returns {Promise<{query: string, results: Array<{title: string, url: string, snippet: string}>, engine?: string, error?: string}>}
 *   `engine` is 'searxng' or 'ddg_lite' on success; on total failure `results`
 *   is empty and `error` is set.
 */
async function searchWithSearXNG(query, maxResults = 5) {
  for (const instance of SEARXNG_INSTANCES) {
    try {
      const url = `${instance}/search?q=${encodeURIComponent(query)}&format=json&language=auto&categories=general&engines=google,bing,duckduckgo`;
      const res = await fetch(url, {
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120',
          'Accept': 'application/json, text/html',
        },
        signal: AbortSignal.timeout(6000),
      });
      if (!res.ok) continue;
      const data = await res.json();

      // Filter BEFORE truncating so incomplete entries do not eat into the
      // requested number of results.
      const results = (data.results ?? [])
        .map((r) => ({
          title: r.title ?? '',
          url: r.url ?? '',
          snippet: r.content ?? r.snippet ?? '',
        }))
        .filter((r) => r.title && r.url)
        .slice(0, maxResults);

      if (results.length > 0) {
        console.log(`[Browser] 🔍 SearXNG (${instance}) "${query}": ${results.length} resultados`);
        return { query, results, engine: 'searxng' };
      }
    } catch {
      // Instance unreachable, timed out or returned non-JSON: try the next one.
      continue;
    }
  }

  // Last resort: DuckDuckGo Lite (simple markup).
  try {
    const res = await fetch(
      'https://lite.duckduckgo.com/lite/?q=' + encodeURIComponent(query),
      { headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' }, signal: AbortSignal.timeout(8000) }
    );
    if (res.ok) {
      const results = parseDdgLiteResults(await res.text(), maxResults);
      if (results.length > 0) {
        console.log('[Browser] DDG lite "' + query + '": ' + results.length + ' resultados');
        return { query, results, engine: 'ddg_lite' };
      }
    }
  } catch {
    // Fall through to the failure result below.
  }

  return { query, results: [], error: 'Todas las búsquedas fallaron' };
}
/**
 * Search the web. Uses the browser (DuckDuckGo results page) when Playwright is
 * available; otherwise, or when the browser cannot be launched, uses the
 * fetch-based search.
 *
 * @param {string} query Search terms.
 * @param {object} [options]
 * @param {number} [options.maxResults=5] Maximum number of results.
 * @returns {Promise<object>} `{ query, results }` from the browser path,
 *   `{ error: 'CAPTCHA detectado', query, results: [] }` when a CAPTCHA is
 *   detected, or whatever the fetch-based search returns.
 */
export async function webSearch(query, options = {}) {
  const { maxResults = 5 } = options;

  // Playwright not ready yet: try to set it up, and use the fetch-based search
  // if it is still unavailable afterwards.
  if (!_playwrightAvailable) {
    await ensurePlaywright().catch(() => {});
    if (!_playwrightAvailable) {
      return await searchWithSearXNG(query, maxResults);
    }
  }

  try {
    if (!checkSession()) await launchBrowser();
  } catch (launchErr) {
    const [firstLine] = launchErr.message.split('\n');
    console.warn('[Browser] Launch falló, usando SearXNG:', firstLine);
    _playwrightAvailable = false;
    return await searchWithSearXNG(query, maxResults);
  }

  const searchUrl = `https://duckduckgo.com/?q=${encodeURIComponent(query)}&kl=es-es`;
  await navigate(searchUrl);
  await sleep(1000 + Math.random() * 1000);

  // Scroll a little, as a reader would.
  await humanScroll(_page, 'down', 2);

  const blockedByCaptcha = await detectCaptcha();
  if (blockedByCaptcha) {
    return { error: 'CAPTCHA detectado', query, results: [] };
  }

  // Read the result cards out of the page.
  const results = await _page.evaluate((limit) => {
    const cards = Array.from(document.querySelectorAll('[data-testid="result"]')).slice(0, limit);
    const parsed = [];
    for (const card of cards) {
      const title = card.querySelector('[data-testid="result-title-a"]')?.textContent?.trim() ?? '';
      const url = card.querySelector('[data-testid="result-extras-url-link"]')?.href ?? '';
      const snippet = card.querySelector('[data-testid="result-snippet"]')?.textContent?.trim() ?? '';
      if (title) parsed.push({ title, url, snippet });
    }
    return parsed;
  }, maxResults);

  console.log(`[Browser] 🔍 Búsqueda "${query}": ${results.length} resultados`);
  return { query, results };
}
/**
 * Run a web task described by `goal`.
 *
 * - A goal starting with http:// or https:// is opened and its content
 *   extracted (plain `fetch` is used when no browser is available).
 * - Any other goal is treated as a search query.
 *
 * The browser is NOT launched up front: the search and navigation helpers
 * launch it on demand and have their own fallbacks. Launching eagerly made
 * every task fail with 'playwright no instalado' on hosts without Playwright,
 * before any fallback could run.
 *
 * This function does not throw; failures are reported in the returned object.
 *
 * @param {string} goal A URL or a search query.
 * @param {object} [options]
 * @param {?function(string): Promise<void>} [options.notifyOnCaptcha=null]
 *   Called with a message when a search is blocked by a CAPTCHA.
 * @param {number} [options.maxSteps] Accepted for compatibility; not used by
 *   this implementation.
 * @returns {Promise<object>} `{ success: true, type, data, log }` or
 *   `{ success: false, error, log }`; `type` is 'search', 'navigate' or
 *   'fetch_fallback'.
 */
export async function executeWebTask(goal, options = {}) {
  const { notifyOnCaptcha = null } = options;
  const log = [];
  // Single definition of "is this a URL?", shared by the main path and the
  // error fallback so both always agree.
  const isUrlGoal = (value) => /^https?:\/\//.test(String(value).trim());

  try {
    log.push({ step: 'start', goal, ts: Date.now() });

    if (!isUrlGoal(goal)) {
      const searchResult = await webSearch(goal);
      if (searchResult.error) {
        log.push({ step: 'search_error', error: searchResult.error });
        // Only a CAPTCHA is reported to the owner as a CAPTCHA; other search
        // failures (e.g. every search backend being down) are not.
        if (notifyOnCaptcha && /captcha/i.test(searchResult.error)) {
          await notifyOnCaptcha('CAPTCHA detectado durante búsqueda: ' + goal);
        }
        return { success: false, error: searchResult.error, log };
      }
      log.push({ step: 'search', results: searchResult.results.length });
      return { success: true, type: 'search', data: searchResult, log };
    }

    const url = String(goal).trim();

    // Give Playwright one chance to become available before falling back.
    if (!_playwrightAvailable) {
      await ensurePlaywright().catch(() => {});
    }

    if (!_playwrightAvailable) {
      // Fallback: plain fetch with a crude tag strip.
      try {
        const res = await fetch(url, { headers: { 'User-Agent': 'Mozilla/5.0' }, signal: AbortSignal.timeout(10000) });
        const html = await res.text();
        const text = html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim().slice(0, 3000);
        log.push({ step: 'fetch_fallback', url });
        return { success: true, type: 'fetch_fallback', data: { url, content: { text } }, log };
      } catch (fe) {
        return { success: false, error: 'fetch falló: ' + fe.message, log };
      }
    }

    const nav = await navigate(url);
    const content = await extractContent();
    log.push({ step: 'navigate', url: nav.url, title: nav.title });
    return { success: true, type: 'navigate', data: { ...nav, content }, log };
  } catch (err) {
    const message = String(err?.message ?? err);

    // Missing system libraries or a missing browser binary.
    const isLibError = [
      'missing dependencies',
      'Host system',
      'libatk',
      'Executable doesn',
      'chromium',
      'browserType.launch',
    ].some((marker) => message.includes(marker));

    if (isLibError) {
      console.warn('[Browser] Chromium sin librerías del sistema — usando SearXNG');
      _playwrightAvailable = false;
      log.push({ step: 'browser_unavailable', error: message });

      if (!isUrlGoal(goal)) {
        const data = await searchWithSearXNG(goal);
        // Report the real outcome of the fallback instead of always claiming success.
        return data.error
          ? { success: false, error: data.error, log }
          : { success: true, type: 'search', data, log };
      }
      return { success: false, error: 'sin_navegador', log };
    }

    console.error('[Browser] Error en tarea:', message);
    log.push({ step: 'error', error: message });
    return { success: false, error: message, log };
  }
}
/**
 * Snapshot of the browser agent's state.
 *
 * NOTE(review): the identity reported here comes from a fresh
 * generateSessionIdentity() call; whether that matches the identity of the
 * running session depends on that function, which is not visible here.
 *
 * @returns {{available: boolean, sessionActive: boolean, sessionAge: ?string, maxSession: string, identity: *}}
 *   `sessionAge` is the age in whole seconds with an 's' suffix, or null when
 *   no session has been started.
 */
export function getBrowserStatus() {
  const identity = generateSessionIdentity();

  let sessionAge = null;
  if (_sessionStart) {
    const elapsedSeconds = Math.round((Date.now() - _sessionStart) / 1000);
    sessionAge = elapsedSeconds + 's';
  }

  const sessionActive = !!_browser && !!_page;
  const maxSession = MAX_SESSION_MS / 1000 + 's';

  return {
    available: _playwrightAvailable,
    sessionActive,
    sessionAge,
    maxSession,
    identity: getIdentityStats(identity),
  };
}