const express = require('express');
const fetch = require('node-fetch');

const app = express();

// Browser-like User-Agent strings; one is picked at random for every upstream request.
const USER_AGENTS = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
];

// Upper bound on the number of results returned by each parser.
const MAX_RESULTS = 10;

// Named HTML entities that are decoded in scraped text. A Map is used (rather than a plain
// object) so that entity names such as "constructor" cannot hit inherited properties.
const NAMED_HTML_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' ']
]);

/**
 * Decodes numeric (`&#39;`, `&#x27;`) and a small set of named HTML entities.
 * Unknown or out-of-range entities are left untouched. Decoding is done in a single
 * pass, so `&amp;lt;` becomes `&lt;` and is not decoded a second time.
 *
 * @param {string} text - Raw text taken from an HTML attribute or element body.
 * @returns {string} The text with recognised entities replaced by their characters.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
    if (body[0] === '#') {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = isHex
        ? Number.parseInt(body.slice(2), 16)
        : Number.parseInt(body.slice(1), 10);
      // String.fromCodePoint throws outside this range, so guard explicitly.
      return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }
    return NAMED_HTML_ENTITIES.has(body) ? NAMED_HTML_ENTITIES.get(body) : entity;
  });
}

/**
 * Converts the inner HTML of a result link into plain text.
 * Tags are stripped before entities are decoded so that an escaped `&lt;b&gt;`
 * in the title is kept as literal text instead of being removed as a tag.
 *
 * @param {string} fragment - Inner HTML of an anchor element.
 * @returns {string} Trimmed plain-text title.
 */
function htmlToText(fragment) {
  return decodeHtmlEntities(fragment.replace(/<[^>]*>/g, '')).trim();
}

/**
 * GET /?q=<query>[&target=startpage|ddg]
 *
 * Fetches the search result page of the selected engine (DuckDuckGo HTML by default,
 * Startpage when `target=startpage`) and responds with the scraped results as JSON:
 * `{ query, target, status, source, results }`. Responds with 400 when `q` is missing
 * and with 500 when the upstream request or parsing throws.
 */
app.get('/', async (req, res) => {
  const query = req.query.q;
  if (!query) return res.status(400).json({ error: 'Missing ?q=query' });

  const target = req.query.target || 'ddg';
  const isStartpage = target === 'startpage';

  const ua = USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
  const headers = {
    'User-Agent': ua,
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
  };

  // Any target other than "startpage" falls back to DuckDuckGo.
  const fetchUrl = isStartpage
    ? `https://www.startpage.com/sp/search?query=${encodeURIComponent(query)}&cat=web&pl=opensearch&language=english&num=20`
    : `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}&nojs=1`;

  try {
    const response = await fetch(fetchUrl, { headers });
    const html = await response.text();
    const results = isStartpage ? parseStartpage(html) : parseDuckDuckGo(html);
    // `status` is the upstream HTTP status, passed through so callers can detect blocks.
    res.json({ query, target, status: response.status, source: 'hugging_face_space', results });
  } catch (e) {
    res.status(500).json({ error: e.message });
  }
});

// --- Result parsers ---

/**
 * Extracts up to MAX_RESULTS unique results from a DuckDuckGo HTML result page.
 *
 * Result anchors carry `class="result__a"`. Their `href` is either a direct URL or a
 * redirect of the form `.../l/?uddg=<percent-encoded target>&...`, in which case the
 * real target is taken from the `uddg` parameter. Ad links (`/y.js`) and redirect
 * links without a usable `uddg` value are skipped.
 *
 * @param {string} html - Raw HTML of the result page.
 * @returns {{title: string, url: string}[]} Parsed results in page order.
 */
function parseDuckDuckGo(html) {
  const results = [];
  const seenUrls = new Set();
  const regex = /<a[^>]+class="result__a"[^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/gs;

  let match;
  while ((match = regex.exec(html)) !== null) {
    if (results.length >= MAX_RESULTS) break;

    const href = decodeHtmlEntities(match[1]);

    // Ad click-tracking links. Matching the path segment (not just "y.js") avoids
    // dropping legitimate targets such as ".../jquery.js".
    if (/\/y\.js(?:\?|$)/.test(href)) continue;

    let cleanUrl = href;
    if (href.includes('/l/?')) {
      // Redirect wrapper: the real destination lives in the uddg parameter.
      const uddgMatch = href.match(/uddg=([^&]+)/);
      if (!uddgMatch) continue;
      try {
        cleanUrl = decodeURIComponent(uddgMatch[1]);
      } catch (err) {
        // Malformed percent-encoding: skip this link instead of failing the whole request.
        continue;
      }
    }

    if (seenUrls.has(cleanUrl)) continue;
    seenUrls.add(cleanUrl);

    results.push({ title: htmlToText(match[2]), url: cleanUrl });
  }

  return results;
}

/**
 * Extracts up to MAX_RESULTS unique results from a Startpage result page.
 *
 * Considers anchors whose class contains `result-title` or `result-link` and whose
 * `href` is an absolute http(s) URL. Links pointing back to startpage.com, duplicates
 * and links with an empty title are skipped.
 *
 * @param {string} html - Raw HTML of the result page.
 * @returns {{title: string, url: string}[]} Parsed results in page order.
 */
function parseStartpage(html) {
  const results = [];
  const seenUrls = new Set();
  const titleLinkRegex = /<a[^>]+class="[^"]*(?:result-title|result-link)[^"]*"[^>]*href="(https?:\/\/[^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;

  let match;
  while ((match = titleLinkRegex.exec(html)) !== null) {
    if (results.length >= MAX_RESULTS) break;

    const url = decodeHtmlEntities(match[1]);
    if (url.includes('startpage.com') || seenUrls.has(url)) continue;

    const title = htmlToText(match[2]);
    if (title) {
      results.push({ title, url });
      seenUrls.add(url);
    }
  }

  return results;
}

// ----------------------

const listener = app.listen(process.env.PORT || 7860, () => {
  console.log(`Your app is listening on port ${listener.address().port}`);
});