// HTTP server framework and outbound HTTP client used by this service.
const express = require('express');
const fetch = require('node-fetch');

// Single Express application instance that the route and listener attach to.
const app = express();

// Pool of desktop browser User-Agent strings sent with outbound search
// requests. Frozen so that no request handler can mutate the shared list.
const USER_AGENTS = Object.freeze([
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
]);

/**
 * GET /?q=<query>[&target=startpage]
 *
 * Runs the query against Startpage (when target=startpage) or DuckDuckGo's
 * HTML endpoint (any other or missing target), parses the returned HTML and
 * responds with JSON: { query, target, status, source, results }.
 *
 * Responses:
 *   400 { error } when `q` is missing, empty, or not a plain string.
 *   500 { error } when the upstream request or parsing throws.
 *   200 otherwise; `status` carries the upstream HTTP status code.
 */
app.get('/', async (req, res) => {
  const query = req.query.q;
  // Repeated or bracketed parameters (?q=a&q=b, ?q[x]=1) are parsed into
  // arrays/objects; only a non-empty string is a usable search query.
  if (typeof query !== 'string' || !query) {
    return res.status(400).json({ error: 'Missing ?q=query' });
  }

  // Anything other than "startpage" is served by DuckDuckGo, so report the
  // engine that was actually used instead of echoing the raw parameter.
  const target = req.query.target === 'startpage' ? 'startpage' : 'ddg';
  const ua = USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];

  const headers = {
    'User-Agent': ua,
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  };

  const encodedQuery = encodeURIComponent(query);
  const fetchUrl = target === 'startpage'
    ? `https://www.startpage.com/sp/search?query=${encodedQuery}&cat=web&pl=opensearch&language=english&num=20`
    : `https://html.duckduckgo.com/html/?q=${encodedQuery}&nojs=1`;

  // Upper bound on the upstream request so a stalled search engine cannot
  // hold this request open indefinitely (node-fetch v2 `timeout` option, ms).
  const upstreamTimeoutMs = 15000;

  try {
    const response = await fetch(fetchUrl, { headers, timeout: upstreamTimeoutMs });
    const html = await response.text();

    const results = target === 'startpage' ? parseStartpage(html) : parseDuckDuckGo(html);

    res.json({
      query,
      target,
      status: response.status,
      source: 'hugging_face_space',
      results,
    });
  } catch (e) {
    // Network failure, timeout, or parser error: surface the message to the caller.
    res.status(500).json({ error: e.message });
  }
});

/**
 * Extracts up to 10 unique results from a DuckDuckGo HTML results page.
 *
 * Looks at every `<a class="result__a" href="...">` anchor. Hrefs that point
 * at the `/l/?` redirect endpoint are unwrapped to the destination carried in
 * their percent-encoded `uddg` parameter; other hrefs are used as-is.
 *
 * @param {string} html - Raw HTML of the results page.
 * @returns {{title: string, url: string}[]} Results in page order, de-duplicated by URL.
 */
function parseDuckDuckGo(html) {
  const maxResults = 10;
  const results = [];
  const seenUrls = new Set();
  // Created per call: a /g regex keeps lastIndex state between exec() calls.
  const anchorRegex = /<a[^>]+class="result__a"[^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/gs;

  let match;
  while (results.length < maxResults && (match = anchorRegex.exec(html)) !== null) {
    const href = match[1];
    // Links routed through y.js are skipped (presumably ad clicks — confirm).
    if (href.includes('y.js')) continue;

    let cleanUrl = href;
    if (href.includes('/l/?')) {
      // Redirect wrapper: the real destination lives in the `uddg` parameter.
      // A redirect link without one has no usable target, so it is dropped.
      const uddgMatch = href.match(/uddg=([^&]+)/);
      if (!uddgMatch) continue;
      try {
        cleanUrl = decodeURIComponent(uddgMatch[1]);
      } catch (err) {
        // Malformed percent-encoding (URIError): skip this one link instead
        // of failing the whole parse.
        continue;
      }
    }

    if (seenUrls.has(cleanUrl)) continue;
    seenUrls.add(cleanUrl);

    // Drop inline markup (e.g. <b> highlights) from the anchor text.
    const title = match[2].replace(/<[^>]*>/g, '').trim();
    results.push({ title, url: cleanUrl });
  }
  return results;
}

/**
 * Extracts up to 10 unique results from a Startpage HTML results page.
 *
 * Looks at every anchor whose class attribute contains `result-title` or
 * `result-link` and whose href is an absolute http(s) URL. Links hosted on
 * startpage.com itself, duplicate URLs, and anchors with no visible text are
 * skipped.
 *
 * @param {string} html - Raw HTML of the results page.
 * @returns {{title: string, url: string}[]} Results in page order, de-duplicated by URL.
 */
function parseStartpage(html) {
  const maxResults = 10;
  const results = [];
  const seenUrls = new Set();
  // Created per call: a /g regex keeps lastIndex state between exec() calls.
  const titleLinkRegex = /<a[^>]+class="[^"]*(?:result-title|result-link)[^"]*"[^>]*href="(https?:\/\/[^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;

  // True when the link is hosted on startpage.com or one of its subdomains.
  // Comparing hostnames keeps third-party pages that merely mention
  // "startpage.com" in their path or query string.
  const isStartpageLink = (candidate) => {
    try {
      const { hostname } = new URL(candidate);
      return hostname === 'startpage.com' || hostname.endsWith('.startpage.com');
    } catch (err) {
      // Unparseable URL: fall back to the plain substring check.
      return candidate.includes('startpage.com');
    }
  };

  let match;
  while (results.length < maxResults && (match = titleLinkRegex.exec(html)) !== null) {
    // Attribute values are HTML-escaped; restore literal ampersands so query
    // strings in the returned URL are usable.
    const url = match[1].replace(/&amp;/g, '&');
    if (isStartpageLink(url) || seenUrls.has(url)) continue;

    // Drop nested markup from the anchor text.
    const title = match[2].replace(/<[^>]*>/g, '').trim();
    if (!title) continue;

    results.push({ title, url });
    seenUrls.add(url);
  }
  return results;
}

// Start the HTTP server on $PORT (falling back to 7860 when it is unset or
// empty) and log the port that was actually bound.
const listener = app.listen(process.env.PORT || 7860, () => {
  console.log(`Your app is listening on port ${listener.address().port}`);
});