startpage / server.js
alexcardo's picture
Update server.js
cc7b6bb verified
// Express supplies the HTTP server and routing used by this file.
const express = require('express');
// node-fetch performs the outbound HTTP requests to the search engines.
const fetch = require('node-fetch');
// The single application instance; the route and listener below attach to it.
const app = express();
// Pool of browser User-Agent header values; the route handler picks one at
// random for each outbound search request.
const USER_AGENTS = [
  // Chrome 120 on Windows
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  // Safari 17 on macOS
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15',
  // Chrome 119 on Linux
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
];
/**
 * GET / — run a web search on the chosen engine and return parsed results as JSON.
 *
 * Query parameters:
 *   q       (required) the search text; must be a single string value.
 *   target  (optional) 'startpage' selects Startpage; any other value
 *           (default 'ddg') selects the DuckDuckGo HTML endpoint.
 *
 * Responses:
 *   200  { query, target, status, source, results } — `status` is the HTTP
 *        status returned by the search engine.
 *   400  `q` is missing or is not a plain string.
 *   504  the search engine did not answer within the timeout.
 *   500  any other failure, with the error message.
 */
app.get('/', async (req, res) => {
  // Upper bound on the whole upstream exchange (headers + body), in milliseconds.
  const UPSTREAM_TIMEOUT_MS = 15000;

  const query = req.query.q;
  if (!query) return res.status(400).json({ error: 'Missing ?q=query' });
  // Repeated or bracketed parameters (?q=a&q=b, ?q[x]=y) arrive as arrays or
  // objects; stringifying those would silently search for the wrong text.
  if (typeof query !== 'string') {
    return res.status(400).json({ error: 'Parameter q must be a single string' });
  }

  const target = req.query.target || 'ddg';
  const useStartpage = target === 'startpage';

  const ua = USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
  const headers = {
    'User-Agent': ua,
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
  };

  const fetchUrl = useStartpage
    ? `https://www.startpage.com/sp/search?query=${encodeURIComponent(query)}&cat=web&pl=opensearch&language=english&num=20`
    : `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}&nojs=1`;

  let timer;
  try {
    // Abort the request if the search engine stalls, so this handler cannot hang forever.
    const controller = new AbortController();
    timer = setTimeout(() => controller.abort(), UPSTREAM_TIMEOUT_MS);

    const response = await fetch(fetchUrl, { headers, signal: controller.signal });
    const html = await response.text();
    const results = useStartpage ? parseStartpage(html) : parseDuckDuckGo(html);
    res.json({
      query,
      target,
      status: response.status,
      source: 'hugging_face_space',
      results
    });
  } catch (e) {
    if (e && e.name === 'AbortError') {
      return res.status(504).json({ error: 'Upstream request timed out' });
    }
    res.status(500).json({ error: e.message });
  } finally {
    clearTimeout(timer);
  }
});
// --- YOUR PARSERS ---
/**
 * Extract up to 10 unique results from a DuckDuckGo HTML results page.
 *
 * Only anchors carrying class="result__a" are considered. Links that go
 * through the `/l/?` redirect are unwrapped to the destination stored,
 * percent-encoded, in their `uddg` parameter; redirect links without a usable
 * `uddg` value are skipped, as are links containing `y.js`.
 *
 * @param {string} html - Raw HTML of the results page.
 * @returns {{title: string, url: string}[]} Results in page order, with
 *   duplicate URLs removed and markup stripped from titles.
 */
function parseDuckDuckGo(html) {
  const results = [];
  const seenUrls = new Set();
  const regex = /<a[^>]+class="result__a"[^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/gs;
  let match;
  while ((match = regex.exec(html)) !== null) {
    if (results.length >= 10) break;
    const href = match[1];

    // NOTE(review): y.js links are presumably ad/tracking redirects — confirm.
    if (href.includes('y.js')) continue;

    let cleanUrl = href;
    if (href.includes('/l/?')) {
      // Redirect wrapper: unwrap it instead of discarding the result. The
      // previous check skipped every '/l/?' link, which made this unwrapping
      // unreachable and dropped all redirect-wrapped results.
      const uddgMatch = href.match(/uddg=([^&]+)/);
      if (!uddgMatch) continue;
      try {
        cleanUrl = decodeURIComponent(uddgMatch[1]);
      } catch (err) {
        // Malformed percent-encoding: skip this link rather than fail the whole parse.
        continue;
      }
    }

    if (seenUrls.has(cleanUrl)) continue;
    seenUrls.add(cleanUrl);

    // Titles may contain inline markup (e.g. <b> highlights); keep text only.
    const title = match[2].replace(/<[^>]*>/g, '').trim();
    results.push({ title, url: cleanUrl });
  }
  return results;
}
/**
 * Pull up to 10 unique results out of a Startpage results page.
 *
 * Considers anchors whose class list contains `result-title` or `result-link`
 * and whose href is an absolute http(s) URL. Links containing
 * `startpage.com`, repeated URLs, and anchors with no visible text are ignored.
 *
 * @param {string} html - Raw HTML of the results page.
 * @returns {{title: string, url: string}[]} Results in page order.
 */
function parseStartpage(html) {
  const MAX_RESULTS = 10;
  const pattern = /<a[^>]+class="[^"]*(?:result-title|result-link)[^"]*"[^>]*href="(https?:\/\/[^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
  // Drop any nested markup and surrounding whitespace from an anchor body.
  const toPlainText = (fragment) => fragment.replace(/<[^>]*>/g, '').trim();

  const collected = [];
  const visited = new Set();

  for (let hit = pattern.exec(html); hit !== null; hit = pattern.exec(html)) {
    if (collected.length >= MAX_RESULTS) break;

    const [, link, inner] = hit;
    const pointsAtStartpage = link.includes('startpage.com');
    if (pointsAtStartpage || visited.has(link)) continue;

    const title = toPlainText(inner);
    // A URL only counts as seen once it has produced a titled result.
    if (!title) continue;

    collected.push({ title, url: link });
    visited.add(link);
  }

  return collected;
}
// ----------------------
// Start the HTTP server on the port named by the PORT environment variable,
// falling back to 7860 when it is unset, and log the port actually bound.
const listener = app.listen(process.env.PORT || 7860, function onListening() {
  const { port } = listener.address();
  console.log('Your app is listening on port ' + port);
});