Update server.js
Browse files
server.js
CHANGED
|
@@ -450,20 +450,29 @@ async function scrapeProvider(domain, url, signal) {
|
|
| 450 |
browserInstance = await browserPool.get();
|
| 451 |
const browser = browserInstance.browser;
|
| 452 |
|
| 453 |
-
// 创建
|
| 454 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
|
| 456 |
-
//
|
| 457 |
-
await
|
| 458 |
-
|
| 459 |
-
'
|
| 460 |
-
'
|
| 461 |
-
'Accept-Encoding': 'gzip, deflate, br',
|
| 462 |
-
'DNT': '1',
|
| 463 |
-
'Connection': 'keep-alive',
|
| 464 |
-
'Upgrade-Insecure-Requests': '1',
|
| 465 |
});
|
| 466 |
|
|
|
|
|
|
|
| 467 |
// 启用请求拦截
|
| 468 |
await page.route('**/*', async (route) => {
|
| 469 |
const requestUrl = route.request().url();
|
|
@@ -589,7 +598,7 @@ async function scrapeProvider(domain, url, signal) {
|
|
| 589 |
if (!firstIframeSrc) throw new Error('First iframe not found');
|
| 590 |
|
| 591 |
// 创建新页面来加载iframe内容
|
| 592 |
-
const iframePage = await
|
| 593 |
await iframePage.goto(firstIframeSrc, {
|
| 594 |
waitUntil: 'networkidle',
|
| 595 |
timeout: 60000
|
|
@@ -621,11 +630,14 @@ async function scrapeProvider(domain, url, signal) {
|
|
| 621 |
} finally {
|
| 622 |
if (browserInstance) {
|
| 623 |
console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 624 |
await browserPool.release(browserInstance);
|
| 625 |
}
|
| 626 |
}
|
| 627 |
}
|
| 628 |
-
|
| 629 |
async function scrapeProvider4(domain, url, signal) {
|
| 630 |
if (signal.aborted) throw new Error('Aborted');
|
| 631 |
console.log(`\n[${domain}] Starting scrape for URL: ${url}`);
|
|
|
|
| 450 |
browserInstance = await browserPool.get();
|
| 451 |
const browser = browserInstance.browser;
|
| 452 |
|
| 453 |
+
// Create the page inside a dedicated browser context
|
| 454 |
+
const context = await browser.newContext({
|
| 455 |
+
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 456 |
+
extraHTTPHeaders: {
|
| 457 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
| 458 |
+
'Accept-Language': 'en-US,en;q=0.5',
|
| 459 |
+
'Accept-Encoding': 'gzip, deflate, br',
|
| 460 |
+
'DNT': '1',
|
| 461 |
+
'Connection': 'keep-alive',
|
| 462 |
+
'Upgrade-Insecure-Requests': '1',
|
| 463 |
+
},
|
| 464 |
+
viewport: { width: 1920, height: 1080 }
|
| 465 |
+
});
|
| 466 |
|
| 467 |
+
// Remove traces of browser automation
|
| 468 |
+
await context.addInitScript(() => {
|
| 469 |
+
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
| 470 |
+
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
|
| 471 |
+
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
});
|
| 473 |
|
| 474 |
+
const page = await context.newPage();
|
| 475 |
+
|
| 476 |
// 启用请求拦截
|
| 477 |
await page.route('**/*', async (route) => {
|
| 478 |
const requestUrl = route.request().url();
|
|
|
|
| 598 |
if (!firstIframeSrc) throw new Error('First iframe not found');
|
| 599 |
|
| 600 |
// 创建新页面来加载iframe内容
|
| 601 |
+
const iframePage = await context.newPage();
|
| 602 |
await iframePage.goto(firstIframeSrc, {
|
| 603 |
waitUntil: 'networkidle',
|
| 604 |
timeout: 60000
|
|
|
|
| 630 |
} finally {
|
| 631 |
if (browserInstance) {
|
| 632 |
console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
|
| 633 |
+
// Close the browser context
|
| 634 |
+
if (typeof context !== 'undefined') {
|
| 635 |
+
await context.close();
|
| 636 |
+
}
|
| 637 |
await browserPool.release(browserInstance);
|
| 638 |
}
|
| 639 |
}
|
| 640 |
}
|
|
|
|
| 641 |
async function scrapeProvider4(domain, url, signal) {
|
| 642 |
if (signal.aborted) throw new Error('Aborted');
|
| 643 |
console.log(`\n[${domain}] Starting scrape for URL: ${url}`);
|