stnh70 committed on
Commit
5fc9bfc
·
verified ·
1 Parent(s): 810eac5

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +25 -13
server.js CHANGED
@@ -450,20 +450,29 @@ async function scrapeProvider(domain, url, signal) {
450
  browserInstance = await browserPool.get();
451
  const browser = browserInstance.browser;
452
 
453
- // 创建初始页面
454
- const page = await browser.newPage();
 
 
 
 
 
 
 
 
 
 
 
455
 
456
- // 设置更真实的用户代理和头部
457
- await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
458
- await page.setExtraHTTPHeaders({
459
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
460
- 'Accept-Language': 'en-US,en;q=0.5',
461
- 'Accept-Encoding': 'gzip, deflate, br',
462
- 'DNT': '1',
463
- 'Connection': 'keep-alive',
464
- 'Upgrade-Insecure-Requests': '1',
465
  });
466
 
 
 
467
  // 启用请求拦截
468
  await page.route('**/*', async (route) => {
469
  const requestUrl = route.request().url();
@@ -589,7 +598,7 @@ async function scrapeProvider(domain, url, signal) {
589
  if (!firstIframeSrc) throw new Error('First iframe not found');
590
 
591
  // 创建新页面来加载iframe内容
592
- const iframePage = await browser.newPage();
593
  await iframePage.goto(firstIframeSrc, {
594
  waitUntil: 'networkidle',
595
  timeout: 60000
@@ -621,11 +630,14 @@ async function scrapeProvider(domain, url, signal) {
621
  } finally {
622
  if (browserInstance) {
623
  console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
 
 
 
 
624
  await browserPool.release(browserInstance);
625
  }
626
  }
627
  }
628
-
629
  async function scrapeProvider4(domain, url, signal) {
630
  if (signal.aborted) throw new Error('Aborted');
631
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);
 
450
  browserInstance = await browserPool.get();
451
  const browser = browserInstance.browser;
452
 
453
+ // 创建带有上下文的页面
454
+ const context = await browser.newContext({
455
+ userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
456
+ extraHTTPHeaders: {
457
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
458
+ 'Accept-Language': 'en-US,en;q=0.5',
459
+ 'Accept-Encoding': 'gzip, deflate, br',
460
+ 'DNT': '1',
461
+ 'Connection': 'keep-alive',
462
+ 'Upgrade-Insecure-Requests': '1',
463
+ },
464
+ viewport: { width: 1920, height: 1080 }
465
+ });
466
 
467
+ // 移除自动化痕迹
468
+ await context.addInitScript(() => {
469
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
470
+ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
471
+ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
 
 
 
 
472
  });
473
 
474
+ const page = await context.newPage();
475
+
476
  // 启用请求拦截
477
  await page.route('**/*', async (route) => {
478
  const requestUrl = route.request().url();
 
598
  if (!firstIframeSrc) throw new Error('First iframe not found');
599
 
600
  // 创建新页面来加载iframe内容
601
+ const iframePage = await context.newPage();
602
  await iframePage.goto(firstIframeSrc, {
603
  waitUntil: 'networkidle',
604
  timeout: 60000
 
630
  } finally {
631
  if (browserInstance) {
632
  console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
633
+ // 关闭上下文
634
+ if (typeof context !== 'undefined') {
635
+ await context.close();
636
+ }
637
  await browserPool.release(browserInstance);
638
  }
639
  }
640
  }
 
641
  async function scrapeProvider4(domain, url, signal) {
642
  if (signal.aborted) throw new Error('Aborted');
643
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);