Update server.js
Browse files
server.js
CHANGED
|
@@ -240,7 +240,7 @@ async function scrapeProvider_(domain, url, signal) {
|
|
| 240 |
}
|
| 241 |
}
|
| 242 |
|
| 243 |
-
async function
|
| 244 |
if (signal.aborted) throw new Error('Aborted');
|
| 245 |
console.log(`\n[${domain}] Starting scrape for URL: ${url}`);
|
| 246 |
|
|
@@ -354,6 +354,110 @@ async function scrapeProvider(domain, url, signal) {
|
|
| 354 |
}
|
| 355 |
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
// 修改 handleSpecificTurnstile 函数
|
| 358 |
async function handleSpecificTurnstile(page, domain) {
|
| 359 |
try {
|
|
|
|
| 240 |
}
|
| 241 |
}
|
| 242 |
|
| 243 |
+
async function scrapeProvider__(domain, url, signal) {
|
| 244 |
if (signal.aborted) throw new Error('Aborted');
|
| 245 |
console.log(`\n[${domain}] Starting scrape for URL: ${url}`);
|
| 246 |
|
|
|
|
| 354 |
}
|
| 355 |
|
| 356 |
|
| 357 |
+
/**
 * Scrape a provider page for an HLS stream URL and its subtitle sources.
 *
 * Two strategies are tried, in order:
 *  1. While the provider page loads, every request is inspected; a request
 *     whose URL contains `cloudnestra.com/rcp/` is resolved directly through
 *     `getVideoAndSubtitles` and the request itself is aborted.
 *  2. Otherwise the nested iframes are followed (handling a `.cf-turnstile`
 *     element on each level when present) and the final iframe URL is
 *     resolved through `getVideoAndSubtitles`.
 *
 * Pages opened here are closed and the pooled browser is released before the
 * function settles, whether it succeeds or fails.
 *
 * @param {string} domain - Provider label used in log lines and reported as `source_domain`.
 * @param {string} url - Provider page URL to open.
 * @param {{aborted: boolean}} signal - Abort signal; checked at entry and between the expensive steps.
 * @returns {Promise<{source_domain: string, hls_url: string, subtitles: *, error: null}>}
 *   `subtitles` is whatever `getVideoAndSubtitles` returned as `subtitleSources`.
 * @throws {Error} 'Aborted', 'First iframe not found', 'Final iframe not found',
 *   'HLS URL not found', or any navigation/pool error (logged, then rethrown).
 */
async function scrapeProvider(domain, url, signal) {
  const ensureNotAborted = () => {
    if (signal.aborted) throw new Error('Aborted');
  };

  ensureNotAborted();
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);

  let browserInstance = null;
  // Every page opened during this scrape, so the finally block can close them.
  const openedPages = [];

  try {
    browserInstance = await browserPool.get();
    const browser = browserInstance.browser;

    // Create the initial page.
    const page = await browser.newPage();
    openedPages.push(page);
    let videoResult = null;

    // Settling a route can reject once its page is closing; log instead of
    // leaving an unhandled rejection behind.
    const settleRoute = async (settle) => {
      try {
        await settle();
      } catch (routeError) {
        console.warn(`[${domain}] Could not settle intercepted request: ${routeError.message}`);
      }
    };

    // Enable request interception.
    await page.route('**/*', async (route) => {
      const requestUrl = route.request().url();
      console.log(`[${domain}] Request: ${requestUrl}`);

      if (!requestUrl.includes('cloudnestra.com/rcp/')) {
        await settleRoute(() => route.continue());
        return;
      }

      // cloudnestra.com/rcp/ request: resolve it with getVideoAndSubtitles.
      let captured = false;
      try {
        const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(requestUrl);
        if (!videoFileUrl) throw new Error("HLS URL not found");

        videoResult = {
          source_domain: domain,
          hls_url: videoFileUrl,
          subtitles: subtitleSources,
          error: null
        };
        captured = true;
      } catch (error) {
        console.error(`[${domain}] Error processing cloudnestra request:`, error);
      }

      // The request is only dropped once its stream was captured; otherwise
      // let it through so the iframe-based fallback can still work.
      await settleRoute(() => (captured ? route.abort() : route.continue()));
    });

    try {
      await page.goto(url, {
        waitUntil: 'networkidle',
        timeout: 60000
      });
    } catch (navigationError) {
      // A navigation failure (e.g. timeout) is irrelevant when the stream was
      // already captured by the route handler.
      if (!videoResult) throw navigationError;
      console.warn(`[${domain}] Navigation failed after stream capture: ${navigationError.message}`);
    }

    // If the stream was already obtained from cloudnestra, return it directly.
    if (videoResult) {
      return videoResult;
    }
    ensureNotAborted();

    // Check whether a Turnstile widget is present.
    const hasTurnstile = await page.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Has Turnstile: ${hasTurnstile}`);

    // Handle the Turnstile.
    if (hasTurnstile) {
      await handleSpecificTurnstile(page, domain);
    }

    // Get the URL of the first iframe.
    const firstIframeSrc = await extractFirstIframeSrc(page, domain);
    if (!firstIframeSrc) throw new Error('First iframe not found');
    ensureNotAborted();

    // Open a new page to load the iframe content.
    const iframePage = await browser.newPage();
    openedPages.push(iframePage);
    await iframePage.goto(firstIframeSrc, {
      waitUntil: 'networkidle',
      timeout: 60000
    });

    // Handle a possible second Turnstile.
    const iframeHasTurnstile = await iframePage.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Iframe has Turnstile: ${iframeHasTurnstile}`);

    if (iframeHasTurnstile) {
      await handleSpecificTurnstile(iframePage, domain);
    }

    // Get the URL of the final iframe.
    const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
    if (!finalIframeSrc) throw new Error('Final iframe not found');
    ensureNotAborted();

    // Resolve the video and subtitles with getVideoAndSubtitles.
    const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
    if (!videoFileUrl) throw new Error("HLS URL not found");

    return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };

  } catch (error) {
    console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
    throw error;
  } finally {
    // Close our pages first so a reused pooled browser does not accumulate tabs.
    for (const openedPage of openedPages) {
      try {
        await openedPage.close();
      } catch (closeError) {
        console.warn(`[${domain}] Failed to close page: ${closeError.message}`);
      }
    }
    if (browserInstance) {
      console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
      await browserPool.release(browserInstance);
    }
  }
}
|
| 459 |
+
|
| 460 |
+
|
| 461 |
// 修改 handleSpecificTurnstile 函数
|
| 462 |
async function handleSpecificTurnstile(page, domain) {
|
| 463 |
try {
|