Spaces:

stnh70
/

vid

Sleeping

App Files Files Community

stnh70 committed on Aug 20, 2025

Commit

158ed19

verified ·

1 Parent(s): ad1b7e5

Update sever.js

Browse files

Files changed (1) hide show

sever.js +126 -132

sever.js CHANGED Viewed

@@ -1,23 +1,20 @@
 import express, { json } from "express";
 import cors from "cors";
-import { chromium } from "playwright-extra";
-import StealthPlugin from 'puppeteer-extra-plugin-stealth';
 import pLimit from "p-limit";
-import BrowserPool from './pool/BrowserPool.js'; // 确保这个路径是正确的
-import LRU from 'lru-cache';
 import fetch from 'node-fetch';
 import { JSDOM } from 'jsdom';
-// 1. 初始化 Playwright Stealth 插件
-chromium.use(StealthPlugin());
-// 2. Express 服务器基础设置
 const app = express();
-const PORT = process.env.PORT || 3000;
 app.use(cors());
 app.use(json());
-// 3. 配置常量
 const PROVIDERS = [
   "https://vidsrc.xyz",
   "https://vidsrc.in",
@@ -28,87 +25,71 @@ const PROVIDERS = [
 ];
 let browserPool;
-const cache = new LRU({ max: 500, ttl: 15 * 60 * 1000 });
-const limit = pLimit(3); // 并发任务数，可根据服务器性能调整
-// 4. 辅助函数: 从最终页面内容中提取 HLS 和字幕
-async function getVideoAndSubtitles(finalUrl) {
-  try {
-    console.log(`[Helper] Fetching final content from: ${finalUrl}`);
-    const response = await fetch(finalUrl);
-    if (!response.ok) {
-      throw new Error(`HTTP error! Status: ${response.status}`);
-    }
-    const html = await response.text();
-    const dom = new JSDOM(html);
-    const document = dom.window.document;
-    const script = Array.from(document.querySelectorAll('script')).find(s => s.textContent?.includes('new Playerjs'));
-    if (!script) {
-        console.warn(`[Helper] Player.js script not found on ${finalUrl}`);
-        return { hls_url: null, subtitles: [] };
-    }
-    const scriptContent = script.textContent;
-    let hls_url = null;
-    let subtitles = [];
-    const fileMatch = scriptContent.match(/file:"(.*?m3u8.*?)"/);
-    if (fileMatch && fileMatch[1]) {
-      hls_url = fileMatch[1];
-    }
-    const subtitlesMatch = scriptContent.match(/subtitle:"(.*?)"/);
-    if (subtitlesMatch && subtitlesMatch[1]) {
-       try {
-         subtitles = subtitlesMatch[1].split(',').map(s => s.trim()).filter(Boolean);
-       } catch(e) {
-         console.error('[Helper] Failed to parse subtitles', e);
-       }
     }
-    return { hls_url, subtitles };
-  } catch (error) {
-    console.error(`[Helper] Error in getVideoAndSubtitles for ${finalUrl}:`, error);
-    throw error;
-  }
 }
-// 5. 辅助函数: 处理前端 Turnstile (方法 B)
-async function handleTurnstile(page, domain, signal) {
-  try {
-    await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000, signal });
-    console.log(`[${domain}] [Method B] Turnstile detected. Attempting to trigger JS callback...`);
-    await page.evaluate(() => {
-      if (typeof window.cftCallback === 'function') {
-        const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
-        window.cftCallback(mockToken);
-      }
-    });
-    console.log(`[${domain}] [Method B] Turnstile JS callback triggered.`);
-    await page.waitForTimeout(2000);
-  } catch (error) {
-    if (error.name === 'TimeoutError') {
-      console.log(`[${domain}] [Method B] Turnstile not found on page, skipping JS bypass.`);
-    } else {
-      console.warn(`[${domain}] [Method B] An error occurred during Turnstile JS handling: ${error.message}`);
     }
-  }
 }
-// 6. 核心抓取函数
 async function scrapeProvider(domain, url, signal) {
-  console.log(`\n[${domain}] Starting FULL BYPASS scrape for URL: ${url}`);
-  let browserInstance = null, context = null, page = null, iframePage = null;
   const cleanup = async () => {
-    if (iframePage && !iframePage.isClosed()) await iframePage.close().catch(()=>{});
-    if (page && !page.isClosed()) await page.close().catch(()=>{});
-    if (context) await context.close().catch(()=>{});
     if (browserInstance) {
       console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
       await browserPool.release(browserInstance);
@@ -116,64 +97,76 @@ async function scrapeProvider(domain, url, signal) {
   };
   try {
-    if (signal.aborted) throw new Error('Aborted before starting');
     browserInstance = await browserPool.get();
-    context = await browserInstance.browser.newContext({
-      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
-      ignoreHTTPSErrors: true,
     });
-    await context.addInitScript(() => { delete navigator.__proto__.webdriver; });
-    // 方法 A: 设置网络层拦截
-    const setupRequestInterception = async (p) => {
-      await p.route('**/*', (route) => {
-        if (signal.aborted) return route.abort();
-        if (route.request().url().includes('/rcp_verify')) {
-          console.log(`[${domain}] [Method A] Mocking /rcp_verify network request.`);
-          return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
-        }
-        return route.continue();
-      });
-    };
-    // --- STAGE 1: 初始页面 ---
-    page = await context.newPage();
-    await setupRequestInterception(page);
-    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000, signal });
-    await handleTurnstile(page, domain, signal);
-    const firstIframeSrc = await page.evaluate(() => {
-        const iframe = document.querySelector('#player_iframe');
-        if (!iframe) return null;
-        let src = iframe.getAttribute('src') || '';
-        return src.startsWith('//') ? `https:${src}` : src;
     });
-    if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
-    console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
-    // --- STAGE 2: 第一个 iframe ---
-    iframePage = await context.newPage();
-    await setupRequestInterception(iframePage);
-    await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 30000, signal });
-    await handleTurnstile(iframePage, domain, signal);
-    const finalIframeSrc = await iframePage.evaluate(() => {
-        return document.querySelector('iframe')?.src || null;
     });
-    if (!finalIframeSrc) throw new Error('Final iframe src not found');
     console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
-    // --- STAGE 3: 获取视频数据 ---
-    const { hls_url, subtitles } = await getVideoAndSubtitles(finalIframeSrc);
-    if (!hls_url) throw new Error("Final HLS URL not found");
-    return { source_domain: domain, hls_url, subtitles, error: null };
   } catch (error) {
-    if (signal.aborted || error.name === 'AbortError') {
-      console.log(`[${domain}] Scrape was aborted.`);
     } else {
-      console.error(`[${domain}] CRITICAL ERROR in scrapeProvider: ${error.message}`);
     }
     throw error;
   } finally {
@@ -181,7 +174,7 @@ async function scrapeProvider(domain, url, signal) {
   }
 }
-// 7. Express API 路由
 app.get("/extract", async (req, res) => {
   const type = req.query.type || "movie";
   const tmdb_id = req.query.tmdb_id;
@@ -222,7 +215,7 @@ app.get("/extract", async (req, res) => {
     console.log(`\nSuccess from [${firstSuccessfulResult.source_domain}]. Aborting other scrapers.`);
     controller.abort();
     const response = { success: true, result: firstSuccessfulResult };
     cache.set(cacheKey, response);
     res.json(response);
@@ -238,14 +231,16 @@ app.get("/extract", async (req, res) => {
   }
 });
-// 8. 启动服务器和浏览器池
 (async () => {
   try {
     browserPool = new BrowserPool({
       chromium: chromium,
       minSize: 1,
-      maxSize: 4,
-      maxUsage: 50,
     });
     await browserPool.initialize();
     console.log("Browser pool initialized successfully.");
@@ -256,7 +251,6 @@ app.get("/extract", async (req, res) => {
   }
 })();
-// 9. 优雅关停
 process.on("SIGINT", async () => {
   console.log("Shutting down gracefully...");
   if (browserPool) await browserPool.shutdown();

 import express, { json } from "express";
 import cors from "cors";
+import { chromium } from "playwright";
 import pLimit from "p-limit";
+import BrowserPool from './pool/BrowserPool.js'; // 您的浏览器池代码保持不变
+// [MODIFICATION] 修正 lru-cache 的导入语法
+import { LRUCache } from 'lru-cache';
+// [MODIFICATION] 导入新的辅助库
 import fetch from 'node-fetch';
 import { JSDOM } from 'jsdom';
 const app = express();
+const PORT = process.env.PORT || 7860; // 建议在 Hugging Face 上使用 7860
 app.use(cors());
 app.use(json());
 const PROVIDERS = [
   "https://vidsrc.xyz",
   "https://vidsrc.in",
 ];
 let browserPool;
+// [MODIFICATION] 修正 lru-cache 的实例化语法
+const cache = new LRUCache({
+  max: 500,
+  ttl: 15 * 60 * 1000,
+});
+const limit = pLimit(2);
+// [MODIFICATION] 新增的辅助函数，用于处理 CF Turnstile 的前端 JS 绕过
/**
 * Best-effort front-end bypass for a Cloudflare Turnstile widget.
 *
 * Waits up to 5 seconds for a visible `.cf-turnstile` element. When one shows
 * up, calls the page-global `cftCallback` (if the page defines it) with a mock
 * token and then pauses 2 seconds before returning.
 *
 * This helper never rejects: a missing widget is logged and skipped, and any
 * other failure is logged as a warning.
 *
 * @param {object} page - Page object exposing `waitForSelector`, `evaluate`
 *   and `waitForTimeout` (a Playwright page in this file).
 * @returns {Promise<void>}
 */
async function handleTurnstile(page) {
  try {
    await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
    console.log('Turnstile detected - attempting generic bypass...');

    // Report back whether the callback actually existed, so the log below
    // does not claim success when nothing was invoked.
    const wasTriggered = await page.evaluate(() => {
      if (typeof window.cftCallback !== 'function') return false;
      const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
      window.cftCallback(mockToken);
      return true;
    });

    if (wasTriggered) {
      console.log('Turnstile JS callback triggered.');
    } else {
      console.log('Turnstile callback (window.cftCallback) not defined; nothing to trigger.');
    }

    // The pause is kept in both cases so page timing matches the previous behavior.
    await page.waitForTimeout(2000);
  } catch (error) {
    // A rejection is not guaranteed to be an Error with a string `name`;
    // normalise it so this handler itself can never throw.
    const errorName = String(error?.name ?? '');
    if (errorName.includes('Timeout')) {
      console.log('Turnstile not found on page, skipping bypass.');
    } else {
      console.warn('An error occurred during Turnstile handling:', error?.message ?? error);
    }
  }
}
+// [MODIFICATION] 新增的辅助函数，用于从最终页面提取视频链接
/**
 * Downloads the final player page and extracts the HLS playlist URL and the
 * subtitle list from its inline `new Playerjs(...)` script.
 *
 * @param {string} finalUrl - URL of the page that embeds the player script.
 * @returns {Promise<{hlsUrl: (string|null), subtitles: string[]}>}
 *   `hlsUrl` is null when no player script or no quoted `file` value
 *   containing "m3u8" is found; `subtitles` holds the comma-separated entries
 *   of the `subtitle` value, trimmed, with empty entries removed.
 * @throws {Error} On a non-OK HTTP status, or any fetch/parse failure; the
 *   error is logged and then rethrown unchanged.
 */
async function getVideoAndSubtitles(finalUrl) {
  try {
    const response = await fetch(finalUrl);
    if (!response.ok) throw new Error(`HTTP error! Status: ${response.status}`);

    const html = await response.text();
    const { document } = new JSDOM(html).window;
    const playerScript = Array.from(document.querySelectorAll('script'))
      .find((s) => s.textContent?.includes('new Playerjs'));
    if (!playerScript) return { hlsUrl: null, subtitles: [] };

    const scriptText = playerScript.textContent;
    // `[^"]*` keeps each capture inside ONE quoted value. The previous `.*?`
    // could run across closing quotes, so a non-HLS `file` followed by any
    // later property mentioning "m3u8" produced a bogus multi-property "URL".
    const fileMatch = scriptText.match(/file:\s*"([^"]*m3u8[^"]*)"/);
    const subtitlesMatch = scriptText.match(/subtitle:\s*"([^"]*)"/);

    const subtitles = subtitlesMatch
      ? subtitlesMatch[1].split(',').map((entry) => entry.trim()).filter(Boolean)
      : [];

    return {
      hlsUrl: fileMatch ? fileMatch[1] : null,
      subtitles,
    };
  } catch (error) {
    console.error('Error in getVideoAndSubtitles:', error);
    throw error;
  }
}
+// [MODIFICATION] 对 scrapeProvider 函数进行“外科手术式”升级
 async function scrapeProvider(domain, url, signal) {
+  if (signal.aborted) throw new Error('Scraping aborted before starting.');
+  console.log(`\n[${domain}] Starting UPGRADED scrape for URL: ${url}`);
+  let browserInstance = null;
+  let context = null;
+  let page = null;
   const cleanup = async () => {
+    if (page && !page.isClosed()) await page.close().catch(() => {});
+    if (context) await context.close().catch(() => {});
     if (browserInstance) {
       console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
       await browserPool.release(browserInstance);
   };
   try {
     browserInstance = await browserPool.get();
+    const browser = browserInstance.browser;
+    console.log(`[${domain}] Acquired browser ${browserInstance.id}`);
+    if (signal.aborted) throw new Error('Scraping aborted.');
+    context = await browser.newContext({
+      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+      ignoreHTTPSErrors: true
     });
+    // [MODIFICATION] 注入脚本，手动实现 StealthPlugin 的核心功能
+    await context.addInitScript(() => {
+        Object.defineProperty(navigator, 'webdriver', { get: () => false });
     });
+    // [MODIFICATION] 设置强大的网络拦截，应用于所有页面
+    await context.route("**/*", (route) => {
+      if (signal.aborted) return route.abort();
+      const request = route.request();
+      const reqUrl = request.url();
+      const resourceType = request.resourceType();
+      // 1. 伪造 Turnstile 验证
+      if (reqUrl.includes('/rcp_verify')) {
+        return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
+      }
+      // 2. 阻止不必要的资源以节省内存
+      if (['image', 'stylesheet', 'font', 'media'].includes(resourceType)) {
+        return route.abort();
+      }
+      // 3. 放行其他请求
+      return route.continue();
     });
+    page = await context.newPage();
+    if (signal.aborted) throw new Error('Scraping aborted.');
+    // --- 新的、多阶段的抓取逻辑 ---
+    // 阶段 1: 访问初始页面
+    await page.goto(url, { waitUntil: "networkidle", timeout: 60000, signal });
+    await handleTurnstile(page);
+    const firstIframeSrc = await page.locator('#player_iframe').getAttribute('src');
+    if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
+    console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
+    if (signal.aborted) throw new Error('Scraping aborted.');
+    // 阶段 2: 访问第一个 iframe
+    await page.goto(firstIframeSrc, { waitUntil: "networkidle", timeout: 60000, signal });
+    await handleTurnstile(page);
+    const finalIframeSrc = await page.frameLocator('iframe').locator('iframe').getAttribute('src') || await page.locator('iframe').getAttribute('src');
+    if (!finalIframeSrc) throw new Error('Final iframe source not found');
     console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
+    if (signal.aborted) throw new Error('Scraping aborted.');
+    // 阶段 3: 从最终源提取数据
+    const { hlsUrl, subtitles } = await getVideoAndSubtitles(finalIframeSrc);
+    if (!hlsUrl) {
+      throw new Error("HLS URL not found after all stages");
+    }
+    return { source_domain: domain, hls_url: hlsUrl, subtitles, error: null };
   } catch (error) {
+    if (error.name === 'AbortError' || (signal && signal.aborted)) {
+      console.log(`[${domain}] Scraping was aborted.`);
     } else {
+      console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
     }
     throw error;
   } finally {
   }
 }
+// [MODIFICATION] 您的 /extract 路由和启动逻辑完全保持不变，因为它们的设计已经非常优秀
 app.get("/extract", async (req, res) => {
   const type = req.query.type || "movie";
   const tmdb_id = req.query.tmdb_id;
     console.log(`\nSuccess from [${firstSuccessfulResult.source_domain}]. Aborting other scrapers.`);
     controller.abort();
     const response = { success: true, result: firstSuccessfulResult };
     cache.set(cacheKey, response);
     res.json(response);
   }
 });
 (async () => {
   try {
     browserPool = new BrowserPool({
       chromium: chromium,
       minSize: 1,
+      maxSize: 5,
+      maxUsage: 100,
+      launchOptions: { // [MODIFICATION] 为 Docker/HuggingFace 环境添加必要的启动参数
+          args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
+      }
     });
     await browserPool.initialize();
     console.log("Browser pool initialized successfully.");
   }
 })();
 process.on("SIGINT", async () => {
   console.log("Shutting down gracefully...");
   if (browserPool) await browserPool.shutdown();