Spaces:

stnh70
/

fr

Sleeping

App Files Files Community

stnh70 committed on Aug 20, 2025

Commit

e69ef9d

verified ·

1 Parent(s): 872dee4

Update server.js

Browse files

Files changed (1) hide show

server.js +86 -95

server.js CHANGED Viewed

@@ -1,51 +1,29 @@
-// server.js
 import express, { json } from "express";
 import cors from "cors";
-import puppeteer from 'puppeteer-extra';
 import StealthPlugin from 'puppeteer-extra-plugin-stealth';
-import { executablePath } from 'puppeteer';
 import fetch from 'node-fetch';
 import { JSDOM } from 'jsdom';
-// --- 1. Initialize Puppeteer and plugins ---
-puppeteer.use(StealthPlugin());
 const app = express();
-const PORT = process.env.PORT || 3000; // listen port: PORT env var, otherwise 3000
 app.use(cors());
 app.use(json());
-// --- 2. Optimized helper functions ---
-/**
- * [Optimization] The check for a specific sitekey was removed to make this more generic and robust.
- * It now waits for any Turnstile widget to appear instead of looking for one particular widget.
- *
- * Waits up to 5000 ms for a `.cf-turnstile` element. If one appears, calls
- * `window.cftCallback` inside the page (only when it is a function) with a
- * random "mock-token-..." string, then waits a further 2000 ms.
- * Every error is caught and logged here; nothing is rethrown to the caller.
- *
- * @param page - page object providing waitForSelector, evaluate and waitForTimeout.
- * @returns {Promise<void>}
- */
-async function handleTurnstile(page) {
-    try {
-        await page.waitForSelector('.cf-turnstile', { timeout: 5000 });
-        console.log('Turnstile detected - attempting generic bypass...');
-        await page.evaluate(() => {
-            if (typeof window.cftCallback === 'function') {
-                const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
-                window.cftCallback(mockToken);
-            }
-        });
-        console.log('Turnstile JS callback triggered.');
-        await page.waitForTimeout(2000);
-    } catch (error) {
-        if (error.name === 'TimeoutError') { // selector wait timed out: treated as "no widget on this page"
-            console.log('Turnstile not found on page, skipping bypass.');
-        } else {
-            console.warn('An error occurred during Turnstile handling:', error.message);
-        }
-    }
-}
 /**
- * 您的参考代码中用于提取最终视频链接的函数，保持不变。
  */
 async function getVideoAndSubtitles(finalUrl) {
     try {
@@ -56,8 +34,11 @@ async function getVideoAndSubtitles(finalUrl) {
         const html = await response.text();
         const dom = new JSDOM(html);
         const script = Array.from(dom.window.document.querySelectorAll('script')).find(s => s.textContent?.includes('new Playerjs'));
-        if (!script) return { videoFileUrl: null, subtitleSources: [] };
         const fileMatch = script.textContent.match(/file:"(.*?m3u8.*?)"/);
         const subtitlesMatch = script.textContent.match(/subtitle:"(.*?)"/);
@@ -71,78 +52,93 @@ async function getVideoAndSubtitles(finalUrl) {
     }
 }
-// --- 3. 核心抓取逻辑 ---
 /**
- * [Major refactor]
- * - This is a unified scraping function that supports both movies and TV shows.
- * - It receives an already-launched browser instance, avoiding the heavy cost of launching a browser on every request.
- * - It uses an isolated browser context (Incognito Context) so that requests do not interfere with one another.
- *
- * Resolves to whatever getVideoAndSubtitles(finalIframeSrc) returns; any error is logged and rethrown.
  */
 async function scrapeSource(browser, { type, id, season, episode }) {
     let context = null;
-    console.log(`\nScraping for: type=${type}, id=${id}, s=${season}, e=${episode}`);
     try {
-        // --- URL construction ---
         let initialUrl;
-        const domain = "https://vidsrc.xyz"; // you could pick from a list; a single domain is used here as an example
         if (type === 'tv') {
             initialUrl = `${domain}/embed/tv?tmdb=${id}&season=${season}&episode=${episode}`;
-        } else { // defaults to movie
             initialUrl = `${domain}/embed/movie/${id}`;
         }
         console.log(`Initial URL: ${initialUrl}`);
-        // --- Browser operations ---
-        context = await browser.createBrowserContext();
-        const page = await context.newPage();
-        // Set up generic request interception to bypass Turnstile's API verification
-        await page.setRequestInterception(true);
-        page.on('request', (request) => {
-            if (request.url().includes('/rcp_verify')) {
-                request.respond({ status: 200, contentType: 'application/json', body: '1' });
-            } else {
-                request.continue();
             }
         });
-        // Stage 1: visit the initial page
-        await page.goto(initialUrl, { waitUntil: 'networkidle2', timeout: 45000 });
         await handleTurnstile(page);
-        const firstIframeSrc = await page.evaluate(() => document.querySelector('#player_iframe')?.src);
         if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
         console.log(`First iframe src: ${firstIframeSrc}`);
-        // Stage 2: visit the first iframe
-        // [Optimization] we can navigate on the same page object; there is no need to create a new page
-        await page.goto(firstIframeSrc, { waitUntil: 'networkidle2', timeout: 45000 });
         await handleTurnstile(page);
-        const finalIframeSrc = await page.evaluate(() => document.querySelector('iframe')?.src);
         if (!finalIframeSrc) throw new Error('Final iframe source not found');
         console.log(`Final iframe src: ${finalIframeSrc}`);
-        // Stage 3: extract the data
         return await getVideoAndSubtitles(finalIframeSrc);
     } catch (error) {
         console.error('Full error during scraping process:', error.message);
-        throw error; // rethrow so the Express route handles errors in one place
     } finally {
         if (context) {
-            await context.close(); // close the context, releasing all of its pages and resources; very efficient
         }
     }
 }
-// --- 4. Express 服务器和路由 ---
-let browser; // 全局浏览器实例
 app.get("/extract", async (req, res) => {
     const type = req.query.type || 'movie';
@@ -150,19 +146,26 @@ app.get("/extract", async (req, res) => {
     const season = req.query.season;
     const episode = req.query.episode;
-    // --- 参数校验 ---
     if (!id) {
         return res.status(400).json({ success: false, error: "tmdb_id is required" });
     }
     if (type === 'tv' && (!season || !episode)) {
         return res.status(400).json({ success: false, error: "season and episode are required for type 'tv'" });
     }
     try {
         const result = await scrapeSource(browser, { type, id, season, episode });
         if (result && result.videoFileUrl) {
-            res.status(200).json({ success: true, result });
         } else {
             res.status(404).json({ success: false, error: "Could not find video stream from the source." });
         }
@@ -171,31 +174,18 @@ app.get("/extract", async (req, res) => {
     }
 });
-// --- 5. 服务器启动和优雅关停 ---
 (async () => {
     try {
-        console.log("Launching a persistent browser instance...");
-        browser = await puppeteer.launch({
-            headless: true, // 在生产环境中建议设为 true
-            executablePath: executablePath(),
-            args: [
-                '--no-sandbox',
-                '--disable-setuid-sandbox',
-                '--disable-dev-shm-usage', // 解决临时文件空间不足的问题
-                '--disable-accelerated-2d-canvas',
-                '--no-first-run',
-                '--no-zygote',
-                '--single-process', // 在某些资源受限的环境中有帮助
-                '--disable-gpu'
-            ],
-            defaultViewport: null
         });
         console.log("Browser launched successfully.");
         app.listen(PORT, () => {
-            console.log(`🚀 Scraper server running at http://localhost:${PORT}`);
         });
     } catch (error) {
         console.error("Failed to launch browser:", error);
@@ -203,6 +193,7 @@ app.get("/extract", async (req, res) => {
     }
 })();
 const gracefulShutdown = async () => {
     console.log("\nShutting down gracefully...");
     if (browser) {

+// server.js (Playwright Final Version - COMPLETE)
 import express, { json } from "express";
 import cors from "cors";
+import { chromium } from "playwright-extra";
 import StealthPlugin from 'puppeteer-extra-plugin-stealth';
 import fetch from 'node-fetch';
 import { JSDOM } from 'jsdom';
+import LRU from 'lru-cache';
+// --- 1. Initialization ---
+chromium.use(StealthPlugin());
 const app = express();
 app.use(cors());
 app.use(json());
+const cache = new LRU({ max: 500, ttl: 15 * 60 * 1000 }); // at most 500 entries; ttl presumably in ms (15 minutes) — confirm against the lru-cache version in use
+// Resource types and ad domains to block, in order to reduce memory consumption
+const blockedResourceTypes = new Set(['image', 'stylesheet', 'font', 'media']);
+const blockedDomains = ['googlesyndication.com', 'googletagmanager.com', 'google-analytics.com', 'doubleclick.net'];
+// --- 2. Helper functions ---
 /**
+ * 从最终的 HTML 内容中提取视频和字幕信息
  */
 async function getVideoAndSubtitles(finalUrl) {
     try {
         const html = await response.text();
         const dom = new JSDOM(html);
         const script = Array.from(dom.window.document.querySelectorAll('script')).find(s => s.textContent?.includes('new Playerjs'));
+        if (!script) {
+            console.warn(`[Helper] Player.js script not found on ${finalUrl}`);
+            return { videoFileUrl: null, subtitleSources: [] };
+        }
         const fileMatch = script.textContent.match(/file:"(.*?m3u8.*?)"/);
         const subtitlesMatch = script.textContent.match(/subtitle:"(.*?)"/);
     }
 }
 /**
+ * Try to find a Cloudflare Turnstile widget on the page and trigger its JS callback.
+ *
+ * Waits up to 5000 ms for a visible `.cf-turnstile` element. If one appears, calls
+ * `window.cftCallback` inside the page (only when it is a function) with a
+ * random "mock-token-..." string, then waits a further 2000 ms.
+ * Every error is caught and logged here; nothing is rethrown to the caller.
+ *
+ * @param page - page object providing waitForSelector, evaluate and waitForTimeout.
+ * @returns {Promise<void>}
  */
+async function handleTurnstile(page) {
+    try {
+        await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
+        console.log('Turnstile detected - attempting generic bypass...');
+        await page.evaluate(() => {
+            if (typeof window.cftCallback === 'function') {
+                const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
+                window.cftCallback(mockToken);
+            }
+        });
+        console.log('Turnstile JS callback triggered.');
+        await page.waitForTimeout(2000);
+    } catch (error) {
+        if (error.name.includes('Timeout')) { // any error whose name contains "Timeout" is treated as "no widget on this page"
+            console.log('Turnstile not found on page, skipping bypass.');
+        } else {
+            console.warn('An error occurred during Turnstile handling:', error.message);
+        }
+    }
+}
+// --- 3. Core scraping logic (Playwright version) ---
 async function scrapeSource(browser, { type, id, season, episode }) { // resolves to getVideoAndSubtitles(finalIframeSrc); logs and rethrows any error
     let context = null;
+    console.log(`\nScraping with Playwright for: type=${type}, id=${id}, s=${season}, e=${episode}`);
     try {
         let initialUrl;
+        const domain = "https://vidsrc.xyz"; // can be changed to another domain
         if (type === 'tv') {
             initialUrl = `${domain}/embed/tv?tmdb=${id}&season=${season}&episode=${episode}`;
+        } else { // any type other than 'tv' uses the movie URL
             initialUrl = `${domain}/embed/movie/${id}`;
         }
         console.log(`Initial URL: ${initialUrl}`);
+        context = await browser.newContext();
+        await context.route('**/*', (route) => {
+            const request = route.request();
+            const url = request.url();
+            const resourceType = request.resourceType();
+            if (url.includes('/rcp_verify')) { // answer verification requests locally with a canned 200 response
+                return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
             }
+            if (blockedResourceTypes.has(resourceType) || blockedDomains.some(domain => url.includes(domain))) { // note: the arrow parameter "domain" shadows the outer const "domain"
+                return route.abort();
+            }
+            return route.continue();
         });
+        const page = await context.newPage();
+        // Stage 1
+        await page.goto(initialUrl, { waitUntil: 'networkidle', timeout: 60000 });
         await handleTurnstile(page);
+        const firstIframeSrc = await page.locator('#player_iframe').getAttribute('src');
         if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
         console.log(`First iframe src: ${firstIframeSrc}`);
+        // Stage 2
+        await page.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });
         await handleTurnstile(page);
+        // Try to get the nested iframe's src; if it does not exist, fall back to the first-level iframe's src. NOTE(review): a Playwright locator's getAttribute may wait for a match and throw on timeout instead of returning null, in which case the fallback below would never run — confirm.
+        const finalIframeSrc = await page.frameLocator('iframe').locator('iframe').getAttribute('src') || await page.locator('iframe').getAttribute('src');
         if (!finalIframeSrc) throw new Error('Final iframe source not found');
         console.log(`Final iframe src: ${finalIframeSrc}`);
+        // Stage 3
         return await getVideoAndSubtitles(finalIframeSrc);
     } catch (error) {
         console.error('Full error during scraping process:', error.message);
+        throw error;
     } finally {
         if (context) {
+            await context.close(); // always runs, so the context is closed on success and on failure
         }
     }
 }
+// --- 4. Express API 路由 ---
+let browser;
 app.get("/extract", async (req, res) => {
     const type = req.query.type || 'movie';
     const season = req.query.season;
     const episode = req.query.episode;
     if (!id) {
         return res.status(400).json({ success: false, error: "tmdb_id is required" });
     }
     if (type === 'tv' && (!season || !episode)) {
         return res.status(400).json({ success: false, error: "season and episode are required for type 'tv'" });
     }
+    const cacheKey = JSON.stringify(req.query);
+    const cached = cache.get(cacheKey);
+    if (cached) {
+        console.log("Serving from cache");
+        return res.json(cached);
+    }
     try {
         const result = await scrapeSource(browser, { type, id, season, episode });
         if (result && result.videoFileUrl) {
+            const response = { success: true, result };
+            cache.set(cacheKey, response);
+            res.status(200).json(response);
         } else {
             res.status(404).json({ success: false, error: "Could not find video stream from the source." });
         }
     }
 });
+// --- 5. 服务器启动 ---
 (async () => {
     try {
+        const PORT = process.env.PORT || 7860;
+        console.log("Launching a persistent Playwright browser instance...");
+        browser = await chromium.launch({
+            headless: true,
+            args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
         });
         console.log("Browser launched successfully.");
         app.listen(PORT, () => {
+            console.log(`🚀 Scraper server running on port ${PORT}`);
         });
     } catch (error) {
         console.error("Failed to launch browser:", error);
     }
 })();
+// --- 6. 优雅关停 ---
 const gracefulShutdown = async () => {
     console.log("\nShutting down gracefully...");
     if (browser) {