// server.js (Final Version using STANDARD Playwright)
import express, { json } from "express";
import cors from "cors";
// [CORRECT] Using the standard, official playwright library
import { chromium } from "playwright";
import fetch from 'node-fetch';
import { JSDOM } from 'jsdom';
import { LRUCache } from 'lru-cache';

// --- 1. Initialization ---
const app = express();
app.use(cors());
app.use(json());

// Successful responses are cached for 15 minutes, at most 500 entries.
const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });

// Resource types that are aborted while the embed pages load.
const blockedResourceTypes = new Set(['image', 'stylesheet', 'font', 'media']);

// Requests whose URL contains one of these substrings are aborted.
const blockedDomains = [
  'googlesyndication.com',
  'googletagmanager.com',
  'google-analytics.com',
  'doubleclick.net',
];

// Origin of the embed pages that scrapeSource() starts from.
const SOURCE_DOMAIN = "https://vidsrc.xyz";

// How long to wait for the nested / fallback final iframe before giving up.
const FINAL_IFRAME_TIMEOUT_MS = 5000;

// --- 2. Helper functions ---

/**
 * Tells whether a thrown value looks like a timeout error (its `name`
 * contains "Timeout"). Safe to call with non-Error values.
 *
 * @param {unknown} error - The caught value.
 * @returns {boolean} True when `error.name` is a string containing "Timeout".
 */
function isTimeoutError(error) {
  return typeof error?.name === 'string' && error.name.includes('Timeout');
}

/**
 * Downloads the final player page and extracts the stream and subtitle URLs
 * from the inline script that contains `new Playerjs`.
 *
 * @param {string} finalUrl - Absolute URL of the final player page.
 * @returns {Promise<{videoFileUrl: string|null, subtitleSources: string[]}>}
 *   `videoFileUrl` is null when no matching script or `file:"…m3u8…"` entry
 *   is found; `subtitleSources` is the comma-split `subtitle:"…"` value.
 * @throws {Error} When the HTTP request fails or returns a non-2xx status.
 */
async function getVideoAndSubtitles(finalUrl) {
  try {
    const response = await fetch(finalUrl);
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }
    const html = await response.text();
    const dom = new JSDOM(html);
    const script = Array.from(dom.window.document.querySelectorAll('script'))
      .find((s) => s.textContent?.includes('new Playerjs'));
    if (!script) {
      console.warn(`[Helper] Player.js script not found on ${finalUrl}`);
      return { videoFileUrl: null, subtitleSources: [] };
    }
    const fileMatch = script.textContent.match(/file:"(.*?m3u8.*?)"/);
    const subtitlesMatch = script.textContent.match(/subtitle:"(.*?)"/);
    return {
      videoFileUrl: fileMatch ? fileMatch[1] : null,
      subtitleSources: subtitlesMatch
        ? subtitlesMatch[1].split(',').map((s) => s.trim()).filter(Boolean)
        : [],
    };
  } catch (error) {
    console.error('Error in getVideoAndSubtitles:', error);
    throw error;
  }
}

/**
 * Best-effort handling of a `.cf-turnstile` widget: if one becomes visible
 * within 5 seconds, calls `window.cftCallback` (when it is a function) with
 * a mock token and then waits 2 seconds. Never throws; failures are logged.
 *
 * @param {import('playwright').Page} page - The page to inspect.
 * @returns {Promise<void>}
 */
async function handleTurnstile(page) {
  try {
    await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
    console.log('Turnstile detected - attempting generic bypass...');
    await page.evaluate(() => {
      if (typeof window.cftCallback === 'function') {
        const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
        window.cftCallback(mockToken);
      }
    });
    console.log('Turnstile JS callback triggered.');
    await page.waitForTimeout(2000);
  } catch (error) {
    if (isTimeoutError(error)) {
      console.log('Turnstile not found on page, skipping bypass.');
    } else {
      // error may not be an Error instance, hence the optional chaining.
      console.warn('An error occurred during Turnstile handling:', error?.message);
    }
  }
}

/**
 * Builds the initial embed URL. The user-supplied values are URL-encoded so
 * they cannot inject extra path segments or query parameters.
 *
 * @param {{type: string, id: string, season?: string, episode?: string}} params
 * @returns {string} Absolute embed URL on SOURCE_DOMAIN.
 */
function buildEmbedUrl({ type, id, season, episode }) {
  if (type === 'tv') {
    const url = new URL('/embed/tv', SOURCE_DOMAIN);
    url.searchParams.set('tmdb', id);
    url.searchParams.set('season', season);
    url.searchParams.set('episode', episode);
    return url.toString();
  }
  return new URL(`/embed/movie/${encodeURIComponent(id)}`, SOURCE_DOMAIN).toString();
}

/**
 * Route handler installed on the browser context: stubs `/rcp_verify`
 * requests with a 200 JSON body of `1`, aborts blocked resource types and
 * blocked domains, and lets everything else through.
 *
 * @param {import('playwright').Route} route - The intercepted route.
 * @returns {Promise<void>}
 */
function routeRequest(route) {
  const request = route.request();
  const url = request.url();
  if (url.includes('/rcp_verify')) {
    return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
  }
  const isBlocked = blockedResourceTypes.has(request.resourceType())
    || blockedDomains.some((blocked) => url.includes(blocked));
  return isBlocked ? route.abort() : route.continue();
}

/**
 * Reads the `src` attribute of the element matched by `locator`.
 *
 * `locator.getAttribute()` waits for the element and rejects with a timeout
 * error when it never appears; that case is mapped to `null` here so callers
 * can fall back or raise their own "not found" error. Other errors propagate.
 *
 * @param {import('playwright').Locator} locator - Locator of the iframe.
 * @param {number} [timeout] - Max wait in ms; Playwright's default when omitted.
 * @returns {Promise<string|null>} The attribute value, or null when the
 *   element is missing or has no `src`.
 */
async function readIframeSrc(locator, timeout) {
  try {
    const options = timeout === undefined ? {} : { timeout };
    return await locator.getAttribute('src', options);
  } catch (error) {
    if (isTimeoutError(error)) {
      return null;
    }
    throw error;
  }
}

// --- 3. Core scraping logic (using standard Playwright) ---

/**
 * Walks the embed page -> first iframe -> final iframe chain in a fresh
 * browser context and extracts the stream from the final page.
 *
 * @param {import('playwright').Browser} browser - A launched browser.
 * @param {{type: string, id: string, season?: string, episode?: string}} params
 *   `type === 'tv'` selects the TV embed URL; anything else is treated as a movie.
 * @returns {Promise<{videoFileUrl: string|null, subtitleSources: string[]}>}
 * @throws {Error} When navigation fails or an iframe source cannot be found.
 */
async function scrapeSource(browser, { type, id, season, episode }) {
  let context = null;
  console.log(`\nScraping with STANDARD Playwright for: type=${type}, id=${id}, s=${season}, e=${episode}`);
  try {
    const initialUrl = buildEmbedUrl({ type, id, season, episode });
    console.log(`Initial URL: ${initialUrl}`);

    context = await browser.newContext();

    // [Key] Manual re-implementation of the core of StealthPlugin:
    // inject a script before every page load that hides the webdriver flag.
    await context.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
    });
    await context.route('**/*', routeRequest);

    const page = await context.newPage();

    // Stage 1: load the embed page and locate the first iframe.
    await page.goto(initialUrl, { waitUntil: 'networkidle', timeout: 60000 });
    await handleTurnstile(page);
    const firstIframeRaw = await readIframeSrc(page.locator('#player_iframe'));
    if (!firstIframeRaw) throw new Error('First iframe (#player_iframe) not found');
    // The src may be relative or protocol-relative ("//host/path"); resolve
    // it against the current page so page.goto() receives an absolute URL.
    const firstIframeSrc = new URL(firstIframeRaw, page.url()).toString();
    console.log(`First iframe src: ${firstIframeSrc}`);

    // Stage 2: load the first iframe and locate the final iframe, preferring
    // one nested inside another iframe and falling back to a top-level one.
    await page.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });
    await handleTurnstile(page);
    const finalIframeRaw =
      await readIframeSrc(page.frameLocator('iframe').locator('iframe'), FINAL_IFRAME_TIMEOUT_MS)
      || await readIframeSrc(page.locator('iframe'), FINAL_IFRAME_TIMEOUT_MS);
    if (!finalIframeRaw) throw new Error('Final iframe source not found');
    const finalIframeSrc = new URL(finalIframeRaw, page.url()).toString();
    console.log(`Final iframe src: ${finalIframeSrc}`);

    // Stage 3: fetch the final page and parse the player configuration.
    return await getVideoAndSubtitles(finalIframeSrc);
  } catch (error) {
    console.error('Full error during scraping process:', error.message);
    throw error;
  } finally {
    // Always release the context, even when scraping failed.
    if (context) {
      await context.close();
    }
  }
}

// --- 4. Express API routes ---

// Shared browser instance; assigned once at startup before the server listens.
let browser;

/**
 * GET /extract?type=movie|tv&tmdb_id=…[&season=…&episode=…]
 * Responds 400 on missing parameters, 200 with the result (cached),
 * 404 when no stream URL was found, and 500 on scraping errors.
 */
app.get("/extract", async (req, res) => {
  const type = req.query.type || 'movie';
  const id = req.query.tmdb_id;
  const season = req.query.season;
  const episode = req.query.episode;

  if (!id) {
    return res.status(400).json({ success: false, error: "tmdb_id is required" });
  }
  if (type === 'tv' && (!season || !episode)) {
    return res.status(400).json({ success: false, error: "season and episode are required for type 'tv'" });
  }

  // Key on the normalized inputs only, so parameter order, unrelated query
  // parameters and an omitted (defaulted) type do not create distinct entries.
  const isTv = type === 'tv';
  const cacheKey = JSON.stringify([type, id, isTv ? season : null, isTv ? episode : null]);
  const cached = cache.get(cacheKey);
  if (cached) {
    console.log("Serving from cache");
    return res.json(cached);
  }

  try {
    const result = await scrapeSource(browser, { type, id, season, episode });
    if (result?.videoFileUrl) {
      const response = { success: true, result };
      cache.set(cacheKey, response);
      res.status(200).json(response);
    } else {
      res.status(404).json({ success: false, error: "Could not find video stream from the source." });
    }
  } catch (error) {
    res.status(500).json({ success: false, error: `An unexpected error occurred: ${error.message}` });
  }
});

// --- 5. Server startup ---
// Launch the browser first; only start listening once it is available.
(async () => {
  try {
    const PORT = process.env.PORT || 7860;
    console.log("Launching a persistent STANDARD Playwright browser instance...");
    // [CORRECT] Launching the browser using the direct import from 'playwright'
    browser = await chromium.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
    });
    console.log("Browser launched successfully.");
    app.listen(PORT, () => {
      console.log(`🚀 Scraper server running on port ${PORT}`);
    });
  } catch (error) {
    console.error("Failed to launch browser:", error);
    process.exit(1);
  }
})();

// --- 6. Graceful shutdown ---

/**
 * Closes the shared browser (if it was launched) and exits the process.
 * A failure while closing is logged and reflected in the exit code instead
 * of surfacing as an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
const gracefulShutdown = async () => {
  console.log("\nShutting down gracefully...");
  let exitCode = 0;
  try {
    if (browser) {
      await browser.close();
      console.log("Browser instance closed.");
    }
  } catch (error) {
    console.error("Failed to close browser:", error);
    exitCode = 1;
  } finally {
    process.exit(exitCode);
  }
};

process.on('SIGINT', gracefulShutdown);
process.on('SIGTERM', gracefulShutdown);