// NOTE: file-viewer residue was here (size banner "7,653 Bytes", a per-line commit-hash gutter and a 1-205 line-number gutter); it is not part of server.js.
// server.js (Final Version using STANDARD Playwright)
import express, { json } from "express";
import cors from "cors";
// [CORRECT] Using the standard, official playwright library
import { chromium } from "playwright";
import fetch from 'node-fetch';
import { JSDOM } from 'jsdom';
import { LRUCache } from 'lru-cache';
// --- 1. Initialization ---
// Express application; the cors() and json() middlewares are registered with their default options.
const app = express();
app.use(cors());
app.use(json());
// LRU cache for successful /extract responses: at most 500 entries, ttl of 15 * 60 * 1000 (15 minutes in milliseconds).
const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });
// Playwright resource types that the scraper's request router aborts instead of loading.
const blockedResourceTypes = new Set(['image', 'stylesheet', 'font', 'media']);
// Ad/analytics domains; the request router aborts any request whose URL contains one of these substrings.
const blockedDomains = ['googlesyndication.com', 'googletagmanager.com', 'google-analytics.com', 'doubleclick.net'];
// --- 2. Helper functions (complete; no changes needed) ---
/**
 * Downloads the final player page and extracts the HLS stream URL and the
 * subtitle sources from the inline script that instantiates Playerjs.
 *
 * @param {string} finalUrl - Absolute URL of the page that embeds the player.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` aborts the download
 *   (headers and body) after that many milliseconds; defaults to 30000.
 * @returns {Promise<{ videoFileUrl: string | null, subtitleSources: string[] }>}
 *   `videoFileUrl` is null when there is no Playerjs script or no quoted
 *   `file` value containing "m3u8"; `subtitleSources` is the comma-separated
 *   `subtitle` value split into trimmed, non-empty entries.
 * @throws {Error} On network failure, timeout, or a non-OK HTTP status.
 */
async function getVideoAndSubtitles(finalUrl, { timeoutMs = 30000 } = {}) {
  // Without a deadline a stalled upstream would keep the request pending forever.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(finalUrl, { signal: controller.signal });
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }
    const html = await response.text();
    const { document } = new JSDOM(html).window;
    const script = Array.from(document.querySelectorAll('script'))
      .find((candidate) => candidate.textContent?.includes('new Playerjs'));
    if (!script) {
      console.warn(`[Helper] Player.js script not found on ${finalUrl}`);
      return { videoFileUrl: null, subtitleSources: [] };
    }

    const source = script.textContent;
    // The captures are bounded by the value's own quote character, so a `file`
    // value without "m3u8" can no longer swallow text from a later property.
    // Optional whitespace and single quotes are accepted as well.
    const fileMatch = source.match(/file:\s*(?:"([^"]*m3u8[^"]*)"|'([^']*m3u8[^']*)')/);
    const subtitlesMatch = source.match(/subtitle:\s*(?:"([^"]*)"|'([^']*)')/);
    const subtitleList = subtitlesMatch ? (subtitlesMatch[1] ?? subtitlesMatch[2]) : '';

    return {
      videoFileUrl: fileMatch ? (fileMatch[1] ?? fileMatch[2]) : null,
      subtitleSources: subtitleList.split(',').map((entry) => entry.trim()).filter(Boolean),
    };
  } catch (error) {
    console.error('Error in getVideoAndSubtitles:', error);
    throw error;
  } finally {
    // Always release the timer so it cannot fire later or keep the process alive.
    clearTimeout(timer);
  }
}
/**
 * Best-effort handling of a Cloudflare Turnstile widget on the current page.
 *
 * Waits up to 5 seconds for a visible `.cf-turnstile` element. When one shows
 * up, calls the page's `window.cftCallback` (if it is a function) with a
 * generated mock token and then pauses for 2 seconds. This function never
 * throws: a missing widget or any other failure is logged and swallowed so
 * that the caller can continue scraping.
 *
 * @param {import('playwright').Page} page - Page to inspect.
 * @returns {Promise<boolean>} true only when `window.cftCallback` existed and was invoked.
 */
async function handleTurnstile(page) {
  try {
    await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
    console.log('Turnstile detected - attempting generic bypass...');
    const wasTriggered = await page.evaluate(() => {
      if (typeof window.cftCallback !== 'function') {
        return false;
      }
      const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
      window.cftCallback(mockToken);
      return true;
    });
    // Report what actually happened instead of always claiming success.
    if (wasTriggered) {
      console.log('Turnstile JS callback triggered.');
    } else {
      console.log('Turnstile widget present but window.cftCallback is not defined; nothing was triggered.');
    }
    await page.waitForTimeout(2000);
    return wasTriggered === true;
  } catch (error) {
    // Rejections are not guaranteed to be Error objects with a string name,
    // so read the name defensively rather than throwing from the handler.
    const errorName = typeof error?.name === 'string' ? error.name : '';
    if (errorName.includes('Timeout')) {
      console.log('Turnstile not found on page, skipping bypass.');
    } else {
      console.warn('An error occurred during Turnstile handling:', error?.message ?? error);
    }
    return false;
  }
}
// --- 3. Core scraping logic (using standard Playwright) ---
/**
 * Builds the vidsrc embed-page URL for a title. Values are URL-encoded so
 * request-supplied text cannot alter the path or add query parameters.
 */
function buildEmbedUrl({ type, id, season, episode }) {
  const origin = "https://vidsrc.xyz";
  if (type === 'tv') {
    const url = new URL('/embed/tv', origin);
    url.searchParams.set('tmdb', id);
    url.searchParams.set('season', season);
    url.searchParams.set('episode', episode);
    return url.href;
  }
  return new URL(`/embed/movie/${encodeURIComponent(id)}`, origin).href;
}

/**
 * Resolves a possibly relative or protocol-relative `src` against `baseUrl`.
 * Returns null for an empty or unparsable value.
 */
function resolveUrl(src, baseUrl) {
  if (!src) {
    return null;
  }
  try {
    return new URL(src, baseUrl).href;
  } catch {
    return null;
  }
}

/**
 * Finds the absolute URL of the innermost player iframe on the current page:
 * an iframe nested inside the first iframe when there is one, otherwise the
 * first iframe itself. Returns null when no usable `src` is present.
 */
async function findFinalIframeUrl(page) {
  const pageUrl = page.url();
  // first() avoids a strict-mode violation when the page holds several iframes.
  const outerIframe = page.locator('iframe').first();
  const nestedIframes = page.frameLocator('iframe').first().locator('iframe');

  // count() does not wait. Calling getAttribute() on a missing nested iframe
  // would wait for the locator timeout and throw, so the fallback to the
  // outer iframe would never be reached.
  if ((await nestedIframes.count()) > 0) {
    const nestedSrc = await nestedIframes.first().getAttribute('src');
    if (nestedSrc) {
      // A relative nested src is relative to the outer frame's document.
      const outerUrl = resolveUrl(await outerIframe.getAttribute('src'), pageUrl);
      const nestedBase = outerUrl?.startsWith('http') ? outerUrl : pageUrl;
      const nestedUrl = resolveUrl(nestedSrc, nestedBase);
      if (nestedUrl) {
        return nestedUrl;
      }
    }
  }
  return resolveUrl(await outerIframe.getAttribute('src'), pageUrl);
}

/**
 * Scrapes the stream and subtitle URLs for one title.
 *
 * Opens a fresh browser context, loads the embed page, follows
 * `#player_iframe` to the intermediate page, locates the final player iframe
 * and hands its URL to `getVideoAndSubtitles`. The context is always closed.
 *
 * @param {import('playwright').Browser} browser - Launched browser instance.
 * @param {{ type: string, id: string, season?: string, episode?: string }} params
 *   `type` of 'tv' uses the TV embed URL with season/episode; any other value
 *   uses the movie embed URL.
 * @returns {Promise<{ videoFileUrl: string | null, subtitleSources: string[] }>}
 * @throws {Error} When navigation fails, an iframe cannot be found, or the
 *   final page cannot be fetched.
 */
async function scrapeSource(browser, { type, id, season, episode }) {
  let context = null;
  console.log(`\nScraping with STANDARD Playwright for: type=${type}, id=${id}, s=${season}, e=${episode}`);
  try {
    const initialUrl = buildEmbedUrl({ type, id, season, episode });
    console.log(`Initial URL: ${initialUrl}`);

    context = await browser.newContext();
    // Hand-rolled replacement for the core of a stealth plugin: injected before
    // every page load so that navigator.webdriver reports false.
    await context.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
    });
    await context.route('**/*', (route) => {
      const request = route.request();
      const url = request.url();
      // Answer the verification call locally instead of hitting the network.
      if (url.includes('/rcp_verify')) {
        return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
      }
      const isBlockedType = blockedResourceTypes.has(request.resourceType());
      const isBlockedDomain = blockedDomains.some((blocked) => url.includes(blocked));
      return isBlockedType || isBlockedDomain ? route.abort() : route.continue();
    });
    const page = await context.newPage();

    // Stage 1: embed page -> first iframe.
    await page.goto(initialUrl, { waitUntil: 'networkidle', timeout: 60000 });
    await handleTurnstile(page);
    const firstIframeSrc = await page.locator('#player_iframe').getAttribute('src');
    // The src may be protocol-relative ("//host/path") or relative; page.goto needs an absolute URL.
    const firstIframeUrl = resolveUrl(firstIframeSrc, page.url());
    if (!firstIframeUrl) throw new Error('First iframe (#player_iframe) not found');
    console.log(`First iframe src: ${firstIframeUrl}`);

    // Stage 2: intermediate page -> final player iframe.
    await page.goto(firstIframeUrl, { waitUntil: 'networkidle', timeout: 60000 });
    await handleTurnstile(page);
    const finalIframeUrl = await findFinalIframeUrl(page);
    if (!finalIframeUrl) throw new Error('Final iframe source not found');
    console.log(`Final iframe src: ${finalIframeUrl}`);

    // Stage 3: parse the player configuration from the final page.
    return await getVideoAndSubtitles(finalIframeUrl);
  } catch (error) {
    console.error('Full error during scraping process:', error.message);
    throw error;
  } finally {
    if (context) {
      await context.close();
    }
  }
}
// --- 4. Express API routes ---
// Shared Playwright browser instance; assigned once by the startup code and
// passed to scrapeSource for every request.
let browser;

/**
 * GET /extract?tmdb_id=<id>[&type=tv&season=<n>&episode=<n>]
 *
 * Responds with `{ success: true, result }` (also cached), or with
 * `{ success: false, error }` and status 400 (bad input), 404 (no stream
 * found) or 500 (scrape failed).
 */
app.get("/extract", async (req, res) => {
  const { tmdb_id: id, season, episode } = req.query;
  // Every value other than 'tv' is treated as a movie lookup, so normalise it
  // up front; "no type" and "type=movie" then share one cache entry.
  const type = req.query.type === 'tv' ? 'tv' : 'movie';

  if (!id) {
    return res.status(400).json({ success: false, error: "tmdb_id is required" });
  }
  if (type === 'tv' && (!season || !episode)) {
    return res.status(400).json({ success: false, error: "season and episode are required for type 'tv'" });
  }
  // Repeated or bracketed query parameters arrive as arrays/objects; reject
  // them instead of letting them be stringified into the scrape URL.
  const relevantValues = type === 'tv' ? [id, season, episode] : [id];
  if (relevantValues.some((value) => typeof value !== 'string')) {
    return res.status(400).json({ success: false, error: "tmdb_id, season and episode must each be a single plain value" });
  }

  // Key only on the values that select the content, in a fixed order, so that
  // parameter order and unrelated query parameters cannot fragment the cache.
  const cacheKey = JSON.stringify([type, ...relevantValues]);
  const cached = cache.get(cacheKey);
  if (cached) {
    console.log("Serving from cache");
    return res.json(cached);
  }

  try {
    const result = await scrapeSource(browser, { type, id, season, episode });
    if (!result?.videoFileUrl) {
      return res.status(404).json({ success: false, error: "Could not find video stream from the source." });
    }
    const response = { success: true, result };
    cache.set(cacheKey, response);
    return res.status(200).json(response);
  } catch (error) {
    return res.status(500).json({ success: false, error: `An unexpected error occurred: ${error.message}` });
  }
});
// --- 5. Server startup ---
/**
 * Boots the service: launches the shared headless Chromium instance through
 * the `chromium` export of 'playwright', stores it in `browser`, and only
 * then starts listening for HTTP requests on PORT (default 7860).
 */
async function startServer() {
  const listenPort = process.env.PORT || 7860;
  const launchOptions = {
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
  };

  console.log("Launching a persistent STANDARD Playwright browser instance...");
  browser = await chromium.launch(launchOptions);
  console.log("Browser launched successfully.");

  app.listen(listenPort, () => {
    console.log(`🚀 Scraper server running on port ${listenPort}`);
  });
}

// Any startup failure is fatal: log it and exit with a non-zero status.
startServer().catch((error) => {
  console.error("Failed to launch browser:", error);
  process.exit(1);
});
// --- 6. Graceful shutdown ---
/**
 * Signal handler that closes the shared browser (when one was launched) and
 * then terminates the process. Exits with status 0 after a clean shutdown and
 * with status 1 when closing the browser fails, so a failed close is reported
 * instead of surfacing as an unhandled rejection.
 */
const gracefulShutdown = async () => {
  console.log("\nShutting down gracefully...");
  let exitCode = 0;
  try {
    if (browser) {
      await browser.close();
      console.log("Browser instance closed.");
    }
  } catch (error) {
    console.error("Failed to close browser instance:", error);
    exitCode = 1;
  }
  process.exit(exitCode);
};
process.on('SIGINT', gracefulShutdown);
process.on('SIGTERM', gracefulShutdown);