stnh70 committed on
Commit
523a51b
·
verified ·
1 Parent(s): 9bf22e9

Create server.js

Browse files
Files changed (1) hide show
  1. server.js +212 -0
server.js ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // server.js
2
+
3
+ import express, { json } from "express";
4
+ import cors from "cors";
5
+ import puppeteer from 'puppeteer-extra';
6
+ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
7
+ import { executablePath } from 'puppeteer';
8
+ import fetch from 'node-fetch';
9
+ import { JSDOM } from 'jsdom';
10
+
11
// --- 1. Initialise Puppeteer (with the stealth plugin) and the Express app ---

// Port comes from the environment when set, otherwise falls back to 3000.
const PORT = process.env.PORT || 3000;
const app = express();

// Register the stealth plugin before any browser is launched.
puppeteer.use(StealthPlugin());

// Middleware order is preserved: CORS first, then JSON body parsing.
app.use(cors(), json());
17
+
18
+
19
// --- 2. Helper functions ---

/**
 * Best-effort handling of a Turnstile widget on the current page.
 *
 * Waits for any element matching `.cf-turnstile` (no specific sitekey is
 * required). When one appears and the page exposes a global
 * `window.cftCallback` function, that callback is invoked with a mock token,
 * and the function then pauses briefly before returning.
 *
 * This function never rejects: a selector timeout means "no widget on this
 * page" and is logged; any other failure is logged as a warning.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @param {object} [options]
 * @param {number} [options.detectTimeoutMs=5000] - How long to wait for the widget to appear.
 * @param {number} [options.settleMs=2000] - Pause after the callback attempt.
 * @returns {Promise<void>}
 */
async function handleTurnstile(page, { detectTimeoutMs = 5000, settleMs = 2000 } = {}) {
  try {
    await page.waitForSelector('.cf-turnstile', { timeout: detectTimeoutMs });
    console.log('Turnstile detected - attempting generic bypass...');

    // Runs inside the page; reports back whether the callback actually existed
    // so the log below does not claim success when nothing was called.
    const callbackInvoked = await page.evaluate(() => {
      if (typeof window.cftCallback !== 'function') {
        return false;
      }
      const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
      window.cftCallback(mockToken);
      return true;
    });

    if (callbackInvoked) {
      console.log('Turnstile JS callback triggered.');
    } else {
      console.log('Turnstile callback (window.cftCallback) not found, nothing was triggered.');
    }

    // Plain timer instead of page.waitForTimeout(), which newer Puppeteer
    // releases no longer provide.
    await new Promise((resolve) => setTimeout(resolve, settleMs));
  } catch (error) {
    if (error.name === 'TimeoutError') {
      console.log('Turnstile not found on page, skipping bypass.');
    } else {
      console.warn('An error occurred during Turnstile handling:', error.message);
    }
  }
}
46
+
47
/**
 * Download the final player page and extract the stream and subtitle URLs
 * from the inline script that contains `new Playerjs`.
 *
 * @param {string} finalUrl - URL of the innermost player page.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=15000] - Abort the download after this many milliseconds.
 * @returns {Promise<{videoFileUrl: (string|null), subtitleSources: string[]}>}
 *   `videoFileUrl` is null when no player script or no m3u8 `file` entry is found.
 * @throws {Error} When the request fails, times out, or returns a non-2xx status.
 */
async function getVideoAndSubtitles(finalUrl, { timeoutMs = 15000 } = {}) {
  try {
    // Without a timeout a stalled upstream would keep the caller waiting forever.
    const response = await fetch(finalUrl, { signal: AbortSignal.timeout(timeoutMs) });
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }

    const html = await response.text();
    const { document } = new JSDOM(html).window;
    const playerScript = Array.from(document.querySelectorAll('script'))
      .find((script) => script.textContent?.includes('new Playerjs'));
    if (!playerScript) {
      return { videoFileUrl: null, subtitleSources: [] };
    }

    const playerSource = playerScript.textContent;
    // [^"]* keeps the capture inside a single quoted value. The previous `.*?`
    // could run past the closing quote of a non-m3u8 `file` value and glue
    // several properties together into one bogus "URL".
    const fileMatch = playerSource.match(/file:"([^"]*m3u8[^"]*)"/);
    const subtitlesMatch = playerSource.match(/subtitle:"(.*?)"/);

    return {
      videoFileUrl: fileMatch ? fileMatch[1] : null,
      subtitleSources: subtitlesMatch
        ? subtitlesMatch[1].split(',').map((entry) => entry.trim()).filter(Boolean)
        : [],
    };
  } catch (error) {
    console.error('Error in getVideoAndSubtitles:', error);
    throw error;
  }
}
73
+
74
+
75
// --- 3. Core scraping logic ---

/**
 * Resolve the stream for a movie or a TV episode by walking the embed page's
 * nested iframes inside a fresh browser context.
 *
 * Steps: open the embed URL, read `#player_iframe`'s src, navigate to it, read
 * the first `iframe`'s src there, then hand that URL to getVideoAndSubtitles().
 * The browser is supplied by the caller and is not closed here; only the
 * context created for this call is closed.
 *
 * @param {object} browser - An already launched Puppeteer browser.
 * @param {object} target
 * @param {string} target.type - 'tv' for an episode; anything else is treated as a movie.
 * @param {string} target.id - Title id placed in the embed URL.
 * @param {string} [target.season] - Season number (used for 'tv' only).
 * @param {string} [target.episode] - Episode number (used for 'tv' only).
 * @returns {Promise<{videoFileUrl: (string|null), subtitleSources: string[]}>}
 * @throws {Error} When navigation fails or an expected iframe is missing.
 */
async function scrapeSource(browser, { type, id, season, episode }) {
  let context = null;
  console.log(`\nScraping for: type=${type}, id=${id}, s=${season}, e=${episode}`);

  try {
    // --- Build the embed URL ---
    // Caller-supplied values are encoded so they cannot add path segments or
    // extra query parameters to the target URL.
    const domain = "https://vidsrc.xyz"; // Could be picked from a list; a single host is used here.
    let initialUrl;
    if (type === 'tv') {
      const query = new URLSearchParams({ tmdb: id, season, episode });
      initialUrl = `${domain}/embed/tv?${query}`;
    } else { // Anything that is not 'tv' is handled as a movie.
      initialUrl = `${domain}/embed/movie/${encodeURIComponent(id)}`;
    }
    console.log(`Initial URL: ${initialUrl}`);

    // --- Browser work ---
    // Newer Puppeteer releases expose createBrowserContext(); older ones only
    // have createIncognitoBrowserContext(). Use whichever is available.
    context = typeof browser.createBrowserContext === 'function'
      ? await browser.createBrowserContext()
      : await browser.createIncognitoBrowserContext();
    const page = await context.newPage();

    // Answer the '/rcp_verify' call locally; let every other request through.
    await page.setRequestInterception(true);
    page.on('request', (request) => {
      // Another handler may already have resolved this request.
      if (request.isInterceptResolutionHandled?.()) {
        return;
      }
      const outcome = request.url().includes('/rcp_verify')
        ? request.respond({ status: 200, contentType: 'application/json', body: '1' })
        : request.continue();
      // Never leave the promise floating: a rejection here (e.g. the page was
      // closed mid-request) must not surface as an unhandled rejection.
      Promise.resolve(outcome).catch((interceptError) => {
        console.warn('Request interception failed:', interceptError.message);
      });
    });

    // Stage 1: open the embed page.
    await page.goto(initialUrl, { waitUntil: 'networkidle2', timeout: 45000 });
    await handleTurnstile(page);

    const firstIframeSrc = await page.evaluate(() => document.querySelector('#player_iframe')?.src);
    if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
    console.log(`First iframe src: ${firstIframeSrc}`);

    // Stage 2: follow the first iframe, reusing the same page object.
    await page.goto(firstIframeSrc, { waitUntil: 'networkidle2', timeout: 45000 });
    await handleTurnstile(page);

    const finalIframeSrc = await page.evaluate(() => document.querySelector('iframe')?.src);
    if (!finalIframeSrc) throw new Error('Final iframe source not found');
    console.log(`Final iframe src: ${finalIframeSrc}`);

    // Stage 3: extract the stream data from the innermost page.
    return await getVideoAndSubtitles(finalIframeSrc);
  } catch (error) {
    console.error('Full error during scraping process:', error.message);
    throw error; // Propagate so the Express route decides the HTTP response.
  } finally {
    if (context) {
      try {
        await context.close(); // Releases every page opened in this context.
      } catch (closeError) {
        // A failed cleanup must not replace the scraping result or error.
        console.warn('Failed to close browser context:', closeError.message);
      }
    }
  }
}
141
+
142
+
143
// --- 4. Express server and routes ---

// Shared browser instance; assigned by the startup code before the server starts listening.
let browser;

// Season/episode values must consist of digits only.
const NUMERIC_PARAM = /^\d+$/;

/**
 * GET /extract
 *
 * Query parameters:
 *   - tmdb_id (required): title id.
 *   - type (optional): 'tv' or 'movie'; defaults to 'movie'.
 *   - season, episode (required when type is 'tv'): non-negative integers.
 *
 * Responses:
 *   - 200 { success: true, result } when a stream URL was found.
 *   - 400 on missing or malformed parameters.
 *   - 404 when scraping finished without finding a stream.
 *   - 500 when scraping threw.
 */
app.get("/extract", async (req, res) => {
  const type = req.query.type || 'movie';
  const { tmdb_id: id, season, episode } = req.query;

  // --- Parameter validation ---
  if (!id) {
    return res.status(400).json({ success: false, error: "tmdb_id is required" });
  }
  // Express parses repeated or bracketed keys (?tmdb_id=1&tmdb_id=2, ?tmdb_id[a]=1)
  // into arrays/objects; only a plain string may reach the scraper.
  if (typeof id !== 'string') {
    return res.status(400).json({ success: false, error: "tmdb_id must be a single string value" });
  }
  if (type === 'tv') {
    if (!season || !episode) {
      return res.status(400).json({ success: false, error: "season and episode are required for type 'tv'" });
    }
    const isNumeric = (value) => typeof value === 'string' && NUMERIC_PARAM.test(value);
    if (!isNumeric(season) || !isNumeric(episode)) {
      return res.status(400).json({ success: false, error: "season and episode must be non-negative integers" });
    }
  }

  try {
    const result = await scrapeSource(browser, { type, id, season, episode });

    if (result && result.videoFileUrl) {
      res.status(200).json({ success: true, result });
    } else {
      res.status(404).json({ success: false, error: "Could not find video stream from the source." });
    }
  } catch (error) {
    res.status(500).json({ success: false, error: `An unexpected error occurred: ${error.message}` });
  }
});
173
+
174
+
175
// --- 5. Server startup and graceful shutdown ---

/**
 * Launch the single long-lived browser, store it in the file-level `browser`
 * variable, and only then start accepting HTTP connections.
 */
async function startServer() {
  console.log("Launching a persistent browser instance...");

  const chromiumFlags = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--window-size=1200,800',
  ];
  const launchOptions = {
    headless: true, // Headless is the recommended setting for production.
    executablePath: executablePath(),
    args: chromiumFlags,
    defaultViewport: null,
  };

  browser = await puppeteer.launch(launchOptions);
  console.log("Browser launched successfully.");

  const announce = () => {
    console.log(`🚀 Scraper server running at http://localhost:${PORT}`);
  };
  app.listen(PORT, announce);
}

// Any failure during startup is fatal: report it and exit with a non-zero code.
startServer().catch((error) => {
  console.error("Failed to launch browser:", error);
  process.exit(1);
});
201
+
202
// Set once shutdown has begun so a repeated signal does not start a second close.
let isShuttingDown = false;

/**
 * Close the shared browser (when one was launched) and terminate the process.
 *
 * Exits with code 0 after a clean close and code 1 when closing the browser
 * failed. The process always exits, even if the close throws.
 *
 * @returns {Promise<void>}
 */
const gracefulShutdown = async () => {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;

  console.log("\nShutting down gracefully...");
  let exitCode = 0;
  try {
    if (browser) {
      await browser.close();
      console.log("Browser instance closed.");
    }
  } catch (error) {
    exitCode = 1;
    console.error("Failed to close the browser cleanly:", error);
  } finally {
    process.exit(exitCode);
  }
};

process.on('SIGINT', gracefulShutdown);
process.on('SIGTERM', gracefulShutdown);