stnh70 committed on
Commit
4c186f5
·
verified ·
1 Parent(s): f13a053

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +206 -166
server.js CHANGED
@@ -1,4 +1,4 @@
1
- // server.js (Final Version - 1-to-1 Replication of the working Puppeteer code - COMPLETE AND UNABRIDGED)
2
 
3
  import express, { json } from "express";
4
  import cors from "cors";
@@ -9,11 +9,10 @@ import fetch from 'node-fetch';
9
  import { JSDOM } from 'jsdom';
10
  import fs from 'fs/promises';
11
 
12
- // [REPLICATION] 1. 深度伪装: 使用 playwright-extra 和 stealth 插件
13
- import playwright from 'playwright-extra';
14
  import StealthPlugin from 'puppeteer-extra-plugin-stealth';
15
- playwright.chromium.use(StealthPlugin());
16
-
17
 
18
  const app = express();
19
  const PORT = process.env.PORT || 3000;
@@ -25,15 +24,13 @@ let browserPool;
25
  const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });
26
  const limit = pLimit(2);
27
 
28
-
29
- // --- 辅助函数 (直接从您的工作代码翻译) ---
30
-
31
- // [REPLICATION] 2. 精确打击: 100% 复制您的 handleSpecificTurnstile 逻辑
32
  async function handleSpecificTurnstile(page, domain) {
33
  try {
34
- const turnstileExists = await page.evaluate(() => {
35
- return !!document.querySelector('.cf-turnstile[data-sitekey="0x4AAAAAABNpWSLmOnUi7s0b"]');
36
- });
 
37
 
38
  if (!turnstileExists) {
39
  console.log(`[${domain}] Specific Turnstile not detected`);
@@ -41,16 +38,26 @@ async function handleSpecificTurnstile(page, domain) {
41
  }
42
 
43
  console.log(`[${domain}] Specific Turnstile detected - bypassing...`);
44
- await page.locator('.cf-turnstile').waitFor({ state: 'visible', timeout: 10000 });
45
-
46
  await page.evaluate(() => {
 
 
47
  if (typeof window.cftCallback === 'function') {
48
- const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
49
  window.cftCallback(mockToken);
50
  }
 
 
 
 
 
 
 
 
 
 
 
51
  });
52
-
53
- // 等待网络空闲以确认挑战已处理
54
  await page.waitForLoadState('networkidle', { timeout: 30000 });
55
  console.log(`[${domain}] Turnstile bypass completed`);
56
 
@@ -61,13 +68,13 @@ async function handleSpecificTurnstile(page, domain) {
61
 
62
  async function extractFirstIframeSrc(page, domain) {
63
  try {
64
- const iframeLocator = page.locator('#player_iframe');
65
- await iframeLocator.waitFor({ state: 'visible', timeout: 15000 });
66
- let src = await iframeLocator.getAttribute('src');
67
- if (src && src.startsWith('//')) {
68
- src = `https:${src}`;
69
- }
70
- return src;
71
  } catch (error) {
72
  console.error(`[${domain}] Error finding first iframe:`, error);
73
  return null;
@@ -76,20 +83,16 @@ async function extractFirstIframeSrc(page, domain) {
76
 
77
  async function extractFinalIframeSrc(page, domain) {
78
  try {
79
- await page.waitForFunction(() => document.querySelectorAll('script, iframe').length > 0, null, { timeout: 15000 });
80
- return await page.evaluate(() => {
81
- const iframe = document.querySelector('iframe:not([style*="display:none"])');
82
- if (iframe?.src) return iframe.src;
83
- const scripts = Array.from(document.querySelectorAll('script'));
84
- for (const script of scripts) {
85
- if (!script.textContent) continue;
86
- if (script.textContent.includes('loadIframe')) {
87
- const srcMatch = script.textContent.match(/src:\s*['"](.*?)['"]/);
88
- if (srcMatch) return new URL(srcMatch[1], window.location.href).href;
89
- }
90
- }
91
- return null;
92
  });
 
 
93
  } catch (error) {
94
  console.error(`[${domain}] Error finding final iframe:`, error);
95
  return null;
@@ -106,10 +109,12 @@ async function getVideoAndSubtitles(finalUrl) {
106
  const scriptTags = document.querySelectorAll('script');
107
  let videoFileUrl = null;
108
  let subtitleSources = null;
 
109
  for (const script of scriptTags) {
110
  if (script.textContent.includes('new Playerjs({')) {
111
  const fileMatch = script.textContent.match(/file:"(.*?m3u8.*?)"/);
112
  if (fileMatch && fileMatch[1]) { videoFileUrl = fileMatch[1]; }
 
113
  const subtitlesMatch = script.textContent.match(/subtitle:"(.*?)"/);
114
  if (subtitlesMatch && subtitlesMatch[1]) {
115
  try {
@@ -118,9 +123,11 @@ async function getVideoAndSubtitles(finalUrl) {
118
  subtitleSources = subtitlesMatch[1].split(',').map(s => s.trim()).filter(Boolean);
119
  }
120
  }
 
121
  if (videoFileUrl) { break; }
122
  }
123
  }
 
124
  return { videoFileUrl, subtitleSources };
125
  } catch (error) {
126
  console.error('An error occurred during the process:', error);
@@ -128,143 +135,175 @@ async function getVideoAndSubtitles(finalUrl) {
128
  }
129
  }
130
 
131
-
132
- // --- 核心抓取逻辑: 100% 复刻成功模式 ---
133
  async function scrapeProvider(domain, url, signal) {
134
- if (signal.aborted) throw new Error('Aborted');
135
- console.log(`\n[${domain}] Starting REPLICATED scrape for URL: ${url}`);
136
-
137
- let browserInstance = null;
138
- let context = null;
139
-
140
- const cleanup = async () => {
141
- if (context) await context.close().catch(()=>{});
142
- if (browserInstance) {
143
- console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
144
- await browserPool.release(browserInstance);
145
- }
146
- };
147
-
148
- try {
149
- browserInstance = await browserPool.get();
150
- const browser = browserInstance.browser;
151
-
152
- context = await browser.newContext({
153
- userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
154
- ignoreHTTPSErrors: true
155
- });
156
-
157
- await context.route("**/*", (route) => {
158
- if (signal.aborted) return route.abort();
159
- if (route.request().url().includes('/rcp_verify')) {
160
- return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
161
- }
162
- return route.continue();
163
- });
164
-
165
- const page = await context.newPage();
166
-
167
- // 阶段 1: 访问初始页面 (使用“极度耐心”策略)
168
- console.log(`[${domain}] Navigating and patiently waiting for network to be idle...`);
169
- await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
170
-
171
- await handleSpecificTurnstile(page, domain);
172
-
173
- const firstIframeSrc = await extractFirstIframeSrc(page, domain);
174
- if (!firstIframeSrc) throw new Error('First iframe not found');
175
- console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
176
-
177
- // 阶段 2: 访问第一个 iframe (在新页面中,严格复刻)
178
- const iframePage = await context.newPage();
179
- await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });
180
- await handleSpecificTurnstile(iframePage, domain);
181
-
182
- const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
183
- if (!finalIframeSrc) throw new Error('Final iframe source not found');
184
- console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
185
-
186
- // 阶段 3: 提取数据
187
- const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
188
- if (!videoFileUrl) throw new Error("HLS URL not found");
189
-
190
- return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };
191
-
192
- } catch (error) {
193
- console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
194
- if (page && !page.isClosed()) {
195
- const timestamp = new Date().toISOString().replace(/:/g, '-');
196
- const safeDomain = domain.replace(/https?:\/\//, '').replace(/\./g, '_');
197
- const screenshotPath = `debug_screenshot_${safeDomain}_${timestamp}.png`;
198
- await page.screenshot({ path: screenshotPath, fullPage: true });
199
- console.log(`[DEBUG] Saved screenshot to ${screenshotPath}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  }
201
- throw error;
202
- } finally {
203
- await cleanup();
204
- }
205
  }
206
 
207
- // --- 您的 Express 路由和启动逻辑 ---
208
  app.get("/extract", async (req, res) => {
209
- const type = req.query.type || "movie";
210
- const tmdb_id = req.query.tmdb_id;
211
- const season = req.query.season ? parseInt(req.query.season) : undefined;
212
- const episode = req.query.episode ? parseInt(req.query.episode) : undefined;
213
-
214
- if (!tmdb_id) { return res.status(400).json({ success: false, error: "tmdb_id is required" }); }
215
- if (type === "tv" && (season == null || episode == null)) { return res.status(400).json({ success: false, error: "season and episode are required" }); }
216
-
217
- const cacheKey = JSON.stringify(req.query);
218
- const cached = cache.get(cacheKey);
219
- if (cached) { console.log("Serving from cache"); return res.json(cached); }
220
-
221
- const urls = PROVIDERS.reduce((acc, domain) => {
222
- acc[domain] = type === "tv" ? `${domain}/embed/tv?tmdb=${tmdb_id}&season=${season}&episode=${episode}` : `${domain}/embed/movie/${tmdb_id}`;
223
- return acc;
224
- }, {});
225
-
226
- const controller = new AbortController();
227
- const signal = controller.signal;
228
-
229
- try {
230
- const promises = Object.entries(urls).map(([domain, url]) =>
231
- limit(() => scrapeProvider(domain, url, signal))
232
- );
233
- const firstSuccessfulResult = await Promise.any(promises);
234
- controller.abort();
235
- const response = { success: true, result: firstSuccessfulResult };
236
- cache.set(cacheKey, response);
237
- res.json(response);
238
- } catch (err) {
239
- if (err instanceof AggregateError) {
240
- console.error("All providers failed to find a link.");
241
- res.status(404).json({ success: false, error: "Could not find the video from any provider." });
242
- } else {
243
- console.error("An unexpected server error occurred:", err);
244
- res.status(500).json({ success: false, error: "Unexpected server error" });
 
 
 
 
245
  }
246
- }
247
  });
248
 
249
  (async () => {
250
- try {
251
- browserPool = new BrowserPool({
252
- chromium: playwright.chromium,
253
- minSize: 1,
254
- maxSize: 5,
255
- maxUsage: 100,
256
- launchOptions: {
257
- headless: true,
258
- args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
259
- }
260
- });
261
- await browserPool.initialize();
262
- console.log("Browser pool initialized successfully.");
263
- app.listen(PORT, () => console.log(`🚀 Universal Video Extractor running at http://localhost:${PORT}`));
264
- } catch (error) {
265
- console.error("Failed to initialize browser pool:", error);
266
- process.exit(1);
267
- }
 
 
 
 
 
 
 
 
 
 
268
  })();
269
 
270
  process.on("SIGINT", async () => {
@@ -272,8 +311,9 @@ process.on("SIGINT", async () => {
272
  if (browserPool) await browserPool.shutdown();
273
  process.exit(0);
274
  });
 
275
  process.on("SIGTERM", async () => {
276
  console.log("Shutting down gracefully...");
277
  if (browserPool) await browserPool.shutdown();
278
  process.exit(0);
279
- });
 
1
+ // server.js (Complete Playwright Version with CF Bypass)
2
 
3
  import express, { json } from "express";
4
  import cors from "cors";
 
9
  import { JSDOM } from 'jsdom';
10
  import fs from 'fs/promises';
11
 
12
+ // Playwright with stealth plugin
13
+ import { chromium } from 'playwright-extra';
14
  import StealthPlugin from 'puppeteer-extra-plugin-stealth';
15
+ chromium.use(StealthPlugin());
 
16
 
17
  const app = express();
18
  const PORT = process.env.PORT || 3000;
 
24
  const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });
25
  const limit = pLimit(2);
26
 
27
+ // Complete Turnstile bypass function
 
 
 
28
  async function handleSpecificTurnstile(page, domain) {
29
  try {
30
+ const turnstileExists = await page.waitForSelector('.cf-turnstile[data-sitekey="0x4AAAAAABNpWSLmOnUi7s0b"]', {
31
+ state: 'attached',
32
+ timeout: 10000
33
+ }).catch(() => null);
34
 
35
  if (!turnstileExists) {
36
  console.log(`[${domain}] Specific Turnstile not detected`);
 
38
  }
39
 
40
  console.log(`[${domain}] Specific Turnstile detected - bypassing...`);
41
+
 
42
  await page.evaluate(() => {
43
+ const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
44
+
45
  if (typeof window.cftCallback === 'function') {
 
46
  window.cftCallback(mockToken);
47
  }
48
+
49
+ const form = document.createElement('form');
50
+ form.method = 'POST';
51
+ form.action = '/rcp_verify';
52
+ const input = document.createElement('input');
53
+ input.type = 'hidden';
54
+ input.name = 'token';
55
+ input.value = mockToken;
56
+ form.appendChild(input);
57
+ document.body.appendChild(form);
58
+ form.submit();
59
  });
60
+
 
61
  await page.waitForLoadState('networkidle', { timeout: 30000 });
62
  console.log(`[${domain}] Turnstile bypass completed`);
63
 
 
68
 
69
  async function extractFirstIframeSrc(page, domain) {
70
  try {
71
+ await page.waitForSelector('#player_iframe', {
72
+ state: 'attached',
73
+ timeout: 15000
74
+ });
75
+
76
+ const frame = await page.frame('#player_iframe');
77
+ return frame ? frame.url() : null;
78
  } catch (error) {
79
  console.error(`[${domain}] Error finding first iframe:`, error);
80
  return null;
 
83
 
84
  async function extractFinalIframeSrc(page, domain) {
85
  try {
86
+ await page.waitForFunction(() => {
87
+ return document.querySelectorAll('script, iframe').length > 0;
88
+ }, { timeout: 15000 });
89
+
90
+ const frames = await page.frames();
91
+ const visibleFrame = frames.find(frame => {
92
+ return !frame.$('iframe[style*="display:none"]');
 
 
 
 
 
 
93
  });
94
+
95
+ return visibleFrame?.url();
96
  } catch (error) {
97
  console.error(`[${domain}] Error finding final iframe:`, error);
98
  return null;
 
109
  const scriptTags = document.querySelectorAll('script');
110
  let videoFileUrl = null;
111
  let subtitleSources = null;
112
+
113
  for (const script of scriptTags) {
114
  if (script.textContent.includes('new Playerjs({')) {
115
  const fileMatch = script.textContent.match(/file:"(.*?m3u8.*?)"/);
116
  if (fileMatch && fileMatch[1]) { videoFileUrl = fileMatch[1]; }
117
+
118
  const subtitlesMatch = script.textContent.match(/subtitle:"(.*?)"/);
119
  if (subtitlesMatch && subtitlesMatch[1]) {
120
  try {
 
123
  subtitleSources = subtitlesMatch[1].split(',').map(s => s.trim()).filter(Boolean);
124
  }
125
  }
126
+
127
  if (videoFileUrl) { break; }
128
  }
129
  }
130
+
131
  return { videoFileUrl, subtitleSources };
132
  } catch (error) {
133
  console.error('An error occurred during the process:', error);
 
135
  }
136
  }
137
 
 
 
138
  async function scrapeProvider(domain, url, signal) {
139
+ if (signal.aborted) throw new Error('Aborted');
140
+ console.log(`\n[${domain}] Starting scrape for URL: ${url}`);
141
+
142
+ let browserInstance = null;
143
+ let context = null;
144
+
145
+ const cleanup = async () => {
146
+ if (context) await context.close().catch(() => {});
147
+ if (browserInstance) {
148
+ console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
149
+ await browserPool.release(browserInstance);
150
+ }
151
+ };
152
+
153
+ try {
154
+ browserInstance = await browserPool.get();
155
+ const browser = browserInstance.browser;
156
+
157
+ context = await browser.newContext({
158
+ userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
159
+ ignoreHTTPSErrors: true,
160
+ javaScriptEnabled: true,
161
+ bypassCSP: true
162
+ });
163
+
164
+ await context.addInitScript(() => {
165
+ delete navigator.__proto__.webdriver;
166
+ Object.defineProperty(navigator, 'plugins', {
167
+ get: () => [{
168
+ 0: { type: "application/x-google-chrome-pdf" },
169
+ description: "Portable Document Format",
170
+ filename: "internal-pdf-viewer",
171
+ length: 1,
172
+ name: "Chrome PDF Plugin"
173
+ }]
174
+ });
175
+ });
176
+
177
+ const page = await context.newPage();
178
+
179
+ // Setup request interception
180
+ await page.route('**/rcp_verify', (route) => {
181
+ route.fulfill({
182
+ status: 200,
183
+ contentType: 'application/json',
184
+ body: '1'
185
+ });
186
+ });
187
+
188
+ // Stage 1: Initial page
189
+ console.log(`[${domain}] Navigating and waiting for network to be idle...`);
190
+ await page.goto(url, {
191
+ waitUntil: 'networkidle',
192
+ timeout: 60000
193
+ });
194
+
195
+ await handleSpecificTurnstile(page, domain);
196
+
197
+ const firstIframeSrc = await extractFirstIframeSrc(page, domain);
198
+ if (!firstIframeSrc) throw new Error('First iframe not found');
199
+ console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
200
+
201
+ // Stage 2: First iframe
202
+ const iframePage = await context.newPage();
203
+ await iframePage.goto(firstIframeSrc, {
204
+ waitUntil: 'networkidle',
205
+ timeout: 60000
206
+ });
207
+ await handleSpecificTurnstile(iframePage, domain);
208
+
209
+ const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
210
+ if (!finalIframeSrc) throw new Error('Final iframe source not found');
211
+ console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
212
+
213
+ // Stage 3: Extract data
214
+ const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
215
+ if (!videoFileUrl) throw new Error("HLS URL not found");
216
+
217
+ return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };
218
+
219
+ } catch (error) {
220
+ console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
221
+ if (page && !page.isClosed()) {
222
+ const timestamp = new Date().toISOString().replace(/:/g, '-');
223
+ const safeDomain = domain.replace(/https?:\/\//, '').replace(/\./g, '_');
224
+ const screenshotPath = `debug_screenshot_${safeDomain}_${timestamp}.png`;
225
+ await page.screenshot({ path: screenshotPath, fullPage: true });
226
+ console.log(`[DEBUG] Saved screenshot to ${screenshotPath}`);
227
+ }
228
+ throw error;
229
+ } finally {
230
+ await cleanup();
231
  }
 
 
 
 
232
  }
233
 
 
234
  app.get("/extract", async (req, res) => {
235
+ const type = req.query.type || "movie";
236
+ const tmdb_id = req.query.tmdb_id;
237
+ const season = req.query.season ? parseInt(req.query.season) : undefined;
238
+ const episode = req.query.episode ? parseInt(req.query.episode) : undefined;
239
+
240
+ if (!tmdb_id) { return res.status(400).json({ success: false, error: "tmdb_id is required" }); }
241
+ if (type === "tv" && (season == null || episode == null)) { return res.status(400).json({ success: false, error: "season and episode are required" }); }
242
+
243
+ const cacheKey = JSON.stringify(req.query);
244
+ const cached = cache.get(cacheKey);
245
+ if (cached) {
246
+ console.log("Serving from cache");
247
+ return res.json(cached);
248
+ }
249
+
250
+ const urls = PROVIDERS.reduce((acc, domain) => {
251
+ acc[domain] = type === "tv" ? `${domain}/embed/tv?tmdb=${tmdb_id}&season=${season}&episode=${episode}` : `${domain}/embed/movie/${tmdb_id}`;
252
+ return acc;
253
+ }, {});
254
+
255
+ const controller = new AbortController();
256
+ const signal = controller.signal;
257
+
258
+ try {
259
+ const promises = Object.entries(urls).map(([domain, url]) =>
260
+ limit(() => scrapeProvider(domain, url, signal))
261
+ );
262
+ const firstSuccessfulResult = await Promise.any(promises);
263
+ controller.abort();
264
+ const response = { success: true, result: firstSuccessfulResult };
265
+ cache.set(cacheKey, response);
266
+ res.json(response);
267
+ } catch (err) {
268
+ if (err instanceof AggregateError) {
269
+ console.error("All providers failed to find a link.");
270
+ res.status(404).json({ success: false, error: "Could not find the video from any provider." });
271
+ } else {
272
+ console.error("An unexpected server error occurred:", err);
273
+ res.status(500).json({ success: false, error: "Unexpected server error" });
274
+ }
275
  }
 
276
  });
277
 
278
  (async () => {
279
+ try {
280
+ browserPool = new BrowserPool({
281
+ chromium: chromium,
282
+ minSize: 1,
283
+ maxSize: 5,
284
+ maxUsage: 100,
285
+ launchOptions: {
286
+ headless: true,
287
+ args: [
288
+ '--no-sandbox',
289
+ '--disable-setuid-sandbox',
290
+ '--disable-dev-shm-usage',
291
+ '--disable-gpu',
292
+ '--disable-extensions',
293
+ '--disable-notifications',
294
+ '--disable-infobars',
295
+ '--disable-web-security',
296
+ '--disable-features=IsolateOrigins,site-per-process'
297
+ ]
298
+ }
299
+ });
300
+ await browserPool.initialize();
301
+ console.log("Browser pool initialized successfully.");
302
+ app.listen(PORT, () => console.log(`🚀 Universal Video Extractor running at http://localhost:${PORT}`));
303
+ } catch (error) {
304
+ console.error("Failed to initialize browser pool:", error);
305
+ process.exit(1);
306
+ }
307
  })();
308
 
309
  process.on("SIGINT", async () => {
 
311
  if (browserPool) await browserPool.shutdown();
312
  process.exit(0);
313
  });
314
+
315
  process.on("SIGTERM", async () => {
316
  console.log("Shutting down gracefully...");
317
  if (browserPool) await browserPool.shutdown();
318
  process.exit(0);
319
+ });