stnh70 committed on
Commit
158ed19
·
verified ·
1 Parent(s): ad1b7e5

Update sever.js

Browse files
Files changed (1) hide show
  1. sever.js +126 -132
sever.js CHANGED
@@ -1,23 +1,20 @@
1
  import express, { json } from "express";
2
  import cors from "cors";
3
- import { chromium } from "playwright-extra";
4
- import StealthPlugin from 'puppeteer-extra-plugin-stealth';
5
  import pLimit from "p-limit";
6
- import BrowserPool from './pool/BrowserPool.js'; // 确保这个路径是正确的
7
- import LRU from 'lru-cache';
 
 
8
  import fetch from 'node-fetch';
9
  import { JSDOM } from 'jsdom';
10
 
11
- // 1. 初始化 Playwright Stealth 插件
12
- chromium.use(StealthPlugin());
13
-
14
- // 2. Express 服务器基础设置
15
  const app = express();
16
- const PORT = process.env.PORT || 3000;
 
17
  app.use(cors());
18
  app.use(json());
19
 
20
- // 3. 配置常量
21
  const PROVIDERS = [
22
  "https://vidsrc.xyz",
23
  "https://vidsrc.in",
@@ -28,87 +25,71 @@ const PROVIDERS = [
28
  ];
29
 
30
  let browserPool;
31
- const cache = new LRU({ max: 500, ttl: 15 * 60 * 1000 });
32
- const limit = pLimit(3); // 并发任务数,可根据服务器性能调整
33
 
34
- // 4. 辅助函数: 从最终页面内容中提取 HLS 和字幕
35
- async function getVideoAndSubtitles(finalUrl) {
36
- try {
37
- console.log(`[Helper] Fetching final content from: ${finalUrl}`);
38
- const response = await fetch(finalUrl);
39
- if (!response.ok) {
40
- throw new Error(`HTTP error! Status: ${response.status}`);
41
- }
42
-
43
- const html = await response.text();
44
- const dom = new JSDOM(html);
45
- const document = dom.window.document;
46
-
47
- const script = Array.from(document.querySelectorAll('script')).find(s => s.textContent?.includes('new Playerjs'));
48
- if (!script) {
49
- console.warn(`[Helper] Player.js script not found on ${finalUrl}`);
50
- return { hls_url: null, subtitles: [] };
51
- }
52
-
53
- const scriptContent = script.textContent;
54
- let hls_url = null;
55
- let subtitles = [];
56
-
57
- const fileMatch = scriptContent.match(/file:"(.*?m3u8.*?)"/);
58
- if (fileMatch && fileMatch[1]) {
59
- hls_url = fileMatch[1];
60
- }
61
 
62
- const subtitlesMatch = scriptContent.match(/subtitle:"(.*?)"/);
63
- if (subtitlesMatch && subtitlesMatch[1]) {
64
- try {
65
- subtitles = subtitlesMatch[1].split(',').map(s => s.trim()).filter(Boolean);
66
- } catch(e) {
67
- console.error('[Helper] Failed to parse subtitles', e);
68
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
70
-
71
- return { hls_url, subtitles };
72
- } catch (error) {
73
- console.error(`[Helper] Error in getVideoAndSubtitles for ${finalUrl}:`, error);
74
- throw error;
75
- }
76
  }
77
 
78
- // 5. 辅助函数: 处理前端 Turnstile (方法 B)
79
- async function handleTurnstile(page, domain, signal) {
80
- try {
81
- await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000, signal });
82
- console.log(`[${domain}] [Method B] Turnstile detected. Attempting to trigger JS callback...`);
83
-
84
- await page.evaluate(() => {
85
- if (typeof window.cftCallback === 'function') {
86
- const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
87
- window.cftCallback(mockToken);
88
- }
89
- });
90
-
91
- console.log(`[${domain}] [Method B] Turnstile JS callback triggered.`);
92
- await page.waitForTimeout(2000);
93
-
94
- } catch (error) {
95
- if (error.name === 'TimeoutError') {
96
- console.log(`[${domain}] [Method B] Turnstile not found on page, skipping JS bypass.`);
97
- } else {
98
- console.warn(`[${domain}] [Method B] An error occurred during Turnstile JS handling: ${error.message}`);
99
  }
100
- }
101
  }
102
 
103
- // 6. 核心抓取函数
 
104
  async function scrapeProvider(domain, url, signal) {
105
- console.log(`\n[${domain}] Starting FULL BYPASS scrape for URL: ${url}`);
106
- let browserInstance = null, context = null, page = null, iframePage = null;
 
 
 
 
107
 
108
  const cleanup = async () => {
109
- if (iframePage && !iframePage.isClosed()) await iframePage.close().catch(()=>{});
110
- if (page && !page.isClosed()) await page.close().catch(()=>{});
111
- if (context) await context.close().catch(()=>{});
112
  if (browserInstance) {
113
  console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
114
  await browserPool.release(browserInstance);
@@ -116,64 +97,76 @@ async function scrapeProvider(domain, url, signal) {
116
  };
117
 
118
  try {
119
- if (signal.aborted) throw new Error('Aborted before starting');
120
-
121
  browserInstance = await browserPool.get();
122
- context = await browserInstance.browser.newContext({
123
- userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
124
- ignoreHTTPSErrors: true,
 
 
 
 
125
  });
126
- await context.addInitScript(() => { delete navigator.__proto__.webdriver; });
127
-
128
- // 方法 A: 设置网络层拦截
129
- const setupRequestInterception = async (p) => {
130
- await p.route('**/*', (route) => {
131
- if (signal.aborted) return route.abort();
132
- if (route.request().url().includes('/rcp_verify')) {
133
- console.log(`[${domain}] [Method A] Mocking /rcp_verify network request.`);
134
- return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
135
- }
136
- return route.continue();
137
- });
138
- };
139
 
140
- // --- STAGE 1: 初始页面 ---
141
- page = await context.newPage();
142
- await setupRequestInterception(page);
143
- await page.goto(url, { waitUntil: 'networkidle', timeout: 30000, signal });
144
- await handleTurnstile(page, domain, signal);
145
-
146
- const firstIframeSrc = await page.evaluate(() => {
147
- const iframe = document.querySelector('#player_iframe');
148
- if (!iframe) return null;
149
- let src = iframe.getAttribute('src') || '';
150
- return src.startsWith('//') ? `https:${src}` : src;
151
  });
152
- if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
153
- console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
154
 
155
- // --- STAGE 2: 第一个 iframe ---
156
- iframePage = await context.newPage();
157
- await setupRequestInterception(iframePage);
158
- await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 30000, signal });
159
- await handleTurnstile(iframePage, domain, signal);
 
160
 
161
- const finalIframeSrc = await iframePage.evaluate(() => {
162
- return document.querySelector('iframe')?.src || null;
 
 
 
 
 
 
 
 
163
  });
164
- if (!finalIframeSrc) throw new Error('Final iframe src not found');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
 
166
 
167
- // --- STAGE 3: 获取视频数据 ---
168
- const { hls_url, subtitles } = await getVideoAndSubtitles(finalIframeSrc);
169
- if (!hls_url) throw new Error("Final HLS URL not found");
 
 
 
 
 
170
 
171
- return { source_domain: domain, hls_url, subtitles, error: null };
172
  } catch (error) {
173
- if (signal.aborted || error.name === 'AbortError') {
174
- console.log(`[${domain}] Scrape was aborted.`);
175
  } else {
176
- console.error(`[${domain}] CRITICAL ERROR in scrapeProvider: ${error.message}`);
177
  }
178
  throw error;
179
  } finally {
@@ -181,7 +174,7 @@ async function scrapeProvider(domain, url, signal) {
181
  }
182
  }
183
 
184
- // 7. Express API 路由
185
  app.get("/extract", async (req, res) => {
186
  const type = req.query.type || "movie";
187
  const tmdb_id = req.query.tmdb_id;
@@ -222,7 +215,7 @@ app.get("/extract", async (req, res) => {
222
 
223
  console.log(`\nSuccess from [${firstSuccessfulResult.source_domain}]. Aborting other scrapers.`);
224
  controller.abort();
225
-
226
  const response = { success: true, result: firstSuccessfulResult };
227
  cache.set(cacheKey, response);
228
  res.json(response);
@@ -238,14 +231,16 @@ app.get("/extract", async (req, res) => {
238
  }
239
  });
240
 
241
- // 8. 启动服务器和浏览器池
242
  (async () => {
243
  try {
244
  browserPool = new BrowserPool({
245
  chromium: chromium,
246
  minSize: 1,
247
- maxSize: 4,
248
- maxUsage: 50,
 
 
 
249
  });
250
  await browserPool.initialize();
251
  console.log("Browser pool initialized successfully.");
@@ -256,7 +251,6 @@ app.get("/extract", async (req, res) => {
256
  }
257
  })();
258
 
259
- // 9. 优雅关停
260
  process.on("SIGINT", async () => {
261
  console.log("Shutting down gracefully...");
262
  if (browserPool) await browserPool.shutdown();
 
1
  import express, { json } from "express";
2
  import cors from "cors";
3
+ import { chromium } from "playwright";
 
4
  import pLimit from "p-limit";
5
+ import BrowserPool from './pool/BrowserPool.js'; // 您的浏览器池代码保持不变
6
+ // [MODIFICATION] 修正 lru-cache 的导入语法
7
+ import { LRUCache } from 'lru-cache';
8
+ // [MODIFICATION] 导入新的辅助库
9
  import fetch from 'node-fetch';
10
  import { JSDOM } from 'jsdom';
11
 
 
 
 
 
12
  const app = express();
13
+ const PORT = process.env.PORT || 7860; // 建议在 Hugging Face 上使用 7860
14
+
15
  app.use(cors());
16
  app.use(json());
17
 
 
18
  const PROVIDERS = [
19
  "https://vidsrc.xyz",
20
  "https://vidsrc.in",
 
25
  ];
26
 
27
  let browserPool;
 
 
28
 
29
+ // [MODIFICATION] 修正 lru-cache 的实例化语法
30
+ const cache = new LRUCache({
31
+ max: 500,
32
+ ttl: 15 * 60 * 1000,
33
+ });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ const limit = pLimit(2);
36
+
37
/**
 * Best-effort front-end handling of a Cloudflare Turnstile widget.
 *
 * Waits up to 5 seconds for a visible `.cf-turnstile` element. When one shows
 * up, the page's `window.cftCallback` (if it is a function) is invoked with a
 * random mock token, and the helper then pauses 2 seconds before returning.
 *
 * This helper never rejects: every failure is logged and swallowed so the
 * caller can carry on with the scrape regardless of the outcome.
 *
 * @param {object} page - Page-like object exposing `waitForSelector`,
 *   `evaluate` and `waitForTimeout` (a Playwright `Page` in this file).
 * @returns {Promise<void>}
 */
async function handleTurnstile(page) {
  try {
    await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
    console.log('Turnstile detected - attempting generic bypass...');
    await page.evaluate(() => {
      if (typeof window.cftCallback === 'function') {
        const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
        window.cftCallback(mockToken);
      }
    });
    console.log('Turnstile JS callback triggered.');
    await page.waitForTimeout(2000);
  } catch (error) {
    // A rejection is not guaranteed to be an Error instance (it may be null,
    // a string or a plain object). Normalise before inspecting it so the
    // handler itself cannot throw and break the "never rejects" contract.
    const errorName = String(error?.name ?? '');
    if (errorName.includes('Timeout')) {
      console.log('Turnstile not found on page, skipping bypass.');
    } else {
      console.warn('An error occurred during Turnstile handling:', error?.message ?? error);
    }
  }
}
58
 
59
/**
 * Downloads the final player page and extracts the HLS playlist URL and the
 * subtitle list from the inline script that contains `new Playerjs`.
 *
 * The request is bounded by a timeout and can also be cancelled through an
 * optional external signal, so a stalled host cannot block the caller
 * indefinitely.
 *
 * @param {string} finalUrl - URL of the page embedding the Playerjs setup.
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - Optional signal that cancels the request.
 * @param {number} [options.timeoutMs=15000] - Upper bound for the request, in ms.
 * @returns {Promise<{hlsUrl: (string|null), subtitles: string[]}>} `hlsUrl` is
 *   null when no Playerjs script or no `.m3u8` file entry is found;
 *   `subtitles` holds the trimmed, non-empty comma-separated entries.
 * @throws {Error} On a non-2xx response, a network failure, or an abort/timeout
 *   (the error is logged, then rethrown unchanged).
 */
async function getVideoAndSubtitles(finalUrl, { signal, timeoutMs = 15000 } = {}) {
  // One internal controller merges the timeout and the caller's signal.
  const controller = new AbortController();
  const forwardAbort = () => controller.abort();
  if (signal?.aborted) {
    controller.abort();
  } else {
    signal?.addEventListener('abort', forwardAbort, { once: true });
  }
  const timer = setTimeout(forwardAbort, timeoutMs);

  try {
    const response = await fetch(finalUrl, { signal: controller.signal });
    if (!response.ok) throw new Error(`HTTP error! Status: ${response.status}`);
    const html = await response.text();
    const dom = new JSDOM(html);
    const script = Array.from(dom.window.document.querySelectorAll('script'))
      .find((s) => s.textContent?.includes('new Playerjs'));
    if (!script) return { hlsUrl: null, subtitles: [] };

    const source = script.textContent;
    // Accept `file:"…"`, `file: "…"` and single-quoted variants; the
    // back-reference makes the closing quote match the opening one.
    const fileMatch = source.match(/file:\s*(["'])(.*?m3u8.*?)\1/);
    const subtitlesMatch = source.match(/subtitle:\s*(["'])(.*?)\1/);
    return {
      hlsUrl: fileMatch ? fileMatch[2] : null,
      subtitles: subtitlesMatch
        ? subtitlesMatch[2].split(',').map((s) => s.trim()).filter(Boolean)
        : [],
    };
  } catch (error) {
    console.error('Error in getVideoAndSubtitles:', error);
    throw error;
  } finally {
    // Always release the timer and the listener so nothing outlives the call.
    clearTimeout(timer);
    signal?.removeEventListener('abort', forwardAbort);
  }
}
79
 
80
+
81
+ // [MODIFICATION] 对 scrapeProvider 函数进行“外科手术式”升级
82
  async function scrapeProvider(domain, url, signal) {
83
+ if (signal.aborted) throw new Error('Scraping aborted before starting.');
84
+ console.log(`\n[${domain}] Starting UPGRADED scrape for URL: ${url}`);
85
+
86
+ let browserInstance = null;
87
+ let context = null;
88
+ let page = null;
89
 
90
  const cleanup = async () => {
91
+ if (page && !page.isClosed()) await page.close().catch(() => {});
92
+ if (context) await context.close().catch(() => {});
 
93
  if (browserInstance) {
94
  console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
95
  await browserPool.release(browserInstance);
 
97
  };
98
 
99
  try {
 
 
100
  browserInstance = await browserPool.get();
101
+ const browser = browserInstance.browser;
102
+ console.log(`[${domain}] Acquired browser ${browserInstance.id}`);
103
+ if (signal.aborted) throw new Error('Scraping aborted.');
104
+
105
+ context = await browser.newContext({
106
+ userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
107
+ ignoreHTTPSErrors: true
108
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ // [MODIFICATION] 注入脚本,手动实现 StealthPlugin 的核心功能
111
+ await context.addInitScript(() => {
112
+ Object.defineProperty(navigator, 'webdriver', { get: () => false });
 
 
 
 
 
 
 
 
113
  });
 
 
114
 
115
+ // [MODIFICATION] 设置强大的网络拦截,应用于所有页面
116
+ await context.route("**/*", (route) => {
117
+ if (signal.aborted) return route.abort();
118
+ const request = route.request();
119
+ const reqUrl = request.url();
120
+ const resourceType = request.resourceType();
121
 
122
+ // 1. 伪造 Turnstile 验证
123
+ if (reqUrl.includes('/rcp_verify')) {
124
+ return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
125
+ }
126
+ // 2. 阻止不必要的资源以节省内存
127
+ if (['image', 'stylesheet', 'font', 'media'].includes(resourceType)) {
128
+ return route.abort();
129
+ }
130
+ // 3. 放行其他请求
131
+ return route.continue();
132
  });
133
+
134
+ page = await context.newPage();
135
+ if (signal.aborted) throw new Error('Scraping aborted.');
136
+
137
+ // --- 新的、多阶段的抓取逻辑 ---
138
+ // 阶段 1: 访问初始页面
139
+ await page.goto(url, { waitUntil: "networkidle", timeout: 60000, signal });
140
+ await handleTurnstile(page);
141
+
142
+ const firstIframeSrc = await page.locator('#player_iframe').getAttribute('src');
143
+ if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
144
+ console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
145
+ if (signal.aborted) throw new Error('Scraping aborted.');
146
+
147
+ // 阶段 2: 访问第一个 iframe
148
+ await page.goto(firstIframeSrc, { waitUntil: "networkidle", timeout: 60000, signal });
149
+ await handleTurnstile(page);
150
+
151
+ const finalIframeSrc = await page.frameLocator('iframe').locator('iframe').getAttribute('src') || await page.locator('iframe').getAttribute('src');
152
+ if (!finalIframeSrc) throw new Error('Final iframe source not found');
153
  console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
154
+ if (signal.aborted) throw new Error('Scraping aborted.');
155
 
156
+ // 阶段 3: 从最终源提取数据
157
+ const { hlsUrl, subtitles } = await getVideoAndSubtitles(finalIframeSrc);
158
+
159
+ if (!hlsUrl) {
160
+ throw new Error("HLS URL not found after all stages");
161
+ }
162
+
163
+ return { source_domain: domain, hls_url: hlsUrl, subtitles, error: null };
164
 
 
165
  } catch (error) {
166
+ if (error.name === 'AbortError' || (signal && signal.aborted)) {
167
+ console.log(`[${domain}] Scraping was aborted.`);
168
  } else {
169
+ console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
170
  }
171
  throw error;
172
  } finally {
 
174
  }
175
  }
176
 
177
+ // [MODIFICATION] 您的 /extract 路由和启动逻辑完全保持不变,因为它们的设计已经非常优秀
178
  app.get("/extract", async (req, res) => {
179
  const type = req.query.type || "movie";
180
  const tmdb_id = req.query.tmdb_id;
 
215
 
216
  console.log(`\nSuccess from [${firstSuccessfulResult.source_domain}]. Aborting other scrapers.`);
217
  controller.abort();
218
+
219
  const response = { success: true, result: firstSuccessfulResult };
220
  cache.set(cacheKey, response);
221
  res.json(response);
 
231
  }
232
  });
233
 
 
234
  (async () => {
235
  try {
236
  browserPool = new BrowserPool({
237
  chromium: chromium,
238
  minSize: 1,
239
+ maxSize: 5,
240
+ maxUsage: 100,
241
+ launchOptions: { // [MODIFICATION] 为 Docker/HuggingFace 环境添加必要的启动参数
242
+ args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
243
+ }
244
  });
245
  await browserPool.initialize();
246
  console.log("Browser pool initialized successfully.");
 
251
  }
252
  })();
253
 
 
254
  process.on("SIGINT", async () => {
255
  console.log("Shutting down gracefully...");
256
  if (browserPool) await browserPool.shutdown();