stnh70 committed on
Commit
d5663f9
·
verified ·
1 Parent(s): 50b3d75

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +63 -106
server.js CHANGED
@@ -1,62 +1,26 @@
 
 
1
  import express, { json } from "express";
2
  import cors from "cors";
3
  import { chromium } from "playwright";
4
  import pLimit from "p-limit";
5
- import BrowserPool from './pool/BrowserPool.js'; // 您的浏览器池代码保持不变
6
- // [MODIFICATION] 修正 lru-cache 的导入语法
7
  import { LRUCache } from 'lru-cache';
8
- // [MODIFICATION] 导入新的辅助库
9
  import fetch from 'node-fetch';
10
  import { JSDOM } from 'jsdom';
 
11
 
12
  const app = express();
13
- const PORT = process.env.PORT || 3000; // 建议在 Hugging Face 上使用 7860
14
-
15
  app.use(cors());
16
  app.use(json());
17
 
18
- const PROVIDERS = [
19
- "https://vidsrc.xyz",
20
- "https://vidsrc.in",
21
- "https://vidsrc.pm",
22
- "https://vidsrc.net",
23
- "https://vidsrc.io",
24
- "https://vidsrc.vc",
25
- ];
26
-
27
  let browserPool;
28
-
29
- // [MODIFICATION] 修正 lru-cache 的实例化语法
30
- const cache = new LRUCache({
31
- max: 500,
32
- ttl: 15 * 60 * 1000,
33
- });
34
-
35
  const limit = pLimit(2);
36
 
37
- // [MODIFICATION] 新增的辅助函数,用于处理 CF Turnstile 的前端 JS 绕过
38
- async function handleTurnstile(page) {
39
- try {
40
- await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
41
- console.log('Turnstile detected - attempting generic bypass...');
42
- await page.evaluate(() => {
43
- if (typeof window.cftCallback === 'function') {
44
- const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
45
- window.cftCallback(mockToken);
46
- }
47
- });
48
- console.log('Turnstile JS callback triggered.');
49
- await page.waitForTimeout(2000);
50
- } catch (error) {
51
- if (error.name.includes('Timeout')) {
52
- console.log('Turnstile not found on page, skipping bypass.');
53
- } else {
54
- console.warn('An error occurred during Turnstile handling:', error.message);
55
- }
56
- }
57
- }
58
-
59
- // [MODIFICATION] 新增的辅助函数,用于从最终页面提取视频链接
60
  async function getVideoAndSubtitles(finalUrl) {
61
  try {
62
  const response = await fetch(finalUrl);
@@ -78,95 +42,88 @@ async function getVideoAndSubtitles(finalUrl) {
78
  }
79
 
80
 
81
- // [MODIFICATION] scrapeProvider 函数进行“外科手术式”升级
82
  async function scrapeProvider(domain, url, signal) {
83
- if (signal.aborted) throw new Error('Scraping aborted before starting.');
84
- console.log(`\n[${domain}] Starting UPGRADED scrape for URL: ${url}`);
85
-
86
- let browserInstance = null;
87
- let context = null;
88
- let page = null;
89
 
 
90
  const cleanup = async () => {
91
- if (page && !page.isClosed()) await page.close().catch(() => {});
92
- if (context) await context.close().catch(() => {});
93
- if (browserInstance) {
94
- console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
95
- await browserPool.release(browserInstance);
96
- }
97
  };
98
 
99
  try {
100
  browserInstance = await browserPool.get();
101
  const browser = browserInstance.browser;
102
- console.log(`[${domain}] Acquired browser ${browserInstance.id}`);
103
- if (signal.aborted) throw new Error('Scraping aborted.');
104
-
105
  context = await browser.newContext({
106
  userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
107
  ignoreHTTPSErrors: true
108
  });
109
-
110
- // [MODIFICATION] 注入脚本,手动实现 StealthPlugin 的核心功能
111
- await context.addInitScript(() => {
112
- Object.defineProperty(navigator, 'webdriver', { get: () => false });
113
- });
114
-
115
- // [MODIFICATION] 设置强大的网络拦截,应用于所有页面
116
  await context.route("**/*", (route) => {
117
  if (signal.aborted) return route.abort();
118
- const request = route.request();
119
- const reqUrl = request.url();
120
- const resourceType = request.resourceType();
121
-
122
- // 1. 伪造 Turnstile 验证
123
- if (reqUrl.includes('/rcp_verify')) {
124
- return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
125
- }
126
- // 2. 阻止不必要的资源以节省内存
127
- if (['image', 'stylesheet', 'font', 'media'].includes(resourceType)) {
128
- return route.abort();
129
- }
130
- // 3. 放行其他请求
131
  return route.continue();
132
  });
133
 
134
  page = await context.newPage();
135
- if (signal.aborted) throw new Error('Scraping aborted.');
136
 
137
- // --- 新的、多阶段的抓取逻辑 ---
138
- // 阶段 1: 访问初始页面
139
- await page.goto(url, { waitUntil: "networkidle", timeout: 60000, signal });
140
- await handleTurnstile(page);
 
 
 
 
141
 
142
- const firstIframeSrc = await page.locator('#player_iframe').getAttribute('src');
143
- if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
145
- if (signal.aborted) throw new Error('Scraping aborted.');
146
 
147
- // 阶段 2: 访问第一个 iframe
148
- await page.goto(firstIframeSrc, { waitUntil: "networkidle", timeout: 60000, signal });
149
- await handleTurnstile(page);
150
 
151
- const finalIframeSrc = await page.frameLocator('iframe').locator('iframe').getAttribute('src') || await page.locator('iframe').getAttribute('src');
 
 
152
  if (!finalIframeSrc) throw new Error('Final iframe source not found');
153
  console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
154
- if (signal.aborted) throw new Error('Scraping aborted.');
155
 
156
- // 阶段 3: 从最终源提取数据
157
  const { hlsUrl, subtitles } = await getVideoAndSubtitles(finalIframeSrc);
158
-
159
- if (!hlsUrl) {
160
- throw new Error("HLS URL not found after all stages");
161
- }
162
-
163
  return { source_domain: domain, hls_url: hlsUrl, subtitles, error: null };
164
 
165
  } catch (error) {
166
- if (error.name === 'AbortError' || (signal && signal.aborted)) {
167
- console.log(`[${domain}] Scraping was aborted.`);
168
- } else {
169
- console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
 
 
 
170
  }
171
  throw error;
172
  } finally {
@@ -174,7 +131,7 @@ async function scrapeProvider(domain, url, signal) {
174
  }
175
  }
176
 
177
- // [MODIFICATION] 您的 /extract 路由和启动逻辑完全保持不变,因为它们的设计已经非常优秀
178
  app.get("/extract", async (req, res) => {
179
  const type = req.query.type || "movie";
180
  const tmdb_id = req.query.tmdb_id;
@@ -185,7 +142,7 @@ app.get("/extract", async (req, res) => {
185
  return res.status(400).json({ success: false, error: "tmdb_id query param is required" });
186
  }
187
  if (type === "tv" && (season == null || episode == null)) {
188
- return res.status(400).json({ success: false, error: "season and episode query params are required for TV shows" });
189
  }
190
 
191
  const cacheKey = JSON.stringify(req.query);
@@ -238,7 +195,7 @@ app.get("/extract", async (req, res) => {
238
  minSize: 1,
239
  maxSize: 5,
240
  maxUsage: 100,
241
- launchOptions: { // [MODIFICATION] 为 Docker/HuggingFace 环境添加必要的启动参数
242
  args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
243
  }
244
  });
 
1
+ // server.js (Final Corrected Version - COMPLETE AND UNABRIDGED)
2
+
3
  import express, { json } from "express";
4
  import cors from "cors";
5
  import { chromium } from "playwright";
6
  import pLimit from "p-limit";
7
+ import BrowserPool from './pool/BrowserPool.js';
 
8
  import { LRUCache } from 'lru-cache';
 
9
  import fetch from 'node-fetch';
10
  import { JSDOM } from 'jsdom';
11
+ import fs from 'fs/promises';
12
 
13
  const app = express();
14
+ const PORT = process.env.PORT || 7860;
 
15
  app.use(cors());
16
  app.use(json());
17
 
18
+ const PROVIDERS = ["https://vidsrc.xyz", "https://vidsrc.in", "https://vidsrc.pm", "https://vidsrc.net", "https://vidsrc.io", "https://vidsrc.vc"];
 
 
 
 
 
 
 
 
19
  let browserPool;
20
+ const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });
 
 
 
 
 
 
21
  const limit = pLimit(2);
22
 
23
+ // --- 辅助函数 ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  async function getVideoAndSubtitles(finalUrl) {
25
  try {
26
  const response = await fetch(finalUrl);
 
42
  }
43
 
44
 
45
+ // --- 核心抓取逻辑 ---
46
  async function scrapeProvider(domain, url, signal) {
47
+ if (signal.aborted) throw new Error('Aborted');
48
+ console.log(`\n[${domain}] Starting CORRECTED scrape for URL: ${url}`);
 
 
 
 
49
 
50
+ let browserInstance = null, context = null, page = null;
51
  const cleanup = async () => {
52
+ if (page && !page.isClosed()) await page.close().catch(()=>{});
53
+ if (context) await context.close().catch(()=>{});
54
+ if (browserInstance) {
55
+ console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
56
+ await browserPool.release(browserInstance);
57
+ }
58
  };
59
 
60
  try {
61
  browserInstance = await browserPool.get();
62
  const browser = browserInstance.browser;
 
 
 
63
  context = await browser.newContext({
64
  userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
65
  ignoreHTTPSErrors: true
66
  });
67
+
68
+ await context.addInitScript(() => { Object.defineProperty(navigator, 'webdriver', { get: () => false }); });
69
+
 
 
 
 
70
  await context.route("**/*", (route) => {
71
  if (signal.aborted) return route.abort();
72
+ const reqUrl = route.request().url();
73
+ if (reqUrl.includes('/rcp_verify')) return route.fulfill({ status: 200, json: { success: true } });
74
+ if (['image', 'stylesheet', 'font', 'media'].includes(route.request().resourceType())) return route.abort();
 
 
 
 
 
 
 
 
 
 
75
  return route.continue();
76
  });
77
 
78
  page = await context.newPage();
 
79
 
80
+ // 阶段 1
81
+ console.log(`[${domain}] Navigating to initial page...`);
82
+ await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45000, signal });
83
+ console.log(`[${domain}] Page loaded. Checking for Cloudflare challenge...`);
84
+
85
+ const challengeExists = await page.evaluate(() => {
86
+ return !!document.querySelector('.cf-turnstile[data-sitekey="0x4AAAAAABNpWSLmOnUi7s0b"]');
87
+ });
88
 
89
+ if (challengeExists) {
90
+ console.log(`[${domain}] Specific Cloudflare challenge DETECTED. Waiting for resolution...`);
91
+ await page.waitForLoadState('networkidle', { timeout: 25000 });
92
+ console.log(`[${domain}] Challenge resolved or timed out. Proceeding to find iframe.`);
93
+ } else {
94
+ console.log(`[${domain}] Specific Cloudflare challenge NOT DETECTED. Proceeding directly.`);
95
+ }
96
+
97
+ // 阶段 2
98
+ console.log(`[${domain}] Now attempting to find the iframe...`);
99
+ const iframeLocator = page.locator('#player_iframe, #the_frame');
100
+ await iframeLocator.first().waitFor({ state: 'visible', timeout: 20000 });
101
+
102
+ const firstIframeSrc = await iframeLocator.first().getAttribute('src');
103
+ if (!firstIframeSrc) throw new Error('First iframe not found AFTER challenge handling');
104
  console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
 
105
 
106
+ // 阶段 3
107
+ await page.goto(firstIframeSrc, { waitUntil: "networkidle", timeout: 45000, signal });
 
108
 
109
+ const finalIframeLocator = page.locator('iframe');
110
+ await finalIframeLocator.first().waitFor({ state: 'visible', timeout: 15000 });
111
+ const finalIframeSrc = await finalIframeLocator.first().getAttribute('src');
112
  if (!finalIframeSrc) throw new Error('Final iframe source not found');
113
  console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
 
114
 
 
115
  const { hlsUrl, subtitles } = await getVideoAndSubtitles(finalIframeSrc);
116
+ if (!hlsUrl) throw new Error("HLS URL not found");
 
 
 
 
117
  return { source_domain: domain, hls_url: hlsUrl, subtitles, error: null };
118
 
119
  } catch (error) {
120
+ console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
121
+ if (page && !page.isClosed()) {
122
+ const timestamp = new Date().toISOString().replace(/:/g, '-');
123
+ const safeDomain = domain.replace(/https?:\/\//, '').replace(/\./g, '_');
124
+ const screenshotPath = `debug_screenshot_${safeDomain}_${timestamp}.png`;
125
+ await page.screenshot({ path: screenshotPath, fullPage: true });
126
+ console.log(`[DEBUG] Saved screenshot to ${screenshotPath}`);
127
  }
128
  throw error;
129
  } finally {
 
131
  }
132
  }
133
 
134
+ // --- 您的 /extract 路由和启动/关闭逻辑 ---
135
  app.get("/extract", async (req, res) => {
136
  const type = req.query.type || "movie";
137
  const tmdb_id = req.query.tmdb_id;
 
142
  return res.status(400).json({ success: false, error: "tmdb_id query param is required" });
143
  }
144
  if (type === "tv" && (season == null || episode == null)) {
145
+ return res.status(400).json({ success: false, error: "season and episode are required for TV shows" });
146
  }
147
 
148
  const cacheKey = JSON.stringify(req.query);
 
195
  minSize: 1,
196
  maxSize: 5,
197
  maxUsage: 100,
198
+ launchOptions: {
199
  args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
200
  }
201
  });