stnh70 committed on
Commit
7f88b18
·
verified ·
1 Parent(s): 8d2c746

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +145 -86
server.js CHANGED
@@ -1,7 +1,7 @@
 
1
 
2
  import express, { json } from "express";
3
  import cors from "cors";
4
- import { chromium } from "playwright";
5
  import pLimit from "p-limit";
6
  import BrowserPool from './pool/BrowserPool.js';
7
  import { LRUCache } from 'lru-cache';
@@ -9,112 +9,185 @@ import fetch from 'node-fetch';
9
  import { JSDOM } from 'jsdom';
10
  import fs from 'fs/promises';
11
 
 
 
 
 
 
 
12
  const app = express();
13
- const PORT = process.env.PORT || 3000;
14
  app.use(cors());
15
  app.use(json());
16
 
17
- //const PROVIDERS = ["https://vidsrc.xyz", "https://vidsrc.in", "https://vidsrc.pm", "https://vidsrc.net", "https://vidsrc.io", "https://vidsrc.vc"];
18
- const PROVIDERS = ["https://cdn.moviesapi.club"];
19
  let browserPool;
20
  const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });
21
  const limit = pLimit(2);
22
 
23
- // --- 辅助函数 ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  async function getVideoAndSubtitles(finalUrl) {
25
  try {
26
  const response = await fetch(finalUrl);
27
- if (!response.ok) throw new Error(`HTTP error! Status: ${response.status}`);
28
  const html = await response.text();
29
  const dom = new JSDOM(html);
30
- const script = Array.from(dom.window.document.querySelectorAll('script')).find(s => s.textContent?.includes('new Playerjs'));
31
- if (!script) return { hlsUrl: null, subtitles: [] };
32
- const fileMatch = script.textContent.match(/file:"(.*?m3u8.*?)"/);
33
- const subtitlesMatch = script.textContent.match(/subtitle:"(.*?)"/);
34
- return {
35
- hlsUrl: fileMatch ? fileMatch[1] : null,
36
- subtitles: subtitlesMatch ? subtitlesMatch[1].split(',').map(s => s.trim()).filter(Boolean) : []
37
- };
 
 
 
 
 
 
 
 
 
 
 
 
38
  } catch (error) {
39
- console.error('Error in getVideoAndSubtitles:', error);
40
  throw error;
41
  }
42
  }
43
 
44
 
45
- // --- 核心抓取逻辑 ---
46
  async function scrapeProvider(domain, url, signal) {
47
  if (signal.aborted) throw new Error('Aborted');
48
- console.log(`\n[${domain}] Starting CORRECTED scrape for URL: ${url}`);
 
 
 
49
 
50
- let browserInstance = null, context = null, page = null;
51
  const cleanup = async () => {
52
- if (page && !page.isClosed()) await page.close().catch(()=>{});
53
  if (context) await context.close().catch(()=>{});
54
  if (browserInstance) {
55
- console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
56
- await browserPool.release(browserInstance);
57
  }
58
  };
59
 
60
  try {
61
  browserInstance = await browserPool.get();
62
  const browser = browserInstance.browser;
 
63
  context = await browser.newContext({
64
- userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
65
  ignoreHTTPSErrors: true
66
  });
67
 
68
- await context.addInitScript(() => { Object.defineProperty(navigator, 'webdriver', { get: () => false }); });
69
-
70
  await context.route("**/*", (route) => {
71
  if (signal.aborted) return route.abort();
72
- const reqUrl = route.request().url();
73
- if (reqUrl.includes('/rcp_verify')) return route.fulfill({ status: 200, json: { success: true } });
74
- if (['image', 'stylesheet', 'font', 'media'].includes(route.request().resourceType())) return route.abort();
75
  return route.continue();
76
  });
77
 
78
- page = await context.newPage();
79
 
80
- // 阶段 1
81
- console.log(`[${domain}] Navigating to initial page...`);
82
- await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45000, signal });
83
- console.log(`[${domain}] Page loaded. Checking for Cloudflare challenge...`);
84
 
85
- const challengeExists = await page.evaluate(() => {
86
- return !!document.querySelector('.cf-turnstile[data-sitekey="0x4AAAAAABNpWSLmOnUi7s0b"]');
87
- });
88
 
89
- if (challengeExists) {
90
- console.log(`[${domain}] Specific Cloudflare challenge DETECTED. Waiting for resolution...`);
91
- await page.waitForLoadState('networkidle', { timeout: 25000 });
92
- console.log(`[${domain}] Challenge resolved or timed out. Proceeding to find iframe.`);
93
- } else {
94
- console.log(`[${domain}] Specific Cloudflare challenge NOT DETECTED. Proceeding directly.`);
95
- }
96
-
97
- // 阶段 2
98
- console.log(`[${domain}] Now attempting to find the iframe...`);
99
- const iframeLocator = page.locator('#player_iframe, #the_frame');
100
- await iframeLocator.first().waitFor({ state: 'visible', timeout: 20000 });
101
-
102
- const firstIframeSrc = await iframeLocator.first().getAttribute('src');
103
- if (!firstIframeSrc) throw new Error('First iframe not found AFTER challenge handling');
104
  console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
105
 
106
- // 阶段 3
107
- await page.goto(firstIframeSrc, { waitUntil: "networkidle", timeout: 45000, signal });
108
-
109
- const finalIframeLocator = page.locator('iframe');
110
- await finalIframeLocator.first().waitFor({ state: 'visible', timeout: 15000 });
111
- const finalIframeSrc = await finalIframeLocator.first().getAttribute('src');
112
  if (!finalIframeSrc) throw new Error('Final iframe source not found');
113
  console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
114
 
115
- const { hlsUrl, subtitles } = await getVideoAndSubtitles(finalIframeSrc);
116
- if (!hlsUrl) throw new Error("HLS URL not found");
117
- return { source_domain: domain, hls_url: hlsUrl, subtitles, error: null };
 
 
118
 
119
  } catch (error) {
120
  console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
@@ -131,32 +204,22 @@ async function scrapeProvider(domain, url, signal) {
131
  }
132
  }
133
 
134
- // --- 您的 /extract 路由和启动/关闭逻辑 ---
135
  app.get("/extract", async (req, res) => {
136
  const type = req.query.type || "movie";
137
  const tmdb_id = req.query.tmdb_id;
138
  const season = req.query.season ? parseInt(req.query.season) : undefined;
139
  const episode = req.query.episode ? parseInt(req.query.episode) : undefined;
140
 
141
- if (!tmdb_id) {
142
- return res.status(400).json({ success: false, error: "tmdb_id query param is required" });
143
- }
144
- if (type === "tv" && (season == null || episode == null)) {
145
- return res.status(400).json({ success: false, error: "season and episode are required for TV shows" });
146
- }
147
 
148
  const cacheKey = JSON.stringify(req.query);
149
  const cached = cache.get(cacheKey);
150
- if (cached) {
151
- console.log("Serving from cache");
152
- return res.json(cached);
153
- }
154
 
155
  const urls = PROVIDERS.reduce((acc, domain) => {
156
- acc[domain] =
157
- type === "tv"
158
- ? `${domain}/embed/tv?tmdb=${tmdb_id}&season=${season}&episode=${episode}`
159
- : `${domain}/embed/movie/${tmdb_id}`;
160
  return acc;
161
  }, {});
162
 
@@ -167,16 +230,11 @@ app.get("/extract", async (req, res) => {
167
  const promises = Object.entries(urls).map(([domain, url]) =>
168
  limit(() => scrapeProvider(domain, url, signal))
169
  );
170
-
171
  const firstSuccessfulResult = await Promise.any(promises);
172
-
173
- console.log(`\nSuccess from [${firstSuccessfulResult.source_domain}]. Aborting other scrapers.`);
174
  controller.abort();
175
-
176
  const response = { success: true, result: firstSuccessfulResult };
177
  cache.set(cacheKey, response);
178
  res.json(response);
179
-
180
  } catch (err) {
181
  if (err instanceof AggregateError) {
182
  console.error("All providers failed to find a link.");
@@ -191,11 +249,12 @@ app.get("/extract", async (req, res) => {
191
  (async () => {
192
  try {
193
  browserPool = new BrowserPool({
194
- chromium: chromium,
195
  minSize: 1,
196
  maxSize: 5,
197
  maxUsage: 100,
198
  launchOptions: {
 
199
  args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
200
  }
201
  });
@@ -209,12 +268,12 @@ app.get("/extract", async (req, res) => {
209
  })();
210
 
211
  process.on("SIGINT", async () => {
212
- console.log("Shutting down gracefully...");
213
- if (browserPool) await browserPool.shutdown();
214
- process.exit(0);
215
  });
216
  process.on("SIGTERM", async () => {
217
- console.log("Shutting down gracefully...");
218
- if (browserPool) await browserPool.shutdown();
219
- process.exit(0);
220
  });
 
1
+ // server.js (Final Version - 1-to-1 Replication of the working Puppeteer code - COMPLETE AND UNABRIDGED)
2
 
3
  import express, { json } from "express";
4
  import cors from "cors";
 
5
  import pLimit from "p-limit";
6
  import BrowserPool from './pool/BrowserPool.js';
7
  import { LRUCache } from 'lru-cache';
 
9
  import { JSDOM } from 'jsdom';
10
  import fs from 'fs/promises';
11
 
12
+ // [REPLICATION] 1. Deep disguise: use playwright-extra with the stealth plugin
13
+ import playwright from 'playwright-extra';
14
+ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
15
+ playwright.chromium.use(StealthPlugin());
16
+
17
+
18
  const app = express();
19
+ const PORT = process.env.PORT || 7860;
20
  app.use(cors());
21
  app.use(json());
22
 
23
+ const PROVIDERS = ["https://cdn.moviesapi.club", "https://vidsrc.xyz", "https://vidsrc.in"];
 
24
  let browserPool;
25
  const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });
26
  const limit = pLimit(2);
27
 
28
+
29
+ // --- Helper functions (translated directly from your working code) ---
30
+
31
+ // [REPLICATION] 2. Targeted handling: 100% copy of your handleSpecificTurnstile logic
32
/**
 * Best-effort handling of one specific Turnstile widget on `page`.
 *
 * Looks for a `.cf-turnstile` element carrying the hard-coded site key. When it
 * is present, waits for that same element to become visible, calls
 * `window.cftCallback` (only if the page defines it as a function) with a
 * random placeholder token, and then waits for the network to become idle.
 *
 * Every error is logged as a warning and swallowed, so this function never
 * rejects; callers simply continue with whatever state the page is in.
 *
 * @param {object} page - Playwright-style page; `evaluate`, `locator` and
 *   `waitForLoadState` are used.
 * @param {string} domain - Label used only as a prefix in log messages.
 * @returns {Promise<void>}
 */
async function handleSpecificTurnstile(page, domain) {
  // One selector for both detection and the visibility wait, so the element
  // waited for is exactly the one that was detected.
  const widgetSelector = '.cf-turnstile[data-sitekey="0x4AAAAAABNpWSLmOnUi7s0b"]';

  try {
    const turnstileExists = await page.evaluate(
      (selector) => document.querySelector(selector) !== null,
      widgetSelector
    );

    if (!turnstileExists) {
      console.log(`[${domain}] Specific Turnstile not detected`);
      return;
    }

    console.log(`[${domain}] Specific Turnstile detected - bypassing...`);
    // `.first()` keeps the wait from failing in strict mode when the selector
    // matches more than one element.
    await page.locator(widgetSelector).first().waitFor({ state: 'visible', timeout: 10000 });

    await page.evaluate(() => {
      if (typeof window.cftCallback === 'function') {
        const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
        window.cftCallback(mockToken);
      }
    });

    // Wait for the network to settle as the signal that the challenge was processed.
    await page.waitForLoadState('networkidle', { timeout: 30000 });
    console.log(`[${domain}] Turnstile bypass completed`);
  } catch (error) {
    // Deliberately non-fatal: the caller proceeds even if this step fails.
    console.warn(`[${domain}] Turnstile bypass warning: ${error.message}`);
  }
}
61
+
62
/**
 * Reads the `src` of the `#player_iframe` element once it is visible and
 * returns it as an absolute URL.
 *
 * - Protocol-relative values (`//host/path`) are prefixed with `https:`.
 * - Values that already carry a scheme are returned unchanged.
 * - Any other value (for example `/path` or `path`) is resolved against
 *   `page.url()`.
 *
 * @param {object} page - Playwright-style page; `locator` and `url` are used.
 * @param {string} domain - Label used only as a prefix in log messages.
 * @returns {Promise<string|null>} Absolute iframe URL, or `null` when the
 *   iframe is missing, has no `src`, or any step fails (the error is logged).
 */
async function extractFirstIframeSrc(page, domain) {
  try {
    const iframeLocator = page.locator('#player_iframe');
    await iframeLocator.waitFor({ state: 'visible', timeout: 15000 });

    const rawSrc = await iframeLocator.getAttribute('src');
    // A missing or empty attribute means there is nothing to navigate to.
    if (!rawSrc) return null;

    if (rawSrc.startsWith('//')) return `https:${rawSrc}`;
    // Already absolute (has a URL scheme): keep it exactly as the page wrote it.
    if (/^[a-z][a-z0-9+.-]*:/i.test(rawSrc)) return rawSrc;

    // Relative reference: resolve it against the page that embeds the iframe.
    return new URL(rawSrc, page.url()).href;
  } catch (error) {
    console.error(`[${domain}] Error finding first iframe:`, error);
    return null;
  }
}
76
+
77
/**
 * Finds the URL of the final player frame on `page`.
 *
 * After at least one `<script>` or `<iframe>` exists in the document, the
 * lookup runs inside the page:
 *   1. the first `<iframe>` that has a `src` and whose inline style does not
 *      set `display` to `none` (with or without whitespace around the colon);
 *   2. otherwise, the first script containing `loadIframe` with a
 *      `src: '<url>'` / `src: "<url>"` entry, resolved against the page URL.
 *
 * @param {object} page - Playwright-style page; `waitForFunction` and
 *   `evaluate` are used.
 * @param {string} domain - Label used only as a prefix in log messages.
 * @returns {Promise<string|null>} The frame URL, or `null` when nothing was
 *   found or any step fails (the error is logged).
 */
async function extractFinalIframeSrc(page, domain) {
  try {
    await page.waitForFunction(
      () => document.querySelectorAll('script, iframe').length > 0,
      null,
      { timeout: 15000 }
    );

    // The callback is executed in the page, so it must be fully self-contained.
    return await page.evaluate(() => {
      const isHiddenInline = (el) =>
        /display\s*:\s*none/i.test(el.getAttribute('style') ?? '');

      const visibleIframe = Array.from(document.querySelectorAll('iframe'))
        .find((el) => el.src && !isHiddenInline(el));
      if (visibleIframe) return visibleIframe.src;

      for (const script of Array.from(document.querySelectorAll('script'))) {
        const text = script.textContent;
        if (!text || !text.includes('loadIframe')) continue;

        // The back-reference makes the closing quote match the opening one.
        const srcMatch = text.match(/src:\s*(['"])(.*?)\1/);
        if (!srcMatch) continue;

        try {
          return new URL(srcMatch[2], window.location.href).href;
        } catch {
          // Unparseable candidate: keep scanning the remaining scripts.
        }
      }
      return null;
    });
  } catch (error) {
    console.error(`[${domain}] Error finding final iframe:`, error);
    return null;
  }
}
98
+
99
  async function getVideoAndSubtitles(finalUrl) {
100
  try {
101
  const response = await fetch(finalUrl);
102
+ if (!response.ok) { throw new Error(`HTTP error! Status: ${response.status}`); }
103
  const html = await response.text();
104
  const dom = new JSDOM(html);
105
+ const document = dom.window.document;
106
+ const scriptTags = document.querySelectorAll('script');
107
+ let videoFileUrl = null;
108
+ let subtitleSources = null;
109
+ for (const script of scriptTags) {
110
+ if (script.textContent.includes('new Playerjs({')) {
111
+ const fileMatch = script.textContent.match(/file:"(.*?m3u8.*?)"/);
112
+ if (fileMatch && fileMatch[1]) { videoFileUrl = fileMatch[1]; }
113
+ const subtitlesMatch = script.textContent.match(/subtitle:"(.*?)"/);
114
+ if (subtitlesMatch && subtitlesMatch[1]) {
115
+ try {
116
+ subtitleSources = JSON.parse(subtitlesMatch[1].replace(/"/g, '"'));
117
+ } catch (e) {
118
+ subtitleSources = subtitlesMatch[1].split(',').map(s => s.trim()).filter(Boolean);
119
+ }
120
+ }
121
+ if (videoFileUrl) { break; }
122
+ }
123
+ }
124
+ return { videoFileUrl, subtitleSources };
125
  } catch (error) {
126
+ console.error('An error occurred during the process:', error);
127
  throw error;
128
  }
129
  }
130
 
131
 
132
+ // --- Core scraping logic: 100% replica of the successful pattern ---
133
  async function scrapeProvider(domain, url, signal) {
134
  if (signal.aborted) throw new Error('Aborted');
135
+ console.log(`\n[${domain}] Starting REPLICATED scrape for URL: ${url}`);
136
+
137
+ let browserInstance = null;
138
+ let context = null;
139
 
 
140
  const cleanup = async () => {
 
141
  if (context) await context.close().catch(()=>{});
142
  if (browserInstance) {
143
+ console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
144
+ await browserPool.release(browserInstance);
145
  }
146
  };
147
 
148
  try {
149
  browserInstance = await browserPool.get();
150
  const browser = browserInstance.browser;
151
+
152
  context = await browser.newContext({
153
+ userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
154
  ignoreHTTPSErrors: true
155
  });
156
 
 
 
157
  await context.route("**/*", (route) => {
158
  if (signal.aborted) return route.abort();
159
+ if (route.request().url().includes('/rcp_verify')) {
160
+ return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
161
+ }
162
  return route.continue();
163
  });
164
 
165
+ const page = await context.newPage();
166
 
167
+ // 阶段 1: 访问初始页面 (使用“极度耐心”策略)
168
+ console.log(`[${domain}] Navigating and patiently waiting for network to be idle...`);
169
+ await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
 
170
 
171
+ await handleSpecificTurnstile(page, domain);
 
 
172
 
173
+ const firstIframeSrc = await extractFirstIframeSrc(page, domain);
174
+ if (!firstIframeSrc) throw new Error('First iframe not found');
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  console.log(`[${domain}] Found first iframe src: ${firstIframeSrc}`);
176
 
177
+ // 阶段 2: 访问第一个 iframe (在新页面中,严格复刻)
178
+ const iframePage = await context.newPage();
179
+ await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });
180
+ await handleSpecificTurnstile(iframePage, domain);
181
+
182
+ const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
183
  if (!finalIframeSrc) throw new Error('Final iframe source not found');
184
  console.log(`[${domain}] Found final iframe src: ${finalIframeSrc}`);
185
 
186
+ // 阶段 3: 提取数据
187
+ const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
188
+ if (!videoFileUrl) throw new Error("HLS URL not found");
189
+
190
+ return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };
191
 
192
  } catch (error) {
193
  console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
 
204
  }
205
  }
206
 
207
+ // --- Your Express routes and startup logic ---
208
  app.get("/extract", async (req, res) => {
209
  const type = req.query.type || "movie";
210
  const tmdb_id = req.query.tmdb_id;
211
  const season = req.query.season ? parseInt(req.query.season) : undefined;
212
  const episode = req.query.episode ? parseInt(req.query.episode) : undefined;
213
 
214
+ if (!tmdb_id) { return res.status(400).json({ success: false, error: "tmdb_id is required" }); }
215
+ if (type === "tv" && (season == null || episode == null)) { return res.status(400).json({ success: false, error: "season and episode are required" }); }
 
 
 
 
216
 
217
  const cacheKey = JSON.stringify(req.query);
218
  const cached = cache.get(cacheKey);
219
+ if (cached) { console.log("Serving from cache"); return res.json(cached); }
 
 
 
220
 
221
  const urls = PROVIDERS.reduce((acc, domain) => {
222
+ acc[domain] = type === "tv" ? `${domain}/embed/tv?tmdb=${tmdb_id}&season=${season}&episode=${episode}` : `${domain}/embed/movie/${tmdb_id}`;
 
 
 
223
  return acc;
224
  }, {});
225
 
 
230
  const promises = Object.entries(urls).map(([domain, url]) =>
231
  limit(() => scrapeProvider(domain, url, signal))
232
  );
 
233
  const firstSuccessfulResult = await Promise.any(promises);
 
 
234
  controller.abort();
 
235
  const response = { success: true, result: firstSuccessfulResult };
236
  cache.set(cacheKey, response);
237
  res.json(response);
 
238
  } catch (err) {
239
  if (err instanceof AggregateError) {
240
  console.error("All providers failed to find a link.");
 
249
  (async () => {
250
  try {
251
  browserPool = new BrowserPool({
252
+ chromium: playwright.chromium,
253
  minSize: 1,
254
  maxSize: 5,
255
  maxUsage: 100,
256
  launchOptions: {
257
+ headless: true,
258
  args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
259
  }
260
  });
 
268
  })();
269
 
270
  process.on("SIGINT", async () => {
271
+ console.log("Shutting down gracefully...");
272
+ if (browserPool) await browserPool.shutdown();
273
+ process.exit(0);
274
  });
275
  process.on("SIGTERM", async () => {
276
+ console.log("Shutting down gracefully...");
277
+ if (browserPool) await browserPool.shutdown();
278
+ process.exit(0);
279
  });