|
|
|
|
|
import express, { json } from "express"; |
|
|
import cors from "cors"; |
|
|
import pLimit from "p-limit"; |
|
|
import BrowserPool from './pool/BrowserPool.js'; |
|
|
import { LRUCache } from 'lru-cache'; |
|
|
import fetch from 'node-fetch'; |
|
|
import { JSDOM } from 'jsdom'; |
|
|
import fs from 'fs/promises'; |
|
|
|
|
|
|
|
|
import { chromium } from 'playwright-extra'; |
|
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth'; |
|
|
// Register the stealth plugin on the shared Chromium launcher before any browser is started.
chromium.use(StealthPlugin());

// Base URLs of the embed providers that /extract builds its target URLs from.
const PROVIDERS = ["https://cdn.moviesapi.club"];

// Response cache: at most 500 entries, each kept for 15 minutes.
const cache = new LRUCache({ max: 500, ttl: 15 * 60 * 1000 });

// Concurrency limiter used by /extract to wrap each provider scrape (2 at a time).
const limit = pLimit(2);

// Assigned by the startup routine once the pool has been constructed.
let browserPool;

// HTTP application; the PORT environment variable wins, 3000 is the fallback.
const app = express();
const PORT = process.env.PORT || 3000;

// Global middleware: CORS headers first, then JSON body parsing.
app.use(cors());
app.use(json());
|
|
|
|
|
/**
 * Reads the `src` attribute of the `#player_iframe` element on a loaded page.
 *
 * @param {object} page - Page-like object exposing `waitForSelector` and `evaluate`.
 * @param {string} domain - Provider label used only as a log prefix.
 * @returns {Promise<string>} The iframe source; a leading `//` is upgraded to `https://`.
 * @throws {Error} When the selector never appears or the iframe has no usable `src`.
 */
async function extractFirstIframeSrc(page, domain) {
  // Executed inside the page: must not reference anything from this module's scope.
  const readPlayerIframeSrc = () => {
    const frame = document.getElementById('player_iframe');
    if (!frame) return null;

    const rawSrc = frame.getAttribute('src') || '';
    return rawSrc.startsWith('//') ? `https:${rawSrc}` : rawSrc;
  };

  try {
    await page.waitForSelector('#player_iframe', { timeout: 15000 });

    const iframeSrc = await page.evaluate(readPlayerIframeSrc);
    if (!iframeSrc) {
      // Covers both a missing element (null) and an empty attribute ('').
      throw new Error('First iframe src not found');
    }

    console.log(`[${domain}] Found first iframe src: ${iframeSrc}`);
    return iframeSrc;
  } catch (err) {
    console.error(`[${domain}] Error finding first iframe:`, err);
    throw err;
  }
}
|
|
|
|
|
/**
 * Downloads a player page and extracts the stream URL and subtitle list from
 * the inline script that constructs `new Playerjs({...})`.
 *
 * @param {string} finalUrl - URL of the page that embeds the Playerjs setup script.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=30000] - Abort the HTTP request after this many milliseconds.
 * @param {AbortSignal} [options.signal] - Optional caller signal that also aborts the request.
 * @returns {Promise<{videoFileUrl: (string|null), subtitleSources: *}>}
 *   `videoFileUrl` is the `file:` value, or null when no Playerjs script has one.
 *   `subtitleSources` is the parsed `default_subtitles` value (JSON when it
 *   parses, otherwise a comma-split array), or null when absent.
 * @throws {Error} On a non-2xx response, a network failure, or an aborted/timed-out request.
 */
async function getVideoAndSubtitles(finalUrl, { timeoutMs = 30000, signal } = {}) {
  // Without a deadline a stalled upstream would keep the caller (and the
  // browser it holds) waiting forever, so the request is always abortable.
  const abortController = new AbortController();
  const abortRequest = () => abortController.abort();
  const timeoutId = setTimeout(abortRequest, timeoutMs);

  if (signal) {
    if (signal.aborted) {
      abortRequest();
    } else {
      signal.addEventListener('abort', abortRequest, { once: true });
    }
  }

  try {
    const response = await fetch(finalUrl, { signal: abortController.signal });
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }

    const html = await response.text();
    const { document } = new JSDOM(html).window;

    let videoFileUrl = null;
    let subtitleSources = null;

    for (const script of document.querySelectorAll('script')) {
      const scriptText = script.textContent;
      if (!scriptText.includes('new Playerjs({')) continue;

      const fileMatch = scriptText.match(/file:\s*['"](.*?)['"]/);
      if (fileMatch && fileMatch[1]) {
        videoFileUrl = fileMatch[1];
      }

      const subtitlesMatch = scriptText.match(/default_subtitles\s*=\s*['"](.*?)['"]/);
      if (subtitlesMatch && subtitlesMatch[1]) {
        try {
          subtitleSources = JSON.parse(subtitlesMatch[1]);
        } catch (parseError) {
          // Not JSON: treat the value as a comma-separated list of sources.
          subtitleSources = subtitlesMatch[1]
            .split(',')
            .map((entry) => entry.trim())
            .filter(Boolean);
        }
      }

      // Keep scanning later Playerjs scripts only while no stream URL was found.
      if (videoFileUrl) break;
    }

    return { videoFileUrl, subtitleSources };
  } catch (error) {
    console.error('An error occurred during the process:', error);
    throw error;
  } finally {
    clearTimeout(timeoutId);
    if (signal) {
      signal.removeEventListener('abort', abortRequest);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Debug variant of the provider scraper: logs every request and the full HTML
 * of both pages while walking provider page -> first iframe -> final iframe ->
 * Playerjs stream URL.
 *
 * @param {string} domain - Provider base URL, used as the log prefix and `source_domain`.
 * @param {string} url - Embed page to open.
 * @param {{aborted: boolean}} signal - Abort signal; only checked before work starts.
 * @returns {Promise<{source_domain: string, hls_url: string, subtitles: *, error: null}>}
 * @throws {Error} When aborted up front, or when any step fails to find its target.
 */
async function scrapeProvider_(domain, url, signal) {
  if (signal.aborted) throw new Error('Aborted');
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);

  let browserInstance = null;
  // Every page opened here is tracked so it can be closed before the browser
  // goes back to the pool; the pool's own cleanup is not visible from here.
  const openPages = [];

  try {
    browserInstance = await browserPool.get();
    const browser = browserInstance.browser;

    const page = await browser.newPage();
    openPages.push(page);

    await page.route('**/*', async (route) => {
      console.log(`[${domain}] Request: ${route.request().url()}`);
      try {
        await route.continue();
      } catch (routeError) {
        // Typically the page closed while the request was in flight.
        console.warn(`[${domain}] Could not continue request: ${routeError.message}`);
      }
    });

    await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });

    const html = await page.content();
    console.log(`[${domain}] Page HTML:\n${html}`);

    const hasTurnstile = await page.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Has Turnstile: ${hasTurnstile}`);
    if (hasTurnstile) {
      await handleSpecificTurnstile(page, domain);
    }

    const firstIframeSrc = await extractFirstIframeSrc(page, domain);
    if (!firstIframeSrc) throw new Error('First iframe not found');

    const iframePage = await browser.newPage();
    openPages.push(iframePage);
    await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });

    const iframeHtml = await iframePage.content();
    console.log(`[${domain}] Iframe HTML:\n${iframeHtml}`);

    const iframeHasTurnstile = await iframePage.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Iframe has Turnstile: ${iframeHasTurnstile}`);
    if (iframeHasTurnstile) {
      await handleSpecificTurnstile(iframePage, domain);
    }

    const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
    if (!finalIframeSrc) throw new Error('Final iframe not found');

    const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
    if (!videoFileUrl) throw new Error("HLS URL not found");

    return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };
  } catch (error) {
    console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
    throw error;
  } finally {
    for (const openPage of openPages) {
      try {
        await openPage.close();
      } catch (closeError) {
        console.warn(`[${domain}] Failed to close page: ${closeError.message}`);
      }
    }
    if (browserInstance) {
      console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
      await browserPool.release(browserInstance);
    }
  }
}
|
|
|
|
|
/**
 * Scraper variant that captures the body of any `cloudnestra.com/rcp/`
 * request made by the provider page and, when the final iframe points at that
 * endpoint, reads the stream URL from the captured JSON instead of fetching
 * the player page again.
 *
 * @param {string} domain - Provider base URL, used as the log prefix and `source_domain`.
 * @param {string} url - Embed page to open.
 * @param {{aborted: boolean}} signal - Abort signal; only checked before work starts.
 * @returns {Promise<{source_domain: string, hls_url: string, subtitles: *, error: null}>}
 * @throws {Error} When aborted up front, when an iframe cannot be located,
 *   when the captured response is missing/invalid, or when no stream URL is found.
 */
async function scrapeProvider__(domain, url, signal) {
  if (signal.aborted) throw new Error('Aborted');
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);

  let browserInstance = null;
  let cloudnestraResponse = null;
  // Tracked so every page is closed before the browser returns to the pool.
  const openPages = [];
  const warnRouteError = (routeError) => {
    console.warn(`[${domain}] Route handling failed: ${routeError.message}`);
  };

  try {
    browserInstance = await browserPool.get();
    const browser = browserInstance.browser;

    const page = await browser.newPage();
    openPages.push(page);

    await page.route('**/*', async (route) => {
      const requestUrl = route.request().url();
      console.log(`[${domain}] Request: ${requestUrl}`);

      if (!requestUrl.includes('cloudnestra.com/rcp/')) {
        await route.continue().catch(warnRouteError);
        return;
      }

      try {
        // route.continue() resolves with no value, so the body has to be
        // obtained with route.fetch() and then handed back via fulfill().
        const response = await route.fetch();
        const text = await response.text();
        console.log(`[${domain}] Cloudnestra response:`, text);
        cloudnestraResponse = text;
        await route.fulfill({ response });
      } catch (routeError) {
        warnRouteError(routeError);
        // Let the browser perform the request itself if interception failed.
        await route.continue().catch(warnRouteError);
      }
    });

    await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });

    const hasTurnstile = await page.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Has Turnstile: ${hasTurnstile}`);
    if (hasTurnstile) {
      await handleSpecificTurnstile(page, domain);
    }

    const firstIframeSrc = await extractFirstIframeSrc(page, domain);
    if (!firstIframeSrc) throw new Error('First iframe not found');

    const iframePage = await browser.newPage();
    openPages.push(iframePage);
    await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });

    const iframeHasTurnstile = await iframePage.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Iframe has Turnstile: ${iframeHasTurnstile}`);
    if (iframeHasTurnstile) {
      await handleSpecificTurnstile(iframePage, domain);
    }

    const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
    if (!finalIframeSrc) throw new Error('Final iframe not found');

    if (finalIframeSrc.includes('cloudnestra.com/rcp/')) {
      if (!cloudnestraResponse) throw new Error('No cloudnestra response found');

      let data;
      try {
        data = JSON.parse(cloudnestraResponse);
      } catch (parseError) {
        throw new Error("Failed to parse cloudnestra response", { cause: parseError });
      }

      // Checked outside the try so this error is not relabelled as a parse failure.
      if (!data?.file) {
        throw new Error("No video URL found in cloudnestra response");
      }

      console.log(`[${domain}] Found video URL in cloudnestra response:`, data.file);
      return {
        source_domain: domain,
        hls_url: data.file,
        subtitles: data.subtitles || [],
        error: null,
      };
    }

    const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
    if (!videoFileUrl) throw new Error("HLS URL not found");

    return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };
  } catch (error) {
    console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
    throw error;
  } finally {
    for (const openPage of openPages) {
      try {
        await openPage.close();
      } catch (closeError) {
        console.warn(`[${domain}] Failed to close page: ${closeError.message}`);
      }
    }
    if (browserInstance) {
      console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
      await browserPool.release(browserInstance);
    }
  }
}
|
|
|
|
|
|
|
|
/**
 * Diagnostic scraper variant: behaves like the plain iframe walk, but dumps
 * status, headers and a preview of every `cloudnestra.com/rcp/` response to
 * the log on the way through.
 *
 * @param {string} domain - Provider base URL, used as the log prefix and `source_domain`.
 * @param {string} url - Embed page to open.
 * @param {{aborted: boolean}} signal - Abort signal; only checked before work starts.
 * @returns {Promise<{source_domain: string, hls_url: string, subtitles: *, error: null}>}
 * @throws {Error} When aborted up front, or when any step fails to find its target.
 */
async function scrapeProvider6(domain, url, signal) {
  if (signal.aborted) throw new Error('Aborted');
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);

  let browserInstance = null;
  // Tracked so every page is closed before the browser returns to the pool.
  const openPages = [];
  const warnRouteError = (routeError) => {
    console.warn(`[${domain}] Route handling failed: ${routeError.message}`);
  };

  try {
    browserInstance = await browserPool.get();
    const browser = browserInstance.browser;

    const page = await browser.newPage();
    openPages.push(page);

    await page.route('**/*', async (route) => {
      const requestUrl = route.request().url();
      console.log(`[${domain}] Request: ${requestUrl}`);

      if (!requestUrl.includes('cloudnestra.com/rcp/')) {
        await route.continue().catch(warnRouteError);
        return;
      }

      console.log(`[${domain}] 🎯 检测到 cloudnestra.com/rcp/ 请求:`);
      console.log(`[${domain}] URL: ${requestUrl}`);

      let response = null;
      try {
        response = await route.fetch();
        const content = await response.text();

        console.log(`[${domain}] 响应状态: ${response.status()}`);
        console.log(`[${domain}] 响应头:`, response.headers());
        console.log(`[${domain}] 响应内容长度: ${content.length}`);
        console.log(`[${domain}] 响应内容预览 (前500字符):`);
        console.log(content.substring(0, 500));

        // Pretty-print the body when it looks like JSON.
        const trimmed = content.trim();
        if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
          try {
            const jsonContent = JSON.parse(content);
            console.log(`[${domain}] JSON内容:`, JSON.stringify(jsonContent, null, 2));
          } catch (e) {
            console.log(`[${domain}] 不是有效的JSON格式`);
          }
        }
      } catch (error) {
        console.error(`[${domain}] 获取cloudnestra内容时出错:`, error);
      }

      // Serve the response that was already fetched instead of sending the
      // request a second time; fall back to a normal continue if the fetch failed.
      try {
        if (response) {
          await route.fulfill({ response });
        } else {
          await route.continue();
        }
      } catch (routeError) {
        warnRouteError(routeError);
      }
    });

    await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });

    const hasTurnstile = await page.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Has Turnstile: ${hasTurnstile}`);
    if (hasTurnstile) {
      await handleSpecificTurnstile(page, domain);
    }

    const firstIframeSrc = await extractFirstIframeSrc(page, domain);
    if (!firstIframeSrc) throw new Error('First iframe not found');

    const iframePage = await browser.newPage();
    openPages.push(iframePage);
    await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });

    const iframeHasTurnstile = await iframePage.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Iframe has Turnstile: ${iframeHasTurnstile}`);
    if (iframeHasTurnstile) {
      await handleSpecificTurnstile(iframePage, domain);
    }

    const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
    if (!finalIframeSrc) throw new Error('Final iframe not found');

    const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
    if (!videoFileUrl) throw new Error("HLS URL not found");

    return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };
  } catch (error) {
    console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
    throw error;
  } finally {
    for (const openPage of openPages) {
      try {
        await openPage.close();
      } catch (closeError) {
        console.warn(`[${domain}] Failed to close page: ${closeError.message}`);
      }
    }
    if (browserInstance) {
      console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
      await browserPool.release(browserInstance);
    }
  }
}
|
|
|
|
|
|
|
|
/**
 * Main provider scraper, the one used by the /extract route.
 *
 * Opens the embed page in a dedicated browser context and intercepts any
 * `cloudnestra.com/rcp/` request. If that response yields a stream URL
 * (either through the `src:` player page it points at, or directly as JSON
 * with a `file` field) the result is returned right after navigation.
 * Otherwise it falls back to walking first iframe -> final iframe -> Playerjs.
 *
 * @param {string} domain - Provider base URL; log prefix, `Referer` header and `source_domain`.
 * @param {string} url - Embed page to open.
 * @param {{aborted: boolean}} signal - Abort signal, checked before and after acquiring a browser.
 * @returns {Promise<{source_domain: string, hls_url: string, subtitles: *, error: null}>}
 * @throws {Error} When aborted, or when no stream URL can be located.
 */
async function scrapeProvider(domain, url, signal) {
  if (signal.aborted) throw new Error('Aborted');
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);

  let browserInstance = null;
  // Declared outside the try block so the finally block can actually see it;
  // a block-scoped declaration inside try would never be closed.
  let context = null;
  let cloudnestraData = null;
  const warnRouteError = (routeError) => {
    console.warn(`[${domain}] Route handling failed: ${routeError.message}`);
  };

  try {
    browserInstance = await browserPool.get();
    // Waiting for a pooled browser can take a while; re-check before doing work.
    if (signal.aborted) throw new Error('Aborted');
    const browser = browserInstance.browser;

    context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      extraHTTPHeaders: {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
      },
      viewport: { width: 1920, height: 1080 },
    });

    await context.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
    });

    const page = await context.newPage();

    await page.route('**/*', async (route) => {
      const requestUrl = route.request().url();
      console.log(`[${domain}] Request: ${requestUrl}`);

      if (!requestUrl.includes('cloudnestra.com/rcp/')) {
        await route.continue().catch(warnRouteError);
        return;
      }

      console.log(`[${domain}] 🎯 检测到 cloudnestra.com/rcp/ 请求: ${requestUrl}`);

      let response = null;
      try {
        // Random 0.5-1.5 s pause before issuing the intercepted request.
        await new Promise((resolve) => setTimeout(resolve, Math.random() * 1000 + 500));

        response = await route.fetch({
          headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': domain,
            'Cache-Control': 'no-cache',
          },
        });

        const status = response.status();
        console.log(`[${domain}] Cloudnestra 响应状态: ${status}`);

        if (status === 200) {
          const content = await response.text();
          console.log(`[${domain}] ✅ 成功获取 cloudnestra 内容,长度: ${content.length}`);

          // Case 1: the body references the real player page through `src: '...'`.
          const srcMatch = content.match(/src\s*:\s*['"](.*?)['"]/);
          if (srcMatch && srcMatch[1]) {
            // Resolve against the request URL so absolute-path, protocol-relative
            // and fully qualified values all work, then force https.
            const finalUrl = new URL(srcMatch[1], requestUrl).href.replace(/^http:\/\//, 'https://');
            console.log(`[${domain}] 从 cloudnestra 提取到的 URL: ${finalUrl}`);

            const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalUrl);
            if (videoFileUrl) {
              cloudnestraData = {
                source_domain: domain,
                hls_url: videoFileUrl,
                subtitles: subtitleSources,
                error: null,
              };
              console.log(`[${domain}] 🎉 成功从 cloudnestra 获取视频: ${videoFileUrl}`);
            }
          }

          // Case 2: the body itself is JSON carrying the stream URL; this wins over case 1.
          const trimmed = content.trim();
          if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
            try {
              const jsonContent = JSON.parse(content);
              if (jsonContent.file) {
                cloudnestraData = {
                  source_domain: domain,
                  hls_url: jsonContent.file,
                  subtitles: jsonContent.subtitles || [],
                  error: null,
                };
                console.log(`[${domain}] 🎉 从 JSON 响应获取视频: ${jsonContent.file}`);
              }
            } catch (e) {
              console.log(`[${domain}] 内容不是有效的JSON格式`);
            }
          }
        } else if (status === 522) {
          console.log(`[${domain}] ❌ Cloudflare 连接超时 (522)`);
        } else if (status === 403) {
          console.log(`[${domain}] ❌ 被 Cloudflare 拒绝访问 (403)`);
        } else {
          console.log(`[${domain}] ⚠️ 异常响应状态: ${status}`);
        }
      } catch (error) {
        console.error(`[${domain}] ❌ 处理 cloudnestra 请求失败:`, error.message);
      }

      // Settle the route exactly once: serve the response that was already
      // fetched (no second network request), or continue if the fetch failed.
      try {
        if (response) {
          await route.fulfill({ response });
        } else {
          await route.continue();
        }
      } catch (routeError) {
        warnRouteError(routeError);
      }
    });

    await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });

    if (cloudnestraData) {
      console.log(`[${domain}] 🎯 使用 cloudnestra 数据`);
      return cloudnestraData;
    }

    // Fallback: walk the iframe chain manually.
    const hasTurnstile = await page.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Has Turnstile: ${hasTurnstile}`);
    if (hasTurnstile) {
      await handleSpecificTurnstile(page, domain);
    }

    const firstIframeSrc = await extractFirstIframeSrc(page, domain);
    if (!firstIframeSrc) throw new Error('First iframe not found');

    const iframePage = await context.newPage();
    await iframePage.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: 60000 });

    const iframeHasTurnstile = await iframePage.evaluate(() => {
      return document.querySelector('.cf-turnstile') !== null;
    });
    console.log(`[${domain}] Iframe has Turnstile: ${iframeHasTurnstile}`);
    if (iframeHasTurnstile) {
      await handleSpecificTurnstile(iframePage, domain);
    }

    const finalIframeSrc = await extractFinalIframeSrc(iframePage, domain);
    if (!finalIframeSrc) throw new Error('Final iframe not found');

    const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalIframeSrc);
    if (!videoFileUrl) throw new Error("HLS URL not found");

    return { source_domain: domain, hls_url: videoFileUrl, subtitles: subtitleSources, error: null };
  } catch (error) {
    console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
    throw error;
  } finally {
    if (context) {
      try {
        // Closing the context also disposes the pages created from it.
        await context.close();
      } catch (closeError) {
        // A failed close must not prevent the browser from being released.
        console.warn(`[${domain}] Failed to close browser context: ${closeError.message}`);
      }
    }
    if (browserInstance) {
      console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
      await browserPool.release(browserInstance);
    }
  }
}
|
|
/**
 * Scraper variant that waits for the page to request `cloudnestra.com/rcp/`
 * and then parses that URL directly with getVideoAndSubtitles(), without
 * letting the browser load it.
 *
 * The interception result is delivered through a promise owned by this
 * function: a predicate passed to page.waitForFunction runs inside the
 * browser and cannot observe variables that live here.
 *
 * @param {string} domain - Provider base URL, used as the log prefix and `source_domain`.
 * @param {string} url - Embed page to open.
 * @param {{aborted: boolean}} signal - Abort signal; only checked before work starts.
 * @returns {Promise<{source_domain: string, hls_url: string, subtitles: *, error: null}>}
 * @throws {Error} When aborted up front, when navigation or extraction fails,
 *   or when no matching request is seen within 30 seconds.
 */
async function scrapeProvider4(domain, url, signal) {
  if (signal.aborted) throw new Error('Aborted');
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);

  let browserInstance = null;
  let page = null;
  let timeoutId = null;
  const warnRouteError = (routeError) => {
    console.warn(`[${domain}] Route handling failed: ${routeError.message}`);
  };

  try {
    browserInstance = await browserPool.get();
    const browser = browserInstance.browser;
    page = await browser.newPage();

    let resolveResult;
    let rejectResult;
    const intercepted = new Promise((resolve, reject) => {
      resolveResult = resolve;
      rejectResult = reject;
    });

    await page.route('**/*', async (route) => {
      const requestUrl = route.request().url();
      console.log(`[${domain}] Request: ${requestUrl}`);

      if (!requestUrl.includes('cloudnestra.com/rcp/')) {
        // Everything else must load, otherwise the page never gets far
        // enough to issue the request being waited for.
        await route.continue().catch(warnRouteError);
        return;
      }

      try {
        const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(requestUrl);
        if (!videoFileUrl) throw new Error("HLS URL not found");

        resolveResult({
          source_domain: domain,
          hls_url: videoFileUrl,
          subtitles: subtitleSources,
          error: null,
        });
      } catch (error) {
        console.error(`[${domain}] Error processing cloudnestra request:`, error);
        rejectResult(error);
      }

      // The browser does not need this response; stop intercepting afterwards.
      try {
        await route.abort();
        await page.unroute('**/*');
      } catch (routeError) {
        warnRouteError(routeError);
      }
    });

    const timedOut = new Promise((_resolve, reject) => {
      timeoutId = setTimeout(() => {
        reject(new Error('Timeout waiting for cloudnestra.com/rcp/ request'));
      }, 30000);
    });

    // Navigation runs in the background; only a navigation failure matters
    // here, and rejecting an already-settled promise is a no-op.
    page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }).catch(rejectResult);

    return await Promise.race([intercepted, timedOut]);
  } catch (error) {
    console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
    throw error;
  } finally {
    clearTimeout(timeoutId);
    if (page) {
      try {
        await page.close();
      } catch (closeError) {
        console.warn(`[${domain}] Failed to close page: ${closeError.message}`);
      }
    }
    if (browserInstance) {
      console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
      await browserPool.release(browserInstance);
    }
  }
}
|
|
|
|
|
|
|
|
/**
 * Scraper variant that intercepts the `cloudnestra.com/rcp/` request, reads
 * the `src: '...'` player URL out of its response and parses that player page
 * with getVideoAndSubtitles().
 *
 * The interception result is delivered through a promise owned by this
 * function: a predicate passed to page.waitForFunction runs inside the
 * browser and cannot observe variables that live here.
 *
 * @param {string} domain - Provider base URL, used as the log prefix and `source_domain`.
 * @param {string} url - Embed page to open.
 * @param {{aborted: boolean}} signal - Abort signal; only checked before work starts.
 * @returns {Promise<{source_domain: string, hls_url: string, subtitles: *, error: null}>}
 * @throws {Error} When aborted up front, when navigation or extraction fails,
 *   or when no matching request is seen within 30 seconds.
 */
async function scrapeProvider5(domain, url, signal) {
  if (signal.aborted) throw new Error('Aborted');
  console.log(`\n[${domain}] Starting scrape for URL: ${url}`);

  let browserInstance = null;
  let page = null;
  let timeoutId = null;
  const warnRouteError = (routeError) => {
    console.warn(`[${domain}] Route handling failed: ${routeError.message}`);
  };

  try {
    browserInstance = await browserPool.get();
    const browser = browserInstance.browser;
    page = await browser.newPage();

    let resolveResult;
    let rejectResult;
    const intercepted = new Promise((resolve, reject) => {
      resolveResult = resolve;
      rejectResult = reject;
    });

    await page.route('**/*', async (route) => {
      const requestUrl = route.request().url();
      console.log(`[${domain}] Request: ${requestUrl}`);

      if (!requestUrl.includes('cloudnestra.com/rcp/')) {
        // Everything else must load, otherwise the page never gets far
        // enough to issue the request being waited for.
        await route.continue().catch(warnRouteError);
        return;
      }

      try {
        const response = await route.fetch();
        const html = await response.text();

        const srcMatch = html.match(/src\s*:\s*['"]([^'"]+)['"]/);
        if (!srcMatch) {
          throw new Error('Could not find src in response');
        }

        // Resolve against the request URL (handles absolute paths as well as
        // fully qualified values), then force https.
        const finalUrl = new URL(srcMatch[1], requestUrl).href.replace(/^http:\/\//, 'https://');
        console.log('Final URL:', finalUrl);

        const { videoFileUrl, subtitleSources } = await getVideoAndSubtitles(finalUrl);
        if (!videoFileUrl) throw new Error("HLS URL not found");

        resolveResult({
          source_domain: domain,
          hls_url: videoFileUrl,
          subtitles: subtitleSources,
          error: null,
        });
      } catch (error) {
        console.error(`[${domain}] Error processing cloudnestra request:`, error);
        rejectResult(error);
      }

      // The browser does not need this response; stop intercepting afterwards.
      try {
        await route.abort();
        await page.unroute('**/*');
      } catch (routeError) {
        warnRouteError(routeError);
      }
    });

    const timedOut = new Promise((_resolve, reject) => {
      timeoutId = setTimeout(() => {
        reject(new Error('Timeout waiting for cloudnestra.com/rcp/ request'));
      }, 30000);
    });

    // Navigation runs in the background; only a navigation failure matters
    // here, and rejecting an already-settled promise is a no-op.
    page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }).catch(rejectResult);

    return await Promise.race([intercepted, timedOut]);
  } catch (error) {
    console.error(`[${domain}] Error in scrapeProvider: ${error.message}`);
    throw error;
  } finally {
    clearTimeout(timeoutId);
    if (page) {
      try {
        await page.close();
      } catch (closeError) {
        console.warn(`[${domain}] Failed to close page: ${closeError.message}`);
      }
    }
    if (browserInstance) {
      console.log(`[${domain}] Releasing browser ${browserInstance.id} back to pool.`);
      await browserPool.release(browserInstance);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Best-effort handling of a `.cf-turnstile` widget on the given page: waits
 * for the widget, invokes `window.cftCallback` (when defined) with a
 * generated token, submits that token to `/rcp_verify` and waits for the
 * resulting navigation.
 *
 * Never throws: any failure is logged as a warning and the caller continues.
 *
 * @param {object} page - Page-like object exposing `waitForSelector`, `waitForNavigation` and `evaluate`.
 * @param {string} domain - Provider label used only as a log prefix.
 * @returns {Promise<void>}
 */
async function handleSpecificTurnstile(page, domain) {
  try {
    console.log(`[${domain}] Checking for Turnstile...`);

    const turnstileExists = await page.waitForSelector('.cf-turnstile', {
      timeout: 10000,
      state: 'attached',
    }).then(() => true).catch(() => false);

    if (!turnstileExists) {
      console.log(`[${domain}] No Turnstile detected`);
      return;
    }

    console.log(`[${domain}] Turnstile detected - bypassing...`);

    await page.waitForSelector('.cf-turnstile', {
      timeout: 10000,
      state: 'visible',
    });

    // Start listening for the navigation BEFORE triggering it. Registering
    // the wait only after form.submit() has run can miss a fast navigation
    // and then sit until the 30 s timeout.
    await Promise.all([
      page.waitForNavigation({
        waitUntil: 'networkidle',
        timeout: 30000,
      }),
      page.evaluate(() => {
        const mockToken = 'mock-token-' + Math.random().toString(36).substring(2);
        if (typeof window.cftCallback === 'function') {
          window.cftCallback(mockToken);
        }

        const form = document.createElement('form');
        form.method = 'POST';
        form.action = '/rcp_verify';
        const input = document.createElement('input');
        input.type = 'hidden';
        input.name = 'token';
        input.value = mockToken;
        form.appendChild(input);
        document.body.appendChild(form);
        form.submit();
      }),
    ]);

    console.log(`[${domain}] Turnstile bypass completed`);
  } catch (error) {
    console.warn(`[${domain}] Turnstile bypass warning: ${error.message}`);
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Locates the URL of the innermost player on an iframe page.
 *
 * Lookup order, evaluated inside the page:
 *   1. the `src` of the first iframe whose inline style does not contain `display:none`;
 *   2. per script, in document order: a `src: '...'` value when the script
 *      mentions `loadIframe` (resolved against the page URL), otherwise the
 *      first URL in the script that contains `embed`.
 *
 * @param {object} page - Page-like object exposing `waitForFunction` and `evaluate`.
 * @param {string} domain - Provider label used only as a log prefix.
 * @returns {Promise<string|null>} The URL, or null when nothing matches or any step throws.
 */
async function extractFinalIframeSrc(page, domain) {
  // Both callbacks execute inside the page and must stay self-contained.
  const pageHasCandidates = () => {
    return document.querySelectorAll('script, iframe').length > 0;
  };

  const locateFinalSrc = () => {
    const visibleFrame = document.querySelector('iframe:not([style*="display:none"])');
    if (visibleFrame?.src) return visibleFrame.src;

    for (const { textContent: code } of document.querySelectorAll('script')) {
      if (!code) continue;

      if (code.includes('loadIframe')) {
        const loaderSrc = code.match(/src:\s*['"](.*?)['"]/);
        if (loaderSrc) return new URL(loaderSrc[1], window.location.href).href;
      }

      const embedUrl = code.match(/(https?:)?\/\/[^'"]+embed[^'"]*/);
      if (embedUrl) {
        const [found] = embedUrl;
        if (found.startsWith('http')) return found;
        return `https:${found}`;
      }
    }

    return null;
  };

  try {
    await page.waitForFunction(pageHasCandidates, { timeout: 15000 });
    return await page.evaluate(locateFinalSrc);
  } catch (err) {
    console.error(`[${domain}] Error finding final iframe: ${err.message}`);
    return null;
  }
}
|
|
|
|
|
/**
 * GET /extract — resolves a stream URL for a movie or TV episode.
 *
 * Query parameters:
 *   - tmdb_id (required)
 *   - type: "tv" selects episode mode; anything else is treated as a movie
 *   - season, episode: required integers when type is "tv"
 *
 * Responds with `{ success: true, result }` from the first provider that
 * succeeds (cached under a key built from the normalized parameters), 400 on
 * invalid input, 404 when every provider fails, and 500 on unexpected errors.
 */
app.get("/extract", async (req, res) => {
  const isTv = req.query.type === "tv";

  if (!req.query.tmdb_id) {
    return res.status(400).json({ success: false, error: "tmdb_id is required" });
  }
  const tmdbId = String(req.query.tmdb_id);

  let season;
  let episode;
  if (isTv) {
    if (!req.query.season || !req.query.episode) {
      return res.status(400).json({ success: false, error: "season and episode are required" });
    }
    season = Number.parseInt(req.query.season, 10);
    episode = Number.parseInt(req.query.episode, 10);
    // parseInt yields NaN for non-numeric input; reject it instead of
    // building a provider URL containing "NaN".
    if (!Number.isInteger(season) || !Number.isInteger(episode)) {
      return res.status(400).json({ success: false, error: "season and episode must be integers" });
    }
  }

  // Key on the normalized inputs only, so parameter order and unrelated
  // query parameters cannot create duplicate cache entries.
  const cacheKey = JSON.stringify(isTv ? ["tv", tmdbId, season, episode] : ["movie", tmdbId]);
  const cached = cache.get(cacheKey);
  if (cached) {
    console.log("Serving from cache");
    return res.json(cached);
  }

  // Caller-supplied values are escaped before being placed in a provider URL.
  const urls = Object.fromEntries(
    PROVIDERS.map((domain) => {
      if (!isTv) {
        return [domain, `${domain}/embed/movie/${encodeURIComponent(tmdbId)}`];
      }
      const query = new URLSearchParams({
        tmdb: tmdbId,
        season: String(season),
        episode: String(episode),
      });
      return [domain, `${domain}/embed/tv?${query}`];
    }),
  );

  const controller = new AbortController();
  const signal = controller.signal;

  try {
    const promises = Object.entries(urls).map(([domain, url]) =>
      limit(() => scrapeProvider(domain, url, signal)),
    );
    const firstSuccessfulResult = await Promise.any(promises);
    // Tell scrapes that have not started yet that they are no longer needed.
    controller.abort();

    const response = { success: true, result: firstSuccessfulResult };
    cache.set(cacheKey, response);
    res.json(response);
  } catch (err) {
    if (err instanceof AggregateError) {
      console.error("All providers failed to find a link.");
      res.status(404).json({ success: false, error: "Could not find the video from any provider." });
    } else {
      console.error("An unexpected server error occurred:", err);
      res.status(500).json({ success: false, error: "Unexpected server error" });
    }
  }
});
|
|
|
|
|
// Startup: build and warm the browser pool first, and only then start
// accepting HTTP traffic. Any failure during startup terminates the process.
(async function bootstrap() {
  // Command-line switches passed to every Chromium instance the pool launches.
  const chromiumArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--disable-extensions',
    '--disable-notifications',
    '--disable-infobars',
    '--disable-web-security',
    '--disable-features=IsolateOrigins,site-per-process',
  ];

  try {
    browserPool = new BrowserPool({
      chromium,
      minSize: 1,
      maxSize: 5,
      maxUsage: 100,
      launchOptions: { headless: true, args: chromiumArgs },
    });

    await browserPool.initialize();
    console.log("Browser pool initialized successfully.");

    app.listen(PORT, () => {
      console.log(`🚀 Universal Video Extractor running at http://localhost:${PORT}`);
    });
  } catch (startupError) {
    console.error("Failed to initialize browser pool:", startupError);
    process.exit(1);
  }
})();
|
|
|
|
|
// Guards against running the shutdown sequence twice when signals repeat.
let isShuttingDown = false;

/**
 * Shared SIGINT/SIGTERM handler: shuts the browser pool down (when one was
 * created) and exits. The process always exits, with code 1 when the pool
 * shutdown failed and 0 otherwise.
 *
 * @returns {Promise<void>}
 */
async function handleShutdownSignal() {
  if (isShuttingDown) return;
  isShuttingDown = true;

  console.log("Shutting down gracefully...");

  let exitCode = 0;
  try {
    if (browserPool) await browserPool.shutdown();
  } catch (shutdownError) {
    // Without this, a rejected shutdown would skip process.exit entirely.
    console.error("Error while shutting down browser pool:", shutdownError);
    exitCode = 1;
  }
  process.exit(exitCode);
}

process.on("SIGINT", handleShutdownSignal);
process.on("SIGTERM", handleShutdownSignal);
|
|
|