|
|
|
|
|
|
|
|
import express, { json } from "express"; |
|
|
import cors from "cors"; |
|
|
|
|
|
import { chromium } from "playwright"; |
|
|
import fetch from 'node-fetch'; |
|
|
import { JSDOM } from 'jsdom'; |
|
|
import { LRUCache } from 'lru-cache'; |
|
|
|
|
|
|
|
|
// HTTP application. cors() is used with its default options and json()
// parses JSON request bodies.
const app = express();
app.use(cors(), json());

// In-memory LRU cache (used by the /extract route for successful responses):
// at most 500 entries, each with a 15 minute TTL.
const CACHE_MAX_ENTRIES = 500;
const CACHE_TTL_MS = 15 * 60 * 1000;
const cache = new LRUCache({ max: CACHE_MAX_ENTRIES, ttl: CACHE_TTL_MS });

// Playwright resource types that the request interceptor aborts.
const blockedResourceTypes = new Set([
  'image',
  'stylesheet',
  'font',
  'media',
]);

// Requests whose URL contains any of these substrings are aborted as well.
const blockedDomains = [
  'googlesyndication.com',
  'googletagmanager.com',
  'google-analytics.com',
  'doubleclick.net',
];
|
|
|
|
|
|
|
|
|
|
|
/**
 * Downloads the final player page and extracts the HLS stream URL and the
 * subtitle list from its inline `new Playerjs(...)` bootstrap script.
 *
 * @param {string} finalUrl - URL of the page that embeds the Playerjs config.
 * @returns {Promise<{videoFileUrl: (string|null), subtitleSources: string[]}>}
 *   `videoFileUrl` is null when no Playerjs script, or no `file` value
 *   containing "m3u8", is found. `subtitleSources` holds the comma-separated
 *   entries of the `subtitle` value, trimmed, with empty entries removed.
 * @throws {Error} When the response status is not OK. Any error raised while
 *   fetching or parsing is logged and rethrown unchanged.
 */
async function getVideoAndSubtitles(finalUrl) {
  try {
    const response = await fetch(finalUrl);
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }

    const html = await response.text();
    const { document } = new JSDOM(html).window;
    const playerScript = Array.from(document.querySelectorAll('script'))
      .find((script) => script.textContent?.includes('new Playerjs'));

    if (!playerScript) {
      console.warn(`[Helper] Player.js script not found on ${finalUrl}`);
      return { videoFileUrl: null, subtitleSources: [] };
    }

    const scriptSource = playerScript.textContent;

    // `[^"]*` keeps each capture inside its own quoted value. A lazy `.*?`
    // may run past the closing quote and swallow a later property when the
    // first `file` value does not itself contain "m3u8".
    const fileMatch = scriptSource.match(/file:\s*"([^"]*m3u8[^"]*)"/);
    const subtitlesMatch = scriptSource.match(/subtitle:\s*"([^"]*)"/);

    const subtitleSources = subtitlesMatch
      ? subtitlesMatch[1]
          .split(',')
          .map((entry) => entry.trim())
          .filter(Boolean)
      : [];

    return {
      videoFileUrl: fileMatch ? fileMatch[1] : null,
      subtitleSources,
    };
  } catch (error) {
    console.error('Error in getVideoAndSubtitles:', error);
    throw error;
  }
}
|
|
|
|
|
/**
 * Best-effort handling of a Cloudflare Turnstile widget on the current page.
 *
 * Waits up to 5 seconds for a visible `.cf-turnstile` element. When one
 * appears and the page defines `window.cftCallback`, that callback is invoked
 * with a random mock token; the function then pauses 2 seconds. Every failure
 * is logged and swallowed, so this function never rejects.
 *
 * @param {object} page - Playwright page (uses `waitForSelector`, `evaluate`
 *   and `waitForTimeout`).
 * @returns {Promise<void>}
 */
async function handleTurnstile(page) {
  try {
    await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
    console.log('Turnstile detected - attempting generic bypass...');

    // Runs inside the page; reports whether a callback was actually invoked
    // so the log below does not claim success when nothing happened.
    const callbackInvoked = await page.evaluate(() => {
      if (typeof window.cftCallback !== 'function') {
        return false;
      }
      const mockToken = `mock-token-${Math.random().toString(36).substring(2)}`;
      window.cftCallback(mockToken);
      return true;
    });

    if (callbackInvoked) {
      console.log('Turnstile JS callback triggered.');
    } else {
      console.warn('Turnstile widget found but window.cftCallback is not defined; no callback triggered.');
    }

    await page.waitForTimeout(2000);
  } catch (error) {
    // Thrown values are not guaranteed to be Error instances; read `name`
    // defensively so the handler itself cannot throw.
    const errorName = typeof error?.name === 'string' ? error.name : '';
    if (errorName.includes('Timeout')) {
      console.log('Turnstile not found on page, skipping bypass.');
    } else {
      console.warn('An error occurred during Turnstile handling:', error?.message ?? error);
    }
  }
}
|
|
|
|
|
|
|
|
/**
 * Walks the vidsrc.xyz embed page chain in a fresh browser context and returns
 * the stream data extracted from the final player page.
 *
 * Steps: open the embed URL, read `#player_iframe`'s `src`, navigate to it,
 * read the next iframe's `src` (nested iframe first, then a top-level iframe),
 * and hand that URL to `getVideoAndSubtitles`. The context is always closed.
 *
 * @param {object} browser - Playwright browser used to create the context.
 * @param {object} params
 * @param {string} params.type - 'tv' selects the TV embed URL; anything else
 *   uses the movie embed URL.
 * @param {string} params.id - TMDB id.
 * @param {string} [params.season] - Season number (used for 'tv').
 * @param {string} [params.episode] - Episode number (used for 'tv').
 * @returns {Promise<object>} Whatever `getVideoAndSubtitles` resolves to.
 * @throws {Error} When an iframe source is missing or any navigation/extraction
 *   step fails; the error is logged and rethrown.
 */
async function scrapeSource(browser, { type, id, season, episode }) {
  const embedOrigin = 'https://vidsrc.xyz';
  const navigationOptions = { waitUntil: 'networkidle', timeout: 60000 };
  // Short wait for the nested iframe so a page without one can still reach
  // the top-level iframe fallback.
  const nestedIframeTimeoutMs = 5000;

  let context = null;
  console.log(`\nScraping with STANDARD Playwright for: type=${type}, id=${id}, s=${season}, e=${episode}`);

  try {
    // The parameters come from the HTTP query string, so they are encoded
    // rather than spliced into the URL verbatim.
    let initialUrl;
    if (type === 'tv') {
      const query = new URLSearchParams({
        tmdb: String(id),
        season: String(season),
        episode: String(episode),
      });
      initialUrl = `${embedOrigin}/embed/tv?${query}`;
    } else {
      initialUrl = `${embedOrigin}/embed/movie/${encodeURIComponent(id)}`;
    }
    console.log(`Initial URL: ${initialUrl}`);

    context = await browser.newContext();

    // Make navigator.webdriver report false in every page of this context.
    await context.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
    });

    await context.route('**/*', (route) => {
      const request = route.request();
      const url = request.url();

      // Answer verification requests locally with a canned response.
      if (url.includes('/rcp_verify')) {
        return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
      }

      const isBlockedType = blockedResourceTypes.has(request.resourceType());
      const isBlockedDomain = blockedDomains.some((blocked) => url.includes(blocked));
      if (isBlockedType || isBlockedDomain) {
        return route.abort();
      }
      return route.continue();
    });

    const page = await context.newPage();

    await page.goto(initialUrl, navigationOptions);
    await handleTurnstile(page);

    const firstIframeSrc = await page.locator('#player_iframe').getAttribute('src');
    if (!firstIframeSrc) throw new Error('First iframe (#player_iframe) not found');
    // `src` may be protocol-relative or path-relative; page.goto needs an
    // absolute URL, so resolve it against the current document.
    const firstIframeUrl = new URL(firstIframeSrc, page.url()).href;
    console.log(`First iframe src: ${firstIframeUrl}`);

    await page.goto(firstIframeUrl, navigationOptions);
    await handleTurnstile(page);

    // getAttribute throws when the nested iframe never appears instead of
    // returning null, so the fallback has to live in a catch rather than
    // behind `||`.
    let finalIframeSrc = null;
    try {
      finalIframeSrc = await page
        .frameLocator('iframe')
        .locator('iframe')
        .getAttribute('src', { timeout: nestedIframeTimeoutMs });
    } catch (error) {
      console.warn(`Nested iframe lookup failed (${error?.message ?? error}); falling back to top-level iframe.`);
    }
    if (!finalIframeSrc) {
      finalIframeSrc = await page.locator('iframe').getAttribute('src');
    }
    if (!finalIframeSrc) throw new Error('Final iframe source not found');

    // NOTE(review): a path-relative src read from the nested iframe is
    // resolved against the top-level page URL here, not the outer frame's
    // URL — confirm that is acceptable for the target site.
    const finalIframeUrl = new URL(finalIframeSrc, page.url()).href;
    console.log(`Final iframe src: ${finalIframeUrl}`);

    return await getVideoAndSubtitles(finalIframeUrl);
  } catch (error) {
    console.error('Full error during scraping process:', error.message);
    throw error;
  } finally {
    if (context) {
      await context.close();
    }
  }
}
|
|
|
|
|
|
|
|
// Shared Playwright browser instance; assigned by the startup routine and
// read by the /extract handler and the shutdown hook.
let browser;

/**
 * GET /extract
 *
 * Query parameters:
 *   - tmdb_id (required): TMDB id of the title.
 *   - type (optional): 'tv' or 'movie'; defaults to 'movie'.
 *   - season, episode: required when type is 'tv'.
 *
 * Responses:
 *   - 200 { success: true, result } when a video URL was found (also cached).
 *   - 400 when required parameters are missing.
 *   - 404 when scraping finished without a video URL.
 *   - 500 when scraping threw.
 */
app.get("/extract", async (req, res) => {
  const type = req.query.type || 'movie';
  const { tmdb_id: id, season, episode } = req.query;

  if (!id) {
    return res.status(400).json({ success: false, error: "tmdb_id is required" });
  }
  const isTv = type === 'tv';
  if (isTv && (!season || !episode)) {
    return res.status(400).json({ success: false, error: "season and episode are required for type 'tv'" });
  }

  // Key only on the values that determine the scrape. Keying on the raw
  // query object lets parameter order, unrelated extra parameters, and an
  // implicit vs. explicit `type=movie` create separate entries for the
  // same title.
  const cacheKey = JSON.stringify(
    isTv ? { type: 'tv', id, season, episode } : { type: 'movie', id },
  );
  const cached = cache.get(cacheKey);
  if (cached) {
    console.log("Serving from cache");
    return res.json(cached);
  }

  try {
    const result = await scrapeSource(browser, { type, id, season, episode });
    if (!result?.videoFileUrl) {
      res.status(404).json({ success: false, error: "Could not find video stream from the source." });
      return;
    }

    const response = { success: true, result };
    cache.set(cacheKey, response);
    res.status(200).json(response);
  } catch (error) {
    res.status(500).json({ success: false, error: `An unexpected error occurred: ${error.message}` });
  }
});
|
|
|
|
|
|
|
|
// Startup: launch one headless Chromium instance, store it in `browser`, and
// only then start accepting HTTP requests. Any failure here is logged and
// terminates the process with exit code 1.
(async function bootstrap() {
  const port = process.env.PORT || 7860;
  const launchOptions = {
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
    ],
  };

  try {
    console.log("Launching a persistent STANDARD Playwright browser instance...");
    browser = await chromium.launch(launchOptions);
    console.log("Browser launched successfully.");

    app.listen(port, () => {
      console.log(`🚀 Scraper server running on port ${port}`);
    });
  } catch (error) {
    console.error("Failed to launch browser:", error);
    process.exit(1);
  }
})();
|
|
|
|
|
|
|
|
/**
 * Signal handler: closes the shared browser (when one exists) and exits.
 *
 * Exits with code 0 after a clean close, or with code 1 when closing the
 * browser fails, so a rejected `browser.close()` can no longer surface as an
 * unhandled rejection that skips the exit call.
 *
 * @returns {Promise<void>}
 */
const gracefulShutdown = async () => {
  console.log("\nShutting down gracefully...");

  let exitCode = 0;
  try {
    if (browser) {
      await browser.close();
      console.log("Browser instance closed.");
    }
  } catch (error) {
    exitCode = 1;
    console.error("Failed to close browser during shutdown:", error);
  }

  process.exit(exitCode);
};

process.on('SIGINT', gracefulShutdown);
process.on('SIGTERM', gracefulShutdown);