File size: 7,653 Bytes
974a9bc
523a51b
 
 
974a9bc
3c42244
523a51b
 
73a8c49
523a51b
e69ef9d
523a51b
 
 
 
73a8c49
523a51b
e69ef9d
 
523a51b
974a9bc
523a51b
 
 
 
 
 
 
 
 
 
e69ef9d
 
 
 
 
523a51b
 
 
 
 
 
 
 
 
 
 
 
 
e69ef9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
974a9bc
523a51b
 
974a9bc
523a51b
 
 
974a9bc
523a51b
 
e69ef9d
523a51b
 
 
 
e69ef9d
523a51b
974a9bc
 
 
 
 
 
e69ef9d
 
 
 
 
 
 
523a51b
e69ef9d
 
 
 
523a51b
 
e69ef9d
 
 
 
523a51b
e69ef9d
523a51b
 
 
e69ef9d
 
523a51b
e69ef9d
523a51b
 
 
e69ef9d
523a51b
 
 
 
e69ef9d
523a51b
 
e69ef9d
523a51b
 
 
 
e69ef9d
 
523a51b
 
 
 
 
 
 
 
 
 
 
 
 
e69ef9d
 
 
 
 
 
 
523a51b
 
 
 
e69ef9d
 
 
523a51b
 
 
 
 
 
 
 
e69ef9d
523a51b
 
e69ef9d
974a9bc
 
e69ef9d
 
 
523a51b
 
 
e69ef9d
523a51b
 
 
 
 
 
 
e69ef9d
523a51b
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
// server.js (Final Version using STANDARD Playwright)

import express, { json } from "express";
import cors from "cors";
// [CORRECT] Using the standard, official playwright library
import { chromium } from "playwright";
import fetch from 'node-fetch';
import { JSDOM } from 'jsdom';
import { LRUCache } from 'lru-cache';

// --- 1. Initialization ---
// Express application with the CORS middleware and the JSON body parser installed.
const app = express();
app.use(cors());
app.use(json());

// In-memory LRU cache for successful responses: up to 500 entries, 15 minutes each.
const cache = new LRUCache({
    max: 500,
    ttl: 1000 * 60 * 15,
});

// Playwright resource types that the request interceptor aborts.
const blockedResourceTypes = new Set([
    'image',
    'stylesheet',
    'font',
    'media',
]);

// Requests whose URL contains any of these substrings are aborted as well.
const blockedDomains = [
    'googlesyndication.com',
    'googletagmanager.com',
    'google-analytics.com',
    'doubleclick.net',
];

// --- 2. Helper functions (complete; no changes needed) ---

/**
 * Downloads the final player page and pulls the stream and subtitle URLs out of
 * the inline script that constructs the Playerjs player.
 *
 * @param {string} finalUrl - URL of the page expected to contain a `new Playerjs` script.
 * @returns {Promise<{videoFileUrl: (string|null), subtitleSources: string[]}>}
 *   `videoFileUrl` is the first `file:"..."` value containing "m3u8" (or null);
 *   `subtitleSources` is the comma-separated `subtitle:"..."` value split into
 *   trimmed, non-empty entries (or an empty array).
 * @throws {Error} When the HTTP response is not OK, or when fetching/parsing fails;
 *   the error is logged and rethrown unchanged.
 */
async function getVideoAndSubtitles(finalUrl) {
    try {
        const pageResponse = await fetch(finalUrl);
        if (!pageResponse.ok) {
            throw new Error(`HTTP error! Status: ${pageResponse.status}`);
        }

        const { document } = new JSDOM(await pageResponse.text()).window;

        // Locate the first <script> whose text constructs the player.
        let playerSource = null;
        for (const scriptTag of document.querySelectorAll('script')) {
            if (scriptTag.textContent?.includes('new Playerjs')) {
                playerSource = scriptTag.textContent;
                break;
            }
        }

        if (playerSource === null) {
            console.warn(`[Helper] Player.js script not found on ${finalUrl}`);
            return { videoFileUrl: null, subtitleSources: [] };
        }

        const fileHit = /file:"(.*?m3u8.*?)"/.exec(playerSource);
        const subtitleHit = /subtitle:"(.*?)"/.exec(playerSource);

        // The subtitle option is a comma-separated list; drop blank entries.
        const subtitleSources = [];
        if (subtitleHit) {
            for (const rawEntry of subtitleHit[1].split(',')) {
                const entry = rawEntry.trim();
                if (entry) {
                    subtitleSources.push(entry);
                }
            }
        }

        const videoFileUrl = fileHit ? fileHit[1] : null;
        return { videoFileUrl, subtitleSources };
    } catch (error) {
        console.error('Error in getVideoAndSubtitles:', error);
        throw error;
    }
}

/**
 * Best-effort handling of a Cloudflare Turnstile widget on the current page.
 *
 * Waits up to 5 seconds for a visible `.cf-turnstile` element. If one shows up,
 * calls `window.cftCallback` (when the page defines it) with a random mock token
 * and then pauses for 2 seconds. Failures are logged and never propagated.
 *
 * @param {object} page - Playwright page to inspect.
 * @returns {Promise<void>}
 */
async function handleTurnstile(page) {
    // Executed inside the page by Playwright, so it must not capture outer variables.
    const invokeTurnstileCallback = () => {
        if (typeof window.cftCallback !== 'function') {
            return;
        }
        const mockToken = `mock-token-${Math.random().toString(36).substring(2)}`;
        window.cftCallback(mockToken);
    };

    try {
        await page.waitForSelector('.cf-turnstile', { state: 'visible', timeout: 5000 });
        console.log('Turnstile detected - attempting generic bypass...');

        await page.evaluate(invokeTurnstileCallback);
        console.log('Turnstile JS callback triggered.');

        await page.waitForTimeout(2000);
    } catch (error) {
        const timedOut = error.name.includes('Timeout');
        if (!timedOut) {
            console.warn('An error occurred during Turnstile handling:', error.message);
            return;
        }
        // A timeout simply means no widget is present on this page.
        console.log('Turnstile not found on page, skipping bypass.');
    }
}

// --- 3. Core scraping logic (using standard Playwright) ---
/**
 * Builds the vidsrc embed page URL for a title.
 *
 * Every caller-supplied value is URL-encoded so it cannot add path segments or
 * query parameters of its own.
 *
 * @param {{type: string, id: string, season?: string, episode?: string}} target
 * @returns {string} Absolute embed URL; anything other than type 'tv' yields the movie URL.
 */
function buildEmbedUrl({ type, id, season, episode }) {
    const domain = "https://vidsrc.xyz";
    if (type === 'tv') {
        const tvUrl = new URL('/embed/tv', domain);
        tvUrl.searchParams.set('tmdb', id);
        tvUrl.searchParams.set('season', season);
        tvUrl.searchParams.set('episode', episode);
        return tvUrl.href;
    }
    return new URL(`/embed/movie/${encodeURIComponent(id)}`, domain).href;
}

/**
 * Reads the `src` attribute of the first element matched by a Playwright locator.
 *
 * `locator.getAttribute` waits for the element and throws a timeout error when it
 * never appears; that case is converted to `null` so callers can fall back or
 * report a precise error. Other errors are rethrown.
 *
 * @param {object} iframeLocator - Playwright locator expected to match an iframe.
 * @param {number} timeout - Maximum time to wait for the element, in milliseconds.
 * @returns {Promise<string|null>} The attribute value, or null if missing or not found in time.
 */
async function readIframeSrc(iframeLocator, timeout) {
    try {
        return await iframeLocator.first().getAttribute('src', { timeout });
    } catch (error) {
        if (error instanceof Error && error.name.includes('Timeout')) {
            return null;
        }
        throw error;
    }
}

/**
 * Walks the vidsrc embed page chain in a fresh browser context and extracts the
 * stream and subtitle URLs from the final player page.
 *
 * @param {object} browser - Launched Playwright browser used to create the context.
 * @param {{type: string, id: string, season?: string, episode?: string}} target -
 *   Title to scrape; `season` and `episode` are only used when `type` is 'tv'.
 * @returns {Promise<{videoFileUrl: (string|null), subtitleSources: string[]}>}
 *   Whatever `getVideoAndSubtitles` returns for the final iframe URL.
 * @throws {Error} When an expected iframe cannot be found, or navigation/fetching
 *   fails; the error is logged and rethrown.
 */
async function scrapeSource(browser, { type, id, season, episode }) {
    const NAVIGATION_TIMEOUT_MS = 60000;
    const IFRAME_TIMEOUT_MS = 30000;
    // The nested iframe is optional, so give up on it quickly and use the fallback.
    const NESTED_IFRAME_TIMEOUT_MS = 5000;

    let context = null;
    console.log(`\nScraping with STANDARD Playwright for: type=${type}, id=${id}, s=${season}, e=${episode}`);

    try {
        const initialUrl = buildEmbedUrl({ type, id, season, episode });
        console.log(`Initial URL: ${initialUrl}`);

        context = await browser.newContext();

        // [Key] Hand-rolled replacement for the core of StealthPlugin:
        // before every page loads, inject a script that hides the webdriver flag.
        await context.addInitScript(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
        });

        await context.route('**/*', (route) => {
            const request = route.request();
            const url = request.url();
            const resourceType = request.resourceType();

            // Answer the verification call locally instead of hitting the network.
            if (url.includes('/rcp_verify')) {
                return route.fulfill({ status: 200, contentType: 'application/json', body: '1' });
            }
            if (blockedResourceTypes.has(resourceType) || blockedDomains.some(domain => url.includes(domain))) {
                return route.abort();
            }
            return route.continue();
        });

        const page = await context.newPage();

        // Stage 1: embed page -> first player iframe.
        await page.goto(initialUrl, { waitUntil: 'networkidle', timeout: NAVIGATION_TIMEOUT_MS });
        await handleTurnstile(page);
        const firstIframeRaw = await readIframeSrc(page.locator('#player_iframe'), IFRAME_TIMEOUT_MS);
        if (!firstIframeRaw) throw new Error('First iframe (#player_iframe) not found');
        // The src may be relative or protocol-relative; resolve it against the current page.
        const firstIframeSrc = new URL(firstIframeRaw, page.url()).href;
        console.log(`First iframe src: ${firstIframeSrc}`);

        // Stage 2: first iframe page -> final player iframe (nested one preferred).
        await page.goto(firstIframeSrc, { waitUntil: 'networkidle', timeout: NAVIGATION_TIMEOUT_MS });
        await handleTurnstile(page);
        const nestedIframeRaw = await readIframeSrc(
            page.frameLocator('iframe').locator('iframe'),
            NESTED_IFRAME_TIMEOUT_MS,
        );
        const finalIframeRaw = nestedIframeRaw || await readIframeSrc(page.locator('iframe'), IFRAME_TIMEOUT_MS);
        if (!finalIframeRaw) throw new Error('Final iframe source not found');
        const finalIframeSrc = new URL(finalIframeRaw, page.url()).href;
        console.log(`Final iframe src: ${finalIframeSrc}`);

        // Stage 3: parse the player page itself.
        return await getVideoAndSubtitles(finalIframeSrc);

    } catch (error) {
        console.error('Full error during scraping process:', error.message);
        throw error;
    } finally {
        if (context) {
            try {
                await context.close();
            } catch (closeError) {
                // A cleanup failure must not replace the scrape result or the original error.
                console.warn('Failed to close browser context:', closeError.message);
            }
        }
    }
}

// --- 4. Express API routes ---
// Shared Playwright browser instance used by the /extract handler.
let browser;

/**
 * GET /extract
 *
 * Query parameters:
 *   - tmdb_id (required): title id forwarded to the scraper.
 *   - type (optional): 'tv' selects the TV flow; anything else is handled as 'movie'.
 *   - season, episode: required when type is 'tv'.
 *
 * Responses:
 *   - 200 { success: true, result } when a stream URL was found (cached afterwards).
 *   - 400 when a required parameter is missing or is not a plain string.
 *   - 404 when scraping finished without finding a stream URL.
 *   - 500 when scraping threw.
 */
app.get("/extract", async (req, res) => {
    // req.query is user-controlled: repeated or bracketed keys arrive as arrays/objects.
    const isNonEmptyString = (value) => typeof value === 'string' && value !== '';

    const { tmdb_id: id, season, episode } = req.query;
    // Only an explicit 'tv' selects the TV flow, so normalize every other value to 'movie'.
    const type = req.query.type === 'tv' ? 'tv' : 'movie';

    if (!isNonEmptyString(id)) {
        return res.status(400).json({ success: false, error: "tmdb_id is required" });
    }
    if (type === 'tv' && !(isNonEmptyString(season) && isNonEmptyString(episode))) {
        return res.status(400).json({ success: false, error: "season and episode are required for type 'tv'" });
    }

    // Key on the values that actually determine the result, so parameter order,
    // unrelated parameters, or an omitted `type` cannot fragment the cache.
    const cacheKey = JSON.stringify(type === 'tv' ? [type, id, season, episode] : [type, id]);
    const cached = cache.get(cacheKey);
    if (cached) {
        console.log("Serving from cache");
        return res.json(cached);
    }

    try {
        const result = await scrapeSource(browser, { type, id, season, episode });
        if (result && result.videoFileUrl) {
            const response = { success: true, result };
            cache.set(cacheKey, response);
            res.status(200).json(response);
        } else {
            res.status(404).json({ success: false, error: "Could not find video stream from the source." });
        }
    } catch (error) {
        res.status(500).json({ success: false, error: `An unexpected error occurred: ${error.message}` });
    }
});

// --- 5. Server startup ---
/**
 * Launches the shared headless Chromium instance, stores it in `browser`, and
 * then starts the HTTP server on `process.env.PORT` (7860 when unset).
 *
 * @returns {Promise<void>} Resolves once `app.listen` has been called.
 */
async function startServer() {
    const PORT = process.env.PORT || 7860;
    const launchOptions = {
        headless: true,
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
    };

    console.log("Launching a persistent STANDARD Playwright browser instance...");
    // Launch through the `chromium` export of the standard 'playwright' package.
    browser = await chromium.launch(launchOptions);
    console.log("Browser launched successfully.");

    const announceReady = () => {
        console.log(`🚀 Scraper server running on port ${PORT}`);
    };
    app.listen(PORT, announceReady);
}

// Any startup failure is fatal: report it and exit with a non-zero status.
startServer().catch((error) => {
    console.error("Failed to launch browser:", error);
    process.exit(1);
});

// --- 6. Graceful shutdown ---
/**
 * Closes the shared browser (if one was launched) and terminates the process.
 *
 * The process always exits: with code 0 after a clean shutdown, or with code 1
 * when closing the browser fails, instead of leaving an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
const gracefulShutdown = async () => {
    console.log("\nShutting down gracefully...");
    let exitCode = 0;
    try {
        if (browser) {
            await browser.close();
            console.log("Browser instance closed.");
        }
    } catch (error) {
        console.error("Failed to close browser instance:", error);
        exitCode = 1;
    }
    process.exit(exitCode);
};

process.on('SIGINT', gracefulShutdown);
process.on('SIGTERM', gracefulShutdown);