const puppeteer = require('puppeteer-core');
// --- PINTEREST SCRAPER ---
/**
 * Pulls the first Pinterest video URL out of a pin page's HTML and rewrites it
 * to the 720p MP4 form.
 *
 * @param {string} html - Full HTML source of a pin page.
 * @returns {string|null} The 720p MP4 URL, or null when the page contains no
 *   `v1.pinimg.com/videos/mc/` URL or that URL carries no 32-hex-digit hash.
 */
function extractPinterestVideoUrl(html) {
  const found = html.match(/https:\/\/v1\.pinimg\.com\/videos\/mc\/[^\s"']+/);
  if (found === null) {
    return null;
  }
  // Undo escaped slashes, then drop stray backslashes/quotes that may trail
  // the URL when it was lifted out of embedded JSON.
  const rawUrl = found[0].replace(/\\u002F/g, '/').replace(/[\\"']/g, '');
  const hashMatch = rawUrl.match(/[a-f0-9]{32}/);
  if (hashMatch === null) {
    return null;
  }
  const hash = hashMatch[0];
  // The path is sharded by the first three byte pairs of the hash.
  const shards = `${hash.substring(0, 2)}/${hash.substring(2, 4)}/${hash.substring(4, 6)}`;
  return `https://v1.pinimg.com/videos/mc/720p/${shards}/${hash}.mp4`;
}

/**
 * Searches Pinterest for `query`, opens up to 40 distinct pin links found on
 * the results page, and writes one 720p MP4 URL per line to stdout for every
 * pin whose HTML contains a video URL.
 *
 * A pin that fails to load is reported on stderr and skipped. A failure on the
 * search page itself rejects the returned promise. Every page opened here is
 * closed again, including on failure.
 *
 * @param {object} browser - Puppeteer browser; only `newPage()` is called on it.
 * @param {string} query - Search terms; URL-encoded before use.
 * @returns {Promise<void>}
 */
async function scrapePinterest(browser, query) {
  const page = await browser.newPage();
  try {
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36');
    const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}&rs=typed`;
    await page.goto(searchUrl, { waitUntil: 'networkidle2', timeout: 30000 });

    // Scroll three times, pausing in between, to load more results.
    await page.evaluate(async () => {
      for (let i = 0; i < 3; i++) {
        window.scrollBy(0, window.innerHeight * 2);
        await new Promise((r) => setTimeout(r, 1500));
      }
    });

    const pinUrls = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('a[href*="/pin/"]'))
        .map((a) => a.href)
        .filter((href) => href.includes('/pin/'));
    });
    const uniquePins = [...new Set(pinUrls)].slice(0, 40); // Check 40 pins

    for (const pinUrl of uniquePins) {
      const pinPage = await browser.newPage();
      try {
        await pinPage.goto(pinUrl, { waitUntil: 'domcontentloaded', timeout: 8000 });
        const videoUrl = extractPinterestVideoUrl(await pinPage.content());
        if (videoUrl !== null) {
          process.stdout.write(`${videoUrl}\n`);
        }
      } catch (err) {
        // Best effort: one bad pin must not abort the run, but say why it was
        // skipped. stderr keeps stdout clean for the URL list.
        console.error(`Skipping pin ${pinUrl}: ${err}`);
      } finally {
        await pinPage.close();
      }
    }
  } finally {
    await page.close();
  }
}
// --- PEXELS SCRAPER (FALLBACK) ---
/**
 * Searches Pexels videos for `query` and writes the direct MP4 URL of every
 * `<source type="video/mp4">` element on the results page to stdout, one per
 * line, with query strings removed and duplicates dropped.
 *
 * A progress line is written to stderr. Navigation errors reject the returned
 * promise; the page is closed either way.
 *
 * @param {object} browser - Puppeteer browser; only `newPage()` is called on it.
 * @param {string} query - Search terms; URL-encoded into the path.
 * @param {string} orientation - 'Portrait' or 'Landscape' adds the matching
 *   `orientation` filter; any other value searches without a filter.
 * @returns {Promise<void>}
 */
async function scrapePexels(browser, query, orientation) {
  const page = await browser.newPage();
  try {
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36');

    // Pexels allows orientation filtering in the URL.
    let pexelsUrl = `https://www.pexels.com/search/videos/${encodeURIComponent(query)}/`;
    if (orientation === 'Portrait') {
      pexelsUrl += '?orientation=portrait';
    } else if (orientation === 'Landscape') {
      pexelsUrl += '?orientation=landscape';
    }

    console.error(`Fallback: Searching Pexels for ${orientation}...`);
    await page.goto(pexelsUrl, { waitUntil: 'networkidle2', timeout: 30000 });

    // Extract video links directly from the source tags.
    const videoUrls = await page.evaluate(() => {
      const sources = Array.from(document.querySelectorAll('source[type="video/mp4"]'));
      return sources.map((s) => s.src);
    });

    // Strip query params first, then de-duplicate: two sources that differ
    // only in their query string point at the same file.
    const directUrls = new Set();
    for (const url of videoUrls) {
      const cleanUrl = url.split('?')[0];
      if (cleanUrl !== '') {
        directUrls.add(cleanUrl); // a <source> without src yields '' — skip it
      }
    }
    for (const url of directUrls) {
      process.stdout.write(`${url}\n`);
    }
  } finally {
    await page.close();
  }
}
// --- MAIN CONTROLLER ---
/*
 * CLI entry point.
 *
 *   node <script> <pinterest|pexels> <query> [orientation]
 *
 * Result URLs go to stdout (one per line); diagnostics go to stderr.
 */
(async () => {
  const args = process.argv.slice(2);
  const mode = args[0]; // 'pinterest' or 'pexels'
  const query = args[1];
  const orientation = args[2] || 'Any'; // an empty argument also means 'Any'

  // Reject bad invocations before paying for a browser launch; otherwise an
  // unknown mode does nothing and a missing query searches for "undefined".
  if ((mode !== 'pinterest' && mode !== 'pexels') || !query) {
    console.error('Usage: node <script> <pinterest|pexels> <query> [orientation]');
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch({
    executablePath: '/usr/bin/chromium',
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
  });

  try {
    if (mode === 'pinterest') {
      await scrapePinterest(browser, query);
    } else {
      await scrapePexels(browser, query, orientation);
    }
  } catch (e) {
    // Scrape failures are logged only; the exit status is left untouched so
    // callers that treat empty stdout as "no results" keep working.
    console.error(e);
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Reached when the browser cannot be launched or closed.
  console.error(err);
  process.exitCode = 1;
});