nazib61 committed on
Commit
78dd2f3
·
verified ·
1 Parent(s): 41e1b2b

Update scraper.js

Browse files
Files changed (1) hide show
  1. scraper.js +14 -39
scraper.js CHANGED
@@ -1,12 +1,12 @@
1
  const puppeteer = require('puppeteer-core');
2
 
3
- async function getPinterestVideo(query) {
4
  let browser;
5
  try {
6
  browser = await puppeteer.launch({
7
  executablePath: '/usr/bin/chromium',
8
  headless: 'new',
9
- args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
10
  });
11
 
12
  const page = await browser.newPage();
@@ -15,64 +15,39 @@ async function getPinterestVideo(query) {
15
  const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}&rs=typed`;
16
  await page.goto(searchUrl, { waitUntil: 'networkidle2', timeout: 30000 });
17
 
18
- // Get pin links
19
  const pinUrls = await page.evaluate(() => {
20
  return Array.from(document.querySelectorAll('a[href*="/pin/"]'))
21
- .map(a => a.href)
22
- .filter(href => href.includes('/pin/'));
23
  });
24
 
25
  const uniquePins = [...new Set(pinUrls)].slice(0, 10);
26
 
 
27
  for (const pinUrl of uniquePins) {
28
  const pPage = await browser.newPage();
29
  try {
30
- await pPage.goto(pinUrl, { waitUntil: 'domcontentloaded', timeout: 15000 });
31
  const content = await pPage.content();
32
-
33
- // 1. Look for any Pinterest video URL
34
  const videoRegex = /https:\/\/v1\.pinimg\.com\/videos\/mc\/[^\s"']+/g;
35
  const matches = content.match(videoRegex);
36
 
37
- if (matches && matches.length > 0) {
38
- let rawUrl = matches[0].replace(/\\u002F/g, '/').replace(/"/g, '').replace(/'/g, '');
39
-
40
- // 2. QUALITY BOOSTER LOGIC
41
- // Pinterest stores videos in a specific hash format.
42
- // We extract the 32-character MD5 hash and build the 720p path.
43
  const hashMatch = rawUrl.match(/([a-f0-9]{32})/);
44
-
45
  if (hashMatch) {
46
- const hash = hashMatch[1];
47
- const h1 = hash.substring(0, 2);
48
- const h2 = hash.substring(2, 4);
49
- const h3 = hash.substring(4, 6);
50
-
51
- // This is the official Pinterest path for HD videos
52
- const hdUrl = `https://v1.pinimg.com/videos/mc/720p/${h1}/${h2}/${h3}/${hash}.mp4`;
53
-
54
- console.log(hdUrl); // Return the HD URL
55
- } else {
56
- console.log(rawUrl); // Fallback to original if hash fails
57
  }
58
-
59
- await pPage.close();
60
- await browser.close();
61
- return;
62
  }
63
- } catch (e) {
64
- // skip failed pins
65
- } finally {
66
- if (!pPage.isClosed()) await pPage.close();
67
- }
68
  }
69
- console.log("ERROR: No video found");
70
  } catch (error) {
71
- console.log("ERROR: " + error.message);
72
  } finally {
73
  if (browser) await browser.close();
74
  }
75
  }
76
 
77
- const args = process.argv.slice(2);
78
- getPinterestVideo(args.join(' '));
 
1
  const puppeteer = require('puppeteer-core');
2
 
3
+ async function getPinterestVideos(query) {
4
  let browser;
5
  try {
6
  browser = await puppeteer.launch({
7
  executablePath: '/usr/bin/chromium',
8
  headless: 'new',
9
+ args: ['--no-sandbox', '--disable-setuid-sandbox']
10
  });
11
 
12
  const page = await browser.newPage();
 
15
  const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}&rs=typed`;
16
  await page.goto(searchUrl, { waitUntil: 'networkidle2', timeout: 30000 });
17
 
 
18
  const pinUrls = await page.evaluate(() => {
19
  return Array.from(document.querySelectorAll('a[href*="/pin/"]'))
20
+ .map(a => a.href).filter(href => href.includes('/pin/'));
 
21
  });
22
 
23
  const uniquePins = [...new Set(pinUrls)].slice(0, 10);
24
 
25
+ // Loop through pins and find as many video URLs as possible
26
  for (const pinUrl of uniquePins) {
27
  const pPage = await browser.newPage();
28
  try {
29
+ await pPage.goto(pinUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });
30
  const content = await pPage.content();
 
 
31
  const videoRegex = /https:\/\/v1\.pinimg\.com\/videos\/mc\/[^\s"']+/g;
32
  const matches = content.match(videoRegex);
33
 
34
+ if (matches) {
35
+ let rawUrl = matches[0].replace(/\\u002F/g, '/').replace(/[奖励"']/g, '');
 
 
 
 
36
  const hashMatch = rawUrl.match(/([a-f0-9]{32})/);
 
37
  if (hashMatch) {
38
+ const h = hashMatch[1];
39
+ // Return 720p version
40
+ console.log(`https://v1.pinimg.com/videos/mc/720p/${h.substring(0,2)}/${h.substring(2,4)}/${h.substring(4,6)}/${h}.mp4`);
 
 
 
 
 
 
 
 
41
  }
 
 
 
 
42
  }
43
+ } catch (e) {}
44
+ await pPage.close();
 
 
 
45
  }
 
46
  } catch (error) {
47
+ console.log("ERROR");
48
  } finally {
49
  if (browser) await browser.close();
50
  }
51
  }
52
 
53
+ getPinterestVideos(process.argv.slice(2).join(' '));