Spaces:
Build error
Build error
fix: crawling IP url
Browse files
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -13,6 +13,7 @@ import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
|
|
| 13 |
import { SecurityCompromiseError, ServiceCrashedError, ServiceNodeResourceDrainError } from '../shared/lib/errors';
|
| 14 |
import { TimeoutError } from 'puppeteer';
|
| 15 |
import _ from 'lodash';
|
|
|
|
| 16 |
const tldExtract = require('tld-extract');
|
| 17 |
|
| 18 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
|
@@ -570,15 +571,19 @@ export class PuppeteerControl extends AsyncService {
|
|
| 570 |
if (!requestUrl.startsWith('http:') && !requestUrl.startsWith('https:') && !requestUrl.startsWith('chrome-extension:') && requestUrl !== 'about:blank') {
|
| 571 |
return req.abort('blockedbyclient', 1000);
|
| 572 |
}
|
|
|
|
|
|
|
| 573 |
try {
|
| 574 |
-
|
| 575 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
} catch (err) {
|
| 577 |
return req.abort('blockedbyclient', 1000);
|
| 578 |
}
|
| 579 |
|
| 580 |
-
const parsedUrl = new URL(requestUrl);
|
| 581 |
-
|
| 582 |
if (this.circuitBreakerHosts.has(parsedUrl.hostname.toLowerCase())) {
|
| 583 |
page.emit('abuse', { url: requestUrl, page, sn, reason: `Abusive request: ${requestUrl}` });
|
| 584 |
return req.abort('blockedbyclient', 1000);
|
|
|
|
| 13 |
import { SecurityCompromiseError, ServiceCrashedError, ServiceNodeResourceDrainError } from '../shared/lib/errors';
|
| 14 |
import { TimeoutError } from 'puppeteer';
|
| 15 |
import _ from 'lodash';
|
| 16 |
+
import { isIP } from 'net';
|
| 17 |
const tldExtract = require('tld-extract');
|
| 18 |
|
| 19 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
|
|
|
| 571 |
if (!requestUrl.startsWith('http:') && !requestUrl.startsWith('https:') && !requestUrl.startsWith('chrome-extension:') && requestUrl !== 'about:blank') {
|
| 572 |
return req.abort('blockedbyclient', 1000);
|
| 573 |
}
|
| 574 |
+
|
| 575 |
+
const parsedUrl = new URL(requestUrl);
|
| 576 |
try {
|
| 577 |
+
if (isIP(parsedUrl.hostname)) {
|
| 578 |
+
domainSet.add(parsedUrl.hostname);
|
| 579 |
+
} else {
|
| 580 |
+
const tldParsed = tldExtract(requestUrl);
|
| 581 |
+
domainSet.add(tldParsed.domain);
|
| 582 |
+
}
|
| 583 |
} catch (err) {
|
| 584 |
return req.abort('blockedbyclient', 1000);
|
| 585 |
}
|
| 586 |
|
|
|
|
|
|
|
| 587 |
if (this.circuitBreakerHosts.has(parsedUrl.hostname.toLowerCase())) {
|
| 588 |
page.emit('abuse', { url: requestUrl, page, sn, reason: `Abusive request: ${requestUrl}` });
|
| 589 |
return req.abort('blockedbyclient', 1000);
|