nomagick committed on
Commit
dceb361
·
unverified ·
1 Parent(s): 5bbd75a

fix: url check

Browse files
Files changed (1) hide show
  1. src/api/crawler.ts +3 -3
src/api/crawler.ts CHANGED
@@ -46,6 +46,8 @@ import { RobotsTxtService } from '../services/robots-text';
46
  import { lookup } from 'dns/promises';
47
  import { isIP } from 'net';
48
 
 
 
49
  export interface ExtraScrappingOptions extends ScrappingOptions {
50
  withIframe?: boolean | 'quoted';
51
  withShadowDom?: boolean;
@@ -474,8 +476,7 @@ export class CrawlerHost extends RPCHost {
474
 
475
  const targetUrlFromGet = originPath.slice(1);
476
  if (crawlerOptions.pdf) {
477
- const pdfBuf = crawlerOptions.pdf instanceof Blob ? await crawlerOptions.pdf.arrayBuffer().then((x) => Buffer.from(x)) : Buffer.from(crawlerOptions.pdf, 'base64');
478
- url = `blob://pdf/${md5Hasher.hash(pdfBuf)}`;
479
  } else if (targetUrlFromGet) {
480
  url = targetUrlFromGet.trim();
481
  } else if (crawlerOptions.url) {
@@ -485,7 +486,6 @@ export class CrawlerHost extends RPCHost {
485
  }
486
 
487
  let result: URL;
488
- const normalizeUrl = require('@esm2cjs/normalize-url').default;
489
  try {
490
  result = new URL(
491
  normalizeUrl(
 
46
  import { lookup } from 'dns/promises';
47
  import { isIP } from 'net';
48
 
49
+ const normalizeUrl = require('@esm2cjs/normalize-url').default;
50
+
51
  export interface ExtraScrappingOptions extends ScrappingOptions {
52
  withIframe?: boolean | 'quoted';
53
  withShadowDom?: boolean;
 
476
 
477
  const targetUrlFromGet = originPath.slice(1);
478
  if (crawlerOptions.pdf) {
479
+ url = `blob://pdf/${randomUUID()}`;
 
480
  } else if (targetUrlFromGet) {
481
  url = targetUrlFromGet.trim();
482
  } else if (crawlerOptions.url) {
 
486
  }
487
 
488
  let result: URL;
 
489
  try {
490
  result = new URL(
491
  normalizeUrl(