nomagick committed on
Commit
5789ae1
·
unverified ·
1 Parent(s): 1e3bae6

chore: don't abuse our service

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -571,10 +571,15 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
571
  if (blockade) {
572
  throw new SecurityCompromiseError(`Domain ${urlToCrawl.hostname} blocked until ${blockade.expireAt || 'Eternally'} due to previous abuse found on ${blockade.triggerUrl || 'site'}: ${blockade.triggerReason}`);
573
  }
574
- }
 
 
 
575
 
 
576
  const crawlOpts = this.configure(crawlerOptions);
577
 
 
578
  if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
579
  const sseStream = new OutputServerEventStream();
580
  rpcReflect.return(sseStream);
@@ -767,7 +772,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
767
  return r;
768
  }
769
 
770
- async *cachedScrap(urlToCrawl: URL, crawlOpts?: ExtraScrappingOptions, cacheTolerance: number = this.cacheValidMs) {
771
  let cache;
772
  if (cacheTolerance && !crawlOpts?.cookies?.length) {
773
  cache = await this.queryCache(urlToCrawl, cacheTolerance);
@@ -821,7 +826,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
821
  }
822
 
823
 
824
- async *scrapMany(urls: URL[], options?: ScrappingOptions, cacheTolerance?: number) {
825
  const iterators = urls.map((url) => this.cachedScrap(url, options, cacheTolerance));
826
 
827
  const results: (PageSnapshot | undefined)[] = iterators.map((_x) => undefined);
 
571
  if (blockade) {
572
  throw new SecurityCompromiseError(`Domain ${urlToCrawl.hostname} blocked until ${blockade.expireAt || 'Eternally'} due to previous abuse found on ${blockade.triggerUrl || 'site'}: ${blockade.triggerReason}`);
573
  }
574
+ if (urlToCrawl.protocol === 'http:' && (!urlToCrawl.pathname || urlToCrawl.pathname === '/') &&
575
+ crawlerOptions.respondWith === 'html') {
576
+ throw new SecurityCompromiseError(`Your request is categorized as abuse. Please don't abuse our service. If you are sure you are not abusing, please authenticate yourself with an API key.`);
577
+ }
578
 
579
+ }
580
  const crawlOpts = this.configure(crawlerOptions);
581
 
582
+
583
  if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
584
  const sseStream = new OutputServerEventStream();
585
  rpcReflect.return(sseStream);
 
772
  return r;
773
  }
774
 
775
+ async * cachedScrap(urlToCrawl: URL, crawlOpts?: ExtraScrappingOptions, cacheTolerance: number = this.cacheValidMs) {
776
  let cache;
777
  if (cacheTolerance && !crawlOpts?.cookies?.length) {
778
  cache = await this.queryCache(urlToCrawl, cacheTolerance);
 
826
  }
827
 
828
 
829
+ async * scrapMany(urls: URL[], options?: ScrappingOptions, cacheTolerance?: number) {
830
  const iterators = urls.map((url) => this.cachedScrap(url, options, cacheTolerance));
831
 
832
  const results: (PageSnapshot | undefined)[] = iterators.map((_x) => undefined);