mapleeit committed on
Commit
8008e53
·
1 Parent(s): 3f88f8d

feat(adaptive-crawl): disable invalid link

Browse files
backend/functions/src/cloud-functions/adaptive-crawler.ts CHANGED
@@ -418,11 +418,22 @@ export class AdaptiveCrawlerHost extends RPCHost {
418
  query: string;
419
  links: Record<string, string>;
420
  }) {
 
 
 
 
 
 
 
 
 
 
 
421
  const data = {
422
  model: 'jina-reranker-v2-base-multilingual',
423
  query,
424
  top_n: 15,
425
- documents: Object.entries(links).map(([title, link]) => link)
426
  };
427
 
428
  const response = await fetch('https://api.jina.ai/v1/rerank', {
 
418
  query: string;
419
  links: Record<string, string>;
420
  }) {
421
+ const invalidSuffix = [
422
+ '.zip',
423
+ '.docx',
424
+ '.pptx',
425
+ '.xlsx',
426
+ ];
427
+
428
+ const validLinks = Object.entries(links)
429
+ .map(([title, link]) => link)
430
+ .filter(link => link.startsWith('http') && !invalidSuffix.some(suffix => link.endsWith(suffix)));
431
+
432
  const data = {
433
  model: 'jina-reranker-v2-base-multilingual',
434
  query,
435
  top_n: 15,
436
+ documents: validLinks,
437
  };
438
 
439
  const response = await fetch('https://api.jina.ai/v1/rerank', {