mapleeit committed on
Commit
3f88f8d
·
1 Parent(s): 39e49ca

fix(adaptive): url hash

Browse files
backend/functions/src/cloud-functions/adaptive-crawler.ts CHANGED
@@ -21,9 +21,13 @@ import { Timestamp } from 'firebase-admin/firestore';
21
 
22
  const md5Hasher = new HashManager('md5', 'hex');
23
  const removeURLHash = (url: string) => {
24
- const o = new URL(url);
25
- o.hash = '';
26
- return o.toString();
 
 
 
 
27
  }
28
 
29
  @singleton()
@@ -440,7 +444,7 @@ export class AdaptiveCrawlerHost extends RPCHost {
440
  }[];
441
  };
442
 
443
- return json.results.filter(r => r.relevance_score > 0.3).map(r => r.document.text);
444
  }
445
 
446
  getIndex(user?: JinaEmbeddingsTokenAccount) {
 
21
 
22
  const md5Hasher = new HashManager('md5', 'hex');
23
  const removeURLHash = (url: string) => {
24
+ try {
25
+ const o = new URL(url);
26
+ o.hash = '';
27
+ return o.toString();
28
+ } catch (e) {
29
+ return url;
30
+ }
31
  }
32
 
33
  @singleton()
 
444
  }[];
445
  };
446
 
447
+ return json.results.filter(r => r.relevance_score > 0.3).map(r => removeURLHash(r.document.text));
448
  }
449
 
450
  getIndex(user?: JinaEmbeddingsTokenAccount) {