Spaces:
Build error
Build error
fix(adaptive): url hash
Browse files
backend/functions/src/cloud-functions/adaptive-crawler.ts
CHANGED
|
@@ -21,9 +21,13 @@ import { Timestamp } from 'firebase-admin/firestore';
|
|
| 21 |
|
| 22 |
const md5Hasher = new HashManager('md5', 'hex');
|
| 23 |
const removeURLHash = (url: string) => {
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
}
|
| 28 |
|
| 29 |
@singleton()
|
|
@@ -440,7 +444,7 @@ export class AdaptiveCrawlerHost extends RPCHost {
|
|
| 440 |
}[];
|
| 441 |
};
|
| 442 |
|
| 443 |
-
return json.results.filter(r => r.relevance_score > 0.3).map(r => r.document.text);
|
| 444 |
}
|
| 445 |
|
| 446 |
getIndex(user?: JinaEmbeddingsTokenAccount) {
|
|
|
|
| 21 |
|
| 22 |
const md5Hasher = new HashManager('md5', 'hex');
|
| 23 |
/**
 * Strips the fragment (`#...`) from a URL string.
 *
 * The input is parsed with the `URL` constructor and re-serialized, so the
 * returned string is the parser's serialization of the URL without its hash
 * (for example, an origin-only URL gains a trailing `/`).
 *
 * @param url - The URL string to process.
 * @returns The serialized URL without its fragment, or `url` unchanged when
 * it cannot be parsed as an absolute URL.
 */
const removeURLHash = (url: string): string => {
    try {
        const parsed = new URL(url);
        // Assigning an empty string clears the fragment, including the '#'.
        parsed.hash = '';

        return parsed.toString();
    } catch {
        // Deliberate best-effort: unparsable input is passed through as-is.
        return url;
    }
}
|
| 32 |
|
| 33 |
@singleton()
|
|
|
|
| 444 |
}[];
|
| 445 |
};
|
| 446 |
|
| 447 |
+
return json.results.filter(r => r.relevance_score > 0.3).map(r => removeURLHash(r.document.text));
|
| 448 |
}
|
| 449 |
|
| 450 |
getIndex(user?: JinaEmbeddingsTokenAccount) {
|