Spaces:
Build error
feat(adaptive-crawl): disable invalid link
Browse files
backend/functions/src/cloud-functions/adaptive-crawler.ts
CHANGED
|
@@ -418,11 +418,22 @@ export class AdaptiveCrawlerHost extends RPCHost {
|
|
| 418 |
query: string;
|
| 419 |
links: Record<string, string>;
|
| 420 |
}) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
const data = {
|
| 422 |
model: 'jina-reranker-v2-base-multilingual',
|
| 423 |
query,
|
| 424 |
top_n: 15,
|
| 425 |
-
documents:
|
| 426 |
};
|
| 427 |
|
| 428 |
const response = await fetch('https://api.jina.ai/v1/rerank', {
|
|
|
|
| 418 |
query: string;
|
| 419 |
links: Record<string, string>;
|
| 420 |
}) {
|
| 421 |
+
const invalidSuffix = [
|
| 422 |
+
'.zip',
|
| 423 |
+
'.docx',
|
| 424 |
+
'.pptx',
|
| 425 |
+
'.xlsx',
|
| 426 |
+
];
|
| 427 |
+
|
| 428 |
+
const validLinks = Object.entries(links)
|
| 429 |
+
.map(([title, link]) => link)
|
| 430 |
+
.filter(link => link.startsWith('http') && !invalidSuffix.some(suffix => link.endsWith(suffix)));
|
| 431 |
+
|
| 432 |
const data = {
|
| 433 |
model: 'jina-reranker-v2-base-multilingual',
|
| 434 |
query,
|
| 435 |
top_n: 15,
|
| 436 |
+
documents: validLinks,
|
| 437 |
};
|
| 438 |
|
| 439 |
const response = await fetch('https://api.jina.ai/v1/rerank', {
|