Spaces:
Build error
Build error
fix: fallback logic (#1182)
Browse files
* fix: fallback logic
* fix: fallback logic
* fix: fallback logic
* chore: cleanup
* chore: cleanup
* chore: adjust fallback logic
* chore: adjust fallback logic
* chore: cleanup
* chore: cleanup
* tweak: fallback mech
---------
Co-authored-by: Yanlong Wang <yanlong.wang@naiver.org>
- src/api/searcher-serper.ts +52 -49
- src/api/serp.ts +30 -7
src/api/searcher-serper.ts
CHANGED
|
@@ -286,30 +286,13 @@ export class SearcherHost extends RPCHost {
|
|
| 286 |
page,
|
| 287 |
};
|
| 288 |
|
| 289 |
-
const {
|
| 290 |
searchParams, fallback, crawlerOptions.noCache
|
| 291 |
);
|
| 292 |
chargeAmountScaler *= tryTimes;
|
| 293 |
|
| 294 |
fallbackQuery = successQuery !== searchQuery ? successQuery : undefined;
|
| 295 |
|
| 296 |
-
let results;
|
| 297 |
-
switch (variant) {
|
| 298 |
-
case 'images': {
|
| 299 |
-
results = (r as SerperImageSearchResponse).images;
|
| 300 |
-
break;
|
| 301 |
-
}
|
| 302 |
-
case 'news': {
|
| 303 |
-
results = (r as SerperNewsSearchResponse).news;
|
| 304 |
-
break;
|
| 305 |
-
}
|
| 306 |
-
case 'web':
|
| 307 |
-
default: {
|
| 308 |
-
results = (r as SerperWebSearchResponse).organic;
|
| 309 |
-
break;
|
| 310 |
-
}
|
| 311 |
-
}
|
| 312 |
-
|
| 313 |
if (!results.length) {
|
| 314 |
throw new AssertionFailureError(`No search results available for query ${searchQuery}`);
|
| 315 |
}
|
|
@@ -323,7 +306,7 @@ export class SearcherHost extends RPCHost {
|
|
| 323 |
const targetResultCount = crawlWithoutContent ? count : count + 2;
|
| 324 |
const trimmedResults = results.filter((x) => Boolean(x.link)).slice(0, targetResultCount).map((x) => this.mapToFinalResults(x));
|
| 325 |
trimmedResults.toString = function () {
|
| 326 |
-
let r =
|
| 327 |
if (fallbackQuery) {
|
| 328 |
r = `Fallback query: ${fallbackQuery}\n\n${r}`;
|
| 329 |
}
|
|
@@ -515,52 +498,72 @@ export class SearcherHost extends RPCHost {
|
|
| 515 |
params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
|
| 516 |
useFallback: boolean = false,
|
| 517 |
noCache: boolean = false
|
| 518 |
-
)
|
| 519 |
// Try original query first
|
| 520 |
const originalQuery = params.q;
|
| 521 |
-
const
|
| 522 |
|
| 523 |
// Extract results based on variant
|
| 524 |
-
let results: any[] = [];
|
| 525 |
let tryTimes = 1;
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
case 'web': default: results = (response as SerperWebSearchResponse).organic; break;
|
| 530 |
-
}
|
| 531 |
-
|
| 532 |
-
// Return early if we got results or fallback is disabled
|
| 533 |
-
if (results.length > 0 || !useFallback) {
|
| 534 |
-
return { response, query: originalQuery, tryTimes };
|
| 535 |
}
|
| 536 |
|
| 537 |
-
|
| 538 |
-
const
|
| 539 |
|
| 540 |
this.logger.info(`No results for "${originalQuery}", trying fallback queries`);
|
| 541 |
-
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(originalQuery);
|
| 542 |
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
|
| 547 |
-
|
| 548 |
-
const
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
case 'news': fallbackResults = (fallbackResponse as SerperNewsSearchResponse).news; break;
|
| 554 |
-
case 'web': default: fallbackResults = (fallbackResponse as SerperWebSearchResponse).organic; break;
|
| 555 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
|
| 557 |
-
tryTimes++;
|
| 558 |
if (fallbackResults.length > 0) {
|
| 559 |
-
return {
|
| 560 |
}
|
| 561 |
}
|
| 562 |
|
| 563 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
}
|
| 565 |
|
| 566 |
async *fetchSearchResults(
|
|
@@ -651,7 +654,7 @@ export class SearcherHost extends RPCHost {
|
|
| 651 |
metadata.fallback = fallbackQuery;
|
| 652 |
}
|
| 653 |
|
| 654 |
-
assignMeta(formatted,
|
| 655 |
|
| 656 |
return final;
|
| 657 |
}
|
|
|
|
| 286 |
page,
|
| 287 |
};
|
| 288 |
|
| 289 |
+
const { results, query: successQuery, tryTimes } = await this.searchWithFallback(
|
| 290 |
searchParams, fallback, crawlerOptions.noCache
|
| 291 |
);
|
| 292 |
chargeAmountScaler *= tryTimes;
|
| 293 |
|
| 294 |
fallbackQuery = successQuery !== searchQuery ? successQuery : undefined;
|
| 295 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
if (!results.length) {
|
| 297 |
throw new AssertionFailureError(`No search results available for query ${searchQuery}`);
|
| 298 |
}
|
|
|
|
| 306 |
const targetResultCount = crawlWithoutContent ? count : count + 2;
|
| 307 |
const trimmedResults = results.filter((x) => Boolean(x.link)).slice(0, targetResultCount).map((x) => this.mapToFinalResults(x));
|
| 308 |
trimmedResults.toString = function () {
|
| 309 |
+
let r = this.map((x, i) => x ? Reflect.apply(x.toString, x, [i]) : '').join('\n\n').trimEnd() + '\n';
|
| 310 |
if (fallbackQuery) {
|
| 311 |
r = `Fallback query: ${fallbackQuery}\n\n${r}`;
|
| 312 |
}
|
|
|
|
| 498 |
params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
|
| 499 |
useFallback: boolean = false,
|
| 500 |
noCache: boolean = false
|
| 501 |
+
) {
|
| 502 |
// Try original query first
|
| 503 |
const originalQuery = params.q;
|
| 504 |
+
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(originalQuery);
|
| 505 |
|
| 506 |
// Extract results based on variant
|
|
|
|
| 507 |
let tryTimes = 1;
|
| 508 |
+
const results = await this.doSearch(params, noCache);
|
| 509 |
+
if (results.length && !useFallback) {
|
| 510 |
+
return { results, query: params.q, tryTimes };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
}
|
| 512 |
|
| 513 |
+
let queryTerms = originalQuery.split(/\s+/);
|
| 514 |
+
const lastResort = containsRTL ? queryTerms.slice(queryTerms.length - 2) : queryTerms.slice(0, 2);
|
| 515 |
|
| 516 |
this.logger.info(`No results for "${originalQuery}", trying fallback queries`);
|
|
|
|
| 517 |
|
| 518 |
+
let terms: string[] = [];
|
| 519 |
+
// fallback n times
|
| 520 |
+
const n = 4;
|
| 521 |
|
| 522 |
+
while (tryTimes <= n) {
|
| 523 |
+
const delta = Math.ceil(queryTerms.length / n) * tryTimes;
|
| 524 |
+
terms = containsRTL ? queryTerms.slice(0, queryTerms.length - delta) : queryTerms.slice(delta);
|
| 525 |
+
const query = terms.join(' ');
|
| 526 |
+
if (!query) {
|
| 527 |
+
break;
|
|
|
|
|
|
|
| 528 |
}
|
| 529 |
+
tryTimes += 1;
|
| 530 |
+
this.logger.info(`Retrying search with fallback query: "${query}"`);
|
| 531 |
+
const fallbackParams = { ...params, q: query };
|
| 532 |
+
const fallbackResults = await this.doSearch(fallbackParams, noCache);
|
| 533 |
+
if (fallbackResults.length > 0) {
|
| 534 |
+
return { results: fallbackResults, query: fallbackParams.q, tryTimes };
|
| 535 |
+
}
|
| 536 |
+
}
|
| 537 |
+
|
| 538 |
+
if (terms.length > lastResort.length) {
|
| 539 |
+
const query = lastResort.join(' ');
|
| 540 |
+
this.logger.info(`Retrying search with fallback query: "${query}"`);
|
| 541 |
+
const fallbackParams = { ...params, q: query };
|
| 542 |
+
tryTimes += 1;
|
| 543 |
+
const fallbackResults = await this.doSearch(fallbackParams, noCache);
|
| 544 |
|
|
|
|
| 545 |
if (fallbackResults.length > 0) {
|
| 546 |
+
return { results: fallbackResults, query, tryTimes };
|
| 547 |
}
|
| 548 |
}
|
| 549 |
|
| 550 |
+
return { results, query: originalQuery, tryTimes };
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
/**
 * Runs a single search through `cachedSearch` and returns the result list
 * that matches the requested variant.
 *
 * @param params - Search parameters; `params.variant` selects which list of
 *   the response is returned: `images`, `news`, or `organic` (for `web` and
 *   for any other value).
 * @param noCache - Forwarded unchanged to `cachedSearch`.
 * @returns The selected result list, or an empty array when the response
 *   does not carry that list.
 */
async doSearch(
    params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
    noCache: boolean = false,
) {
    const response = await this.cachedSearch(params, noCache);

    // Callers read `.length` on the returned value without a guard, so a
    // missing list is normalised to [] instead of leaking `undefined`.
    // NOTE(review): whether the upstream response can actually omit these
    // fields is not visible from here — confirm against the response types.
    switch (params.variant) {
        case 'images': return (response as SerperImageSearchResponse).images ?? [];
        case 'news': return (response as SerperNewsSearchResponse).news ?? [];
        case 'web':
        default: return (response as SerperWebSearchResponse).organic ?? [];
    }
}
|
| 568 |
|
| 569 |
async *fetchSearchResults(
|
|
|
|
| 654 |
metadata.fallback = fallbackQuery;
|
| 655 |
}
|
| 656 |
|
| 657 |
+
assignMeta(formatted, metadata);
|
| 658 |
|
| 659 |
return final;
|
| 660 |
}
|
src/api/serp.ts
CHANGED
|
@@ -275,6 +275,8 @@ export class SerpHost extends RPCHost {
|
|
| 275 |
}
|
| 276 |
|
| 277 |
let realQuery = q;
|
|
|
|
|
|
|
| 278 |
let results = await this.cachedSearch(variant, {
|
| 279 |
provider: searchEngine,
|
| 280 |
q,
|
|
@@ -289,14 +291,21 @@ export class SerpHost extends RPCHost {
|
|
| 289 |
if (fallback && !results?.length && (!page || page === 1)) {
|
| 290 |
let tryTimes = 1;
|
| 291 |
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(q);
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
| 298 |
break;
|
| 299 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
this.logger.info(`Retrying search with fallback query: "${realQuery}"`);
|
| 301 |
results = await this.cachedSearch(variant, {
|
| 302 |
provider: searchEngine,
|
|
@@ -306,11 +315,25 @@ export class SerpHost extends RPCHost {
|
|
| 306 |
hl,
|
| 307 |
location,
|
| 308 |
}, crawlerOptions);
|
| 309 |
-
tryTimes += 1;
|
| 310 |
if (results?.length) {
|
| 311 |
break;
|
| 312 |
}
|
| 313 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
chargeAmountScaler *= tryTimes;
|
| 315 |
}
|
| 316 |
|
|
|
|
| 275 |
}
|
| 276 |
|
| 277 |
let realQuery = q;
|
| 278 |
+
let queryTerms = q.split(/\s+/g).filter((x) => !!x);
|
| 279 |
+
|
| 280 |
let results = await this.cachedSearch(variant, {
|
| 281 |
provider: searchEngine,
|
| 282 |
q,
|
|
|
|
| 291 |
if (fallback && !results?.length && (!page || page === 1)) {
|
| 292 |
let tryTimes = 1;
|
| 293 |
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(q);
|
| 294 |
+
const lastResort = (containsRTL ? queryTerms.slice(queryTerms.length - 2) : queryTerms.slice(0, 2)).join(' ');
|
| 295 |
+
const n = 4;
|
| 296 |
+
let terms: string[] = [];
|
| 297 |
+
while (tryTimes <= n) {
|
| 298 |
+
const delta = Math.ceil(queryTerms.length / n) * tryTimes;
|
| 299 |
+
terms = containsRTL ? queryTerms.slice(0, queryTerms.length - delta) : queryTerms.slice(delta);
|
| 300 |
+
const query = terms.join(' ');
|
| 301 |
+
if (!query) {
|
| 302 |
break;
|
| 303 |
}
|
| 304 |
+
if (realQuery === query) {
|
| 305 |
+
continue;
|
| 306 |
+
}
|
| 307 |
+
tryTimes += 1;
|
| 308 |
+
realQuery = query;
|
| 309 |
this.logger.info(`Retrying search with fallback query: "${realQuery}"`);
|
| 310 |
results = await this.cachedSearch(variant, {
|
| 311 |
provider: searchEngine,
|
|
|
|
| 315 |
hl,
|
| 316 |
location,
|
| 317 |
}, crawlerOptions);
|
|
|
|
| 318 |
if (results?.length) {
|
| 319 |
break;
|
| 320 |
}
|
| 321 |
}
|
| 322 |
+
|
| 323 |
+
if (!results?.length && realQuery.length > lastResort.length) {
|
| 324 |
+
realQuery = lastResort;
|
| 325 |
+
this.logger.info(`Retrying search with fallback query: "${realQuery}"`);
|
| 326 |
+
tryTimes += 1;
|
| 327 |
+
results = await this.cachedSearch(variant, {
|
| 328 |
+
provider: searchEngine,
|
| 329 |
+
q: realQuery,
|
| 330 |
+
num,
|
| 331 |
+
gl,
|
| 332 |
+
hl,
|
| 333 |
+
location,
|
| 334 |
+
}, crawlerOptions);
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
chargeAmountScaler *= tryTimes;
|
| 338 |
}
|
| 339 |
|