chat-v4 / chat-ui /src /lib /server /websearch /runWebSearch.ts
muryshev's picture
Added display of more extended error messages
292b85c
import { SAIGA_TRUNCATE_WEB_CONTEXT, WEB_SEARCH_TEMPLATE, USE_CUSTOM_PARSER, MAX_N_PAGES_SCRAPE, SIMILARITY_API_URL, SIMILARITY_THRESHOLD } from "$env/static/private";
import { searchWeb } from "$lib/server/websearch/searchWeb";
import type { Message } from "$lib/types/Message";
import type { WebSearch, WebSearchSource } from "$lib/types/WebSearch";
import { generateQuery } from "$lib/server/websearch/generateQuery";
import { parseWeb } from "$lib/server/websearch/parseWeb";
import { chunk } from "$lib/utils/chunk";
import {
MAX_SEQ_LEN as CHUNK_CAR_LEN//,
//findSimilarSentences,
} from "$lib/server/websearch/sentenceSimilarity";
import type { Conversation } from "$lib/types/Conversation";
import type { MessageUpdate } from "$lib/types/MessageUpdate";
import { parseNalogGovRu } from "$lib/server/websearch/parseNalogGovRu";
import { text } from "svelte/internal";
// Presumably an upper bound on how many pages get embedded.
// NOTE(review): no code visible in this file references this constant — confirm it is still needed.
const MAX_N_PAGES_EMBED = 10 as const;
/**
 * Runs the web-search pipeline for `prompt` and returns the collected context.
 *
 * Steps, each announced to the client through `updatePad`:
 *  1. generate a search query from the conversation plus the new prompt and
 *     wrap it in `WEB_SEARCH_TEMPLATE`;
 *  2. run the search and keep organic results that are neither YouTube links
 *     nor PDFs;
 *  3. download and parse the result pages one by one;
 *  4. score the page texts against the prompt with `findSimilarSentences`,
 *     keep those at or above `SIMILARITY_THRESHOLD` (best first, at most
 *     `MAX_N_PAGES_SCRAPE`) and concatenate them into `context`, truncated to
 *     `SAIGA_TRUNCATE_WEB_CONTEXT` characters.
 *
 * @param conv      Conversation whose messages precede the prompt.
 * @param prompt    The new user prompt to search for.
 * @param updatePad Callback receiving progress, source and error updates.
 * @returns The populated `WebSearch`. When pages were found but none could be
 *          parsed, `context` and `contextSources` stay empty.
 * @throws Error when the search yields no usable results, or rethrows any
 *         failure of query generation, search or similarity scoring. Every
 *         failure is reported to `updatePad` exactly once before rethrowing.
 */
export async function runWebSearch(
	conv: Conversation,
	prompt: string,
	updatePad: (upd: MessageUpdate) => void
): Promise<WebSearch> {
	const messages = [
		...conv.messages,
		{ content: prompt, from: "user", id: crypto.randomUUID() },
	] satisfies Message[];

	const webSearch: WebSearch = {
		prompt: prompt,
		searchQuery: "",
		results: [],
		context: "",
		contextSources: [],
		createdAt: new Date(),
		updatedAt: new Date(),
	};

	function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
		updatePad({ type: "webSearch", messageType: type ?? "update", message: message, args: args });
	}

	try {
		appendUpdate("Генерируем поисковый запрос...");
		webSearch.searchQuery = await generateQuery(messages);
		const generatedQuery = webSearch.searchQuery;
		// `||` so that an empty template falls back to the bare query, and a
		// replacer function so that `$&`, `$'`, `$$` … inside the generated
		// query are inserted literally instead of being expanded by replace().
		webSearch.searchQuery = (WEB_SEARCH_TEMPLATE || "{{query}}").replace(
			"{{query}}",
			() => generatedQuery
		);
		console.log('Web search query: ', webSearch.searchQuery)

		appendUpdate("Ищем в яндексе по запросу: ", [webSearch.searchQuery]);
		const results = await searchWeb(webSearch.searchQuery);
		console.log('search results', results)

		const organic = results?.organic_results;
		const organicResults: { title: string; link: string }[] = Array.isArray(organic) ? organic : [];
		for (const { title, link } of organicResults) {
			// Skip entries without a link, YouTube links and PDFs.
			if (typeof link !== "string" || link.includes("youtube.com") || link.includes(".pdf")) {
				continue;
			}
			try {
				const { hostname } = new URL(link);
				webSearch.results.push({ title, link, hostname });
			} catch (e) {
				// One malformed link must not abort the whole search.
				console.error(`Skipping search result with invalid link "${link}"`, e);
			}
		}

		if (webSearch.results.length === 0) {
			appendUpdate("Поиск не вернул релевантных результатов");
			appendUpdate("Генерируем ответ на основе данных нейросети");
			// The error update itself is sent once, by the catch block below.
			throw new Error("По вашему запросу ничего не найдено.");
		}

		appendUpdate("Обработка результатов");
		const texts: { source: WebSearchSource; text: string }[] = [];
		for (const { link, hostname, title } of webSearch.results) {
			// Stop fetching once more than 30 page texts have been collected.
			if (texts.length > 30) break;

			let text = "";
			try {
				// NOTE(review): USE_CUSTOM_PARSER comes from $env/static/private; if it
				// is a string, any non-empty value (including "false") enables the
				// custom parser — confirm that is intended.
				if (USE_CUSTOM_PARSER) {
					try {
						text = await parseNalogGovRu(link);
					} catch (e) {
						console.log('Custom parser failed. link: ', link)
						console.log('Custom parser failed. error: ', e)
						console.log('Starting fallback parser')
						text = await parseWeb(link);
						console.log('Fallback returned: ', text)
					}
				} else {
					text = await parseWeb(link);
				}
				appendUpdate("Обработка страницы", [link]);
			} catch (e) {
				// A page that cannot be parsed is skipped; the others still count.
				console.error(`Error parsing webpage "${link}"`, e);
			}

			if (text.length > 0) {
				texts.push({ source: { link: link, hostname: hostname, title: title }, text })
			}
		}

		if (texts.length > 0) {
			appendUpdate("Получение релевантной информации");
			console.log('webSearch.contextSources', webSearch.contextSources)

			const allIndices = await findSimilarSentences(prompt, texts.map((t) => t.text));
			console.log('similarity check result:', allIndices);

			const similarityThreshold = Number(SIMILARITY_THRESHOLD);
			const contextLimit = Number(SAIGA_TRUNCATE_WEB_CONTEXT);
			const ranked = allIndices
				.filter((r) => r.score >= similarityThreshold)
				.sort((a, b) => b.score - a.score)
				.slice(0, Number(MAX_N_PAGES_SCRAPE));

			let fullText = '';
			for (const { i } of ranked) {
				if (fullText.length >= contextLimit) {
					break;
				}
				const page = texts[i];
				// Ignore indices from the similarity service that do not match a page.
				if (!page) continue;
				fullText += page.text + "\n";
				webSearch.contextSources.push(page.source);
			}

			webSearch.context = fullText.slice(0, contextLimit);
			console.log('web search context:', webSearch.context);

			updatePad({
				type: "webSearch",
				messageType: "sources",
				message: "sources",
				sources: webSearch.contextSources,
			});
		}
	} catch (searchError) {
		console.log('searchError', searchError)
		// Single place where failures are surfaced to the client.
		if (searchError instanceof Error) {
			appendUpdate(
				searchError.message,
				[],
				"error"
			);
		}
		throw searchError;
	}

	return webSearch;
}
/**
 * Asks the similarity service to score `sentences` against `query`.
 *
 * POSTs `{ query, sentences }` as JSON to `SIMILARITY_API_URL` and returns the
 * `result` array of the JSON reply. The request is attempted up to five times
 * back-to-back; a network or JSON-parsing failure, a non-OK HTTP status, or a
 * reply whose `result` is not an array each count as a failed attempt.
 *
 * @param query     Text the sentences are compared with.
 * @param sentences Candidate texts to score.
 * @returns The service's `result` array of `{ i, score }` entries.
 * @throws Error when all attempts failed.
 */
async function findSimilarSentences(query: string, sentences: string[]): Promise<{ i: number, score: number }[]> {
	const maxRetries = 5;
	const requestOptions = {
		method: 'POST',
		headers: {
			'Content-Type': 'application/json',
		},
		body: JSON.stringify({
			query: query,
			sentences: sentences,
		}),
	};

	for (let attempt = 1; attempt <= maxRetries; attempt++) {
		try {
			const response = await fetch(SIMILARITY_API_URL, requestOptions);
			if (!response.ok) {
				throw new Error(`Network response was not ok (status ${response.status})`);
			}

			const jsonData: unknown = await response.json();
			const result = (jsonData as { result?: unknown } | null)?.result;
			// Returning a missing `result` would crash the caller on `.filter`,
			// so a malformed reply is treated like any other failed attempt.
			if (!Array.isArray(result)) {
				throw new Error('Similarity response does not contain a "result" array');
			}
			return result;
		} catch (error) {
			console.error('Error:', error);
			// Only announce a retry when one is actually going to happen.
			if (attempt < maxRetries) {
				console.log(`Retrying similarity request (${attempt}/${maxRetries})...`);
			}
		}
	}

	throw new Error(`Сервер генератора контекста недоступен.`);
}