Spaces:
Build error
Build error
Update src/lib/server/websearch/runWebSearch.ts
Browse files
src/lib/server/websearch/runWebSearch.ts
CHANGED
|
@@ -10,6 +10,7 @@ import {
|
|
| 10 |
} from "$lib/server/websearch/sentenceSimilarity";
|
| 11 |
import type { Conversation } from "$lib/types/Conversation";
|
| 12 |
import type { MessageUpdate } from "$lib/types/MessageUpdate";
|
|
|
|
| 13 |
|
| 14 |
const MAX_N_PAGES_SCRAPE = 10 as const;
|
| 15 |
const MAX_N_PAGES_EMBED = 5 as const;
|
|
@@ -17,7 +18,8 @@ const MAX_N_PAGES_EMBED = 5 as const;
|
|
| 17 |
export async function runWebSearch(
|
| 18 |
conv: Conversation,
|
| 19 |
prompt: string,
|
| 20 |
-
updatePad: (upd: MessageUpdate) => void
|
|
|
|
| 21 |
) {
|
| 22 |
const messages = (() => {
|
| 23 |
return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
|
|
@@ -26,6 +28,7 @@ export async function runWebSearch(
|
|
| 26 |
const webSearch: WebSearch = {
|
| 27 |
prompt: prompt,
|
| 28 |
searchQuery: "",
|
|
|
|
| 29 |
results: [],
|
| 30 |
context: "",
|
| 31 |
contextSources: [],
|
|
@@ -33,12 +36,40 @@ export async function runWebSearch(
|
|
| 33 |
updatedAt: new Date(),
|
| 34 |
};
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
/**
 * Reports a web-search progress message through `updatePad`.
 *
 * @param message Text of the update.
 * @param args Optional extra values forwarded unchanged with the message.
 * @param type Kind of message; falls back to "update" when omitted.
 */
function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
	const messageType = type ?? "update";
	updatePad({ type: "webSearch", messageType, message, args });
}
|
| 39 |
|
| 40 |
try {
|
| 41 |
webSearch.searchQuery = await generateQuery(messages);
|
|
|
|
|
|
|
| 42 |
appendUpdate("Searching Google", [webSearch.searchQuery]);
|
| 43 |
const results = await searchWeb(webSearch.searchQuery);
|
| 44 |
webSearch.results =
|
|
@@ -63,7 +94,7 @@ export async function runWebSearch(
|
|
| 63 |
text = await parseWeb(link);
|
| 64 |
appendUpdate("Browsing webpage", [link]);
|
| 65 |
} catch (e) {
|
| 66 |
-
|
| 67 |
}
|
| 68 |
const MAX_N_CHUNKS = 100;
|
| 69 |
const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
|
|
@@ -111,4 +142,4 @@ export async function runWebSearch(
|
|
| 111 |
}
|
| 112 |
|
| 113 |
return webSearch;
|
| 114 |
-
}
|
|
|
|
| 10 |
} from "$lib/server/websearch/sentenceSimilarity";
|
| 11 |
import type { Conversation } from "$lib/types/Conversation";
|
| 12 |
import type { MessageUpdate } from "$lib/types/MessageUpdate";
|
| 13 |
+
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
| 14 |
|
| 15 |
const MAX_N_PAGES_SCRAPE = 10 as const;
|
| 16 |
const MAX_N_PAGES_EMBED = 5 as const;
|
|
|
|
| 18 |
export async function runWebSearch(
|
| 19 |
conv: Conversation,
|
| 20 |
prompt: string,
|
| 21 |
+
updatePad: (upd: MessageUpdate) => void,
|
| 22 |
+
domainFiltersStr: string
|
| 23 |
) {
|
| 24 |
const messages = (() => {
|
| 25 |
return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
|
|
|
|
| 28 |
const webSearch: WebSearch = {
|
| 29 |
prompt: prompt,
|
| 30 |
searchQuery: "",
|
| 31 |
+
domainFilters: [],
|
| 32 |
results: [],
|
| 33 |
context: "",
|
| 34 |
contextSources: [],
|
|
|
|
| 36 |
updatedAt: new Date(),
|
| 37 |
};
|
| 38 |
|
| 39 |
+
|
| 40 |
+
/**
 * Extracts the valid domain names from a free-form filter string.
 *
 * Entries may be separated by commas and/or any whitespace (spaces, tabs,
 * newlines). Entries that do not look like a hostname are dropped, and
 * repeated entries are returned only once, in first-seen order.
 *
 * @param domainFiltersStr Raw, user-provided list of domains.
 * @returns The distinct entries that passed validation; empty when none did.
 */
function extractDomains(domainFiltersStr: string): string[] {
	// Hostname shape: one or more dot-terminated labels (letters/digits, with
	// hyphens allowed only inside a label) followed by an alphabetic TLD of at
	// least two letters. This rejects inputs such as "..com" or "-.com".
	const domainPattern = /^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}$/;

	// A Set keeps insertion order while discarding duplicates, so the same
	// domain is never emitted twice.
	const domains = new Set<string>();

	// Split on any whitespace, not just spaces, so tab/newline separated lists
	// are tokenised instead of being rejected as one malformed entry. Empty
	// tokens produced by leading/trailing separators fail the pattern.
	for (const part of domainFiltersStr.split(/[\s,]+/)) {
		if (domainPattern.test(part)) {
			domains.add(part);
		}
	}

	return [...domains];
}
|
| 52 |
+
|
| 53 |
+
webSearch.domainFilters = extractDomains(domainFiltersStr);
|
| 54 |
+
|
| 55 |
+
/**
 * Builds the `site:` restriction that is placed in front of a search query.
 *
 * @param domainFilters Domains the search should be limited to.
 * @returns One `site:<domain>` term per filter, joined with " OR " and
 *   followed by a single space; an empty string when there are no filters.
 */
function formatSearchQuery(domainFilters: string[]): string {
	const siteTerms: string[] = [];
	for (const host of domainFilters) {
		siteTerms.push("site:" + host);
	}

	// Without filters there is no prefix at all, not even the trailing space.
	return siteTerms.length > 0 ? `${siteTerms.join(" OR ")} ` : "";
}
|
| 64 |
+
|
| 65 |
/**
 * Reports a web-search progress message through `updatePad`.
 *
 * @param message Text of the update.
 * @param args Optional extra values forwarded unchanged with the message.
 * @param type Kind of message; falls back to "update" when omitted.
 */
function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
	const messageType = type ?? "update";
	updatePad({ type: "webSearch", messageType, message, args });
}
|
| 68 |
|
| 69 |
try {
|
| 70 |
webSearch.searchQuery = await generateQuery(messages);
|
| 71 |
+
// limit the sources to certain sites
|
| 72 |
+
webSearch.searchQuery = formatSearchQuery(webSearch.domainFilters) + webSearch.searchQuery;
|
| 73 |
appendUpdate("Searching Google", [webSearch.searchQuery]);
|
| 74 |
const results = await searchWeb(webSearch.searchQuery);
|
| 75 |
webSearch.results =
|
|
|
|
| 94 |
text = await parseWeb(link);
|
| 95 |
appendUpdate("Browsing webpage", [link]);
|
| 96 |
} catch (e) {
|
| 97 |
+
console.error(`Error parsing webpage "${link}"`, e);
|
| 98 |
}
|
| 99 |
const MAX_N_CHUNKS = 100;
|
| 100 |
const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
|
|
|
|
| 142 |
}
|
| 143 |
|
| 144 |
return webSearch;
|
| 145 |
+
}
|