Spaces:
Sleeping
Sleeping
Claw Web committed on
Commit ·
bea12d2
1
Parent(s): 68a3bb7
Fix 400 error (tool_calls JSON parse), rewrite WebFetch/WebSearch 1:1 from original claw-code
Browse files- server/runtime/agent.ts +12 -0
- server/runtime/chat-endpoint.ts +14 -7
- server/tools/executor.ts +315 -49
server/runtime/agent.ts
CHANGED
|
@@ -329,6 +329,18 @@ export async function runAgentLoop(
|
|
| 329 |
}
|
| 330 |
|
| 331 |
if (!response || !response.ok) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
sendSSE(res, "error", {
|
| 333 |
message: `API error: ${lastStatus} after ${MAX_RETRIES + 1} attempts`,
|
| 334 |
details: lastErrorText,
|
|
|
|
| 329 |
}
|
| 330 |
|
| 331 |
if (!response || !response.ok) {
|
| 332 |
+
// Log the full payload for debugging 400 errors
|
| 333 |
+
console.error(`[agent] API error ${lastStatus}:`, lastErrorText);
|
| 334 |
+
console.error(`[agent] Payload model:`, apiConfig.model);
|
| 335 |
+
console.error(`[agent] Payload messages count:`, (payload.messages as any[]).length);
|
| 336 |
+
console.error(`[agent] Message roles:`, (payload.messages as any[]).map((m: any) => m.role).join(', '));
|
| 337 |
+
if (lastStatus === 400) {
|
| 338 |
+
// For 400 errors, log more details to help debug
|
| 339 |
+
console.error(`[agent] Full error body:`, lastErrorText);
|
| 340 |
+
(payload.messages as any[]).forEach((m: any, i: number) => {
|
| 341 |
+
console.error(`[agent] msg[${i}] role=${m.role} content_type=${typeof m.content} content_len=${String(m.content || '').length} has_tool_calls=${!!m.tool_calls} has_tool_call_id=${!!m.tool_call_id}`);
|
| 342 |
+
});
|
| 343 |
+
}
|
| 344 |
sendSSE(res, "error", {
|
| 345 |
message: `API error: ${lastStatus} after ${MAX_RETRIES + 1} attempts`,
|
| 346 |
details: lastErrorText,
|
server/runtime/chat-endpoint.ts
CHANGED
|
@@ -85,13 +85,20 @@ export async function handleChatStream(req: Request, res: Response) {
|
|
| 85 |
const historyMessages = await db.getSessionMessages(sessionId);
|
| 86 |
|
| 87 |
// Convert DB messages to agent format
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
// Get plan mode and effort level
|
| 97 |
const planMode = getPlanMode(sessionId);
|
|
|
|
| 85 |
const historyMessages = await db.getSessionMessages(sessionId);
|
| 86 |
|
| 87 |
// Convert DB messages to agent format
|
| 88 |
+
// IMPORTANT: tool_calls is stored as JSON string in DB, must parse back to array
|
| 89 |
+
const agentMessages = historyMessages.map((m) => {
|
| 90 |
+
let toolCalls = m.toolCalls;
|
| 91 |
+
if (typeof toolCalls === "string") {
|
| 92 |
+
try { toolCalls = JSON.parse(toolCalls); } catch { toolCalls = undefined; }
|
| 93 |
+
}
|
| 94 |
+
return {
|
| 95 |
+
role: m.role as "user" | "assistant" | "system" | "tool",
|
| 96 |
+
content: m.content,
|
| 97 |
+
tool_calls: toolCalls || undefined,
|
| 98 |
+
tool_call_id: m.toolCallId || undefined,
|
| 99 |
+
name: m.toolName || undefined,
|
| 100 |
+
};
|
| 101 |
+
});
|
| 102 |
|
| 103 |
// Get plan mode and effort level
|
| 104 |
const planMode = getPlanMode(sessionId);
|
server/tools/executor.ts
CHANGED
|
@@ -864,46 +864,119 @@ async function executeWebFetch(args: Record<string, unknown>): Promise<string> {
|
|
| 864 |
const url = String(args.url || "");
|
| 865 |
if (!url) throw new Error("No URL provided");
|
| 866 |
const prompt = String(args.prompt || "Summarize this page");
|
| 867 |
-
const
|
| 868 |
try {
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 872 |
});
|
|
|
|
|
|
|
|
|
|
| 873 |
const contentType = response.headers.get("content-type") || "";
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 886 |
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 903 |
}
|
| 904 |
-
} catch (error: any) {
|
| 905 |
-
return `Fetch failed: ${error.message}`;
|
| 906 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 907 |
}
|
| 908 |
|
| 909 |
// ─── 8. WebSearch (was: web_search) ────────────────────────────────────────
|
|
@@ -911,26 +984,219 @@ async function executeWebFetch(args: Record<string, unknown>): Promise<string> {
|
|
| 911 |
async function executeWebSearch(args: Record<string, unknown>): Promise<string> {
|
| 912 |
const query = String(args.query || "");
|
| 913 |
if (!query) throw new Error("No query provided");
|
|
|
|
|
|
|
|
|
|
|
|
|
| 914 |
try {
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 918 |
});
|
|
|
|
| 919 |
const html = await response.text();
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
let
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
}
|
| 930 |
-
|
| 931 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 932 |
} catch (error: any) {
|
| 933 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 934 |
}
|
| 935 |
}
|
| 936 |
|
|
|
|
| 864 |
const url = String(args.url || "");
|
| 865 |
if (!url) throw new Error("No URL provided");
|
| 866 |
const prompt = String(args.prompt || "Summarize this page");
|
| 867 |
+
const started = Date.now();
|
| 868 |
try {
|
| 869 |
+
// Normalize URL: upgrade http to https for non-localhost (matches original)
|
| 870 |
+
let requestUrl = url;
|
| 871 |
+
try {
|
| 872 |
+
const parsed = new URL(url);
|
| 873 |
+
if (parsed.protocol === "http:" && parsed.hostname !== "localhost" && parsed.hostname !== "127.0.0.1" && parsed.hostname !== "::1") {
|
| 874 |
+
parsed.protocol = "https:";
|
| 875 |
+
requestUrl = parsed.toString();
|
| 876 |
+
}
|
| 877 |
+
} catch {}
|
| 878 |
+
|
| 879 |
+
const response = await fetch(requestUrl, {
|
| 880 |
+
headers: { "User-Agent": "claw-rust-tools/0.1" },
|
| 881 |
+
signal: AbortSignal.timeout(20000),
|
| 882 |
+
redirect: "follow",
|
| 883 |
});
|
| 884 |
+
const finalUrl = response.url;
|
| 885 |
+
const code = response.status;
|
| 886 |
+
const codeText = response.statusText || "Unknown";
|
| 887 |
const contentType = response.headers.get("content-type") || "";
|
| 888 |
+
const body = await response.text();
|
| 889 |
+
const bytes = body.length;
|
| 890 |
+
|
| 891 |
+
// Normalize content: strip HTML tags if HTML (matches original html_to_text)
|
| 892 |
+
const normalized = contentType.includes("html") ? htmlToText(body) : body.trim();
|
| 893 |
+
|
| 894 |
+
// Summarize based on prompt (matches original summarize_web_fetch)
|
| 895 |
+
const result = summarizeWebFetch(finalUrl, prompt, normalized, body, contentType);
|
| 896 |
+
const durationMs = Date.now() - started;
|
| 897 |
+
|
| 898 |
+
return JSON.stringify({
|
| 899 |
+
bytes,
|
| 900 |
+
code,
|
| 901 |
+
codeText,
|
| 902 |
+
result,
|
| 903 |
+
durationMs,
|
| 904 |
+
url: finalUrl,
|
| 905 |
+
}, null, 2);
|
| 906 |
+
} catch (error: any) {
|
| 907 |
+
return JSON.stringify({
|
| 908 |
+
bytes: 0,
|
| 909 |
+
code: 0,
|
| 910 |
+
codeText: "Error",
|
| 911 |
+
result: `Fetch failed: ${error.message}`,
|
| 912 |
+
durationMs: Date.now() - started,
|
| 913 |
+
url,
|
| 914 |
+
}, null, 2);
|
| 915 |
+
}
|
| 916 |
+
}
|
| 917 |
|
| 918 |
+
/**
 * Converts an HTML fragment to plain text.
 *
 * HTML comments and the complete contents of `<script>` / `<style>` elements
 * are dropped first so that code and CSS never end up in the extracted text.
 * Every remaining tag is replaced by a space, entities are decoded and runs
 * of whitespace are collapsed to a single space.
 *
 * @param html Raw HTML markup.
 * @returns The visible text on a single line.
 */
function htmlToText(html: string): string {
  const withoutHidden = html
    // Comments may contain '>' which would confuse the generic tag stripper.
    .replace(/<!--[\s\S]*?-->/g, " ")
    // Remove the element together with its body, not only the tags.
    .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ");
  const text = withoutHidden.replace(/<[^>]+>/g, " ");
  return collapseWhitespace(decodeHtmlEntities(text));
}
|
| 923 |
+
|
| 924 |
+
/**
 * Decodes the handful of HTML entities this module handles:
 * `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;` and `&nbsp;`.
 *
 * All entities are replaced in a single pass, so text produced by one
 * replacement is never decoded a second time (`&amp;lt;` becomes `&lt;`,
 * not `<`). Unknown entities and bare ampersands are left untouched.
 *
 * @param input Text that may contain HTML entities.
 * @returns The text with the supported entities decoded.
 */
function decodeHtmlEntities(input: string): string {
  const entities: Record<string, string> = {
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
    "&quot;": '"',
    "&#39;": "'",
    "&nbsp;": " ",
  };
  return input.replace(
    /&(?:amp|lt|gt|quot|#39|nbsp);/g,
    (entity) => entities[entity] ?? entity,
  );
}
|
| 933 |
+
|
| 934 |
+
/**
 * Squeezes every run of whitespace into a single space and removes leading
 * and trailing whitespace.
 *
 * @param input Arbitrary text.
 * @returns The text on one line with single-space separators.
 */
function collapseWhitespace(input: string): string {
  return input.trim().replace(/\s+/g, " ");
}
|
| 937 |
+
|
| 938 |
+
/**
 * Truncates text to at most `maxChars` characters and appends an ellipsis
 * when anything was cut off.
 *
 * Characters are counted as Unicode code points, so a surrogate pair (for
 * example an emoji) is never split in half. Trailing whitespace left by the
 * cut is removed before the ellipsis is added.
 *
 * @param input Text to shorten.
 * @param maxChars Maximum number of characters to keep.
 * @returns `input` unchanged when it fits, otherwise the shortened text plus "…".
 */
function previewText(input: string, maxChars: number): string {
  // Fewer UTF-16 units than the limit implies fewer code points as well.
  if (input.length <= maxChars) return input;

  let end = 0;
  let count = 0;
  // Iterating a string yields whole code points.
  for (const ch of input) {
    if (count >= maxChars) break;
    end += ch.length;
    count++;
  }
  if (end >= input.length) return input;
  return input.substring(0, end).trimEnd() + "…";
}
|
| 942 |
+
|
| 943 |
+
/**
 * Picks a title for fetched content.
 *
 * For HTML content types the first `<title>` element of the raw body is used.
 * The match is case-insensitive and tolerates attributes on the tag; it runs
 * directly on `rawBody`, so offsets can never drift the way they can when
 * indices taken from a lower-cased copy are applied to the original string.
 * When no non-empty title is found, the first non-blank line of `content`
 * is returned instead.
 *
 * @param content Normalized text of the document.
 * @param rawBody Unmodified response body.
 * @param contentType Value of the response's content-type header.
 * @returns The title, or `null` when nothing usable exists.
 */
function extractTitle(content: string, rawBody: string, contentType: string): string | null {
  if (contentType.includes("html")) {
    const match = /<title\b[^>]*>([\s\S]*?)<\/title\s*>/i.exec(rawBody);
    const rawTitle = match?.[1];
    if (rawTitle !== undefined) {
      const title = collapseWhitespace(decodeHtmlEntities(rawTitle));
      if (title) return title;
    }
  }

  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    if (trimmed) return trimmed;
  }
  return null;
}
|
| 962 |
+
|
| 963 |
+
/**
 * Builds the textual result of a web fetch, shaped by the caller's prompt.
 *
 * - Prompt mentions "title": the extracted title, or a 600-character preview
 *   when no title can be found.
 * - Prompt mentions "summary" / "summarize": a 900-character preview.
 * - Anything else: the prompt echoed back followed by a 900-character preview.
 *
 * The prompt is matched case-insensitively and the result always starts with
 * a "Fetched <url>" line.
 */
function summarizeWebFetch(url: string, prompt: string, content: string, rawBody: string, contentType: string): string {
  const promptLower = prompt.toLowerCase();
  const flattened = collapseWhitespace(content);

  const describe = (): string => {
    if (promptLower.includes("title")) {
      const pageTitle = extractTitle(content, rawBody, contentType);
      return pageTitle ? `Title: ${pageTitle}` : previewText(flattened, 600);
    }
    const excerpt = previewText(flattened, 900);
    const wantsSummary = promptLower.includes("summary") || promptLower.includes("summarize");
    return wantsSummary ? excerpt : `Prompt: ${prompt}\nContent preview:\n${excerpt}`;
  };

  const detail = describe();
  return `Fetched ${url}\n${detail}`;
}
|
| 981 |
|
| 982 |
// ─── 8. WebSearch (was: web_search) ────────────────────────────────────────
|
|
|
|
| 984 |
/**
 * Runs a web search and returns the hits as a pretty-printed JSON string.
 *
 * The search endpoint is `CLAW_WEB_SEARCH_BASE_URL` when that environment
 * variable is set (the query is passed as the `q` parameter), otherwise the
 * DuckDuckGo HTML endpoint. Hits are parsed from the returned HTML, filtered
 * by the optional domain lists, de-duplicated by URL and capped at 8.
 *
 * @param args Tool arguments: `query` (required), plus optional
 *   `allowed_domains` / `blocked_domains`, each an array of domains or a
 *   single domain string.
 * @returns JSON with `query`, `results` (a summary string followed by the
 *   structured hits) and `durationSeconds`. Failures of the request or of the
 *   parsing are reported inside the JSON instead of being thrown.
 * @throws Error when no query is provided.
 */
async function executeWebSearch(args: Record<string, unknown>): Promise<string> {
  const query = String(args.query || "");
  if (!query) throw new Error("No query provided");

  // Tool arguments are untyped: accept a single domain string as a
  // one-element list and ignore anything that is not a list at all.
  const toDomainList = (value: unknown): string[] | null => {
    if (typeof value === "string") return [value];
    return Array.isArray(value) ? (value as string[]) : null;
  };
  const allowedDomains = toDomainList(args.allowed_domains);
  const blockedDomains = toDomainList(args.blocked_domains);
  const started = Date.now();

  try {
    // Build the search URL, honouring the CLAW_WEB_SEARCH_BASE_URL override.
    let searchUrl: string;
    const baseUrl = process.env.CLAW_WEB_SEARCH_BASE_URL;
    if (baseUrl) {
      const u = new URL(baseUrl);
      u.searchParams.set("q", query);
      searchUrl = u.toString();
    } else {
      searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
    }

    const response = await fetch(searchUrl, {
      headers: { "User-Agent": "claw-rust-tools/0.1" },
      signal: AbortSignal.timeout(20000),
      redirect: "follow",
    });
    const html = await response.text();

    // Prefer DuckDuckGo's "result__a" anchors; fall back to any <a> link.
    let hits = extractSearchHits(html);
    if (hits.length === 0) {
      hits = extractSearchHitsFromGenericLinks(html);
    }

    // Apply the domain filters.
    if (allowedDomains && allowedDomains.length > 0) {
      hits = hits.filter((hit) => hostMatchesList(hit.url, allowedDomains));
    }
    if (blockedDomains && blockedDomains.length > 0) {
      hits = hits.filter((hit) => !hostMatchesList(hit.url, blockedDomains));
    }

    // De-duplicate in place, then keep the first 8 hits.
    dedupeHits(hits);
    hits = hits.slice(0, 8);

    const durationSeconds = (Date.now() - started) / 1000;

    let summary: string;
    if (hits.length === 0) {
      summary = `No web search results matched the query "${query}".`;
    } else {
      const renderedHits = hits.map((hit) => `- [${hit.title}](${hit.url})`).join("\n");
      summary = `Search results for "${query}". Include a Sources section in the final answer.\n${renderedHits}`;
    }

    return JSON.stringify({
      query,
      results: [
        summary,
        { tool_use_id: "web_search_1", content: hits },
      ],
      durationSeconds,
    }, null, 2);
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances carry a message.
    const message = error instanceof Error ? error.message : String(error);
    return JSON.stringify({
      query,
      results: [`Search failed: ${message}`],
      durationSeconds: (Date.now() - started) / 1000,
    }, null, 2);
  }
}
|
| 1058 |
+
|
| 1059 |
+
/**
 * Scans DuckDuckGo result HTML for anchors carrying the "result__a" marker
 * and returns one hit per anchor whose href can be turned into a URL.
 *
 * For every marker the next quoted `href=` value is taken as the link and the
 * markup up to the following `</a>` as the title. Malformed anchors are
 * skipped by resuming the scan one character past the marker.
 */
function extractSearchHits(html: string): Array<{ title: string; url: string }> {
  const MARKER = "result__a";
  const results: Array<{ title: string; url: string }> = [];
  let cursor = html;

  for (let pos = cursor.indexOf(MARKER); pos !== -1; pos = cursor.indexOf(MARKER)) {
    const fromMarker = cursor.substring(pos);
    // Default resume point for every "malformed anchor" exit below.
    cursor = fromMarker.substring(1);

    const hrefAt = fromMarker.indexOf("href=");
    if (hrefAt === -1) continue;

    const quoted = extractQuotedValue(fromMarker.substring(hrefAt + 5));
    if (quoted === null) continue;

    const [href, afterHref] = quoted;
    const tagEnd = afterHref.indexOf(">");
    if (tagEnd === -1) continue;

    const inner = afterHref.substring(tagEnd + 1);
    const closeAt = inner.indexOf("</a>");
    if (closeAt === -1) {
      cursor = inner.substring(1);
      continue;
    }

    const title = htmlToText(inner.substring(0, closeAt)).trim();
    const target = decodeDuckDuckGoRedirect(href);
    if (target) {
      results.push({ title, url: target });
    }
    cursor = inner.substring(closeAt + 4);
  }

  return results;
}
|
| 1094 |
+
|
| 1095 |
+
/**
 * Fallback hit extraction: walks every "<a" occurrence in the HTML and keeps
 * links that have a non-empty title and an absolute http(s) URL.
 *
 * Hrefs are passed through the DuckDuckGo redirect decoder first; when that
 * yields nothing the raw href is used. Malformed anchors are skipped by
 * resuming the scan two characters past the "<a".
 */
function extractSearchHitsFromGenericLinks(html: string): Array<{ title: string; url: string }> {
  const results: Array<{ title: string; url: string }> = [];
  let cursor = html;

  for (let pos = cursor.indexOf("<a"); pos !== -1; pos = cursor.indexOf("<a")) {
    const fromAnchor = cursor.substring(pos);
    // Default resume point for every "malformed anchor" exit below.
    cursor = fromAnchor.substring(2);

    const hrefAt = fromAnchor.indexOf("href=");
    if (hrefAt === -1) continue;

    const quoted = extractQuotedValue(fromAnchor.substring(hrefAt + 5));
    if (quoted === null) continue;

    const [href, afterHref] = quoted;
    const tagEnd = afterHref.indexOf(">");
    if (tagEnd === -1) continue;

    const inner = afterHref.substring(tagEnd + 1);
    const closeAt = inner.indexOf("</a>");
    if (closeAt === -1) continue;

    // A complete anchor was found: continue after its closing tag either way.
    cursor = inner.substring(closeAt + 4);

    const title = htmlToText(inner.substring(0, closeAt)).trim();
    if (!title) continue;

    const target = decodeDuckDuckGoRedirect(href) || href;
    if (target.startsWith("http://") || target.startsWith("https://")) {
      results.push({ title, url: target });
    }
  }

  return results;
}
|
| 1132 |
+
|
| 1133 |
+
/**
 * Reads a quoted value from the very start of `input`.
 *
 * @param input Text expected to begin with a single or double quote.
 * @returns A tuple of the text between the opening quote and the next
 *   matching quote, and everything after that closing quote; `null` when
 *   `input` does not start with a quote or the quote is never closed.
 */
function extractQuotedValue(input: string): [string, string] | null {
  const opener = input.charAt(0);
  if (opener !== '"' && opener !== "'") return null;

  const closeAt = input.indexOf(opener, 1);
  if (closeAt === -1) return null;

  return [input.slice(1, closeAt), input.slice(closeAt + 1)];
}
|
| 1141 |
+
|
| 1142 |
+
/**
 * Resolves an href taken from a DuckDuckGo result page to a destination URL.
 *
 * - Absolute http(s) links are returned with HTML entities decoded.
 * - Protocol-relative ("//…") and root-relative ("/…") links are made
 *   absolute; when the resulting path is "/l" or "/l/" and a `uddg` query
 *   parameter is present, that parameter's (entity-decoded) value is returned.
 *   Otherwise the absolute link itself is returned.
 * - Any other form yields `null`.
 */
function decodeDuckDuckGoRedirect(url: string): string | null {
  const isAbsolute = url.startsWith("http://") || url.startsWith("https://");
  if (isAbsolute) return decodeHtmlEntities(url);

  if (!url.startsWith("/")) return null;
  const absolute = url.startsWith("//") ? `https:${url}` : `https://duckduckgo.com${url}`;

  try {
    const { pathname, searchParams } = new URL(absolute);
    if (pathname === "/l/" || pathname === "/l") {
      const target = searchParams.get("uddg");
      if (target) return decodeHtmlEntities(target);
    }
  } catch {}

  // Unparseable link or no redirect target: fall back to the absolute link.
  return absolute;
}
|
| 1166 |
+
|
| 1167 |
+
/**
 * Tells whether the host of `url` is covered by any entry of `domains`.
 *
 * An entry covers the host when, after normalization, it equals the host or
 * is a parent domain of it (the host ends with "." + entry). Entries that
 * normalize to an empty string never match.
 *
 * @returns `false` when nothing matches, and also when `url` cannot be parsed
 *   or anything else throws during the check.
 */
function hostMatchesList(url: string, domains: string[]): boolean {
  try {
    const hostname = new URL(url).hostname.toLowerCase();
    const covers = (entry: string): boolean => {
      const wanted = normalizeDomainFilter(entry);
      if (!wanted) return false;
      return hostname === wanted || hostname.endsWith(`.${wanted}`);
    };
    return domains.some(covers);
  } catch {
    return false;
  }
}
|
| 1179 |
+
|
| 1180 |
+
/**
 * Normalizes a user-supplied domain filter to a bare lower-case host name.
 *
 * Surrounding whitespace is trimmed. When the value parses as a URL with a
 * host, that host is used; otherwise the trimmed value itself. Every leading
 * dot and every trailing slash is then removed (not just the first one), so
 * inputs such as "..example.com" or "example.com//" normalize cleanly.
 *
 * @param domain Filter entry, e.g. "example.com", ".example.com" or
 *   "https://example.com/".
 * @returns The normalized host; may be empty when the input was blank.
 */
function normalizeDomainFilter(domain: string): string {
  const trimmed = domain.trim();

  let candidate = trimmed;
  try {
    // A URL without a host (e.g. "example.com:8080" parsed as a scheme)
    // has an empty hostname; keep the raw value in that case.
    candidate = new URL(trimmed).hostname || trimmed;
  } catch {
    // Not a URL: treat the input as a plain domain.
  }

  return candidate.replace(/^\.+/, "").replace(/\/+$/, "").toLowerCase();
}
|
| 1189 |
+
|
| 1190 |
+
/**
 * Removes hits whose URL has already been seen, keeping the first occurrence
 * of each URL and the original order.
 *
 * The array is modified in place with a single compaction pass: kept entries
 * are moved towards the front and the array is truncated afterwards. This
 * avoids the quadratic cost of calling `splice` for every duplicate.
 *
 * @param hits Search hits to de-duplicate; mutated in place.
 */
function dedupeHits(hits: Array<{ title: string; url: string }>): void {
  const seen = new Set<string>();
  let kept = 0;

  for (const hit of hits) {
    if (seen.has(hit.url)) continue;
    seen.add(hit.url);
    // `kept` never exceeds the read position, so unread entries are safe.
    hits[kept++] = hit;
  }

  hits.length = kept;
}
|
| 1202 |
|