Claw Web committed on
Commit
bea12d2
·
1 Parent(s): 68a3bb7

Fix 400 error (tool_calls JSON parse), rewrite WebFetch/WebSearch 1:1 from original claw-code

Browse files
server/runtime/agent.ts CHANGED
@@ -329,6 +329,18 @@ export async function runAgentLoop(
329
  }
330
 
331
  if (!response || !response.ok) {
 
 
 
 
 
 
 
 
 
 
 
 
332
  sendSSE(res, "error", {
333
  message: `API error: ${lastStatus} after ${MAX_RETRIES + 1} attempts`,
334
  details: lastErrorText,
 
329
  }
330
 
331
  if (!response || !response.ok) {
332
+ // Log the full payload for debugging 400 errors
333
+ console.error(`[agent] API error ${lastStatus}:`, lastErrorText);
334
+ console.error(`[agent] Payload model:`, apiConfig.model);
335
+ console.error(`[agent] Payload messages count:`, (payload.messages as any[]).length);
336
+ console.error(`[agent] Message roles:`, (payload.messages as any[]).map((m: any) => m.role).join(', '));
337
+ if (lastStatus === 400) {
338
+ // For 400 errors, log more details to help debug
339
+ console.error(`[agent] Full error body:`, lastErrorText);
340
+ (payload.messages as any[]).forEach((m: any, i: number) => {
341
+ console.error(`[agent] msg[${i}] role=${m.role} content_type=${typeof m.content} content_len=${String(m.content || '').length} has_tool_calls=${!!m.tool_calls} has_tool_call_id=${!!m.tool_call_id}`);
342
+ });
343
+ }
344
  sendSSE(res, "error", {
345
  message: `API error: ${lastStatus} after ${MAX_RETRIES + 1} attempts`,
346
  details: lastErrorText,
server/runtime/chat-endpoint.ts CHANGED
@@ -85,13 +85,20 @@ export async function handleChatStream(req: Request, res: Response) {
85
  const historyMessages = await db.getSessionMessages(sessionId);
86
 
87
  // Convert DB messages to agent format
88
- const agentMessages = historyMessages.map((m) => ({
89
- role: m.role as "user" | "assistant" | "system" | "tool",
90
- content: m.content,
91
- tool_calls: m.toolCalls as any,
92
- tool_call_id: m.toolCallId || undefined,
93
- name: m.toolName || undefined,
94
- }));
 
 
 
 
 
 
 
95
 
96
  // Get plan mode and effort level
97
  const planMode = getPlanMode(sessionId);
 
85
  const historyMessages = await db.getSessionMessages(sessionId);
86
 
87
  // Convert DB messages to agent format
88
+ // IMPORTANT: tool_calls is stored as JSON string in DB, must parse back to array
89
+ const agentMessages = historyMessages.map((m) => {
90
+ let toolCalls = m.toolCalls;
91
+ if (typeof toolCalls === "string") {
92
+ try { toolCalls = JSON.parse(toolCalls); } catch { toolCalls = undefined; }
93
+ }
94
+ return {
95
+ role: m.role as "user" | "assistant" | "system" | "tool",
96
+ content: m.content,
97
+ tool_calls: toolCalls || undefined,
98
+ tool_call_id: m.toolCallId || undefined,
99
+ name: m.toolName || undefined,
100
+ };
101
+ });
102
 
103
  // Get plan mode and effort level
104
  const planMode = getPlanMode(sessionId);
server/tools/executor.ts CHANGED
@@ -864,46 +864,119 @@ async function executeWebFetch(args: Record<string, unknown>): Promise<string> {
864
  const url = String(args.url || "");
865
  if (!url) throw new Error("No URL provided");
866
  const prompt = String(args.prompt || "Summarize this page");
867
- const maxLength = 15000;
868
  try {
869
- const response = await fetch(url, {
870
- headers: { "User-Agent": "Mozilla/5.0 (compatible; ClawBot/1.0)" },
871
- signal: AbortSignal.timeout(15000),
 
 
 
 
 
 
 
 
 
 
 
872
  });
 
 
 
873
  const contentType = response.headers.get("content-type") || "";
874
- if (!contentType.includes("text") && !contentType.includes("json") && !contentType.includes("xml")) {
875
- return `Fetched ${url} — Content-Type: ${contentType} (binary content, not displayed)`;
876
- }
877
- let text = await response.text();
878
- if (contentType.includes("html")) {
879
- text = text.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
880
- .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
881
- .replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
882
- }
883
- if (text.length > maxLength) {
884
- text = text.substring(0, maxLength) + `\n... (truncated)`;
885
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
886
 
887
- // Use LLM to answer the prompt based on fetched content
888
- try {
889
- const llmResponse = await invokeLLM({
890
- messages: [
891
- { role: "system", content: "You are a helpful assistant. Answer the user's question based ONLY on the provided web page content. Be concise and accurate. If the content doesn't contain relevant information, say so." },
892
- { role: "user", content: `Web page URL: ${url}\n\nWeb page content:\n${text}\n\n---\nQuestion: ${prompt}` },
893
- ],
894
- max_tokens: 4096,
895
- });
896
- const answer = typeof llmResponse.choices?.[0]?.message?.content === "string"
897
- ? llmResponse.choices[0].message.content
898
- : "(no answer generated)";
899
- return `WebFetch: ${url}\nPrompt: ${prompt}\n${"".repeat(60)}\n${answer}`;
900
- } catch (llmError: any) {
901
- // Fallback: return raw content if LLM fails
902
- return `Fetched: ${url} (LLM unavailable: ${llmError.message})\n${"─".repeat(60)}\n${text}`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
903
  }
904
- } catch (error: any) {
905
- return `Fetch failed: ${error.message}`;
906
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
907
  }
908
 
909
  // ─── 8. WebSearch (was: web_search) ────────────────────────────────────────
@@ -911,26 +984,219 @@ async function executeWebFetch(args: Record<string, unknown>): Promise<string> {
911
  async function executeWebSearch(args: Record<string, unknown>): Promise<string> {
912
  const query = String(args.query || "");
913
  if (!query) throw new Error("No query provided");
 
 
 
 
914
  try {
915
- const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
916
- const response = await fetch(url, {
917
- headers: { "User-Agent": "Mozilla/5.0 (compatible; ClawBot/1.0)" },
 
 
 
 
 
 
 
 
 
 
 
 
918
  });
 
919
  const html = await response.text();
920
- const results: { title: string; url: string; snippet: string }[] = [];
921
- const regex = new RegExp('<a rel="nofollow" class="result__a" href="([^"]*)"[^>]*>(.*?)</a>.*?<a class="result__snippet"[^>]*>(.*?)</a>', 'gs');
922
- let match;
923
- while ((match = regex.exec(html)) !== null && results.length < 5) {
924
- results.push({
925
- url: match[1].replace(/&amp;/g, "&"),
926
- title: match[2].replace(/<[^>]+>/g, "").trim(),
927
- snippet: match[3].replace(/<[^>]+>/g, "").trim(),
928
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
929
  }
930
- if (results.length === 0) return `No results found for: "${query}"`;
931
- return results.map((r, i) => `${i + 1}. **${r.title}**\n ${r.url}\n ${r.snippet}`).join("\n\n");
 
 
 
 
 
 
 
932
  } catch (error: any) {
933
- return `Search failed: ${error.message}`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
934
  }
935
  }
936
 
 
864
  const url = String(args.url || "");
865
  if (!url) throw new Error("No URL provided");
866
  const prompt = String(args.prompt || "Summarize this page");
867
+ const started = Date.now();
868
  try {
869
+ // Normalize URL: upgrade http to https for non-localhost (matches original)
870
+ let requestUrl = url;
871
+ try {
872
+ const parsed = new URL(url);
873
+ if (parsed.protocol === "http:" && parsed.hostname !== "localhost" && parsed.hostname !== "127.0.0.1" && parsed.hostname !== "::1") {
874
+ parsed.protocol = "https:";
875
+ requestUrl = parsed.toString();
876
+ }
877
+ } catch {}
878
+
879
+ const response = await fetch(requestUrl, {
880
+ headers: { "User-Agent": "claw-rust-tools/0.1" },
881
+ signal: AbortSignal.timeout(20000),
882
+ redirect: "follow",
883
  });
884
+ const finalUrl = response.url;
885
+ const code = response.status;
886
+ const codeText = response.statusText || "Unknown";
887
  const contentType = response.headers.get("content-type") || "";
888
+ const body = await response.text();
889
+ const bytes = body.length;
890
+
891
+ // Normalize content: strip HTML tags if HTML (matches original html_to_text)
892
+ const normalized = contentType.includes("html") ? htmlToText(body) : body.trim();
893
+
894
+ // Summarize based on prompt (matches original summarize_web_fetch)
895
+ const result = summarizeWebFetch(finalUrl, prompt, normalized, body, contentType);
896
+ const durationMs = Date.now() - started;
897
+
898
+ return JSON.stringify({
899
+ bytes,
900
+ code,
901
+ codeText,
902
+ result,
903
+ durationMs,
904
+ url: finalUrl,
905
+ }, null, 2);
906
+ } catch (error: any) {
907
+ return JSON.stringify({
908
+ bytes: 0,
909
+ code: 0,
910
+ codeText: "Error",
911
+ result: `Fetch failed: ${error.message}`,
912
+ durationMs: Date.now() - started,
913
+ url,
914
+ }, null, 2);
915
+ }
916
+ }
917
 
918
+ // Matches original html_to_text: strip tags, collapse whitespace
919
+ function htmlToText(html: string): string {
920
+ const text = html.replace(/<[^>]+>/g, " ");
921
+ return collapseWhitespace(decodeHtmlEntities(text));
922
+ }
923
+
924
+ function decodeHtmlEntities(input: string): string {
925
+ return input
926
+ .replace(/&amp;/g, "&")
927
+ .replace(/&lt;/g, "<")
928
+ .replace(/&gt;/g, ">")
929
+ .replace(/&quot;/g, '"')
930
+ .replace(/&#39;/g, "'")
931
+ .replace(/&nbsp;/g, " ");
932
+ }
933
+
934
+ function collapseWhitespace(input: string): string {
935
+ return input.split(/\s+/).filter(Boolean).join(" ");
936
+ }
937
+
938
+ function previewText(input: string, maxChars: number): string {
939
+ if (input.length <= maxChars) return input;
940
+ return input.substring(0, maxChars).trimEnd() + "…";
941
+ }
942
+
943
+ function extractTitle(content: string, rawBody: string, contentType: string): string | null {
944
+ if (contentType.includes("html")) {
945
+ const lowered = rawBody.toLowerCase();
946
+ const start = lowered.indexOf("<title>");
947
+ if (start !== -1) {
948
+ const after = start + "<title>".length;
949
+ const endRel = lowered.indexOf("</title>", after);
950
+ if (endRel !== -1) {
951
+ const title = collapseWhitespace(decodeHtmlEntities(rawBody.substring(after, endRel)));
952
+ if (title) return title;
953
+ }
954
  }
 
 
955
  }
956
+ for (const line of content.split("\n")) {
957
+ const trimmed = line.trim();
958
+ if (trimmed) return trimmed;
959
+ }
960
+ return null;
961
+ }
962
+
963
+ // Matches original summarize_web_fetch exactly
964
+ function summarizeWebFetch(url: string, prompt: string, content: string, rawBody: string, contentType: string): string {
965
+ const lowerPrompt = prompt.toLowerCase();
966
+ const compact = collapseWhitespace(content);
967
+
968
+ let detail: string;
969
+ if (lowerPrompt.includes("title")) {
970
+ const title = extractTitle(content, rawBody, contentType);
971
+ detail = title ? `Title: ${title}` : previewText(compact, 600);
972
+ } else if (lowerPrompt.includes("summary") || lowerPrompt.includes("summarize")) {
973
+ detail = previewText(compact, 900);
974
+ } else {
975
+ const preview = previewText(compact, 900);
976
+ detail = `Prompt: ${prompt}\nContent preview:\n${preview}`;
977
+ }
978
+
979
+ return `Fetched ${url}\n${detail}`;
980
  }
981
 
982
  // ─── 8. WebSearch (was: web_search) ────────────────────────────────────────
 
984
  async function executeWebSearch(args: Record<string, unknown>): Promise<string> {
985
  const query = String(args.query || "");
986
  if (!query) throw new Error("No query provided");
987
+ const allowedDomains = (args.allowed_domains as string[]) || null;
988
+ const blockedDomains = (args.blocked_domains as string[]) || null;
989
+ const started = Date.now();
990
+
991
  try {
992
+ // Build search URL: support CLAW_WEB_SEARCH_BASE_URL env (matches original)
993
+ let searchUrl: string;
994
+ const baseUrl = process.env.CLAW_WEB_SEARCH_BASE_URL;
995
+ if (baseUrl) {
996
+ const u = new URL(baseUrl);
997
+ u.searchParams.set("q", query);
998
+ searchUrl = u.toString();
999
+ } else {
1000
+ searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
1001
+ }
1002
+
1003
+ const response = await fetch(searchUrl, {
1004
+ headers: { "User-Agent": "claw-rust-tools/0.1" },
1005
+ signal: AbortSignal.timeout(20000),
1006
+ redirect: "follow",
1007
  });
1008
+ const finalUrl = response.url;
1009
  const html = await response.text();
1010
+
1011
+ // Extract search hits using DDG "result__a" class (matches original extract_search_hits)
1012
+ let hits = extractSearchHits(html);
1013
+
1014
+ // Fallback: extract from generic <a> links if no DDG results (matches original)
1015
+ if (hits.length === 0) {
1016
+ hits = extractSearchHitsFromGenericLinks(html);
1017
+ }
1018
+
1019
+ // Apply domain filters (matches original)
1020
+ if (allowedDomains && allowedDomains.length > 0) {
1021
+ hits = hits.filter(hit => hostMatchesList(hit.url, allowedDomains));
1022
+ }
1023
+ if (blockedDomains && blockedDomains.length > 0) {
1024
+ hits = hits.filter(hit => !hostMatchesList(hit.url, blockedDomains));
1025
+ }
1026
+
1027
+ // Dedupe and truncate (matches original)
1028
+ dedupeHits(hits);
1029
+ hits = hits.slice(0, 8);
1030
+
1031
+ const durationSeconds = (Date.now() - started) / 1000;
1032
+
1033
+ // Build summary (matches original format)
1034
+ let summary: string;
1035
+ if (hits.length === 0) {
1036
+ summary = `No web search results matched the query "${query}".`;
1037
+ } else {
1038
+ const renderedHits = hits.map(hit => `- [${hit.title}](${hit.url})`).join("\n");
1039
+ summary = `Search results for "${query}". Include a Sources section in the final answer.\n${renderedHits}`;
1040
  }
1041
+
1042
+ return JSON.stringify({
1043
+ query,
1044
+ results: [
1045
+ summary,
1046
+ { tool_use_id: "web_search_1", content: hits },
1047
+ ],
1048
+ durationSeconds,
1049
+ }, null, 2);
1050
  } catch (error: any) {
1051
+ return JSON.stringify({
1052
+ query,
1053
+ results: [`Search failed: ${error.message}`],
1054
+ durationSeconds: (Date.now() - started) / 1000,
1055
+ }, null, 2);
1056
+ }
1057
+ }
1058
+
1059
+ // Matches original extract_search_hits: parse DDG result__a anchors
1060
+ function extractSearchHits(html: string): Array<{ title: string; url: string }> {
1061
+ const hits: Array<{ title: string; url: string }> = [];
1062
+ let remaining = html;
1063
+
1064
+ while (true) {
1065
+ const anchorStart = remaining.indexOf("result__a");
1066
+ if (anchorStart === -1) break;
1067
+ const afterClass = remaining.substring(anchorStart);
1068
+
1069
+ const hrefIdx = afterClass.indexOf('href=');
1070
+ if (hrefIdx === -1) { remaining = afterClass.substring(1); continue; }
1071
+
1072
+ const hrefSlice = afterClass.substring(hrefIdx + 5);
1073
+ const extracted = extractQuotedValue(hrefSlice);
1074
+ if (!extracted) { remaining = afterClass.substring(1); continue; }
1075
+
1076
+ const [rawUrl, rest] = extracted;
1077
+ const closeTagIdx = rest.indexOf('>');
1078
+ if (closeTagIdx === -1) { remaining = afterClass.substring(1); continue; }
1079
+
1080
+ const afterTag = rest.substring(closeTagIdx + 1);
1081
+ const endAnchorIdx = afterTag.indexOf('</a>');
1082
+ if (endAnchorIdx === -1) { remaining = afterTag.substring(1); continue; }
1083
+
1084
+ const title = htmlToText(afterTag.substring(0, endAnchorIdx)).trim();
1085
+ const decodedUrl = decodeDuckDuckGoRedirect(rawUrl);
1086
+ if (decodedUrl) {
1087
+ hits.push({ title, url: decodedUrl });
1088
+ }
1089
+ remaining = afterTag.substring(endAnchorIdx + 4);
1090
+ }
1091
+
1092
+ return hits;
1093
+ }
1094
+
1095
+ // Matches original extract_search_hits_from_generic_links
1096
+ function extractSearchHitsFromGenericLinks(html: string): Array<{ title: string; url: string }> {
1097
+ const hits: Array<{ title: string; url: string }> = [];
1098
+ let remaining = html;
1099
+
1100
+ while (true) {
1101
+ const anchorStart = remaining.indexOf('<a');
1102
+ if (anchorStart === -1) break;
1103
+ const afterAnchor = remaining.substring(anchorStart);
1104
+
1105
+ const hrefIdx = afterAnchor.indexOf('href=');
1106
+ if (hrefIdx === -1) { remaining = afterAnchor.substring(2); continue; }
1107
+
1108
+ const hrefSlice = afterAnchor.substring(hrefIdx + 5);
1109
+ const extracted = extractQuotedValue(hrefSlice);
1110
+ if (!extracted) { remaining = afterAnchor.substring(2); continue; }
1111
+
1112
+ const [rawUrl, rest] = extracted;
1113
+ const closeTagIdx = rest.indexOf('>');
1114
+ if (closeTagIdx === -1) { remaining = afterAnchor.substring(2); continue; }
1115
+
1116
+ const afterTag = rest.substring(closeTagIdx + 1);
1117
+ const endAnchorIdx = afterTag.indexOf('</a>');
1118
+ if (endAnchorIdx === -1) { remaining = afterAnchor.substring(2); continue; }
1119
+
1120
+ const title = htmlToText(afterTag.substring(0, endAnchorIdx)).trim();
1121
+ if (!title) { remaining = afterTag.substring(endAnchorIdx + 4); continue; }
1122
+
1123
+ const decodedUrl = decodeDuckDuckGoRedirect(rawUrl) || rawUrl;
1124
+ if (decodedUrl.startsWith('http://') || decodedUrl.startsWith('https://')) {
1125
+ hits.push({ title, url: decodedUrl });
1126
+ }
1127
+ remaining = afterTag.substring(endAnchorIdx + 4);
1128
+ }
1129
+
1130
+ return hits;
1131
+ }
1132
+
1133
+ function extractQuotedValue(input: string): [string, string] | null {
1134
+ const quote = input[0];
1135
+ if (quote !== '"' && quote !== "'") return null;
1136
+ const rest = input.substring(1);
1137
+ const end = rest.indexOf(quote);
1138
+ if (end === -1) return null;
1139
+ return [rest.substring(0, end), rest.substring(end + 1)];
1140
+ }
1141
+
1142
+ // Matches original decode_duckduckgo_redirect
1143
+ function decodeDuckDuckGoRedirect(url: string): string | null {
1144
+ if (url.startsWith('http://') || url.startsWith('https://')) {
1145
+ return decodeHtmlEntities(url);
1146
+ }
1147
+
1148
+ let joined: string;
1149
+ if (url.startsWith('//')) {
1150
+ joined = `https:${url}`;
1151
+ } else if (url.startsWith('/')) {
1152
+ joined = `https://duckduckgo.com${url}`;
1153
+ } else {
1154
+ return null;
1155
+ }
1156
+
1157
+ try {
1158
+ const parsed = new URL(joined);
1159
+ if (parsed.pathname === '/l/' || parsed.pathname === '/l') {
1160
+ const uddg = parsed.searchParams.get('uddg');
1161
+ if (uddg) return decodeHtmlEntities(uddg);
1162
+ }
1163
+ } catch {}
1164
+ return joined;
1165
+ }
1166
+
1167
+ function hostMatchesList(url: string, domains: string[]): boolean {
1168
+ try {
1169
+ const parsed = new URL(url);
1170
+ const host = parsed.hostname.toLowerCase();
1171
+ return domains.some(domain => {
1172
+ const normalized = normalizeDomainFilter(domain);
1173
+ return normalized && (host === normalized || host.endsWith(`.${normalized}`));
1174
+ });
1175
+ } catch {
1176
+ return false;
1177
+ }
1178
+ }
1179
+
1180
+ function normalizeDomainFilter(domain: string): string {
1181
+ const trimmed = domain.trim();
1182
+ try {
1183
+ const parsed = new URL(trimmed);
1184
+ return (parsed.hostname || trimmed).replace(/^\./, '').replace(/\/$/, '').toLowerCase();
1185
+ } catch {
1186
+ return trimmed.replace(/^\./, '').replace(/\/$/, '').toLowerCase();
1187
+ }
1188
+ }
1189
+
1190
+ function dedupeHits(hits: Array<{ title: string; url: string }>): void {
1191
+ const seen = new Set<string>();
1192
+ let i = 0;
1193
+ while (i < hits.length) {
1194
+ if (seen.has(hits[i].url)) {
1195
+ hits.splice(i, 1);
1196
+ } else {
1197
+ seen.add(hits[i].url);
1198
+ i++;
1199
+ }
1200
  }
1201
  }
1202