claw-web-v2

Sleeping

App Files Files Community

Claw Web committed on Apr 2

Commit

bea12d2

1 Parent(s): 68a3bb7

Fix 400 error (tool_calls JSON parse), rewrite WebFetch/WebSearch 1:1 from original claw-code

Browse files

Files changed (3) hide show

server/runtime/agent.ts +12 -0
server/runtime/chat-endpoint.ts +14 -7
server/tools/executor.ts +315 -49

server/runtime/agent.ts CHANGED Viewed

@@ -329,6 +329,18 @@ export async function runAgentLoop(
       }
       if (!response || !response.ok) {
         sendSSE(res, "error", {
           message: `API error: ${lastStatus} after ${MAX_RETRIES + 1} attempts`,
           details: lastErrorText,

       }
       if (!response || !response.ok) {
+        // Log the full payload for debugging 400 errors
+        console.error(`[agent] API error ${lastStatus}:`, lastErrorText);
+        console.error(`[agent] Payload model:`, apiConfig.model);
+        console.error(`[agent] Payload messages count:`, (payload.messages as any[]).length);
+        console.error(`[agent] Message roles:`, (payload.messages as any[]).map((m: any) => m.role).join(', '));
+        if (lastStatus === 400) {
+          // For 400 errors, log more details to help debug
+          console.error(`[agent] Full error body:`, lastErrorText);
+          (payload.messages as any[]).forEach((m: any, i: number) => {
+            console.error(`[agent] msg[${i}] role=${m.role} content_type=${typeof m.content} content_len=${String(m.content || '').length} has_tool_calls=${!!m.tool_calls} has_tool_call_id=${!!m.tool_call_id}`);
+          });
+        }
         sendSSE(res, "error", {
           message: `API error: ${lastStatus} after ${MAX_RETRIES + 1} attempts`,
           details: lastErrorText,

server/runtime/chat-endpoint.ts CHANGED Viewed

@@ -85,13 +85,20 @@ export async function handleChatStream(req: Request, res: Response) {
     const historyMessages = await db.getSessionMessages(sessionId);
     // Convert DB messages to agent format
-    const agentMessages = historyMessages.map((m) => ({
-      role: m.role as "user" | "assistant" | "system" | "tool",
-      content: m.content,
-      tool_calls: m.toolCalls as any,
-      tool_call_id: m.toolCallId || undefined,
-      name: m.toolName || undefined,
-    }));
     // Get plan mode and effort level
     const planMode = getPlanMode(sessionId);

     const historyMessages = await db.getSessionMessages(sessionId);
     // Convert DB messages to agent format
+    // IMPORTANT: tool_calls is stored as JSON string in DB, must parse back to array
+    const agentMessages = historyMessages.map((m) => {
+      let toolCalls = m.toolCalls;
+      if (typeof toolCalls === "string") {
+        try { toolCalls = JSON.parse(toolCalls); } catch { toolCalls = undefined; }
+      }
+      return {
+        role: m.role as "user" | "assistant" | "system" | "tool",
+        content: m.content,
+        tool_calls: toolCalls || undefined,
+        tool_call_id: m.toolCallId || undefined,
+        name: m.toolName || undefined,
+      };
+    });
     // Get plan mode and effort level
     const planMode = getPlanMode(sessionId);

server/tools/executor.ts CHANGED Viewed

@@ -864,46 +864,119 @@ async function executeWebFetch(args: Record<string, unknown>): Promise<string> {
   const url = String(args.url || "");
   if (!url) throw new Error("No URL provided");
   const prompt = String(args.prompt || "Summarize this page");
-  const maxLength = 15000;
   try {
-    const response = await fetch(url, {
-      headers: { "User-Agent": "Mozilla/5.0 (compatible; ClawBot/1.0)" },
-      signal: AbortSignal.timeout(15000),
     });
     const contentType = response.headers.get("content-type") || "";
-    if (!contentType.includes("text") && !contentType.includes("json") && !contentType.includes("xml")) {
-      return `Fetched ${url} — Content-Type: ${contentType} (binary content, not displayed)`;
-    }
-    let text = await response.text();
-    if (contentType.includes("html")) {
-      text = text.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
-        .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
-        .replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
-    }
-    if (text.length > maxLength) {
-      text = text.substring(0, maxLength) + `\n... (truncated)`;
-    }
-    // Use LLM to answer the prompt based on fetched content
-    try {
-      const llmResponse = await invokeLLM({
-        messages: [
-          { role: "system", content: "You are a helpful assistant. Answer the user's question based ONLY on the provided web page content. Be concise and accurate. If the content doesn't contain relevant information, say so." },
-          { role: "user", content: `Web page URL: ${url}\n\nWeb page content:\n${text}\n\n---\nQuestion: ${prompt}` },
-        ],
-        max_tokens: 4096,
-      });
-      const answer = typeof llmResponse.choices?.[0]?.message?.content === "string"
-        ? llmResponse.choices[0].message.content
-        : "(no answer generated)";
-      return `WebFetch: ${url}\nPrompt: ${prompt}\n${"─".repeat(60)}\n${answer}`;
-    } catch (llmError: any) {
-      // Fallback: return raw content if LLM fails
-      return `Fetched: ${url} (LLM unavailable: ${llmError.message})\n${"─".repeat(60)}\n${text}`;
     }
-  } catch (error: any) {
-    return `Fetch failed: ${error.message}`;
   }
 }
 // ─── 8. WebSearch (was: web_search) ────────────────────────────────────────
@@ -911,26 +984,219 @@ async function executeWebFetch(args: Record<string, unknown>): Promise<string> {
 async function executeWebSearch(args: Record<string, unknown>): Promise<string> {
   const query = String(args.query || "");
   if (!query) throw new Error("No query provided");
   try {
-    const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
-    const response = await fetch(url, {
-      headers: { "User-Agent": "Mozilla/5.0 (compatible; ClawBot/1.0)" },
     });
     const html = await response.text();
-    const results: { title: string; url: string; snippet: string }[] = [];
-    const regex = new RegExp('<a rel="nofollow" class="result__a" href="([^"]*)"[^>]*>(.*?)</a>.*?<a class="result__snippet"[^>]*>(.*?)</a>', 'gs');
-    let match;
-    while ((match = regex.exec(html)) !== null && results.length < 5) {
-      results.push({
-        url: match[1].replace(/&amp;/g, "&"),
-        title: match[2].replace(/<[^>]+>/g, "").trim(),
-        snippet: match[3].replace(/<[^>]+>/g, "").trim(),
-      });
     }
-    if (results.length === 0) return `No results found for: "${query}"`;
-    return results.map((r, i) => `${i + 1}. **${r.title}**\n   ${r.url}\n   ${r.snippet}`).join("\n\n");
   } catch (error: any) {
-    return `Search failed: ${error.message}`;
   }
 }

   const url = String(args.url || "");
   if (!url) throw new Error("No URL provided");
   const prompt = String(args.prompt || "Summarize this page");
+  const started = Date.now();
   try {
+    // Normalize URL: upgrade http to https for non-localhost (matches original)
+    let requestUrl = url;
+    try {
+      const parsed = new URL(url);
+      if (parsed.protocol === "http:" && parsed.hostname !== "localhost" && parsed.hostname !== "127.0.0.1" && parsed.hostname !== "::1") {
+        parsed.protocol = "https:";
+        requestUrl = parsed.toString();
+      }
+    } catch {}
+    const response = await fetch(requestUrl, {
+      headers: { "User-Agent": "claw-rust-tools/0.1" },
+      signal: AbortSignal.timeout(20000),
+      redirect: "follow",
     });
+    const finalUrl = response.url;
+    const code = response.status;
+    const codeText = response.statusText || "Unknown";
     const contentType = response.headers.get("content-type") || "";
+    const body = await response.text();
+    const bytes = body.length;
+    // Normalize content: strip HTML tags if HTML (matches original html_to_text)
+    const normalized = contentType.includes("html") ? htmlToText(body) : body.trim();
+    // Summarize based on prompt (matches original summarize_web_fetch)
+    const result = summarizeWebFetch(finalUrl, prompt, normalized, body, contentType);
+    const durationMs = Date.now() - started;
+    return JSON.stringify({
+      bytes,
+      code,
+      codeText,
+      result,
+      durationMs,
+      url: finalUrl,
+    }, null, 2);
+  } catch (error: any) {
+    return JSON.stringify({
+      bytes: 0,
+      code: 0,
+      codeText: "Error",
+      result: `Fetch failed: ${error.message}`,
+      durationMs: Date.now() - started,
+      url,
+    }, null, 2);
+  }
+}
/**
 * Reduces an HTML fragment to plain text.
 *
 * Every `<...>` tag is replaced by a single space, the handful of entities
 * known to `decodeHtmlEntities` are decoded, and whitespace runs are then
 * collapsed by `collapseWhitespace`. Only the tags themselves are removed:
 * text inside `<script>` or `<style>` elements is kept.
 *
 * Per the commit's own note this is meant to mirror `html_to_text` from the
 * original claw-code.
 *
 * @param html - Raw HTML markup.
 * @returns The tag-free, whitespace-collapsed text.
 */
function htmlToText(html: string): string {
  const withoutTags = html.replace(/<[^>]+>/g, " ");
  const decoded = decodeHtmlEntities(withoutTags);
  return collapseWhitespace(decoded);
}
/**
 * Decodes the small set of HTML entities this module cares about:
 * `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&nbsp;` and `&amp;`.
 *
 * `&amp;` is deliberately decoded LAST. Decoding it first would turn an
 * escaped entity such as `&amp;lt;` into `&lt;`, which the following
 * replacement would then decode a second time into `<`.
 *
 * @param input - Text that may contain HTML entities.
 * @returns The text with the supported entities replaced by their characters;
 *          any other entity is left untouched.
 */
function decodeHtmlEntities(input: string): string {
  return input
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&nbsp;/g, " ")
    // Must stay last so that each entity is decoded exactly once.
    .replace(/&amp;/g, "&");
}
/**
 * Collapses every run of whitespace into a single space and drops leading
 * and trailing whitespace.
 *
 * @param input - Arbitrary text.
 * @returns The whitespace-separated words of `input` joined by single spaces;
 *          an empty string when `input` has no non-whitespace characters.
 */
function collapseWhitespace(input: string): string {
  const words: string[] = [];
  for (const piece of input.split(/\s+/)) {
    // Splitting yields empty pieces at the edges when the input starts or
    // ends with whitespace; those are not words.
    if (piece) {
      words.push(piece);
    }
  }
  return words.join(" ");
}
/**
 * Returns `input` unchanged when it fits in `maxChars`, otherwise its first
 * `maxChars` characters with trailing whitespace removed and an ellipsis
 * (`…`) appended.
 *
 * Characters are counted as Unicode code points, so the cut never lands in
 * the middle of a surrogate pair (which would leave a lone surrogate in the
 * output). For text without astral characters the result is the same as a
 * plain `substring(0, maxChars)` cut.
 *
 * @param input - Text to shorten.
 * @param maxChars - Maximum number of characters to keep before the ellipsis.
 * @returns The original text, or the truncated preview ending in `…`.
 */
function previewText(input: string, maxChars: number): string {
  // Fast path: code-unit length is an upper bound on the code-point count.
  if (input.length <= maxChars) return input;

  // Walk forward at most `maxChars` code points, tracking the code-unit offset.
  let end = 0;
  let taken = 0;
  while (end < input.length && taken < maxChars) {
    const codePoint = input.codePointAt(end) ?? 0;
    end += codePoint > 0xffff ? 2 : 1;
    taken += 1;
  }

  // Everything fit once surrogate pairs were counted as single characters.
  if (end >= input.length) return input;
  return input.substring(0, end).trimEnd() + "…";
}
/**
 * Picks a title for fetched content.
 *
 * For HTML responses (`contentType` contains "html") the text of the first
 * `<title>…</title>` element in `rawBody` is used, with entities decoded and
 * whitespace collapsed. If there is no such element, or it is blank, the
 * first non-blank line of `content` is returned instead.
 *
 * The tag is matched case-insensitively against the raw body itself. Searching
 * a lower-cased copy and reusing its offsets on the original is unsafe because
 * lower-casing can change a string's length, shifting every later index. A
 * `<title>` tag that carries attributes is also accepted.
 *
 * @param content - Normalized text of the response, used for the fallback.
 * @param rawBody - Unmodified response body.
 * @param contentType - Value of the response's Content-Type header.
 * @returns The title, or `null` when neither source yields non-blank text.
 */
function extractTitle(content: string, rawBody: string, contentType: string): string | null {
  if (contentType.includes("html")) {
    const match = /<title(?:\s[^>]*)?>([\s\S]*?)<\/title>/i.exec(rawBody);
    const rawTitle = match?.[1];
    if (rawTitle !== undefined) {
      const title = collapseWhitespace(decodeHtmlEntities(rawTitle));
      if (title) return title;
    }
  }
  // Fallback: first line of the normalized content that is not blank.
  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    if (trimmed) return trimmed;
  }
  return null;
}
/**
 * Builds the textual `result` for a fetched page, driven by keywords in the
 * prompt (per the commit's own note, a port of `summarize_web_fetch` from the
 * original claw-code):
 *
 * - prompt mentions "title": `Title: <title>`, or a 600-character preview
 *   when no title can be extracted;
 * - prompt mentions "summary" or "summarize": a 900-character preview;
 * - anything else: the prompt echoed back followed by a 900-character preview.
 *
 * The keyword checks are case-insensitive. No model is invoked here; the
 * "summary" is a truncated preview of the whitespace-collapsed content.
 *
 * @param url - Final URL of the fetched resource.
 * @param prompt - The caller's question about the page.
 * @param content - Normalized text of the response.
 * @param rawBody - Unmodified response body, used for title extraction.
 * @param contentType - Value of the response's Content-Type header.
 * @returns `Fetched <url>` followed by a newline and the chosen detail text.
 */
function summarizeWebFetch(url: string, prompt: string, content: string, rawBody: string, contentType: string): string {
  const promptLc = prompt.toLowerCase();
  const flattened = collapseWhitespace(content);

  const describe = (): string => {
    if (promptLc.includes("title")) {
      const pageTitle = extractTitle(content, rawBody, contentType);
      return pageTitle ? `Title: ${pageTitle}` : previewText(flattened, 600);
    }
    if (promptLc.includes("summary") || promptLc.includes("summarize")) {
      return previewText(flattened, 900);
    }
    const excerpt = previewText(flattened, 900);
    return `Prompt: ${prompt}\nContent preview:\n${excerpt}`;
  };

  return `Fetched ${url}\n${describe()}`;
}
 // ─── 8. WebSearch (was: web_search) ────────────────────────────────────────
 /**
  * Runs a web search and returns the hits as a pretty-printed JSON string.
  *
  * The query is sent to the endpoint named by the `CLAW_WEB_SEARCH_BASE_URL`
  * environment variable (with `q=<query>` set on it) or, when that is unset,
  * to DuckDuckGo's HTML endpoint. Hits are taken from `result__a` anchors,
  * falling back to every `<a>` link when none are found. They are then
  * filtered by the optional domain lists, de-duplicated by URL and capped at 8.
  *
  * Recognized `args`:
  * - `query` (required): the search text.
  * - `allowed_domains` / `blocked_domains` (optional): domain filters. Tool
  *   arguments are untyped, so each is validated at runtime: an array keeps
  *   only its string entries, a non-blank string is treated as a one-element
  *   list, and anything else means "no filter".
  *
  * @param args - Untyped tool-call arguments.
  * @returns JSON with `query`, `results` (a summary string followed by
  *          `{ tool_use_id, content }` holding the hits) and `durationSeconds`.
  *          If the request or parsing fails, `results` holds a single
  *          `Search failed: …` string instead.
  * @throws Error when `query` is missing or empty.
  */
 async function executeWebSearch(args: Record<string, unknown>): Promise<string> {
   const query = String(args.query || "");
   if (!query) throw new Error("No query provided");

   // Validate instead of casting: a non-array value must not reach
   // hostMatchesList, where it would silently reject every hit.
   const toDomainList = (value: unknown): string[] => {
     if (typeof value === "string") return value.trim() ? [value] : [];
     if (!Array.isArray(value)) return [];
     return value.filter((entry): entry is string => typeof entry === "string");
   };
   const allowedDomains = toDomainList(args.allowed_domains);
   const blockedDomains = toDomainList(args.blocked_domains);
   const started = Date.now();

   try {
     // Build search URL: support CLAW_WEB_SEARCH_BASE_URL env override.
     let searchUrl: string;
     const baseUrl = process.env.CLAW_WEB_SEARCH_BASE_URL;
     if (baseUrl) {
       const u = new URL(baseUrl);
       u.searchParams.set("q", query);
       searchUrl = u.toString();
     } else {
       searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
     }

     const response = await fetch(searchUrl, {
       headers: { "User-Agent": "claw-rust-tools/0.1" },
       signal: AbortSignal.timeout(20000),
       redirect: "follow",
     });
     const html = await response.text();

     // Prefer DuckDuckGo result anchors; fall back to generic <a> links.
     let hits = extractSearchHits(html);
     if (hits.length === 0) {
       hits = extractSearchHitsFromGenericLinks(html);
     }

     // Apply domain filters.
     if (allowedDomains.length > 0) {
       hits = hits.filter(hit => hostMatchesList(hit.url, allowedDomains));
     }
     if (blockedDomains.length > 0) {
       hits = hits.filter(hit => !hostMatchesList(hit.url, blockedDomains));
     }

     // Dedupe (in place) and truncate.
     dedupeHits(hits);
     hits = hits.slice(0, 8);

     const durationSeconds = (Date.now() - started) / 1000;

     let summary: string;
     if (hits.length === 0) {
       summary = `No web search results matched the query "${query}".`;
     } else {
       const renderedHits = hits.map(hit => `- [${hit.title}](${hit.url})`).join("\n");
       summary = `Search results for "${query}". Include a Sources section in the final answer.\n${renderedHits}`;
     }

     return JSON.stringify({
       query,
       results: [
         summary,
         { tool_use_id: "web_search_1", content: hits },
       ],
       durationSeconds,
     }, null, 2);
   } catch (error: unknown) {
     // Anything can be thrown; only Error instances carry a usable message.
     const message = error instanceof Error ? error.message : String(error);
     return JSON.stringify({
       query,
       results: [`Search failed: ${message}`],
       durationSeconds: (Date.now() - started) / 1000,
     }, null, 2);
   }
 }
/**
 * Scans DuckDuckGo result HTML for anchors marked with the `result__a` class
 * and returns one hit per anchor (per the commit's own note, a port of
 * `extract_search_hits` from the original claw-code).
 *
 * This is a plain string scan, not an HTML parser. From each `result__a`
 * occurrence it looks for the next `href=`, reads the quoted value, skips to
 * the end of the opening tag and takes everything up to the next `</a>` as
 * the title. Hrefs that `decodeDuckDuckGoRedirect` rejects are dropped.
 *
 * @param html - Search result page markup.
 * @returns Hits in document order, each with a plain-text title and a URL.
 */
function extractSearchHits(html: string): Array<{ title: string; url: string }> {
  const found: Array<{ title: string; url: string }> = [];
  let cursor = html;
  for (;;) {
    const markerPos = cursor.indexOf("result__a");
    if (markerPos === -1) return found;

    const fromMarker = cursor.substring(markerPos);
    // Resume point for a malformed anchor: one character past this marker,
    // so the same occurrence is not matched again.
    const retryFrom = fromMarker.substring(1);

    const hrefPos = fromMarker.indexOf('href=');
    const quoted = hrefPos === -1 ? null : extractQuotedValue(fromMarker.substring(hrefPos + 5));
    if (!quoted) {
      cursor = retryFrom;
      continue;
    }

    const [href, tail] = quoted;
    const tagEnd = tail.indexOf('>');
    if (tagEnd === -1) {
      cursor = retryFrom;
      continue;
    }

    const inner = tail.substring(tagEnd + 1);
    const closePos = inner.indexOf('</a>');
    if (closePos === -1) {
      cursor = inner.substring(1);
      continue;
    }

    const title = htmlToText(inner.substring(0, closePos)).trim();
    const target = decodeDuckDuckGoRedirect(href);
    if (target) {
      found.push({ title, url: target });
    }
    // Continue just past the closing </a> (4 characters).
    cursor = inner.substring(closePos + 4);
  }
}
/**
 * Fallback hit extractor that treats every `<a ... href="...">text</a>` in
 * the page as a candidate result (per the commit's own note, a port of
 * `extract_search_hits_from_generic_links` from the original claw-code).
 *
 * A link is kept only when its text is non-empty after tag stripping and its
 * URL is absolute `http://` or `https://`. The URL is the href passed through
 * `decodeDuckDuckGoRedirect`, or the raw href when that returns nothing.
 *
 * @param html - Page markup to scan.
 * @returns Hits in document order, each with a plain-text title and a URL.
 */
function extractSearchHitsFromGenericLinks(html: string): Array<{ title: string; url: string }> {
  const found: Array<{ title: string; url: string }> = [];
  let cursor = html;
  for (;;) {
    const tagPos = cursor.indexOf('<a');
    if (tagPos === -1) return found;

    const fromTag = cursor.substring(tagPos);
    // Resume point for a malformed anchor: just past this "<a".
    const retryFrom = fromTag.substring(2);

    const hrefPos = fromTag.indexOf('href=');
    const quoted = hrefPos === -1 ? null : extractQuotedValue(fromTag.substring(hrefPos + 5));
    if (!quoted) {
      cursor = retryFrom;
      continue;
    }

    const [href, tail] = quoted;
    const tagEnd = tail.indexOf('>');
    if (tagEnd === -1) {
      cursor = retryFrom;
      continue;
    }

    const inner = tail.substring(tagEnd + 1);
    const closePos = inner.indexOf('</a>');
    if (closePos === -1) {
      cursor = retryFrom;
      continue;
    }

    const title = htmlToText(inner.substring(0, closePos)).trim();
    if (title) {
      const target = decodeDuckDuckGoRedirect(href) || href;
      if (target.startsWith('http://') || target.startsWith('https://')) {
        found.push({ title, url: target });
      }
    }
    // Whether or not the link was kept, continue just past its </a>.
    cursor = inner.substring(closePos + 4);
  }
}
/**
 * Reads a quoted attribute value from the start of `input`.
 *
 * `input` must begin with a single or double quote; the value runs up to the
 * next occurrence of that same quote character. No escape handling is done.
 *
 * @param input - Text positioned at the opening quote.
 * @returns `[value, remainder]`, where `remainder` is everything after the
 *          closing quote, or `null` when `input` does not start with a quote
 *          or the quote is never closed.
 */
function extractQuotedValue(input: string): [string, string] | null {
  const opener = input.charAt(0);
  if (opener !== '"' && opener !== "'") return null;

  const closeAt = input.indexOf(opener, 1);
  if (closeAt === -1) return null;

  return [input.slice(1, closeAt), input.slice(closeAt + 1)];
}
/**
 * Resolves an href taken from a result page into the URL it points at (per
 * the commit's own note, a port of `decode_duckduckgo_redirect` from the
 * original claw-code).
 *
 * - Absolute `http://` / `https://` hrefs are returned with HTML entities
 *   decoded.
 * - Protocol-relative (`//host/...`) hrefs are given an `https:` scheme, and
 *   root-relative (`/...`) hrefs are resolved against `https://duckduckgo.com`.
 *   If the resulting path is `/l` or `/l/` and it carries a non-empty `uddg`
 *   query parameter, that parameter (entity-decoded) is the result; otherwise
 *   the resolved URL itself is returned.
 * - Any other form of href yields `null`.
 *
 * @param url - Raw href attribute value.
 * @returns The resolved URL, or `null` for hrefs that are neither absolute
 *          nor start with `/`.
 */
function decodeDuckDuckGoRedirect(url: string): string | null {
  const isAbsolute = url.startsWith('http://') || url.startsWith('https://');
  if (isAbsolute) {
    return decodeHtmlEntities(url);
  }

  let absolute: string | null = null;
  if (url.startsWith('//')) {
    absolute = `https:${url}`;
  } else if (url.startsWith('/')) {
    absolute = `https://duckduckgo.com${url}`;
  }
  if (absolute === null) {
    return null;
  }

  try {
    const { pathname, searchParams } = new URL(absolute);
    const isRedirectPath = pathname === '/l/' || pathname === '/l';
    const target = isRedirectPath ? searchParams.get('uddg') : null;
    if (target) return decodeHtmlEntities(target);
  } catch {
    // Unparseable URL: best effort, fall through and return it as built.
  }
  return absolute;
}
/**
 * Tells whether the host of `url` is covered by any entry of `domains`.
 *
 * Each entry is normalized with `normalizeDomainFilter`; the host matches an
 * entry when it equals it or is a subdomain of it (ends with `.<entry>`).
 * Entries that normalize to an empty string never match.
 *
 * @param url - Absolute URL whose hostname is tested.
 * @param domains - Domain filter entries.
 * @returns `true` on a match; `false` when nothing matches or when anything
 *          throws while parsing the URL or evaluating the entries.
 */
function hostMatchesList(url: string, domains: string[]): boolean {
  try {
    const hostname = new URL(url).hostname.toLowerCase();
    const coversHost = (rawFilter: string): boolean => {
      const filter = normalizeDomainFilter(rawFilter);
      if (!filter) return false;
      return hostname === filter || hostname.endsWith(`.${filter}`);
    };
    return domains.some(coversHost);
  } catch {
    return false;
  }
}
/**
 * Normalizes a user-supplied domain filter to a bare lower-case host name.
 *
 * If the trimmed entry parses as an absolute URL with a non-empty hostname,
 * that hostname is used; otherwise the trimmed text is used as-is. Every
 * leading dot and every trailing slash is then removed (so `..example.com//`
 * normalizes the same way as `.example.com/`) and the result is lower-cased.
 *
 * @param domain - Filter entry, e.g. `example.com`, `.example.com` or
 *                 `https://example.com/path`.
 * @returns The normalized host name; may be empty for blank input.
 */
function normalizeDomainFilter(domain: string): string {
  const trimmed = domain.trim();
  let candidate = trimmed;
  try {
    candidate = new URL(trimmed).hostname || trimmed;
  } catch {
    // Not an absolute URL (e.g. a bare "example.com"): keep the trimmed text.
  }
  return candidate.replace(/^\.+/, '').replace(/\/+$/, '').toLowerCase();
}
/**
 * Removes hits whose URL has already been seen, keeping the first occurrence
 * of each URL and preserving order.
 *
 * The array is modified in place; nothing is returned.
 *
 * @param hits - Search hits to de-duplicate (mutated).
 */
function dedupeHits(hits: Array<{ title: string; url: string }>): void {
  const seenUrls = new Set<string>();
  // Compact survivors toward the front, then cut off the leftover tail.
  let kept = 0;
  for (const hit of hits) {
    if (seenUrls.has(hit.url)) continue;
    seenUrls.add(hit.url);
    hits[kept] = hit;
    kept += 1;
  }
  hits.length = kept;
}