Luis Milke committed on
Commit
4109e70
·
1 Parent(s): feda7c2

Fix JSON hallucination text bleed and bypass DuckDuckGo HF block via Puppeteer

Browse files
Files changed (2) hide show
  1. src/agent.ts +1 -0
  2. src/tools/search.ts +38 -33
src/agent.ts CHANGED
@@ -535,6 +535,7 @@ Wenn du KEIN Tool aufrufst, darf deine Ausgabe NUR die Nachricht an den Nutzer s
535
  }
536
  });
537
  hasTools = true;
 
538
  console.log(`[Agent] Intercepted hallucinated JSON tool call: ${parsed.name}`);
539
  }
540
  } catch (e) {
 
535
  }
536
  });
537
  hasTools = true;
538
+ response.message.content = response.message.content.replace(block, '').trim();
539
  console.log(`[Agent] Intercepted hallucinated JSON tool call: ${parsed.name}`);
540
  }
541
  } catch (e) {
src/tools/search.ts CHANGED
@@ -1,4 +1,4 @@
1
- import * as cheerio from 'cheerio';
2
 
3
  export const webSearchTool = {
4
  type: 'function',
@@ -19,50 +19,51 @@ export const webSearchTool = {
19
  };
20
 
21
  export async function executeWebSearch(query: string): Promise<string> {
 
22
  try {
23
- const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
24
- const res = await fetch(url, {
25
- headers: {
26
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
27
- 'Accept': 'text/html,application/xhtml+xml,application/xml;'
28
- }
29
  });
30
 
31
- if (!res.ok) {
32
- return `Error performing web search: HTTP Status ${res.status}`;
33
- }
34
 
35
- const html = await res.text();
36
- const $ = cheerio.load(html);
37
 
38
- const results: Array<{ title: string, url: string, description: string }> = [];
 
 
39
 
40
- $('.result').each((i: number, el: any) => {
41
- if (results.length >= 5) return; // Top 5
42
 
43
- const titleEl = $(el).find('.result__title a');
44
- const snippetEl = $(el).find('.result__snippet');
45
 
46
- const title = titleEl.text().trim();
47
- let link = titleEl.attr('href') || '';
48
- const snippet = snippetEl.text().trim();
 
49
 
50
- if (title && link) {
51
- // duckduckgo wraps external links in a redirect url sometimes
52
- if (link.startsWith('//duckduckgo.com/l/?uddg=')) {
53
- try {
54
- const rawUrl = new URL('https:' + link);
55
- const actualUrl = rawUrl.searchParams.get('uddg');
56
- if (actualUrl) link = decodeURIComponent(actualUrl);
57
- } catch (e) { }
 
 
58
  }
59
-
60
- results.push({ title, url: link, description: snippet });
61
- }
62
  });
63
 
64
  if (results.length === 0) {
65
- return `No search results found for query: "${query}"`;
66
  }
67
 
68
  const formattedResults = results.map((r, index) => {
@@ -71,6 +72,10 @@ export async function executeWebSearch(query: string): Promise<string> {
71
 
72
  return `Search Results for "${query}":\n\n${formattedResults}`;
73
  } catch (error: any) {
74
- return `Error performing web search: ${error.message}`;
 
 
 
 
75
  }
76
  }
 
1
+ import puppeteer from 'puppeteer';
2
 
3
  export const webSearchTool = {
4
  type: 'function',
 
19
  };
20
 
21
  export async function executeWebSearch(query: string): Promise<string> {
22
+ let browser;
23
  try {
24
+ browser = await puppeteer.launch({
25
+ headless: true,
26
+ args: ['--no-sandbox', '--disable-setuid-sandbox']
 
 
 
27
  });
28
 
29
+ const page = await browser.newPage();
30
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
 
31
 
32
+ const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
33
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 });
34
 
35
+ const results = await page.evaluate(() => {
36
+ const res: Array<{ title: string, url: string, description: string }> = [];
37
+ const elements = document.querySelectorAll('.result');
38
 
39
+ elements.forEach((el) => {
40
+ if (res.length >= 5) return;
41
 
42
+ const titleEl = el.querySelector('.result__title a');
43
+ const snippetEl = el.querySelector('.result__snippet');
44
 
45
+ if (titleEl && snippetEl) {
46
+ const title = titleEl.textContent?.trim() || '';
47
+ let link = titleEl.getAttribute('href') || '';
48
+ const description = snippetEl.textContent?.trim() || '';
49
 
50
+ if (title && link) {
51
+ if (link.startsWith('//duckduckgo.com/l/?uddg=')) {
52
+ try {
53
+ const rawUrl = new URL('https:' + link);
54
+ const actualUrl = rawUrl.searchParams.get('uddg');
55
+ if (actualUrl) link = decodeURIComponent(actualUrl);
56
+ } catch (e) { }
57
+ }
58
+ res.push({ title, url: link, description });
59
+ }
60
  }
61
+ });
62
+ return res;
 
63
  });
64
 
65
  if (results.length === 0) {
66
+ return `No search results found for query: "${query}". Try using the browser_visit tool directly on a known URL.`;
67
  }
68
 
69
  const formattedResults = results.map((r, index) => {
 
72
 
73
  return `Search Results for "${query}":\n\n${formattedResults}`;
74
  } catch (error: any) {
75
+ return `Error performing web search (Cloud/Network block): ${error.message}`;
76
+ } finally {
77
+ if (browser) {
78
+ await browser.close();
79
+ }
80
  }
81
  }