File size: 1,691 Bytes
3a65265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import { createWebFetchTool } from "../src/agents/tools/web-tools.js";

// Pages exercised when no URLs are supplied on the command line.
const DEFAULT_URLS = [
  "https://example.com/",
  "https://news.ycombinator.com/",
  "https://www.reddit.com/r/javascript/",
  "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent",
  "https://httpbin.org/html",
];

// CLI arguments take precedence; otherwise fall back to the built-in list.
const cliUrls = process.argv.slice(2);
const targets = cliUrls.length ? cliUrls : DEFAULT_URLS;

/**
 * Fetch `url` via the project's web_fetch tool using readability extraction.
 *
 * Throws when `readability` is false (basic extraction was removed) or when
 * the tool factory returns nothing (tool disabled in config).
 * Returns the tool result's `details` payload, narrowed to the fields this
 * script prints.
 */
async function runFetch(url: string, readability: boolean) {
  if (!readability) {
    throw new Error("Basic extraction removed. Set readability=true or enable Firecrawl.");
  }
  // Build the minimal config: caching off, Firecrawl off, readability on.
  const fetchSettings = {
    readability,
    cacheTtlMinutes: 0,
    firecrawl: { enabled: false },
  };
  const tool = createWebFetchTool({
    config: { tools: { web: { fetch: fetchSettings } } },
    sandboxed: false,
  });
  if (!tool) {
    throw new Error("web_fetch tool is disabled");
  }
  const result = await tool.execute("test", { url, extractMode: "markdown" });
  // NOTE(review): the cast assumes the tool's `details` shape — confirm against
  // the web-tools implementation if fields are added or renamed.
  return result.details as {
    text?: string;
    title?: string;
    extractor?: string;
    length?: number;
    truncated?: boolean;
  };
}

/**
 * Shorten `value` to at most `max` characters, appending an ellipsis when cut.
 * Empty/falsy input yields the empty string.
 */
function truncate(value: string, max = 160): string {
  if (!value) return "";
  if (value.length <= max) return value;
  return `${value.slice(0, max)}…`;
}

/** Fetch every target URL sequentially and print a readability summary for each. */
async function run() {
  for (const url of targets) {
    console.log(`\n=== ${url}`);
    const readable = await runFetch(url, true);

    // Summarize extractor, content length, and (truncated) page title.
    const extractor = readable.extractor ?? "unknown";
    const length = readable.length ?? 0;
    const title = truncate(readable.title ?? "", 80);
    console.log(`readability: ${extractor} len=${length} title=${title}`);

    if (readable.text) {
      console.log(`readability sample: ${truncate(readable.text)}`);
    }
  }
}

// Entry point: report any failure and exit nonzero so callers/CI notice.
run().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});