| import { JSDOM } from "jsdom"; | |
| import { Readability } from "@mozilla/readability"; | |
| import { fetchText } from "./fetch.js"; | |
/**
 * Fetches a web page and extracts its readable text.
 *
 * The page is downloaded with a desktop-browser User-Agent, parsed with
 * JSDOM, and run through Mozilla Readability. When Readability yields no
 * usable text, the function falls back to the whitespace-normalised text of
 * the whole `<body>` together with the document `<title>`.
 *
 * @param url - Address of the page to fetch; also used as the document URL
 *   handed to JSDOM.
 * @returns The page title and its text content with every run of whitespace
 *   collapsed to a single space and the ends trimmed. Either field may be an
 *   empty string when nothing could be extracted.
 * @throws Whatever `fetchText` or the JSDOM constructor throws; errors are
 *   not caught here.
 */
export async function extractReadable(
  url: string
): Promise<{ title: string; content: string }> {
  // Collapse all whitespace runs so callers get single-line, trimmed text.
  const normalize = (text: string): string => text.replace(/\s+/g, " ").trim();

  const html = await fetchText(url, {
    headers: {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
    }
  });

  const dom = new JSDOM(html, { url });
  try {
    const { document } = dom.window;

    // Readability.parse() rewrites the document it is given (it strips and
    // rearranges nodes), so capture the fallback values BEFORE parsing;
    // otherwise the fallback would read from an already-mutilated DOM.
    const fallbackTitle = document.title ?? "";
    const fallbackText = document.body?.textContent ?? "";

    const article = new Readability(document).parse();

    // Normalise first: a whitespace-only extraction is as useless as none,
    // so it must trigger the fallback as well.
    const articleText = normalize(article?.textContent ?? "");
    if (article && articleText !== "") {
      return {
        title: article.title ?? "",
        content: articleText
      };
    }

    return {
      title: fallbackTitle,
      content: normalize(fallbackText)
    };
  } finally {
    // Release the JSDOM window (timers, listeners, document memory) on every
    // path, including when Readability throws.
    dom.window.close();
  }
}