// rendergate / renderer.js
// tantk's picture
// feat: structured output with description, headings, links
// c862a09
import { chromium } from "playwright";
// Maximum number of renderUrl() calls allowed to be in flight at once;
// further calls are rejected rather than queued.
const MAX_CONCURRENT = 3;
// Upper bound (in string length units) on the page text returned by renderUrl().
const MAX_CONTENT_LENGTH = 100_000;
// Cached promise for the shared browser instance. null until the first launch,
// and reset to null whenever the browser is considered unusable.
let browserPromise = null;
// Number of renderUrl() calls currently running; compared against MAX_CONCURRENT.
let activeRenders = 0;
/**
 * Returns the shared headless Chromium instance, launching it on first use.
 *
 * The launch promise is cached in `browserPromise`, so concurrent callers share
 * a single launch. The cache is cleared when the launch fails or when the
 * browser later disconnects (crash or close), so the next call starts a fresh
 * browser instead of handing out a dead one.
 *
 * @returns {Promise<import("playwright").Browser>} The shared browser.
 * @throws Rejects with the launch error if Chromium cannot be started.
 */
async function getBrowser() {
  if (!browserPromise) {
    const launching = chromium
      .launch({ headless: true })
      .then((browser) => {
        // Drop the cached instance as soon as it goes away, but only if the
        // cache still points at this launch (it may have been replaced since).
        browser.on("disconnected", () => {
          if (browserPromise === launching) browserPromise = null;
        });
        return browser;
      })
      .catch((err) => {
        // Allow a retry on the next call instead of caching the failure.
        if (browserPromise === launching) browserPromise = null;
        throw err;
      });
    browserPromise = launching;
  }
  return browserPromise;
}
/**
 * Polls the page's visible text length every 250 ms (at most 20 times) and
 * returns once it has been unchanged and longer than 100 characters for two
 * consecutive polls.
 */
async function waitForContentToSettle(page) {
  let prevLen = 0;
  let stableCount = 0;
  for (let i = 0; i < 20; i++) {
    await page.waitForTimeout(250);
    // document.body can be null (e.g. non-HTML documents); treat that as empty.
    const curLen = await page.evaluate(() => document.body?.innerText.length ?? 0);
    if (curLen === prevLen && curLen > 100) {
      stableCount++;
      if (stableCount >= 2) break;
    } else {
      stableCount = 0;
    }
    prevLen = curLen;
  }
}

/** Scrolls down three times, pausing after each step, to trigger lazy-loaded content. */
async function scrollForLazyContent(page) {
  for (let i = 0; i < 3; i++) {
    await page.evaluate(() => window.scrollBy(0, 1000));
    await page.waitForTimeout(500);
  }
  await page.waitForTimeout(500);
}

/**
 * Runs inside the page: strips non-content elements, then collects the title,
 * description, headings, links and visible text. It is passed to
 * page.evaluate(), so it must not reference anything from this module's scope.
 */
function extractPageData() {
  document
    .querySelectorAll('script, style, nav[aria-label="Footer"], [role="complementary"]')
    .forEach((el) => el.remove());

  const title = document.title;
  const description =
    document.querySelector('meta[name="description"]')?.content ||
    document.querySelector('meta[property="og:description"]')?.content ||
    "";

  const headings = Array.from(document.querySelectorAll("h1, h2, h3"), (el) => ({
    level: el.tagName,
    text: (el.innerText ?? "").trim(),
  })).filter((h) => h.text.length > 0);

  const links = Array.from(document.querySelectorAll("a[href]"))
    // "a[href]" also matches SVG <a> elements, whose href is not a string and
    // which have no innerText; skip them instead of failing the whole render.
    .filter((el) => typeof el.href === "string")
    .map((el) => ({ text: (el.innerText ?? "").trim(), href: el.href }))
    .filter((l) => l.text.length > 0 && l.href.startsWith("http"))
    .slice(0, 50);

  const content = document.body?.innerText ?? "";
  return { title, description, headings, links, content };
}

/**
 * Renders a URL in the shared headless browser and returns structured content.
 *
 * NOTE(review): `url` is handed to page.goto() as-is, with no scheme or host
 * validation here; callers exposing this to untrusted input should validate it
 * first (e.g. allow only http/https).
 *
 * @param {string} url - Address to navigate to.
 * @param {object} [options]
 * @param {number} [options.timeout=30000] - Navigation timeout in milliseconds.
 * @param {boolean} [options.scroll=true] - Scroll the page to trigger lazy-loaded content.
 * @returns {Promise<{title: string, description: string,
 *   headings: {level: string, text: string}[],
 *   links: {text: string, href: string}[],
 *   content: string, url: string, renderedAt: string}>}
 *   `content` is truncated to MAX_CONTENT_LENGTH; `links` holds at most 50 entries.
 * @throws {Error} When MAX_CONCURRENT renders are already running, when the
 *   browser or page cannot be created (original error attached as `cause`), or
 *   when navigation/extraction fails (original error rethrown).
 */
export async function renderUrl(url, { timeout = 30000, scroll = true } = {}) {
  if (activeRenders >= MAX_CONCURRENT) {
    throw new Error("Too many concurrent renders, try again later");
  }
  activeRenders++;

  let browser;
  let page;
  try {
    browser = await getBrowser();
    page = await browser.newPage();
  } catch (err) {
    activeRenders--;
    browserPromise = null; // Reset on browser failure
    throw new Error(`Browser launch failed: ${err.message}`, { cause: err });
  }

  try {
    await page.setExtraHTTPHeaders({
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    });
    await page.goto(url, { waitUntil: "domcontentloaded", timeout });

    // Smart wait: poll until content stabilizes
    await waitForContentToSettle(page);

    // Scroll to trigger lazy-loaded content
    if (scroll) {
      await scrollForLazyContent(page);
    }

    const extracted = await page.evaluate(extractPageData);

    return {
      title: extracted.title,
      description: extracted.description,
      headings: extracted.headings,
      links: extracted.links,
      content: extracted.content.slice(0, MAX_CONTENT_LENGTH),
      url,
      renderedAt: new Date().toISOString(),
    };
  } catch (err) {
    // If page interaction fails, the browser may be dead. The message text is
    // not stable across Playwright versions, so also ask the browser directly.
    const looksClosed =
      err.message?.includes("Target closed") || err.message?.includes("Browser closed");
    if (looksClosed || !browser.isConnected()) {
      browserPromise = null;
    }
    throw err;
  } finally {
    activeRenders--;
    await page.close().catch(() => {});
  }
}
/**
 * Shuts down the shared browser, if one was launched.
 *
 * The cached promise is detached before anything is awaited, so callers that
 * arrive during shutdown launch a fresh browser rather than receiving the one
 * being closed, and a browser launched in the meantime is not discarded when
 * this function finishes. Launch and close failures are ignored (best effort).
 *
 * @returns {Promise<void>}
 */
export async function closeBrowser() {
  const pending = browserPromise;
  if (!pending) return;
  browserPromise = null;

  const browser = await pending.catch(() => null);
  if (browser) await browser.close().catch(() => {});
}