ShoaibSSM's picture
Upload 9 files
e1f3958 verified
from langchain_core.tools import tool
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
from urllib.parse import urljoin
@tool
def get_rendered_html(url: str) -> dict:
    """
    Fetch and return the fully rendered HTML of a webpage.

    The page is opened in headless Chromium and captured after navigation
    reports the "networkidle" state, so the returned markup is the DOM as
    serialized by the browser rather than the raw HTTP response body.

    Returns a dict with:
      - "html": the rendered page markup,
      - "images": absolute URLs of every <img> tag that has a src attribute,
      - "url": the URL that was requested.
    If anything fails, returns a dict with a single "error" key instead of
    raising.
    """
    print("\nFetching and rendering:", url)
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(url, wait_until="networkidle")
                content = page.content()
                # Redirects can move the document away from the requested
                # URL; relative links must be resolved against where the
                # page actually ended up.
                base_url = page.url or url
            finally:
                # Close the browser even when navigation or rendering fails.
                browser.close()

        # Parse images
        soup = BeautifulSoup(content, "html.parser")
        imgs = [
            urljoin(base_url, img["src"])
            for img in soup.find_all("img", src=True)
        ]
        return {
            "html": content,
            "images": imgs,
            "url": url,
        }
    except Exception as e:
        # Tool boundary: report the failure to the caller as data.
        return {"error": f"Error fetching/rendering page: {str(e)}"}