File size: 1,010 Bytes
e1f3958
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from langchain_core.tools import tool
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
from urllib.parse import urljoin

@tool
def get_rendered_html(url: str) -> dict:
    """Fetch a webpage in headless Chromium and return its fully rendered HTML.

    The page is loaded with Playwright and captured once the network is idle,
    so the returned markup includes content inserted by JavaScript.

    Args:
        url: Address of the page to load.

    Returns:
        On success, a dict with keys:
            "html": the rendered page markup as a string,
            "images": absolute URLs of every <img> with a non-empty src,
            "url": the url that was requested.
        On failure, a dict with a single "error" key holding the message.
    """
    print("\nFetching and rendering:", url)
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(url, wait_until="networkidle")
                content = page.content()
                # The page may have been redirected; relative links in the
                # markup are relative to where the browser actually ended up.
                final_url = page.url
            finally:
                # Close the browser even when navigation or capture fails.
                browser.close()

        # Parsing needs only the captured string, so it runs after the
        # Playwright context has been released.
        soup = BeautifulSoup(content, "html.parser")
        imgs = [
            urljoin(final_url, img["src"].strip())
            for img in soup.find_all("img", src=True)
            # An empty src would resolve to the page URL itself, not an image.
            if img["src"].strip()
        ]

        return {
            "html": content,
            "images": imgs,
            "url": url,
        }

    except Exception as e:
        # Tool boundary: report failures as data instead of raising, so the
        # calling agent receives a result it can act on.
        return {"error": f"Error fetching/rendering page: {str(e)}"}