import re

import requests
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.tools import tool
from markdownify import markdownify
from requests.exceptions import RequestException

DEFAULT_HEADERS = {
    # Generic, browser-like UA. For Wikipedia, better to identify your app & contact.
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;"
        "q=0.9,image/avif,image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
}

# Converter service that the page URL is handed to; the target page itself is
# never requested directly by this module.
_URL_TO_MARKDOWN_ENDPOINT = "https://urltomarkdown.herokuapp.com/"

# Upper bound (seconds) on how long a single request may block. Without a
# timeout, `requests` waits indefinitely on a stalled server.
_REQUEST_TIMEOUT_SECONDS = 30


def visit_webpage(url: str) -> str:
    """Fetch a webpage through the converter service and return it as Markdown.

    The page is not downloaded directly: its URL is sent as the ``url`` query
    parameter to the urltomarkdown endpoint, and the response body is then run
    through ``markdownify`` and has runs of blank lines collapsed.

    This function never raises; every failure is reported in the returned
    string so that callers always receive text.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The content of the webpage converted to Markdown, or an error message
        if the URL is blank or the request fails.
    """
    # Reject missing/blank input up front instead of spending a network round
    # trip on a request that cannot succeed.
    if not isinstance(url, str) or not url.strip():
        return "Error fetching the webpage: no URL was provided"

    try:
        # Pass the target through `params` so it is percent-encoded. Raw string
        # concatenation lets any `&`, `#`, `+` or space in the page URL be
        # misread as part of the converter's own query string.
        response = requests.get(
            _URL_TO_MARKDOWN_ENDPOINT,
            params={"url": url},
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        # Turn 4xx/5xx responses into an HTTPError (a RequestException).
        response.raise_for_status()

        # NOTE(review): the endpoint name suggests the body is already
        # Markdown; the markdownify pass is kept as in the original -- confirm
        # it is still wanted.
        markdown_content = markdownify(response.text).strip()

        # Collapse runs of three or more newlines into a single blank line.
        return re.sub(r"\n{3,}", "\n\n", markdown_content)
    except RequestException as e:
        # Covers connection errors, timeouts and bad HTTP status codes.
        return f"Error fetching the webpage: {e}"
    except Exception as e:
        # Deliberate catch-all: this function reports failures as text rather
        # than propagating them to the caller.
        return f"An unexpected error occurred: {e}"