Final_Assignment_Template / visit_web_pages_tool.py
GLECO's picture
Ajout tool pour webpage
6a4320d
import re
import requests
from markdownify import markdownify
from requests.exceptions import RequestException
from langchain_core.tools import tool
import requests
from langchain_community.tools import DuckDuckGoSearchResults
DEFAULT_HEADERS = {
# Generic, browser-like UA. For Wikipedia, better to identify your app & contact.
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120.0 Safari/537.36"
),
"Accept": (
"text/html,application/xhtml+xml,application/xml;"
"q=0.9,image/avif,image/webp,*/*;q=0.8"
),
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
}
def visit_webpage(url: str) -> str:
"""Visits a webpage at the given URL and returns its content as a markdown string.
Args:
url: The URL of the webpage to visit.
Returns:
The content of the webpage converted to Markdown, or an error message if the request fails.
"""
try:
# Send a GET request to the URL
response = requests.get("https://urltomarkdown.herokuapp.com/?url=" + url)
#print(response.text)
response.raise_for_status() # Raise an exception for bad status codes
# Convert the HTML content to Markdown
markdown_content = markdownify(response.text).strip()
# Remove multiple line breaks
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
return markdown_content
except RequestException as e:
return f"Error fetching the webpage: {str(e)}"
except Exception as e:
return f"An unexpected error occurred: {str(e)}"