Final_Assignment_Template

Sleeping

Final_Assignment_Template / visit_web_pages_tool.py

Ajout tool pour webpage

6a4320d about 2 months ago

1.68 kB

	import re
	import requests
	from markdownify import markdownify
	from requests.exceptions import RequestException
	from langchain_core.tools import tool
	import requests
	from langchain_community.tools import DuckDuckGoSearchResults

	# Browser-like HTTP request headers intended for fetching web pages.
	DEFAULT_HEADERS = {
	    # Generic, browser-like UA. For Wikipedia, better to identify your app & contact.
	    "User-Agent": (
	        "Mozilla/5.0 (X11; Linux x86_64) "
	        "AppleWebKit/537.36 (KHTML, like Gecko) "
	        "Chrome/120.0 Safari/537.36"
	    ),
	    # The trailing wildcard must be spelled "*/*"; a bare "/" is not a valid
	    # media range and leaves the "accept anything else" fallback unexpressed.
	    "Accept": (
	        "text/html,application/xhtml+xml,application/xml;"
	        "q=0.9,image/avif,image/webp,*/*;q=0.8"
	    ),
	    "Accept-Language": "en-US,en;q=0.5",
	    # NOTE(review): advertising "br" assumes a Brotli decoder is available to
	    # the HTTP client; confirm, otherwise responses may come back undecoded.
	    "Accept-Encoding": "gzip, deflate, br",
	}

	# Conversion proxy that fetches a page; the target is passed as the "url" query parameter.
	_MARKDOWN_PROXY_URL = "https://urltomarkdown.herokuapp.com/"
	# Upper bound (seconds) on the proxy call so a stalled server cannot hang the caller.
	_REQUEST_TIMEOUT_SECONDS = 30


	def visit_webpage(url: str) -> str:
	    """Visits a webpage at the given URL and returns its content as a markdown string.

	    The page is fetched through the urltomarkdown proxy rather than directly.
	    Failures are reported as a returned message, never raised, so the caller
	    always receives a string.

	    Args:
	        url: The URL of the webpage to visit.

	    Returns:
	        The content of the webpage converted to Markdown, or an error message if the request fails.
	    """
	    # Reject missing/blank input up front instead of sending a useless request.
	    if not url or not url.strip():
	        return "Error fetching the webpage: no URL provided"

	    try:
	        # Pass the target through ``params`` so it is percent-encoded; plain
	        # concatenation lets characters such as '&' or '#' in the target URL
	        # be parsed as part of the proxy's own query string.
	        response = requests.get(
	            _MARKDOWN_PROXY_URL,
	            params={"url": url.strip()},
	            timeout=_REQUEST_TIMEOUT_SECONDS,
	        )
	        response.raise_for_status()  # Raise an exception for bad status codes

	        # Run the response body through markdownify.
	        # NOTE(review): the proxy presumably already returns Markdown; confirm
	        # whether this extra conversion is still wanted.
	        markdown_content = markdownify(response.text).strip()

	        # Collapse runs of three or more newlines into a single blank line.
	        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

	        return markdown_content

	    except RequestException as e:
	        # Covers connection errors, timeouts and non-2xx statuses.
	        return f"Error fetching the webpage: {str(e)}"
	    except Exception as e:
	        # Deliberate catch-all: callers rely on always getting a string back.
	        return f"An unexpected error occurred: {str(e)}"