Final_Assignment_Template

Sleeping

App Files Files Community

GLECO committed on Dec 8, 2025

Commit

6a4320d

1 Parent(s): 43fcb87

Ajout tool pour webpage

Browse files

Files changed (1) hide show

visit_web_pages_tool.py +50 -0

visit_web_pages_tool.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import re
+import requests
+from markdownify import markdownify
+from requests.exceptions import RequestException
+from langchain_core.tools import tool
+import requests
+from langchain_community.tools import DuckDuckGoSearchResults
# Browser-like default headers for outgoing HTTP requests.
# NOTE(review): nothing in the visible part of this file passes these headers
# to a request yet -- confirm where they are meant to be used.
DEFAULT_HEADERS = {
    # Generic, browser-like UA. For Wikipedia, it is better to identify your
    # app and a contact address instead.
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;"
        "q=0.9,image/avif,image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.5",
    # "br" is deliberately not advertised: requests/urllib3 can only decode
    # Brotli when an optional brotli package is installed, and without it a
    # Brotli-compressed body would come back undecoded.
    "Accept-Encoding": "gzip, deflate",
}
def visit_webpage(url: str) -> str:
    """Fetch a webpage via the urltomarkdown service and return it as Markdown.

    The page is not requested directly: the target URL is sent to
    ``https://urltomarkdown.herokuapp.com/`` as its ``url`` query parameter.
    The response body is run through ``markdownify`` and runs of three or
    more newlines are collapsed to a single blank line.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The cleaned-up page content, or a human-readable error message if the
        URL is empty or fetching/converting fails. Failures are reported as
        strings rather than raised.
    """
    # Reject missing/blank input up front instead of sending a pointless
    # request to the conversion service.
    if not isinstance(url, str) or not url.strip():
        return "Error fetching the webpage: no URL was provided."

    try:
        response = requests.get(
            "https://urltomarkdown.herokuapp.com/",
            # Let requests percent-encode the target URL; concatenating it raw
            # breaks as soon as it contains '&', '#', '?' or spaces.
            params={"url": url.strip()},
            # Without a timeout a stalled service blocks the caller forever.
            timeout=30,
        )
        response.raise_for_status()  # Turn 4xx/5xx answers into RequestException
        # NOTE(review): the service name suggests the response is already
        # Markdown; confirm that running markdownify over it is intended.
        markdown_content = markdownify(response.text).strip()
        # Collapse runs of 3+ newlines into a single blank line
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
        return markdown_content
    except RequestException as e:
        return f"Error fetching the webpage: {e}"
    except Exception as e:
        # Deliberately broad: callers get an error string, never an exception.
        return f"An unexpected error occurred: {e}"