Final_Assignment_Template

Sleeping

aelin committed on May 6, 2025

Commit

f791164

1 Parent(s): d0210fc

Adds web page to markdown conversion tool

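Introduces a tool to fetch a web page by URL and convert its content to markdown, with error handling and output length limits. Also replaces the module-level `Context()` in app.py with a `Context(agent)` that is created alongside the agent, stored on `self.context`, and passed to every `self.agent.run(...)` call.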

Enhances web content extraction and improves agent reliability.

Files changed (2) hide show

_tools.py +31 -0
app.py +4 -2

_tools.py CHANGED Viewed

@@ -1,3 +1,8 @@
 import requests
 import io
@@ -78,6 +83,25 @@ def _extract_text_from_audio_file(file_bytes: bytes) -> str:
     """Extract text from an audio file."""
     return client.automatic_speech_recognition(file_bytes, model="openai/whisper-large-v2").text
 # Initialize tools
 search_tool = FunctionTool.from_defaults(
   _search_tool,
@@ -133,6 +157,12 @@ extract_text_from_audio_file_tool = FunctionTool.from_defaults(
   description="Extract text from an audio file."
 )
 tools = [
   search_tool,
   fetch_file_bytes_tool,
@@ -143,5 +173,6 @@ tools = [
   extract_text_from_code_file_tool,
   extract_text_from_audio_file_tool,
   xlsx_to_text_tool,
 ]

+import re
+from markdownify import markdownify
 import requests
 import io
     """Extract text from an audio file."""
     return client.automatic_speech_recognition(file_bytes, model="openai/whisper-large-v2").text
+def _webpage_to_markdown(url: str) -> str:
+    """
+    Access a web page and return its content as markdown.
+    Limits output to 10,000 characters to avoid excessive responses.
+    """
+    try:
+        response = requests.get(url, timeout=20)
+        response.raise_for_status()
+        markdown_content = markdownify(response.text).strip()
+        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
+        return markdown_content[:10000]
+    except requests.exceptions.Timeout:
+        return "Request timed out. Please try again later or check the URL."
+    except requests.exceptions.RequestException as e:
+        return f"Error fetching the webpage: {str(e)}"
+    except Exception as e:
+        return f"Unexpected error: {str(e)}"
 # Initialize tools
 search_tool = FunctionTool.from_defaults(
   _search_tool,
   description="Extract text from an audio file."
 )
+webpage_to_markdown_tool = FunctionTool.from_defaults(
+    _webpage_to_markdown,
+    name="Webpage to Markdown",
+    description="Access a web page by URL and return the content as markdown. Use to read web pages."
+)
 tools = [
   search_tool,
   fetch_file_bytes_tool,
   extract_text_from_code_file_tool,
   extract_text_from_audio_file_tool,
   xlsx_to_text_tool,
+  webpage_to_markdown_tool,
 ]

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ import asyncio
 from utils import cache_answers, update_cache_answer, get_cached_answer, load_cache
-context = Context()
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -42,7 +41,10 @@ class BasicAgent:
                 Don't use any other format than the one above and limit your attempts to answer the question to 3 times.
             """,
         )
         self.agent = agent
     async def run(self, question: Question) -> str:
         question_text = question["question"]
@@ -69,7 +71,7 @@ class BasicAgent:
                 return str(cached["answer"])
-            answer = await self.agent.run(prompt, ctx=context)
             print(f"Agent returning answer: {answer}")

 from utils import cache_answers, update_cache_answer, get_cached_answer, load_cache
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
                 Don't use any other format than the one above and limit your attempts to answer the question to 3 times.
             """,
         )
+        context = Context(agent)
         self.agent = agent
+        self.context = context
     async def run(self, question: Question) -> str:
         question_text = question["question"]
                 return str(cached["answer"])
+            answer = await self.agent.run(prompt, ctx=self.context)
             print(f"Agent returning answer: {answer}")