Final_Project_Agent_Course

Sleeping

App Files Files Community

Thanh Vinh Vo committed on Jul 9, 2025

Commit

a45f805

1 Parent(s): ee7c16d

update

Browse files

Files changed (1) hide show

app.py +73 -2

app.py CHANGED Viewed

@@ -26,6 +26,78 @@ import whisper
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 @tool
 def audio_to_text(file_path: str) -> str:
     """
@@ -171,7 +243,7 @@ class BasicAgent:
         )
         self.code_agent = CodeAgent(
-            tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
             model=InferenceClientModel(
                 model_id="Qwen/Qwen2.5-Coder-32B-Instruct"
             ),
@@ -205,7 +277,6 @@ class BasicAgent:
             Please follow rules below:
                 1. Take the question literally! Do not add any additional information or assumptions.
                 2. `wikipedia` Python package is provided, we should use it to search and retrieve Wikipedia pages.
-                3. `pandas` Python package is provided, we should use it to extract table data from Wikipedia pages.
         """
         result = self.code_agent.run(prompt)
         print(f"Agent responded with: {result}")

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+@tool
+def wikipedia_page_to_markdown(page_name: str) -> str:
+    """
+    A tool that retrieves a Wikipedia page and converts its content to Markdown format.
+    This function looks up a Wikipedia page by name with the `wikipedia` package, fetches
+    the page's HTML, and converts it to Markdown with the `markdownify` library.
+    If the name is ambiguous, the first option listed by the disambiguation error is used.
+    Args:
+        page_name (str): The name or title of the Wikipedia page to retrieve.
+                        Can be an exact page title or a search term that Wikipedia
+                        can match to an existing page, such as "Python programming",
+                        "Albert Einstein" or "Machine Learning".
+    Returns:
+        str: The page's HTML content converted to Markdown.
+    Raises:
+        Exception: If no Wikipedia page is found for the given name, or if any other
+                   error (including a failed disambiguation fallback) occurs while
+                   retrieving or converting the page. The underlying error is chained
+                   as the exception's cause.
+    """
+    import wikipedia
+    from markdownify import markdownify as md
+    try:
+        try:
+            # Get the Wikipedia page
+            page = wikipedia.page(page_name)
+        except wikipedia.exceptions.DisambiguationError as e:
+            # Nothing to fall back to: let the outer handler report the original error
+            # instead of failing with an IndexError on an empty options list.
+            if not e.options:
+                raise
+            # Use the first option. This lookup stays inside the outer try so that a
+            # failure here is wrapped like any other lookup failure rather than escaping raw.
+            page = wikipedia.page(e.options[0])
+        # Convert HTML content to Markdown
+        return md(page.html())
+    except wikipedia.exceptions.PageError as e:
+        raise Exception(f"No Wikipedia page found for '{page_name}'. Please check the page name and try again.") from e
+    except Exception as e:
+        raise Exception(f"Error retrieving Wikipedia page '{page_name}': {e}") from e
 @tool
 def audio_to_text(file_path: str) -> str:
     """
         )
         self.code_agent = CodeAgent(
+            tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text, wikipedia_page_to_markdown],
             model=InferenceClientModel(
                 model_id="Qwen/Qwen2.5-Coder-32B-Instruct"
             ),
             Please follow rules below:
                 1. Take the question literally! Do not add any additional information or assumptions.
                 2. `wikipedia` Python package is provided, we should use it to search and retrieve Wikipedia pages.
         """
         result = self.code_agent.run(prompt)
         print(f"Agent responded with: {result}")