Final_Project_Agent_Course

Sleeping

App Files Files Community

Thanh Vinh Vo committed on Jul 9, 2025

Commit

2d82e56

1 Parent(s): dc8c03a

update

Browse files

Files changed (1) hide show

app.py +97 -18

app.py CHANGED Viewed

@@ -10,7 +10,6 @@ from PIL import Image
 from smolagents import (
     CodeAgent,
     DuckDuckGoSearchTool,
-    GoogleSearchTool,
     InferenceClientModel,
     load_tool,
     OpenAIServerModel,
@@ -18,7 +17,6 @@ from smolagents import (
     Tool,
     ToolCollection,
     VisitWebpageTool,
-    WikipediaSearchTool
 )
 import whisper
@@ -27,6 +25,44 @@ import whisper
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 @tool
 def audio_to_text(file_path: str) -> str:
     """
@@ -145,8 +181,8 @@ class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         self.multimodal_agent = CodeAgent(
-            tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
-            model= OpenAIServerModel(model_id="gpt-4o", temperature=0.0,),
             additional_authorized_imports=[
                 "requests",
                 "bs4",
@@ -161,8 +197,7 @@ class BasicAgent:
                 "numpy",
                 "json",
                 "whisper",
-                "openpyxl",
-                "youtube_transcript_api",
             ],
             name="multimodal_agent",
             description="""
@@ -172,9 +207,9 @@ class BasicAgent:
         )
         self.code_agent = CodeAgent(
-            tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
             model=InferenceClientModel(
-                model_id="Qwen/Qwen2.5-Coder-32B-Instruct"
             ),
             additional_authorized_imports=[
                 "requests",
@@ -186,29 +221,73 @@ class BasicAgent:
                 "PIL",
                 "chess",
                 "img2text",
                 "PIL.Image",
                 "bytes",
                 "cv2",
                 "numpy",
                 "json",
                 "whisper",
-                "openpyxl",
-                "youtube_transcript_api",
-            ]
         )
     def __call__(self, question: str, question_id: str, file_name: str) -> str:
         print(f"Agent received question: {question}")
         file = f"Mentioned file: {file_name}" if file_name else ""
         prompt = f"""
-            Answer the following question (question_id is {question_id}):
                "{question}""{file}"
-            Please follow rules below:
-                1. `wikipedia` Python package is provided, we should use it to interact with Wikipedia pages.
-                2. `pandas` Python package is provided, we should use it to read table data from HTML pages.
-                3. Take the question literally! Do not add any additional information or assumptions.
         """
-        result = self.code_agent.run(prompt)
         print(f"Agent responded with: {result}")
         return result

 from smolagents import (
     CodeAgent,
     DuckDuckGoSearchTool,
     InferenceClientModel,
     load_tool,
     OpenAIServerModel,
     Tool,
     ToolCollection,
     VisitWebpageTool,
 )
 import whisper
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+@tool
+def extract_table_from_html(html: str, match: str | None = None) -> list:
+    """
+    A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
+    Example usecases include extracting tables from Wikipedia pages, HTML emails, or other web content.
+    Args:
+        html (str): The HTML content containing HTML tables to extract. This can be raw HTML
+                   string content or a URL to a webpage.
+        match (str | None, optional): A string or regular expression pattern to match
+                                    against table text content. If None, all tables
+                                    are extracted. Defaults to None.
+                                    DO NOT use HTML strings / tags in this parameter.
+    Returns:
+        list: A list of pandas DataFrames, where each DataFrame represents a table found
+              in the HTML content. Returns an empty list if no tables are found.
+    Raises:
+        ValueError: If pandas rejects the input for a reason other than "No tables found".
+        Exception: If any other error occurs while reading or parsing the content.
+    """
+    # Imports stay inside the function body, as in the original implementation.
+    import io
+
+    import pandas as pd
+
+    # Passing literal markup as a plain string to pandas.read_html is deprecated
+    # (pandas >= 2.1); wrap it in a file-like object. Anything that does not
+    # start with a tag (URL, file path) is handed to pandas unchanged.
+    source = io.StringIO(html) if html.lstrip().startswith("<") else html
+    # Only forward `match` when it was supplied so pandas keeps its own default.
+    read_kwargs = {} if match is None else {"match": match}
+    try:
+        # Extract tables using pandas
+        tables = pd.read_html(source, **read_kwargs)
+    except ValueError as e:
+        if "No tables found" in str(e):
+            # Return empty list instead of raising error
+            return []
+        raise ValueError(f"Error extracting tables from HTML content: {e}") from e
+    except Exception as e:
+        raise Exception(f"Failed to extract tables from HTML content: {e}") from e
+    # Return the list of DataFrames directly
+    return tables if tables else []
 @tool
 def audio_to_text(file_path: str) -> str:
     """
     def __init__(self):
         print("BasicAgent initialized.")
         self.multimodal_agent = CodeAgent(
+            tools=[VisitWebpageTool(), DuckDuckGoSearchTool(), get_file, audio_to_text],
+            model= OpenAIServerModel(model_id="gpt-4o"),
             additional_authorized_imports=[
                 "requests",
                 "bs4",
                 "numpy",
                 "json",
                 "whisper",
+                "openpyxl"
             ],
             name="multimodal_agent",
             description="""
         )
         self.code_agent = CodeAgent(
+            tools=[VisitWebpageTool(), DuckDuckGoSearchTool(), get_file, audio_to_text, extract_table_from_html],
             model=InferenceClientModel(
+                model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
             ),
             additional_authorized_imports=[
                 "requests",
                 "PIL",
                 "chess",
                 "img2text",
+                "chess.pgn",
                 "PIL.Image",
                 "bytes",
                 "cv2",
                 "numpy",
+                "chess.engine",
                 "json",
                 "whisper",
+                "openpyxl"
+            ],
+            name="code_agent",
+            description="""
+                This agent specializes at:
+                    - Writing code to solve problem.
+                    - Browse the web to find information.
+                    - Solving chess problems.
+                This agent follow rules below when possible:
+                    1. `wikipedia` Python package is provided to interact with Wikipedia pages.
+                    2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
+                    2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
+                    3. Please take the question literally! Do not add any additional information or assumptions.
+            """,
+            verbosity_level=0,
+            max_steps=10,
         )
+        # Top-level agent: `__call__` runs this one, and it is given the two
+        # agents built above as managed agents.
+        self.manager_agent = CodeAgent(
+            model=InferenceClientModel(
+                "Qwen/Qwen2.5-32B-Instruct"
+            ),
+            tools=[get_file, audio_to_text],
+            managed_agents=[
+                self.multimodal_agent,
+                self.code_agent],
+            additional_authorized_imports=[
+                "requests",
+                "bs4",
+                "markdownify",
+                "wikipedia",
+                "pandas",
+                "io",
+                "PIL",
+                "chess",
+                "img2text",
+                "chess.pgn",
+                "PIL.Image",
+                "bytes",
+                "cv2",
+                "numpy",
+                "chess.engine",
+                "whisper",
+                # The comma after "openpyxl" is required: without it Python
+                # joins the adjacent literals into the single entry
+                # "openpyxljson" and neither module is authorised.
+                "openpyxl",
+                "json",
+            ],
+            planning_interval=5,
+            max_steps=15,
+        )
     def __call__(self, question: str, question_id: str, file_name: str) -> str:
+        """
+        Build a prompt for one question and run it through the manager agent.
+        Args:
+            question: The question text, embedded verbatim in the prompt.
+            question_id: Identifier of the question, mentioned in the prompt.
+            file_name: Name of a file attached to the question; when empty,
+                no file is mentioned in the prompt.
+        Returns:
+            Whatever `self.manager_agent.run` returns for the prompt
+            (annotated as str; the actual type depends on the agent).
+        """
         print(f"Agent received question: {question}")
         file = f"Mentioned file: {file_name}" if file_name else ""
         prompt = f"""
+            Answer the following question (question_id is {question_id}):
                "{question}""{file}"
         """
+        result = self.manager_agent.run(prompt)
         print(f"Agent responded with: {result}")
         return result