Thanh Vinh Vo committed on
Commit
abda1eb
·
1 Parent(s): b34523d
Files changed (2) hide show
  1. app.py +80 -2
  2. requirements.txt +1 -0
app.py CHANGED
@@ -24,6 +24,80 @@ import whisper
24
  # --- Constants ---
25
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  @tool
28
  def audio_to_text(file_path: str) -> str:
29
  """
@@ -83,6 +157,7 @@ def audio_to_text(file_path: str) -> str:
83
 
84
  return json.dumps({"transcript": transcript_data})
85
 
 
86
 
87
 
88
  @tool
@@ -160,6 +235,7 @@ class BasicAgent:
160
  "numpy",
161
  "json",
162
  "whisper",
 
163
  ],
164
  name="multimodal_agent",
165
  description="""
@@ -169,7 +245,7 @@ class BasicAgent:
169
  )
170
 
171
  self.code_agent = CodeAgent(
172
- tools=[VisitWebpageTool(), DuckDuckGoSearchTool(), get_file, audio_to_text],
173
  model=InferenceClientModel(
174
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
175
  ),
@@ -191,6 +267,7 @@ class BasicAgent:
191
  "chess.engine",
192
  "json",
193
  "whisper",
 
194
  ],
195
  name="code_agent",
196
  description="""
@@ -213,7 +290,7 @@ class BasicAgent:
213
  model=InferenceClientModel(
214
  "Qwen/Qwen2.5-32B-Instruct"
215
  ),
216
- tools=[get_file, audio_to_text],
217
  managed_agents=[
218
  self.multimodal_agent,
219
  self.code_agent],
@@ -235,6 +312,7 @@ class BasicAgent:
235
  "chess.engine",
236
  "whisper",
237
  "json",
 
238
  ],
239
  planning_interval=5,
240
  max_steps=15,
 
24
  # --- Constants ---
25
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
26
 
27
+
28
@tool
def extract_table_from_html(html: str, match: str | None = None) -> list:
    """Extract HTML tables from HTML content and return them as pandas DataFrames.

    Uses ``pandas.read_html()`` to parse ``<table>`` elements from the provided
    content. Accepts either a raw HTML string or a URL (pandas supports both).
    Optionally filters tables by a text pattern.

    Args:
        html (str): Raw HTML string content, or a URL to a webpage, containing
            the tables to extract.
        match (str | None, optional): A string or regular-expression pattern
            matched against table text. Only tables containing the pattern are
            returned. If None, all tables are extracted. Defaults to None.
            Note: matching is case-sensitive.

    Returns:
        list: A list of pandas DataFrames, one per table found. Returns an
        empty list (instead of raising) when no tables are found.

    Raises:
        ValueError: If the HTML content is invalid or cannot be parsed.
        Exception: If parsing fails for any other reason.

    Example:
        >>> html_content = '''
        ... <table>
        ...   <tr><th>Name</th><th>Age</th></tr>
        ...   <tr><td>John</td><td>25</td></tr>
        ... </table>
        ... '''
        >>> tables = extract_table_from_html(html_content)
        >>> tables[0].shape
        (1, 2)

    Note:
        - Requires lxml, html5lib, or BeautifulSoup4 (read_html backends).
        - Tables must be well-formed HTML ``<table>`` elements.
    """
    # Local imports keep the tool self-contained for the agent sandbox.
    import pandas as pd
    from io import StringIO

    # pandas >= 2.1 deprecates passing a literal HTML string to read_html;
    # raw markup must be wrapped in a StringIO. URLs/paths pass through
    # unchanged so the documented URL support is preserved.
    source = StringIO(html) if html.lstrip().startswith("<") else html

    try:
        if match is not None:
            tables = pd.read_html(source, match=match)
        else:
            tables = pd.read_html(source)
        # read_html never returns an empty list (it raises ValueError when no
        # tables exist), so the result can be returned directly.
        return tables
    except ValueError as e:
        if "No tables found" in str(e):
            # Honor the documented contract: empty list instead of an error.
            return []
        # Chain the cause so the original parse failure stays in the traceback.
        raise ValueError(f"Error extracting tables from HTML content: {e}") from e
    except Exception as e:
        raise Exception(f"Failed to extract tables from HTML content: {e}") from e
100
+
101
  @tool
102
  def audio_to_text(file_path: str) -> str:
103
  """
 
157
 
158
  return json.dumps({"transcript": transcript_data})
159
 
160
+ @tool
161
 
162
 
163
  @tool
 
235
  "numpy",
236
  "json",
237
  "whisper",
238
+ "openpyxl"
239
  ],
240
  name="multimodal_agent",
241
  description="""
 
245
  )
246
 
247
  self.code_agent = CodeAgent(
248
+ tools=[VisitWebpageTool(), DuckDuckGoSearchTool(), get_file, audio_to_text, extract_table_from_html],
249
  model=InferenceClientModel(
250
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
251
  ),
 
267
  "chess.engine",
268
  "json",
269
  "whisper",
270
+ "openpyxl"
271
  ],
272
  name="code_agent",
273
  description="""
 
290
  model=InferenceClientModel(
291
  "Qwen/Qwen2.5-32B-Instruct"
292
  ),
293
+ tools=[get_file, audio_to_text, extract_table_from_html],
294
  managed_agents=[
295
  self.multimodal_agent,
296
  self.code_agent],
 
312
  "chess.engine",
313
  "whisper",
314
  "json",
315
+ "openpyxl"
316
  ],
317
  planning_interval=5,
318
  max_steps=15,
requirements.txt CHANGED
@@ -14,3 +14,4 @@ opencv-python
14
  numpy
15
  html5lib
16
  openai-whisper
 
 
14
  numpy
15
  html5lib
16
  openai-whisper
17
+ openpyxl