marcos-banik committed on
Commit
1c6a590
·
1 Parent(s): 020b298

🚧 More tools

Browse files
Files changed (2) hide show
  1. app.py +7 -1
  2. tools.py +123 -0
app.py CHANGED
@@ -12,7 +12,11 @@ from smolagents import (
12
  PythonInterpreterTool,
13
  )
14
 
15
- from tools import list_wikipedia_sections
 
 
 
 
16
 
17
  # (Keep Constants as is)
18
  # --- Constants ---
@@ -66,6 +70,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
66
  wiki_search,
67
  py_run,
68
  list_wikipedia_sections,
 
 
69
  ],
70
  verbosity_level=2,
71
  additional_authorized_imports=authorized_imports,
 
12
  PythonInterpreterTool,
13
  )
14
 
15
+ from tools import (
16
+ list_wikipedia_sections,
17
+ count_tables_in_wikipedia_section,
18
+ extract_nth_table_in_wikipedia_section,
19
+ )
20
 
21
  # (Keep Constants as is)
22
  # --- Constants ---
 
70
  wiki_search,
71
  py_run,
72
  list_wikipedia_sections,
73
+ count_tables_in_wikipedia_section,
74
+ extract_nth_table_in_wikipedia_section,
75
  ],
76
  verbosity_level=2,
77
  additional_authorized_imports=authorized_imports,
tools.py CHANGED
@@ -1,8 +1,10 @@
1
  from bs4 import BeautifulSoup
2
  import requests
3
  import re
 
4
 
5
 
 
6
  def list_wikipedia_sections(page_title: str) -> list[str]:
7
  """
8
  Return an ordered list of section headings from a Wikipedia article.
@@ -34,3 +36,124 @@ def list_wikipedia_sections(page_title: str) -> list[str]:
34
  sections.append(text)
35
 
36
  return sections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from bs4 import BeautifulSoup
2
  import requests
3
  import re
4
+ from smolagents import tool
5
 
6
 
7
+ @tool
8
  def list_wikipedia_sections(page_title: str) -> list[str]:
9
  """
10
  Return an ordered list of section headings from a Wikipedia article.
 
36
  sections.append(text)
37
 
38
  return sections
39
+
40
+
41
@tool
def count_tables_in_wikipedia_section(
    page_title: str, section_title: str
) -> int:
    """
    Return the number of HTML <table> elements found within a specified section of a Wikipedia article.

    Args:
        page_title (str): Title of the Wikipedia article (spaces will be replaced
            with underscores), e.g., "Python (programming language)".
        section_title (str): Visible heading of the section to inspect, matched
            case-insensitively, e.g., "Discography".

    Returns:
        int: The count of <table> tags under the given section heading, stopping
            at the next section. Returns 0 if the article or section is not found
            or if no tables are present.
    """
    url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
    # The docstring promises 0 (not an exception) for a missing article, so
    # treat any HTTP/network failure as "not found" instead of raising.
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except requests.RequestException:
        return 0

    soup = BeautifulSoup(resp.text, "html.parser")

    # Locate the heading <div> whose visible text matches section_title.
    # BUG FIX: the original reused the loop variable `div`, so when no heading
    # matched, `div` was left pointing at the *last* heading on the page and
    # tables after it were counted instead of returning 0.
    heading = None
    for candidate in soup.find_all(
        "div", class_=lambda cls: cls and cls.startswith("mw-heading")
    ):
        # Strip the trailing "[edit]" link text MediaWiki appends to headings.
        title = re.sub(r"\[edit\]$", "", candidate.text.strip()).strip()
        if title and title.lower() == section_title.lower():
            heading = candidate
            break

    if heading is None:
        return 0

    # Walk forward through siblings, counting tables until the next
    # section heading (another mw-heading* div) ends the section.
    count = 0
    for sibling in heading.find_next_siblings():
        if (
            sibling.name == "div"
            and sibling.get("class")
            and any(c.startswith("mw-heading") for c in sibling["class"])
        ):
            break
        if sibling.name == "table":
            count += 1

    return count
90
+
91
+
92
@tool
def extract_nth_table_in_wikipedia_section(
    page_title: str, section_title: str, n: int
) -> str:
    """
    Extract the nth table within a specified section of a Wikipedia article,
    rendered as tab-separated values (one line per table row).

    Args:
        page_title (str):
            Title of the Wikipedia article (e.g., "Queen (band)" or
            "Python (programming language)"). Spaces are automatically
            replaced with underscores.
        section_title (str):
            Visible title of the section to search, matched case-insensitively
            (e.g., "Discography").
        n (int):
            1-based index specifying which table to extract (1 for the first
            table, 2 for the second, etc.).

    Returns:
        str: The table contents as TSV text: one line per <tr>, with the text
            of each <th>/<td> cell joined by tabs (literal tabs inside cells
            are replaced with spaces).
            Returns an empty string if:
            - The article cannot be found.
            - The section does not exist.
            - The section contains fewer than n tables.
    """
    url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
    # The docstring promises "" (not an exception) for a missing article.
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except requests.RequestException:
        return ""
    soup = BeautifulSoup(resp.text, "html.parser")

    # Locate the heading <div> whose visible text matches section_title.
    # BUG FIX: the original reused the loop variable `div`, so when no heading
    # matched, `div` still referenced the last heading on the page and the
    # "section not found" check never fired.
    heading = None
    for candidate in soup.find_all(
        "div", class_=lambda cls: cls and cls.startswith("mw-heading")
    ):
        # Strip the trailing "[edit]" link text MediaWiki appends to headings.
        title = re.sub(r"\[edit\]$", "", candidate.text.strip()).strip()
        if title and title.lower() == section_title.lower():
            heading = candidate
            break
    if heading is None:
        return ""

    # Iterate siblings until the next section header, looking for table n.
    count = 0
    tbl = None
    for sib in heading.find_next_siblings():
        if (
            sib.name == "div"
            and sib.get("class")
            and any(c.startswith("mw-heading") for c in sib["class"])
        ):
            break
        if sib.name == "table":
            count += 1
            if count == n:
                tbl = sib
                break

    # BUG FIX: the original dereferenced `tbl` unconditionally, raising
    # AttributeError when the section held fewer than n tables instead of
    # returning "" as documented.
    if tbl is None:
        return ""

    # Convert the table to TSV: tabs inside cell text would corrupt the
    # column separation, so they are flattened to spaces.
    rows = []
    for tr in tbl.find_all("tr"):
        cells = tr.find_all(["th", "td"])
        texts = [
            cell.get_text(separator=" ", strip=True).replace("\t", " ")
            for cell in cells
        ]
        if texts:
            rows.append("\t".join(texts))

    return "\n".join(rows)