matheuscs commited on
Commit
ee2f945
·
1 Parent(s): 4e574ef

editing template to add my agent

Browse files
Files changed (2) hide show
  1. app.py +325 -15
  2. requirements.txt +8 -2
app.py CHANGED
@@ -1,23 +1,285 @@
1
- import os
2
  import gradio as gr
3
- import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -40,11 +302,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
 
 
 
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
  print(agent_code)
50
 
@@ -76,11 +343,54 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
 
 
1
  import gradio as gr
 
 
2
  import pandas as pd
3
+ from smolagents import CodeAgent, OpenAIServerModel, tool
4
+ import os, subprocess
5
+ from bs4 import BeautifulSoup
6
+ from duckduckgo_search import DDGS
7
+ import csv
8
+ import json
9
+ import requests
10
+ import whisper
11
+ from typing import Optional
12
+ import openpyxl
13
 
14
  # (Keep Constants as is)
15
  # --- Constants ---
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
  # --- Basic Agent Definition ---
19
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
20
def download_file(file_name: str) -> None:
    """Download `file_name` from the scoring API if it is not already present locally.

    The remote endpoint is keyed by the task id, which is the file name
    without its extension. Does nothing when the file already exists.

    Args:
        file_name (str): Local file name; its stem is used as the remote task id.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    if not os.path.exists(file_name):
        # The files endpoint expects the task id (file name without extension).
        url = f"{DEFAULT_API_URL}/files/{file_name.split('.')[0]}"
        r = requests.get(url, timeout=30)
        # Fail loudly instead of silently writing an HTML error page to disk,
        # which would later be fed to the agent as if it were the task file.
        r.raise_for_status()
        with open(file_name, "wb") as f:
            f.write(r.content)
26
+
27
@tool
def open_file_as_text(file_name: str, filetype: Optional[str] = "txt") -> str:
    """
    Opens a file and returns its content as readable text.

    Supports 'txt', 'json', 'csv', 'xlsx', and 'mp3' (transcribes speech to text).

    Args:
        file_name (str): The path or name of the file.
        filetype (Optional[str]): Type of file ('txt', 'json', 'csv', 'xlsx', 'mp3'). Defaults to 'txt'.

    Returns:
        str: The content of the file as text, or transcribed speech if 'mp3'.
    """
    # Fetch the file from the scoring API when it is not present locally.
    download_file(file_name)
    # The parameter is Optional[str]: normalize so None, 'TXT' or '.txt'
    # all behave like 'txt' instead of hitting the unsupported branch.
    filetype = (filetype or "txt").lower().lstrip(".")
    try:
        if filetype == "txt":
            with open(file_name, "r", encoding="utf-8") as f:
                return f.read()

        elif filetype == "json":
            with open(file_name, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Re-serialize with indentation so the agent sees readable JSON.
            return json.dumps(data, indent=2)

        elif filetype == "csv":
            with open(file_name, "r", encoding="utf-8") as f:
                rows = list(csv.reader(f))
            return "\n".join(", ".join(row) for row in rows)

        elif filetype == "xlsx":
            # data_only=True returns computed cell values, not formulas.
            wb = openpyxl.load_workbook(file_name, data_only=True)
            sheet = wb.active
            content = []
            for row in sheet.iter_rows(values_only=True):
                content.append(", ".join(str(cell) if cell is not None else "" for cell in row))
            return "\n".join(content)

        elif filetype == "mp3":
            # Whisper 'base' keeps transcription fast enough for the tool loop.
            w = whisper.load_model("base")
            res = w.transcribe(file_name)
            return res["text"]

        else:
            return f"Unsupported filetype '{filetype}'. Supported types are 'txt', 'json', 'csv', 'xlsx', and 'mp3'."

    except FileNotFoundError:
        return f"File '{file_name}' not found."
    except Exception as e:
        # Tools report failures as strings so the agent can recover.
        return f"Error opening file '{file_name}': {str(e)}"
78
+
79
@tool
def web_search(query: str) -> str:
    """
    Searches the web using DuckDuckGo and returns top search snippets.

    Args:
        query (str): The search query string.

    Returns:
        str: A list of top search results with title, snippet, and URL.
    """
    try:
        with DDGS() as ddgs:
            hits = ddgs.text(query, max_results=3)
        if not hits:
            return "No results found."
        # One block per hit, separated by a blank line.
        blocks = [
            f"Title: {hit['title']}\nSnippet: {hit['body']}\nURL: {hit['href']}"
            for hit in hits
        ]
        return "\n\n".join(blocks)
    except Exception as e:
        # Report failures as text so the agent can react instead of crashing.
        return f"Error during search: {str(e)}"
98
+
99
def parse_wikipedia_table(table) -> str:
    """
    Parses a Wikipedia table into a clean, readable text format.

    Args:
        table (Tag): BeautifulSoup Tag for the table.

    Returns:
        str: Formatted table as readable text.
    """
    lines = []

    # Emit a header line when the table declares one explicitly.
    thead = table.find('thead')
    if thead:
        header_cells = [
            th.get_text(separator=" ", strip=True) for th in thead.find_all('th')
        ]
        if header_cells:
            lines.append(" | ".join(header_cells))

    # Body rows; some tables omit <tbody>, so fall back to the table itself.
    body = table.find('tbody') or table
    for tr in body.find_all('tr'):
        cell_texts = []
        for cell in tr.find_all(['th', 'td']):
            # Drop citation markers like [7] or [note 1] before extracting text.
            for sup in cell.find_all('sup', class_='reference'):
                sup.decompose()
            cell_texts.append(cell.get_text(separator=" ", strip=True))
        if cell_texts:
            lines.append(" | ".join(cell_texts))

    return "\n".join(lines)
142
+
143
@tool
def read_wikipedia_page(url: str) -> str:
    """
    Fetches a Wikipedia article and extracts clean sectioned text around the relevant query.

    Args:
        url (str): The Wikipedia page URL.

    Returns:
        str: Sectioned and readable snippet focused around the query.
    """
    # A browser-like User-Agent avoids being turned away as a bot.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Wikipedia keeps the article body inside #mw-content-text.
    content_div = soup.find('div', id='mw-content-text')
    if not content_div:
        return "Content not found."

    pieces = []
    for elem in content_div.find_all(['h2', 'h3', 'p', 'ul', 'ol', 'table']):
        if elem.name in ('h2', 'h3'):
            # Surround section headings with blank lines for readability.
            pieces.append("\n\n" + elem.get_text(strip=True) + "\n")
        elif elem.name == 'table':
            pieces.append(parse_wikipedia_table(elem))
        else:
            # Paragraphs and lists become plain text.
            pieces.append(elem.get_text(strip=True))

    return "\n".join(pieces)
177
+
178
@tool
def smart_paginate_around_query(full_text: str, query: str) -> list:
    """
    Splits text into windows around each occurrence of the query.

    Args:
        full_text (str): The full text to search within.
        query (str): The search query.

    Returns:
        list: List of relevant text windows (pages).
    """
    # Guard: an empty query would match at every position and the search
    # pointer would never pass the end of the text -> infinite loop.
    if not query:
        return []

    before_chars = 1000   # context kept before each match
    after_chars = 3000    # context kept after each match

    full_text_lower = full_text.lower()
    query_lower = query.lower()
    query_len = len(query_lower)
    text_len = len(full_text)

    pages = []
    search_pos = 0

    while True:
        # Case-insensitive search against the lowered copies.
        match_pos = full_text_lower.find(query_lower, search_pos)
        if match_pos == -1:
            break  # no more matches

        # Window of context around this match, clamped to the text bounds.
        start = max(0, match_pos - before_chars)
        end = min(text_len, match_pos + query_len + after_chars)
        pages.append(full_text[start:end])

        # Resume searching AFTER the current window so pages never overlap.
        search_pos = end

    return pages
217
+
218
@tool
def reverse_sentence(text: str) -> str:
    """
    Reverses the input text.

    Args:
        text (str): The input string to be reversed.

    Returns:
        str: The reversed string.
    """
    # Walk the characters from the end and join them back into a string.
    return "".join(reversed(text))
230
+
231
@tool
def run_python_code(file_name: str) -> str:
    """
    Executes a Python file and returns its printed final output.

    Args:
        file_name (str): Name of the Python file.

    Returns:
        str: The final printed output.
    """
    import sys  # local import keeps the fix self-contained in this tool

    # Fetch the file from the scoring API when it is not present locally.
    download_file(file_name)

    try:
        # Use the current interpreter rather than whatever "python" resolves
        # to on PATH (which may be absent or a different version in a Space).
        result = subprocess.run(
            [sys.executable, file_name],
            capture_output=True,
            text=True,
            timeout=10,  # seconds; protects against runaway scripts
        )

        if result.returncode != 0:
            return f"Error running code: {result.stderr.strip()}"

        return result.stdout.strip()

    except subprocess.TimeoutExpired:
        return "Execution timed out."
    except Exception as e:
        # Report failures as text so the agent can react instead of crashing.
        return f"Error: {str(e)}"
263
+
264
# Tools registered with the agent.
# NOTE(review): `run_python_code` is defined above with @tool but is not
# listed here, so the agent cannot call it — looks unintentional; confirm.
tools = [
    open_file_as_text,
    web_search,
    read_wikipedia_page,
    smart_paginate_around_query,
    reverse_sentence,
]

# OpenAI-compatible chat model; temperature 0 for deterministic answers.
# Requires the OPENAI_API_KEY environment variable to be set.
model = OpenAIServerModel(
    model_id="gpt-4o",
    api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0
)

# Code-writing agent; additional_authorized_imports whitelists the modules
# that code generated by the agent is allowed to import.
agent = CodeAgent(
    model=model,
    tools=tools,
    additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv", "urllib"]
)
283
 
284
  def run_and_submit_all( profile: gr.OAuthProfile | None):
285
  """
 
302
 
303
  # 1. Instantiate Agent ( modify this part to create your agent)
304
  try:
305
+ agent = CodeAgent(
306
+ model=model,
307
+ tools=tools,
308
+ additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv",
309
+ "urllib"]
310
+ )
311
  except Exception as e:
312
  print(f"Error instantiating agent: {e}")
313
  return f"Error initializing agent: {e}", None
314
+ # In the case of an app running as a hugging Face space, this link points toward your codebase (useful for others so please keep it public)
315
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
316
  print(agent_code)
317
 
 
343
  for item in questions_data:
344
  task_id = item.get("task_id")
345
  question_text = item.get("question")
346
+ file_name = item.get("file_name")
347
  if not task_id or question_text is None:
348
  print(f"Skipping item with missing task_id or question: {item}")
349
  continue
350
  try:
351
+ full_prompt = f"""You are a highly precise answering agent.
352
+
353
+ When given a question:
354
+ - If necessary, perform a web search using the tool `web_search` to find possible sources of information.
355
+ - If the web search only returns titles and short snippets, you MUST visit the actual webpage to read the full content before answering.
356
+ - Use the `read_wikipedia_page` tool to fetch and read the Wikipedia page when necessary.
357
+ - You just have the ability to read Wikipedia pages only.
358
+ - You MUST paginate the content using `smart_paginate_around_query`.
359
+ - When using `smart_paginate_around_query`, you must select a short, general query based on the main keywords only. Avoid using full questions or long phrases. Use 1–3 essential words.
360
+ - If the task requires reversing the order of words, letters, phrases, or any text, you must use the `reverse_sentence` tool to perform the operation.
361
+ - Never reverse text manually inside your code. Always call the tool instead.
362
+ - If the task requires reading, listening, or analyzing a file, you must use the file specified in the `file_name` field of the task metadata, not the file name mentioned casually inside the question text.
363
+ - Comma separated lists MUST contain a single space after each comma.
364
+ - If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
365
+ - If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
366
+ - If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
367
+ - Only answer after you have gathered enough information by reading the actual page contents.
368
+ - Once you have the final answer, you must call `final_answer("your_answer")` immediately after printing it.
369
+ - Do not retry or execute anything else after calling `final_answer`.
370
+ - `final_answer` must wrap the exact printed value.
371
+
372
+ Provide ONLY the precise answer requested.
373
+ Do not include explanations, steps, reasoning, or additional text.
374
+ Be direct and specific. GAIA benchmark requires exact matching answers.
375
+ Example: if asked "What is the capital of France?", respond exactly:
376
+
377
+ Thoughts: I need to retrieve the capital of France from Wikipedia and output it directly.
378
+
379
+ Code:
380
+ ```py
381
+ print("Paris")
382
+ ```<end_code>
383
+
384
+ Based on the above guidelines, answer the following question:
385
+
386
+ --begin of question--
387
+ {question_text}
388
+ --end of question--
389
+
390
+ If the questions mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
391
+
392
+ file_name: {file_name}"""
393
+ submitted_answer = agent.run(full_prompt)
394
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
395
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
396
  except Exception as e:
requirements.txt CHANGED
@@ -1,2 +1,8 @@
1
- gradio
2
- requests
 
 
 
 
 
 
 
1
+ gradio~=5.27.1
2
+ requests~=2.32.3
3
+ pandas~=2.2.3
4
+ openai-whisper~=20240930
5
+ openpyxl~=3.1.5
6
+ smolagents~=1.14.0
7
+ beautifulsoup4~=4.13.4
8
+ duckduckgo_search~=8.0.1