GaiaAgent_Final_Assignment

Sleeping

App Files Files Community

Francesco-A committed 19 days ago

Commit

f4c14e9

1 Parent(s): 856f7b8

app_build_v1

Browse files

Files changed (7) hide show

agent.py +80 -0
app.py +63 -7
requirements.txt +21 -9
tools/PLACEHOLDER.txt +0 -0
tools/download_file.py +37 -0
tools/files_to_dict.py +0 -62
tools/files_to_text.py +20 -18

agent.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import torch
+from smolagents import tool
+import pandas as pd
+from smolagents import (
+    CodeAgent,
+    InferenceClientModel,
+    Tool,
+    DuckDuckGoSearchTool,
+    VisitWebpageTool,
+    WikipediaSearchTool,
+    PythonInterpreterTool,
+    FinalAnswerTool,
+)
+# Import your custom tools (to be used in app, not in local notebook)
+from tools.download_file import download_file_from_url
+from tools.files_to_text import image_to_text, pdf_to_text, text_file_to_string
+def create_agent(
+    model_path: str = "Qwen/Qwen2.5-Coder-32B-Instruct"
+):
+    """
+    Creates and configures a CodeAgent.
+    This function initializes a smolagents CodeAgent equipped with the
+    recommended default tools (web search, browser, and Python interpreter),
+    together with any custom tools you may define.
+    Args:
+        model_path (str): The identifier or local path of the Hugging Face
+            model to be loaded. By default, it uses `Qwen/Qwen2.5-32B-Instruct`,
+            but any compatible model can be substituted.
+    Returns:
+        CodeAgent: A fully initialized agent ready to run code, query tools,
+        and perform multi-step reasoning using the selected model.
+    """
+    # Choose a lightweight but reasoning-capable model
+    model = InferenceClientModel(
+        model_id=model_path,
+        temperature        = 0.0,
+        top_p              = 1.0,   # NEW
+    )
+    # Default smolagents tools (high-level)
+    default_tools = [
+        DuckDuckGoSearchTool(),     # Internet search
+        VisitWebpageTool(),         # Retrieve webpage content
+        PythonInterpreterTool(),    # Executes agent-generated Python code
+        FinalAnswerTool(),          # Ends agent reasoning and returns final answer
+    ]
+    # Custom tools (critical for GAIA)
+    custom_tools = [
+        download_file_from_url,     # file downloader
+        text_file_to_string,        # .txt, .md, .json, etc.
+        pdf_to_text,                # PyMuPDF-based safe PDF parser
+        image_to_text,              # OCR for images
+    ]
+    tools = default_tools + custom_tools
+    # Create the CodeAgent (best for GAIA because it supports Python)
+    agent = CodeAgent(
+        model=model,
+        tools=tools,
+        add_base_tools=True,        # probably redundant, but it does not hurt
+        max_steps=7,
+        additional_authorized_imports = ['numpy','subprocess', 're', 'pandas',
+                                         'json', 'os', 'pathlib', 'tempfile',
+                                         'matplotlib.pyplot', 'seaborn'],
+        verbosity_level = 1,
+        max_print_outputs_length=1_000_000
+    )
+    return agent
+# WIP: Agentic RAG Systems

app.py CHANGED Viewed

@@ -3,21 +3,77 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 import requests
 import inspect
 import pandas as pd
+from agent import create_agent
+from typing import Optional
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self):
+        self.agent = create_agent()
+        self.system_prompt = """
+        You are an expert **General AI Assistant** and **Python Programmer** tasked with solving complex GAIA benchmark problems.
+        ### 1. Reason-Act-Observe
+        Follow a **PLAN → ACT → OBSERVE** loop:
+        - **PLAN:** Break the task into 1–3 logical steps. Identify tools for each step.
+        - **ACT:** Write and run one self-contained Python block per step.
+        - **OBSERVE:** Examine outputs or errors before proceeding.
+        ### 2. File Handling
+        - When a tool like `download_file_from_url` returns a local file path (e.g., `/tmp/data.csv`), you **MUST** save this path to a descriptive variable (e.g., `filepath`) and **immediately use that variable** as the argument for the next file-reading tool.
+        You must select the reading method based strictly on the file extension:
+        | File Extension | Tool / Method to Use |
+        | :--- | :--- |
+        | .csv | `pd.read_csv(filepath)` |
+        | .xlsx, .xls | `pd.read_excel(filepath)` |
+        | .pdf | `pdf_to_text(filepath)` |
+        | .txt, .md, .json | `text_file_to_string(filepath)` |
+        | .png, .jpg, .jpeg | `image_to_text(filepath)` |
+        ### 3. Data Analysis & Answer
+        - Inspect loaded datasets first (`.head()`, `.info()`, `.describe()`) before analysis.
+        - Write clean, idiomatic Python code. Before that, check if there is any pre-made tool that would work for the task.
+        - Use `FinalAnswerTool` **only once the problem is fully solved** to give a concise final answer.
+        ### 4. Additional instructions for the following tasks provided by GAIA team
+        - You are a general AI assistant. I will ask you a question. Do not reveal your internal reasoning. Only the content inside FinalAnswerTool will be evaluated.
+        - Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+        ### 5. To provide the final answer, you MUST call the final_answer tool inside a <code> block.
+        - Example of how to end the task:
+        Thought: I have found the answer. I will now provide it.
+        <code>
+        final_answer("FINAL ANSWER: The capital of France is Paris")
+        </code>
+        \n\n
+        """
+        # print("Agent initialized.")
+    def __call__(self, question: str, file_path: Optional[str] = None) -> str:
+        if file_path:
+            # Inject system prompt + question and (optional) file path
+            prompt = (
+                f"{self.system_prompt}\n\n"
+                f"Question: {question}\n\n"
+                f"There is an associated file at path: {file_path}.\n"
+                f"Use the appropriate tool to download it (if necessary) and read it before answering"
+            )
+        else:
+            prompt = (
+                f"{self.system_prompt}\n\n"
+                f"Question: {question}\n\n"
+            )
+        return self.agent.run(prompt)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

requirements.txt CHANGED Viewed

@@ -1,11 +1,23 @@
-smolagents
 gradio
-requests
 pandas
-openpyxl
-pdfplumber
-PyMuPDF
-Pillow
-requests
-ddgs
-pytesseract

+# Core agent framework (PINNED)
+smolagents==1.23.0
+transformers==4.53.3
+huggingface-hub==0.36.0
+# UI
 gradio
+# Networking & retrieval
+requests==2.32.5
+ddgs==9.10.0
+# Data handling
 pandas
+openpyxl==3.1.5
+# File & document parsing (PINNED: brittle)
+Pillow==11.3.0
+pdfplumber==0.11.8
+PyMuPDF==1.26.7
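+# OCR (OPTIONAL, disabled): while this stays commented out, the image_to_text tool only returns its "pytesseract is not installed" error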
+# pytesseract==0.3.13

tools/PLACEHOLDER.txt DELETED Viewed

File without changes

tools/download_file.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from typing import Optional
+from smolagents import tool
+@tool
+def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
+    """
+    Downloads a file from the given URL to a temporary local location.
+    The file is saved in the system's temporary directory. The filename is either passed as argument or
+    inferred from the URL's path; if it cannot be determined, a generic name is used.
+    Args:
+        url: The URL of the file to download (str).
+        filename: Optional filename, will generate one based on URL if not provided
+    Returns:
+        The full local file path (str) of the downloaded file if successful,
+        or an error message string detailing the failure.
+    """
+    import requests, tempfile, os
+    from urllib.parse import urlparse
+    try:
+        if not filename:
+            filename = os.path.basename(urlparse(url).path) or "downloaded_file"
+        filepath = os.path.join(tempfile.gettempdir(), filename)
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        with open(filepath, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return filepath
+    except Exception as e:
+        return f"Download error: {e}"

tools/files_to_dict.py DELETED Viewed

@@ -1,62 +0,0 @@
-from smolagents import tool
-import pandas as pd
-import pymupdf
-@tool
-def csv_to_dict(csv_file_path: str) -> str:
-    """
-    Reads a CSV file from the given path and returns:
-        - the data as a list of dictionaries,
-        - the list of column names,
-        - a basic descriptive summary of numeric columns.
-    Args:
-        csv_file_path (str): Path to the CSV file.
-    Returns:
-        str: A dictionary-like structure containing:
-             "data", "columns", and "describe".
-    """
-    try:
-        df = pd.read_csv(csv_file_path)
-        output = {
-            "columns" : df.columns.tolist(),
-            "describe": df.describe(include="all",percentiles=[.5]).to_dict(),
-            "data"    : df.to_dict(orient="records")
-        }
-        return output
-    except FileNotFoundError:
-        return f"Error: The file at '{csv_file_path}' was not found."
-    except Exception as e:
-        return f"An error occurred: {e}"
-@tool
-def excel_to_dict(xlsx_file_path: str) -> str:
-    """
-    Reads an Excel (xlsx) file from the given path and returns:
-        - the data as a list of dictionaries,
-        - the list of column names,
-        - a basic descriptive summary of numeric columns.
-    Args:
-        xlsx_file_path (str): Path to the Excel file.
-    Returns:
-        str: A dictionary-like structure containing:
-             "data", "columns", and "describe".
-    """
-    try:
-        df = pd.read_excel(xlsx_file_path)
-        output = {
-            "columns" : df.columns.tolist(),
-            "describe": df.describe(include="all",percentiles=[.5]).to_dict(),
-            "data"    : df.to_dict(orient="records")
-        }
-        return output
-    except FileNotFoundError:
-        return f"Error: The file at '{xlsx_file_path}' was not found."
-    except Exception as e:
-        return f"An error occurred: {e}"

tools/files_to_text.py CHANGED Viewed

@@ -10,16 +10,16 @@ def image_to_text(image_path: str) -> str:
         Extracted text or error message
     """
     try:
-      import pytesseract
-      from PIL import Image
-      # Open the image using PIL
-      img = Image.open(image_path)
-      # Use pytesseract to extract text from the image
-      extracted_text = pytesseract.image_to_string(img)
-      return f"Extracted text from image: {extracted_text}"
     except ImportError:
         return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
     except Exception as e:
@@ -34,13 +34,15 @@ def pdf_to_text(pdf_file_path: str) -> str:
     Returns:
         str: The text content of the PDF.
     """
     try:
-      doc = pymupdf.open(pdf_file_path)
-      text = ""
-      for page in doc:
-          text += page.get_text("text")
-          text += "\n"
-      return text
     except FileNotFoundError:
         return f"Error: The file at '{pdf_file_path}' was not found."
     except Exception as e:
@@ -65,10 +67,10 @@ def text_file_to_string(path: str) -> str:
     If the file contains binary data, the returned string may be partially decoded.
     """
     try:
-      with open(path, "r", encoding="utf-8", errors="ignore") as f:
-        content = f.read()
-      return content
     except FileNotFoundError:
-      return f"Error: The file at '{path}' was not found."
     except Exception as e:
-      return f"An error occurred: {e}"

         Extracted text or error message
     """
     try:
+        import pytesseract
+        from PIL import Image
+        # Open the image using PIL
+        img = Image.open(image_path)
+        # Use pytesseract to extract text from the image
+        extracted_text = pytesseract.image_to_string(img)
+        return f"Extracted text from image: {extracted_text}"
     except ImportError:
         return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
     except Exception as e:
     Returns:
         str: The text content of the PDF.
     """
     try:
+        import pymupdf
+        doc = pymupdf.open(pdf_file_path)
+        text = ""
+        for page in doc:
+            text += page.get_text("text")
+            text += "\n"
+        return text
     except FileNotFoundError:
         return f"Error: The file at '{pdf_file_path}' was not found."
     except Exception as e:
     If the file contains binary data, the returned string may be partially decoded.
     """
     try:
+        with open(path, "r", encoding="utf-8", errors="ignore") as f:
+            content = f.read()
+        return content
     except FileNotFoundError:
+        return f"Error: The file at '{path}' was not found."
     except Exception as e:
+        return f"An error occurred: {e}"