Spaces:

DakshChaudhary
/

Agent_GAIA_Benchmark

Sleeping

App Files Files Community

DakshChaudhary committed on Jun 20, 2025

Commit

1b34f03

1 Parent(s): 24d5bb0

Refactored + Added prompts + Added Tools (Calculator, FileDownloader, ImageReader, Pandas, WebSearch) + NebiusAI inference added

Browse files

Files changed (18) hide show

__pycache__/agent.cpython-313.pyc +0 -0
agent.py +38 -0
agent_models/__pycache__/models.cpython-313.pyc +0 -0
agent_models/models.py +22 -0
agent_prompts/SystemPrompt.py +9 -1
agent_prompts/__pycache__/SystemPrompt.cpython-313.pyc +0 -0
agent_tools/CalculatorTool.py +29 -0
agent_tools/FileDownloaderTool.py +56 -0
agent_tools/ImageReaderTool.py +56 -0
agent_tools/PandasTool.py +39 -0
agent_tools/WebSearchTool.py +1 -1
agent_tools/__pycache__/CalculatorTool.cpython-313.pyc +0 -0
agent_tools/__pycache__/FileDownloaderTool.cpython-313.pyc +0 -0
agent_tools/__pycache__/ImageReaderTool.cpython-313.pyc +0 -0
agent_tools/__pycache__/PandasTool.cpython-313.pyc +0 -0
agent_tools/__pycache__/WebSearchTool.cpython-313.pyc +0 -0
app.py +25 -69
requirements.txt +10 -2

__pycache__/agent.cpython-313.pyc ADDED Viewed

Binary file (2.5 kB). View file

agent.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import sys
+import inspect
+import logging
+from llama_index.core.agent import ReActAgent
+from agent_models.models import get_language_model
+from agent_tools.WebSearchTool import web_search_tools
+from agent_tools.FileDownloaderTool import get_downloader_tool
+from agent_tools.ImageReaderTool import get_image_interpreter_tool
+from agent_tools.CalculatorTool import get_calculator_tool
+from agent_tools.PandasTool import get_pandas_tool
+from agent_prompts.SystemPrompt import gaia_system_prompt
+#Logging
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+# Agent Definition
+class GaiaAgent:
+    def __init__(self):
+        # print(f"ReAct Agent signature--------------------------------------- \n {inspect.signature(ReActAgent.from_tools)} \n ReAct Agent signature--------------------------------------- \n" )
+        list_of_search_tools = web_search_tools()
+        list_of_other_tools = [get_downloader_tool(), get_image_interpreter_tool(), get_calculator_tool(), get_pandas_tool()]
+        self.llm = get_language_model()
+        self.tools = list_of_search_tools + list_of_other_tools
+        self.agent = ReActAgent.from_tools(tools=self.tools, llm=self.llm, context=gaia_system_prompt, verbose=True)
+    async def __call__(self, question: str) -> str:
+        response_object = self.agent.chat(question)
+        full_response_text = str(response_object)
+        final_answer_prefix = "FINAL ANSWER:"
+        if final_answer_prefix in full_response_text:
+            clean_answer = full_response_text.split(final_answer_prefix, 1)[1].strip()
+            return clean_answer
+        else:
+            return full_response_text

agent_models/__pycache__/models.cpython-313.pyc ADDED Viewed

Binary file (1.65 kB). View file

agent_models/models.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import os
+from llama_index.llms.nebius import NebiusLLM
+from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+from openai import OpenAI
+# NebiusAI API Key
+os.environ["NEBIUS_API_KEY"] = ""
+# Model config variable
+llm_Id = "Qwen/Qwen2.5-32B-Instruct"
+def get_language_model():
+    '''Initializes and returns the LLM for Agent's core functionality'''
+    return NebiusLLM(api_key=os.getenv("NEBIUS_API_KEY"), model=llm_Id) # To use HuggingFaceInferenceAPI -> return HuggingFaceInferenceAPI(model_name=llm_Id)
+def get_vision_model_client():
+    '''Initializes and returns the vision model for analyzing images'''
+    return OpenAI(
+        api_key=os.getenv("NEBIUS_API_KEY"),
+        base_url="https://api.studio.nebius.com/v1/"
+    )

agent_prompts/SystemPrompt.py CHANGED Viewed

@@ -49,4 +49,12 @@ Action: calculate["20 divided by 80 times 100"]
 Observation: 20/80 × 100 = 25.
 Thought: That is twenty‑five percent.
 FINAL ANSWER: twenty-five percent
-'''

 Observation: 20/80 × 100 = 25.
 Thought: That is twenty‑five percent.
 FINAL ANSWER: twenty-five percent
+'''
+vision_model_system_prompt = '''
+You are an expert image analyst. Describe the contents of this image in detail. Focus on the minute details present in the image that might be important to the overall context.
+If it is a chess board, describe the exact position of all pieces for both black and white (Pay very close attention to the numbers and letters to determine position).
+You MUST ALWAYS VERIFY that your output is correct before sending it to the user.
+'''

agent_prompts/__pycache__/SystemPrompt.cpython-313.pyc CHANGED Viewed

Binary files a/agent_prompts/__pycache__/SystemPrompt.cpython-313.pyc and b/agent_prompts/__pycache__/SystemPrompt.cpython-313.pyc differ

agent_tools/CalculatorTool.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import numexpr
+from llama_index.core.tools import FunctionTool
+def calculate(expression: str) -> str:
+    """
+    A safe calculator that evaluates a mathematical string expression.
+    This tool can handle complex expressions with parentheses, addition,
+    subtraction, multiplication, and division.
+    Args:
+        expression (str): The mathematical expression to evaluate (e.g., "2 * (3 + 4)").
+    """
+    print(f"Calculating expression: {expression}")
+    try:
+        # Use the numexpr library to safely evaluate the string.
+        result = numexpr.evaluate(expression).item()
+        # Return the result as a string for the agent to process.
+        return str(result)
+    except Exception as e:
+        # If the expression is invalid, return a descriptive error.
+        return f"Error: Invalid mathematical expression. Please check your syntax. Details: {e}"
+def get_calculator_tool() -> FunctionTool:
+    """Initializes and returns our custom-built, safe calculator tool."""
+    return FunctionTool.from_defaults(
+        fn=calculate,
+        name="calculator",
+        description="A tool for evaluating mathematical expressions. Use this for any math calculations, like addition, subtraction, multiplication, division, etc. Example input: '(25 * 4) + 15 - 5'"
+    )

agent_tools/FileDownloaderTool.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import requests
+import os
+from llama_index.core.tools import FunctionTool
+BASE_API_URL = "https://agents-course-unit4-scoring.hf.space"
+def download_file(task_id: str, file_name: str) -> str:
+    """
+    Downloads a file for a given task_id from the GAIA API.
+    Args:
+        task_id (str): The ID of the task to download the file for.
+        file_name (str): The name to save the file as.
+    Returns:
+        str: The local path to the downloaded file.
+    """
+    print(f"Attempting to download file for task: {task_id}")
+    # 1. Construct the full URL for the file endpoint.
+    file_url = f"{BASE_API_URL}/files/{task_id}"
+    # 2. Define the local directory to save downloads.
+    download_dir = "downloads"
+    os.makedirs(download_dir, exist_ok=True)
+    # 3. Construct the full local path for the file.
+    local_filepath = os.path.join(download_dir, file_name)
+    # 4. Make a GET request to the file_url.
+    try:
+        response = requests.get(file_url, timeout=20)
+        # This will raise an exception for bad status codes (like 404).
+        response.raise_for_status()
+        # 5. Save the content of the response to the local file.
+        with open(local_filepath, 'wb') as f:
+            f.write(response.content)
+        print(f"Successfully downloaded file to: {local_filepath}")
+        # 6. Return the local file path.
+        return local_filepath
+    except requests.exceptions.RequestException as e:
+        error_message = f"Failed to download file for task {task_id}. Error: {e}"
+        print(error_message)
+        return error_message
+# Wrapper function to create the tool
+def get_downloader_tool() -> FunctionTool:
+    return FunctionTool.from_defaults(
+        fn=download_file,
+        name="file_downloader",
+        description="A tool to download files associated with a specific task ID. Use this when a question mentions an image, audio file, or other document."
+    )

agent_tools/ImageReaderTool.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import os
+import base64
+from llama_index.core.tools import FunctionTool
+from llama_index.readers.file.image import ImageReader
+from agent_models.models import get_vision_model_client
+from agent_prompts.SystemPrompt import vision_model_system_prompt
+def get_image_description(image_path: str) -> str:
+    """
+    Analyzes a local image and returns a text description. This tool is used to "see" what is in an image file.
+    Args:
+        image_path (str): The local file path of the image to analyze.
+    """
+    try:
+        print(f"Analyzing image at path: {image_path}")
+        # Read and encode the image
+        with open(image_path, "rb") as img_file:
+            b64_image = base64.b64encode(img_file.read()).decode("utf-8")
+        b64_url = f"data:image/png;base64,{b64_image}"
+        # Get Nebius client
+        client = get_vision_model_client()
+        # Call Nebius API
+        response = client.chat.completions.create(
+            model="Qwen/Qwen2-VL-72B-Instruct",
+            messages=[
+                {
+                    "role": "system",
+                    "content": vision_model_system_prompt
+                },
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Here is an image."},
+                        {"type": "image_url", "image_url": {"url": b64_url}}
+                    ]
+                }
+            ]
+        )
+        description = response.choices[0].message.content
+        print(f"Vision model response: {description}")
+        return description
+    except Exception as e:
+        return f"Error analyzing image: {e}"
+# Wrapper function to create the tool for our agent
+def get_image_interpreter_tool() -> FunctionTool:
+    return FunctionTool.from_defaults(
+        fn=get_image_description,
+        name="image_interpreter",
+        description="A tool to analyze an image from a local file path and return a detailed text description. Use this to 'see' what is in an image file that has already been downloaded."
+    )

agent_tools/PandasTool.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import pandas as pd
+from llama_index.experimental.query_engine import PandasQueryEngine
+from llama_index.core.tools import FunctionTool
+from agent_models.models import get_language_model
+def analyze_spreadsheet(file_path: str, question: str) -> str:
+    """
+    Analyzes a spreadsheet (Excel/CSV) to answer a specific question about its data.
+    This is a powerful tool for data analysis on structured files.
+    Args:
+        file_path (str): The local path to the .xlsx or .csv file.
+        question (str): The question to ask about the data in the file.
+    """
+    print(f"Analyzing spreadsheet '{file_path}' for question: '{question}'")
+    try:
+        # Load the spreadsheet into a Pandas DataFrame
+        df = pd.read_excel(file_path)
+        # The PandasQueryEngine needs a LLM to translate natural language into Pandas commands.
+        llm = get_language_model()
+        # Create the query engine
+        query_engine = PandasQueryEngine(df=df, llm=llm, verbose=True)
+        # Ask the question and get the response
+        response = query_engine.query(question)
+        return str(response)
+    except Exception as e:
+        return f"Error analyzing spreadsheet: {e}"
+# Wrapper function to create the LlamaIndex tool
+def get_pandas_tool() -> FunctionTool:
+    return FunctionTool.from_defaults(
+        fn=analyze_spreadsheet,
+        name="spreadsheet_analyzer",
+        description="A tool to answer questions about data stored in a spreadsheet (.xlsx or .csv). It takes two arguments: the 'file_path' to the spreadsheet and the 'question' to ask about the data."
+    )

agent_tools/WebSearchTool.py CHANGED Viewed

@@ -6,6 +6,6 @@ from llama_index.tools.tavily_research import TavilyToolSpec
 def web_search_tools()->List[FunctionTool]:
     "Tool to search the web with Tavily (search + scraping)"
-    os.environ["TAVILY_API_KEY"] = "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"
     tavily_spec = TavilyToolSpec(api_key=os.getenv("TAVILY_API_KEY"))
     return tavily_spec.to_tool_list()

 def web_search_tools()->List[FunctionTool]:
     "Tool to search the web with Tavily (search + scraping)"
+    os.environ["TAVILY_API_KEY"] = ""
     tavily_spec = TavilyToolSpec(api_key=os.getenv("TAVILY_API_KEY"))
     return tavily_spec.to_tool_list()

agent_tools/__pycache__/CalculatorTool.cpython-313.pyc ADDED Viewed

Binary file (1.65 kB). View file

agent_tools/__pycache__/FileDownloaderTool.cpython-313.pyc ADDED Viewed

Binary file (2.36 kB). View file

agent_tools/__pycache__/ImageReaderTool.cpython-313.pyc ADDED Viewed

Binary file (2.56 kB). View file

agent_tools/__pycache__/PandasTool.cpython-313.pyc ADDED Viewed

Binary file (1.91 kB). View file

agent_tools/__pycache__/WebSearchTool.cpython-313.pyc CHANGED Viewed

Binary files a/agent_tools/__pycache__/WebSearchTool.cpython-313.pyc and b/agent_tools/__pycache__/WebSearchTool.cpython-313.pyc differ

app.py CHANGED Viewed

@@ -1,35 +1,23 @@
-import logging
-import sys
-# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
-# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 import os
-import gradio as gr
-import requests
 import inspect
 import pandas as pd
-from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
-from llama_index.core.agent import ReActAgent
-from agent_tools.WebSearchTool import web_search_tools
-from agent_prompts.SystemPrompt import gaia_system_prompt
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-class BasicAgent:
-    def __init__(self):
-        self.llm = HuggingFaceInferenceAPI(model_name = "Qwen/Qwen2.5-72B-Instruct")
-        # print(f"ReAct Agent signature--------------------------------------- \n {inspect.signature(ReActAgent.from_tools)} \n ReAct Agent signature--------------------------------------- \n" )
-        self.agent = ReActAgent.from_tools(tools=web_search_tools(), llm=self.llm, context=gaia_system_prompt, verbose=True)
-    async def __call__(self, question: str) -> str:
-        response = self.agent.chat(question)
-        return str(response)
 async def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the BasicAgent on them, submits all answers, and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
@@ -41,13 +29,13 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -55,36 +43,6 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
-    # 2. Test Case
-    test_question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)?"
-    print(f"--- RUNNING SINGLE TEST ---")
-    print(f"Question: {test_question}")
-    # 3. Run the agent and display the result
-    try:
-        # This calls your agent's __call__ method
-        submitted_answer = await agent(test_question)
-        # Print the final answer clearly in the terminal
-        print("-" * 50)
-        print(f">>> AGENT'S FINAL ANSWER: {submitted_answer}")
-        print("-" * 50)
-        # Prepare a simple status message and table for the Gradio UI
-        status_message = "Test Finished Successfully."
-        results_df = pd.DataFrame([
-            {"Question": test_question, "Submitted Answer": submitted_answer}
-        ])
-    except Exception as e:
-        print(f"Error during single test run: {e}")
-        status_message = f"Agent returned an error: {e}"
-        results_df = pd.DataFrame()
-    # 4. Return early to prevent fetching all questions or submitting
-    return status_message, results_df
-    '''
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
@@ -113,11 +71,18 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -174,24 +139,15 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    '''
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
-        **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )

 import os
+import sys
 import inspect
+import requests
+import logging
+import gradio as gr
 import pandas as pd
+from agent import GaiaAgent
+# Logging
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+# Constants
+BASE_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# Function to run GAIA dataset questions
 async def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the GaiaAgent on them, submits all answers, and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
+    api_url = BASE_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
+        agent = GaiaAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
+        full_input=f"""
+        Task ID: {task_id}
+        File Name: {file_name}
+        Question: {question_text}
+        """
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = await agent(full_input)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
+        return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
         """
     )

requirements.txt CHANGED Viewed

@@ -1,6 +1,14 @@
 gradio
 requests
 llama-index
-llama-index-llms-huggingface-api
 llama-index-tools-tavily-research
-tavily-python

 gradio
 requests
 llama-index
+# llama-index-llms-huggingface-api
 llama-index-tools-tavily-research
+llama-index-llms-nebius
+llama-index-multi-modal-llms-nebius
+tavily-python
+llama-index-readers-file
+openai
+numexpr # calculator
+pandas
+openpyxl
+llama-index-experimental