lwant committed on
Commit
449333b
Β·
1 Parent(s): 7e0251d

Add OpenAI integration and enhance toolset in `agent.py` and `tools.py`

Browse files
src/gaia_solving_agent/agent.py CHANGED
@@ -4,12 +4,14 @@ from typing import Literal
4
  from llama_index.core.agent.workflow import FunctionAgent
5
  from llama_index.core.prompts import RichPromptTemplate
6
  from llama_index.llms.nebius import NebiusLLM
 
 
7
  from llama_index.tools.requests import RequestsToolSpec
8
  from llama_index.tools.wikipedia import WikipediaToolSpec
9
  from workflows import Workflow, step, Context
10
  from workflows.events import StartEvent, Event, StopEvent
11
 
12
- from gaia_solving_agent import NEBIUS_API_KEY
13
  from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
14
  from gaia_solving_agent.tools import (
15
  tavily_search_web,
@@ -17,6 +19,9 @@ from gaia_solving_agent.tools import (
17
  simple_web_page_reader_toolspec,
18
  vllm_ask_image_tool,
19
  youtube_transcript_reader_toolspec,
 
 
 
20
  )
21
  from gaia_solving_agent.utils import extract_pattern
22
 
@@ -26,6 +31,19 @@ light_model_name = "Qwen/Qwen2.5-32B-Instruct"
26
  balanced_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
27
  reasoning_model_name = "deepseek-ai/DeepSeek-R1-0528"
28
  vlm_model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" # For VLM needs
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def get_llm(model_name=cheap_model_name):
31
  return NebiusLLM(
@@ -192,26 +210,18 @@ You are a helpful assistant that searches Wikipedia and visit Wikipedia pages.
192
  )
193
 
194
 
195
- # gaia_solving_agent = AgentWorkflow(
196
- # agents = [tavily_search_engine, visit_web_page_agent, wikipedia_agent],
197
- # initial_state = dict(),
198
- # root_agent = tavily_search_engine.name,
199
- # handoff_prompt = None,
200
- # handoff_output_prompt = None,
201
- # state_prompt = None,
202
- # num_concurrent_runs=1,
203
- # )
204
-
205
-
206
  gaia_solving_agent = FunctionAgent(
207
  tools = [
 
208
  vllm_ask_image_tool,
209
  tavily_search_web,
210
  *load_and_search_tools_from_toolspec(WikipediaToolSpec()),
211
  *simple_web_page_reader_toolspec.to_tool_list(),
212
  *youtube_transcript_reader_toolspec.to_tool_list(),
 
 
213
  ],
214
- llm=get_llm(balanced_model_name),
215
  system_prompt="""
216
  You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
217
 
@@ -221,10 +231,10 @@ gaia_solving_agent = FunctionAgent(
221
  - Analyse or compute : specialized tools to provide a specific analysis or computation.
222
 
223
  Try to get resources before querying them.
 
 
224
  If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
225
 
226
- When calling a tool, the inputs should be in a valid JSON format.
227
-
228
  You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete.
229
  You should follow the order of the sub-tasks.
230
  """,
 
4
  from llama_index.core.agent.workflow import FunctionAgent
5
  from llama_index.core.prompts import RichPromptTemplate
6
  from llama_index.llms.nebius import NebiusLLM
7
+ from llama_index.llms.mistralai import MistralAI
8
+ from llama_index.llms.openai import OpenAI
9
  from llama_index.tools.requests import RequestsToolSpec
10
  from llama_index.tools.wikipedia import WikipediaToolSpec
11
  from workflows import Workflow, step, Context
12
  from workflows.events import StartEvent, Event, StopEvent
13
 
14
+ from gaia_solving_agent import NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
15
  from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
16
  from gaia_solving_agent.tools import (
17
  tavily_search_web,
 
19
  simple_web_page_reader_toolspec,
20
  vllm_ask_image_tool,
21
  youtube_transcript_reader_toolspec,
22
+ text_content_analysis,
23
+ research_paper_reader_toolspec,
24
+ get_text_representation_of_additional_file,
25
  )
26
  from gaia_solving_agent.utils import extract_pattern
27
 
 
31
  balanced_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
32
  reasoning_model_name = "deepseek-ai/DeepSeek-R1-0528"
33
  vlm_model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" # For VLM needs
34
# Primary hosted LLM for the solving agent: OpenAI GPT-4.1, with a low
# temperature to keep answers near-deterministic and retries for transient
# API failures.
openai_llm = OpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4.1",
    max_retries=5,
    temperature=0.1,
)

# Alternative hosted LLM: Mistral "small" tier, configured identically.
mistral_llm = MistralAI(
    api_key=MISTRAL_API_KEY,
    model="mistral-small-latest",
    max_retries=5,
    temperature=0.1,
    # is_function_calling_model=True,
)
47
 
48
  def get_llm(model_name=cheap_model_name):
49
  return NebiusLLM(
 
210
  )
211
 
212
 
 
 
 
 
 
 
 
 
 
 
 
213
  gaia_solving_agent = FunctionAgent(
214
  tools = [
215
+ get_text_representation_of_additional_file,
216
  vllm_ask_image_tool,
217
  tavily_search_web,
218
  *load_and_search_tools_from_toolspec(WikipediaToolSpec()),
219
  *simple_web_page_reader_toolspec.to_tool_list(),
220
  *youtube_transcript_reader_toolspec.to_tool_list(),
221
+ *research_paper_reader_toolspec.to_tool_list(),
222
+ text_content_analysis,
223
  ],
224
+ llm=openai_llm,
225
  system_prompt="""
226
  You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
227
 
 
231
  - Analyse or compute : specialized tools to provide a specific analysis or computation.
232
 
233
  Try to get resources before querying them.
234
+ If it is an additional file, you can access its content through the get_text_representation_of_additional_file tool.
235
+ If you need the original Document, you can use the llamaindex context with ctx.store.get("additional_file").
236
  If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
237
 
 
 
238
  You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete.
239
  You should follow the order of the sub-tasks.
240
  """,
src/gaia_solving_agent/tools.py CHANGED
@@ -5,13 +5,14 @@ from llama_index.core.tools.tool_spec.base import BaseToolSpec
5
  from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
6
  from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
7
  from llama_index.multi_modal_llms.nebius import NebiusMultiModal
 
8
  from llama_index.readers.web import SimpleWebPageReader
9
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
10
  from llama_index.readers.papers import ArxivReader
11
  from tavily import AsyncTavilyClient
12
  from workflows import Context
13
 
14
- from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY
15
 
16
 
17
  def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
@@ -58,6 +59,33 @@ async def get_text_representation_of_additional_file(ctx: Context) -> str :
58
  return text_representation
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
62
  """
63
  Asynchronously processes a visual-linguistic query paired with image data
 
5
  from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
6
  from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
7
  from llama_index.multi_modal_llms.nebius import NebiusMultiModal
8
+ from llama_index.llms.openai import OpenAI
9
  from llama_index.readers.web import SimpleWebPageReader
10
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
11
  from llama_index.readers.papers import ArxivReader
12
  from tavily import AsyncTavilyClient
13
  from workflows import Context
14
 
15
+ from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
16
 
17
 
18
  def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
 
59
  return text_representation
60
 
61
 
62
async def text_content_analysis(text: str, query: str) -> str:
    """
    Analysis of the text provided as input.
    For example, extracting or filtering information from it.

    Parameters:
        text (str): The text to analyze.
        query (str): What you need to analyze in the text or extract from it.

    Returns:
        str
            The result of the analysis.
    """
    # Reasoning-oriented model; constructed per call so the tool stays stateless.
    reasoning_llm = OpenAI(
        model="o3-mini",
        api_key=OPENAI_API_KEY,
    )
    prompt = f"""
    You are a good at text analysis. You are being asked the following:
    { query }

    There is the text you must analyze :
    { text }
    """
    # Bug fix: this coroutine previously called the blocking synchronous
    # `complete()`, which stalls the event loop while waiting on the OpenAI
    # API. Use the async counterpart `acomplete()` and await it instead.
    response = await reasoning_llm.acomplete(prompt)
    return response.text
87
+
88
+
89
  async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
90
  """
91
  Asynchronously processes a visual-linguistic query paired with image data