msanton committed on
Commit
bf9e70e
·
verified ·
1 Parent(s): 81917a3

Add GaiaAgent and tools

Browse files
Files changed (4) hide show
  1. .gitignore +3 -0
  2. gaia_agent.py +113 -0
  3. requirements.txt +15 -1
  4. tools.py +223 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ /.env
2
+ /chroma_db
3
+ /__pycache__
gaia_agent.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from langchain_core.messages import HumanMessage
4
+ from langchain_chroma import Chroma
5
+ from langchain_litellm import ChatLiteLLM
6
+ from langchain_openai import OpenAIEmbeddings
7
+ from langgraph.graph import START, StateGraph
8
+ from langgraph.graph.message import MessagesState
9
+ from langgraph.prebuilt import ToolNode, tools_condition
10
+ from tools import *
11
+
12
+ load_dotenv()
13
+
14
class GaiaAgent:
    """Tool-calling agent for GAIA benchmark tasks.

    Pipeline (LangGraph): a retriever node prepends context from a local
    Chroma vector store, then an assistant node calls a tool-bound LLM,
    looping through the tool node until the model stops requesting tools.
    """

    def __init__(self):
        # LLM served through a LiteLLM proxy; endpoint/key come from env vars.
        self.llm = ChatLiteLLM(
            model="openai/gemini-2.5-pro",
            api_key=os.getenv("ITP_API_KEY"),
            api_base=os.getenv("TRELLIS_URL"),
            temperature=0.5,
        )
        self.tools = [
            web_search,
            wikipedia_search,
            arxiv_search,
            text_splitter,
            read_file,
            analyze_image,
            analyze_audio,
            analyze_youtube_video,
            multiply,
            add,
            subtract,
            divide,
        ]
        self.llm_with_tools = self.llm.bind_tools(self.tools)
        self.system_message = """
        You are a general AI assistant. I will ask you a question.
        Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
        YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
        If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
        If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
        If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
        """
        # Persistent vector store used by the retriever node for RAG context.
        self.vectorstore = Chroma(
            embedding_function=OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY")),
            persist_directory="chroma_db",
        )
        self.retriever = self.vectorstore.as_retriever(search_kwargs={"k": 3})

    def build_graph(self):
        """Compile the retriever -> assistant -> (tools)* LangGraph."""
        builder = StateGraph(MessagesState)
        builder.add_node("retriever", self.retrieve_node)
        builder.add_node("assistant", self.assistant_node)
        builder.add_node("tools", ToolNode(self.tools))

        builder.add_edge(START, "retriever")
        builder.add_edge("retriever", "assistant")

        # tools_condition routes to "tools" when the LLM emitted tool calls,
        # otherwise to END.
        builder.add_conditional_edges(
            "assistant",
            tools_condition,
        )
        builder.add_edge("tools", "assistant")
        return builder.compile()

    def retrieve_node(self, state: MessagesState):
        """Retriever node: prepend top-k vector-store context to the question."""
        question = state["messages"][-1].content
        docs = self.retriever.invoke(question)

        if docs:
            context = "\n\n".join([d.page_content for d in docs])
        else:
            context = "No relevant documents found"

        combined = f"Context:\n{context}\n\nQuestion:\n{question}"
        return {"messages": [HumanMessage(content=combined)]}

    def assistant_node(self, state: MessagesState):
        """Assistant node: invoke the tool-bound LLM with the system prompt.

        BUG FIX: the original checked for an existing HumanMessage, but the
        retriever node always inserts one, so the system prompt was never
        sent; it was also prepended as a bare str instead of a SystemMessage.
        """
        # Local import to keep this fix self-contained in the method.
        from langchain_core.messages import SystemMessage

        if not any(isinstance(m, SystemMessage) for m in state["messages"]):
            messages = [SystemMessage(content=self.system_message)] + state["messages"]
        else:
            messages = state["messages"]

        response = self.llm_with_tools.invoke(messages)
        return {"messages": [response]}

    @staticmethod
    def extract_answer(text: str):
        """Return the text following 'FINAL ANSWER', or `text` unchanged.

        BUG FIX: the template is 'FINAL ANSWER: ...'; the original only
        stripped whitespace and left the leading ':' in the answer.
        """
        keyword = "FINAL ANSWER"
        index = text.find(keyword)
        if index != -1:
            return text[index + len(keyword):].lstrip(" :").strip()
        else:
            return text

    def run(self, task: dict):
        """Run the agent on one GAIA task dict and return the final answer.

        Expects keys: task_id, question, file_name.
        """
        task_id, question, file_name = task["task_id"], task["question"], task["file_name"]

        # BUG FIX: the original used `!= "" or is not None`, which is always
        # true (for "" the second clause holds; for None the first does), so
        # the task_id hint was appended even when no file is attached.
        if file_name:
            question = f"{question} with task_id {task_id}"

        graph = self.build_graph()

        messages: list[HumanMessage] = [HumanMessage(content=question)]
        result = graph.invoke({"messages": messages})

        # BUG FIX: extract_answer was applied twice in the original.
        return self.extract_answer(result["messages"][-1].content)
requirements.txt CHANGED
@@ -1,2 +1,16 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ langchain
4
+ langchain-community
5
+ langchain-core
6
+ langchain-text-splitters
7
+ langgraph
8
+ langchain-chroma
9
+ langchain-litellm
10
+ langchain_openai
11
+ wikipedia
12
+ python-dotenv
13
+ openai
14
+ arxiv
15
+ chromadb
16
+ # openai  (duplicate entry — already listed above at line 13)
tools.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ import openai
5
+ from typing import List
6
+ from dotenv import load_dotenv
7
+ from langchain_core.tools import tool
8
+ from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ImageCaptionLoader, ArxivLoader
9
+ from langchain_community.tools import DuckDuckGoSearchResults
10
+ from langchain_text_splitters import CharacterTextSplitter
11
+
12
+ load_dotenv()
13
+
14
@tool
def multiply(a: int, b: int) -> int:
    """
    Multiply two integers and return the result

    Args:
        a: The first integer to multiply
        b: The second integer to multiply

    Returns:
        int: The result of the multiplication
    """
    # Plain integer product; named for readability.
    product = a * b
    return product
27
+
28
@tool
def add(a: int, b: int) -> int:
    """
    Add two integers and return the result

    Args:
        a: The first integer to add
        b: The second integer to add

    Returns:
        int: The result of the addition
    """
    # Plain integer sum; named for readability.
    total = a + b
    return total
41
+
42
@tool
def subtract(a: int, b: int) -> int:
    """
    Subtract two integers and return the result

    Args:
        a: The first integer to subtract
        b: The second integer to subtract

    Returns:
        int: The result of the subtraction
    """
    # Computes a - b; named for readability.
    difference = a - b
    return difference
55
+
56
@tool
def divide(a: int, b: int) -> float:
    """
    Divide the first integer by the second integer and return the result

    Args:
        a: The first integer to divide (dividend)
        b: The second integer to divide (divisor)

    Returns:
        float: The result of the division

    Raises:
        ZeroDivisionError: if b is 0
    """
    # BUG FIX: `/` is true division and yields a float; the return
    # annotation previously claimed int.
    return a / b
69
+
70
# Base URL for downloading task attachments from the GAIA scoring space.
FILE_URL = "https://agents-course-unit4-scoring.hf.space/files/"

@tool
def read_file(task_id: str) -> str:
    """
    Download a file based on the task_id and then read the content of the file

    Args:
        task_id: The id of the task to download the file from

    Returns:
        str: The content of the file

    Raises:
        requests.HTTPError: if the download fails
    """
    file_url = f"{FILE_URL}{task_id}"
    response = requests.get(file_url, timeout=10, allow_redirects=True)
    # BUG FIX: fail loudly on HTTP errors instead of silently returning an
    # error page's body as if it were the file.
    response.raise_for_status()
    # FIX: the original wrote the payload to a 'temp' file (leaked on disk)
    # and immediately re-read it with the locale-default encoding; decode
    # in memory instead.
    return response.text
89
+
90
@tool
def analyze_image(task_id: str) -> str:
    """
    Analyze an image based on the task_id and return a description of the content of the image

    Args:
        task_id: The id of the task to analyze the image from

    Returns:
        str: The description of the content of the image
    """
    # Build the download URL and let the caption loader fetch + describe it.
    loader = ImageCaptionLoader(images=[f"{FILE_URL}{task_id}"])
    docs = loader.load()
    return docs[0].page_content
104
+
105
@tool
def analyze_audio(task_id: str) -> str:
    """
    Analyze an mp3 file based on the task_id and return a description of the content of the audio file

    Args:
        task_id: The id of the task to analyze the audio file from

    Returns:
        str: The description of the content of the audio file

    Raises:
        requests.HTTPError: if the download fails
    """
    file_url = f"{FILE_URL}{task_id}"
    response = requests.get(file_url, timeout=10, allow_redirects=True)
    # BUG FIX: fail loudly on HTTP errors instead of transcribing an error page.
    response.raise_for_status()
    temp_file = 'temp.mp3'
    try:
        with open(temp_file, 'wb') as fp:
            fp.write(response.content)
        # The OpenAI SDK needs a named, seekable file object for Whisper.
        with open(temp_file, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1"
            )
    finally:
        # FIX: the original leaked temp.mp3 on disk (and on any exception).
        if os.path.exists(temp_file):
            os.remove(temp_file)
    return transcript.text
127
+
128
@tool
def analyze_youtube_video(youtube_url: str, question: str) -> str:
    """
    Analyze a youtube video based on the youtube_url and the question and return the answer to the question

    Args:
        youtube_url: The url of the youtube video to analyze
        question: The question to answer based on the youtube video

    Returns:
        str: The answer to the question
    """
    # BUG FIX: the original body was empty, so the tool implicitly returned
    # None despite the declared `-> str` contract, which breaks the tool
    # node's message handling. Until video analysis is implemented, report
    # the limitation explicitly so the LLM can respond accordingly.
    return (
        f"Video analysis is not implemented yet; cannot answer {question!r} "
        f"for {youtube_url}."
    )
142
@tool
def web_search(query: str) -> str:
    """
    Search the web for the given query and return the results

    Args:
        query: The query to search the web for

    Returns:
        str: The text content of the web search results
    """
    # NOTE(review): recent langchain_community releases name this parameter
    # `output_format`, not `output_type` — confirm against the installed
    # version; a rejected/ignored kwarg would change the result shape.
    search_engine = DuckDuckGoSearchResults(output_type="list", num_results=3)
    results = search_engine.invoke({"query": query})
    page_urls = [entry["link"] for entry in results]

    # FIX: guard against an empty result set — WebBaseLoader([]) would
    # otherwise be invoked for nothing and yield no docs to join.
    if not page_urls:
        return "No web results found"

    loader = WebBaseLoader(web_paths=page_urls)
    docs = loader.load()

    # Cap each page at 15k chars to keep the tool output bounded.
    combined_text = "\n\n".join(doc.page_content[:15000] for doc in docs)

    # Collapse runs of blank lines / oversized whitespace and trim the ends.
    cleaned_text = re.sub(r'\n{3,}', '\n\n', combined_text).strip()
    cleaned_text = re.sub(r'[ \t]{6,}', ' ', cleaned_text)

    # (redundant trailing .strip() from the original removed — the interior
    # whitespace collapse cannot reintroduce leading/trailing whitespace)
    return cleaned_text
169
+
170
@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia articles with the given query and return the pages

    Args:
        query: The query to search Wikipedia for

    Returns:
        str: The text content of the Wikipedia articles related to the query
    """
    print("Searching Wikipedia for the query: ", query)
    docs = WikipediaLoader(query=query, load_max_docs=3).load()
    # Wrap each article in a pseudo-XML envelope carrying its source metadata.
    rendered = (
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in docs
    )
    return "\n\n---\n\n".join(rendered)
189
+
190
@tool
def arxiv_search(query: str) -> str:
    """
    Search arxiv for the given query and return the results

    Args:
        query: The query to search arxiv for

    Returns:
        str: The text content of the arxiv search results

    """
    docs = ArxivLoader(query=query, load_max_docs=3).load()
    # Wrap each paper (truncated to 1000 chars) in a pseudo-XML envelope
    # carrying its source metadata.
    rendered = (
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in docs
    )
    return "\n\n---\n\n".join(rendered)
209
+
210
@tool
def text_splitter(text: str) -> List[str]:
    """
    Split a large text into smaller chunks using Langchain's CharacterTextSplitter

    Args:
        text: The large text to split into smaller chunks

    Returns:
        List[str]: a list containing the smaller chunks of the text
    """
    # DOC FIX: the tool description (read by the LLM at runtime) said
    # "a list container the" — corrected to "containing".
    splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=10)
    return splitter.split_text(text)