Final_Assignment_Template

Sleeping

App Files Files Community

carolinacon committed on Aug 21, 2025

Commit

778116a

1 Parent(s): 2e8bb22

Added logic for audio, images, excel and python code

Browse files

Files changed (12) hide show

app.py +44 -5
config/prompts.yaml +14 -3
core/agent.py +20 -24
core/edges.py +0 -10
core/messages.py +93 -0
core/state.py +2 -1
nodes/nodes.py +31 -11
requirements.txt +4 -1
tools/audio_tool.py +29 -0
tools/excel_tool.py +25 -0
tools/python_executor.py +52 -0
utils/prompt_manager.py +7 -5

app.py CHANGED Viewed

@@ -2,8 +2,10 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-from core.agent import GaiaAgent
 # (Keep Constants as is)
 # --- Constants ---
@@ -15,12 +17,46 @@ class BasicAgent:
     agent: GaiaAgent
     def __init__(self):
         self.agent = GaiaAgent()
-    def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        answer = self.agent.__call__(question)
         print(f"Agent returning fixed answer: {answer}")
         return answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -78,11 +114,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -98,7 +137,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
-    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)

 import gradio as gr
 import requests
 import pandas as pd
+from pathlib import Path
+from core.agent import GaiaAgent, Attachment
 # (Keep Constants as is)
 # --- Constants ---
     agent: GaiaAgent
     def __init__(self):
         self.agent = GaiaAgent()
+    def __call__(self, question: str, attached_content: bytes|None, attached_file_name: str|None ) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        attachment = None
+        if attached_content:
+            attachment =Attachment(attached_content, attached_file_name)
+        answer = self.agent.__call__(question, attachment)
         print(f"Agent returning fixed answer: {answer}")
         return answer
def get_question_attached_file(task_id, file_name) -> bytes | None:
    """
    Download the attachment of a task and keep a copy of it on disk.

    The content is fetched from ``{DEFAULT_API_URL}/files/{task_id}`` and written to
    ``attachments/<task_id>/<file_name>`` relative to the current working directory.

    :param task_id: identifier of the task whose attachment is requested
    :param file_name: name under which the downloaded content is stored
    :return: the raw bytes of the attachment, or None when fetching or saving fails
    """
    api_url = DEFAULT_API_URL
    attachment_url = f"{api_url}/files/{task_id}"
    print(f"Fetching attachment from: {attachment_url}")
    try:
        response = requests.get(attachment_url, timeout=15)
        response.raise_for_status()
        print(f"Retrieved {file_name} attachment from: {attachment_url}")
        content = response.content
        # Join path components with pathlib instead of hard-coding backslashes:
        # "\" is only a separator on Windows, elsewhere it would end up inside a
        # single odd file name (and "\{" is an invalid escape sequence).
        file_path = Path("attachments") / str(task_id) / file_name
        # Create parent directories if they don't exist
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_bytes(content)
        return content
    except Exception as e:
        # Best effort: a failed download must not abort the whole run, so the
        # failure is logged and signalled to the caller with an explicit None.
        print(f"An unexpected error occurred fetching attachment for taskid{task_id}: {e}")
        return None
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        attached_file_name = item.get("file_name")
+        if attached_file_name:
+            file_content = get_question_attached_file(task_id, attached_file_name)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text, file_content)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
+    #5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)

config/prompts.yaml CHANGED Viewed

@@ -3,8 +3,9 @@ prompts:
     content: |
       You are a general AI assistant tasked with answering complex questions.
       Make sure you think step by step in order to answer the given question.
       Here is a summary of the steps you took so far:
       <summary>
@@ -61,8 +62,10 @@ prompts:
        If the value of chunked_last_tool_call is true, this means that the last tool execution returns a result formed from the concatenation
        of multiple chunks.
        Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}
     type: base_system
-    variables: ["summary"]
     version: 1.0
     description: "Core system prompt for all interactions"
   final_answer_processor:
@@ -99,4 +102,12 @@ prompts:
     type: question_refinement
     variables: ["question"]
     version: 1.0
-    description: "Prompt for evaluating a question"

     content: |
       You are a general AI assistant tasked with answering complex questions.
       Make sure you think step by step in order to answer the given question.
+      {{attachment}}
       Here is a summary of the steps you took so far:
       <summary>
        If the value of chunked_last_tool_call is true, this means that the last tool execution returns a result formed from the concatenation
        of multiple chunks.
        Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}
+       If you generate python code make sure you print the value of the variable you are interested in.
     type: base_system
+    variables: ["summary", "chunked_last_tool_call", "attachment"]
     version: 1.0
     description: "Core system prompt for all interactions"
   final_answer_processor:
     type: question_refinement
     variables: ["question"]
     version: 1.0
+    description: "Prompt for evaluating a question"
+  audio_evaluation:
+    content: |
+      You are an audio analysis assistant. Answer questions based on the provided audio. Be precise and factual.
+      If you're asked about facts not present in the given audio, say so.
+    type: tool
+    variables: []
+    version: 1.0
+    description: "Prompt for audio tool"

core/agent.py CHANGED Viewed

@@ -1,9 +1,11 @@
 from langchain_core.messages import HumanMessage
 from langgraph.graph.state import CompiledStateGraph
-from core.edges import file_condition
 from core.state import State
-from nodes.nodes import assistant, optimize_memory, response_processing, orchestrator
 from tools.tavily_tools import llm_tools
 from langgraph.graph import START, StateGraph, END
@@ -19,19 +21,15 @@ class GaiaAgent:
         builder = StateGraph(State)
         # Define nodes: these do the work
-        builder.add_node("orchestrator", orchestrator)
         builder.add_node("assistant", assistant)
         builder.add_node("tools", ToolNode(llm_tools))
         builder.add_node("optimize_memory", optimize_memory)
         builder.add_node("response_processing", response_processing)
         # Define edges: these determine how the control flow moves
-        builder.add_edge(START, "orchestrator")
-        builder.add_conditional_edges("orchestrator",
-            # If the question involves a file processing -> file_condition routes to the END state
-            # If the question does not involve a file processing -> tools_condition routes to
-            # assistant
-            file_condition)
         builder.add_conditional_edges(
             "assistant",
@@ -46,27 +44,25 @@ class GaiaAgent:
         builder.add_edge("response_processing", END)
         self.react_graph = builder.compile()
-    def __call__(self, question: str) -> str:
-        messages = [HumanMessage(content=question)]
-        messages = self.react_graph.invoke({"messages": messages})
-#        for m in messages['messages']:
-#            m.pretty_print()
         answer = messages['messages'][-1].content
         return answer
-    def __streamed_call__(self, question: str) -> str:
-        # Test the web agent
-        inputs = {
-            "messages": [
-                HumanMessage(
-                    content=question
-                )
-            ]
-        }
         # Stream the web agent's response
-        for s in self.react_graph.stream(inputs, stream_mode="values"):
             message = s["messages"][-1]
             if isinstance(message, tuple):
                 print(message)

+from typing import Optional
 from langchain_core.messages import HumanMessage
 from langgraph.graph.state import CompiledStateGraph
+from core.messages import Attachment
 from core.state import State
+from nodes.nodes import assistant, optimize_memory, response_processing, pre_processor
 from tools.tavily_tools import llm_tools
 from langgraph.graph import START, StateGraph, END
         builder = StateGraph(State)
         # Define nodes: these do the work
+        builder.add_node("pre_processor", pre_processor)
         builder.add_node("assistant", assistant)
         builder.add_node("tools", ToolNode(llm_tools))
         builder.add_node("optimize_memory", optimize_memory)
         builder.add_node("response_processing", response_processing)
         # Define edges: these determine how the control flow moves
+        builder.add_edge(START, "pre_processor")
+        builder.add_edge("pre_processor", "assistant")
         builder.add_conditional_edges(
             "assistant",
         builder.add_edge("response_processing", END)
         self.react_graph = builder.compile()
+    def __call__(self, question: str, attachment: Optional[Attachment] = None) -> str:
+        initial_state = {"messages": [HumanMessage(content=question)]}
+        if attachment:
+            initial_state["file_reference"] = attachment.file_path
+        messages = self.react_graph.invoke(initial_state)
+        #        for m in messages['messages']:
+        #            m.pretty_print()
         answer = messages['messages'][-1].content
         return answer
+    def __streamed_call__(self, question: str, attachment: Optional[Attachment] = None) -> str:
+        initial_state = {"messages": [HumanMessage(content=question)]}
+        if attachment:
+            initial_state["file_reference"] = attachment.file_path
         # Stream the web agent's response
+        for s in self.react_graph.stream(initial_state, stream_mode="values"):
             message = s["messages"][-1]
             if isinstance(message, tuple):
                 print(message)

core/edges.py DELETED Viewed

@@ -1,10 +0,0 @@
-from core.state import State
-from typing import Literal
-from langgraph.graph import END
-def file_condition(state: State) -> Literal["assistant", END]:
-    has_attachment = state.get("attachment", "")
-    if has_attachment == "true":
-        return END
-    return "assistant"

core/messages.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from typing import Literal, Optional
+import mimetypes
+import base64
+from pathlib import Path
def get_content_type(mime_type: str) -> Optional[Literal["image", "audio", "file", "text"]]:
    """
    Map a MIME type string to the coarse content type used for attachments.

    :param mime_type: a MIME type such as "image/png"; may be empty or None
    :return: "image", "audio", "file" or "text", or None when no MIME type is given
    :raises ValueError: when the MIME type does not map to a supported content type
    """
    if not mime_type:
        return None
    # MIME types are case-insensitive, so normalise once and compare everything
    # (main type and the full spreadsheet type) against the lowered value.
    normalized = mime_type.strip().lower()
    # Split into type/subtype (e.g., "image/png" -> "image")
    main_type = normalized.split("/")[0]
    if main_type in ("image", "audio", "file", "text"):
        return main_type
    # Excel workbooks (.xlsx) are handled as generic files
    if normalized == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        return "file"
    raise ValueError(f"Cannot extract type from mime_type {mime_type}")
class Attachment:
    """
    A file attached to a question: its raw bytes plus metadata derived from its path.

    The MIME type is guessed from the file path and the content type is derived
    from that MIME type via get_content_type.
    """
    content: bytes
    mime_type: str
    file_path: str
    type: str

    def __init__(self, content: bytes, file_path: str):
        self.content = content
        self.file_path = file_path
        # guess_type returns a (type, encoding) pair; only the type is kept
        guessed_mime, _encoding = mimetypes.guess_type(file_path)
        self.mime_type = guessed_mime
        self.type = get_content_type(guessed_mime)

    def get_encoded_content_b64(self) -> str:
        """Return the attachment bytes as a base64 string."""
        encoded = base64.b64encode(self.content)
        return encoded.decode("utf-8")
class AttachmentHandler:
    """Builds message content parts for attachments and loads attachment bytes."""

    def __init__(self, supported_types: list):
        self.supported_types = supported_types

    def get_attachment_representation(self, attachment: "Attachment") -> dict:
        """
        Build the content dictionary for an Attachment.

        :param attachment: the attachment to represent
        :return: a dictionary whose shape depends on the attachment type
        :raises Exception: when the attachment type is not one of the supported types
        """
        kind = attachment.type
        if kind not in self.supported_types:
            raise Exception(f"Invalid attachment type{attachment.type}")
        if kind == "image":
            # Images are embedded as a base64 data URL
            data_url = f"data:{attachment.mime_type};base64," + attachment.get_encoded_content_b64()
            return {"type": "image_url", "image_url": {"url": data_url}}
        if kind == "audio":
            # Audio is passed as a text part carrying the base64 payload
            return {"type": "text", "text": attachment.get_encoded_content_b64()}
        if kind == "text":
            # Text attachments keep their raw bytes
            return {"type": kind, "data": attachment.content, "mime_type": attachment.mime_type}
        # Any other supported type is sent as a base64 source
        return {
            "type": kind,
            "source": "base64",
            "data": attachment.get_encoded_content_b64(),
            "mime_type": attachment.mime_type,
        }

    def get_representation(self, type: str, content: bytes, format: str, mime_type) -> dict:
        """
        Build the content dictionary for raw bytes of a given type.

        :param type: content type; only "audio" and "image" can be represented
        :param content: the raw bytes to encode as base64
        :param format: audio format, used only for "audio"
        :param mime_type: MIME type, used only for the "image" data URL
        :return: an "input_audio" or "image_url" content dictionary
        :raises Exception: when the type is unsupported or has no representation
        """
        encoded = base64.b64encode(content).decode("utf-8")
        if type not in self.supported_types:
            raise Exception(f"Invalid attachment type{type}")
        if type == "audio":
            audio_part = {"data": encoded, "format": format}
            return {"type": "input_audio", "input_audio": audio_part}
        if type == "image":
            image_part = {"url": f"data:{mime_type};base64," + encoded}
            return {"type": "image_url", "image_url": image_part}
        raise Exception(f"Cannot extract a representation for type {type}")

    def fetch_file_from_reference(self, file_reference: str) -> bytes:
        """
        Load the bytes behind a file reference; only local paths are implemented.

        :param file_reference: a local file path
        :return: the bytes of the referenced file
        :raises ValueError: when the reference does not look like a path and does not exist
        """
        local_file = Path(file_reference)
        looks_like_path = file_reference.startswith(("/", "./"))
        if looks_like_path or local_file.exists():
            return local_file.read_bytes()
        # Other storage back ends (database id, URL, ...) are not implemented
        raise ValueError(
            f"Could not resolve file reference: {file_reference}. Implement 'fetch_file_from_reference' for your "
            f"storage system.")
+supported_types = ["image", "audio", "file", "text"]
+attachmentHandler = AttachmentHandler(supported_types)

core/state.py CHANGED Viewed

@@ -4,5 +4,6 @@ from langgraph.graph import MessagesState
 class State(MessagesState):
     summary: str
     question: str
-    attachment: str
     chunked_last_tool_call: bool

 class State(MessagesState):
     summary: str
     question: str
     chunked_last_tool_call: bool
+    attachment: str
+    file_reference: str  # Attachment file reference: a path, URL, or unique ID

nodes/nodes.py CHANGED Viewed

@@ -1,33 +1,49 @@
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage, ToolMessage
 from langchain_openai import ChatOpenAI
 from core.state import State
-import time
 from nodes.chunking_node import OversizedContentHandler
 from tools.tavily_tools import llm_tools
 from utils.prompt_manager import prompt_mgmt
 model = ChatOpenAI(model="gpt-4.1")
 response_processing_model = ChatOpenAI(model="gpt-4.1-mini")
 model = model.bind_tools(llm_tools, parallel_tool_calls=False)
 # Node
-def orchestrator(state: State):
     # Get original question if it exists
     question = state.get("question", "")
     if not question:
         question = state["messages"][0].content
-    message = prompt_mgmt.render_template("question_evaluation", {"question": question})
     # Add prompt to our history
     messages = [HumanMessage(content=message)]
     response = response_processing_model.invoke(messages)
     if response.content == "YES":
-        return {"question": question, "attachment": "true", "messages": [response]}
     return {"question": question}
@@ -40,9 +56,14 @@ def assistant(state: State):
     # Get original question if it exists
     question = state.get("question", "")
     if not question:
-        question = state["messages"][0].content
-    prompt_params = {"summary": summary, "chunked_last_tool_call": state.get("chunked_last_tool_call", False)}
     sys_msg = SystemMessage(content=prompt_mgmt.render_template("base_system", prompt_params))
     try:
         response = model.invoke([sys_msg] + state["messages"])
@@ -83,7 +104,6 @@ def optimize_memory(state: State):
     messages = state["messages"][:-2] + [HumanMessage(content=summary_message)]
     response = model.invoke(messages)
-    print("&&&" * 50, state["messages"][-1].type)
     # Delete all but the 2 most recent messages and the first one
     remaining_messages = [RemoveMessage(id=m.id) for m in state["messages"][:-2]]

+import mimetypes
+import pathlib
+import time
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage
 from langchain_openai import ChatOpenAI
+from core.messages import attachmentHandler
 from core.state import State
 from nodes.chunking_node import OversizedContentHandler
+from tools.audio_tool import query_audio
+from tools.excel_tool import query_excel_file
+from tools.python_executor import execute_python_code
 from tools.tavily_tools import llm_tools
 from utils.prompt_manager import prompt_mgmt
 model = ChatOpenAI(model="gpt-4.1")
 response_processing_model = ChatOpenAI(model="gpt-4.1-mini")
+llm_tools.append(query_audio)
+llm_tools.append(query_excel_file)
+llm_tools.append(execute_python_code)
 model = model.bind_tools(llm_tools, parallel_tool_calls=False)
 # Node
def pre_processor(state: State):
    """
    Graph node that prepares the question before the assistant runs.

    It resolves the question text, embeds a referenced PNG file into the first
    message as an image part, and asks the response-processing model to evaluate
    the question with the "question_evaluation" prompt.

    :param state: the graph state; reads "question", "messages" and "file_reference"
    :return: a state update with "question", plus "attachment" set to "true" when
        the evaluation model answers "YES"
    """
    # Get original question if it exists
    question = state.get("question", "")
    if not question:
        question = state["messages"][0].content

    file_reference = state.get("file_reference", "")
    # Path.suffix keeps the leading dot (".png"), so compare against ".png";
    # lower-casing also accepts references such as "chart.PNG".
    extension = pathlib.Path(file_reference).suffix.lower() if file_reference else ""
    if extension == ".png":
        content_bytes = attachmentHandler.fetch_file_from_reference(file_reference)
        mime_type = mimetypes.guess_type(file_reference)[0]
        # NOTE(review): the first message is mutated in place rather than returned
        # as a state update - confirm the graph runtime keeps this change.
        state["messages"][0].content = [
            {"type": "text", "text": question},
            attachmentHandler.get_representation("image", content_bytes, "png", mime_type),
        ]

    # Evaluate the whole question text (indexing it would pass only its first character)
    message = prompt_mgmt.render_template("question_evaluation", {"question": question})
    messages = [HumanMessage(content=message)]
    response = response_processing_model.invoke(messages)
    if response.content == "YES":
        return {"question": question, "attachment": "true"}
    return {"question": question}
     # Get original question if it exists
     question = state.get("question", "")
     if not question:
+        question = state["messages"][0].content[0]
+    attachment = ""
+    file_reference = state.get("file_reference", "")
+    if file_reference:
+        attachment = f" you have access to the file with the following reference {file_reference}"
+    prompt_params = {"summary": summary, "chunked_last_tool_call": state.get("chunked_last_tool_call", False),
+                     "attachment": attachment}
     sys_msg = SystemMessage(content=prompt_mgmt.render_template("base_system", prompt_params))
     try:
         response = model.invoke([sys_msg] + state["messages"])
     messages = state["messages"][:-2] + [HumanMessage(content=summary_message)]
     response = model.invoke(messages)
     # Delete all but the 2 most recent messages and the first one
     remaining_messages = [RemoveMessage(id=m.id) for m in state["messages"][:-2]]

requirements.txt CHANGED Viewed

@@ -5,4 +5,7 @@ langchain_core
 langgraph
 langchain-tavily
 langchain-community
-faiss-cpu

 langgraph
 langchain-tavily
 langchain-community
+faiss-cpu
+langchain-experimental
+openpyxl
+tabulate

tools/audio_tool.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from langchain.tools import tool
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_openai import ChatOpenAI
+from core.messages import attachmentHandler
+from utils.prompt_manager import prompt_mgmt
+audio_model = ChatOpenAI(model="gpt-4o-audio-preview")
@tool
def query_audio(question: str, file_reference: str) -> str:
    """
    Tool to answer questions based on the audio file identified by the provided file reference
    :param question: Question to be answered
    :param file_reference: file reference
    :return: the answer to the given question, or an error description if the audio model call fails
    """
    from pathlib import Path  # local import: keeps this block self-contained

    # The audio prompt declares no variables, so an empty mapping is passed
    sys_msg = SystemMessage(content=prompt_mgmt.render_template("audio_evaluation", {}))
    content_bytes = attachmentHandler.fetch_file_from_reference(file_reference)
    # Label wav files as wav; every other extension keeps the former "mp3" default
    suffix = Path(file_reference).suffix.lstrip(".").lower()
    audio_format = suffix if suffix in ("mp3", "wav") else "mp3"
    content = [{"type": "text", "text": question},
               attachmentHandler.get_representation("audio", content_bytes, audio_format, None)]
    message = [HumanMessage(content=content)]
    try:
        response = audio_model.invoke([sys_msg] + message)
    except Exception as e:
        # Report the failure to the calling model instead of silently returning None
        print(f"Exception while invoking audio tool: {e}")
        return f"Error: audio tool invocation failed: {e}"
    # The tool is declared to return text, so hand back the message content
    return response.content

tools/excel_tool.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import pandas as pd
+from langchain.tools import tool
+from langchain_experimental.agents import create_pandas_dataframe_agent
+from langchain_openai import ChatOpenAI
+from core.messages import attachmentHandler
+llm = ChatOpenAI(model="gpt-4.1")
@tool
def query_excel_file(question: str, file_reference: str) -> str:
    """
    Analyze the excel file (xls/xlsx) identified by the given file reference and answer the question
    based on this analysis
    :param question: the question concerning the data in the given excel file
    :param file_reference: the reference of the excel file to analyze (not its content)
    :return: the answer to the question
    """
    import io  # local import: keeps this block self-contained

    # Load Excel file; pandas expects a path or file-like object, so wrap the bytes
    content_bytes = attachmentHandler.fetch_file_from_reference(file_reference)
    df = pd.read_excel(io.BytesIO(content_bytes))

    # Create agent
    # NOTE(security): allow_dangerous_code lets the agent execute model-generated
    # Python in this process - only use it with trusted questions and files.
    pandas_agent = create_pandas_dataframe_agent(llm, df, verbose=True, allow_dangerous_code=True)
    # invoke() replaces the deprecated run(); the answer is under the "output" key
    result = pandas_agent.invoke({"input": question})
    return result["output"]

tools/python_executor.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import subprocess
+import sys
+from typing import Optional
+from langchain.tools import tool
+from langchain_experimental.tools import PythonREPLTool
@tool
def execute_python_code(intent: str, code: Optional[str] = None, file_reference: Optional[str] = None) -> str:
    """
    Executes the provided python code snippet or python file identified by its reference and returns the outcome of
    the execution
    :param intent: this parameter should be set to either code_snippet or file_execution depending on
    the intent of the user
    :param code: if the intent is code_snippet, this parameter should be populated with the
    python code snippet to be executed
    :param file_reference: if the intent is file_execution, this parameter should
    be populated with the reference of the file to be executed
    :return: the outcome of the python code execution
    """
    if intent == "code_snippet":
        # Validate the argument before building the REPL tool
        if not code:
            raise ValueError("Invalid arguments. Tool intent is code_snippet but no value provided for code argument")
        python_tool = PythonREPLTool()
        return python_tool.run(code, verbose=True)
    if intent == "file_execution":
        # Mirror the code_snippet check: a missing reference is an argument error,
        # not something to hand to the interpreter
        if not file_reference:
            raise ValueError(
                "Invalid arguments. Tool intent is file_execution but no value provided for file_reference argument")
        return subprocess_python_exec(file_reference)
    raise ValueError("Invalid arguments. Invalid value for intent parameter")
def subprocess_python_exec(file_reference: str) -> str:
    """
    Run the Python file at *file_reference* in a child interpreter.

    :param file_reference: path of the Python file to execute
    :return: the captured standard output (or a success note when it is empty) on
        exit code 0, otherwise a string starting with "Error: "
    """
    command = [sys.executable, file_reference]
    try:
        # Child process with captured text output and a 60 second limit
        completed = subprocess.run(command, capture_output=True, text=True, timeout=60)
    except subprocess.TimeoutExpired:
        return "Error: Code execution timed out"
    except Exception as exc:
        return f"Error: {str(exc)}"

    if completed.returncode != 0:
        return f"Error: {completed.stderr}"
    return completed.stdout or "Code executed successfully"

utils/prompt_manager.py CHANGED Viewed

@@ -1,11 +1,12 @@
-from enum import Enum
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
-from jinja2 import Environment, BaseLoader
 import tiktoken
 import yaml
-from pathlib import Path
-import os
 class PromptType(Enum):
@@ -13,6 +14,7 @@ class PromptType(Enum):
     ANSWER_REFINEMENT = "answer_refinement"
     MEMORY_OPTIMIZATION = "memory_optimization"
     QUESTION_REFINEMENT = "question_refinement"
 @dataclass

+import os
 from dataclasses import dataclass, field
+from enum import Enum
+from pathlib import Path
+from typing import Any, Dict, List
 import tiktoken
 import yaml
+from jinja2 import Environment, BaseLoader
 class PromptType(Enum):
     ANSWER_REFINEMENT = "answer_refinement"
     MEMORY_OPTIMIZATION = "memory_optimization"
     QUESTION_REFINEMENT = "question_refinement"
+    TOOL = "tool"
 @dataclass