File size: 5,975 Bytes
3f771a9
edf3100
 
3f771a9
edf3100
b0c6c93
3f771a9
 
edf3100
7da5655
b0c6c93
edf3100
095d02f
3f771a9
095d02f
3f771a9
095d02f
b0c6c93
 
095d02f
 
 
edf3100
7da5655
095d02f
edf3100
 
 
3f771a9
edf3100
b0c6c93
7da5655
 
 
 
b0c6c93
7da5655
 
 
 
b0c6c93
7da5655
 
 
 
b0c6c93
7da5655
 
 
 
b0c6c93
7da5655
 
 
 
b0c6c93
3f771a9
edf3100
3f771a9
 
 
 
 
 
 
 
 
b0c6c93
3f771a9
7da5655
 
 
edf3100
7da5655
 
edf3100
 
095d02f
b0c6c93
3f771a9
 
 
 
 
 
 
7da5655
3f771a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edf3100
7da5655
 
 
 
 
3f771a9
7da5655
3f771a9
 
 
 
b0c6c93
 
 
 
 
 
 
3f771a9
b0c6c93
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from llama_index.llms.gemini import Gemini
from llama_index.tools.arxiv import ArxivToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
from gradio import ChatMessage
from llama_index.core.base.llms.types import ChatMessage as llama_index_chat_message

from tools import interpret_python_math_code, image_understanding, convert_audio_to_text, video_understanding, read_csv_file, read_xlsx_file
from gaia_system_prompt import CUSTOM_SYSTEM_PROMPT

import os
import asyncio

TIMEOUT=180 # Timeout for agent execution in seconds
# NOTE(review): the env var is named GEMINI_TOKEN (not GEMINI_API_KEY) — confirm deployment sets it.
GEMINI_API_KEY = os.getenv("GEMINI_TOKEN")
# Model served through llama_index's Gemini wrapper; previous choice kept below for quick rollback.
GEMINI_MODEL_NAME = "gemini-2.5-flash-preview-04-17"
# GEMINI_MODEL_NAME = "gemini-2.0-flash"

class FinalAgent:
    """Agent wrapping a Gemini LLM in a llama_index ``AgentWorkflow``.

    Registers the local tools (math interpreter, image/audio/video
    understanding, CSV/XLSX readers) plus the Arxiv, Wikipedia and
    DuckDuckGo tool specs, and exposes an async ``__call__`` that runs a
    question through the workflow and returns the text found between
    ``<final_answer>`` and ``</final_answer>`` tags in the response.
    """

    def __init__(self):
        # LLM Initialization
        self.llm = Gemini(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)

        # Tool Initialization: each local function becomes a FunctionTool,
        # reusing its docstring as the description the LLM sees.
        local_tools = [
            (interpret_python_math_code, "InterpretPythonMathCode"),
            (image_understanding, "ImageUnderstanding"),
            (convert_audio_to_text, "ConvertAudioToText"),
            (video_understanding, "VideoUnderstanding"),
            (read_csv_file, "ReadCSVFile"),
            (read_xlsx_file, "ReadXLSXFile"),
        ]
        self.tools = [
            FunctionTool.from_defaults(fn=fn, name=name, description=fn.__doc__)
            for fn, name in local_tools
        ]
        # External search/reference tool specs expand into several tools each.
        for spec in (ArxivToolSpec(), WikipediaToolSpec(), DuckDuckGoSearchToolSpec()):
            self.tools.extend(spec.to_tool_list())

        # Agent Workflow Initialization
        self.agent = AgentWorkflow.from_tools_or_functions(
            tools_or_functions=self.tools,
            llm=self.llm,
            system_prompt=CUSTOM_SYSTEM_PROMPT,
            timeout=TIMEOUT,
        )

        print("FinalAgent initialized.")

    @staticmethod
    def _coerce_to_str(potential_response_obj) -> str:
        """Normalise the workflow's response payload to a plain string.

        The workflow may return a gradio ``ChatMessage``, a llama_index
        ``ChatMessage``, a bare ``str``, or something else entirely; a
        ``None`` message content becomes the empty string.
        """
        if isinstance(potential_response_obj, ChatMessage):
            # If it's a gradio ChatMessage, its .content attribute holds the string
            print(f"DEBUG: Response object is ChatMessage. Role: {potential_response_obj.role}")
            if potential_response_obj.content is None:
                print("DEBUG: ChatMessage content is None, defaulting to empty string.")
                return ""
            return potential_response_obj.content
        if isinstance(potential_response_obj, str):
            print("DEBUG: Response object is str.")
            return potential_response_obj
        if isinstance(potential_response_obj, llama_index_chat_message):
            # If it's a llama_index ChatMessage, use its .content attribute
            print(f"DEBUG: Response object is llama_index ChatMessage. Role: {potential_response_obj.role}")
            if potential_response_obj.content is None:
                print("DEBUG: llama_index ChatMessage content is None, defaulting to empty string.")
                return ""
            return potential_response_obj.content
        # Fallback if it's some other type
        print(f"Warning: Agent response was of unexpected type: {type(potential_response_obj)}. Converting to string.")
        return str(potential_response_obj)

    @staticmethod
    def _extract_final_answer(response_str: str) -> str:
        """Return the stripped text between <final_answer> tags.

        Falls back to the full string (with a warning) when the tags are
        absent or malformed (closing tag appearing before the opening tag),
        instead of silently producing an empty answer.
        """
        start_tag, end_tag = "<final_answer>", "</final_answer>"
        start = response_str.find(start_tag)
        # Only search for the closing tag AFTER the opening one.
        end = response_str.find(end_tag, start + len(start_tag)) if start != -1 else -1
        if start != -1 and end != -1:
            return response_str[start + len(start_tag):end].strip()
        print("Warning: No <final_answer> tags found in the response.")
        return response_str

    async def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its final answer text."""
        print(f"Agent received question: {question}")

        try:
            # AgentWorkflow.run returns an awaitable; await it directly.
            agent_chat_response = await self.agent.run(question)
            print(agent_chat_response)
            response_str = self._coerce_to_str(agent_chat_response.response)
        except Exception as e:
            # Surface the failure as the answer string rather than crashing the caller.
            print(f"Error during agent execution with LLM {self.llm.__class__.__name__}: {e}")
            response_str = f"Agent error: {e}"

        # Get the agent's final response between <final_answer> and </final_answer> tags
        return self._extract_final_answer(response_str)
    

async def main():
    """Smoke-test: instantiate the agent and run one example question."""
    agent = FinalAgent()
    question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
    # Alternative example exercising the video-understanding tool:
    # question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
    answer = await agent(question)
    print(f"Final answer: {answer}")

if __name__ == "__main__":
    # Script entry point: run the async example on a fresh event loop.
    asyncio.run(main())