# NOTE(review): removed non-Python extraction artifacts that were prepended to this
# file (a file-size header, a git-blame commit-hash dump, and a line-number dump);
# they were not part of the program and broke parsing.
from llama_index.llms.gemini import Gemini
from llama_index.tools.arxiv import ArxivToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
from gradio import ChatMessage
from llama_index.core.base.llms.types import ChatMessage as llama_index_chat_message
from tools import interpret_python_math_code, image_understanding, convert_audio_to_text, video_understanding, read_csv_file, read_xlsx_file
from gaia_system_prompt import CUSTOM_SYSTEM_PROMPT
import os
import asyncio
# Hard ceiling on a single agent run (seconds); forwarded to AgentWorkflow below.
TIMEOUT=180 # Timeout for agent execution in seconds
# NOTE(review): the environment variable is named GEMINI_TOKEN even though the
# constant is GEMINI_API_KEY — confirm deployments export GEMINI_TOKEN.
GEMINI_API_KEY = os.getenv("GEMINI_TOKEN")
# Model used for the agent; a stable non-preview alternative is kept below.
GEMINI_MODEL_NAME = "gemini-2.5-flash-preview-04-17"
# GEMINI_MODEL_NAME = "gemini-2.0-flash"
class FinalAgent:
    """Tool-augmented Gemini agent for answering GAIA-style questions.

    Wires a Gemini LLM together with local function tools (math interpreter,
    image/audio/video understanding, CSV/XLSX readers) and external knowledge
    tool specs (arXiv, Wikipedia, DuckDuckGo) into a single ``AgentWorkflow``.
    Calling the instance runs the workflow and returns the text found between
    ``<final_answer>`` / ``</final_answer>`` tags in the model's reply.
    """

    def __init__(self):
        # LLM initialization.
        self.llm = Gemini(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
        # Local function tools.  Each tool's description is taken from the
        # wrapped function's docstring, exactly as the original per-tool
        # boilerplate did — the table just removes the six-fold repetition.
        local_tools = [
            (interpret_python_math_code, "InterpretPythonMathCode"),
            (image_understanding, "ImageUnderstanding"),
            (convert_audio_to_text, "ConvertAudioToText"),
            (video_understanding, "VideoUnderstanding"),
            (read_csv_file, "ReadCSVFile"),
            (read_xlsx_file, "ReadXLSXFile"),
        ]
        self.tools = [
            FunctionTool.from_defaults(fn=fn, name=name, description=fn.__doc__)
            for fn, name in local_tools
        ]
        # External search / knowledge tool specs.
        for spec in (ArxivToolSpec(), WikipediaToolSpec(), DuckDuckGoSearchToolSpec()):
            self.tools.extend(spec.to_tool_list())
        # Agent workflow initialization.
        self.agent = AgentWorkflow.from_tools_or_functions(
            tools_or_functions=self.tools,
            llm=self.llm,
            system_prompt=CUSTOM_SYSTEM_PROMPT,
            timeout=TIMEOUT,
        )
        print("FinalAgent initialized.")

    async def __call__(self, question: str) -> str:
        """Run the agent on *question* and return the extracted final answer.

        The workflow response is coerced to a plain string (ChatMessage-like
        objects expose ``.content``), then the text between ``<final_answer>``
        tags is returned if both tags are present; otherwise the full string
        is returned.  Any execution error is converted into an
        ``"Agent error: ..."`` string rather than raised, so callers always
        receive a string.
        """
        print(f"Agent received question: {question}")
        try:
            # AgentWorkflow.run is awaitable; await it directly.
            # (The original comment claimed "arun" was needed — it is not.)
            agent_chat_response = await self.agent.run(question)
            print(agent_chat_response)
            response_str = self._coerce_to_str(agent_chat_response.response)
        except Exception as e:
            print(f"Error during agent execution with LLM {self.llm.__class__.__name__}: {e}")
            # Depending on requirements, you might want to return an error message or re-raise
            response_str = f"Agent error: {e}"
        return self._extract_final_answer(response_str)

    @staticmethod
    def _coerce_to_str(obj) -> str:
        """Normalize the workflow's response object to a plain string.

        Both gradio's ``ChatMessage`` and llama_index's ``ChatMessage`` carry
        the reply text in a ``.content`` attribute, and the original code
        handled them with two identical isinstance branches — a single
        duck-typed ``.content`` check covers both.  ``None`` content becomes
        the empty string; anything else falls back to ``str()``.
        """
        if isinstance(obj, str):
            return obj
        if hasattr(obj, "content"):
            # ChatMessage-like object; content may legitimately be None.
            content = obj.content
            if content is None:
                print("DEBUG: ChatMessage content is None, defaulting to empty string.")
                return ""
            return content
        # Fallback if it's some other type.
        print(f"Warning: Agent response was of unexpected type: {type(obj)}. Converting to string.")
        return str(obj)

    @staticmethod
    def _extract_final_answer(response_str: str) -> str:
        """Return the stripped text between <final_answer> tags, if present.

        Unlike the original ``in``/``index`` pair, the closing tag is searched
        only *after* the opening tag, so a stray ``</final_answer>`` appearing
        earlier in the text can no longer produce a negative slice.
        """
        open_tag, close_tag = "<final_answer>", "</final_answer>"
        start = response_str.find(open_tag)
        if start != -1:
            end = response_str.find(close_tag, start + len(open_tag))
            if end != -1:
                return response_str[start + len(open_tag):end].strip()
        print("Warning: No <final_answer> tags found in the response.")
        return response_str
async def main():
    """Example usage: run FinalAgent on a sample GAIA question and print the answer."""
    agent = FinalAgent()
    question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
    # Alternate sample question (video understanding), kept for manual testing
    # — the original assigned it to an unused `question2` variable:
    # "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
    answer = await agent(question)
    print(f"Final answer: {answer}")


if __name__ == "__main__":
    asyncio.run(main())