agentzero07 committed on
Commit
7c6662d
·
verified ·
1 Parent(s): 81917a3

Create agent.py

Browse files
Files changed (1) hide show
  1. agent.py +688 -0
agent.py ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from pathlib import Path
4
+ from typing import Optional, Union, Dict, List, Any
5
+ from enum import Enum
6
+ import requests
7
+ import tempfile
8
+ import ast
9
+
10
+ from dotenv import load_dotenv
11
+ from langgraph.graph import StateGraph, END
12
+ from langchain.tools import Tool as LangTool
13
+ from langchain_core.runnables import RunnableLambda
14
+ from langchain_google_genai import ChatGoogleGenerativeAI
15
+ from pathlib import Path
16
+
17
+ from langchain.tools import StructuredTool
18
+
19
+ from tools import (
20
+ EnhancedSearchTool,
21
+ EnhancedWikipediaTool,
22
+ excel_to_markdown,
23
+ image_file_info,
24
+ audio_file_info,
25
+ code_file_read,
26
+ extract_youtube_info)
27
+
28
# Read settings from a local .env file so the os.getenv() calls below see them.
load_dotenv()

# --- Constants ---
# Base address of the scoring service, plus the endpoints derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
SUBMIT_URL = f"{DEFAULT_API_URL}/submit"
FILE_PATH = f"{DEFAULT_API_URL}/files/"

# Chat model shared by every workflow node. The model name falls back to
# "gemini-pro" when GEMINI_MODEL is not set; the key comes from GEMINI_API_KEY.
llm = ChatGoogleGenerativeAI(
    google_api_key=os.getenv("GEMINI_API_KEY"),
    model=os.getenv("GEMINI_MODEL", "gemini-pro"),
)
42
+
43
+ # ----------- Enhanced State Management -----------
44
+ from typing import TypedDict
45
+
46
class AgentState(TypedDict):
    """State dictionary handed from node to node in the LangGraph workflow.

    A TypedDict is used so the state stays a plain dict at runtime.
    """

    # Question text the nodes operate on.
    question: str
    # Copy of the question taken at initialisation; used by error_recovery.
    original_question: str
    # Log entries of the form {"role": ..., "content": ...}.
    conversation_history: List[Dict[str, str]]
    # Tool names chosen by select_tools.
    selected_tools: List[str]
    # Output (or error message) per executed tool name.
    tool_results: Dict[str, Any]
    # Answer text returned to the caller.
    final_answer: str
    # "start" initially, afterwards one of the AgentStep values.
    current_step: str
    # Number of failures recorded so far.
    error_count: int
    # Threshold at which error_recovery gives up.
    max_errors: int
57
+
58
class AgentStep(Enum):
    """Workflow stages; the ``.value`` strings are stored in ``state["current_step"]``."""

    ANALYZE_QUESTION = "analyze_question"
    SELECT_TOOLS = "select_tools"
    EXECUTE_TOOLS = "execute_tools"
    SYNTHESIZE_ANSWER = "synthesize_answer"
    ERROR_RECOVERY = "error_recovery"
    COMPLETE = "complete"
65
+
66
+ # ----------- Helper Functions -----------
67
def initialize_state(question: str) -> AgentState:
    """Build the starting workflow state for a single question.

    Args:
        question: Question text; stored under both ``question`` and
            ``original_question``.

    Returns:
        A new state dict with empty history/tool fields, ``current_step``
        set to ``"start"``, no errors recorded and an error budget of 3.
    """
    fresh_state: AgentState = dict(
        question=question,
        original_question=question,
        conversation_history=[],
        selected_tools=[],
        tool_results={},
        final_answer="",
        current_step="start",
        error_count=0,
        max_errors=3,
    )
    return fresh_state
80
+
81
# Stock LangChain tools (no custom processing).
from langchain.tools import DuckDuckGoSearchResults, WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper

duckduckgo_tool = DuckDuckGoSearchResults()
wiki_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())


# Wrappers around the helpers imported from the local `tools` module.
# Each wrapper gives the helper a tool name and a description.
enhanced_search_tool = LangTool.from_function(
    func=EnhancedSearchTool().run,
    name="enhanced_web_search",
    description="Enhanced web search with intelligent query processing, multiple search strategies, and result filtering. Provides comprehensive and relevant search results.",
)

enhanced_wiki_tool = LangTool.from_function(
    func=EnhancedWikipediaTool().run,
    name="enhanced_wikipedia",
    description="Enhanced Wikipedia search with entity extraction, multi-term search, and relevant content filtering. Provides detailed encyclopedic information.",
)

# Structured tools take keyword inputs (see how execute_tools calls them).
excel_tool = StructuredTool.from_function(
    func=excel_to_markdown,
    name="excel_to_text",
    description="Enhanced Excel analysis with metadata, statistics, and structured data preview. Inputs: 'excel_path' (str), 'sheet_name' (str, optional).",
)

image_tool = StructuredTool.from_function(
    func=image_file_info,
    name="image_file_info",
    description="Enhanced image file analysis with detailed metadata and properties.",
)

# Single-input tools.
audio_tool = LangTool.from_function(
    func=audio_file_info,
    name="audio_file_info",
    description="Enhanced audio processing with transcription, language detection, and timestamped segments.",
)

code_tool = LangTool.from_function(
    func=code_file_read,
    name="code_file_read",
    description="Enhanced code file analysis with language-specific insights and structure analysis.",
)

youtube_tool = LangTool.from_function(
    func=extract_youtube_info,
    name="extract_youtube_info",
    description="Extracts transcription from the youtube link",
)
131
+
132
# Tool registry: maps the short tool names used in the prompts and in
# state["selected_tools"] to the tool object that serves each name.
AVAILABLE_TOOLS = {
    "excel": excel_tool,
    # "search" must be the web-search tool and "wikipedia" the Wikipedia tool;
    # the two entries were previously swapped.
    "search": duckduckgo_tool,
    "wikipedia": wiki_tool,
    "image": image_tool,
    "audio": audio_tool,
    "code": code_tool,
    "youtube": youtube_tool,
}
142
+
143
+ # ----------- Intelligent Tool Selection -----------
144
def analyze_question(state: AgentState) -> AgentState:
    """Ask the LLM to classify the question and log its analysis.

    On success the reply is appended to ``conversation_history`` with role
    ``"analysis"`` and ``current_step`` becomes ``select_tools``. On any
    exception ``error_count`` is incremented, the failure is logged with
    role ``"error"`` and ``current_step`` becomes ``error_recovery``.

    Args:
        state: Current workflow state; mutated in place.

    Returns:
        The same state object.
    """
    analysis_prompt = f"""
    Analyze this question and determine the best tools and approach:
    Question: {state["question"]}

    Available enhanced tools:
    1. excel - Enhanced Excel/CSV analysis with statistics and metadata
    2. search - Enhanced web search with intelligent query processing and result filtering
    3. wikipedia - Enhanced Wikipedia search with entity extraction and content filtering
    4. image - Enhanced image analysis with what the image contains
    5. audio - Enhanced audio processing with transcription
    6. code - Enhanced code analysis with language-specific insights
    7. youtube - Extracts transcription from the youtube link

    Consider:
    - Question type (factual, analytical, current events, technical)
    - Required information sources (files, web, encyclopedic)
    - Time sensitivity (current vs historical information)
    - Complexity level

    Respond with:
    1. Question type: <type>
    2. Primary tools needed: <tools>
    3. Search strategy: <strategy>
    4. Expected answer format: <format>

    Format: TYPE: <type> | TOOLS: <tools> | STRATEGY: <strategy> | FORMAT: <format>
    """

    try:
        analysis_text = llm.invoke(analysis_prompt).content
        log_entry = {"role": "analysis", "content": analysis_text}
        state["conversation_history"].append(log_entry)
        state["current_step"] = AgentStep.SELECT_TOOLS.value
    except Exception as e:
        state["error_count"] += 1
        failure_entry = {"role": "error", "content": f"Analysis failed: {e}"}
        state["conversation_history"].append(failure_entry)
        state["current_step"] = AgentStep.ERROR_RECOVERY.value

    return state
184
+
185
def _parse_tool_list(raw_reply: str) -> List[str]:
    """Extract the known tool names from the LLM's tool-selection reply.

    The reply is expected to contain a Python list literal, but models often
    wrap it in prose or code fences. The first ``[...]`` span is parsed and
    anything that is not a string naming an entry of ``AVAILABLE_TOOLS`` is
    dropped. An unparsable reply yields an empty list.
    """
    bracketed = re.search(r"\[.*?\]", raw_reply, re.DOTALL)
    if bracketed is None:
        return []
    try:
        parsed = ast.literal_eval(bracketed.group(0))
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return []
    if not isinstance(parsed, list):
        return []
    return [name for name in parsed if isinstance(name, str) and name in AVAILABLE_TOOLS]


def select_tools(state: AgentState) -> AgentState:
    """Choose the tools to run for the question.

    Two sources are combined: keyword matches on the lower-cased question
    (file-oriented tools) and a list suggested by the LLM. Duplicates are
    removed while keeping first-seen order, so execution order is stable.

    A failed or unparsable LLM reply no longer aborts the workflow: the
    keyword-selected tools are used on their own, and an LLM call failure is
    counted in ``error_count`` and logged in ``conversation_history``.

    Args:
        state: Current workflow state; mutated in place.

    Returns:
        The same state with ``selected_tools`` set and ``current_step``
        advanced to ``execute_tools``.
    """
    question = state["question"].lower()

    # Substrings that trigger each file-oriented tool.
    keyword_rules = {
        "excel": ["excel", "csv", "spreadsheet", ".xlsx", ".xls"],
        "image": [".png", ".jpg", ".jpeg", ".bmp", ".gif", "image"],
        "audio": [".mp3", ".wav", ".ogg", "audio", "transcribe"],
        "code": [".py", ".ipynb", "code", "script", "function"],
        "youtube": ["youtube"],
    }
    selected_tools = [
        tool_name
        for tool_name, keywords in keyword_rules.items()
        if any(keyword in question for keyword in keywords)
    ]

    print(f"File-based tools selected: {selected_tools}")

    tools_prompt = f"""
    You are a smart assistant that selects relevant tools based on the user's natural language question.

    Available tools:
    - "search" → Use for real-time, recent, or broad web information.
    - "wikipedia" → Use for factual or encyclopedic knowledge.
    - "excel" → Use for spreadsheet-related questions (.xlsx, .csv).
    - "image" → Use for image files (.png, .jpg, etc.) or image-based tasks.
    - "audio" → Use for sound files (.mp3, .wav, etc.) or transcription.
    - "code" → Use for programming-related questions or when files like .py are mentioned.
    - "youtube" → Use for questions involving YouTube videos.

    Return the result as a **Python list of strings**, no explanation. Use only the relevant tools.
    If not relevant tool is found, return an empty list such as [].

    ### Examples:

    Q: "Show me recent news about elections in 2025"
    A: ["search"]

    Q: "Summarize this Wikipedia article about Einstein"
    A: ["wikipedia"]

    Q: "Analyze this .csv file"
    A: ["excel"]

    Q: "Transcribe this .wav audio file"
    A: ["audio"]

    Q: "Generate Python code from this prompt"
    A: ["code"]

    Q: "Who was the president of USA in 1945?"
    A: ["wikipedia"]

    Q: "Give me current weather updates"
    A: ["search"]

    Q: "Look up the history of space exploration"
    A: ["search", "wikipedia"]

    Q: "What is 2 + 2?"
    A: []

    ### Now answer:

    Q: {state["question"]}
    A:
    """

    try:
        llm_tools = _parse_tool_list(llm.invoke(tools_prompt).content)
    except Exception as e:
        # The LLM suggestion is optional; fall back to the keyword matches.
        state["error_count"] += 1
        state["conversation_history"].append(
            {"role": "error", "content": f"Tool selection failed: {e}"}
        )
        llm_tools = []
    print(f"LLM suggested tools: {llm_tools}")

    # dict.fromkeys removes duplicates but, unlike set(), keeps insertion order.
    selected_tools = list(dict.fromkeys(selected_tools + llm_tools))

    print(f"Final selected tools after LLM suggestion: {selected_tools}")

    state["selected_tools"] = selected_tools
    state["current_step"] = AgentStep.EXECUTE_TOOLS.value
    return state
288
+
289
def execute_tools(state: AgentState) -> AgentState:
    """Run every selected tool and collect the outputs in ``tool_results``.

    The path of a downloaded file is read from the line that follows the
    "A file was downloaded..." marker in the question. File-oriented tools
    (excel, image, audio, code) receive that path; all other tools receive
    the question text with the file-information section removed.

    A file-oriented tool selected without an available file is skipped with
    an explanatory result instead of being called with the question text as
    if it were a path. Any exception from a tool (including an unknown tool
    name) is stored as that tool's result and counted in ``error_count``.

    Args:
        state: Current workflow state; mutated in place.

    Returns:
        The same state with ``tool_results`` set and ``current_step``
        advanced to ``synthesize_answer``.
    """
    results = {}
    downloaded_file_marker = "A file was downloaded for this task and saved locally at:"
    file_tools = ("excel", "image", "audio", "code")
    question = state["question"]

    # Locate the downloaded file: its path is on the line after the marker.
    file_path = None
    lines = question.splitlines()
    for i, line in enumerate(lines):
        if downloaded_file_marker not in line:
            continue
        if i + 1 < len(lines):
            file_path_candidate = lines[i + 1].strip()
            # An empty candidate would resolve to the current directory,
            # so require a non-empty path that points at a regular file.
            if file_path_candidate and Path(file_path_candidate).is_file():
                file_path = file_path_candidate
                print(f"Detected file path: {file_path}")
        break

    # Query for information tools: the question without the file section
    # (and without the banner that precedes the marker).
    clean_query = question
    if downloaded_file_marker in clean_query:
        clean_query = clean_query.split(downloaded_file_marker)[0]
        clean_query = re.sub(r"\s*=+\s*FILE INFORMATION:\s*$", "", clean_query).strip()

    for tool_name in state["selected_tools"]:
        try:
            print(f"Executing tool: {tool_name}")
            tool = AVAILABLE_TOOLS[tool_name]

            if tool_name in file_tools:
                if not file_path:
                    skip_msg = f"Skipped {tool_name}: no downloaded file is available for this task."
                    results[tool_name] = skip_msg
                    print(skip_msg)
                    continue
                if tool_name == "excel":
                    result = tool.run({"excel_path": file_path, "sheet_name": None})
                elif tool_name == "image":
                    result = tool.run({"image_path": file_path, "question": question})
                else:
                    result = tool.run(file_path)
            else:
                # Information-based tools (search, wikipedia, youtube).
                result = tool.run(clean_query)

            results[tool_name] = result

            print(f"Tool {tool_name} completed successfully.")
            print(f"Output for {tool_name}: {result}")

        except Exception as e:
            error_msg = f"Error using {tool_name}: {str(e)}"
            results[tool_name] = error_msg
            state["error_count"] += 1
            print(error_msg)

    state["tool_results"] = results
    state["current_step"] = AgentStep.SYNTHESIZE_ANSWER.value
    return state
345
+
346
def synthesize_answer(state: AgentState) -> AgentState:
    """Produce the final answer from the question and the tool outputs.

    Two LLM calls are made: the first writes a step-by-step analysis, the
    second reduces that analysis to the bare answer. Both calls run inside
    the same ``try`` so a failure in either one is routed to error recovery
    instead of escaping the workflow.

    Args:
        state: Current workflow state; mutated in place.

    Returns:
        The same state. On success ``final_answer`` holds the answer with
        surrounding whitespace removed and ``current_step`` is ``complete``;
        on failure ``error_count`` is incremented and ``current_step`` is
        ``error_recovery``.
    """
    tool_results = state["tool_results"]
    tool_results_str = "\n".join(
        [f"=== {tool.upper()} RESULTS ===\n{result}\n" for tool, result in tool_results.items()]
    )
    # Optional prompt fragments, present only when at least one tool ran.
    outputs_clause = " using the available tool outputs" if tool_results else ""
    tool_section = f"Available tool outputs: {tool_results_str}" if tool_results else ""

    cot_prompt = f"""You are a precise assistant tasked with analyzing the user's question{outputs_clause}.

Question:
{state["question"]}

{tool_section}

Instructions:
- Think step-by-step to determine the best strategy to answer the question.
- Use only the given information; do not hallucinate or infer from external knowledge.
- If decoding, logical deduction, counting, or interpretation is required, show each step clearly.
- If any part of the tool output is unclear or incomplete, mention it and its impact.
- Do not guess. If the information is insufficient, say so clearly.
- Finish with a clearly marked line: `---END OF ANALYSIS---`

Your step-by-step analysis:"""

    try:
        cot_response = llm.invoke(cot_prompt).content

        final_answer_prompt = f"""You are a precise assistant tasked with deriving the **final answer** from the step-by-step analysis below.

Question:
{state["question"]}

Step-by-step analysis:
{cot_response}

Instructions:
- Read the analysis thoroughly before responding.
- Output ONLY the final answer. Do NOT include any reasoning or explanation.
- Remove any punctuation at the corners of the answer unless it is explicitly mentioned in the question.
- The answer must be concise and factual.
- If the analysis concluded that a definitive answer cannot be determined, respond with: `NA` (exactly).

Final answer:"""

        response = llm.invoke(final_answer_prompt).content
        # Strip stray whitespace/newlines so the answer is the bare text.
        state["final_answer"] = response.strip() if isinstance(response, str) else response
        state["current_step"] = AgentStep.COMPLETE.value
    except Exception as e:
        state["error_count"] += 1
        state["final_answer"] = f"Error synthesizing answer: {e}"
        state["current_step"] = AgentStep.ERROR_RECOVERY.value

    return state
398
+
399
def error_recovery(state: AgentState) -> AgentState:
    """Produce a last-resort answer after an earlier step failed.

    When the error budget is used up (``error_count >= max_errors``) a fixed
    apology is returned. Otherwise the LLM is asked to answer
    ``original_question`` directly; if that call fails too, the error text
    becomes the answer. In every case ``current_step`` ends as ``complete``.

    Args:
        state: Current workflow state; mutated in place.

    Returns:
        The same state object.
    """
    # Budget exhausted: give up without another LLM call.
    if state["error_count"] >= state["max_errors"]:
        state["final_answer"] = "I encountered multiple errors and cannot complete this task reliably."
        state["current_step"] = AgentStep.COMPLETE.value
        return state

    # Fallback: answer from the model's own knowledge, no tools involved.
    try:
        fallback_prompt = f"""
        Answer this question directly using your knowledge:
        {state["original_question"]}

        Provide a helpful response even if you cannot access external tools.
        Be clear about any limitations in your answer.
        """
        direct_reply = llm.invoke(fallback_prompt).content
        state["final_answer"] = f"Using available knowledge (some tools unavailable): {direct_reply}"
    except Exception as e:
        state["final_answer"] = f"All approaches failed. Error: {e}"

    state["current_step"] = AgentStep.COMPLETE.value
    return state
422
+
423
+ # ----------- Enhanced LangGraph Workflow -----------
424
def route_next_step(state: AgentState) -> str:
    """Return the step that follows ``state["current_step"]``.

    Steps without an entry in the transition table map to ``END``.
    """
    current = state["current_step"]

    successors = {
        "start": AgentStep.ANALYZE_QUESTION.value,
        AgentStep.ANALYZE_QUESTION.value: AgentStep.SELECT_TOOLS.value,
        AgentStep.SELECT_TOOLS.value: AgentStep.EXECUTE_TOOLS.value,
        AgentStep.EXECUTE_TOOLS.value: AgentStep.SYNTHESIZE_ANSWER.value,
        AgentStep.SYNTHESIZE_ANSWER.value: AgentStep.COMPLETE.value,
        AgentStep.ERROR_RECOVERY.value: AgentStep.COMPLETE.value,
        AgentStep.COMPLETE.value: END,
    }

    if current in successors:
        return successors[current]
    return END
437
+
438
# Build the workflow graph over the shared AgentState.
workflow = StateGraph(AgentState)

# Register each processing function as a node under its own name.
for _node_name, _node_handler in (
    ("analyze_question", analyze_question),
    ("select_tools", select_tools),
    ("execute_tools", execute_tools),
    ("synthesize_answer", synthesize_answer),
    ("error_recovery", error_recovery),
):
    workflow.add_node(_node_name, RunnableLambda(_node_handler))

# Execution starts with the question analysis.
workflow.set_entry_point("analyze_question")

# Each conditional edge follows the happy path when the node set the
# expected next step in current_step, and goes to error_recovery otherwise.
workflow.add_conditional_edges(
    "analyze_question",
    lambda state: "error_recovery" if state["current_step"] != AgentStep.SELECT_TOOLS.value else "select_tools",
)
workflow.add_edge("select_tools", "execute_tools")
workflow.add_conditional_edges(
    "execute_tools",
    lambda state: "error_recovery" if state["current_step"] != AgentStep.SYNTHESIZE_ANSWER.value else "synthesize_answer",
)
workflow.add_conditional_edges(
    "synthesize_answer",
    lambda state: "error_recovery" if state["current_step"] != AgentStep.COMPLETE.value else END,
)
workflow.add_edge("error_recovery", END)

# Compile once at import time; GaiaAgent reuses this compiled graph.
graph = workflow.compile()
469
+
470
+ # ----------- Agent Class -----------
471
class GaiaAgent:
    """GAIA Agent with tools and intelligent processing.

    Calling an instance with ``(task_id, question)`` downloads the task's
    file if one exists, runs the compiled workflow graph and returns the
    final answer string. The call never raises for pipeline failures: it
    falls back to a direct LLM answer, and if that fails too, to an error
    message.
    """

    def __init__(self):
        self.graph = graph
        # Tool name -> number of questions for which it was selected.
        self.tool_usage_stats = {}
        print("Enhanced GAIA Agent initialized with:")
        print("✓ Intelligent multi-query web search")
        print("✓ Entity-aware Wikipedia search")
        print("✓ Enhanced file processing tools")
        print("✓ Advanced error recovery")
        print("✓ Comprehensive result synthesis")

    def get_tool_stats(self) -> Dict[str, int]:
        """Return a copy of the per-tool selection counts."""
        return self.tool_usage_stats.copy()

    def __call__(self, task_id: str, question: str) -> str:
        """Answer ``question`` for task ``task_id``.

        Args:
            task_id: Task identifier; also used to look up the task's file.
            question: Question text.

        Returns:
            The workflow's final answer, or ``"Fallback response: ..."``
            from a direct LLM call if the workflow raised, or the error
            message if the fallback call failed as well.
        """
        print(f"\n{'='*60}")
        print(f"[{task_id}] ENHANCED PROCESSING: {question}")

        try:
            # File download and state setup are inside the try so that an
            # unexpected failure there also reaches the fallback below.
            processed_question = process_file(task_id, question)
            initial_state = initialize_state(processed_question)

            # Execute the enhanced workflow
            result = self.graph.invoke(initial_state)

            # Extract results
            answer = result.get("final_answer", "No answer generated")
            selected_tools = result.get("selected_tools", [])
            conversation_history = result.get("conversation_history", [])
            tool_results = result.get("tool_results", {})
            error_count = result.get("error_count", 0)

            # Update tool usage statistics
            for tool in selected_tools:
                self.tool_usage_stats[tool] = self.tool_usage_stats.get(tool, 0) + 1

            # Enhanced logging
            print(f"[{task_id}] Selected tools: {selected_tools}")
            print(f"[{task_id}] Tools executed: {list(tool_results.keys())}")
            print(f"[{task_id}] Processing steps: {len(conversation_history)}")
            print(f"[{task_id}] Errors encountered: {error_count}")

            # Log tool result sizes for debugging
            for tool, tool_output in tool_results.items():
                result_size = len(str(tool_output)) if tool_output else 0
                print(f"[{task_id}] {tool} result size: {result_size} chars")

            print(f"[{task_id}] FINAL ANSWER: {answer}")
            print(f"{'='*60}")

            return answer

        except Exception as e:
            error_msg = f"Critical error in enhanced agent execution: {str(e)}"
            print(f"[{task_id}] {error_msg}")

            # Try fallback direct LLM response
            try:
                fallback_response = llm.invoke(f"Please answer this question: {question}").content
                return f"Fallback response: {fallback_response}"
            except Exception as fallback_error:
                # Catch Exception rather than using a bare except, so
                # KeyboardInterrupt and SystemExit still propagate.
                print(f"[{task_id}] Fallback LLM call failed: {fallback_error}")
                return error_msg
537
+
538
+ # ----------- Enhanced File Processing -----------
539
def detect_file_type(file_path: str) -> Optional[str]:
    """Classify a file by its extension.

    Args:
        file_path: File name or path; only the final suffix is inspected,
            case-insensitively.

    Returns:
        One of ``"excel"``, ``"image"``, ``"audio"``, ``"code"``, ``"text"``
        or ``"document"``, or ``None`` for an unrecognised or missing suffix.
    """
    extensions_by_type = {
        "excel": (".xlsx", ".xls", ".csv"),
        "image": (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff", ".webp"),
        "audio": (".mp3", ".wav", ".ogg", ".flac", ".m4a", ".aac"),
        "code": (
            ".py", ".ipynb", ".js", ".html", ".css", ".java",
            ".cpp", ".c", ".sql", ".r", ".json", ".xml",
        ),
        "text": (".txt", ".md"),
        "document": (".pdf", ".doc", ".docx"),
    }

    suffix = Path(file_path).suffix.lower()
    for type_name, extensions in extensions_by_type.items():
        if suffix in extensions:
            return type_name
    return None
562
+
563
def _safe_download_name(content_disposition: str, fallback: str) -> str:
    """Derive a safe local file name from a Content-Disposition header value.

    Handles both ``filename="..."`` and the extended
    ``filename*=charset'lang'percent-encoded`` form. Directory components are
    discarded so a server-supplied name cannot escape the download directory.
    ``fallback`` is returned when no usable name is found.
    """
    from urllib.parse import unquote  # local import: only needed here

    name = ""
    extended = re.search(r"filename\*\s*=\s*([^;]+)", content_disposition, re.IGNORECASE)
    if extended:
        value = extended.group(1).strip().strip('"')
        # Drop the leading charset'language' part, then decode %XX escapes.
        name = unquote(value.split("'", 2)[-1])
    else:
        plain = re.search(r'filename\s*=\s*(?:"([^"]*)"|([^;]+))', content_disposition, re.IGNORECASE)
        if plain:
            name = (plain.group(1) or plain.group(2) or "").strip()

    # Keep only the last path component (treating backslashes as separators).
    name = Path(name.replace("\\", "/")).name
    return name if name not in ("", ".", "..") else fallback


def process_file(task_id: str, question_text: str) -> str:
    """Download the file attached to a task and describe it in the question.

    The file is requested from ``FILE_PATH + task_id`` and stored under
    ``<tempdir>/gaia_enhanced_files/<task_id>/``. The returned text is the
    question followed by a file-information section whose
    "A file was downloaded..." line is followed by the local path.

    Args:
        task_id: Task identifier used in the download URL and directory name.
        question_text: Original question.

    Returns:
        The augmented question, or ``question_text`` unchanged when the
        download fails or the file cannot be written.
    """
    file_url = f"{FILE_PATH}{task_id}"

    try:
        print(f"[{task_id}] Attempting to download file from: {file_url}")
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()
        print(f"[{task_id}] File download successful. Status: {response.status_code}")

    except requests.exceptions.RequestException as exc:
        print(f"[{task_id}] File download failed: {str(exc)}")
        return question_text  # Return original question if no file

    # The header is server-controlled, so the name is sanitised before use;
    # the task id is the fallback name.
    content_disposition = response.headers.get("content-disposition", "")
    filename = _safe_download_name(content_disposition, task_id)

    temp_storage_dir = Path(tempfile.gettempdir()) / "gaia_enhanced_files" / task_id
    file_path = temp_storage_dir / filename
    try:
        temp_storage_dir.mkdir(parents=True, exist_ok=True)
        file_path.write_bytes(response.content)
    except OSError as exc:
        print(f"[{task_id}] Could not save downloaded file: {exc}")
        return question_text

    # Get file metadata
    file_size = len(response.content)
    file_type = detect_file_type(filename)

    print(f"[{task_id}] File saved: {filename} ({file_size:,} bytes, type: {file_type})")

    # Append the file section; execute_tools looks for the marker line and
    # reads the path from the line that follows it.
    enhanced_question = "".join(
        [
            f"{question_text}\n\n",
            f"{'='*50}\n",
            "FILE INFORMATION:\n",
            "A file was downloaded for this task and saved locally at:\n",
            f"{str(file_path)}\n",
            "File details:\n",
            f"- Name: {filename}\n",
            f"- Size: {file_size:,} bytes ({file_size/1024:.1f} KB)\n",
            f"- Type: {file_type or 'unknown'}\n",
            f"{'='*50}\n\n",
        ]
    )

    return enhanced_question
613
+
614
+ # ----------- Usage Examples and Testing -----------
615
def run_enhanced_tests():
    """Run a few sample questions through a new GaiaAgent and print the results.

    Each case prints its id, question and expected tools, then the length and
    a 200-character preview of the answer (or the exception text). The
    expected tools are printed for reference only; nothing is asserted.
    Tool usage counts are printed at the end.
    """
    agent = GaiaAgent()

    # (case id, question, expected tools)
    cases = (
        (
            "test_search_1",
            "What are the latest developments in artificial intelligence in 2024?",
            ["search"],
        ),
        (
            "test_wiki_1",
            "Tell me about Albert Einstein's contributions to physics",
            ["wikipedia"],
        ),
        (
            "test_combined_1",
            "What is machine learning and what are recent breakthroughs?",
            ["wikipedia", "search"],
        ),
        (
            "test_excel_1",
            "Analyze the data in the Excel file sales_data.xlsx",
            ["excel"],
        ),
    )

    banner = "=" * 80
    print("\n" + banner)
    print("RUNNING ENHANCED AGENT TESTS")
    print(banner)

    for case_id, case_question, expected_tools in cases:
        print(f"\nTest Case: {case_id}")
        print(f"Question: {case_question}")
        print(f"Expected tools: {expected_tools}")

        try:
            answer = agent(case_id, case_question)
            print(f"Result length: {len(answer)} characters")
            print(f"Result preview: {answer[:200]}...")
        except Exception as e:
            print(f"Test failed: {e}")

        print("-" * 60)

    # Print tool usage statistics
    print(f"\nTool Usage Statistics:")
    usage = agent.get_tool_stats()
    for tool in usage:
        print(f" {tool}: {usage[tool]} times")
664
+
665
+ # Usage example
666
if __name__ == "__main__":
    # Demo run: answer a few sample questions and print a preview of each.
    agent = GaiaAgent()

    sample_questions = [
        "What is the current population of Tokyo and how has it changed recently?",
        "Explain quantum computing and its recent developments",
        "Tell me about the history of machine learning and current AI trends",
    ]

    print("\n" + "="*80)
    print("ENHANCED GAIA AGENT DEMONSTRATION")
    print("="*80)

    # Examples are numbered from 1 for display; task ids count from 0.
    for i in range(len(sample_questions)):
        question = sample_questions[i]
        print(f"\nExample {i+1}: {question}")
        result = agent(f"demo_{i}", question)
        print(f"Answer: {result[:300]}...")
        print("-" * 60)

    # Uncomment to run comprehensive tests
    # run_enhanced_tests()