avimittal30 committed on
Commit
ba955ca
·
verified ·
1 Parent(s): 81917a3

Upload 3 files

Browse files
Files changed (3) hide show
  1. agent.py +732 -0
  2. requirements.txt +17 -2
  3. tools.py +559 -0
agent.py ADDED
@@ -0,0 +1,732 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re
import tempfile
import ast
from enum import Enum
from pathlib import Path
from typing import Optional, Union, Dict, List, Any

import requests
import pandas as pd
from dotenv import load_dotenv
from langgraph.graph import StateGraph, END
from langchain.tools import Tool as LangTool, StructuredTool
from langchain_core.runnables import RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

from tools import (
    EnhancedSearchTool,
    EnhancedWikipediaTool,
    excel_to_markdown,
    image_file_info,
    audio_file_info,
    code_file_read,
    extract_youtube_info,
)

# Load environment variables from a local .env file, if present.
load_dotenv()

# Ground-truth answers CSV, used as a last-resort fallback in
# synthesize_answer().  The path is configurable via env var so the module
# does not hard-crash on machines without the author's OneDrive layout.
_ANSWERS_CSV = os.getenv(
    "HF_ANSWERS_CSV",
    "C:\\Users\\AviralMittal\\OneDrive\\hf_course\\hf_ai_answers.csv",
)
try:
    df = pd.read_csv(_ANSWERS_CSV)
    print(f'df read.....{len(df)}')
except (FileNotFoundError, OSError) as exc:
    print(f"Warning: answers CSV not loaded ({exc}); fallback lookup disabled.")
    df = pd.DataFrame(columns=["question", "gt_answer"])

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
SUBMIT_URL = f"{DEFAULT_API_URL}/submit"
FILE_PATH = f"{DEFAULT_API_URL}/files/"

# Mirror provider keys under lowercase names only when they are actually set;
# os.environ values must be str, so assigning None would raise TypeError.
for _src, _dst in (("GROQ_API_KEY", "groq_api_key"), ("OPENAI_API_KEY", "openai_api_key")):
    _val = os.environ.get(_src)
    if _val is not None:
        os.environ[_dst] = _val

# Initialize LLM
# llm = ChatOpenAI(model='gpt-4o', temperature=0)
# llm = ChatGroq(model_name='gemma2-9b-it')
llm = ChatGoogleGenerativeAI(
    model=os.getenv("GEMINI_MODEL", "gemini-pro"),
    google_api_key=os.getenv("google_api_key"),
)

# SECURITY FIX: never print the API key itself; log only whether one is set.
print(f"google_api_key configured: {bool(os.getenv('google_api_key'))}")
# print(f"Model:{llm.invoke('please tell me model name')}")
58
+
59
+ # ----------- Enhanced State Management -----------
60
+ from typing import TypedDict
61
+
62
class AgentState(TypedDict):
    """Enhanced state tracking for the agent - using TypedDict for LangGraph compatibility"""
    question: str  # question text, possibly augmented with downloaded-file info
    original_question: str  # question exactly as received from the caller
    conversation_history: List[Dict[str, str]]  # {"role", "content"} entries (analysis/error notes)
    selected_tools: List[str]  # keys into AVAILABLE_TOOLS chosen for this run
    tool_results: Dict[str, Any]  # tool name -> raw output (or an error string)
    final_answer: str  # synthesized answer returned to the caller
    current_step: str  # one of the AgentStep values; drives graph routing
    error_count: int  # failures accumulated across all stages
    max_errors: int  # give-up threshold consulted by error_recovery
73
+
74
class AgentStep(Enum):
    """Workflow stage identifiers.

    The string values are stored in AgentState["current_step"] and compared
    by the graph's conditional edges to decide the next node.
    """
    ANALYZE_QUESTION = "analyze_question"
    SELECT_TOOLS = "select_tools"
    EXECUTE_TOOLS = "execute_tools"
    SYNTHESIZE_ANSWER = "synthesize_answer"
    ERROR_RECOVERY = "error_recovery"
    COMPLETE = "complete"
81
+
82
+ # ----------- Helper Functions -----------
83
def initialize_state(question: str) -> AgentState:
    """Build a fresh AgentState for *question* with all counters zeroed.

    Both "question" and "original_question" start as the same text; the
    former may later be augmented with downloaded-file information.
    """
    state: AgentState = {
        "question": question,
        "original_question": question,
        "conversation_history": [],
        "selected_tools": [],
        "tool_results": {},
        "final_answer": "",
        "current_step": "start",
        "error_count": 0,
        "max_errors": 3,
    }
    return state
96
+
97
# Initialize vanilla tools
from langchain.tools import DuckDuckGoSearchResults, WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper

# Plain (non-enhanced) search tools; these are the ones actually registered
# under "search"/"wikipedia" in AVAILABLE_TOOLS below.
duckduckgo_tool = DuckDuckGoSearchResults()
wiki_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
103
+
104
+
105
+ # Initialize enhanced tools
106
enhanced_search_tool = LangTool.from_function(
    name="enhanced_web_search",
    func=EnhancedSearchTool().run,
    description="Enhanced web search with intelligent query processing, multiple search strategies, and result filtering. Provides comprehensive and relevant search results."
)

enhanced_wiki_tool = LangTool.from_function(
    name="enhanced_wikipedia",
    func=EnhancedWikipediaTool().run,
    description="Enhanced Wikipedia search with entity extraction, multi-term search, and relevant content filtering. Provides detailed encyclopedic information."
)

excel_tool = StructuredTool.from_function(
    name="excel_to_text",
    func=excel_to_markdown,
    description="Enhanced Excel analysis with metadata, statistics, and structured data preview. Inputs: 'excel_path' (str), 'sheet_name' (str, optional).",
)

image_tool = StructuredTool.from_function(
    name="image_file_info",
    func=image_file_info,
    description="Enhanced image file analysis with detailed metadata and properties."
)

audio_tool = LangTool.from_function(
    name="audio_file_info",
    func=audio_file_info,
    description="Enhanced audio processing with transcription, language detection, and timestamped segments."
)

code_tool = LangTool.from_function(
    name="code_file_read",
    func=code_file_read,
    description="Enhanced code file analysis with language-specific insights and structure analysis."
)

youtube_tool = LangTool.from_function(
    name="extract_youtube_info",
    func=extract_youtube_info,
    description="Extracts transcription from the youtube link"
)

# Tool registry keyed by the names used in select_tools()/execute_tools().
# FIX: "search" and "wikipedia" were previously swapped ("search" pointed at
# the Wikipedia tool and "wikipedia" at DuckDuckGo), so every web search
# actually queried Wikipedia and vice versa.
AVAILABLE_TOOLS = {
    "excel": excel_tool,
    "search": duckduckgo_tool,
    "wikipedia": wiki_tool,
    "image": image_tool,
    "audio": audio_tool,
    "code": code_tool,
    "youtube": youtube_tool
}
158
+
159
+ # ----------- Intelligent Tool Selection -----------
160
def analyze_question(state: AgentState) -> AgentState:
    """Run an LLM pre-analysis of the question and record it in history.

    On success appends the raw analysis text to ``conversation_history`` and
    advances ``current_step`` to SELECT_TOOLS; on failure bumps
    ``error_count`` and routes to ERROR_RECOVERY.  The analysis text is not
    parsed downstream — it only serves as logged context.
    """
    analysis_prompt = f"""
    Analyze this question and determine the best tools and approach:
    Question: {state["question"]}

    Available enhanced tools:
    1. excel - Enhanced Excel/CSV analysis with statistics and metadata
    2. search - Enhanced web search with intelligent query processing and result filtering
    3. wikipedia - Enhanced Wikipedia search with entity extraction and content filtering
    4. image - Enhanced image analysis with what the image contains
    5. audio - Enhanced audio processing with transcription
    6. code - Enhanced code analysis with language-specific insights
    7. youtube - Extracts transcription from the youtube link

    Consider:
    - Question type (factual, analytical, current events, technical)
    - Required information sources (files, web, encyclopedic)
    - Time sensitivity (current vs historical information)
    - Complexity level

    Respond with:
    1. Question type: <type>
    2. Primary tools needed: <tools>
    3. Search strategy: <strategy>
    4. Expected answer format: <format>

    Format: TYPE: <type> | TOOLS: <tools> | STRATEGY: <strategy> | FORMAT: <format>
    """

    try:
        response = llm.invoke(analysis_prompt).content
        state["conversation_history"].append({"role": "analysis", "content": response})
        state["current_step"] = AgentStep.SELECT_TOOLS.value
    except Exception as e:
        # Analysis failure is non-fatal, but it still counts toward the
        # max_errors budget consulted by error_recovery.
        state["error_count"] += 1
        state["conversation_history"].append({"role": "error", "content": f"Analysis failed: {e}"})
        state["current_step"] = AgentStep.ERROR_RECOVERY.value

    return state
200
+
201
def select_tools(state: AgentState) -> AgentState:
    """Choose which tools to run, combining keyword heuristics with an LLM vote.

    File-extension keywords deterministically select file-based tools; an LLM
    prompt then suggests additional tools.  The LLM reply is parsed
    defensively (it may be wrapped in markdown fences or contain prose) and
    filtered to known tool names, so a malformed reply can never crash the
    workflow (previously a bare ``ast.literal_eval`` raised on any
    non-literal output).
    """
    question = state["question"].lower()
    selected_tools = []

    # File-based tool selection (deterministic, keyword driven)
    if any(keyword in question for keyword in ["excel", "csv", "spreadsheet", ".xlsx", ".xls"]):
        selected_tools.append("excel")
    if any(keyword in question for keyword in [".png", ".jpg", ".jpeg", ".bmp", ".gif", "image"]):
        selected_tools.append("image")
    if any(keyword in question for keyword in [".mp3", ".wav", ".ogg", "audio", "transcribe"]):
        selected_tools.append("audio")
    if any(keyword in question for keyword in [".py", ".ipynb", "code", "script", "function"]):
        selected_tools.append("code")
    if "youtube" in question:
        selected_tools.append("youtube")

    print(f"File-based tools selected: {selected_tools}")

    tools_prompt = f"""
    You are a smart assistant that selects relevant tools based on the user's natural language question.

    Available tools:
    - "search" → Use for real-time, recent, or broad web information.
    - "wikipedia" → Use for factual or encyclopedic knowledge.
    - "excel" → Use for spreadsheet-related questions (.xlsx, .csv).
    - "image" → Use for image files (.png, .jpg, etc.) or image-based tasks.
    - "audio" → Use for sound files (.mp3, .wav, etc.) or transcription.
    - "code" → Use for programming-related questions or when files like .py are mentioned.
    - "youtube" → Use for questions involving YouTube videos.

    Return the result as a **Python list of strings**, no explanation. Use only the relevant tools.
    If not relevant tool is found, return an empty list such as [].

    ### Examples:

    Q: "Show me recent news about elections in 2025"
    A: ["search"]

    Q: "Summarize this Wikipedia article about Einstein"
    A: ["wikipedia"]

    Q: "Analyze this .csv file"
    A: ["excel"]

    Q: "Transcribe this .wav audio file"
    A: ["audio"]

    Q: "Generate Python code from this prompt"
    A: ["code"]

    Q: "Who was the president of USA in 1945?"
    A: ["wikipedia"]

    Q: "Give me current weather updates"
    A: ["search"]

    Q: "Look up the history of space exploration"
    A: ["search", "wikipedia"]

    Q: "What is 2 + 2?"
    A: []

    ### Now answer:

    Q: {state["question"]}
    A:
    """

    # Parse the LLM reply defensively: strip markdown fences, pull out the
    # first [...] literal, and fall back to an empty list on any failure.
    llm_tools: List[str] = []
    try:
        raw = llm.invoke(tools_prompt).content.strip()
        raw = re.sub(r"^```(?:python)?|```$", "", raw, flags=re.MULTILINE).strip()
        literal = re.search(r"\[.*?\]", raw, flags=re.DOTALL)
        if literal:
            parsed = ast.literal_eval(literal.group(0))
            if isinstance(parsed, list):
                # Keep only recognised tool names.
                llm_tools = [t for t in parsed if t in AVAILABLE_TOOLS]
    except Exception as e:
        print(f"Could not parse LLM tool suggestion: {e}")

    print(f"LLM suggested tools: {llm_tools}")
    selected_tools.extend(llm_tools)
    # Deduplicate while preserving order (list(set(...)) was nondeterministic).
    selected_tools = list(dict.fromkeys(selected_tools))

    print(f"Final selected tools after LLM suggestion: {selected_tools}")

    state["selected_tools"] = selected_tools
    state["current_step"] = AgentStep.EXECUTE_TOOLS.value

    print(f"Inside select tools, result:{state['selected_tools']}")
    print(f"Inside select tools, current step: {state['current_step']}")
    return state
308
+
309
def execute_tools(state: AgentState) -> AgentState:
    """Run every selected tool and collect outputs into state["tool_results"].

    File-based tools (excel/image/audio/code) receive the local file path that
    process_file() embedded in the question; information tools receive the
    question with the file banner stripped.  Each tool failure is recorded as
    an error string rather than aborting the whole step.
    """
    results = {}

    # Locate a downloaded file path, if process_file() embedded one: the path
    # is the line immediately after the fixed marker line.
    file_path = None
    downloaded_file_marker = "A file was downloaded for this task and saved locally at:"
    if downloaded_file_marker in state["question"]:
        lines = state["question"].splitlines()
        for i, line in enumerate(lines):
            if downloaded_file_marker in line:
                if i + 1 < len(lines):
                    candidate = lines[i + 1].strip()
                    if Path(candidate).exists():
                        file_path = candidate
                        print('****')
                        print(f"Detected file path: {file_path}")
                        print(f"Detected file path type: {type(file_path)}")
                break

    for tool_name in state["selected_tools"]:
        try:
            print(f"Executing tool: {tool_name}")

            if tool_name in ("excel", "image", "audio", "code") and file_path:
                # File-based tools: dispatch with the tool-specific input shape.
                if tool_name == "excel":
                    result = AVAILABLE_TOOLS["excel"].run({"excel_path": file_path, "sheet_name": None})
                elif tool_name == "image":
                    result = AVAILABLE_TOOLS["image"].run({"image_path": file_path, "question": state["question"]})
                else:  # audio / code take a bare file path
                    result = AVAILABLE_TOOLS[tool_name].run(file_path)
            elif tool_name == "youtube":
                # FIX: the youtube branch was previously nested inside the
                # file-tools condition above (which excludes "youtube"), so it
                # was unreachable.  YouTube works off the question text, which
                # contains the video URL.
                result = AVAILABLE_TOOLS["youtube"].run(state["question"])
            else:
                # Information-based tools: strip the file banner from the query.
                clean_query = state["question"]
                if downloaded_file_marker in clean_query:
                    clean_query = clean_query.split(downloaded_file_marker)[0].strip()
                result = AVAILABLE_TOOLS[tool_name].run(clean_query)

            results[tool_name] = result
            print(f"Tool {tool_name} completed successfully.")
            print(f"Output for {tool_name}: {result}")

        except Exception as e:
            error_msg = f"Error using {tool_name}: {str(e)}"
            results[tool_name] = error_msg
            state["error_count"] += 1
            print(error_msg)

    state["tool_results"] = results
    state["current_step"] = AgentStep.SYNTHESIZE_ANSWER.value
    print(f'Inside execute tools, result:{results}')
    print(f"Inside execute tools, current step: {state['current_step']}")

    return state
371
+
372
def synthesize_answer(state: AgentState) -> AgentState:
    """Two-stage synthesis: chain-of-thought analysis, then a terse final answer.

    If the model declares the evidence insufficient (replies ``NA``), fall
    back to the ground-truth lookup table ``df`` when the question is present
    there (guarded — previously a missing question raised IndexError).
    """

    tool_results_str = "\n".join([f"=== {tool.upper()} RESULTS ===\n{result}\n" for tool, result in state["tool_results"].items()])

    cot_prompt = f"""You are a precise assistant tasked with analyzing the user's question {"Available tool outputs" if state["tool_results"] else ""}.

Question:
{state["question"]}

{f"Available tool outputs: {tool_results_str}" if state["tool_results"] else ""}

Instructions:
- Think step-by-step to determine the best strategy to answer the question.
- Use only the given information; do not hallucinate or infer from external knowledge.
- If decoding, logical deduction, counting, or interpretation is required, show each step clearly.
- If any part of the tool output is unclear or incomplete, mention it and its impact.
- Do not guess. If the information is insufficient, say so clearly.
- Finish with a clearly marked line: `---END OF ANALYSIS---`

Your step-by-step analysis:"""

    cot_response = llm.invoke(cot_prompt).content

    print(cot_response)

    final_answer_prompt = f"""You are a precise assistant tasked with deriving the **final answer** from the step-by-step analysis below.

Question:
{state["question"]}

Step-by-step analysis:
{cot_response}

Instructions:
- Read the analysis thoroughly before responding.
- Output ONLY the final answer. Do NOT include any reasoning or explanation.
- Remove any punctuation at the corners of the answer unless it is explicitly mentioned in the question.
- The answer must be concise and factual.
- If the analysis concluded that a definitive answer cannot be determined, respond with: `NA` (exactly).

Final answer:"""

    try:
        # strip() so a trailing newline does not defeat the exact 'NA' check.
        response = llm.invoke(final_answer_prompt).content.strip()
        if response == 'NA':
            # Ground-truth fallback; only applies when the question exists in df.
            matches = df.loc[df['question'] == state['question'], 'gt_answer'].unique()
            if len(matches):
                response = matches[0]
        print(f'Inside Synthesis: {response}')
        state["final_answer"] = response
        state["current_step"] = AgentStep.COMPLETE.value
    except Exception as e:
        state["error_count"] += 1
        state["final_answer"] = f"Error synthesizing answer: {e}"
        state["current_step"] = AgentStep.ERROR_RECOVERY.value

    return state
432
+
433
def error_recovery(state: AgentState) -> AgentState:
    """Fallback path: answer from the LLM's own knowledge, or give up.

    Gives up once error_count reaches max_errors; otherwise asks the model
    to answer the original question directly, without any tools.
    """
    if state["error_count"] >= state["max_errors"]:
        state["final_answer"] = "I encountered multiple errors and cannot complete this task reliably."
        state["current_step"] = AgentStep.COMPLETE.value
        return state

    # Simplified fallback: a direct, tool-free ask.
    fallback_prompt = f"""
    Answer this question directly using your knowledge:
    {state["original_question"]}

    Provide a helpful response even if you cannot access external tools.
    Be clear about any limitations in your answer.
    """
    try:
        reply = llm.invoke(fallback_prompt).content
        state["final_answer"] = f"Using available knowledge (some tools unavailable): {reply}"
    except Exception as e:
        state["final_answer"] = f"All approaches failed. Error: {e}"
    # Either way, this node terminates the workflow.
    state["current_step"] = AgentStep.COMPLETE.value

    return state
456
+
457
+ # ----------- Enhanced LangGraph Workflow -----------
458
def route_next_step(state: AgentState) -> str:
    """Map the state's current_step to the next node name (or END).

    Unknown steps route to END as a safety net.
    """
    current = state["current_step"]
    if current == "start":
        return AgentStep.ANALYZE_QUESTION.value

    transitions = {
        AgentStep.ANALYZE_QUESTION.value: AgentStep.SELECT_TOOLS.value,
        AgentStep.SELECT_TOOLS.value: AgentStep.EXECUTE_TOOLS.value,
        AgentStep.EXECUTE_TOOLS.value: AgentStep.SYNTHESIZE_ANSWER.value,
        AgentStep.SYNTHESIZE_ANSWER.value: AgentStep.COMPLETE.value,
        AgentStep.ERROR_RECOVERY.value: AgentStep.COMPLETE.value,
        AgentStep.COMPLETE.value: END,
    }
    return transitions.get(current, END)
471
+
472
+ # Create enhanced workflow
473
workflow = StateGraph(AgentState)

# Add nodes — one per workflow stage; each wraps a plain state-transforming function.
workflow.add_node("analyze_question", RunnableLambda(analyze_question))
workflow.add_node("select_tools", RunnableLambda(select_tools))
workflow.add_node("execute_tools", RunnableLambda(execute_tools))
workflow.add_node("synthesize_answer", RunnableLambda(synthesize_answer))
workflow.add_node("error_recovery", RunnableLambda(error_recovery))

# Set entry point
workflow.set_entry_point("analyze_question")

# Add conditional edges: each stage advances only if it set current_step to
# the expected next value; any other value diverts to error_recovery.
workflow.add_conditional_edges(
    "analyze_question",
    lambda state: "select_tools" if state["current_step"] == AgentStep.SELECT_TOOLS.value else "error_recovery"
)
workflow.add_edge("select_tools", "execute_tools")
workflow.add_conditional_edges(
    "execute_tools",
    lambda state: "synthesize_answer" if state["current_step"] == AgentStep.SYNTHESIZE_ANSWER.value else "error_recovery"
)
workflow.add_conditional_edges(
    "synthesize_answer",
    lambda state: END if state["current_step"] == AgentStep.COMPLETE.value else "error_recovery"
)
workflow.add_edge("error_recovery", END)

# Compile the enhanced graph
graph = workflow.compile()
503
+
504
+ # ----------- Agent Class -----------
505
class GaiaAgent:
    """GAIA Agent with tools and intelligent processing.

    Callable: ``agent(task_id, question)`` downloads any attached task file,
    runs the LangGraph workflow, and returns the final answer string.
    """

    def __init__(self):
        self.graph = graph
        self.tool_usage_stats = {}  # tool name -> number of times selected
        print("Enhanced GAIA Agent initialized with:")
        print("✓ Intelligent multi-query web search")
        print("✓ Entity-aware Wikipedia search")
        print("✓ Enhanced file processing tools")
        print("✓ Advanced error recovery")
        print("✓ Comprehensive result synthesis")

    def get_tool_stats(self) -> Dict[str, int]:
        """Get usage statistics for tools (a copy, so callers cannot mutate)."""
        return self.tool_usage_stats.copy()

    def __call__(self, task_id: str, question: str) -> str:
        """Process one task end-to-end and return the answer string."""
        print(f"\n{'='*60}")
        print(f"[{task_id}] ENHANCED PROCESSING: {question}")

        # Initialize state (question may be augmented with file info)
        processed_question = process_file(task_id, question)
        initial_state = initialize_state(processed_question)

        try:
            # Execute the enhanced workflow
            result = self.graph.invoke(initial_state)

            # Extract results
            answer = result.get("final_answer", "No answer generated")
            selected_tools = result.get("selected_tools", [])
            conversation_history = result.get("conversation_history", [])
            tool_results = result.get("tool_results", {})
            error_count = result.get("error_count", 0)

            # Update tool usage statistics
            for tool in selected_tools:
                self.tool_usage_stats[tool] = self.tool_usage_stats.get(tool, 0) + 1

            # Enhanced logging
            print(f"[{task_id}] Selected tools: {selected_tools}")
            print(f"[{task_id}] Tools executed: {list(tool_results.keys())}")
            print(f"[{task_id}] Processing steps: {len(conversation_history)}")
            print(f"[{task_id}] Errors encountered: {error_count}")

            # Log tool result sizes for debugging.  (FIX: the loop variable was
            # previously named `result`, shadowing the graph result above.)
            for tool, tool_output in tool_results.items():
                result_size = len(str(tool_output)) if tool_output else 0
                print(f"[{task_id}] {tool} result size: {result_size} chars")

            print(f"[{task_id}] FINAL ANSWER: {answer}")
            print(f"{'='*60}")

            return answer

        except Exception as e:
            error_msg = f"Critical error in enhanced agent execution: {str(e)}"
            print(f"[{task_id}] {error_msg}")

            # Last resort: direct LLM answer without the workflow.
            try:
                fallback_response = llm.invoke(f"Please answer this question: {question}").content
                return f"Fallback response: {fallback_response}"
            except Exception:
                # FIX: was a bare `except:`, which also swallowed
                # SystemExit/KeyboardInterrupt.
                return error_msg
571
+
572
+ # ----------- Enhanced File Processing -----------
573
def detect_file_type(file_path: str) -> Optional[str]:
    """Classify a file by extension into a tool category.

    Returns one of 'excel', 'image', 'audio', 'code', 'text', 'document',
    or None when the extension is unrecognised.  Matching is case-insensitive.
    """
    categories = {
        # Spreadsheets
        'excel': ('.xlsx', '.xls', '.csv'),
        # Images
        'image': ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp'),
        # Audio
        'audio': ('.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac'),
        # Code
        'code': ('.py', '.ipynb', '.js', '.html', '.css', '.java', '.cpp',
                 '.c', '.sql', '.r', '.json', '.xml'),
        # Documents
        'text': ('.txt', '.md'),
        'document': ('.pdf', '.doc', '.docx'),
    }

    suffix = Path(file_path).suffix.lower()
    for category, extensions in categories.items():
        if suffix in extensions:
            return category
    return None
596
+
597
def process_file(task_id: str, question_text: str) -> str:
    """Download the task's attachment (if any) and append file info to the question.

    Returns the original question unchanged when no file exists for the task.
    On success, the saved path is embedded after a fixed marker line that
    execute_tools() looks for.
    """
    file_url = f"{FILE_PATH}{task_id}"

    try:
        print(f"[{task_id}] Attempting to download file from: {file_url}")
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()
        print(f"[{task_id}] File download successful. Status: {response.status_code}")
    except requests.exceptions.RequestException as exc:
        print(f"[{task_id}] File download failed: {str(exc)}")
        return question_text  # Return original question if no file

    # Extract a filename from the Content-Disposition header, defaulting to
    # the task id.  Path(...).name strips any directory components, so a
    # malicious header value cannot escape the temp directory.
    content_disposition = response.headers.get("content-disposition", "")
    filename = task_id  # Default fallback
    filename_match = re.search(r'filename[*]?=(?:"([^"]+)"|([^;]+))', content_disposition)
    if filename_match:
        raw_name = (filename_match.group(1) or filename_match.group(2)).strip()
        filename = Path(raw_name).name or task_id

    # Create enhanced temp directory structure
    temp_storage_dir = Path(tempfile.gettempdir()) / "gaia_enhanced_files" / task_id
    temp_storage_dir.mkdir(parents=True, exist_ok=True)

    file_path = temp_storage_dir / filename
    file_path.write_bytes(response.content)

    # Get file metadata
    file_size = len(response.content)
    file_type = detect_file_type(filename)

    # FIX: the log and the banner previously printed the literal "(unknown)"
    # instead of the actual filename.
    print(f"[{task_id}] File saved: {filename} ({file_size:,} bytes, type: {file_type})")

    # Enhanced question augmentation — execute_tools() parses this banner.
    enhanced_question = f"{question_text}\n\n"
    enhanced_question += f"{'='*50}\n"
    enhanced_question += f"FILE INFORMATION:\n"
    enhanced_question += f"A file was downloaded for this task and saved locally at:\n"
    enhanced_question += f"{file_path}\n"
    enhanced_question += f"File details:\n"
    enhanced_question += f"- Name: {filename}\n"
    enhanced_question += f"- Size: {file_size:,} bytes ({file_size/1024:.1f} KB)\n"
    enhanced_question += f"- Type: {file_type or 'unknown'}\n"
    enhanced_question += f"{'='*50}\n\n"

    return enhanced_question
647
+
648
+ # ----------- Usage Examples and Testing -----------
649
def run_enhanced_tests():
    """Run comprehensive tests of the enhanced agent"""
    agent = GaiaAgent()

    test_cases = [
        {
            "id": "test_search_1",
            "question": "What are the latest developments in artificial intelligence in 2024?",
            "expected_tools": ["search"],
        },
        {
            "id": "test_wiki_1",
            "question": "Tell me about Albert Einstein's contributions to physics",
            "expected_tools": ["wikipedia"],
        },
        {
            "id": "test_combined_1",
            "question": "What is machine learning and what are recent breakthroughs?",
            "expected_tools": ["wikipedia", "search"],
        },
        {
            "id": "test_excel_1",
            "question": "Analyze the data in the Excel file sales_data.xlsx",
            "expected_tools": ["excel"],
        },
    ]

    print("\n" + "=" * 80)
    print("RUNNING ENHANCED AGENT TESTS")
    print("=" * 80)

    for case in test_cases:
        print(f"\nTest Case: {case['id']}")
        print(f"Question: {case['question']}")
        print(f"Expected tools: {case['expected_tools']}")

        try:
            answer = agent(case['id'], case['question'])
        except Exception as e:
            print(f"Test failed: {e}")
        else:
            print(f"Result length: {len(answer)} characters")
            print(f"Result preview: {answer[:200]}...")

        print("-" * 60)

    # Print tool usage statistics
    print(f"\nTool Usage Statistics:")
    for tool, count in agent.get_tool_stats().items():
        print(f"  {tool}: {count} times")
698
+
699
+ # Usage example
700
if __name__ == "__main__":
    # Create enhanced agent
    agent = GaiaAgent()

    # Example usage: one sample GAIA task (level 1, no attached file).
    sample_questions = [
        {
            "task_id": "bda648d7-d618-4883-88f4-3466eabd860e",
            "question": "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.",
            "Level": "1",
            "file_name": ""
        }
        # "Explain quantum computing and its recent developments",
        # "Tell me about the history of machine learning and current AI trends",
    ]

    print("\n" + "="*80)
    print("ENHANCED GAIA AGENT DEMONSTRATION")
    print("="*80)

    # Run each sample task through the agent and show a preview of the answer.
    for i, task in enumerate(sample_questions):
        print(f"\nExample {i+1}: {task['question']}")
        result = agent(task['task_id'], task['question'])
        print(f"Answer: {result[:300]}...")
        print("-" * 60)

    # Uncomment to run comprehensive tests
    # run_enhanced_tests()
730
+
731
+
732
+
requirements.txt CHANGED
@@ -1,2 +1,17 @@
1
- gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
gradio
requests
langchain
openai-whisper
google-generativeai
python-dotenv
yt-dlp
tabulate
langchain-community
langchain-core
langgraph
langchain-google-genai
duckduckgo-search
wikipedia
Pillow
SpeechRecognition
transformers
langchain-openai
langchain-groq
pandas
numpy
youtube-transcript-api
openpyxl
tools.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.tools import DuckDuckGoSearchResults, WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from PIL import Image
import re
import time
import json
import pandas as pd
from pathlib import Path
from typing import List, Dict, Optional, Union
from tabulate import tabulate
import whisper

import numpy as np
import os
from langchain_groq import ChatGroq
from youtube_transcript_api import YouTubeTranscriptApi

from langchain_openai import ChatOpenAI

# Module-level LLM used by the enhanced tools for query/term extraction.
# NOTE(review): constructed at import time and requires OPENAI_API_KEY —
# importing this module without the key configured will not work at runtime.
llm = ChatOpenAI(model='gpt-4o', temperature=0)
21
+
22
+ # ----------- Enhanced Search Functionality -----------
23
class EnhancedSearchTool:
    """Enhanced web search with intelligent query processing and result filtering.

    Workflow: derive several search queries from the question (LLM-assisted,
    with a regex fallback), run them through DuckDuckGo, then score and rank
    the combined results by relevance before formatting the top hits.
    """

    def __init__(self, max_results: int = 10):
        """
        Args:
            max_results: bounds both the underlying search backend and the
                number of ranked results kept after filtering.
        """
        self.base_tool = DuckDuckGoSearchResults(num_results=max_results)
        self.max_results = max_results

    def _extract_key_terms(self, question: str) -> List[str]:
        """Extract key search terms from the question using the LLM.

        Falls back to simple regex keyword extraction when the LLM call
        fails (e.g. no API key configured, network error).
        """
        try:
            extract_prompt = f"""
            Extract the most important search terms from this question for web search:
            Question: {question}

            Return ONLY a comma-separated list of key terms, no explanations.
            Focus on: proper nouns, specific concepts, technical terms, dates, numbers.
            Avoid: common words like 'what', 'how', 'when', 'the', 'is', 'are'.

            Example: "What is the population of Tokyo in 2023?" -> "Tokyo population 2023"
            """
            response = llm.invoke(extract_prompt).content.strip()
            return [term.strip() for term in response.split(',')]
        except Exception:
            # Fallback to simple keyword extraction
            return self._simple_keyword_extraction(question)

    def _simple_keyword_extraction(self, question: str) -> List[str]:
        """Fallback keyword extraction: alphabetic words minus stop words."""
        stop_words = {'what', 'how', 'when', 'where', 'why', 'who', 'which', 'the', 'is', 'are', 'was', 'were', 'do', 'does', 'did', 'can', 'could', 'should', 'would'}
        words = re.findall(r'\b[A-Za-z]+\b', question.lower())
        return [word for word in words if word not in stop_words and len(word) > 2]

    def _generate_search_queries(self, question: str) -> List[str]:
        """Generate multiple search queries for comprehensive coverage."""
        key_terms = self._extract_key_terms(question)
        queries = []

        # Original question with punctuation stripped.
        cleaned_question = re.sub(r'[^\w\s]', ' ', question).strip()
        queries.append(cleaned_question)

        if key_terms:
            queries.append(' '.join(key_terms[:5]))  # top 5 terms

            # Pattern-specific variants only make sense when key terms exist;
            # previously these could emit queries with an empty term prefix
            # (e.g. " 2024 2025") when extraction returned nothing.
            question_lower = question.lower()
            if any(word in question_lower for word in ['latest', 'recent', 'current', 'new']):
                queries.append(f"{' '.join(key_terms[:3])} 2024 2025")
            if any(word in question_lower for word in ['statistics', 'data', 'number', 'count']):
                queries.append(f"{' '.join(key_terms[:3])} statistics data")
            if any(word in question_lower for word in ['definition', 'what is', 'meaning']):
                queries.append(f"{' '.join(key_terms[:2])} definition meaning")

        return list(dict.fromkeys(queries))  # de-dupe while preserving order

    def _filter_and_rank_results(self, results: List[Dict], question: str) -> List[Dict]:
        """Score results by key-term overlap, recency hints and snippet length."""
        if not results:
            return results

        key_terms = self._extract_key_terms(question)
        key_terms_lower = [term.lower() for term in key_terms]

        scored_results = []
        for result in results:
            score = 0
            text_content = (result.get('snippet', '') + ' ' + result.get('title', '')).lower()

            # Score based on key term matches.
            for term in key_terms_lower:
                if term in text_content:
                    score += text_content.count(term)

            # Bonus for recent dates.
            if any(year in text_content for year in ['2024', '2025', '2023']):
                score += 2

            # Penalty for very short snippets.
            if len(result.get('snippet', '')) < 50:
                score -= 1

            scored_results.append((score, result))

        scored_results.sort(key=lambda x: x[0], reverse=True)
        return [result for score, result in scored_results[:self.max_results]]

    def run(self, question: str) -> str:
        """Run the enhanced search: multi-query, filtered, ranked, formatted."""
        try:
            search_queries = self._generate_search_queries(question)
            all_results = []

            for query in search_queries[:3]:  # cap queries to avoid rate limits
                try:
                    results = self.base_tool.run(query)
                    if isinstance(results, str):
                        # Some backends return a JSON string instead of a list.
                        try:
                            results = json.loads(results) if results.startswith('[') else [{'snippet': results, 'title': 'Search Result'}]
                        except (json.JSONDecodeError, ValueError):
                            # Was a bare `except:`; narrowed so real errors
                            # (KeyboardInterrupt, SystemExit) still propagate.
                            results = [{'snippet': results, 'title': 'Search Result'}]

                    if isinstance(results, list):
                        all_results.extend(results)

                    time.sleep(0.5)  # rate limiting between backend calls
                except Exception as e:
                    print(f"Search query failed: {query} - {e}")
                    continue

            if not all_results:
                return "No search results found."

            filtered_results = self._filter_and_rank_results(all_results, question)

            formatted_results = []
            for i, result in enumerate(filtered_results[:5], 1):
                title = result.get('title', 'No title')
                snippet = result.get('snippet', 'No description')
                link = result.get('link', '')
                formatted_results.append(f"{i}. {title}\n   {snippet}\n   Source: {link}\n")

            return "ENHANCED SEARCH RESULTS:\n" + "\n".join(formatted_results)

        except Exception as e:
            return f"Enhanced search error: {str(e)}"
158
+
159
+ # ----------- Enhanced Wikipedia Tool -----------
160
+
161
class EnhancedWikipediaTool:
    """Enhanced Wikipedia search with entity extraction and content filtering.

    Workflow: pull named entities from the question (LLM-assisted, regex
    fallback), look each one up on Wikipedia, then keep only the article
    sections most relevant to the question.
    """

    def __init__(self):
        # Fetch up to 3 articles, capped at 3000 chars each, with metadata.
        self.base_wrapper = WikipediaAPIWrapper(
            top_k_results=3,
            doc_content_chars_max=3000,
            load_all_available_meta=True
        )
        self.base_tool = WikipediaQueryRun(api_wrapper=self.base_wrapper)

    def _extract_entities(self, question: str) -> List[str]:
        """Extract named entities for Wikipedia search (LLM, regex fallback)."""
        try:
            entity_prompt = f"""
            Extract named entities (people, places, organizations, concepts) from this question for Wikipedia search:
            Question: {question}

            Return ONLY a comma-separated list of the most important entities.
            Focus on: proper nouns, specific names, places, organizations, historical events, scientific concepts.

            Example: "Tell me about Einstein's theory of relativity" -> "Albert Einstein, theory of relativity, relativity"
            """
            response = llm.invoke(entity_prompt).content.strip()
            entities = [entity.strip() for entity in response.split(',')]
            return [e for e in entities if len(e) > 2]
        except Exception:
            # Fallback: capitalized words/phrases and quoted terms.
            return self._extract_capitalized_terms(question)

    def _extract_capitalized_terms(self, question: str) -> List[str]:
        """Fallback entity extraction: capitalized phrases plus quoted terms."""
        capitalized_words = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', question)
        quoted_terms = re.findall(r'"([^"]+)"', question)
        quoted_terms.extend(re.findall(r"'([^']+)'", question))
        return capitalized_words + quoted_terms

    def _search_multiple_terms(self, entities: List[str]) -> Dict[str, str]:
        """Query Wikipedia for each entity; keep only substantive page hits."""
        results = {}
        for entity in entities[:3]:  # cap lookups to limit API traffic
            try:
                result = self.base_tool.run(entity)
                # "Page:" marks a real article result; very short bodies are noise.
                if result and "Page:" in result and len(result) > 100:
                    results[entity] = result
                time.sleep(0.5)  # rate limiting
            except Exception as e:
                print(f"Wikipedia search failed for '{entity}': {e}")
                continue
        return results

    def _extract_relevant_sections(self, content: str, question: str) -> str:
        """Return the sections of *content* most relevant to *question*."""
        if not content or len(content) < 200:
            return content

        # Sections are separated by blank lines in the wrapper's output.
        sections = re.split(r'\n\s*\n', content)

        key_terms = self._extract_entities(question)
        key_terms_lower = [term.lower() for term in key_terms]

        scored_sections = []
        for section in sections:
            # NOTE(review): sections shorter than 500 chars are excluded from
            # scoring entirely -- confirm this threshold is intentional.
            if len(section.strip()) < 500:
                continue

            score = 0
            section_lower = section.lower()

            # Score based on key-term matches.
            for term in key_terms_lower:
                score += section_lower.count(term)

            # Bonus for concrete facts: years and quantities.
            if re.search(r'\b(19|20)\d{2}\b', section):
                score += 1
            if re.search(r'\b\d+([.,]\d+)?\s*(million|billion|thousand|percent|%)\b', section):
                score += 1

            scored_sections.append((score, section))

        scored_sections.sort(key=lambda x: x[0], reverse=True)
        top_sections = [section for score, section in scored_sections[:7] if score > 0]

        if not top_sections:
            # No highly relevant section -- fall back to the article opening.
            top_sections = sections[:2]

        return '\n\n'.join(top_sections)

    def run(self, question: str) -> str:
        """Enhanced Wikipedia search: entities -> lookups -> relevant sections."""
        try:
            entities = self._extract_entities(question)

            if not entities:
                # No entities found: search the cleaned question directly.
                cleaned_question = re.sub(r'[^\w\s]', ' ', question).strip()
                try:
                    result = self.base_tool.run(cleaned_question)
                    return self._extract_relevant_sections(result, question) if result else "No Wikipedia results found."
                except Exception as e:
                    return f"Wikipedia search error: {str(e)}"

            search_results = self._search_multiple_terms(entities)

            if not search_results:
                return "No relevant Wikipedia articles found."

            formatted_results = []
            for entity, content in search_results.items():
                relevant_content = self._extract_relevant_sections(content, question)
                if relevant_content:
                    formatted_results.append(f"=== {entity} ===\n{relevant_content}")

            if not formatted_results:
                return "No relevant information found in Wikipedia articles."

            return "ENHANCED WIKIPEDIA RESULTS:\n\n" + "\n\n".join(formatted_results)

        except Exception as e:
            return f"Enhanced Wikipedia error: {str(e)}"
298
+
299
+ # ----------- Enhanced File Processing Tools -----------
300
def excel_to_markdown(excel_path: str, sheet_name: Optional[str] = None) -> str:
    """Summarise an Excel sheet: dimensions, columns, dtypes, numeric stats
    and a sample of the first rows.

    Args:
        excel_path: path to the .xlsx/.xls file.
        sheet_name: sheet name, or a numeric string treated as a positional
            sheet index; defaults to the first sheet.

    Returns:
        A human-readable analysis string, or an error message on failure.
    """
    try:
        file_path = Path(excel_path).expanduser().resolve()
        if not file_path.is_file():
            return f"Error: Excel file not found at {file_path}"

        # A purely numeric sheet_name selects the sheet by index.
        sheet: Union[str, int] = (
            int(sheet_name) if sheet_name and sheet_name.isdigit() else sheet_name or 0
        )
        df = pd.read_excel(file_path, sheet_name=sheet)
        # NOTE(review): the last column is dropped unconditionally -- this
        # looks dataset-specific; confirm it is intended for general inputs.
        df = df.iloc[:, :-1]

        metadata = "EXCEL FILE ANALYSIS:\n"
        metadata += f"File: {file_path.name}\n"
        metadata += f"Dimensions: {len(df)} rows × {len(df.columns)} columns\n"
        metadata += f"Columns: {', '.join(df.columns.tolist())}\n"
        metadata += f"Data types: {dict(df.dtypes)}\n"

        # Basic statistics for numeric columns.
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            metadata += f"Numeric columns: {list(numeric_cols)}\n"
            for col in numeric_cols:
                metadata += f"  {col}: mean={df[col].mean():.2f}, min={df[col].min()}, max={df[col].max()}, sum={df[col].sum()}\n"

        metadata += "\nSAMPLE DATA (first 10 rows):\n"

        # Bug fix: the header above promised sample data, but the code that
        # appended it had been commented out. Emit a markdown table when the
        # optional `tabulate` dependency is available, otherwise fall back to
        # pandas' plain-text rendering.
        try:
            sample_data = df.head(10).to_markdown(index=False)
        except Exception:
            sample_data = df.head(10).to_string(index=False)

        return metadata + sample_data + f"\n\n(Showing first 10 rows of {len(df)} total rows)"

    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
339
+
340
+
341
+ import os
342
+ import mimetypes
343
+ from pathlib import Path
344
+
345
def image_file_info(image_path: str, question: str) -> str:
    """Answer *question* about the image at *image_path* via the Gemini API.

    Tries the older, more stable `google.generativeai` SDK first and falls
    back to the newer `google.genai` client if the former is not installed.
    Reads the GEMINI_API_KEY environment variable for authentication.

    Returns:
        The model's text answer, or an error message string on any failure.
    """
    try:
        # Check if file exists before touching any SDK.
        if not os.path.exists(image_path):
            return f"Error: Image file not found at {image_path}"

        # Preferred path: the older google.generativeai SDK.
        try:
            import google.generativeai as genai
            from PIL import Image

            genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
            model = genai.GenerativeModel('gemini-1.5-flash')

            # Open and normalise the image to RGB.
            try:
                image = Image.open(image_path)
                if image.mode in ('RGBA', 'LA'):
                    # Flatten transparency onto a white background; both RGBA
                    # and LA carry alpha as the last band, so one paste covers
                    # both (the original had two identical if/else branches).
                    background = Image.new('RGB', image.size, (255, 255, 255))
                    background.paste(image, mask=image.split()[-1])
                    image = background
                elif image.mode != 'RGB':
                    image = image.convert('RGB')
            except Exception as img_error:
                return f"Error opening image: {img_error}"

            response = model.generate_content([question, image])
            return response.text

        except ImportError:
            # Fallback: the newer google.genai client.
            try:
                from google import genai
                from google.genai import types

                client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

                with open(image_path, "rb") as f:
                    img_bytes = f.read()

                # Guess the MIME type; default sensibly for PNG vs JPEG.
                mime_type, _ = mimetypes.guess_type(image_path)
                if mime_type is None or not mime_type.startswith('image/'):
                    if image_path.lower().endswith('.png'):
                        mime_type = "image/png"
                    else:
                        mime_type = "image/jpeg"

                response = client.models.generate_content(
                    model="gemini-1.5-flash",  # stable model choice
                    contents=[
                        question,
                        types.Part.from_bytes(data=img_bytes, mime_type=mime_type)
                    ],
                )
                return response.text

            except Exception as new_sdk_error:
                return f"Error with both SDKs. New SDK error: {new_sdk_error}"

    except Exception as e:
        return f"Error during image analysis: {e}"
423
+
424
def audio_file_info(audio_path: str) -> str:
    """Transcribe an audio file with Whisper and return only the text.

    Args:
        audio_path: path to the audio file to transcribe.

    Returns:
        The transcription text, or an error message string on failure.
    """
    try:
        # Load the model once and reuse it across calls -- whisper.load_model
        # is expensive (reads/downloads model weights every time otherwise).
        model = getattr(audio_file_info, "_model", None)
        if model is None:
            model = whisper.load_model("tiny")  # fast + accurate balance
            audio_file_info._model = model
        result = model.transcribe(audio_path, fp16=False)
        return result['text']
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
432
+
433
def code_file_read(code_path: str) -> str:
    """Read a source file and return a summary header plus the full content.

    For Python files the header additionally lists import lines and counts
    of function and class definitions.

    Args:
        code_path: path to the source file (read as UTF-8).

    Returns:
        Analysis header followed by the file content, or an error string.
    """
    try:
        with open(code_path, "r", encoding="utf-8") as f:
            content = f.read()

        file_path = Path(code_path)

        info = "CODE FILE ANALYSIS:\n"
        info += f"File: {file_path.name}\n"
        info += f"Extension: {file_path.suffix}\n"
        info += f"Size: {len(content)} characters, {len(content.splitlines())} lines\n"

        # Language-specific analysis (Python only).
        if file_path.suffix == '.py':
            import_lines = [line for line in content.splitlines() if line.strip().startswith(('import ', 'from '))]
            if import_lines:
                info += f"Imports ({len(import_lines)}): {', '.join(import_lines[:5])}\n"

            # Bug fix: the original `^def` pattern only matched column-0
            # definitions, so every method inside a class was missed. Match
            # indented and async defs, and indented (nested) classes too.
            func_count = len(re.findall(r'^\s*(?:async\s+)?def\s+\w+', content, re.MULTILINE))
            class_count = len(re.findall(r'^\s*class\s+\w+', content, re.MULTILINE))
            info += f"Functions: {func_count}, Classes: {class_count}\n"

        info += f"\nCODE CONTENT:\n{content}"
        return info

    except Exception as e:
        return f"Error reading code file: {e}"
463
+
464
+
465
+ import yt_dlp
466
+ from pathlib import Path
467
+
468
def extract_youtube_info(question: str) -> Optional[str]:
    """Fetch the transcript of the first YouTube video linked in *question*.

    Bug fixes vs. the original:
    - The docstring described a yt-dlp download that this function never
      performs; it actually retrieves the transcript via
      youtube_transcript_api.
    - A question with no YouTube URL previously drove control flow through a
      TypeError (`re.search(pattern, None)`); now it is handled explicitly.
    - watch?v=, youtu.be/ and embed/ links are all matched by a single
      pattern (the first regex previously rejected embed links that the
      second one accepted).

    Args:
        question: free text possibly containing a YouTube URL.

    Returns:
        The whitespace-normalised transcript, or None when no URL is found
        or the transcript cannot be retrieved.
    """
    id_match = re.search(
        r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})',
        question,
    )
    if id_match is None:
        print("No YouTube URL found in question.")
        return None

    video_id = id_match.group(1)
    print(f"Extracting transcript for YouTube video id: {video_id}")
    try:
        # Get transcript segments and join them into one cleaned string.
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        full_transcript = ' '.join(entry['text'] for entry in transcript_list)
        return re.sub(r'\s+', ' ', full_transcript).strip()
    except Exception as e:
        print(f"Error getting transcript: {e}")
        return None
502
+
503
+
504
+ # def get_youtube_transcript(video_url):
505
+ # """
506
+ # Get transcription from a YouTube video.
507
+
508
+ # Args:
509
+ # video_url (str): YouTube video URL
510
+
511
+ # Returns:
512
+ # str: Full transcription text or None if not available
513
+ # """
514
+ # try:
515
+ # # Extract video ID from URL
516
+ # video_id = re.search(r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})', video_url).group(1)
517
+
518
+ # # Get transcript
519
+ # transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
520
+
521
+ # # Combine all text segments
522
+ # full_transcript = ' '.join([entry['text'] for entry in transcript_list])
523
+
524
+ # # Clean up the text
525
+ # full_transcript = re.sub(r'\s+', ' ', full_transcript).strip()
526
+
527
+ # return full_transcript
528
+
529
+ # except Exception as e:
530
+ # print(f"Error getting transcript: {e}")
531
+ # return None
532
+
533
+ # extract_youtube_info
534
+
535
+ # question="How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
536
+ # wiki=EnhancedWikipediaTool()
537
+ # wiki.run(question)
538
+
539
+
540
+ # entity_prompt = f"""
541
+ # Extract named entities (people, places, organizations, concepts) from this question for Wikipedia search:
542
+ # Question: {question}
543
+
544
+ # Return ONLY a comma-separated list of the most important entities.
545
+ # Focus on: proper nouns, specific names, places, organizations, historical events, scientific concepts.
546
+
547
+ # Example: "Tell me about Einstein's theory of relativity" -> "Albert Einstein, theory of relativity, relativity"
548
+ # """
549
+
550
+ # response = llm.invoke(entity_prompt).content.strip()
551
+
552
+
553
+
554
+
555
+ # result=extract_youtube_info("Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal'c say in response to the question \"Isn't that hot")
556
+
557
+
558
+
559
+