Macmill committed on
Commit
8c86b22
·
verified ·
1 Parent(s): d7b35af

Update final_agent.py

Browse files
Files changed (1) hide show
  1. final_agent.py +536 -418
final_agent.py CHANGED
@@ -1,463 +1,581 @@
1
- # ==============================================================================
2
- # Imports
3
- # ==============================================================================
 
 
 
 
 
 
 
4
  import os
5
- import requests
6
- import traceback
7
- import html2text # For HTML to text conversion
8
- import tempfile # For file handling tools
9
- import pandas as pd # For CSV/Excel analysis
10
- import openpyxl # For Excel analysis
11
- from PIL import Image # For image text extraction
12
- import pytesseract # For image text extraction
13
- from urllib.parse import urlparse # For download tool
14
- from typing import Annotated, List, TypedDict, Optional
15
  from dotenv import load_dotenv
16
- import time # For adding potential delays if needed later
17
-
18
- # LangChain and LangGraph Imports
19
- from langgraph.graph import StateGraph, START, END
20
- from langgraph.graph.message import add_messages
21
- from langgraph.prebuilt import ToolNode, tools_condition
22
- from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
23
- from langchain_core.tools import tool
24
- # LLM Import - Using Groq
25
- from langchain_groq import ChatGroq
26
- from langchain_community.tools.tavily_search import TavilySearchResults
27
 
28
- # ==============================================================================
29
- # Environment Setup & LLM
30
- # ==============================================================================
31
- load_dotenv()
32
- tavily_api_key = os.getenv("TAVILY_API_KEY")
33
- groq_api_key = os.getenv("GROQ_API_KEY")
34
-
35
- # --- Optional: Tesseract Path ---
36
- # If Tesseract OCR is not in your system's PATH environment variable,
37
- # uncomment the following line and set the correct path to tesseract.exe
38
- # try:
39
- # pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # Example path for Windows
40
- # except NameError: pass # Handles case where pytesseract might not be imported yet if PIL fails first
41
- # except Exception as e: print(f"Warning: Could not set tesseract_cmd path: {e}")
42
-
43
-
44
- # --- Validate API Keys ---
45
- if not tavily_api_key:
46
- raise ValueError("TAVILY_API_KEY not found in environment variables/Space secrets.")
47
- if not groq_api_key:
48
- raise ValueError("GROQ_API_KEY not found in environment variables/Space secrets.")
49
-
50
- # --- Initialize LLM (Using Groq) ---
51
  try:
52
- llm = ChatGroq(
53
- model="meta-llama/llama-4-maverick-17b-128e-instruct", # Powerful model available on Groq, good for reasoning
54
- # model="gemma2-9b-it", # Alternative lighter model
55
- api_key=groq_api_key,
56
- temperature=0.3 # Low temperature for factual tasks
57
- )
58
- print(f"LLM Initialized: Groq - {llm.model_name}")
59
- except Exception as e:
60
- print(f"ERROR initializing Groq LLM: {e}")
61
- traceback.print_exc()
62
- raise # Stop if LLM fails to init
 
 
 
 
 
 
63
 
64
  # ==============================================================================
65
- # State Definition
66
  # ==============================================================================
67
- class AgentState(TypedDict):
68
- """Defines the structure of the information the agent tracks during its run."""
69
- input_question: str # The original question from the benchmark
70
- messages: Annotated[List[BaseMessage], add_messages] # History of interactions (Human, AI, Tool)
71
- error: Optional[str] # Stores any error message encountered
72
- iterations: int # Counter for agent steps to prevent loops
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  # ==============================================================================
75
- # Tools Definitions
76
  # ==============================================================================
77
- print("Defining tools...")
78
-
79
- # --- Search Tool (Tavily) ---
80
- search_tool = TavilySearchResults(max_results=3, api_key=tavily_api_key)
81
- search_tool.name = "web_search"
82
- search_tool.description = "Performs a web search (using Tavily) to find relevant URLs/snippets for a query."
83
-
84
- # --- Web Browser Tool (html2text) ---
85
- @tool
86
- def web_browser(url: str) -> str:
87
- """Fetches text content from a webpage URL using html2text. Use after 'web_search'."""
88
- print(f"--- [Tool] Browsing (html2text): {url} ---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  try:
90
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
91
- response = requests.get(url, headers=headers, timeout=20)
92
- response.raise_for_status()
93
- response.encoding = response.apparent_encoding or 'utf-8'
94
- # Configure html2text
95
- h = html2text.HTML2Text(bodywidth=0)
96
- h.ignore_links = True
97
- h.ignore_images = True
98
- # Convert HTML to text
99
- clean_text = h.handle(response.text)
100
- # Limit content length
101
- max_length = 6000
102
- if len(clean_text) > max_length:
103
- return clean_text[:max_length] + "\n\n... [Content Truncated]"
104
- cleaned_and_stripped = clean_text.strip()
105
- return cleaned_and_stripped if cleaned_and_stripped else f"Error: No meaningful content via html2text for {url}."
106
- except requests.exceptions.RequestException as e:
107
- return f"Error: Network request failed for URL: {url}. Reason: {e}"
108
- except Exception as e:
109
- return f"Error: Unexpected error processing URL with html2text: {url}. Reason: {str(e)}"
110
 
111
- # --- File Download Tool ---
112
- @tool
113
- def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
114
- """Downloads a file from a URL to a temporary directory. Input: file URL. Returns: path to downloaded file or error."""
115
- print(f"--- [Tool] Downloading file from: {url} ---")
116
  try:
117
- # Generate filename if needed
118
- if not filename:
119
- try: path = urlparse(url).path; filename = os.path.basename(path) if path else None
120
- except Exception: filename = None
121
- if not filename: import uuid; filename = f"downloaded_{uuid.uuid4().hex[:8]}"
122
- # Define save path
123
- temp_dir = tempfile.gettempdir(); filepath = os.path.join(temp_dir, filename)
124
- # Download file
125
- response = requests.get(url, stream=True, timeout=30); response.raise_for_status()
126
- with open(filepath, 'wb') as f:
127
- for chunk in response.iter_content(chunk_size=8192): f.write(chunk)
128
- print(f"--- [Tool] File downloaded to: {filepath} ---")
129
- return f"File downloaded to {filepath}. Use appropriate tools (e.g., analyze_csv_file) to process it."
130
- except requests.exceptions.RequestException as e:
131
- return f"Error downloading file: Network issue for {url}. Reason: {e}"
 
 
 
 
 
 
 
 
 
132
  except Exception as e:
133
- return f"Error downloading file: Unexpected error for {url}. Reason: {str(e)}"
134
-
135
- # --- CSV Analysis Tool ---
136
- @tool
137
- def analyze_csv_file(file_path: str) -> str:
138
- """Analyzes a CSV file at the given path using pandas. Returns a summary of content or error."""
139
- print(f"--- [Tool] Analyzing CSV: {file_path} ---")
140
- # GAIA might provide relative paths, ensure they work or adjust logic if needed
141
- if not os.path.exists(file_path): return f"Error: CSV file not found at path: {file_path}"
 
142
  try:
143
- df = pd.read_csv(file_path)
144
- # Generate summary string
145
- summary = f"CSV Analysis Report for {os.path.basename(file_path)}:\n"
146
- summary += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
147
- summary += f"- Columns: {', '.join(df.columns)}\n"
148
- summary += f"\nFirst 5 rows:\n{df.head().to_string()}\n"
149
- numeric_cols = df.select_dtypes(include=['number'])
150
- if not numeric_cols.empty:
151
- summary += f"\nBasic Stats (Numeric):\n{numeric_cols.describe().to_string()}"
152
- else:
153
- summary += "\nNo numeric columns for stats."
154
- return summary
155
- except ImportError: return "Error: 'pandas' required but not installed."
156
- except Exception as e: return f"Error analyzing CSV {file_path}: {str(e)}"
157
-
158
- # --- Excel Analysis Tool ---
159
- @tool
160
- def analyze_excel_file(file_path: str) -> str:
161
- """Analyzes an Excel file (.xlsx, .xls) at the given path. Returns a summary of the first sheet or error."""
162
- print(f"--- [Tool] Analyzing Excel: {file_path} ---")
163
- if not os.path.exists(file_path): return f"Error: Excel file not found at path: {file_path}"
164
  try:
165
- df = pd.read_excel(file_path, engine='openpyxl')
166
- # Generate summary string
167
- summary = f"Excel Analysis Report for {os.path.basename(file_path)} (First Sheet):\n"
168
- summary += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
169
- summary += f"- Columns: {', '.join(df.columns)}\n"
170
- summary += f"\nFirst 5 rows:\n{df.head().to_string()}\n"
171
- numeric_cols = df.select_dtypes(include=['number'])
172
- if not numeric_cols.empty:
173
- summary += f"\nBasic Stats (Numeric):\n{numeric_cols.describe().to_string()}"
174
- else:
175
- summary += "\nNo numeric columns for stats."
176
- return summary
177
- except ImportError: return "Error: 'pandas' and 'openpyxl' required but not installed."
178
- except Exception as e: return f"Error analyzing Excel {file_path}: {str(e)}"
179
-
180
- # --- Image Text Extraction Tool (OCR) ---
181
- @tool
182
- def extract_text_from_image(file_path: str) -> str:
183
- """Extracts text from an image file at the given path using Tesseract OCR. Returns extracted text or error."""
184
- print(f"--- [Tool] Extracting text from image: {file_path} ---")
185
- if not os.path.exists(file_path): return f"Error: Image file not found at path: {file_path}"
186
  try:
187
- # Need to explicitly handle potential empty string from pytesseract
188
- text = pytesseract.image_to_string(Image.open(file_path))
189
- text_stripped = text.strip()
190
- # Return a clear message if no text found, otherwise return extracted text
191
- return f"Extracted text from image '{os.path.basename(file_path)}':\n{text_stripped}" if text_stripped else "No text found in image."
192
- except ImportError: return "Error: 'Pillow' or 'pytesseract' required but not installed."
193
- except pytesseract.TesseractNotFoundError: return "Error: Tesseract OCR not installed or not in PATH."
194
- except Exception as e: return f"Error extracting text from image {file_path}: {str(e)}"
195
-
196
- # --- Basic Math Tools ---
197
- @tool
198
- def add(a: float, b: float) -> float:
199
- """Adds two numbers (a + b). Handles float inputs."""
200
- print(f"--- [Tool] Calculating: {a} + {b} ---")
201
- return a + b
202
- @tool
203
- def subtract(a: float, b: float) -> float:
204
- """Subtracts the second number from the first (a - b). Handles float inputs."""
205
- print(f"--- [Tool] Calculating: {a} - {b} ---")
206
- return a - b
207
- @tool
208
- def multiply(a: float, b: float) -> float:
209
- """Multiplies two numbers (a * b). Handles float inputs."""
210
- print(f"--- [Tool] Calculating: {a} * {b} ---")
211
- return a * b
212
- @tool
213
- def divide(a: float, b: float) -> float | str:
214
- """Divides the first number by the second (a / b). Handles float inputs and division by zero."""
215
- print(f"--- [Tool] Calculating: {a} / {b} ---")
216
- if b == 0: return "Error: Cannot divide by zero."
217
- return a / b
218
-
219
- # --- Compile list of all tools ---
220
- tools = [ search_tool, web_browser, download_file_from_url, analyze_csv_file,
221
- analyze_excel_file, extract_text_from_image, add, subtract, multiply, divide ]
222
-
223
- # --- Bind tools to the LLM ---
224
- # Ensure LLM is initialized before binding
225
- if 'llm' not in globals():
226
- raise RuntimeError("LLM was not initialized successfully before binding tools.")
227
- llm_with_tools = llm.bind_tools(tools)
228
- print(f"Agent initialized with {len(tools)} tools.")
229
 
230
- # ==============================================================================
231
- # Node Definitions (With Logging Added)
232
- # ==============================================================================
233
- print("Defining graph nodes...")
234
-
235
- # --- Agent Node ---
236
- def call_agent_node(state: AgentState) -> dict:
237
- """Invokes the LLM with current state to decide the next step."""
238
- # --- Logging: Node Entry ---
239
- print(f"\n>>> Entering Agent Node (Iteration {state['iterations']})")
240
- MAX_ITERATIONS = 15 # Max steps allowed for the task - Increased slightly
241
- current_iterations = state.get('iterations', 0)
242
- if current_iterations >= MAX_ITERATIONS:
243
- print(f"!!! Agent Node: Max iterations ({MAX_ITERATIONS}) reached. Setting error.")
244
- return {"error": f"Max iterations ({MAX_ITERATIONS}) reached."}
245
  try:
246
- print(f"--- Agent Node: Invoking LLM ({llm.model_name})... ---") # Log before LLM call
247
- # Ensure LLM is bound with tools before invoking
248
- if 'llm_with_tools' not in globals():
249
- return {"error": "LLM tools not bound."}
250
-
251
- response = llm_with_tools.invoke(state['messages'])
252
- print(f"--- Agent Node: LLM Invocation Complete. ---") # Log after LLM call
253
- # response.pretty_print() # Optional: Print formatted LLM response
254
- # --- Logging: Node Exit (Success) ---
255
- print(f"<<< Exiting Agent Node (Success, Iteration {current_iterations + 1})")
256
- return {"messages": [response], "iterations": current_iterations + 1}
 
 
 
 
 
 
 
 
257
  except Exception as e:
258
- error_message = f"LLM invocation failed: {str(e)}"
259
- print(f"!!! Agent Node ERROR: {error_message} !!!")
260
- traceback.print_exc() # Print full traceback for debugging LLM errors
261
- # --- Logging: Node Exit (Error) ---
262
- print(f"<<< Exiting Agent Node (LLM Error, Iteration {current_iterations})")
263
- # Return an error message and set error state, still increment iteration to prevent infinite error loops
264
- return {"messages": [AIMessage(content=f"Error during LLM call: {error_message}")], "error": error_message, "iterations": current_iterations + 1}
265
-
266
- # --- Tool Node Wrapper (for Logging) ---
267
- # We still use the prebuilt ToolNode, but wrap its call for logging
268
- tool_executor = ToolNode(tools) # Keep the instance
269
-
270
- def logged_tool_node(state: AgentState) -> dict:
271
- """Logs tool execution start/end and calls the actual ToolNode."""
272
- print(f">>> Entering Tool Node")
273
- # Log requested tools
274
- last_message = state['messages'][-1]
275
- requested_tools_str = "None"
276
- tool_calls = []
277
- if hasattr(last_message, "tool_calls") and last_message.tool_calls:
278
- tool_calls = last_message.tool_calls
279
- tool_names = [tc.get('name', 'unknown') for tc in tool_calls]
280
- requested_tools_str = ", ".join(tool_names)
281
- print(f"--- Tool Node: Executing tools: {requested_tools_str} ---")
282
- if tool_calls: print(f"--- Tool Node: Tool Args: {[tc.get('args') for tc in tool_calls]} ---")
283
-
284
 
 
 
 
285
  try:
286
- # Call the actual ToolNode instance
287
- result = tool_executor.invoke(state)
288
- # Log truncated results
289
- print("--- Tool Node: Tool Execution Results ---")
290
- if isinstance(result.get("messages"), list):
291
- for msg in result["messages"]:
292
- if isinstance(msg, ToolMessage):
293
- print(f" - Tool: {msg.name}, Result (start): {str(msg.content)[:200]}...") # Slightly more context
294
- print(f"<<< Exiting Tool Node (Success)")
295
- return result # Return the dictionary containing ToolMessages
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  except Exception as e:
297
- error_message = f"ToolNode invocation exception: {str(e)}"
298
- print(f"!!! Tool Node ERROR: {error_message} !!!")
299
- traceback.print_exc()
300
- print(f"<<< Exiting Tool Node (Error)")
301
- # Return an error message in the expected format
302
- return {"messages": [ToolMessage(content=error_message, tool_call_id="tool_node_error")]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
 
305
  # ==============================================================================
306
- # Graph Construction (Non-conversational, using logged tool node)
307
  # ==============================================================================
308
- print("Building agent graph...")
309
- builder = StateGraph(AgentState)
310
- builder.add_node("agent", call_agent_node)
311
- builder.add_node("tools", logged_tool_node) # Use the logging wrapper node
312
- builder.add_edge(START, "agent")
313
- builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", END: END})
314
- builder.add_edge("tools", "agent")
315
-
316
- # Compile the graph globally so it's ready for the function call
317
  try:
318
- graph = builder.compile()
319
- print("GAIA agent graph compiled successfully.")
 
 
 
 
320
  except Exception as e:
321
- print(f"ERROR: Failed to compile LangGraph graph: {e}")
322
- traceback.print_exc()
323
- graph = None # Ensure graph is None if compilation fails
324
- raise # Reraise exception to make startup failure clear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
 
326
  # ==============================================================================
327
- # Main Execution Function for GAIA Benchmark <<<< WRAPPER FUNCTION >>>>
328
  # ==============================================================================
329
- def answer_gaia_task(question: str, file_path: Optional[str] = None) -> str:
330
- """
331
- Runs the compiled GAIA agent graph for a given question and optional file path.
332
- This is the main entry point expected by the benchmark runner.
333
- """
334
- # Check if graph compilation was successful
335
- if graph is None:
336
- return "Error: Agent graph was not compiled successfully during setup."
337
-
338
- print(f"\n{'='*20} Running Agent for GAIA Task {'='*20}")
339
- print(f"Question: {question}")
340
- file_context_info = f"An associated file is provided at path: '{file_path}'. Use this path if relevant." if file_path else ""
341
-
342
- # Define the initial prompt sent to the agent, incorporating strict formatting rules
343
- prompt_content = f"""Your task is to accurately answer the following question based *only* on information obtained using your tools (web search, web browser, file download, csv/excel analysis, image OCR, math).
344
 
345
- {file_context_info}
346
-
347
- Follow these steps methodically:
348
- 1. Analyze the question: {question}
349
- 2. Use tools ONLY if necessary to gather the specific information required. Assume local file paths mentioned (like 'data.csv') are accessible.
350
- 3. Synthesize the final answer from the gathered information.
351
 
352
- **CRITICAL OUTPUT FORMATTING RULES:**
353
- * Your final response MUST be ONLY the answer, without any other text/explanations.
354
- * **Numbers:** No commas (1000). No units ($ , %) unless asked.
355
- * **Strings:** No articles (a, an, the) unless proper noun. No abbreviations (Saint Petersburg) unless answer is abbreviation. Use numerals (5).
356
- * **Lists:** Comma-separated (apple,banana,cherry). Apply number/string rules to elements.
357
- * If answer not found, output only the exact phrase: Information not found
358
 
359
- Provide ONLY the final answer according to these rules.
360
- """
361
-
362
- # Create the initial state for the graph run
363
- initial_state = AgentState(
364
- input_question=question,
365
- messages=[HumanMessage(content=prompt_content)],
366
- error=None,
367
- iterations=0
368
- )
369
 
370
- final_answer = "Error: Agent execution did not complete successfully." # Default fallback
 
371
 
372
  try:
373
- # Invoke the compiled graph
374
- final_state = graph.invoke(initial_state, config={"recursion_limit": 20}) # Increased recursion limit
375
-
376
- # Process the final state to extract the answer
377
- if final_state:
378
- # Prioritize showing agent error if one occurred
379
- if final_state.get("error"):
380
- print(f"--- Agent stopped due to ERROR: {final_state['error']} ---")
381
- final_answer = f"Error: {final_state['error']}"
382
- # Otherwise, try to get the last AI message content
383
- elif final_state.get('messages') and isinstance(final_state['messages'][-1], AIMessage):
384
- potential_answer = final_state['messages'][-1].content
385
- # Basic cleanup for potential quotes added by LLM
386
- if isinstance(potential_answer, str):
387
- if (potential_answer.startswith('"') and potential_answer.endswith('"')) or \
388
- (potential_answer.startswith("'") and potential_answer.endswith("'")):
389
- potential_answer = potential_answer[1:-1].strip()
390
- print(f"--- Final Answer (from AI): {potential_answer} ---")
391
- final_answer = potential_answer
392
- else:
393
- print("--- Could not determine final answer (last message not AI or missing). Check logs. ---")
394
- # Log final state details for debugging
395
- print(f"Final State: Error={final_state.get('error')}, Iterations={final_state.get('iterations')}")
396
 
397
- except Exception as e:
398
- print(f"--- Graph execution failed unexpectedly: {e} ---")
399
- traceback.print_exc()
400
- final_answer = f"Error: Graph execution failed - {str(e)}"
401
-
402
- print(f"{'='*20} Agent Run Finished {'='*20}")
403
- # Return the final answer string
404
- return str(final_answer)
405
 
 
 
 
 
 
 
 
 
 
406
 
407
  # ==============================================================================
408
- # Local Testing Block (Optional)
409
  # ==============================================================================
410
- # This block allows you to test the agent by running final_agent.py directly.
411
- if __name__ == "__main__":
412
- print("\n--- Running Local Test ---")
413
- # --- Define Test Question ---
414
- test_question = "What is the result of multiplying the number of rows (excluding the header) in 'data.csv' by the number found after the phrase 'total items:' in 'image.png'?"
415
-
416
- # --- Create Dummy Files for Local Test ---
417
- print("Creating dummy files for local test...")
418
- dummy_files_created = True
419
- try:
420
- # Dummy CSV with 3 data rows + header
421
- with open("data.csv", "w") as f:
422
- f.write("Header1,Header2\nRow1Val1,Row1Val2\nRow2Val1,Row2Val2\nRow3Val1,Row3Val2")
423
 
424
- # Dummy Image containing the required text
 
 
 
 
425
  try:
426
- img = Image.new('RGB', (300, 50), color = (255, 255, 255)) # White background
427
- from PIL import ImageDraw, ImageFont # Import drawing tools locally
428
- draw = ImageDraw.Draw(img)
429
- # Use a basic font if specific ones aren't found
430
- try: font = ImageFont.truetype("arial.ttf", 15)
431
- except IOError: font = ImageFont.load_default()
432
- draw.text((10,10), "Some random info... total items: 7 ... more text", fill=(0,0,0), font=font) # Black text
433
- img.save("image.png")
434
- print("Dummy data.csv and image.png created successfully.")
435
- except ImportError:
436
- print("Pillow/ImageDraw/ImageFont not installed. Cannot create dummy image file.")
437
- dummy_files_created = False
438
- except Exception as img_e:
439
- print(f"Error creating dummy image: {img_e}")
440
- dummy_files_created = False
441
-
442
- except Exception as file_e:
443
- print(f"Error creating dummy files: {file_e}")
444
- dummy_files_created = False
445
- # ---------------------------------------------
446
-
447
- # --- Run the Test ---
448
- if dummy_files_created:
449
- # Call the main function, simulating how the benchmark runner would call it.
450
- result = answer_gaia_task(question=test_question, file_path=None)
451
- print(f"\n--- Local Test Result ---")
452
- print(f"Returned Answer: {result}")
453
- print(f"Expected Answer (for dummy files): 21") # 3 data rows * 7 = 21
454
- else:
455
- print("Skipping test execution due to issues creating dummy files.")
456
-
457
- # --- Clean up Dummy Files ---
458
- print("\nCleaning up dummy files...")
459
- for dummy_file in ["data.csv", "image.png"]:
460
- if os.path.exists(dummy_file):
461
- try: os.remove(dummy_file)
462
- except Exception as e: print(f"Could not remove {dummy_file}: {e}")
463
- print("Dummy file cleanup attempted.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ GAIA Benchmark Agent using LangChain, Groq, Tavily, and various tools.
4
+
5
+ This agent is designed to interact with files, search the web, scrape pages,
6
+ execute Python code, read Excel files, and transcribe audio/YouTube videos
7
+ to tackle complex tasks like those found in the GAIA benchmark.
8
+ """
9
+
10
+ # --- Core Libraries ---
11
  import os
12
+ import sys
13
+ import subprocess
14
+ import time
15
+ import importlib
16
+ from pathlib import Path
17
+ from typing import List, Optional, Dict, Any
18
+
19
+ # --- Environment & Configuration ---
 
 
20
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # --- LangChain Imports ---
23
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
24
+ from langchain_core.tools import BaseTool, tool
25
+ from langchain.pydantic_v1 import BaseModel, Field # Use Pydantic v1 for Langchain tool compatibility
26
+ from langchain.memory import ConversationBufferWindowMemory
27
+ from langchain.agents import AgentExecutor, create_structured_chat_agent
28
+
29
+ # --- Tool Specific Imports ---
30
+ # Search
31
+ from langchain_community.utilities import TavilySearchResults
32
+ # Web Scraping
33
+ import requests
34
+ from bs4 import BeautifulSoup
35
+ # LLM
36
+ from langchain_groq import ChatGroq
37
+ # Audio/Video Transcription (Optional)
 
 
 
 
 
 
 
38
  try:
39
+ import openai
40
+ OPENAI_AVAILABLE = True
41
+ except ImportError:
42
+ OPENAI_AVAILABLE = False
43
+ # Excel Reading (Optional)
44
+ try:
45
+ import pandas as pd
46
+ PANDAS_AVAILABLE = True
47
+ except ImportError:
48
+ PANDAS_AVAILABLE = False
49
+ # YouTube Processing (Optional)
50
+ try:
51
+ from pytube import YouTube
52
+ from pytube.exceptions import PytubeError
53
+ PYTUBE_AVAILABLE = True
54
+ except ImportError:
55
+ PYTUBE_AVAILABLE = False
56
 
57
  # ==============================================================================
58
+ # 1. CONFIGURATION
59
  # ==============================================================================
60
+ load_dotenv() # Load environment variables from .env file if it exists
61
+
62
+ # --- Agent Settings ---
63
+ AGENT_WORKSPACE = Path("./gaia_agent_workspace")
64
+ AGENT_WORKSPACE.mkdir(exist_ok=True) # Ensure workspace directory exists
65
+ MAX_ITERATIONS = 15
66
+ MEMORY_WINDOW_SIZE = 10
67
+
68
+ # --- LLM Configuration ---
69
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
70
+ GROQ_MODEL_NAME = os.getenv("GROQ_MODEL_NAME", "llama3-70b-8192") # Default if not set
71
+
72
+ # --- Tool Configuration ---
73
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
74
+ TAVILY_MAX_RESULTS = 3
75
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # Needed for Whisper
76
+ WHISPER_MODEL = "whisper-1"
77
+
78
+ # --- Dependency & API Key Checks ---
79
+ if not GROQ_API_KEY:
80
+ print("ERROR: GROQ_API_KEY environment variable not set. Agent cannot run.")
81
+ sys.exit(1)
82
+ if not TAVILY_API_KEY:
83
+ print("ERROR: TAVILY_API_KEY environment variable not set. Search tool disabled.")
84
+ # Decide if this is fatal or just disables the tool
85
+ # sys.exit(1) # Uncomment to make it fatal
86
+
87
+ openai_client = None
88
+ if OPENAI_AVAILABLE and OPENAI_API_KEY:
89
+ try:
90
+ openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)
91
+ print("OpenAI client initialized for Whisper transcription.")
92
+ except Exception as e:
93
+ print(f"Warning: Failed to initialize OpenAI client: {e}. Transcription tools disabled.")
94
+ openai_client = None
95
+ elif OPENAI_AVAILABLE:
96
+ print("Warning: OpenAI library installed, but OPENAI_API_KEY not set. Transcription tools disabled.")
97
+ else:
98
+ print("Info: OpenAI library not installed. Transcription tools disabled.")
99
+
100
+ if not PANDAS_AVAILABLE:
101
+ print("Info: 'pandas' library not installed. Excel tool disabled. Install with: pip install pandas openpyxl")
102
+ if not PYTUBE_AVAILABLE:
103
+ print("Info: 'pytube' library not installed. YouTube tool disabled. Install with: pip install pytube")
104
 
105
  # ==============================================================================
106
+ # 2. TOOL DEFINITIONS
107
  # ==============================================================================
108
+
109
+ # --- Tool Input Schemas (Pydantic Models) ---
110
+ # Using Pydantic v1 as required by Langchain tools at the time of writing
111
+
112
+ class FileWriteArgs(BaseModel):
113
+ relative_path: str = Field(description="Relative path within the agent's workspace where the file should be written.")
114
+ content: str = Field(description="The text content to write into the file.")
115
+
116
+ class FileReadArgs(BaseModel):
117
+ relative_path: str = Field(description="Relative path within the agent's workspace of the file to read.")
118
+
119
+ class ListDirectoryArgs(BaseModel):
120
+ relative_path: str = Field(default=".", description="Relative path within the agent's workspace to list contents of. Use '.' for the root.")
121
+
122
+ class RunPythonCodeArgs(BaseModel):
123
+ code: str = Field(description="The Python code to execute. Use 'print()' to output results. Code runs in isolation.")
124
+
125
+ class WebScrapeArgs(BaseModel):
126
+ url: str = Field(description="The URL of the webpage to scrape.")
127
+ query: Optional[str] = Field(default=None, description="Optional specific question to answer from the page content.")
128
+
129
+ class ReadExcelArgs(BaseModel):
130
+ relative_path: str = Field(description="Relative path within the agent's workspace of the Excel file (.xlsx or .xls).")
131
+ sheet_name: Optional[str] = Field(default=None, description="Optional name of the specific sheet to read. Reads the first sheet if not specified.")
132
+ max_rows_preview: int = Field(default=20, description="Maximum number of rows to include in the text preview.")
133
+
134
+ class TranscribeAudioArgs(BaseModel):
135
+ relative_path: str = Field(description="Relative path within the agent's workspace of the audio file (e.g., .mp3, .wav, .m4a). Max 25MB.")
136
+
137
+ class TranscribeYouTubeArgs(BaseModel):
138
+ youtube_url: str = Field(description="The URL of the YouTube video to transcribe. Audio will be downloaded temporarily.")
139
+
140
+ # --- Helper Functions ---
141
+
142
def _resolve_path(relative_path: str) -> Optional[Path]:
    """Resolve ``relative_path`` against the agent workspace and check bounds.

    Args:
        relative_path: Path supplied by the agent, interpreted relative to
            ``AGENT_WORKSPACE``.

    Returns:
        The resolved absolute path when it lies inside the workspace (or is
        the workspace root itself); ``None`` when the path escapes the
        workspace or cannot be resolved.
    """
    try:
        workspace_root = AGENT_WORKSPACE.resolve()
        full_path = (AGENT_WORKSPACE / relative_path).resolve()
        # Security check: compare path components, not string prefixes.
        # A plain ``startswith`` test would accept a sibling directory whose
        # name merely begins with the workspace name (e.g. reached through
        # ``../<workspace>_evil``). ``relative_to`` raises ValueError unless
        # ``full_path`` is the root or a true descendant of it.
        full_path.relative_to(workspace_root)
    except (OSError, RuntimeError, TypeError, ValueError):
        # ValueError: outside the workspace (or malformed path);
        # the others cover path resolution failures.
        return None
    return full_path
152
+
153
def _transcribe_audio(file_path: Path, file_description: str) -> str:
    """Transcribe an audio file with the OpenAI transcription API.

    Args:
        file_path: Location of the audio file on disk.
        file_description: Human-readable label for the file, used in log and
            result messages.

    Returns:
        The transcription text prefixed with a short header (truncated to
        10000 characters), or a string starting with "Error" / describing the
        failure. This helper reports problems via its return value rather
        than raising.
    """
    if not openai_client:
        return "Error: OpenAI client not available for transcription."
    if not file_path.is_file():
        # Show the path relative to the workspace when possible. Both sides
        # are resolved first: AGENT_WORKSPACE is a relative path while callers
        # may pass an absolute one, and ``relative_to`` raises ValueError on
        # such a mix. Fall back to the path as given if it is not inside the
        # workspace.
        try:
            shown_path = file_path.resolve().relative_to(AGENT_WORKSPACE.resolve())
        except (OSError, RuntimeError, ValueError):
            shown_path = file_path
        return f"Error: Audio file not found at '{shown_path}'"

    try:
        file_size_mb = file_path.stat().st_size / (1024 * 1024)
        if file_size_mb > 25:
            return f"Error: Audio file '{file_description}' is too large ({file_size_mb:.2f} MB). Max 25 MB."

        print(f"Transcribing audio: {file_description}...")
        with open(file_path, "rb") as audio_file_handle:
            # Note: response_format="text" returns a simple string
            transcript = openai_client.audio.transcriptions.create(
                model=WHISPER_MODEL,
                file=audio_file_handle,
                response_format="text"
            )
        print("Transcription complete.")

        if isinstance(transcript, str):
            max_len = 10000  # Limit transcription length in output
            if len(transcript) > max_len:
                transcript = transcript[:max_len] + "\n... [Transcription truncated]"
            return f"Transcription of '{file_description}':\n{transcript}"
        else:
            return f"Transcription of '{file_description}' succeeded, but format was unexpected: {type(transcript)}"

    except openai.APIError as e:
        return f"OpenAI API Error during transcription of '{file_description}': {e}"
    except Exception as e:
        # Best-effort boundary: the caller expects an error string, not an exception.
        return f"Error transcribing '{file_description}': {e}"
187
+
188
+ # --- Tool Implementations ---
189
+
190
@tool("write_file", args_schema=FileWriteArgs)
def write_file(relative_path: str, content: str) -> str:
    """Writes text content to a file within the agent's workspace. Creates parent directories if needed."""
    # The docstring above doubles as the tool description, so it is kept verbatim.
    destination = _resolve_path(relative_path)
    if destination is None:
        # _resolve_path yields None for anything outside the workspace.
        return f"Error: Invalid or disallowed path '{relative_path}'."

    try:
        # Make sure every intermediate directory exists before writing.
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(content, encoding='utf-8')
    except Exception as e:
        return f"Error writing file '{relative_path}': {e}"
    return f"Successfully wrote to file: {relative_path}"
203
+
204
@tool("read_file", args_schema=FileReadArgs)
def read_file(relative_path: str) -> str:
    """Reads the text content of a file from the agent's workspace. Limited read size."""
    # The docstring above doubles as the tool description, so it is kept verbatim.
    source = _resolve_path(relative_path)
    if source is None:
        return f"Error: Invalid or disallowed path '{relative_path}'."
    if not source.is_file():
        return f"Error: File not found at '{relative_path}'"

    read_limit = 10000  # Upper bound on characters handed back to the agent
    try:
        with source.open('r', encoding='utf-8') as handle:
            text = handle.read(read_limit)
            # Probe one more character to learn whether anything was left unread.
            has_more = bool(handle.read(1))
    except Exception as e:
        return f"Error reading file '{relative_path}': {e}"

    if has_more:
        return text + "\n... [File truncated due to length]"
    return text
220
+
221
@tool("list_directory", args_schema=ListDirectoryArgs)
def list_directory(relative_path: str = ".") -> str:
    """Lists the contents (files and directories) of a specified directory within the agent's workspace."""
    # The docstring above doubles as the tool description, so it is kept verbatim.
    directory = _resolve_path(relative_path)
    if directory is None:
        return f"Error: Invalid or disallowed path '{relative_path}'."
    if not directory.is_dir():
        return f"Error: '{relative_path}' is not a valid directory."

    try:
        # Sub-directories are marked with a trailing slash so the agent can
        # tell them apart from regular files.
        names = [
            f"{entry.name}/" if entry.is_dir() else entry.name
            for entry in directory.iterdir()
        ]
    except Exception as e:
        return f"Error listing directory '{relative_path}': {e}"

    if len(names) == 0:
        return f"Directory '{relative_path}' is empty."
    listing = "\n".join(names)
    return f"Contents of '{relative_path}':\n" + listing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
@tool("run_python_code", args_schema=RunPythonCodeArgs)
def run_python_code(code: str) -> str:
    """Executes Python code in a subprocess and returns the stdout/stderr. Use print() for output. WARNING: Executes arbitrary code."""
    # The docstring above doubles as the tool description, so it is kept verbatim.
    #
    # SECURITY NOTE: `code` comes from the model and is executed unsandboxed with
    # the privileges of this process. Only the working directory and a 30 second
    # timeout constrain it; run the agent in an isolated environment.
    max_stream_chars = 2000  # Cap applied to stdout and stderr independently
    try:
        process = subprocess.run(
            [sys.executable, "-c", code],
            capture_output=True,
            text=True,
            timeout=30,
            cwd=AGENT_WORKSPACE,
            check=False,  # Don't raise error on non-zero exit
        )
        output, error = process.stdout, process.stderr
        result = ""
        if output:
            if len(output) > max_stream_chars:
                output = output[:max_stream_chars] + "\n... [Output truncated]"
            result += f"Output:\n{output}\n"
        if error:
            # stderr used to be returned unbounded, so a long traceback or a
            # noisy warning loop could flood the agent's context. Keep the
            # *tail*, because the exception type and message come last.
            if len(error) > max_stream_chars:
                error = "... [Error output truncated]\n" + error[-max_stream_chars:]
            result += f"Error Output:\n{error}\n"

        if process.returncode == 0:
            return f"Execution successful.\n{result}"
        else:
            return f"Execution failed (Return Code: {process.returncode}).\n{result}"
    except subprocess.TimeoutExpired:
        return "Error: Code execution timed out after 30 seconds."
    except Exception as e:
        return f"Error executing Python code: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
@tool("scrape_webpage", args_schema=WebScrapeArgs)
def scrape_webpage(url: str, query: Optional[str] = None) -> str:
    """Scrapes text content from a given URL using BeautifulSoup. If a query is provided, returns content for the agent to answer it."""
    # The docstring above doubles as the tool description, so it is kept verbatim.
    char_budget = 10000  # Limit content length
    try:
        # Identify the agent in the User-Agent header.
        request_headers = {'User-Agent': 'Mozilla/5.0 (compatible; GAIA-Agent/1.0)'}
        page = requests.get(url, headers=request_headers, timeout=20)
        page.raise_for_status()  # 4xx/5xx responses become HTTPError

        # Only HTML is parsed; images, PDFs, JSON and the like are refused.
        mime = page.headers.get('content-type', '').lower()
        if 'text/html' not in mime:
            return f"Error: Content type of URL {url} is '{mime}', not HTML. Cannot scrape."

        document = BeautifulSoup(page.text, 'html.parser')
        # Drop elements that rarely carry the main content.
        for clutter in document(["script", "style", "nav", "footer", "aside"]):
            clutter.decompose()

        extracted = document.get_text(separator='\n', strip=True)
        if not extracted:
            return f"Could not extract meaningful text content from {url}."

        if len(extracted) > char_budget:
            extracted = extracted[:char_budget] + "\n... [Content truncated]"

        if not query:
            return f"Content scraped from {url}:\n\n{extracted}"
        return f"Use the following content from {url} to answer the query '{query}':\n\n{extracted}"

    except requests.exceptions.RequestException as e:
        return f"Error fetching or reading URL {url}: {e}"
    except Exception as e:
        return f"Error scraping URL {url}: {e}"
296
+
297
+ # --- Optional Tools (Conditionally Available) ---
298
+
299
if PANDAS_AVAILABLE:
    @tool("read_excel_file", args_schema=ReadExcelArgs)
    def read_excel_file(relative_path: str, sheet_name: Optional[str] = None, max_rows_preview: int = 20) -> str:
        """Reads data from an Excel file (.xlsx or .xls) within the workspace and returns a text preview."""
        # The docstring above doubles as the tool description, so it is kept verbatim.
        full_path = _resolve_path(relative_path)
        if not full_path:
            return f"Error: Invalid or disallowed path '{relative_path}'."
        if not full_path.is_file():
            return f"Error: Excel file not found at '{relative_path}'"
        try:
            # Open the workbook once and close it deterministically. Previously
            # the ExcelFile handle was never closed and the file was opened a
            # second time by read_excel().
            with pd.ExcelFile(full_path) as excel_file:
                if sheet_name:
                    if sheet_name not in excel_file.sheet_names:
                        return f"Error: Sheet '{sheet_name}' not found. Available: {excel_file.sheet_names}"
                    sheet_to_read = sheet_name
                else:
                    # No sheet requested: default to the first one in the workbook.
                    sheet_to_read = excel_file.sheet_names[0]

                df = pd.read_excel(excel_file, sheet_name=sheet_to_read)

            output = f"Preview of sheet '{sheet_to_read}' from '{relative_path}' ({df.shape[0]} rows, {df.shape[1]} cols):\n"
            output += df.to_string(max_rows=max_rows_preview, max_cols=15)  # Preview format

            # Hard cap on the text handed back, independent of the row/column limits.
            max_output_len = 5000
            if len(output) > max_output_len:
                output = output[:max_output_len] + "\n... [Output truncated]"
            return output
        except Exception as e:
            return f"Error reading Excel file '{relative_path}': {e}"
324
+
325
if OPENAI_AVAILABLE and openai_client:
    @tool("transcribe_audio_file", args_schema=TranscribeAudioArgs)
    def transcribe_audio_file(relative_path: str) -> str:
        """Transcribes audio content from a file in the workspace using OpenAI Whisper (max 25MB)."""
        # The docstring above doubles as the tool description, so it is kept verbatim.
        audio_path = _resolve_path(relative_path)
        if audio_path is None:
            return f"Error: Invalid or disallowed path '{relative_path}'."
        # Existence and size checks, plus the API call itself, live in the helper;
        # the agent-supplied relative path is used as the label in its messages.
        return _transcribe_audio(audio_path, relative_path)
332
+
333
if PYTUBE_AVAILABLE and OPENAI_AVAILABLE and openai_client:
    @tool("transcribe_youtube_video", args_schema=TranscribeYouTubeArgs)
    def transcribe_youtube_video(youtube_url: str) -> str:
        """Downloads audio from a YouTube URL, transcribes it using OpenAI Whisper, and returns the text."""
        # The docstring above doubles as the tool description, so it is kept verbatim.
        temp_audio_path = None  # Set once a download target is chosen; checked in `finally`
        try:
            print(f"Processing YouTube URL: {youtube_url}")
            yt = YouTube(youtube_url)

            # Prefer the highest-bitrate mp4 audio track; otherwise fall back to
            # the highest-bitrate audio track of any container.
            audio_stream = (
                yt.streams.filter(only_audio=True, file_extension='mp4').order_by('abr').desc().first()
                or yt.streams.filter(only_audio=True).order_by('abr').desc().first()
            )
            if not audio_stream:
                return f"Error: No suitable audio stream found for {youtube_url}"

            # Download into the workspace under a timestamp-based file name.
            timestamp = int(time.time())
            extension = audio_stream.subtype or 'mp4'
            temp_filename = f"temp_youtube_{timestamp}.{extension}"
            temp_audio_path = AGENT_WORKSPACE / temp_filename
            print(f"Downloading audio to: {temp_audio_path}...")
            audio_stream.download(output_path=AGENT_WORKSPACE, filename=temp_filename)
            print("Download complete.")

            # Hand the downloaded file to the shared transcription helper.
            return _transcribe_audio(temp_audio_path, f"YouTube video '{yt.title}'")

        except PytubeError as e:
            return f"Error processing YouTube video {youtube_url}: {e}"
        except Exception as e:
            return f"Unexpected error during YouTube transcription {youtube_url}: {e}"
        finally:
            # Always remove the temporary download, whether or not transcription
            # succeeded; a failed delete is reported but never raised.
            if temp_audio_path and temp_audio_path.exists():
                try:
                    temp_audio_path.unlink()
                    print(f"Cleaned up: {temp_audio_path}")
                except Exception as e:
                    print(f"Warning: Failed to delete temp file {temp_audio_path}: {e}")
364
 
365
 
366
  # ==============================================================================
367
+ # 3. AGENT SETUP
368
  # ==============================================================================
369
+
370
+ # --- Initialize LLM ---
 
 
 
 
 
 
 
371
# Build the Groq-backed chat model that drives the agent. The model name and API
# key come from the module-level GROQ_MODEL_NAME / GROQ_API_KEY settings.
# Failure here is fatal: nothing below can work without an LLM, so the process
# exits with status 1 instead of continuing half-configured.
try:
    llm = ChatGroq(
        temperature=0,
        model_name=GROQ_MODEL_NAME,
        groq_api_key=GROQ_API_KEY
    )
    print(f"Using Groq LLM: {GROQ_MODEL_NAME}")
except Exception as e:
    print(f"FATAL: Error initializing Groq LLM: {e}")
    sys.exit(1)
381
+
382
+ # --- Assemble Available Tools ---
383
# Tool list handed to both the agent and the executor. Order: web search (if
# configured), then the always-on core tools, then the optional tools.
available_tools = []
if TAVILY_API_KEY:
    available_tools.append(TavilySearchResults(max_results=TAVILY_MAX_RESULTS, api_key=TAVILY_API_KEY))
else:
    print("Warning: Tavily Search tool disabled (API key missing).")

# Core tools are always added (they don't have external dependencies checked above)
available_tools.extend([
    write_file,
    read_file,
    list_directory,
    run_python_code,
    scrape_webpage,
])

# Add optional tools if their dependencies/clients are ready.
# Each condition mirrors the guard under which the tool function was defined,
# so a tool is only referenced here when its name actually exists.
if PANDAS_AVAILABLE: available_tools.append(read_excel_file)
if OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_audio_file)
if PYTUBE_AVAILABLE and OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_youtube_video)

# Inside the comprehension `tool` is a loop variable local to the comprehension;
# it does not rebind the `tool` decorator used above.
print(f"Agent initialized with tools: {[tool.name for tool in available_tools]}")
404
+
405
# --- Define System Prompt ---
# Template for the system message. It has two str.format() fields:
#   {agent_workspace}   - filled with the resolved workspace path
#   {tool_descriptions} - filled with one "- name: description" line per tool
#                         that was actually registered in `available_tools`
# NOTE(review): the "Important Guidelines" section names `read_excel_file`,
# `transcribe_audio_file`, `transcribe_youtube_video` and
# `tavily_search_results_json` unconditionally, even though those tools are only
# registered when their dependencies / API keys are present.
SYSTEM_PROMPT_TEMPLATE = """You are a highly capable AI assistant designed to solve complex problems step-by-step, mimicking human-like reasoning and actions. Your goal is to accurately answer the user's request based on the GAIA benchmark philosophy.

**Workspace:** You have access to a local workspace directory: '{agent_workspace}'. You can ONLY interact with files inside this directory using the provided tools. Always use relative paths for file operations.

**Available Tools:** You have access to the following tools:
{tool_descriptions}

**Reasoning Process:**
1. **Understand:** Analyze the request. Identify objectives, constraints, and required information (text, web search, file content, Excel data, audio/video transcription, calculations).
2. **Plan:** Break down the problem into logical steps. Choose the *most appropriate* tool for each step.
3. **Execute:** Perform actions step-by-step using ONE tool at a time. Provide valid arguments for the chosen tool.
4. **Observe:** Analyze the results (observations) from each tool execution. Note errors or unexpected output.
5. **Reflect & Adjust:** If a step fails or results are insufficient, analyze the error, refine your plan, and try a different approach or tool. If a file isn't found, consider using `list_directory`. If web search results aren't specific enough, refine your query. If scraping fails, the site might be dynamic or blocking; note this limitation.
6. **Synthesize:** Once all necessary information is gathered and actions performed, combine the findings to formulate the final answer.
7. **Final Answer:** Provide ONLY the final answer in the precise format requested by the task. Do not include explanations, commentary, or conversational text unless explicitly asked for. If the task requires creating a file, use `write_file` and state the relative path if needed as the final answer.

**Important Guidelines:**
* Think step-by-step. Be methodical.
* Use file/audio/excel tools ONLY for the designated workspace: {agent_workspace}. Use relative paths.
* Check file existence with `list_directory` before attempting to read if unsure.
* Use `read_excel_file` for `.xlsx` or `.xls` files.
* Use `transcribe_audio_file` for local audio files (e.g., .mp3, .wav). Max 25MB.
* Use `transcribe_youtube_video` for YouTube URLs. Max 25MB audio download.
* Use `run_python_code` for calculations or data manipulation not covered by other tools. Use `print()` for output.
* Use `tavily_search_results_json` for web searches. Use `scrape_webpage` to get content from a specific URL found in search or given in the prompt.
* Adhere strictly to the requested final answer format.
"""
434
+
435
+ # --- Create Prompt Template ---
436
# Chat prompt layout, in order: system message, prior conversation
# ("chat_history", supplied by the memory object), the current user input, and
# the agent's intermediate steps ("agent_scratchpad").
# The system text is pre-rendered with str.format() before being handed to
# ChatPromptTemplate.
# NOTE(review): presumably ChatPromptTemplate parses the rendered text as a
# template again, so a literal "{" or "}" in a tool description or in the
# workspace path would be read as a template variable - confirm and escape if so.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYSTEM_PROMPT_TEMPLATE.format(
            agent_workspace=AGENT_WORKSPACE.resolve(),
            tool_descriptions="\n".join([f"- {tool.name}: {tool.description}" for tool in available_tools])
            )
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),  # Crucial for agent's intermediate steps
    ]
)
448
+
449
+ # --- Setup Memory ---
450
# Windowed conversation memory, sized by MEMORY_WINDOW_SIZE. Its `memory_key`
# must match the "chat_history" placeholder in the prompt. `run_gaia_task`
# clears it before every task, so history never carries over between tasks.
memory = ConversationBufferWindowMemory(
    k=MEMORY_WINDOW_SIZE,
    memory_key="chat_history",
    return_messages=True  # Return Message objects for chat models
)
455
+
456
# --- Create Agent ---
# The prompt above exposes `agent_scratchpad` as a MessagesPlaceholder and has no
# `{tools}` / `{tool_names}` variables. `create_structured_chat_agent` rejects
# such a prompt (it requires both variables and a string scratchpad), so the
# agent could never be constructed. The prompt's shape is exactly what the
# tool-calling agent expects: tools are bound to the model through its native
# tool-calling API and intermediate steps are passed as messages.
# Imported here rather than at the top so this step is self-contained.
from langchain.agents import create_tool_calling_agent

agent = create_tool_calling_agent(llm, available_tools, prompt)
459
+
460
+ # --- Create Agent Executor ---
461
# Runtime loop around the agent: it is given the same tool list, the shared
# memory object, and a step limit of MAX_ITERATIONS.
# `handle_parsing_errors` is given a fixed string rather than a boolean.
agent_executor = AgentExecutor(
    agent=agent,
    tools=available_tools,
    memory=memory,
    verbose=True,  # Set to True to see agent's thought process, False for cleaner output
    max_iterations=MAX_ITERATIONS,
    handle_parsing_errors="Please check your output format and try again.",  # Basic guidance on format errors
    # return_intermediate_steps=True # Uncomment to get intermediate steps in the result dictionary
)
470
 
471
  # ==============================================================================
472
+ # 4. EXECUTION FUNCTION
473
  # ==============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
 
475
def run_gaia_task(task_description: str) -> str:
    """
    Runs the GAIA agent on a given task description.

    Conversation memory is cleared first so that each task starts from a clean
    context. Progress banners are printed to stdout before and after the run.

    Args:
        task_description: The natural language description of the task.

    Returns:
        The final output string from the agent, or an error message. Exceptions
        raised while the agent runs are caught and reported in the returned
        string rather than propagated.
    """
    separator = "=" * 50

    # The banner glyphs below were stored as mis-decoded UTF-8 byte sequences
    # (mojibake); they are restored to the intended emoji.
    print("\n" + separator)
    print("🚀 Running GAIA Task")
    print(f"📝 Task: {task_description}")
    print(f"📁 Workspace: {AGENT_WORKSPACE.resolve()}")
    print(f"🛠️ Available Tools: {[tool.name for tool in available_tools]}")
    print(separator + "\n")

    # Reset memory for each new task to avoid context bleed
    memory.clear()

    try:
        # Invoke the agent executor
        result = agent_executor.invoke({"input": task_description})

        print("\n" + separator)
        print("✅ Agent Execution Finished")
        print(f"🏁 Final Output:\n{result.get('output', 'No output found.')}")
        print(separator + "\n")
        return result.get('output', 'Agent finished but produced no output.')

    except Exception as e:
        # Top-level boundary: report the failure to the caller as text so that a
        # batch of tasks can keep going after one of them fails.
        print(f"\n{separator}")
        print("❌ Agent Execution Error")
        print(f"An error occurred: {e}")
        # Optional: Print traceback for detailed debugging
        # import traceback
        # traceback.print_exc()
        print(separator + "\n")
        return f"Agent failed with error: {e}"
514
 
515
  # ==============================================================================
516
+ # 5. EXAMPLE USAGE (Entry Point)
517
  # ==============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
518
 
519
# Script entry point: seeds the workspace with small sample files (only when
# they are missing), defines a few demonstration tasks, and runs one of them.
if __name__ == "__main__":
    # --- Optional: Setup Example Files ---
    # Every setup step is best-effort: a failure is printed and the script moves on.
    print("--- Setting up example files (if needed) ---")
    # Dummy Excel (needs pandas; used by the "excel_read" task)
    if PANDAS_AVAILABLE:
        try:
            dummy_excel_path = AGENT_WORKSPACE / "sample_data.xlsx"
            if not dummy_excel_path.exists():
                pd.DataFrame({'ID': [1, 2, 3], 'Product': ['Widget', 'Gadget', 'Thingamajig']}).to_excel(dummy_excel_path, index=False)
                print(f"Created dummy Excel: {dummy_excel_path}")
        except Exception as e: print(f"Could not create dummy Excel: {e}")
    # Dummy Text (one number per line; used by the "python_sum" task)
    try:
        dummy_text_path = AGENT_WORKSPACE / "numbers.txt"
        if not dummy_text_path.exists():
            with open(dummy_text_path, "w") as f: f.write("15\n-3\n42.5\n100\n")
            print(f"Created dummy text file: {dummy_text_path}")
    except Exception as e: print(f"Could not create dummy text file: {e}")
    # Dummy Audio - User needs to provide this manually
    dummy_audio_path = AGENT_WORKSPACE / "sample_audio.mp3"
    if not dummy_audio_path.exists() and OPENAI_AVAILABLE and openai_client:
        print(f"INFO: To test audio transcription, place an MP3 file at: {dummy_audio_path}")
    print("--- Example setup complete ---")


    # --- Define Example Tasks ---
    # Each entry has a short "id" used in log lines and the natural-language
    # "description" that is passed to the agent verbatim.
    task_list = [
        {
            "id": "excel_read",
            "description": "Read the file 'sample_data.xlsx' in the workspace. What is the 'Product' where 'ID' is 2? Final answer should be just the product name."
        },
        {
            "id": "python_sum",
            "description": "Read the numbers from 'numbers.txt' in the workspace (one per line). Calculate their sum using python code. Write the sum into 'sum_result.txt'. Final answer should be the relative path 'sum_result.txt'."
        },
        {
            "id": "search_scrape_write",
            "description": "Search the web for the official website of the Python Software Foundation. Scrape the main title from the homepage of that website. Write the title into 'psf_title.txt'. Final answer is 'psf_title.txt'."
        },
        # { # Uncomment to run audio task if sample_audio.mp3 exists
        #     "id": "audio_transcribe",
        #     "description": "Transcribe the audio file 'sample_audio.mp3' from the workspace. Write the first 50 characters of the transcription into 'audio_snippet.txt'. Final answer is 'audio_snippet.txt'."
        # },
        # { # Uncomment to run YouTube task
        #     "id": "youtube_transcribe",
        #     "description": "Transcribe the YouTube video 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'. What is the first line of the transcription? Final answer is just the first line."
        # },
    ]

    # --- Run Selected Task ---
    # Choose which task to run by its index or ID
    task_to_run = task_list[0]  # Run the first task (Excel read)

    print(f"\n>>> Running selected task: {task_to_run['id']} <<<")
    final_answer = run_gaia_task(task_to_run['description'])
    print(f">>> Task {task_to_run['id']} completed. Agent Output: {final_answer} <<<")

    # To run all tasks:
    # for task in task_list:
    #     print(f"\n>>> Running task: {task['id']} <<<")
    #     final_answer = run_gaia_task(task['description'])
    #     print(f">>> Task {task['id']} completed. Agent Output: {final_answer} <<<")
    #     input("Press Enter to continue to the next task...") # Pause between tasks