Macmill commited on
Commit
a7b6255
·
verified ·
1 Parent(s): 1056032

Update final_agent.py

Browse files
Files changed (1) hide show
  1. final_agent.py +193 -361
final_agent.py CHANGED
@@ -1,10 +1,6 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
  GAIA Benchmark Agent using LangChain, Groq, Tavily, and various tools.
4
-
5
- This agent is designed to interact with files, search the web, scrape pages,
6
- execute Python code, read Excel files, and transcribe audio/YouTube videos
7
- to tackle complex tasks like those found in the GAIA benchmark.
8
  """
9
 
10
  # --- Core Libraries ---
@@ -22,303 +18,199 @@ from dotenv import load_dotenv
22
  # --- LangChain Imports ---
23
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
24
  from langchain_core.tools import BaseTool, tool
25
- from langchain.pydantic_v1 import BaseModel, Field # Use Pydantic v1 for Langchain tool compatibility
 
 
26
  from langchain.memory import ConversationBufferWindowMemory
27
- from langchain.agents import AgentExecutor, create_structured_chat_agent
28
 
29
  # --- Tool Specific Imports ---
30
  # Search
31
- from langchain_community.utilities import TavilySearchResults
32
  # Web Scraping
33
  import requests
34
  from bs4 import BeautifulSoup
35
  # LLM
36
  from langchain_groq import ChatGroq
37
  # Audio/Video Transcription (Optional)
38
- try:
39
- import openai
40
- OPENAI_AVAILABLE = True
41
- except ImportError:
42
- OPENAI_AVAILABLE = False
43
  # Excel Reading (Optional)
44
- try:
45
- import pandas as pd
46
- PANDAS_AVAILABLE = True
47
- except ImportError:
48
- PANDAS_AVAILABLE = False
49
  # YouTube Processing (Optional)
50
- try:
51
- from pytube import YouTube
52
- from pytube.exceptions import PytubeError
53
- PYTUBE_AVAILABLE = True
54
- except ImportError:
55
- PYTUBE_AVAILABLE = False
56
 
57
  # ==============================================================================
58
  # 1. CONFIGURATION
59
  # ==============================================================================
60
- load_dotenv() # Load environment variables from .env file if it exists
61
-
62
- # --- Agent Settings ---
63
- AGENT_WORKSPACE = Path("./gaia_agent_workspace")
64
- AGENT_WORKSPACE.mkdir(exist_ok=True) # Ensure workspace directory exists
65
- MAX_ITERATIONS = 15
66
- MEMORY_WINDOW_SIZE = 10
67
-
68
- # --- LLM Configuration ---
69
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
70
- GROQ_MODEL_NAME = os.getenv("GROQ_MODEL_NAME", "llama3-70b-8192") # Default if not set
71
-
72
- # --- Tool Configuration ---
73
- TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
74
- TAVILY_MAX_RESULTS = 3
75
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # Needed for Whisper
76
- WHISPER_MODEL = "whisper-1"
77
-
78
- # --- Dependency & API Key Checks ---
79
- if not GROQ_API_KEY:
80
- print("ERROR: GROQ_API_KEY environment variable not set. Agent cannot run.")
81
- sys.exit(1)
82
- if not TAVILY_API_KEY:
83
- print("ERROR: TAVILY_API_KEY environment variable not set. Search tool disabled.")
84
- # Decide if this is fatal or just disables the tool
85
- # sys.exit(1) # Uncomment to make it fatal
86
-
87
  openai_client = None
88
  if OPENAI_AVAILABLE and OPENAI_API_KEY:
89
- try:
90
- openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)
91
- print("OpenAI client initialized for Whisper transcription.")
92
- except Exception as e:
93
- print(f"Warning: Failed to initialize OpenAI client: {e}. Transcription tools disabled.")
94
- openai_client = None
95
- elif OPENAI_AVAILABLE:
96
- print("Warning: OpenAI library installed, but OPENAI_API_KEY not set. Transcription tools disabled.")
97
- else:
98
- print("Info: OpenAI library not installed. Transcription tools disabled.")
99
-
100
- if not PANDAS_AVAILABLE:
101
- print("Info: 'pandas' library not installed. Excel tool disabled. Install with: pip install pandas openpyxl")
102
- if not PYTUBE_AVAILABLE:
103
- print("Info: 'pytube' library not installed. YouTube tool disabled. Install with: pip install pytube")
104
 
105
  # ==============================================================================
106
  # 2. TOOL DEFINITIONS
107
  # ==============================================================================
108
 
109
  # --- Tool Input Schemas (Pydantic Models) ---
110
- # Using Pydantic v1 as required by Langchain tools at the time of writing
111
-
112
  class FileWriteArgs(BaseModel):
113
  relative_path: str = Field(description="Relative path within the agent's workspace where the file should be written.")
114
  content: str = Field(description="The text content to write into the file.")
115
-
116
  class FileReadArgs(BaseModel):
117
  relative_path: str = Field(description="Relative path within the agent's workspace of the file to read.")
118
-
119
  class ListDirectoryArgs(BaseModel):
120
  relative_path: str = Field(default=".", description="Relative path within the agent's workspace to list contents of. Use '.' for the root.")
121
-
122
  class RunPythonCodeArgs(BaseModel):
123
  code: str = Field(description="The Python code to execute. Use 'print()' to output results. Code runs in isolation.")
124
-
125
  class WebScrapeArgs(BaseModel):
126
  url: str = Field(description="The URL of the webpage to scrape.")
127
  query: Optional[str] = Field(default=None, description="Optional specific question to answer from the page content.")
128
-
129
  class ReadExcelArgs(BaseModel):
130
  relative_path: str = Field(description="Relative path within the agent's workspace of the Excel file (.xlsx or .xls).")
131
  sheet_name: Optional[str] = Field(default=None, description="Optional name of the specific sheet to read. Reads the first sheet if not specified.")
132
  max_rows_preview: int = Field(default=20, description="Maximum number of rows to include in the text preview.")
133
-
134
  class TranscribeAudioArgs(BaseModel):
135
  relative_path: str = Field(description="Relative path within the agent's workspace of the audio file (e.g., .mp3, .wav, .m4a). Max 25MB.")
136
-
137
  class TranscribeYouTubeArgs(BaseModel):
138
  youtube_url: str = Field(description="The URL of the YouTube video to transcribe. Audio will be downloaded temporarily.")
139
 
140
  # --- Helper Functions ---
141
-
142
  def _resolve_path(relative_path: str) -> Optional[Path]:
143
  """Resolves a relative path against the workspace and checks bounds."""
144
  try:
145
- full_path = (AGENT_WORKSPACE / relative_path).resolve()
146
- # Security Check: Ensure the resolved path is within the workspace
147
- if not str(full_path).startswith(str(AGENT_WORKSPACE.resolve())):
148
- return None # Path is outside the workspace
149
- return full_path
150
- except Exception: # Handle potential path resolution errors
 
 
 
 
 
 
 
 
 
 
151
  return None
152
 
153
  def _transcribe_audio(file_path: Path, file_description: str) -> str:
154
  """Helper to transcribe an audio file using OpenAI Whisper."""
155
- if not openai_client:
156
- return "Error: OpenAI client not available for transcription."
157
  if not file_path.is_file():
158
- return f"Error: Audio file not found at '{file_path.relative_to(AGENT_WORKSPACE)}'"
159
-
 
160
  try:
161
  file_size_mb = file_path.stat().st_size / (1024 * 1024)
162
- if file_size_mb > 25:
163
- return f"Error: Audio file '{file_description}' is too large ({file_size_mb:.2f} MB). Max 25 MB."
164
-
165
  print(f"Transcribing audio: {file_description}...")
166
- with open(file_path, "rb") as audio_file_handle:
167
- # Note: response_format="text" returns a simple string
168
- transcript = openai_client.audio.transcriptions.create(
169
- model=WHISPER_MODEL,
170
- file=audio_file_handle,
171
- response_format="text"
172
- )
173
  print("Transcription complete.")
174
-
175
- if isinstance(transcript, str):
176
- max_len = 10000 # Limit transcription length in output
177
- if len(transcript) > max_len:
178
- transcript = transcript[:max_len] + "\n... [Transcription truncated]"
179
- return f"Transcription of '{file_description}':\n{transcript}"
180
- else:
181
- return f"Transcription of '{file_description}' succeeded, but format was unexpected: {type(transcript)}"
182
-
183
- except openai.APIError as e:
184
- return f"OpenAI API Error during transcription of '{file_description}': {e}"
185
- except Exception as e:
186
- return f"Error transcribing '{file_description}': {e}"
187
 
188
  # --- Tool Implementations ---
189
-
190
  @tool("write_file", args_schema=FileWriteArgs)
191
  def write_file(relative_path: str, content: str) -> str:
192
  """Writes text content to a file within the agent's workspace. Creates parent directories if needed."""
193
- full_path = _resolve_path(relative_path)
194
- if not full_path:
195
- return f"Error: Invalid or disallowed path '{relative_path}'."
196
- try:
197
- full_path.parent.mkdir(parents=True, exist_ok=True)
198
- with open(full_path, 'w', encoding='utf-8') as f:
199
- f.write(content)
200
- return f"Successfully wrote to file: {relative_path}"
201
- except Exception as e:
202
- return f"Error writing file '{relative_path}': {e}"
203
 
204
  @tool("read_file", args_schema=FileReadArgs)
205
  def read_file(relative_path: str) -> str:
206
  """Reads the text content of a file from the agent's workspace. Limited read size."""
207
- full_path = _resolve_path(relative_path)
208
- if not full_path:
209
- return f"Error: Invalid or disallowed path '{relative_path}'."
210
- if not full_path.is_file():
211
- return f"Error: File not found at '{relative_path}'"
212
  try:
213
- with open(full_path, 'r', encoding='utf-8') as f:
214
- content = f.read(10000) # Limit read size
215
- if len(f.read(1)) > 0:
216
- content += "\n... [File truncated due to length]"
217
  return content
218
- except Exception as e:
219
- return f"Error reading file '{relative_path}': {e}"
220
 
221
  @tool("list_directory", args_schema=ListDirectoryArgs)
222
  def list_directory(relative_path: str = ".") -> str:
223
  """Lists the contents (files and directories) of a specified directory within the agent's workspace."""
224
- target_path = _resolve_path(relative_path)
225
- if not target_path:
226
- return f"Error: Invalid or disallowed path '{relative_path}'."
227
- if not target_path.is_dir():
228
- return f"Error: '{relative_path}' is not a valid directory."
229
- try:
230
- items = [f.name + ('/' if f.is_dir() else '') for f in target_path.iterdir()]
231
- if not items:
232
- return f"Directory '{relative_path}' is empty."
233
- return f"Contents of '{relative_path}':\n" + "\n".join(items)
234
- except Exception as e:
235
- return f"Error listing directory '{relative_path}': {e}"
236
 
237
  @tool("run_python_code", args_schema=RunPythonCodeArgs)
238
  def run_python_code(code: str) -> str:
239
  """Executes Python code in a subprocess and returns the stdout/stderr. Use print() for output. WARNING: Executes arbitrary code."""
 
240
  try:
241
- process = subprocess.run(
242
- [sys.executable, "-c", code],
243
- capture_output=True, text=True, timeout=30, cwd=AGENT_WORKSPACE, check=False # Don't raise error on non-zero exit
244
- )
245
  output, error = process.stdout, process.stderr
246
- result = ""
247
- if output:
248
- max_output = 2000
249
- if len(output) > max_output: output = output[:max_output] + "\n... [Output truncated]"
250
- result += f"Output:\n{output}\n"
251
- if error:
252
- result += f"Error Output:\n{error}\n"
253
-
254
- if process.returncode == 0:
255
- return f"Execution successful.\n{result}"
256
- else:
257
- return f"Execution failed (Return Code: {process.returncode}).\n{result}"
258
- except subprocess.TimeoutExpired:
259
- return "Error: Code execution timed out after 30 seconds."
260
- except Exception as e:
261
- return f"Error executing Python code: {e}"
262
 
263
  @tool("scrape_webpage", args_schema=WebScrapeArgs)
264
  def scrape_webpage(url: str, query: Optional[str] = None) -> str:
265
  """Scrapes text content from a given URL using BeautifulSoup. If a query is provided, returns content for the agent to answer it."""
 
266
  try:
267
- headers = {'User-Agent': 'Mozilla/5.0 (compatible; GAIA-Agent/1.0)'} # Identify the agent
268
- response = requests.get(url, headers=headers, timeout=20)
269
- response.raise_for_status() # Raise HTTPError for bad responses
270
-
271
- # Check content type - avoid trying to parse images, etc.
272
  content_type = response.headers.get('content-type', '').lower()
273
- if 'text/html' not in content_type:
274
- return f"Error: Content type of URL {url} is '{content_type}', not HTML. Cannot scrape."
275
-
276
  soup = BeautifulSoup(response.text, 'html.parser')
277
- for script_or_style in soup(["script", "style", "nav", "footer", "aside"]): # Remove common clutter
278
- script_or_style.decompose()
279
-
280
- text_content = soup.get_text(separator='\n', strip=True)
281
- if not text_content: return f"Could not extract meaningful text content from {url}."
282
-
283
- max_chars = 10000 # Limit content length
284
- if len(text_content) > max_chars:
285
- text_content = text_content[:max_chars] + "\n... [Content truncated]"
286
-
287
- if query:
288
- return f"Use the following content from {url} to answer the query '{query}':\n\n{text_content}"
289
- else:
290
- return f"Content scraped from {url}:\n\n{text_content}"
291
-
292
- except requests.exceptions.RequestException as e:
293
- return f"Error fetching or reading URL {url}: {e}"
294
- except Exception as e:
295
- return f"Error scraping URL {url}: {e}"
296
-
297
- # --- Optional Tools (Conditionally Available) ---
298
 
299
  if PANDAS_AVAILABLE:
300
  @tool("read_excel_file", args_schema=ReadExcelArgs)
301
  def read_excel_file(relative_path: str, sheet_name: Optional[str] = None, max_rows_preview: int = 20) -> str:
302
  """Reads data from an Excel file (.xlsx or .xls) within the workspace and returns a text preview."""
303
- full_path = _resolve_path(relative_path)
304
  if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
305
  if not full_path.is_file(): return f"Error: Excel file not found at '{relative_path}'"
 
306
  try:
307
  excel_file = pd.ExcelFile(full_path)
308
- if sheet_name:
309
- if sheet_name not in excel_file.sheet_names:
310
- return f"Error: Sheet '{sheet_name}' not found. Available: {excel_file.sheet_names}"
311
- sheet_to_read = sheet_name
312
- else:
313
- sheet_to_read = excel_file.sheet_names[0]
314
-
315
  df = pd.read_excel(full_path, sheet_name=sheet_to_read)
316
- output = f"Preview of sheet '{sheet_to_read}' from '{relative_path}' ({df.shape[0]} rows, {df.shape[1]} cols):\n"
317
- output += df.to_string(max_rows=max_rows_preview, max_cols=15) # Preview format
318
-
319
- max_output_len = 5000
320
- if len(output) > max_output_len:
321
- output = output[:max_output_len] + "\n... [Output truncated]"
322
  return output
323
  except Exception as e: return f"Error reading Excel file '{relative_path}': {e}"
324
 
@@ -326,7 +218,7 @@ if OPENAI_AVAILABLE and openai_client:
326
  @tool("transcribe_audio_file", args_schema=TranscribeAudioArgs)
327
  def transcribe_audio_file(relative_path: str) -> str:
328
  """Transcribes audio content from a file in the workspace using OpenAI Whisper (max 25MB)."""
329
- full_path = _resolve_path(relative_path)
330
  if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
331
  return _transcribe_audio(full_path, relative_path)
332
 
@@ -336,80 +228,57 @@ if PYTUBE_AVAILABLE and OPENAI_AVAILABLE and openai_client:
336
  """Downloads audio from a YouTube URL, transcribes it using OpenAI Whisper, and returns the text."""
337
  temp_audio_path = None
338
  try:
339
- print(f"Processing YouTube URL: {youtube_url}")
340
- yt = YouTube(youtube_url)
341
- audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').order_by('abr').desc().first()
342
- if not audio_stream: audio_stream = yt.streams.filter(only_audio=True).order_by('abr').desc().first() # Fallback
343
- if not audio_stream: return f"Error: No suitable audio stream found for {youtube_url}"
344
-
345
- # Download to a unique temporary file in workspace
346
- timestamp = int(time.time())
347
- temp_filename = f"temp_youtube_{timestamp}.{audio_stream.subtype or 'mp4'}"
 
348
  temp_audio_path = AGENT_WORKSPACE / temp_filename
349
  print(f"Downloading audio to: {temp_audio_path}...")
350
- audio_stream.download(output_path=AGENT_WORKSPACE, filename=temp_filename)
351
- print("Download complete.")
352
-
353
- # Transcribe the downloaded file
354
- result = _transcribe_audio(temp_audio_path, f"YouTube video '{yt.title}'")
355
- return result
356
-
357
- except PytubeError as e: return f"Error processing YouTube video {youtube_url}: {e}"
358
  except Exception as e: return f"Unexpected error during YouTube transcription {youtube_url}: {e}"
359
  finally:
360
- # --- IMPORTANT: Clean up temporary file ---
361
  if temp_audio_path and temp_audio_path.exists():
362
- try: temp_audio_path.unlink(); print(f"Cleaned up: {temp_audio_path}")
363
  except Exception as e: print(f"Warning: Failed to delete temp file {temp_audio_path}: {e}")
364
 
365
-
366
  # ==============================================================================
367
  # 3. AGENT SETUP
368
  # ==============================================================================
369
 
370
  # --- Initialize LLM ---
371
  try:
372
- llm = ChatGroq(
373
- temperature=0,
374
- model_name=GROQ_MODEL_NAME,
375
- groq_api_key=GROQ_API_KEY
376
- )
377
  print(f"Using Groq LLM: {GROQ_MODEL_NAME}")
378
- except Exception as e:
379
- print(f"FATAL: Error initializing Groq LLM: {e}")
380
- sys.exit(1)
381
 
382
  # --- Assemble Available Tools ---
383
  available_tools = []
384
  if TAVILY_API_KEY:
385
- available_tools.append(TavilySearchResults(max_results=TAVILY_MAX_RESULTS, api_key=TAVILY_API_KEY))
386
- else:
387
- print("Warning: Tavily Search tool disabled (API key missing).")
388
-
389
- # Core tools are always added (they don't have external dependencies checked above)
390
- available_tools.extend([
391
- write_file,
392
- read_file,
393
- list_directory,
394
- run_python_code,
395
- scrape_webpage,
396
- ])
397
-
398
- # Add optional tools if their dependencies/clients are ready
399
  if PANDAS_AVAILABLE: available_tools.append(read_excel_file)
400
  if OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_audio_file)
401
  if PYTUBE_AVAILABLE and OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_youtube_video)
402
-
403
  print(f"Agent initialized with tools: {[tool.name for tool in available_tools]}")
404
 
405
  # --- Define System Prompt ---
406
- # This prompt is formatted later with the *actually available* tools
407
  SYSTEM_PROMPT_TEMPLATE = """You are a highly capable AI assistant designed to solve complex problems step-by-step, mimicking human-like reasoning and actions. Your goal is to accurately answer the user's request based on the GAIA benchmark philosophy.
408
 
409
  **Workspace:** You have access to a local workspace directory: '{agent_workspace}'. You can ONLY interact with files inside this directory using the provided tools. Always use relative paths for file operations.
410
 
411
  **Available Tools:** You have access to the following tools:
412
- {tool_descriptions}
413
 
414
  **Reasoning Process:**
415
  1. **Understand:** Analyze the request. Identify objectives, constraints, and required information (text, web search, file content, Excel data, audio/video transcription, calculations).
@@ -433,149 +302,112 @@ SYSTEM_PROMPT_TEMPLATE = """You are a highly capable AI assistant designed to so
433
  """
434
 
435
  # --- Create Prompt Template ---
436
- prompt = ChatPromptTemplate.from_messages(
437
- [
438
- ("system", SYSTEM_PROMPT_TEMPLATE.format(
439
- agent_workspace=AGENT_WORKSPACE.resolve(),
440
- tool_descriptions="\n".join([f"- {tool.name}: {tool.description}" for tool in available_tools])
441
- )
442
- ),
443
- MessagesPlaceholder(variable_name="chat_history"),
444
- ("human", "{input}"),
445
- MessagesPlaceholder(variable_name="agent_scratchpad"), # Crucial for agent's intermediate steps
446
- ]
447
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
  # --- Setup Memory ---
450
  memory = ConversationBufferWindowMemory(
451
  k=MEMORY_WINDOW_SIZE,
452
  memory_key="chat_history",
453
- return_messages=True # Return Message objects for chat models
454
  )
455
 
456
  # --- Create Agent ---
457
- # Structured Chat Agent is generally good for models supporting tool calling/structured output
458
- agent = create_structured_chat_agent(llm, available_tools, prompt)
 
 
 
 
 
 
459
 
460
  # --- Create Agent Executor ---
461
- agent_executor = AgentExecutor(
462
- agent=agent,
463
- tools=available_tools,
464
- memory=memory,
465
- verbose=True, # Set to True to see agent's thought process, False for cleaner output
466
- max_iterations=MAX_ITERATIONS,
467
- handle_parsing_errors="Please check your output format and try again.", # Basic guidance on format errors
468
- # return_intermediate_steps=True # Uncomment to get intermediate steps in the result dictionary
469
- )
 
 
 
470
 
471
  # ==============================================================================
472
- # 4. EXECUTION FUNCTION
473
  # ==============================================================================
474
-
475
  def run_gaia_task(task_description: str):
476
- """
477
- Runs the GAIA agent on a given task description.
478
-
479
- Args:
480
- task_description: The natural language description of the task.
481
-
482
- Returns:
483
- The final output string from the agent, or an error message.
484
- """
485
- print("\n" + "="*50)
486
- print(f"🚀 Running GAIA Task")
487
- print(f"📝 Task: {task_description}")
488
- print(f"📍 Workspace: {AGENT_WORKSPACE.resolve()}")
489
- print(f"🛠️ Available Tools: {[tool.name for tool in available_tools]}")
490
- print("="*50 + "\n")
491
-
492
- # Reset memory for each new task to avoid context bleed
493
- memory.clear()
494
-
495
  try:
496
- # Invoke the agent executor
497
  result = agent_executor.invoke({"input": task_description})
498
-
499
- print("\n" + "="*50)
500
- print("✅ Agent Execution Finished")
501
- print(f"🏁 Final Output:\n{result.get('output', 'No output found.')}")
502
- print("="*50 + "\n")
503
- return result.get('output', 'Agent finished but produced no output.')
504
-
505
  except Exception as e:
506
- print(f"\n{'='*50}")
507
- print(f"❌ Agent Execution Error")
508
- print(f"An error occurred: {e}")
509
- # Optional: Print traceback for detailed debugging
510
- # import traceback
511
- # traceback.print_exc()
512
- print("="*50 + "\n")
513
  return f"Agent failed with error: {e}"
514
 
515
  # ==============================================================================
516
- # 5. EXAMPLE USAGE (Entry Point)
517
  # ==============================================================================
518
-
519
  if __name__ == "__main__":
520
- # --- Optional: Setup Example Files ---
521
  print("--- Setting up example files (if needed) ---")
522
- # Dummy Excel
523
  if PANDAS_AVAILABLE:
524
  try:
525
  dummy_excel_path = AGENT_WORKSPACE / "sample_data.xlsx"
526
- if not dummy_excel_path.exists():
527
- pd.DataFrame({'ID': [1, 2, 3], 'Product': ['Widget', 'Gadget', 'Thingamajig']}).to_excel(dummy_excel_path, index=False)
528
- print(f"Created dummy Excel: {dummy_excel_path}")
529
  except Exception as e: print(f"Could not create dummy Excel: {e}")
530
- # Dummy Text
531
  try:
532
  dummy_text_path = AGENT_WORKSPACE / "numbers.txt"
533
  if not dummy_text_path.exists():
534
- with open(dummy_text_path, "w") as f: f.write("15\n-3\n42.5\n100\n")
535
- print(f"Created dummy text file: {dummy_text_path}")
536
  except Exception as e: print(f"Could not create dummy text file: {e}")
537
- # Dummy Audio - User needs to provide this manually
538
  dummy_audio_path = AGENT_WORKSPACE / "sample_audio.mp3"
539
- if not dummy_audio_path.exists() and OPENAI_AVAILABLE and openai_client:
540
- print(f"INFO: To test audio transcription, place an MP3 file at: {dummy_audio_path}")
541
  print("--- Example setup complete ---")
542
 
543
-
544
- # --- Define Example Tasks ---
545
- task_list = [
546
- {
547
- "id": "excel_read",
548
- "description": "Read the file 'sample_data.xlsx' in the workspace. What is the 'Product' where 'ID' is 2? Final answer should be just the product name."
549
- },
550
- {
551
- "id": "python_sum",
552
- "description": "Read the numbers from 'numbers.txt' in the workspace (one per line). Calculate their sum using python code. Write the sum into 'sum_result.txt'. Final answer should be the relative path 'sum_result.txt'."
553
- },
554
- {
555
- "id": "search_scrape_write",
556
- "description": "Search the web for the official website of the Python Software Foundation. Scrape the main title from the homepage of that website. Write the title into 'psf_title.txt'. Final answer is 'psf_title.txt'."
557
- },
558
- # { # Uncomment to run audio task if sample_audio.mp3 exists
559
- # "id": "audio_transcribe",
560
- # "description": "Transcribe the audio file 'sample_audio.mp3' from the workspace. Write the first 50 characters of the transcription into 'audio_snippet.txt'. Final answer is 'audio_snippet.txt'."
561
- # },
562
- # { # Uncomment to run YouTube task
563
- # "id": "youtube_transcribe",
564
- # "description": "Transcribe the YouTube video 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'. What is the first line of the transcription? Final answer is just the first line."
565
- # },
566
  ]
567
 
568
- # --- Run Selected Task ---
569
- # Choose which task to run by its index or ID
570
- task_to_run = task_list[0] # Run the first task (Excel read)
571
-
572
- print(f"\n>>> Running selected task: {task_to_run['id']} <<<")
573
- final_answer = run_gaia_task(task_to_run['description'])
574
- print(f">>> Task {task_to_run['id']} completed. Agent Output: {final_answer} <<<")
575
-
576
- # To run all tasks:
577
- # for task in task_list:
578
- # print(f"\n>>> Running task: {task['id']} <<<")
579
- # final_answer = run_gaia_task(task['description'])
580
- # print(f">>> Task {task['id']} completed. Agent Output: {final_answer} <<<")
581
- # input("Press Enter to continue to the next task...") # Pause between tasks
 
1
  # -*- coding: utf-8 -*-
2
  """
3
  GAIA Benchmark Agent using LangChain, Groq, Tavily, and various tools.
 
 
 
 
4
  """
5
 
6
  # --- Core Libraries ---
 
18
  # --- LangChain Imports ---
19
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
20
  from langchain_core.tools import BaseTool, tool
21
+ # Using Pydantic v2 is recommended if your environment supports it fully
22
+ # from pydantic import BaseModel, Field # Pydantic v2
23
+ from pydantic import BaseModel, Field # Pydantic v1 compatibility shim
24
  from langchain.memory import ConversationBufferWindowMemory
25
+ from langchain.agents import AgentExecutor, create_openai_tools_agent # Keep OpenAI Tools Agent
26
 
27
  # --- Tool Specific Imports ---
28
  # Search
29
+ from langchain_community.tools.tavily_search import TavilySearchResults
30
  # Web Scraping
31
  import requests
32
  from bs4 import BeautifulSoup
33
  # LLM
34
  from langchain_groq import ChatGroq
35
  # Audio/Video Transcription (Optional)
36
+ try: import openai; OPENAI_AVAILABLE = True
37
+ except ImportError: OPENAI_AVAILABLE = False
 
 
 
38
  # Excel Reading (Optional)
39
+ try: import pandas as pd; PANDAS_AVAILABLE = True
40
+ except ImportError: PANDAS_AVAILABLE = False
 
 
 
41
  # YouTube Processing (Optional)
42
+ try: from pytube import YouTube, PytubeError; PYTUBE_AVAILABLE = True
43
+ except ImportError: PYTUBE_AVAILABLE = False
 
 
 
 
44
 
45
  # ==============================================================================
46
  # 1. CONFIGURATION
47
  # ==============================================================================
48
+ load_dotenv()
49
+ AGENT_WORKSPACE = Path("./gaia_agent_workspace"); AGENT_WORKSPACE.mkdir(exist_ok=True)
50
+ MAX_ITERATIONS = 15; MEMORY_WINDOW_SIZE = 10
51
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY"); GROQ_MODEL_NAME = os.getenv("GROQ_MODEL_NAME", "llama3-70b-8192")
52
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY"); TAVILY_MAX_RESULTS = 3
53
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY"); WHISPER_MODEL = "whisper-1"
54
+ if not GROQ_API_KEY: print("ERROR: GROQ_API_KEY not set."); sys.exit(1)
55
+ if not TAVILY_API_KEY: print("Warning: TAVILY_API_KEY not set.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  openai_client = None
57
  if OPENAI_AVAILABLE and OPENAI_API_KEY:
58
+ try: openai_client = openai.OpenAI(api_key=OPENAI_API_KEY); print("OpenAI client initialized.")
59
+ except Exception as e: print(f"Warning: OpenAI client init failed: {e}"); openai_client = None
60
+ if not PANDAS_AVAILABLE: print("Info: 'pandas' not installed. Excel tool disabled.")
61
+ if not PYTUBE_AVAILABLE: print("Info: 'pytube' not installed. YouTube tool disabled.")
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # ==============================================================================
64
  # 2. TOOL DEFINITIONS
65
  # ==============================================================================
66
 
67
  # --- Tool Input Schemas (Pydantic Models) ---
 
 
68
  class FileWriteArgs(BaseModel):
69
  relative_path: str = Field(description="Relative path within the agent's workspace where the file should be written.")
70
  content: str = Field(description="The text content to write into the file.")
 
71
  class FileReadArgs(BaseModel):
72
  relative_path: str = Field(description="Relative path within the agent's workspace of the file to read.")
 
73
  class ListDirectoryArgs(BaseModel):
74
  relative_path: str = Field(default=".", description="Relative path within the agent's workspace to list contents of. Use '.' for the root.")
 
75
  class RunPythonCodeArgs(BaseModel):
76
  code: str = Field(description="The Python code to execute. Use 'print()' to output results. Code runs in isolation.")
 
77
  class WebScrapeArgs(BaseModel):
78
  url: str = Field(description="The URL of the webpage to scrape.")
79
  query: Optional[str] = Field(default=None, description="Optional specific question to answer from the page content.")
 
80
  class ReadExcelArgs(BaseModel):
81
  relative_path: str = Field(description="Relative path within the agent's workspace of the Excel file (.xlsx or .xls).")
82
  sheet_name: Optional[str] = Field(default=None, description="Optional name of the specific sheet to read. Reads the first sheet if not specified.")
83
  max_rows_preview: int = Field(default=20, description="Maximum number of rows to include in the text preview.")
 
84
  class TranscribeAudioArgs(BaseModel):
85
  relative_path: str = Field(description="Relative path within the agent's workspace of the audio file (e.g., .mp3, .wav, .m4a). Max 25MB.")
 
86
  class TranscribeYouTubeArgs(BaseModel):
87
  youtube_url: str = Field(description="The URL of the YouTube video to transcribe. Audio will be downloaded temporarily.")
88
 
89
  # --- Helper Functions ---
 
90
  def _resolve_path(relative_path: str) -> Optional[Path]:
91
  """Resolves a relative path against the workspace and checks bounds."""
92
  try:
93
+ normalized_relative_path = os.path.normpath(relative_path)
94
+ # Prevent absolute paths or paths trying to escape the workspace
95
+ if os.path.isabs(normalized_relative_path) or ".." in normalized_relative_path.split(os.sep):
96
+ print(f"Error: Invalid path characters or attempt to escape workspace in '{relative_path}'.")
97
+ return None
98
+ full_path = (AGENT_WORKSPACE / normalized_relative_path).resolve()
99
+ if AGENT_WORKSPACE.resolve() in full_path.parents or full_path == AGENT_WORKSPACE.resolve():
100
+ return full_path
101
+ # Check prefix as a fallback, although resolve should handle canonical paths
102
+ if str(full_path).startswith(str(AGENT_WORKSPACE.resolve())):
103
+ print(f"Warning: Path resolution for '{relative_path}' seems complex but within workspace: {full_path}")
104
+ return full_path
105
+ print(f"Error: Path '{relative_path}' resolved to '{full_path}' which is outside the allowed workspace '{AGENT_WORKSPACE.resolve()}'.")
106
+ return None
107
+ except Exception as e:
108
+ print(f"Error resolving path '{relative_path}': {e}")
109
  return None
110
 
111
  def _transcribe_audio(file_path: Path, file_description: str) -> str:
112
  """Helper to transcribe an audio file using OpenAI Whisper."""
113
+ if not openai_client: return "Error: OpenAI client not available for transcription."
 
114
  if not file_path.is_file():
115
+ try: rel_path_str = file_path.relative_to(AGENT_WORKSPACE)
116
+ except ValueError: rel_path_str = file_path
117
+ return f"Error: Audio file not found at '{rel_path_str}'"
118
  try:
119
  file_size_mb = file_path.stat().st_size / (1024 * 1024)
120
+ if file_size_mb > 25: return f"Error: Audio file '{file_description}' is too large ({file_size_mb:.2f} MB). Max 25 MB."
 
 
121
  print(f"Transcribing audio: {file_description}...")
122
+ with open(file_path, "rb") as audio_file_handle: transcript = openai_client.audio.transcriptions.create(model=WHISPER_MODEL, file=audio_file_handle, response_format="text")
 
 
 
 
 
 
123
  print("Transcription complete.")
124
+ if isinstance(transcript, str): max_len = 10000; transcript = transcript[:max_len] + ("\n... [Transcription truncated]" if len(transcript) > max_len else ""); return f"Transcription of '{file_description}':\n{transcript}"
125
+ else: return f"Transcription of '{file_description}' succeeded, but format was unexpected: {type(transcript)}"
126
+ except openai.APIError as e: return f"OpenAI API Error during transcription of '{file_description}': {e}"
127
+ except Exception as e: return f"Error transcribing '{file_description}': {e}"
 
 
 
 
 
 
 
 
 
128
 
129
  # --- Tool Implementations ---
 
130
  @tool("write_file", args_schema=FileWriteArgs)
131
  def write_file(relative_path: str, content: str) -> str:
132
  """Writes text content to a file within the agent's workspace. Creates parent directories if needed."""
133
+ full_path = _resolve_path(relative_path);
134
+ if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
135
+ try: full_path.parent.mkdir(parents=True, exist_ok=True); open(full_path, 'w', encoding='utf-8').write(content); return f"Successfully wrote to file: {relative_path}"
136
+ except Exception as e: return f"Error writing file '{relative_path}': {e}"
 
 
 
 
 
 
137
 
138
  @tool("read_file", args_schema=FileReadArgs)
139
  def read_file(relative_path: str) -> str:
140
  """Reads the text content of a file from the agent's workspace. Limited read size."""
141
+ full_path = _resolve_path(relative_path);
142
+ if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
143
+ if not full_path.is_file(): return f"Error: File not found at '{relative_path}'"
 
 
144
  try:
145
+ with open(full_path, 'r', encoding='utf-8') as f: content = f.read(10000); content += "\n... [File truncated due to length]" if len(f.read(1)) > 0 else ""
 
 
 
146
  return content
147
+ except Exception as e: return f"Error reading file '{relative_path}': {e}"
 
148
 
149
  @tool("list_directory", args_schema=ListDirectoryArgs)
150
  def list_directory(relative_path: str = ".") -> str:
151
  """Lists the contents (files and directories) of a specified directory within the agent's workspace."""
152
+ target_path = _resolve_path(relative_path);
153
+ if not target_path: return f"Error: Invalid or disallowed path '{relative_path}'."
154
+ if not target_path.is_dir(): return f"Error: '{relative_path}' is not a valid directory."
155
+ try: items = [f.name + ('/' if f.is_dir() else '') for f in target_path.iterdir()]; items.sort(); return f"Contents of '{relative_path}':\n" + "\n".join(items) if items else f"Directory '{relative_path}' is empty."
156
+ except Exception as e: return f"Error listing directory '{relative_path}': {e}"
 
 
 
 
 
 
 
157
 
158
  @tool("run_python_code", args_schema=RunPythonCodeArgs)
159
  def run_python_code(code: str) -> str:
160
  """Executes Python code in a subprocess and returns the stdout/stderr. Use print() for output. WARNING: Executes arbitrary code."""
161
+ print(f"Executing Python code:\n```python\n{code}\n```")
162
  try:
163
+ process = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True, timeout=30, cwd=AGENT_WORKSPACE, check=False)
 
 
 
164
  output, error = process.stdout, process.stderr
165
+ result = "Execution successful.\n" if process.returncode == 0 else f"Execution failed (Return Code: {process.returncode}).\n"
166
+ if output: max_output = 2000; output = output[:max_output] + ("\n... [Output truncated]" if len(output) > max_output else ""); result += f"Output:\n{output}\n"
167
+ if error: max_error = 1000; error = error[:max_error] + ("\n... [Error truncated]" if len(error) > max_error else ""); result += f"Error Output:\n{error}\n"
168
+ if not output and not error: result += "No output produced." if process.returncode == 0 else "No output or error message produced despite non-zero exit code."
169
+ return result.strip()
170
+ except subprocess.TimeoutExpired: return "Error: Code execution timed out after 30 seconds."
171
+ except Exception as e: return f"Error executing Python code: {e}"
 
 
 
 
 
 
 
 
 
172
 
173
  @tool("scrape_webpage", args_schema=WebScrapeArgs)
174
  def scrape_webpage(url: str, query: Optional[str] = None) -> str:
175
  """Scrapes text content from a given URL using BeautifulSoup. If a query is provided, returns content for the agent to answer it."""
176
+ print(f"Attempting to scrape URL: {url}")
177
  try:
178
+ space_id = os.getenv("SPACE_ID", "YOUR_SPACE_ID")
179
+ headers = {'User-Agent': f'Mozilla/5.0 (compatible; GAIA-Agent/1.0; +https://huggingface.co/spaces/{space_id})'}
180
+ response = requests.get(url, headers=headers, timeout=20); response.raise_for_status()
 
 
181
  content_type = response.headers.get('content-type', '').lower()
182
+ if 'text/html' not in content_type: return f"Error: Content type of URL {url} is '{content_type}', not HTML. Cannot scrape."
 
 
183
  soup = BeautifulSoup(response.text, 'html.parser')
184
+ for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "iframe", "noscript"]): tag.decompose()
185
+ text_content = soup.get_text(separator='\n', strip=True); text_content = '\n'.join(line for line in text_content.splitlines() if line.strip())
186
+ if not text_content: return f"Could not extract meaningful text content from {url} after cleaning."
187
+ max_chars = 10000; text_content = text_content[:max_chars] + ("\n... [Content truncated]" if len(text_content) > max_chars else "")
188
+ print(f"Scraping successful for {url}. Content length (approx): {len(text_content)}")
189
+ if query: return f"Use the following content from {url} to answer the query '{query}':\n\n{text_content}"
190
+ else: return f"Content scraped from {url}:\n\n{text_content}"
191
+ except requests.exceptions.Timeout: return f"Error: Timeout occurred while trying to fetch URL {url}"
192
+ except requests.exceptions.RequestException as e: return f"Error fetching or reading URL {url}: {e}"
193
+ except Exception as e: return f"Error scraping URL {url}: {e}"
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  if PANDAS_AVAILABLE:
196
  @tool("read_excel_file", args_schema=ReadExcelArgs)
197
  def read_excel_file(relative_path: str, sheet_name: Optional[str] = None, max_rows_preview: int = 20) -> str:
198
  """Reads data from an Excel file (.xlsx or .xls) within the workspace and returns a text preview."""
199
+ full_path = _resolve_path(relative_path);
200
  if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
201
  if not full_path.is_file(): return f"Error: Excel file not found at '{relative_path}'"
202
+ print(f"Reading Excel file: {relative_path}")
203
  try:
204
  excel_file = pd.ExcelFile(full_path)
205
+ if not excel_file.sheet_names: return f"Error: Excel file '{relative_path}' contains no sheets."
206
+ sheet_to_read = sheet_name if sheet_name and sheet_name in excel_file.sheet_names else excel_file.sheet_names[0]
207
+ if sheet_name and sheet_name not in excel_file.sheet_names: print(f"Warning: Sheet '{sheet_name}' not found, reading first sheet '{sheet_to_read}' instead.")
208
+ print(f"Reading sheet '{sheet_to_read}' from {relative_path}")
 
 
 
209
  df = pd.read_excel(full_path, sheet_name=sheet_to_read)
210
+ if df.empty: return f"Sheet '{sheet_to_read}' in '{relative_path}' is empty."
211
+ output = f"Preview of sheet '{sheet_to_read}' from '{relative_path}' ({df.shape[0]} rows, {df.shape[1]} columns):\n"
212
+ output += df.to_string(max_rows=max_rows_preview, max_cols=15, line_width=120)
213
+ max_output_len = 5000; output = output[:max_output_len] + ("\n... [Output truncated due to length]" if len(output) > max_output_len else "")
 
 
214
  return output
215
  except Exception as e: return f"Error reading Excel file '{relative_path}': {e}"
216
 
 
218
  @tool("transcribe_audio_file", args_schema=TranscribeAudioArgs)
219
  def transcribe_audio_file(relative_path: str) -> str:
220
  """Transcribes audio content from a file in the workspace using OpenAI Whisper (max 25MB)."""
221
+ full_path = _resolve_path(relative_path);
222
  if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
223
  return _transcribe_audio(full_path, relative_path)
224
 
 
228
  """Downloads audio from a YouTube URL, transcribes it using OpenAI Whisper, and returns the text."""
229
  temp_audio_path = None
230
  try:
231
+ print(f"Processing YouTube URL: {youtube_url}"); yt = YouTube(youtube_url, use_oauth=False, allow_oauth_cache=False)
232
+ print("Fetching available streams...")
233
+ audio_stream = yt.streams.filter(only_audio=True, subtype='webm').order_by('abr').desc().first() or \
234
+ yt.streams.filter(only_audio=True, subtype='mp4').order_by('abr').desc().first() or \
235
+ yt.streams.get_audio_only()
236
+ if not audio_stream: return f"Error: No suitable audio stream found for YouTube video: {youtube_url}"
237
+ print(f"Selected audio stream: Itag {audio_stream.itag}, ABR {audio_stream.abr}")
238
+ try: video_id = yt.video_id
239
+ except: video_id = f"vid_{int(time.time())}"
240
+ temp_filename = f"temp_youtube_{video_id}.{audio_stream.subtype or 'mp4'}"
241
  temp_audio_path = AGENT_WORKSPACE / temp_filename
242
  print(f"Downloading audio to: {temp_audio_path}...")
243
+ audio_stream.download(output_path=AGENT_WORKSPACE, filename=temp_filename); print("Download complete.")
244
+ result = _transcribe_audio(temp_audio_path, f"YouTube video '{yt.title}'"); return result
245
+ except PytubeError as e: return f"Error processing YouTube video {youtube_url} (PytubeError): {e}"
 
 
 
 
 
246
  except Exception as e: return f"Unexpected error during YouTube transcription {youtube_url}: {e}"
247
  finally:
 
248
  if temp_audio_path and temp_audio_path.exists():
249
+ try: temp_audio_path.unlink(); print(f"Cleaned up temporary file: {temp_audio_path}")
250
  except Exception as e: print(f"Warning: Failed to delete temp file {temp_audio_path}: {e}")
251
 
 
252
  # ==============================================================================
253
  # 3. AGENT SETUP
254
  # ==============================================================================
255
 
256
  # --- Initialize LLM ---
257
  try:
258
+ llm = ChatGroq(temperature=0, model_name=GROQ_MODEL_NAME, groq_api_key=GROQ_API_KEY)
 
 
 
 
259
  print(f"Using Groq LLM: {GROQ_MODEL_NAME}")
260
+ except Exception as e: print(f"FATAL: Error initializing Groq LLM: {e}"); sys.exit(1)
 
 
261
 
262
  # --- Assemble Available Tools ---
263
  available_tools = []
264
  if TAVILY_API_KEY:
265
+ try: available_tools.append(TavilySearchResults(max_results=TAVILY_MAX_RESULTS, api_key=TAVILY_API_KEY))
266
+ except Exception as e: print(f"Warning: Failed to initialize Tavily Search tool: {e}. Tool disabled.")
267
+ else: print("Warning: Tavily Search tool disabled (API key missing).")
268
+ available_tools.extend([write_file, read_file, list_directory, run_python_code, scrape_webpage])
 
 
 
 
 
 
 
 
 
 
269
  if PANDAS_AVAILABLE: available_tools.append(read_excel_file)
270
  if OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_audio_file)
271
  if PYTUBE_AVAILABLE and OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_youtube_video)
 
272
  print(f"Agent initialized with tools: {[tool.name for tool in available_tools]}")
273
 
274
  # --- Define System Prompt ---
275
+ # Contains {tools} and {agent_workspace} placeholders.
276
  SYSTEM_PROMPT_TEMPLATE = """You are a highly capable AI assistant designed to solve complex problems step-by-step, mimicking human-like reasoning and actions. Your goal is to accurately answer the user's request based on the GAIA benchmark philosophy.
277
 
278
  **Workspace:** You have access to a local workspace directory: '{agent_workspace}'. You can ONLY interact with files inside this directory using the provided tools. Always use relative paths for file operations.
279
 
280
  **Available Tools:** You have access to the following tools:
281
+ {tools}
282
 
283
  **Reasoning Process:**
284
  1. **Understand:** Analyze the request. Identify objectives, constraints, and required information (text, web search, file content, Excel data, audio/video transcription, calculations).
 
302
  """
303
 
304
  # --- Create Prompt Template ---
305
+ # Pre-format the system prompt string fully before creating the template
306
try:
    # Render the tool descriptions and substitute them, together with the
    # workspace path, into the system prompt text.
    from langchain.tools.render import render_text_description
    from langchain_core.messages import SystemMessage

    tool_descriptions = render_text_description(available_tools)

    formatted_system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
        agent_workspace=str(AGENT_WORKSPACE.resolve()),
        tools=tool_descriptions,
    )

    # The system text is already final, so it is passed as a concrete
    # SystemMessage rather than a ("system", str) tuple. A tuple would be
    # parsed again as a template, turning any literal "{" or "}" left in the
    # formatted text (tool descriptions, paths, escaped braces) into
    # unexpected input variables.
    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(content=formatted_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),  # Still needed by the agent type
        ]
    )

except Exception as e:
    print(f"FATAL: Error creating ChatPromptTemplate: {e}")
    sys.exit(1)
330
+
331
 
332
  # --- Setup Memory ---
333
  memory = ConversationBufferWindowMemory(
334
  k=MEMORY_WINDOW_SIZE,
335
  memory_key="chat_history",
336
+ return_messages=True
337
  )
338
 
339
  # --- Create Agent ---
340
+ # Using create_openai_tools_agent
341
+ try:
342
+ agent = create_openai_tools_agent(llm, available_tools, prompt)
343
+ except Exception as e:
344
+ print(f"FATAL: Error creating agent with create_openai_tools_agent: {e}")
345
+ import traceback
346
+ traceback.print_exc()
347
+ sys.exit(1)
348
 
349
  # --- Create Agent Executor ---
350
+ try:
351
+ agent_executor = AgentExecutor(
352
+ agent=agent,
353
+ tools=available_tools,
354
+ memory=memory,
355
+ verbose=True,
356
+ max_iterations=MAX_ITERATIONS,
357
+ handle_parsing_errors=True,
358
+ )
359
+ except Exception as e:
360
+ print(f"FATAL: Error creating AgentExecutor: {e}")
361
+ sys.exit(1)
362
 
363
  # ==============================================================================
364
+ # 4. EXECUTION FUNCTION (Exported for app.py)
365
  # ==============================================================================
 
366
  def run_gaia_task(task_description: str):
367
+ """Runs the GAIA agent on a given task description. This is the main entry point."""
368
+ print("\n" + "="*50 + f"\n🚀 Running GAIA Task\n📝 Task: {task_description[:150]}...\n📍 Workspace: {AGENT_WORKSPACE.resolve()}\n🛠️ Tools: {[tool.name for tool in available_tools]}\n" + "="*50 + "\n")
369
+ memory.clear() # Reset memory for the task
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  try:
371
+ if 'agent_executor' not in globals() or agent_executor is None: return "Error: Agent Executor not initialized."
372
  result = agent_executor.invoke({"input": task_description})
373
+ final_output = result.get('output', 'Agent finished but produced no output.')
374
+ print("\n" + "="*50 + f"\n✅ Agent Execution Finished\n🏁 Final Output:\n{final_output}\n" + "="*50 + "\n")
375
+ return str(final_output)
 
 
 
 
376
  except Exception as e:
377
+ print(f"\n{'='*50}\n❌ Agent Execution Error during task run\nAn error occurred: {e}\n{'='*50}\n")
378
+ import traceback; traceback.print_exc() # Print full traceback for debugging
 
 
 
 
 
379
  return f"Agent failed with error: {e}"
380
 
381
  # ==============================================================================
382
+ # 5. EXAMPLE USAGE (Local Testing)
383
  # ==============================================================================
 
384
  if __name__ == "__main__":
385
+ print("\n" + "*"*30 + " LOCAL TEST RUN " + "*"*30)
386
  print("--- Setting up example files (if needed) ---")
 
387
  if PANDAS_AVAILABLE:
388
  try:
389
  dummy_excel_path = AGENT_WORKSPACE / "sample_data.xlsx"
390
+ if not dummy_excel_path.exists(): pd.DataFrame({'ID': [1, 2, 3], 'Product': ['Widget', 'Gadget', 'Thingamajig']}).to_excel(dummy_excel_path, index=False); print(f"Created dummy Excel: {dummy_excel_path}")
 
 
391
  except Exception as e: print(f"Could not create dummy Excel: {e}")
 
392
  try:
393
  dummy_text_path = AGENT_WORKSPACE / "numbers.txt"
394
  if not dummy_text_path.exists():
395
+ with open(dummy_text_path, "w") as f: f.write("15\n-3\n42.5\n100\n"); print(f"Created dummy text file: {dummy_text_path}")
 
396
  except Exception as e: print(f"Could not create dummy text file: {e}")
 
397
  dummy_audio_path = AGENT_WORKSPACE / "sample_audio.mp3"
398
+ if not dummy_audio_path.exists() and OPENAI_AVAILABLE and openai_client: print(f"INFO: To test audio transcription, place an MP3 file at: {dummy_audio_path}")
 
399
  print("--- Example setup complete ---")
400
 
401
+ example_tasks = [
402
+ {"id": "local_excel_read", "description": "Read the file 'sample_data.xlsx' in the workspace. What is the 'Product' where 'ID' is 2? Final answer should be just the product name."},
403
+ {"id": "local_python_sum", "description": "Read the numbers from 'numbers.txt' in the workspace (one per line). Calculate their sum using python code. Write the sum into 'sum_result.txt'. Final answer should be the relative path 'sum_result.txt'."},
404
+ {"id": "local_search_scrape_write", "description": "Search the web for the official website of the Python Software Foundation. Scrape the main title from the homepage of that website. Write the title into 'psf_title.txt'. Final answer is 'psf_title.txt'."},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  ]
406
 
407
+ if example_tasks:
408
+ task_to_run = example_tasks[0] # Change index to test different tasks
409
+ print(f"\n>>> Running local test task: {task_to_run['id']} <<<")
410
+ final_answer = run_gaia_task(task_to_run['description'])
411
+ print(f">>> Local test task {task_to_run['id']} completed. Agent Output: {final_answer} <<<")
412
+ else: print("No example tasks defined for local testing.")
413
+ print("\n" + "*"*30 + " LOCAL TEST RUN COMPLETE " + "*"*30)