Final_Assignment_Template

Sleeping

App Files Files Community

Macmill committed on Apr 28, 2025

Commit

8c86b22

verified ·

1 Parent(s): d7b35af

Update final_agent.py

Browse files

Files changed (1) hide show

final_agent.py +536 -418

final_agent.py CHANGED Viewed

@@ -1,463 +1,581 @@
-# ==============================================================================
-# Imports
-# ==============================================================================
 import os
-import requests
-import traceback
-import html2text # For HTML to text conversion
-import tempfile # For file handling tools
-import pandas as pd # For CSV/Excel analysis
-import openpyxl # For Excel analysis
-from PIL import Image # For image text extraction
-import pytesseract # For image text extraction
-from urllib.parse import urlparse # For download tool
-from typing import Annotated, List, TypedDict, Optional
 from dotenv import load_dotenv
-import time # For adding potential delays if needed later
-# LangChain and LangGraph Imports
-from langgraph.graph import StateGraph, START, END
-from langgraph.graph.message import add_messages
-from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
-from langchain_core.tools import tool
-# LLM Import - Using Groq
-from langchain_groq import ChatGroq
-from langchain_community.tools.tavily_search import TavilySearchResults
-# ==============================================================================
-# Environment Setup & LLM
-# ==============================================================================
-load_dotenv()
-tavily_api_key = os.getenv("TAVILY_API_KEY")
-groq_api_key = os.getenv("GROQ_API_KEY")
-# --- Optional: Tesseract Path ---
-# If Tesseract OCR is not in your system's PATH environment variable,
-# uncomment the following line and set the correct path to tesseract.exe
-# try:
-#     pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # Example path for Windows
-# except NameError: pass # Handles case where pytesseract might not be imported yet if PIL fails first
-# except Exception as e: print(f"Warning: Could not set tesseract_cmd path: {e}")
-# --- Validate API Keys ---
-if not tavily_api_key:
-    raise ValueError("TAVILY_API_KEY not found in environment variables/Space secrets.")
-if not groq_api_key:
-    raise ValueError("GROQ_API_KEY not found in environment variables/Space secrets.")
-# --- Initialize LLM (Using Groq) ---
 try:
-    llm = ChatGroq(
-        model="meta-llama/llama-4-maverick-17b-128e-instruct", # Powerful model available on Groq, good for reasoning
-        # model="gemma2-9b-it", # Alternative lighter model
-        api_key=groq_api_key,
-        temperature=0.3 # Low temperature for factual tasks
-    )
-    print(f"LLM Initialized: Groq - {llm.model_name}")
-except Exception as e:
-     print(f"ERROR initializing Groq LLM: {e}")
-     traceback.print_exc()
-     raise # Stop if LLM fails to init
 # ==============================================================================
-# State Definition
 # ==============================================================================
-class AgentState(TypedDict):
-    """Defines the structure of the information the agent tracks during its run."""
-    input_question: str # The original question from the benchmark
-    messages: Annotated[List[BaseMessage], add_messages] # History of interactions (Human, AI, Tool)
-    error: Optional[str] # Stores any error message encountered
-    iterations: int # Counter for agent steps to prevent loops
 # ==============================================================================
-# Tools Definitions
 # ==============================================================================
-print("Defining tools...")
-# --- Search Tool (Tavily) ---
-search_tool = TavilySearchResults(max_results=3, api_key=tavily_api_key)
-search_tool.name = "web_search"
-search_tool.description = "Performs a web search (using Tavily) to find relevant URLs/snippets for a query."
-# --- Web Browser Tool (html2text) ---
-@tool
-def web_browser(url: str) -> str:
-    """Fetches text content from a webpage URL using html2text. Use after 'web_search'."""
-    print(f"--- [Tool] Browsing (html2text): {url} ---")
     try:
-        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
-        response = requests.get(url, headers=headers, timeout=20)
-        response.raise_for_status()
-        response.encoding = response.apparent_encoding or 'utf-8'
-        # Configure html2text
-        h = html2text.HTML2Text(bodywidth=0)
-        h.ignore_links = True
-        h.ignore_images = True
-        # Convert HTML to text
-        clean_text = h.handle(response.text)
-        # Limit content length
-        max_length = 6000
-        if len(clean_text) > max_length:
-            return clean_text[:max_length] + "\n\n... [Content Truncated]"
-        cleaned_and_stripped = clean_text.strip()
-        return cleaned_and_stripped if cleaned_and_stripped else f"Error: No meaningful content via html2text for {url}."
-    except requests.exceptions.RequestException as e:
-        return f"Error: Network request failed for URL: {url}. Reason: {e}"
-    except Exception as e:
-        return f"Error: Unexpected error processing URL with html2text: {url}. Reason: {str(e)}"
-# --- File Download Tool ---
-@tool
-def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
-    """Downloads a file from a URL to a temporary directory. Input: file URL. Returns: path to downloaded file or error."""
-    print(f"--- [Tool] Downloading file from: {url} ---")
     try:
-        # Generate filename if needed
-        if not filename:
-            try: path = urlparse(url).path; filename = os.path.basename(path) if path else None
-            except Exception: filename = None
-            if not filename: import uuid; filename = f"downloaded_{uuid.uuid4().hex[:8]}"
-        # Define save path
-        temp_dir = tempfile.gettempdir(); filepath = os.path.join(temp_dir, filename)
-        # Download file
-        response = requests.get(url, stream=True, timeout=30); response.raise_for_status()
-        with open(filepath, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192): f.write(chunk)
-        print(f"--- [Tool] File downloaded to: {filepath} ---")
-        return f"File downloaded to {filepath}. Use appropriate tools (e.g., analyze_csv_file) to process it."
-    except requests.exceptions.RequestException as e:
-        return f"Error downloading file: Network issue for {url}. Reason: {e}"
     except Exception as e:
-        return f"Error downloading file: Unexpected error for {url}. Reason: {str(e)}"
-# --- CSV Analysis Tool ---
-@tool
-def analyze_csv_file(file_path: str) -> str:
-    """Analyzes a CSV file at the given path using pandas. Returns a summary of content or error."""
-    print(f"--- [Tool] Analyzing CSV: {file_path} ---")
-    # GAIA might provide relative paths, ensure they work or adjust logic if needed
-    if not os.path.exists(file_path): return f"Error: CSV file not found at path: {file_path}"
     try:
-        df = pd.read_csv(file_path)
-        # Generate summary string
-        summary = f"CSV Analysis Report for {os.path.basename(file_path)}:\n"
-        summary += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
-        summary += f"- Columns: {', '.join(df.columns)}\n"
-        summary += f"\nFirst 5 rows:\n{df.head().to_string()}\n"
-        numeric_cols = df.select_dtypes(include=['number'])
-        if not numeric_cols.empty:
-             summary += f"\nBasic Stats (Numeric):\n{numeric_cols.describe().to_string()}"
-        else:
-             summary += "\nNo numeric columns for stats."
-        return summary
-    except ImportError: return "Error: 'pandas' required but not installed."
-    except Exception as e: return f"Error analyzing CSV {file_path}: {str(e)}"
-# --- Excel Analysis Tool ---
-@tool
-def analyze_excel_file(file_path: str) -> str:
-    """Analyzes an Excel file (.xlsx, .xls) at the given path. Returns a summary of the first sheet or error."""
-    print(f"--- [Tool] Analyzing Excel: {file_path} ---")
-    if not os.path.exists(file_path): return f"Error: Excel file not found at path: {file_path}"
     try:
-        df = pd.read_excel(file_path, engine='openpyxl')
-        # Generate summary string
-        summary = f"Excel Analysis Report for {os.path.basename(file_path)} (First Sheet):\n"
-        summary += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
-        summary += f"- Columns: {', '.join(df.columns)}\n"
-        summary += f"\nFirst 5 rows:\n{df.head().to_string()}\n"
-        numeric_cols = df.select_dtypes(include=['number'])
-        if not numeric_cols.empty:
-             summary += f"\nBasic Stats (Numeric):\n{numeric_cols.describe().to_string()}"
-        else:
-             summary += "\nNo numeric columns for stats."
-        return summary
-    except ImportError: return "Error: 'pandas' and 'openpyxl' required but not installed."
-    except Exception as e: return f"Error analyzing Excel {file_path}: {str(e)}"
-# --- Image Text Extraction Tool (OCR) ---
-@tool
-def extract_text_from_image(file_path: str) -> str:
-    """Extracts text from an image file at the given path using Tesseract OCR. Returns extracted text or error."""
-    print(f"--- [Tool] Extracting text from image: {file_path} ---")
-    if not os.path.exists(file_path): return f"Error: Image file not found at path: {file_path}"
     try:
-        # Need to explicitly handle potential empty string from pytesseract
-        text = pytesseract.image_to_string(Image.open(file_path))
-        text_stripped = text.strip()
-        # Return a clear message if no text found, otherwise return extracted text
-        return f"Extracted text from image '{os.path.basename(file_path)}':\n{text_stripped}" if text_stripped else "No text found in image."
-    except ImportError: return "Error: 'Pillow' or 'pytesseract' required but not installed."
-    except pytesseract.TesseractNotFoundError: return "Error: Tesseract OCR not installed or not in PATH."
-    except Exception as e: return f"Error extracting text from image {file_path}: {str(e)}"
-# --- Basic Math Tools ---
-@tool
-def add(a: float, b: float) -> float:
-    """Adds two numbers (a + b). Handles float inputs."""
-    print(f"--- [Tool] Calculating: {a} + {b} ---")
-    return a + b
-@tool
-def subtract(a: float, b: float) -> float:
-    """Subtracts the second number from the first (a - b). Handles float inputs."""
-    print(f"--- [Tool] Calculating: {a} - {b} ---")
-    return a - b
-@tool
-def multiply(a: float, b: float) -> float:
-    """Multiplies two numbers (a * b). Handles float inputs."""
-    print(f"--- [Tool] Calculating: {a} * {b} ---")
-    return a * b
-@tool
-def divide(a: float, b: float) -> float | str:
-    """Divides the first number by the second (a / b). Handles float inputs and division by zero."""
-    print(f"--- [Tool] Calculating: {a} / {b} ---")
-    if b == 0: return "Error: Cannot divide by zero."
-    return a / b
-# --- Compile list of all tools ---
-tools = [ search_tool, web_browser, download_file_from_url, analyze_csv_file,
-          analyze_excel_file, extract_text_from_image, add, subtract, multiply, divide ]
-# --- Bind tools to the LLM ---
-# Ensure LLM is initialized before binding
-if 'llm' not in globals():
-    raise RuntimeError("LLM was not initialized successfully before binding tools.")
-llm_with_tools = llm.bind_tools(tools)
-print(f"Agent initialized with {len(tools)} tools.")
-# ==============================================================================
-# Node Definitions (With Logging Added)
-# ==============================================================================
-print("Defining graph nodes...")
-# --- Agent Node ---
-def call_agent_node(state: AgentState) -> dict:
-    """Invokes the LLM with current state to decide the next step."""
-    # --- Logging: Node Entry ---
-    print(f"\n>>> Entering Agent Node (Iteration {state['iterations']})")
-    MAX_ITERATIONS = 15 # Max steps allowed for the task - Increased slightly
-    current_iterations = state.get('iterations', 0)
-    if current_iterations >= MAX_ITERATIONS:
-        print(f"!!! Agent Node: Max iterations ({MAX_ITERATIONS}) reached. Setting error.")
-        return {"error": f"Max iterations ({MAX_ITERATIONS}) reached."}
     try:
-        print(f"--- Agent Node: Invoking LLM ({llm.model_name})... ---") # Log before LLM call
-        # Ensure LLM is bound with tools before invoking
-        if 'llm_with_tools' not in globals():
-             return {"error": "LLM tools not bound."}
-        response = llm_with_tools.invoke(state['messages'])
-        print(f"--- Agent Node: LLM Invocation Complete. ---") # Log after LLM call
-        # response.pretty_print() # Optional: Print formatted LLM response
-        # --- Logging: Node Exit (Success) ---
-        print(f"<<< Exiting Agent Node (Success, Iteration {current_iterations + 1})")
-        return {"messages": [response], "iterations": current_iterations + 1}
     except Exception as e:
-        error_message = f"LLM invocation failed: {str(e)}"
-        print(f"!!! Agent Node ERROR: {error_message} !!!")
-        traceback.print_exc() # Print full traceback for debugging LLM errors
-        # --- Logging: Node Exit (Error) ---
-        print(f"<<< Exiting Agent Node (LLM Error, Iteration {current_iterations})")
-        # Return an error message and set error state, still increment iteration to prevent infinite error loops
-        return {"messages": [AIMessage(content=f"Error during LLM call: {error_message}")], "error": error_message, "iterations": current_iterations + 1}
-# --- Tool Node Wrapper (for Logging) ---
-# We still use the prebuilt ToolNode, but wrap its call for logging
-tool_executor = ToolNode(tools) # Keep the instance
-def logged_tool_node(state: AgentState) -> dict:
-    """Logs tool execution start/end and calls the actual ToolNode."""
-    print(f">>> Entering Tool Node")
-    # Log requested tools
-    last_message = state['messages'][-1]
-    requested_tools_str = "None"
-    tool_calls = []
-    if hasattr(last_message, "tool_calls") and last_message.tool_calls:
-        tool_calls = last_message.tool_calls
-        tool_names = [tc.get('name', 'unknown') for tc in tool_calls]
-        requested_tools_str = ", ".join(tool_names)
-    print(f"--- Tool Node: Executing tools: {requested_tools_str} ---")
-    if tool_calls: print(f"--- Tool Node: Tool Args: {[tc.get('args') for tc in tool_calls]} ---")
     try:
-        # Call the actual ToolNode instance
-        result = tool_executor.invoke(state)
-        # Log truncated results
-        print("--- Tool Node: Tool Execution Results ---")
-        if isinstance(result.get("messages"), list):
-            for msg in result["messages"]:
-                if isinstance(msg, ToolMessage):
-                    print(f"  - Tool: {msg.name}, Result (start): {str(msg.content)[:200]}...") # Slightly more context
-        print(f"<<< Exiting Tool Node (Success)")
-        return result # Return the dictionary containing ToolMessages
     except Exception as e:
-         error_message = f"ToolNode invocation exception: {str(e)}"
-         print(f"!!! Tool Node ERROR: {error_message} !!!")
-         traceback.print_exc()
-         print(f"<<< Exiting Tool Node (Error)")
-         # Return an error message in the expected format
-         return {"messages": [ToolMessage(content=error_message, tool_call_id="tool_node_error")]}
 # ==============================================================================
-# Graph Construction (Non-conversational, using logged tool node)
 # ==============================================================================
-print("Building agent graph...")
-builder = StateGraph(AgentState)
-builder.add_node("agent", call_agent_node)
-builder.add_node("tools", logged_tool_node) # Use the logging wrapper node
-builder.add_edge(START, "agent")
-builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", END: END})
-builder.add_edge("tools", "agent")
-# Compile the graph globally so it's ready for the function call
 try:
-    graph = builder.compile()
-    print("GAIA agent graph compiled successfully.")
 except Exception as e:
-    print(f"ERROR: Failed to compile LangGraph graph: {e}")
-    traceback.print_exc()
-    graph = None # Ensure graph is None if compilation fails
-    raise # Reraise exception to make startup failure clear
 # ==============================================================================
-# Main Execution Function for GAIA Benchmark <<<< WRAPPER FUNCTION >>>>
 # ==============================================================================
-def answer_gaia_task(question: str, file_path: Optional[str] = None) -> str:
-    """
-    Runs the compiled GAIA agent graph for a given question and optional file path.
-    This is the main entry point expected by the benchmark runner.
-    """
-    # Check if graph compilation was successful
-    if graph is None:
-         return "Error: Agent graph was not compiled successfully during setup."
-    print(f"\n{'='*20} Running Agent for GAIA Task {'='*20}")
-    print(f"Question: {question}")
-    file_context_info = f"An associated file is provided at path: '{file_path}'. Use this path if relevant." if file_path else ""
-    # Define the initial prompt sent to the agent, incorporating strict formatting rules
-    prompt_content = f"""Your task is to accurately answer the following question based *only* on information obtained using your tools (web search, web browser, file download, csv/excel analysis, image OCR, math).
-{file_context_info}
-Follow these steps methodically:
-1. Analyze the question: {question}
-2. Use tools ONLY if necessary to gather the specific information required. Assume local file paths mentioned (like 'data.csv') are accessible.
-3. Synthesize the final answer from the gathered information.
-**CRITICAL OUTPUT FORMATTING RULES:**
-*   Your final response MUST be ONLY the answer, without any other text/explanations.
-*   **Numbers:** No commas (1000). No units ($ , %) unless asked.
-*   **Strings:** No articles (a, an, the) unless proper noun. No abbreviations (Saint Petersburg) unless answer is abbreviation. Use numerals (5).
-*   **Lists:** Comma-separated (apple,banana,cherry). Apply number/string rules to elements.
-*   If answer not found, output only the exact phrase: Information not found
-Provide ONLY the final answer according to these rules.
-"""
-    # Create the initial state for the graph run
-    initial_state = AgentState(
-        input_question=question,
-        messages=[HumanMessage(content=prompt_content)],
-        error=None,
-        iterations=0
-    )
-    final_answer = "Error: Agent execution did not complete successfully." # Default fallback
     try:
-        # Invoke the compiled graph
-        final_state = graph.invoke(initial_state, config={"recursion_limit": 20}) # Increased recursion limit
-        # Process the final state to extract the answer
-        if final_state:
-            # Prioritize showing agent error if one occurred
-            if final_state.get("error"):
-                print(f"--- Agent stopped due to ERROR: {final_state['error']} ---")
-                final_answer = f"Error: {final_state['error']}"
-            # Otherwise, try to get the last AI message content
-            elif final_state.get('messages') and isinstance(final_state['messages'][-1], AIMessage):
-                potential_answer = final_state['messages'][-1].content
-                # Basic cleanup for potential quotes added by LLM
-                if isinstance(potential_answer, str):
-                     if (potential_answer.startswith('"') and potential_answer.endswith('"')) or \
-                        (potential_answer.startswith("'") and potential_answer.endswith("'")):
-                          potential_answer = potential_answer[1:-1].strip()
-                print(f"--- Final Answer (from AI): {potential_answer} ---")
-                final_answer = potential_answer
-            else:
-                 print("--- Could not determine final answer (last message not AI or missing). Check logs. ---")
-                 # Log final state details for debugging
-                 print(f"Final State: Error={final_state.get('error')}, Iterations={final_state.get('iterations')}")
-    except Exception as e:
-        print(f"--- Graph execution failed unexpectedly: {e} ---")
-        traceback.print_exc()
-        final_answer = f"Error: Graph execution failed - {str(e)}"
-    print(f"{'='*20} Agent Run Finished {'='*20}")
-    # Return the final answer string
-    return str(final_answer)
 # ==============================================================================
-# Local Testing Block (Optional)
 # ==============================================================================
-# This block allows you to test the agent by running final_agent.py directly.
-if __name__ == "__main__":
-    print("\n--- Running Local Test ---")
-    # --- Define Test Question ---
-    test_question = "What is the result of multiplying the number of rows (excluding the header) in 'data.csv' by the number found after the phrase 'total items:' in 'image.png'?"
-    # --- Create Dummy Files for Local Test ---
-    print("Creating dummy files for local test...")
-    dummy_files_created = True
-    try:
-        # Dummy CSV with 3 data rows + header
-        with open("data.csv", "w") as f:
-            f.write("Header1,Header2\nRow1Val1,Row1Val2\nRow2Val1,Row2Val2\nRow3Val1,Row3Val2")
-        # Dummy Image containing the required text
         try:
-            img = Image.new('RGB', (300, 50), color = (255, 255, 255)) # White background
-            from PIL import ImageDraw, ImageFont # Import drawing tools locally
-            draw = ImageDraw.Draw(img)
-            # Use a basic font if specific ones aren't found
-            try: font = ImageFont.truetype("arial.ttf", 15)
-            except IOError: font = ImageFont.load_default()
-            draw.text((10,10), "Some random info... total items: 7 ... more text", fill=(0,0,0), font=font) # Black text
-            img.save("image.png")
-            print("Dummy data.csv and image.png created successfully.")
-        except ImportError:
-            print("Pillow/ImageDraw/ImageFont not installed. Cannot create dummy image file.")
-            dummy_files_created = False
-        except Exception as img_e:
-            print(f"Error creating dummy image: {img_e}")
-            dummy_files_created = False
-    except Exception as file_e:
-        print(f"Error creating dummy files: {file_e}")
-        dummy_files_created = False
-    # ---------------------------------------------
-    # --- Run the Test ---
-    if dummy_files_created:
-        # Call the main function, simulating how the benchmark runner would call it.
-        result = answer_gaia_task(question=test_question, file_path=None)
-        print(f"\n--- Local Test Result ---")
-        print(f"Returned Answer: {result}")
-        print(f"Expected Answer (for dummy files): 21") # 3 data rows * 7 = 21
-    else:
-        print("Skipping test execution due to issues creating dummy files.")
-    # --- Clean up Dummy Files ---
-    print("\nCleaning up dummy files...")
-    for dummy_file in ["data.csv", "image.png"]:
-        if os.path.exists(dummy_file):
-            try: os.remove(dummy_file)
-            except Exception as e: print(f"Could not remove {dummy_file}: {e}")
-    print("Dummy file cleanup attempted.")

+# -*- coding: utf-8 -*-
+"""
+GAIA Benchmark Agent using LangChain, Groq, Tavily, and various tools.
+This agent is designed to interact with files, search the web, scrape pages,
+execute Python code, read Excel files, and transcribe audio/YouTube videos
+to tackle complex tasks like those found in the GAIA benchmark.
+"""
+# --- Core Libraries ---
 import os
+import sys
+import subprocess
+import time
+import importlib
+from pathlib import Path
+from typing import List, Optional, Dict, Any
+# --- Environment & Configuration ---
 from dotenv import load_dotenv
+# --- LangChain Imports ---
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.tools import BaseTool, tool
+from langchain.pydantic_v1 import BaseModel, Field # Use Pydantic v1 for Langchain tool compatibility
+from langchain.memory import ConversationBufferWindowMemory
+from langchain.agents import AgentExecutor, create_structured_chat_agent
+# --- Tool Specific Imports ---
+# Search
+from langchain_community.utilities import TavilySearchResults
+# Web Scraping
+import requests
+from bs4 import BeautifulSoup
+# LLM
+from langchain_groq import ChatGroq
+# Audio/Video Transcription (Optional)
 try:
+    import openai
+    OPENAI_AVAILABLE = True
+except ImportError:
+    OPENAI_AVAILABLE = False
+# Excel Reading (Optional)
+try:
+    import pandas as pd
+    PANDAS_AVAILABLE = True
+except ImportError:
+    PANDAS_AVAILABLE = False
+# YouTube Processing (Optional)
+try:
+    from pytube import YouTube
+    from pytube.exceptions import PytubeError
+    PYTUBE_AVAILABLE = True
+except ImportError:
+    PYTUBE_AVAILABLE = False
 # ==============================================================================
+# 1. CONFIGURATION
 # ==============================================================================
+# Module configuration: loads environment variables, defines agent constants,
+# and reports (or aborts on) missing API keys and optional dependencies.
+# NOTE: everything below runs at import time.
+load_dotenv() # Load environment variables from .env file if it exists
+# --- Agent Settings ---
+# Relative path, so it is interpreted against the process's current working directory.
+AGENT_WORKSPACE = Path("./gaia_agent_workspace")
+AGENT_WORKSPACE.mkdir(exist_ok=True) # Ensure workspace directory exists
+# NOTE(review): presumably the agent step cap and the conversation-memory window
+# size; their use is outside this view -- confirm.
+MAX_ITERATIONS = 15
+MEMORY_WINDOW_SIZE = 10
+# --- LLM Configuration ---
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+GROQ_MODEL_NAME = os.getenv("GROQ_MODEL_NAME", "llama3-70b-8192") # Default if not set
+# --- Tool Configuration ---
+TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+TAVILY_MAX_RESULTS = 3
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # Needed for Whisper
+WHISPER_MODEL = "whisper-1"
+# --- Dependency & API Key Checks ---
+# A missing Groq key is fatal: the process exits with status 1.
+if not GROQ_API_KEY:
+    print("ERROR: GROQ_API_KEY environment variable not set. Agent cannot run.")
+    sys.exit(1)
+# A missing Tavily key is only reported; execution continues.
+if not TAVILY_API_KEY:
+    print("ERROR: TAVILY_API_KEY environment variable not set. Search tool disabled.")
+    # Decide if this is fatal or just disables the tool
+    # sys.exit(1) # Uncomment to make it fatal
+# openai_client stays None unless the openai library imported, a key is set,
+# and the client constructor succeeds; _transcribe_audio checks it before use.
+openai_client = None
+if OPENAI_AVAILABLE and OPENAI_API_KEY:
+    try:
+        openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)
+        print("OpenAI client initialized for Whisper transcription.")
+    except Exception as e:
+        print(f"Warning: Failed to initialize OpenAI client: {e}. Transcription tools disabled.")
+        openai_client = None
+elif OPENAI_AVAILABLE:
+    print("Warning: OpenAI library installed, but OPENAI_API_KEY not set. Transcription tools disabled.")
+else:
+    print("Info: OpenAI library not installed. Transcription tools disabled.")
+# The optional-import flags are only reported here; nothing is raised.
+if not PANDAS_AVAILABLE:
+    print("Info: 'pandas' library not installed. Excel tool disabled. Install with: pip install pandas openpyxl")
+if not PYTUBE_AVAILABLE:
+    print("Info: 'pytube' library not installed. YouTube tool disabled. Install with: pip install pytube")
 # ==============================================================================
+# 2. TOOL DEFINITIONS
 # ==============================================================================
+# --- Tool Input Schemas (Pydantic Models) ---
+# Using Pydantic v1 as required by Langchain tools at the time of writing
+# Argument schema for the write_file tool.
+class FileWriteArgs(BaseModel):
+    relative_path: str = Field(description="Relative path within the agent's workspace where the file should be written.")
+    content: str = Field(description="The text content to write into the file.")
+# Argument schema for the read_file tool.
+class FileReadArgs(BaseModel):
+    relative_path: str = Field(description="Relative path within the agent's workspace of the file to read.")
+# Argument schema for the list_directory tool; the path defaults to the workspace root.
+class ListDirectoryArgs(BaseModel):
+    relative_path: str = Field(default=".", description="Relative path within the agent's workspace to list contents of. Use '.' for the root.")
+# NOTE(review): the tools that consume the remaining schemas are outside this
+# view; the pairing suggested by each class name should be confirmed.
+# Arguments for running a snippet of Python code.
+class RunPythonCodeArgs(BaseModel):
+    code: str = Field(description="The Python code to execute. Use 'print()' to output results. Code runs in isolation.")
+# Arguments for scraping a web page, with an optional question about its content.
+class WebScrapeArgs(BaseModel):
+    url: str = Field(description="The URL of the webpage to scrape.")
+    query: Optional[str] = Field(default=None, description="Optional specific question to answer from the page content.")
+# Arguments for previewing an Excel workbook stored in the workspace.
+class ReadExcelArgs(BaseModel):
+    relative_path: str = Field(description="Relative path within the agent's workspace of the Excel file (.xlsx or .xls).")
+    sheet_name: Optional[str] = Field(default=None, description="Optional name of the specific sheet to read. Reads the first sheet if not specified.")
+    max_rows_preview: int = Field(default=20, description="Maximum number of rows to include in the text preview.")
+# Arguments for transcribing an audio file stored in the workspace.
+class TranscribeAudioArgs(BaseModel):
+    relative_path: str = Field(description="Relative path within the agent's workspace of the audio file (e.g., .mp3, .wav, .m4a). Max 25MB.")
+# Arguments for transcribing the audio track of a YouTube video.
+class TranscribeYouTubeArgs(BaseModel):
+    youtube_url: str = Field(description="The URL of the YouTube video to transcribe. Audio will be downloaded temporarily.")
+# --- Helper Functions ---
+def _resolve_path(relative_path: str) -> Optional[Path]:
+    """Resolve ``relative_path`` against the agent workspace, refusing escapes.
+
+    Args:
+        relative_path: Path supplied by the agent, interpreted relative to
+            ``AGENT_WORKSPACE``.
+
+    Returns:
+        The absolute, resolved path when it is the workspace itself or lies
+        inside it; ``None`` when it falls outside the workspace or cannot be
+        resolved.
+    """
     try:
+        workspace_root = AGENT_WORKSPACE.resolve()
+        full_path = (workspace_root / relative_path).resolve()
+        # Security Check: compare path components rather than string prefixes.
+        # A plain startswith() would accept a sibling directory such as
+        # "<workspace>_evil/secret.txt" because it shares the same prefix.
+        full_path.relative_to(workspace_root) # Raises ValueError when outside
+        return full_path
+    except Exception: # Outside the workspace, or a path resolution error
+        return None
+def _transcribe_audio(file_path: Path, file_description: str) -> str:
+    """Transcribe an audio file with OpenAI Whisper and return a text report.
+
+    Args:
+        file_path: Location of the audio file on disk.
+        file_description: Human-readable label used in log and result messages.
+
+    Returns:
+        On success, a header line followed by the transcript (cut to 10000
+        characters). Otherwise a message describing the problem: no client,
+        missing file, file over 25 MB, API error, or unexpected response type.
+    """
+    if not openai_client:
+        return "Error: OpenAI client not available for transcription."
+    if not file_path.is_file():
+        # AGENT_WORKSPACE is a relative path, while _resolve_path hands out
+        # absolute ones; relative_to() raises ValueError on that mix. Resolve
+        # both sides and fall back to the path as given so this branch always
+        # returns its error message instead of raising.
+        try:
+            display_path = file_path.resolve().relative_to(AGENT_WORKSPACE.resolve())
+        except (ValueError, OSError, RuntimeError):
+            display_path = file_path
+        return f"Error: Audio file not found at '{display_path}'"
     try:
+        file_size_mb = file_path.stat().st_size / (1024 * 1024)
+        if file_size_mb > 25:
+            return f"Error: Audio file '{file_description}' is too large ({file_size_mb:.2f} MB). Max 25 MB."
+        print(f"Transcribing audio: {file_description}...")
+        with open(file_path, "rb") as audio_file_handle:
+            # Note: response_format="text" returns a simple string
+            transcript = openai_client.audio.transcriptions.create(
+                model=WHISPER_MODEL,
+                file=audio_file_handle,
+                response_format="text"
+            )
+        print("Transcription complete.")
+        if isinstance(transcript, str):
+            max_len = 10000 # Limit transcription length in output
+            if len(transcript) > max_len:
+                transcript = transcript[:max_len] + "\n... [Transcription truncated]"
+            return f"Transcription of '{file_description}':\n{transcript}"
+        else:
+            return f"Transcription of '{file_description}' succeeded, but format was unexpected: {type(transcript)}"
+    except openai.APIError as e:
+        return f"OpenAI API Error during transcription of '{file_description}': {e}"
     except Exception as e:
+        return f"Error transcribing '{file_description}': {e}"
+# --- Tool Implementations ---
+@tool("write_file", args_schema=FileWriteArgs)
+def write_file(relative_path: str, content: str) -> str:
+    """Writes text content to a file within the agent's workspace. Creates parent directories if needed."""
+    # The docstring above is what @tool uses as the tool description, so it is kept verbatim.
+    destination = _resolve_path(relative_path)
+    # _resolve_path yields None for paths it rejects.
+    if not destination:
+        return f"Error: Invalid or disallowed path '{relative_path}'."
     try:
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        with open(destination, 'w', encoding='utf-8') as out_file:
+            out_file.write(content)
+    except Exception as e:
+        return f"Error writing file '{relative_path}': {e}"
+    return f"Successfully wrote to file: {relative_path}"
+@tool("read_file", args_schema=FileReadArgs)
+def read_file(relative_path: str) -> str:
+    """Reads the text content of a file from the agent's workspace. Limited read size."""
+    # A falsy result from _resolve_path means the path was rejected.
+    full_path = _resolve_path(relative_path)
+    if not full_path:
+        return f"Error: Invalid or disallowed path '{relative_path}'."
+    if not full_path.is_file():
+        return f"Error: File not found at '{relative_path}'"
     try:
+        # Text mode: the limit below counts decoded characters, not bytes.
+        with open(full_path, 'r', encoding='utf-8') as f:
+            content = f.read(10000) # Limit read size
+            # Probe for one more character; anything left means the file was cut off.
+            if len(f.read(1)) > 0:
+                 content += "\n... [File truncated due to length]"
+        return content
+    except Exception as e:
+        # Includes decoding failures for files that are not valid UTF-8.
+        return f"Error reading file '{relative_path}': {e}"
+@tool("list_directory", args_schema=ListDirectoryArgs)
+def list_directory(relative_path: str = ".") -> str:
+    """Lists the contents (files and directories) of a specified directory within the agent's workspace."""
+    # A falsy result from _resolve_path means the path was rejected.
+    target_path = _resolve_path(relative_path)
+    if not target_path:
+        return f"Error: Invalid or disallowed path '{relative_path}'."
+    if not target_path.is_dir():
+        return f"Error: '{relative_path}' is not a valid directory."
     try:
+        # Non-recursive listing; sub-directories are marked with a trailing '/'.
+        # Entries appear in whatever order iterdir() yields them (not sorted).
+        items = [f.name + ('/' if f.is_dir() else '') for f in target_path.iterdir()]
+        if not items:
+            return f"Directory '{relative_path}' is empty."
+        return f"Contents of '{relative_path}':\n" + "\n".join(items)
+    except Exception as e:
+        # Failures are returned as text rather than raised.
+        return f"Error listing directory '{relative_path}': {e}"
+@tool("run_python_code", args_schema=RunPythonCodeArgs)
+def run_python_code(code: str) -> str:
+    """Executes Python code in a subprocess and returns the stdout/stderr. Use print() for output. WARNING: Executes arbitrary code."""
     try:
+        process = subprocess.run(
+            [sys.executable, "-c", code],
+            capture_output=True, text=True, timeout=30, cwd=AGENT_WORKSPACE, check=False # Don't raise error on non-zero exit
+        )
+        output, error = process.stdout, process.stderr
+        result = ""
+        if output:
+            max_output = 2000
+            if len(output) > max_output: output = output[:max_output] + "\n... [Output truncated]"
+            result += f"Output:\n{output}\n"
+        if error:
+             result += f"Error Output:\n{error}\n"
+        if process.returncode == 0:
+            return f"Execution successful.\n{result}"
+        else:
+            return f"Execution failed (Return Code: {process.returncode}).\n{result}"
+    except subprocess.TimeoutExpired:
+        return "Error: Code execution timed out after 30 seconds."
     except Exception as e:
+        return f"Error executing Python code: {e}"
+@tool("scrape_webpage", args_schema=WebScrapeArgs)
+def scrape_webpage(url: str, query: Optional[str] = None) -> str:
+    """Scrapes text content from a given URL using BeautifulSoup. If a query is provided, returns content for the agent to answer it."""
     try:
+        headers = {'User-Agent': 'Mozilla/5.0 (compatible; GAIA-Agent/1.0)'} # Identify the agent
+        response = requests.get(url, headers=headers, timeout=20)
+        response.raise_for_status() # Raise HTTPError for bad responses
+        # Check content type - avoid trying to parse images, etc.
+        content_type = response.headers.get('content-type', '').lower()
+        if 'text/html' not in content_type:
+            return f"Error: Content type of URL {url} is '{content_type}', not HTML. Cannot scrape."
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # Drop these elements entirely so their text does not appear in the output.
+        for script_or_style in soup(["script", "style", "nav", "footer", "aside"]): # Remove common clutter
+            script_or_style.decompose()
+        text_content = soup.get_text(separator='\n', strip=True)
+        if not text_content: return f"Could not extract meaningful text content from {url}."
+        max_chars = 10000 # Limit content length
+        if len(text_content) > max_chars:
+            text_content = text_content[:max_chars] + "\n... [Content truncated]"
+        # The query is only echoed back in the returned text; the page content is not filtered by it.
+        if query:
+            return f"Use the following content from {url} to answer the query '{query}':\n\n{text_content}"
+        else:
+            return f"Content scraped from {url}:\n\n{text_content}"
+    except requests.exceptions.RequestException as e:
+        # Covers the timeout and the HTTPError raised by raise_for_status() above.
+        return f"Error fetching or reading URL {url}: {e}"
     except Exception as e:
+        return f"Error scraping URL {url}: {e}"
+# --- Optional Tools (Conditionally Available) ---
+if PANDAS_AVAILABLE:
+    @tool("read_excel_file", args_schema=ReadExcelArgs)
+    def read_excel_file(relative_path: str, sheet_name: Optional[str] = None, max_rows_preview: int = 20) -> str:
+        """Reads data from an Excel file (.xlsx or .xls) within the workspace and returns a text preview."""
+        full_path = _resolve_path(relative_path)
+        if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
+        if not full_path.is_file(): return f"Error: Excel file not found at '{relative_path}'"
+        try:
+            excel_file = pd.ExcelFile(full_path)
+            if sheet_name:
+                if sheet_name not in excel_file.sheet_names:
+                    return f"Error: Sheet '{sheet_name}' not found. Available: {excel_file.sheet_names}"
+                sheet_to_read = sheet_name
+            else:
+                sheet_to_read = excel_file.sheet_names[0]
+            df = pd.read_excel(full_path, sheet_name=sheet_to_read)
+            output = f"Preview of sheet '{sheet_to_read}' from '{relative_path}' ({df.shape[0]} rows, {df.shape[1]} cols):\n"
+            output += df.to_string(max_rows=max_rows_preview, max_cols=15) # Preview format
+            max_output_len = 5000
+            if len(output) > max_output_len:
+                output = output[:max_output_len] + "\n... [Output truncated]"
+            return output
+        except Exception as e: return f"Error reading Excel file '{relative_path}': {e}"
+# Only defined when the OpenAI package is importable and a client was created.
+if OPENAI_AVAILABLE and openai_client:
+    @tool("transcribe_audio_file", args_schema=TranscribeAudioArgs)
+    def transcribe_audio_file(relative_path: str) -> str:
+        """Transcribes audio content from a file in the workspace using OpenAI Whisper (max 25MB)."""
+        # A falsy result from _resolve_path means the path was rejected.
+        full_path = _resolve_path(relative_path)
+        if not full_path: return f"Error: Invalid or disallowed path '{relative_path}'."
+        # Existence and size checks happen inside _transcribe_audio; the
+        # relative path doubles as the label shown in its messages.
+        return _transcribe_audio(full_path, relative_path)
+# Only defined when both pytube and a working OpenAI client are available.
+if PYTUBE_AVAILABLE and OPENAI_AVAILABLE and openai_client:
+    @tool("transcribe_youtube_video", args_schema=TranscribeYouTubeArgs)
+    def transcribe_youtube_video(youtube_url: str) -> str:
+        """Downloads audio from a YouTube URL, transcribes it using OpenAI Whisper, and returns the text."""
+        # Tracked outside the try so the finally clause can clean up after any failure.
+        temp_audio_path = None
+        try:
+            print(f"Processing YouTube URL: {youtube_url}")
+            yt = YouTube(youtube_url)
+            # Prefer an mp4 audio-only stream; take the first one when sorted by
+            # 'abr' descending (presumably the highest audio bitrate).
+            audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').order_by('abr').desc().first()
+            if not audio_stream: audio_stream = yt.streams.filter(only_audio=True).order_by('abr').desc().first() # Fallback
+            if not audio_stream: return f"Error: No suitable audio stream found for {youtube_url}"
+            # Download to a unique temporary file in workspace
+            # NOTE(review): int(time.time()) has one-second resolution, so two
+            # calls within the same second would share a filename.
+            timestamp = int(time.time())
+            temp_filename = f"temp_youtube_{timestamp}.{audio_stream.subtype or 'mp4'}"
+            temp_audio_path = AGENT_WORKSPACE / temp_filename
+            print(f"Downloading audio to: {temp_audio_path}...")
+            audio_stream.download(output_path=AGENT_WORKSPACE, filename=temp_filename)
+            print("Download complete.")
+            # Transcribe the downloaded file
+            # The 25 MB size check runs inside _transcribe_audio, i.e. only after the download finished.
+            result = _transcribe_audio(temp_audio_path, f"YouTube video '{yt.title}'")
+            return result
+        except PytubeError as e: return f"Error processing YouTube video {youtube_url}: {e}"
+        except Exception as e: return f"Unexpected error during YouTube transcription {youtube_url}: {e}"
+        finally:
+            # --- IMPORTANT: Clean up temporary file ---
+            # Runs on success and on every error path; a failed delete is only logged.
+            if temp_audio_path and temp_audio_path.exists():
+                try: temp_audio_path.unlink(); print(f"Cleaned up: {temp_audio_path}")
+                except Exception as e: print(f"Warning: Failed to delete temp file {temp_audio_path}: {e}")
 # ==============================================================================
+# 3. AGENT SETUP
 # ==============================================================================
+# --- Initialize LLM ---
+# Builds the chat model used by the agent. temperature=0 is passed explicitly;
+# the model name and API key come from module-level settings.
 try:
+    llm = ChatGroq(
+        temperature=0,
+        model_name=GROQ_MODEL_NAME,
+        groq_api_key=GROQ_API_KEY
+    )
+    print(f"Using Groq LLM: {GROQ_MODEL_NAME}")
 except Exception as e:
+    print(f"FATAL: Error initializing Groq LLM: {e}")
+    # The agent below is built from `llm`, so abort the process with a
+    # non-zero exit status instead of continuing without it.
+    sys.exit(1)
+# --- Assemble Available Tools ---
+# The list built here is what the prompt, the agent and the executor all receive.
+available_tools = []
+# Web search is only registered when an API key is configured.
+if TAVILY_API_KEY:
+    available_tools.append(TavilySearchResults(max_results=TAVILY_MAX_RESULTS, api_key=TAVILY_API_KEY))
+else:
+    print("Warning: Tavily Search tool disabled (API key missing).")
+# Core tools are always added (they don't have external dependencies checked above)
+available_tools.extend([
+    write_file,
+    read_file,
+    list_directory,
+    run_python_code,
+    scrape_webpage,
+])
+# Add optional tools if their dependencies/clients are ready
+# These conditions mirror the guards around the corresponding tool
+# definitions, so each name is only referenced when it was actually defined.
+if PANDAS_AVAILABLE: available_tools.append(read_excel_file)
+if OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_audio_file)
+if PYTUBE_AVAILABLE and OPENAI_AVAILABLE and openai_client: available_tools.append(transcribe_youtube_video)
+print(f"Agent initialized with tools: {[tool.name for tool in available_tools]}")
+# --- Define System Prompt ---
+# This prompt is formatted later with the *actually available* tools
+# It is a str.format template with two placeholders: {agent_workspace} (used
+# twice) and {tool_descriptions}. Any other literal brace added to this text
+# would have to be doubled.
+# NOTE(review): the guidelines name tavily_search_results_json, read_excel_file
+# and the transcription tools unconditionally, although those tools are only
+# registered when their API key / dependency is available.
+SYSTEM_PROMPT_TEMPLATE = """You are a highly capable AI assistant designed to solve complex problems step-by-step, mimicking human-like reasoning and actions. Your goal is to accurately answer the user's request based on the GAIA benchmark philosophy.
+**Workspace:** You have access to a local workspace directory: '{agent_workspace}'. You can ONLY interact with files inside this directory using the provided tools. Always use relative paths for file operations.
+**Available Tools:** You have access to the following tools:
+{tool_descriptions}
+**Reasoning Process:**
+1.  **Understand:** Analyze the request. Identify objectives, constraints, and required information (text, web search, file content, Excel data, audio/video transcription, calculations).
+2.  **Plan:** Break down the problem into logical steps. Choose the *most appropriate* tool for each step.
+3.  **Execute:** Perform actions step-by-step using ONE tool at a time. Provide valid arguments for the chosen tool.
+4.  **Observe:** Analyze the results (observations) from each tool execution. Note errors or unexpected output.
+5.  **Reflect & Adjust:** If a step fails or results are insufficient, analyze the error, refine your plan, and try a different approach or tool. If a file isn't found, consider using `list_directory`. If web search results aren't specific enough, refine your query. If scraping fails, the site might be dynamic or blocking; note this limitation.
+6.  **Synthesize:** Once all necessary information is gathered and actions performed, combine the findings to formulate the final answer.
+7.  **Final Answer:** Provide ONLY the final answer in the precise format requested by the task. Do not include explanations, commentary, or conversational text unless explicitly asked for. If the task requires creating a file, use `write_file` and state the relative path if needed as the final answer.
+**Important Guidelines:**
+*   Think step-by-step. Be methodical.
+*   Use file/audio/excel tools ONLY for the designated workspace: {agent_workspace}. Use relative paths.
+*   Check file existence with `list_directory` before attempting to read if unsure.
+*   Use `read_excel_file` for `.xlsx` or `.xls` files.
+*   Use `transcribe_audio_file` for local audio files (e.g., .mp3, .wav). Max 25MB.
+*   Use `transcribe_youtube_video` for YouTube URLs. Max 25MB audio download.
+*   Use `run_python_code` for calculations or data manipulation not covered by other tools. Use `print()` for output.
+*   Use `tavily_search_results_json` for web searches. Use `scrape_webpage` to get content from a specific URL found in search or given in the prompt.
+*   Adhere strictly to the requested final answer format.
+"""
+# --- Create Prompt Template ---
+prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", SYSTEM_PROMPT_TEMPLATE.format(
+            agent_workspace=AGENT_WORKSPACE.resolve(),
+            tool_descriptions="\n".join([f"- {tool.name}: {tool.description}" for tool in available_tools])
+            )
+        ),
+        MessagesPlaceholder(variable_name="chat_history"),
+        ("human", "{input}"),
+        MessagesPlaceholder(variable_name="agent_scratchpad"), # Crucial for agent's intermediate steps
+    ]
+)
+# --- Setup Memory ---
+# Windowed conversation buffer sized by MEMORY_WINDOW_SIZE; memory_key must
+# match the "chat_history" placeholder name used in the prompt.
+memory = ConversationBufferWindowMemory(
+    k=MEMORY_WINDOW_SIZE,
+    memory_key="chat_history",
+    return_messages=True # Return Message objects for chat models
+)
+# --- Create Agent ---
+# Structured Chat Agent is generally good for models supporting tool calling/structured output
+agent = create_structured_chat_agent(llm, available_tools, prompt)
+# --- Create Agent Executor ---
+# The executor receives the same tool list as the agent, plus the shared memory.
+agent_executor = AgentExecutor(
+    agent=agent,
+    tools=available_tools,
+    memory=memory,
+    verbose=True, # Set to True to see agent's thought process, False for cleaner output
+    max_iterations=MAX_ITERATIONS,
+    handle_parsing_errors="Please check your output format and try again.", # Basic guidance on format errors
+    # return_intermediate_steps=True # Uncomment to get intermediate steps in the result dictionary
+)
 # ==============================================================================
+# 4. EXECUTION FUNCTION
 # ==============================================================================
+def run_gaia_task(task_description: str):
+    """
+    Runs the GAIA agent on a given task description.
+    The shared conversation memory is cleared first, and any exception raised
+    during execution is caught and reported in the returned string.
+    Args:
+        task_description: The natural language description of the task.
+    Returns:
+        The final output string from the agent, or an error message.
+    """
+    # Banner with the task, workspace and tool names for the console log.
+    print("\n" + "="*50)
+    print(f"🚀 Running GAIA Task")
+    print(f"📝 Task: {task_description}")
+    print(f"📍 Workspace: {AGENT_WORKSPACE.resolve()}")
+    print(f"🛠️ Available Tools: {[tool.name for tool in available_tools]}")
+    print("="*50 + "\n")
+    # Reset memory for each new task to avoid context bleed
+    memory.clear()
     try:
+        # Invoke the agent executor
+        result = agent_executor.invoke({"input": task_description})
+        print("\n" + "="*50)
+        print("✅ Agent Execution Finished")
+        print(f"🏁 Final Output:\n{result.get('output', 'No output found.')}")
+        print("="*50 + "\n")
+        # A missing 'output' key falls back to a placeholder message.
+        return result.get('output', 'Agent finished but produced no output.')
+    except Exception as e:
+        print(f"\n{'='*50}")
+        print(f"❌ Agent Execution Error")
+        print(f"An error occurred: {e}")
+        # Optional: Print traceback for detailed debugging
+        # import traceback
+        # traceback.print_exc()
+        print("="*50 + "\n")
+        # The error is returned rather than re-raised, so callers always get a string.
+        return f"Agent failed with error: {e}"
 # ==============================================================================
+# 5. EXAMPLE USAGE (Entry Point)
 # ==============================================================================
+# Demo entry point: creates small sample files in the workspace (only when
+# they do not exist yet) and runs one of the example tasks below.
+if __name__ == "__main__":
+    # --- Optional: Setup Example Files ---
+    print("--- Setting up example files (if needed) ---")
+    # Dummy Excel
+    if PANDAS_AVAILABLE:
         try:
+            dummy_excel_path = AGENT_WORKSPACE / "sample_data.xlsx"
+            if not dummy_excel_path.exists():
+                 pd.DataFrame({'ID': [1, 2, 3], 'Product': ['Widget', 'Gadget', 'Thingamajig']}).to_excel(dummy_excel_path, index=False)
+                 print(f"Created dummy Excel: {dummy_excel_path}")
+        except Exception as e: print(f"Could not create dummy Excel: {e}")
+    # Dummy Text
+    try:
+        dummy_text_path = AGENT_WORKSPACE / "numbers.txt"
+        if not dummy_text_path.exists():
+            with open(dummy_text_path, "w") as f: f.write("15\n-3\n42.5\n100\n")
+            print(f"Created dummy text file: {dummy_text_path}")
+    except Exception as e: print(f"Could not create dummy text file: {e}")
+    # Dummy Audio - User needs to provide this manually
+    dummy_audio_path = AGENT_WORKSPACE / "sample_audio.mp3"
+    if not dummy_audio_path.exists() and OPENAI_AVAILABLE and openai_client:
+        print(f"INFO: To test audio transcription, place an MP3 file at: {dummy_audio_path}")
+    print("--- Example setup complete ---")
+    # --- Define Example Tasks ---
+    # Each entry has an 'id' used in log lines and a 'description' passed to the agent.
+    task_list = [
+        {
+            "id": "excel_read",
+            "description": "Read the file 'sample_data.xlsx' in the workspace. What is the 'Product' where 'ID' is 2? Final answer should be just the product name."
+        },
+        {
+            "id": "python_sum",
+            "description": "Read the numbers from 'numbers.txt' in the workspace (one per line). Calculate their sum using python code. Write the sum into 'sum_result.txt'. Final answer should be the relative path 'sum_result.txt'."
+        },
+        {
+            "id": "search_scrape_write",
+            "description": "Search the web for the official website of the Python Software Foundation. Scrape the main title from the homepage of that website. Write the title into 'psf_title.txt'. Final answer is 'psf_title.txt'."
+        },
+        # { # Uncomment to run audio task if sample_audio.mp3 exists
+        #     "id": "audio_transcribe",
+        #     "description": "Transcribe the audio file 'sample_audio.mp3' from the workspace. Write the first 50 characters of the transcription into 'audio_snippet.txt'. Final answer is 'audio_snippet.txt'."
+        # },
+        # { # Uncomment to run YouTube task
+        #     "id": "youtube_transcribe",
+        #     "description": "Transcribe the YouTube video 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'. What is the first line of the transcription? Final answer is just the first line."
+        # },
+    ]
+    # --- Run Selected Task ---
+    # Choose which task to run by its index or ID
+    # NOTE: the Excel task relies on sample_data.xlsx, which is only created above when pandas is available.
+    task_to_run = task_list[0] # Run the first task (Excel read)
+    print(f"\n>>> Running selected task: {task_to_run['id']} <<<")
+    final_answer = run_gaia_task(task_to_run['description'])
+    print(f">>> Task {task_to_run['id']} completed. Agent Output: {final_answer} <<<")
+    # To run all tasks:
+    # for task in task_list:
+    #     print(f"\n>>> Running task: {task['id']} <<<")
+    #     final_answer = run_gaia_task(task['description'])
+    #     print(f">>> Task {task['id']} completed. Agent Output: {final_answer} <<<")
+    #     input("Press Enter to continue to the next task...") # Pause between tasks