Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Files Community

gabejavitt committed on Oct 28, 2025

Commit

d5d54b8

verified ·

1 Parent(s): 836bf02

Update app.py

Browse files

Files changed (1) hide show

app.py +335 -239

app.py CHANGED Viewed

@@ -5,12 +5,12 @@ import inspect
 import pandas as pd
 import io
 import contextlib
-from typing import TypedDict, Annotated
 import torch
-import json # For robust tool call parsing/generation if needed
-import re # For finding JSON
-import uuid # For generating tool call IDs
-import traceback
 # --- Multimodal & Web Tool Imports ---
 from transformers import pipeline
@@ -20,21 +20,18 @@ from bs4 import BeautifulSoup
 # --- LangChain & LangGraph Imports ---
 from langgraph.graph.message import add_messages
-# Make sure to import ToolCall
-from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall
 from langgraph.prebuilt import ToolNode
 from langgraph.graph import START, END, StateGraph
-# Removed tools_condition, we'll use a custom one
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import tool, BaseTool
-# --- ADD GROQ IMPORT ---
 from langchain_groq import ChatGroq
-# (Keep Constants as is)
 # --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This URL is currently not working
-# --- Initialize ASR Pipeline (Moved back to Global Scope) ---
 asr_pipeline = None
 try:
     print("Loading ASR (Whisper) pipeline globally...")
@@ -50,26 +47,43 @@ try:
     print("✅ ASR (Whisper) pipeline loaded successfully.")
 except Exception as e:
     print(f"⚠️ Warning: Could not load ASR pipeline globally. Error: {e}")
-    import traceback
     traceback.print_exc()
     asr_pipeline = None
-# ====================================================
 # --- Tool Definitions (Standalone Functions) ---
 @tool
 def search_tool(query: str) -> str:
     """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
     print(f"--- Calling Search Tool with query: {query} ---")
     try:
         search = DuckDuckGoSearchRun()
         return search.run(query)
     except Exception as e:
-        return f"Error running search: {e}"
 @tool
 def code_interpreter(code: str) -> str:
-    """Executes Python code..."""
     print(f"--- Calling Code Interpreter with code:\n{code}\n---")
     output_stream = io.StringIO()
     error_stream = io.StringIO()
@@ -80,140 +94,186 @@ def code_interpreter(code: str) -> str:
                 "__builtins__": __builtins__
             }
             exec(code, safe_globals, {})
         stdout = output_stream.getvalue(); stderr = error_stream.getvalue()
         if stderr: return f"Error: {stderr}\nStdout: {stdout}"
         if stdout: return f"Success:\n{stdout}"
         return "Success: Code executed without error and produced no stdout."
     except Exception as e:
-        # --- THIS IS THE IMPROVEMENT ---
-        # Get the full traceback string
         tb_str = traceback.format_exc()
         print(f"--- Code Interpreter FAILED ---\n{tb_str}\n---")
         return f"Execution failed with error:\n{tb_str}"
-        # --- END IMPROVEMENT ---
 @tool
 def read_file(path: str) -> str:
-    """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
     print(f"--- Calling Read File Tool at path: {path} ---")
     try:
         script_dir = os.getcwd()
         print(f"Base directory for reading: {script_dir}")
-        full_path = os.path.join(script_dir, path)
         print(f"Attempting to read relative path: {full_path}")
         if not os.path.exists(full_path):
-             full_path = path
              print(f"Attempting to read direct/absolute path: {full_path}")
              if not os.path.exists(full_path):
-                  base_path = os.path.basename(path)
                   cwd_base_path = os.path.join(os.getcwd(), base_path)
                   print(f"Attempting to read basename path in CWD: {cwd_base_path}")
-                  if os.path.exists(cwd_base_path): full_path = cwd_base_path
                   else:
                       try: cwd_files = os.listdir(".")
                       except Exception as list_e: cwd_files = [f"Error listing CWD: {list_e}"]
                       return (f"Error: File not found.\n"
-                              f"Tried relative: '{os.path.join(script_dir, path)}'\n"
-                              f"Tried direct/absolute: '{path}'\n"
                               f"Tried basename in CWD: '{cwd_base_path}'\n"
                               f"Files in CWD (.): {cwd_files}")
         print(f"Reading file: {full_path}")
-        with open(full_path, 'r', encoding='utf-8') as f: return f.read()
-    except Exception as e: return f"Error reading file {path}: {str(e)}"
 @tool
 def write_file(path: str, content: str) -> str:
     """Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
     print(f"--- Calling Write File Tool at path: {path} ---")
     try:
-        base_dir = os.getcwd()
-        full_path = os.path.join(base_dir, path)
-        print(f"Writing file to: {full_path}")
-        os.makedirs(os.path.dirname(full_path), exist_ok=True)
         with open(full_path, 'w', encoding='utf-8') as f: f.write(content)
         return f"Successfully wrote to file {path} (relative to CWD)."
-    except Exception as e: return f"Error writing to file {path}: {str(e)}"
 @tool
 def list_directory(path: str = ".") -> str:
     """Lists the contents (files and directories) of a directory at the specified path relative to the app's current directory."""
     print(f"--- Calling List Directory Tool at path: {path} ---")
     try:
-        base_dir = os.getcwd()
-        full_path = os.path.join(base_dir, path)
         print(f"Listing directory: {full_path}")
-        if not os.path.isdir(full_path): return f"Error: '{path}' is not a valid directory."
         files = os.listdir(full_path); return "\n".join(files) if files else "Directory is empty."
-    except Exception as e: return f"Error listing directory {path}: {str(e)}"
 @tool
 def audio_transcription_tool(file_path: str) -> str:
     """Transcribes an audio file (like .mp3 or .wav) and returns the text content."""
     print(f"--- Calling Audio Transcription: {file_path} ---")
     if asr_pipeline is None: return "Error: ASR pipeline unavailable."
     try:
-        script_dir = os.getcwd()
-        full_path = os.path.join(script_dir, file_path)
         if not os.path.exists(full_path):
              full_path = file_path
              if not os.path.exists(full_path):
                   base_path = os.path.basename(file_path)
                   cwd_base_path = os.path.join(os.getcwd(), base_path)
                   if os.path.exists(cwd_base_path): full_path = cwd_base_path
-                  else: return f"Error: Audio file not found."
         transcription = asr_pipeline(full_path)
-        return transcription.get("text", "Error: Transcription failed.")
-    except Exception as e: import traceback; traceback.print_exc(); return f"Error transcribing: {e}"
 @tool
 def get_youtube_transcript(video_url: str) -> str:
-    """Fetches YouTube transcript."""
     print(f"--- Calling YouTube Transcript: {video_url} ---")
     try:
         video_id = None
         if "watch?v=" in video_url: video_id = video_url.split("v=")[1].split("&")[0]
         elif "youtu.be/" in video_url: video_id = video_url.split("youtu.be/")[1].split("?")[0]
-        if not video_id: return f"Error: Invalid YouTube URL."
         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
         full_transcript = " ".join([item["text"] for item in transcript_list])
-        return full_transcript[:8000]
-    except Exception as e: return f"Error getting transcript: {e}"
 @tool
 def scrape_web_page(url: str) -> str:
-    """Fetches primary text content of a webpage."""
     print(f"--- Calling Web Scraper: {url} ---")
     try:
-        headers = {'User-Agent': 'Mozilla/5.0'}
-        response = requests.get(url, headers=headers, timeout=15); response.raise_for_status()
-        if 'html' not in response.headers.get('Content-Type', '').lower(): return f"Error: Not HTML."
         soup = BeautifulSoup(response.text, 'html.parser')
-        for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]): tag.extract()
-        main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body or soup
         text = main_content.get_text(separator='\n', strip=True)
         lines = (line.strip() for line in text.splitlines()); chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
         text = '\n'.join(chunk for chunk in chunks if chunk)
-        return text[:8000]
-    except Exception as e: return f"Error scraping {url}: {e}"
-# +++++++++++++++++++ NEW FINAL ANSWER TOOL +++++++++++++++++++
 @tool
 def final_answer_tool(answer: str) -> str:
     """
-    Call this tool *only* when you have the final, definitive answer to the user's question.
-    The 'answer' argument should be the single, concise, factual answer, formatted exactly as requested by the user's prompt.
     """
     print(f"--- AGENT CALLING FINAL ANSWER TOOL ---")
-    return answer
-# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-# --- Helper Function for Cleaning Fences ---
 def remove_fences_simple(text):
-    """Removes triple backtick fences and optional language identifiers."""
-    original_text = text
-    text = text.strip()
     if text.startswith("```") and text.endswith("```"):
         text = text[3:-3].strip()
         if '\n' in text:
@@ -222,7 +282,6 @@ def remove_fences_simple(text):
                 text = rest.strip()
         return text
     return original_text
-# --- End Helper ---
 # List of standalone tool functions
 defined_tools = [
@@ -234,221 +293,258 @@ defined_tools = [
     audio_transcription_tool,
     get_youtube_transcript,
     scrape_web_page,
-    final_answer_tool # Add the new tool to the list
-]
 # --- LangGraph Agent State ---
 class AgentState(TypedDict):
-    messages: Annotated[list[AnyMessage], add_messages]
 def should_continue(state: AgentState):
     """
-    Custom logic to decide whether to continue or end.
-    This now allows for a "reasoning loop".
     """
     last_message = state['messages'][-1]
-    if isinstance(last_message, AIMessage):
-        if last_message.tool_calls:
-            # Check for the final answer tool
-            if last_message.tool_calls[0].get("name") == "final_answer_tool":
-                print("--- Condition: Saw final_answer_tool, ending graph. ---")
-                return END
-            else:
-                # Any other tool call goes to the tools node
-                print("--- Condition: Saw other tools, calling tools node. ---")
-                return "tools"
-    # --- THIS IS THE KEY CHANGE ---
-    # If the last message is from the AI and has NO tool calls (i.e., it's plain text),
-    # loop back to the agent node to let it "think" again.
-    print("--- Condition: No tool call. Looping back to agent (reasoning loop). ---")
     return "agent"
 # --- Basic Agent Definition ---
-# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent (LangGraph) initializing...")
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
-        HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        if not HUGGINGFACEHUB_API_TOKEN: print("⚠️ Warning: HUGGINGFACEHUB_API_TOKEN secret not set.")
         self.tools = defined_tools
         tool_descriptions = "\n".join([
             f"- {tool.name}: {tool.description}" if tool.name != 'code_interpreter' else
-            (f"- {tool.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles. "
-             "**When solving logic puzzles, write out your reasoning steps as comments in the code.** "
-             "'pandas' (as pd) is available.")
             for tool in self.tools
         ])
-        # ==================== MODIFIED SYSTEM PROMPT ====================
         self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant for the GAIA benchmark.
 Your goal is to provide the concise, factual answer by strictly following a step-by-step reasoning process.
 **CRITICAL PROTOCOL: YOU MUST FOLLOW THIS PROCESS**
 1.  **ANALYZE:** Read the question and all messages in the history.
-2.  **PLAN:** Your *first* response on any new task MUST be a step-by-step plan as plain text. Do NOT call a tool on your first turn. Write down your logic, what you need to find, and which tool you *plan* to use.
-3.  **EXECUTE:** After you submit your plan, you will run again. Now, execute the *first* step of your plan by calling the *one* appropriate tool.
-4.  **ANALYZE TOOL OUTPUT:** You will receive a [Tool Output] message. You MUST read it.
 5.  **REPEAT or FINISH:**
-    * **If more steps are needed:** Go back to step 2 (PLAN). Write an *updated* plan as plain text (e.g., "Step 1 was successful. My new step 2 is...").
-    * **If the [Tool Output] contains the final answer:** You MUST call the `final_answer_tool`. Your answer *must* be derived *only* from the [Tool Output], not your own knowledge.
 **RULES:**
-* **NEVER** call a tool on the same turn you write a plan.
-* **NEVER** use your pre-trained "leaked" knowledge for the final answer. The answer *must* come from a [Tool Output] (e.g., from `code_interpreter`'s print() or `search_tool`).
-* **NEVER** answer a logic puzzle from memory. You *must* use `code_interpreter`, **print the result**, and then use that printed result for your final answer.
-* **NEVER** call `final_answer_tool` until a tool has given you the answer.
-* **Error Handling:** If a tool call fails, your next step (Step 2) must be to write a plan that analyzes the error and tries a *different* approach.
 **TOOLS:**
 {tool_descriptions}
-- code_interpreter: Executes Python code.
-  **CODE INTERPRETER RULES:**
-  1.  **ALWAYS** use a `print()` statement to output your final result. The tool only returns what you print.
-  2.  **NEVER** write a complex, multi-step script in one go.
-  3.  **ALWAYS** break the problem down. Call the tool with a *simple* script to get one piece of information (e.g., `print(df.head())`).
-  4.  Then, use that output (in your "think" step) to plan your *next* simple script (e.g., `print(df['column'].value_counts())`).
-  5.  **ALWAYS** write your logical plan as Python comments (`#`) inside the code block *before* you write the code itself.
-**REASONING PROCESS & STOPPING CONDITION:**
-1.  **PLAN:** First, respond with your step-by-step plan in plain text. Do not call a tool yet.
-2.  **(Graph will loop)**
-3.  **EXECUTE:** Now, call the *one* tool needed for the first step of your plan.
-4.  **ANALYZE:** You will get a [Tool Output].
-5.  **REPEAT:** Go back to step 1. Write an updated plan (e.g., "Step 1 was successful and gave me [data]. My step 2 is...").
-6.  **STOP:** Only call `final_answer_tool` when a [Tool Output] has given you the final, exact answer.
 **TOOL FORMAT (JSON ONLY):**
-    ```json
-    {{
-      "tool": "tool_name",
-      "tool_input": {{ "arg_name1": "value1", ... }}
-    }}
-    ```
 * Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
 * Do not add any text before or after the JSON block.
-**REASONING PROCESS & STOPPING CONDITION:**
-1.  **Analyze:** Read the question. Break it down into logical steps.
-2.  **DECIDE:** Do you have enough information to call a tool, or do you need to write down your plan first?
-3.  **ACT (Two Options):**
-    a. **Write Plan (Chain of Thought):** If you are not ready to call a tool, or if the problem is a complex logic puzzle, respond with your step-by-step reasoning plan as **plain text**. This allows you to "think" and add your plan to memory before your next step.
-    b. **Call Tool:** If you are ready, call **one** tool using the JSON format.
-4.  **Analyze Output:** After a tool is called, you will receive its output.
-5.  **GOTO 1:** Repeat the process. Analyze the new information and decide your next step (think, or call another tool).
-6.  **STOPPING:** The *only* way to provide the final answer is by calling `final_answer_tool`.
-7.  **FINAL OUTPUT:** The graph will stop *only* when you call `final_answer_tool`. Do not provide the answer in any other way."""
-        # =============================================================
-        print("Initializing Groq LLM Endpoint...")
-        try:
-            chat_llm = ChatGroq(
-                temperature=0.01,
-                groq_api_key=GROQ_API_KEY,
-                model_name="openai/gpt-oss-120b"  # <-- Your change is here
-            )
-            print("✅ Groq LLM Endpoint initialized with llama-3.1-8b-instant.")
-        except Exception as e: print(f"Error initializing Groq: {e}"); raise
-        self.llm_with_tools = chat_llm.bind_tools(self.tools)
-        print("✅ Tools bound to LLM (using bind_tools).")
-        def agent_node(state: AgentState):
-            print("--- Running Agent Node ---")
-            ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
-            print(f"AI Message Raw Content: {ai_message.content}")
-            if ai_message.tool_calls: print(f"AI tool calls via bind_tools: {ai_message.tool_calls}")
-            elif ai_message.invalid_tool_calls: print(f"AI INVALID tool calls via bind_tools: {ai_message.invalid_tool_calls}")
-            else: print(f"AI content (no calls): {ai_message.pretty_repr()}")
-            return {"messages": [ai_message]}
-        tool_node = ToolNode(self.tools)
-        print("Building agent graph...")
-        graph_builder = StateGraph(AgentState)
-        graph_builder.add_node("agent", agent_node)
-        graph_builder.add_node("tools", tool_node)
-        graph_builder.add_edge(START, "agent")
-        graph_builder.add_edge("tools", "agent") # This edge is correct
-        # --- REPLACE your old add_conditional_edges ---
-        graph_builder.add_conditional_edges(
-            "agent",
-            should_continue,
-            {
-                "tools": "tools",  # If tools are called
-                "agent": "agent",  # If text is generated (the new loop)
-                END: END           # If final_answer is called
-            })
-        self.graph = graph_builder.compile()
-        print("✅ Graph compiled.")
-    # ++++++++++++++++++++ __call__ METHOD ++++++++++++++++++++
-    def __call__(self, question: str) -> str:
-        print(f"\n--- Starting Agent Run for Question ---")
-        print(f"Agent received question (first 100 chars): {question[:100]}...")
-        graph_input = {"messages": [
             SystemMessage(content=self.system_prompt),
             HumanMessage(content=question)
-        ]}
-        final_answer = "AGENT FAILED TO PRODUCE ANSWER" # Default answer
-        try:
-            for event in self.graph.stream(graph_input, stream_mode="values", config={"recursion_limit": 25}):
-                last_message = event["messages"][-1]
-                if isinstance(last_message, AIMessage) and last_message.tool_calls:
-                    for tool_call in last_message.tool_calls:
-                        if tool_call.get("name") == "final_answer_tool":
-                            final_answer = tool_call['args'].get('answer', "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER")
-                            print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
-                            break
-                elif isinstance(last_message, ToolMessage):
-                     print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
-                elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
-                     # This might be an error or the agent failing to call final_answer_tool
-                     print(f"AI Message (no tool call): {last_message.content[:500]}...")
-                     # We store this in case the graph ends here, but it's not the ideal path
-                     if isinstance(last_message.content, str) and last_message.content.strip():
-                         final_answer = last_message.content # Fallback
-            # --- Cleaning step (for the final answer, wherever it came from) ---
-            cleaned_answer = str(final_answer).strip() # Ensure it's a string
-            prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
-            original_cleaned = cleaned_answer
-            for prefix in prefixes_to_remove:
-                if cleaned_answer.lower().startswith(prefix.lower()):
-                    potential_answer = cleaned_answer[len(prefix):].strip()
-                    if potential_answer: cleaned_answer = potential_answer; break
-            if cleaned_answer == original_cleaned and any(cleaned_answer.lower().startswith(p.lower()) for p in prefixes_to_remove):
-                 print(f"Warning: Prefix found but not stripped: '{original_cleaned[:100]}...'")
-            looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class ", "=>", "dict(", "list["]) or cleaned_answer.count('\n') > 3 or (cleaned_answer.startswith('[') and cleaned_answer.endswith(']')) or (cleaned_answer.startswith('{') and cleaned_answer.endswith('}'))
-            if not looks_like_code:
-                 # ++++++++++++++++ USING remove_fences_simple ++++++++++++++++
-                 cleaned_answer = remove_fences_simple(cleaned_answer) # Use the helper function
-                 # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-                 if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
-                      cleaned_answer = cleaned_answer[1:-1].strip()
-            print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
-            return cleaned_answer # Return the cleaned answer
-        except Exception as e:
-            print(f"Error running agent graph: {e}")
-            import traceback; traceback.print_exc()
-            return f"AGENT GRAPH ERROR: {e}"
     # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

 import pandas as pd
 import io
 import contextlib
+import traceback  # <-- Added for detailed errors
+from typing import TypedDict, Annotated, List  # <-- Added List
 import torch
+import json
+import re  # <-- Added for robust parsing
+import uuid # <-- Added for robust parsing
 # --- Multimodal & Web Tool Imports ---
 from transformers import pipeline
 # --- LangChain & LangGraph Imports ---
 from langgraph.graph.message import add_messages
+from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall # <-- Ensure ToolCall is imported
 from langgraph.prebuilt import ToolNode
 from langgraph.graph import START, END, StateGraph
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.tools import tool, BaseTool
 from langchain_groq import ChatGroq
 # --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # Still not working
+MAX_TURNS = 15 # <-- Added turn limit
+# --- Initialize ASR Pipeline (Keep as is) ---
 asr_pipeline = None
 try:
     print("Loading ASR (Whisper) pipeline globally...")
     print("✅ ASR (Whisper) pipeline loaded successfully.")
 except Exception as e:
     print(f"⚠️ Warning: Could not load ASR pipeline globally. Error: {e}")
     traceback.print_exc()
     asr_pipeline = None
+# ====================================================
 # --- Tool Definitions (Standalone Functions) ---
 @tool
 def search_tool(query: str) -> str:
     """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
+    # --- Input Validation ---
+    if not isinstance(query, str) or not query.strip():
+        return "Error: Invalid input. 'query' must be a non-empty string."
+    # --- End Validation ---
     print(f"--- Calling Search Tool with query: {query} ---")
     try:
         search = DuckDuckGoSearchRun()
         return search.run(query)
     except Exception as e:
+        # --- Granular Error ---
+        tb_str = traceback.format_exc()
+        print(f"--- Search Tool FAILED ---\n{tb_str}\n---")
+        return f"Error running search for '{query}': {str(e)}\nTraceback:\n{tb_str}"
 @tool
 def code_interpreter(code: str) -> str:
+    """
+    Executes a string of Python code and returns its stdout, stderr, and any error.
+    Use for calculations, data manipulation (pandas), logic puzzles.
+    RULES:
+    1. ALWAYS use print() for final results.
+    2. Write simple, single-step scripts. Use plan text output to plan next steps.
+    3. Write reasoning as Python comments (#) before code.
+    'pandas' (as pd) is available.
+    """
+    # --- Input Validation ---
+    if not isinstance(code, str): # Basic check, could add more (e.g., length)
+        return "Error: Invalid input. 'code' must be a string."
+    # --- End Validation ---
     print(f"--- Calling Code Interpreter with code:\n{code}\n---")
     output_stream = io.StringIO()
     error_stream = io.StringIO()
                 "__builtins__": __builtins__
             }
             exec(code, safe_globals, {})
         stdout = output_stream.getvalue(); stderr = error_stream.getvalue()
         if stderr: return f"Error: {stderr}\nStdout: {stdout}"
         if stdout: return f"Success:\n{stdout}"
         return "Success: Code executed without error and produced no stdout."
     except Exception as e:
+        # --- Granular Error with Traceback ---
         tb_str = traceback.format_exc()
         print(f"--- Code Interpreter FAILED ---\n{tb_str}\n---")
         return f"Execution failed with error:\n{tb_str}"
 @tool
 def read_file(path: str) -> str:
+    """Reads the content of a file at the specified path relative to the app's CWD. Use this to examine files provided."""
+    # --- 1. Stricter Input Validation ---
+    if not isinstance(path, str) or not path.strip():
+        return "Error: Invalid input. 'path' must be a non-empty string."
+    # --- End Validation ---
     print(f"--- Calling Read File Tool at path: {path} ---")
     try:
+        # --- Path Finding Logic ---
         script_dir = os.getcwd()
         print(f"Base directory for reading: {script_dir}")
+        safe_path = os.path.normpath(path) # Normalize path
+        full_path = os.path.join(script_dir, safe_path)
         print(f"Attempting to read relative path: {full_path}")
         if not os.path.exists(full_path):
+             full_path = safe_path # Try direct/absolute
              print(f"Attempting to read direct/absolute path: {full_path}")
              if not os.path.exists(full_path):
+                  base_path = os.path.basename(safe_path)
                   cwd_base_path = os.path.join(os.getcwd(), base_path)
                   print(f"Attempting to read basename path in CWD: {cwd_base_path}")
+                  if os.path.exists(cwd_base_path):
+                      full_path = cwd_base_path
                   else:
+                      # --- 2a. Granular Error: File Not Found ---
                       try: cwd_files = os.listdir(".")
                       except Exception as list_e: cwd_files = [f"Error listing CWD: {list_e}"]
                       return (f"Error: File not found.\n"
+                              f"Tried relative: '{os.path.join(script_dir, safe_path)}'\n"
+                              f"Tried direct/absolute: '{safe_path}'\n"
                               f"Tried basename in CWD: '{cwd_base_path}'\n"
                               f"Files in CWD (.): {cwd_files}")
         print(f"Reading file: {full_path}")
+        # --- File Reading Logic with Specific Error Handling ---
+        try:
+            with open(full_path, 'r', encoding='utf-8') as f:
+                return f.read()
+        # --- 2b. Granular Errors during file open/read ---
+        except FileNotFoundError:
+             return f"Error: File not found at final path '{full_path}'."
+        except PermissionError:
+            return f"Error: Permission denied when trying to read file '{full_path}'."
+        except IsADirectoryError:
+             return f"Error: Specified path '{full_path}' is a directory, not a file."
+        except UnicodeDecodeError:
+             return f"Error: Could not decode file '{full_path}' as UTF-8. It might be binary or have a different encoding."
+        except Exception as read_e:
+            tb_str = traceback.format_exc()
+            return f"Error reading file content from {full_path}: {str(read_e)}\nTraceback:\n{tb_str}"
+    except Exception as e:
+        # --- 2c. Fallback for Unexpected Errors ---
+        tb_str = traceback.format_exc()
+        print(f"--- Read File Tool FAILED UNEXPECTEDLY ---\n{tb_str}\n---")
+        return f"Unexpected error setting up file read for '{path}': {str(e)}\nTraceback:\n{tb_str}"
+# --- Remaining tools: write_file, list_directory, audio_transcription_tool, get_youtube_transcript, scrape_web_page.
+#      Each should validate its inputs and return granular error messages, like the tools above. ---
 @tool
 def write_file(path: str, content: str) -> str:
     """Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
+    if not isinstance(path, str) or not path.strip(): return "Error: Invalid input. 'path' must be a non-empty string."
+    if not isinstance(content, str): return "Error: Invalid input. 'content' must be a string."
     print(f"--- Calling Write File Tool at path: {path} ---")
     try:
+        base_dir = os.getcwd(); full_path = os.path.join(base_dir, path)
+        print(f"Writing file to: {full_path}"); os.makedirs(os.path.dirname(full_path), exist_ok=True)
         with open(full_path, 'w', encoding='utf-8') as f: f.write(content)
         return f"Successfully wrote to file {path} (relative to CWD)."
+    except PermissionError: return f"Error: Permission denied writing to file '{full_path}'."
+    except Exception as e: tb_str = traceback.format_exc(); return f"Error writing to file {path}: {str(e)}\nTraceback:\n{tb_str}"
 @tool
 def list_directory(path: str = ".") -> str:
     """Lists the contents (files and directories) of a directory at the specified path relative to the app's current directory."""
+    if not isinstance(path, str): return "Error: Invalid input. 'path' must be a string (or empty for current directory)."
     print(f"--- Calling List Directory Tool at path: {path} ---")
     try:
+        base_dir = os.getcwd(); full_path = os.path.join(base_dir, path)
         print(f"Listing directory: {full_path}")
+        if not os.path.isdir(full_path): return f"Error: '{path}' is not a valid directory relative to CWD."
         files = os.listdir(full_path); return "\n".join(files) if files else "Directory is empty."
+    except FileNotFoundError: return f"Error: Directory not found at '{full_path}'."
+    except PermissionError: return f"Error: Permission denied listing directory '{full_path}'."
+    except Exception as e: tb_str = traceback.format_exc(); return f"Error listing directory {path}: {str(e)}\nTraceback:\n{tb_str}"
 @tool
 def audio_transcription_tool(file_path: str) -> str:
     """Transcribes an audio file (like .mp3 or .wav) and returns the text content."""
     # The docstring above is left verbatim: the agent's prompt is built from `tool.description`,
     # which is presumably derived from this text by the `@tool` decorator.
     if not isinstance(file_path, str) or not file_path.strip():
         return "Error: Invalid input. 'file_path' must be a non-empty string."
     print(f"--- Calling Audio Transcription: {file_path} ---")
     if asr_pipeline is None:
         return "Error: ASR pipeline unavailable."
     try:
         working_dir = os.getcwd()
         # Locations are probed in order: CWD-relative path, the path exactly as given,
         # then just the file name inside CWD. The first one that exists wins.
         search_order = (
             os.path.join(working_dir, file_path),
             file_path,
             os.path.join(working_dir, os.path.basename(file_path)),
         )
         full_path = next((candidate for candidate in search_order if os.path.exists(candidate)), None)
         if full_path is None:
             return "Error: Audio file not found."
         print(f"Transcribing file: {full_path}")
         transcription = asr_pipeline(full_path)
         result_text = transcription.get("text", "")
         if result_text:
             return result_text
         return "Error: Transcription failed or produced empty text."
     except Exception as exc:
         # Errors are reported back to the model as text (with traceback) instead of being raised.
         trace_text = traceback.format_exc()
         return f"Error transcribing '{file_path}': {str(exc)}\nTraceback:\n{trace_text}"
 def _extract_youtube_video_id(video_url):
     """Return the video ID contained in a YouTube URL, or None when none can be found.
     Handles `watch?v=ID` (with `v` anywhere in the query string), `youtu.be/ID`,
     and `/shorts/ID`, `/embed/ID`, `/live/ID`, `/v/ID` paths. URL fragments, extra
     query parameters and surrounding whitespace never end up in the returned ID.
     """
     from urllib.parse import parse_qs, urlparse
     candidate = video_url.strip()
     # urlparse only recognises the host when a scheme is present ("youtu.be/ID" alone would not parse).
     if "://" not in candidate:
         candidate = "https://" + candidate
     parsed = urlparse(candidate)
     host = (parsed.hostname or "").lower()
     segments = [part for part in parsed.path.split("/") if part]
     if host == "youtu.be" or host.endswith(".youtu.be"):
         return segments[0] if segments else None
     from_query = parse_qs(parsed.query).get("v")
     if from_query:
         return from_query[0].strip() or None
     if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
         return segments[1]
     return None
 @tool
 def get_youtube_transcript(video_url: str) -> str:
     """Fetches YouTube transcript for the given video URL."""
     # The docstring above is left verbatim: the agent's prompt is built from `tool.description`,
     # which is presumably derived from this text by the `@tool` decorator.
     if not isinstance(video_url, str) or not video_url.strip():
         return "Error: Invalid input. 'video_url' must be a non-empty string."
     print(f"--- Calling YouTube Transcript: {video_url} ---")
     try:
         video_id = _extract_youtube_video_id(video_url)
         if not video_id:
             return f"Error: Could not extract YouTube video ID from URL '{video_url}'."
         # NOTE(review): `get_transcript` is the classmethod-style API; confirm the installed
         # youtube-transcript-api release still provides it.
         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
         if not transcript_list:
             return "Error: No transcript found for this video."
         full_transcript = " ".join(item["text"] for item in transcript_list)
         # Cap the output so one transcript cannot flood the model's context.
         return full_transcript[:8000]
     except Exception as exc:
         # Errors are reported back to the model as text (with traceback) instead of being raised.
         trace_text = traceback.format_exc()
         return f"Error getting transcript for '{video_url}': {str(exc)}\nTraceback:\n{trace_text}"
 @tool
 def scrape_web_page(url: str) -> str:
     """Fetches primary text content of a webpage specified by URL."""
     # The docstring above is left verbatim: the agent's prompt is built from `tool.description`,
     # which is presumably derived from this text by the `@tool` decorator.
     if not isinstance(url, str) or not url.strip():
         return "Error: Invalid input. 'url' must be a non-empty string."
     # Only plain web URLs are accepted.
     if not url.lower().startswith(('http://', 'https://')):
         return f"Error: Invalid URL scheme. URL must start with http:// or https://. Received: '{url}'"
     print(f"--- Calling Web Scraper: {url} ---")
     try:
         request_headers = {'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'}
         response = requests.get(url, headers=request_headers, timeout=20)
         response.raise_for_status()
         content_type = response.headers.get('Content-Type', '').lower()
         if 'html' not in content_type:
             return f"Error: Content type is '{content_type}', not HTML."
         soup = BeautifulSoup(response.text, 'html.parser')
         # Drop page chrome and non-text elements before extracting text.
         noise_tags = ["script", "style", "nav", "footer", "aside", "header", "form", "button", "input", "img", "link", "meta"]
         for unwanted in soup(noise_tags):
             unwanted.extract()
         # Prefer a semantic main-content container and fall back to the whole body.
         main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body
         if not main_content:
             return "Error: Could not find main body content."
         raw_text = main_content.get_text(separator='\n', strip=True)
         # Split each line on double spaces and keep only the non-empty pieces.
         pieces = []
         for raw_line in raw_text.splitlines():
             for phrase in raw_line.strip().split("  "):
                 cleaned = phrase.strip()
                 if cleaned:
                     pieces.append(cleaned)
         text = '\n'.join(pieces)
         if not text:
             return "Error: Scraped content was empty after cleaning."
         # Cap the output so one page cannot flood the model's context.
         return text[:8000]
     except requests.exceptions.RequestException as req_e:
         return f"Error fetching URL {url}: {str(req_e)}"
     except Exception as exc:
         # Errors are reported back to the model as text (with traceback) instead of being raised.
         trace_text = traceback.format_exc()
         return f"Error scraping {url}: {str(exc)}\nTraceback:\n{trace_text}"
 @tool
 def final_answer_tool(answer: str) -> str:
     """
     Call this tool ONLY when you have the final, definitive answer.
     The 'answer' argument must be a string containing only the concise, factual answer.
     """
     # The docstring above is left verbatim: the agent's prompt is built from `tool.description`,
     # which is presumably derived from this text by the `@tool` decorator.
     if not isinstance(answer, str):
         # Coerce non-string answers (numbers, lists, ...) rather than rejecting them outright.
         try:
             answer = str(answer)
         except Exception:
             # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not swallowed.
             return "Error: Invalid input. 'answer' must be a string or convertible to a string."
     print("--- AGENT CALLING FINAL ANSWER TOOL ---")
     # The tool only echoes its argument; the graph ends as soon as this tool is requested.
     return answer
+# --- Helper Function for Cleaning Fences ---
 def remove_fences_simple(text):
     """Strip one surrounding Markdown code fence (``` ... ```) from *text*.
     If the stripped text both starts and ends with three backticks, the fence is removed
     and the inner content is returned with surrounding whitespace trimmed. When the
     opening fence line is empty or looks like a language tag (e.g. ``json``, ``python``,
     ``c++``), that first line is dropped as well. Text without a surrounding fence is
     returned unchanged, including its original whitespace.
     """
     stripped = text.strip()
     if not (stripped.startswith("```") and stripped.endswith("```")):
         return text
     inner = stripped[3:-3]
     if '\n' in inner:
         # Split BEFORE trimming so an empty opening line ("```\n...") is still recognisable.
         first_line, rest = inner.split('\n', 1)
         tag = first_line.strip()
         looks_like_tag = bool(tag) and tag[0].isalpha() and all(ch.isalnum() or ch in "_+-#." for ch in tag)
         if not tag or looks_like_tag:
             inner = rest
     return inner.strip()
 # Tool callables registered with the agent. BasicAgent copies this list to `self.tools`,
 # which feeds the prompt's tool descriptions, `bind_tools()` and the graph's `ToolNode`.
 # NOTE(review): the system prompt also refers to `search_tool` and `code_interpreter`,
 # which are not listed here - confirm whether they should be registered.
 defined_tools = [
     audio_transcription_tool,
     get_youtube_transcript,
     scrape_web_page,
+    final_answer_tool
+] # remove_fences_simple is a plain helper, not a tool, so it must stay out of this list
 # --- LangGraph Agent State ---
 class AgentState(TypedDict):
     # State dictionary handed between the nodes of the agent graph.
     # `messages`: conversation history; annotated with `add_messages` as its merge function
     # (presumably so messages returned by a node are appended rather than replacing the list).
+    messages: Annotated[List[AnyMessage], add_messages]
     # `turn`: number of agent-node invocations so far; the agent node increments it and
     # `should_continue` compares it against MAX_TURNS.
+    turn: int # turn counter
+# --- Custom Conditional Edge ---
 def should_continue(state: AgentState):
     """
     Choose the next graph step after the agent node has produced a message.
     Returns END when the newest AI message's first tool call is `final_answer_tool`
     or when the turn counter has reached MAX_TURNS, "tools" when some other tool
     call is pending, and "agent" otherwise so the model gets another reasoning turn.
     """
     newest = state['messages'][-1]
     turns_used = state.get('turn', 0)
     # Tool calls only count when they come from an AI message.
     pending_calls = newest.tool_calls if isinstance(newest, AIMessage) else None
     # 1. Explicit end signal: only the FIRST tool call is inspected.
     if pending_calls and pending_calls[0].get("name") == "final_answer_tool":
         print("--- Condition: Saw final_answer_tool, ending graph. ---")
         return END
     # 2. The turn limit is enforced before any further looping or tool execution.
     if turns_used >= MAX_TURNS:
         print(f"--- Condition: Reached max turns ({MAX_TURNS}). Forcing END. ---")
         # NOTE(review): this appends to the list in place from inside a routing function;
         # confirm the graph runtime actually persists that mutation.
         limit_notice = SystemMessage(content=f"SYSTEM: Agent reached maximum turn limit ({MAX_TURNS}). Ending execution.")
         state['messages'].append(limit_notice)
         return END
     # 3. Any other requested tool goes to the tools node.
     if pending_calls:
         print("--- Condition: Saw other tools, calling tools node. ---")
         return "tools"
     # 4. Plain text (a plan/thought): give the model another turn.
     print(f"--- Condition: No tool call (Turn {turns_used}). Looping back to agent. ---")
     return "agent"
 # --- Basic Agent Definition ---
 class BasicAgent:
     """LangGraph agent that answers one question by alternating LLM turns and tool calls.
     The constructor builds a two-node graph ("agent" = LLM call, "tools" = ToolNode) routed
     by `should_continue`. Calling the instance runs that graph on a question and returns the
     cleaned `answer` argument of the model's `final_answer_tool` call.
     """
     def __init__(self):
         """Create the Groq chat model, bind the tools and compile the agent graph.
         Raises:
             ValueError: if the GROQ_API_KEY environment variable is not set.
             Exception: any error raised while constructing ChatGroq is logged and re-raised.
         """
         print("BasicAgent (LangGraph) initializing...")
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
         self.tools = defined_tools
         # One bullet per tool for the prompt; `code_interpreter` gets an extended rule list.
         tool_descriptions = "\n".join([
             f"- {t.name}: {t.description}" if t.name != 'code_interpreter' else
             (f"- {t.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles.\n"
              f"  **CODE INTERPRETER RULES:**\n"
              f"  1. ALWAYS use `print()` for final results.\n"
              f"  2. Write SIMPLE, single-step scripts.\n"
              f"  3. PLAN your next script using plain text output first.\n"
              f"  4. Write reasoning as Python comments (#) before code.\n"
              f"  'pandas' (as pd) is available.")
             for t in self.tools
         ])
         # ==================== SYSTEM PROMPT V4 ====================
         # Literal braces in the JSON examples are doubled because this is an f-string.
         self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant for the GAIA benchmark.
 Your goal is to provide the concise, factual answer by strictly following a step-by-step reasoning process.
 **CRITICAL PROTOCOL: YOU MUST FOLLOW THIS PROCESS**
 1.  **ANALYZE:** Read the question and all messages in the history.
 2.  **MANDATORY FIRST STEP:** Your *first* response on *any* new task MUST be a plan in plain text. Do NOT call any tool on your first turn. Write down your logic, what you need, and which tool you *plan* to use next. Failure to provide a plan first will result in incorrect behavior.
 3.  **EXECUTE:** After submitting your plan, you will run again. Now, execute the *next* step of your plan by calling the *one* appropriate tool using the correct JSON format.
 4.  **ANALYZE TOOL OUTPUT:** You will receive a ToolMessage with the output. You MUST read it carefully.
 5.  **REPEAT or FINISH:**
     * **If more steps are needed:** Go back to step 1 (ANALYZE the new info & PLAN). Write an *updated* plan as plain text (e.g., "The search found X. My next step is to use code_interpreter to process X...").
     * **If the ToolMessage contains the final answer:** You MUST call the `final_answer_tool`. Your answer *must* be derived *only* from the ToolMessage output, not your own knowledge.
 **RULES:**
 * **NEVER** call a tool on the same turn you write a plan (plain text).
 * **NEVER** use your pre-trained "leaked" knowledge for the final answer. The answer *must* come from a ToolMessage (e.g., from `code_interpreter`'s print() or `search_tool`).
 * **NEVER** answer a logic puzzle from memory. You *must* use `code_interpreter`, ensure it `print()`s the result, analyze that output, and then use that printed result for `final_answer_tool`.
 * **NEVER** call `final_answer_tool` until a tool has explicitly given you the answer in its output.
 * **Error Handling:** If a tool call returns an Error, your next step (Step 1 PLAN) MUST analyze the error message and propose a *different* approach (different tool, different arguments, different logic). Do not retry the exact same failed call.
 **TOOLS:**
 {tool_descriptions}
 **TOOL FORMAT (JSON ONLY):**
 Respond ONLY with a single JSON block like this when calling a tool:
 ```json
 {{
   "tool": "tool_name",
   "tool_input": {{ "arg_name1": "value1", ... }}
 }}
 ```
 * Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
 * Do not add any text before or after the JSON block.
 Example for final_answer_tool:
 ```json
 {{
   "tool": "final_answer_tool",
   "tool_input": {{
     "answer": "The final answer string here"
   }}
 }}
 ```
 NOTE: The value for "answer" MUST be a string enclosed in double quotes.
 """
         print("Initializing Groq LLM Endpoint...")
         try:
             chat_llm = ChatGroq(
                 temperature=0.01, # Low temperature for factual tasks
                 groq_api_key=GROQ_API_KEY,
                 model_name="openai/gpt-oss-120b"
             )
             print("✅ Groq LLM Endpoint initialized with openai/gpt-oss-120b.")
         except Exception as e: print(f"Error initializing Groq: {e}"); raise
         self.llm_with_tools = chat_llm.bind_tools(self.tools)
         print("✅ Tools bound to LLM (using bind_tools).")
         def agent_node(state: AgentState):
             """Run one LLM turn; return the new AI message and the incremented turn counter."""
             current_turn = state.get('turn', 0) + 1
             print(f"--- Running Agent Node (Turn {current_turn}) ---")
             # The complete history is sent on every turn; no pruning is applied.
             ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
             # Fallback: the model produced no structured tool call but may have written the
             # JSON tool request from the prompt's TOOL FORMAT section into its text content.
             if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
                 parsed_json, json_err = self._find_embedded_json(ai_message.content)
                 if isinstance(parsed_json, dict):
                     tool_name = parsed_json.get("tool")
                     tool_input = parsed_json.get("tool_input")
                     # Tool arguments must be a JSON object to be usable as call kwargs.
                     if "tool" in parsed_json and isinstance(tool_input, dict):
                         if any(t.name == tool_name for t in self.tools):
                             print(f"--- Fallback: Manually parsed tool call for '{tool_name}' from content ---")
                             ai_message.tool_calls = [ToolCall(name=tool_name, args=tool_input, id=str(uuid.uuid4()))]
                             ai_message.content = "" # Clear content as it's parsed
                         else:
                             print(f"--- Fallback Warning: Found JSON, but tool '{tool_name}' is not defined. ---")
                     else:
                         print("--- Fallback Warning: Found JSON, but not in expected tool format {tool:..., tool_input:...}. ---")
                 elif json_err is not None:
                     print(f"--- Fallback Warning: Found text resembling JSON, but failed to parse: {json_err} ---")
             print(f"AI Message Raw Content: {ai_message.content}")
             if ai_message.tool_calls: print(f"AI tool calls: {ai_message.tool_calls}")
             elif ai_message.invalid_tool_calls: print(f"AI INVALID tool calls: {ai_message.invalid_tool_calls}")
             else: print(f"AI content (no calls): {ai_message.pretty_repr()}")
             return {"messages": [ai_message], "turn": current_turn}
         tool_node = ToolNode(self.tools)
         print("Building agent graph...")
         graph_builder = StateGraph(AgentState)
         graph_builder.add_node("agent", agent_node)
         graph_builder.add_node("tools", tool_node)
         graph_builder.add_edge(START, "agent")
         graph_builder.add_edge("tools", "agent") # Always go back to agent after tools
         graph_builder.add_conditional_edges(
             "agent",
             should_continue,
             {
                 "tools": "tools",  # If tools are called (and not final_answer)
                 "agent": "agent",  # If text/plan is generated (reasoning loop)
                 END: END           # If final_answer called or turn limit reached
             }
         )
         self.graph = graph_builder.compile()
         print("✅ Graph compiled.")
     @staticmethod
     def _find_embedded_json(text):
         """Return `(obj, error)` for the first decodable JSON object embedded in *text*.
         Every `{` is tried as a possible start, so fenced, bare and nested objects are all
         handled. `obj` is None when nothing decodes; `error` then holds the last
         JSONDecodeError, or None if *text* contains no `{` at all.
         """
         decoder = json.JSONDecoder()
         last_error = None
         start = text.find("{")
         while start != -1:
             try:
                 obj, _end = decoder.raw_decode(text, start)
                 return obj, None
             except json.JSONDecodeError as err:
                 last_error = err
                 start = text.find("{", start + 1)
         return None, last_error
     def __call__(self, question: str) -> str:
         """Run the agent graph on *question* and return the cleaned final answer.
         Only a `final_answer_tool` call counts as an answer. If the graph finishes without
         one, "AGENT FAILED TO PRODUCE ANSWER" is returned. Any exception raised while
         streaming the graph is caught and reported as an "AGENT GRAPH ERROR: ..." string.
         """
         print(f"\n--- Starting Agent Run for Question ---")
         print(f"Agent received question (first 100 chars): {question[:100]}...")
         graph_input = {
             "messages": [
                 SystemMessage(content=self.system_prompt),
                 HumanMessage(content=question)
             ],
             "turn": 0
         }
         final_answer = "AGENT FAILED TO PRODUCE ANSWER"
         try:
             # One turn can take two graph steps (agent, then tools), so the step budget must
             # cover twice the turn limit or the recursion limit would trip before MAX_TURNS.
             config = {"recursion_limit": 2 * MAX_TURNS + 5}
             for event in self.graph.stream(graph_input, stream_mode="values", config=config):
                 last_message = event["messages"][-1]
                 if isinstance(last_message, AIMessage) and last_message.tool_calls:
                     if last_message.tool_calls[0].get("name") == "final_answer_tool":
                         final_answer = last_message.tool_calls[0]['args'].get('answer', "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER")
                         print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
                         # The routing function ends the graph on this call anyway.
                         break
                 elif isinstance(last_message, ToolMessage):
                     print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
                 elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
                     # Plain-text plan/thought turn; deliberately NOT treated as an answer.
                     print(f"AI Message (Plan/Thought): {last_message.content[:500]}...")
             cleaned_answer = str(final_answer).strip()
             # Strip one leading boilerplate prefix, but never reduce the answer to nothing.
             prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
             original_cleaned = cleaned_answer
             for prefix in prefixes_to_remove:
                 if cleaned_answer.lower().startswith(prefix.lower()):
                     potential_answer = cleaned_answer[len(prefix):].strip()
                     if potential_answer: cleaned_answer = potential_answer; break
             if cleaned_answer == original_cleaned and any(cleaned_answer.lower().startswith(p.lower()) for p in prefixes_to_remove):
                 print(f"Warning: Prefix found but not stripped: '{original_cleaned[:100]}...'")
             # Remove a surrounding code fence, then a surrounding pair of single backticks.
             cleaned_answer = remove_fences_simple(cleaned_answer)
             if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
                 cleaned_answer = cleaned_answer[1:-1].strip()
             print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
             return cleaned_answer
         except Exception as e:
             # The former `isinstance(e, SystemMessage)` branch was removed: an exception is
             # never a message object, so that check could not succeed.
             print(f"Error running agent graph: {e}")
             tb_str = traceback.format_exc()
             print(tb_str)
             return f"AGENT GRAPH ERROR: {e}"
     # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++