Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Files Community

gabejavitt committed on Nov 2, 2025

Commit

4277297

verified ·

1 Parent(s): 8a7fdce

Update app.py

Browse files

Files changed (1) hide show

app.py +535 -193

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import os
 import io
 import json
@@ -8,11 +7,12 @@ import contextlib
 import uuid
 import time
 import ast
-from typing import List, Optional, TypedDict, Annotated
 from pathlib import Path
-import gradio as gr
 import pandas as pd
 import torch
 from pydantic import BaseModel, Field
@@ -41,11 +41,12 @@ from langchain_core.documents import Document
 # CONFIGURATION
 # =============================================================================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MAX_TURNS = 20
 MAX_MESSAGE_LENGTH = 8000
 # =============================================================================
-# GLOBAL RAG COMPONENTS (Initialize once)
 # =============================================================================
 global_embeddings = None
 global_text_splitter = None
@@ -138,7 +139,138 @@ def find_file(path: str) -> Optional[Path]:
     return None
 # =============================================================================
-# TOOL DEFINITIONS
 # =============================================================================
 class SearchInput(BaseModel):
@@ -146,11 +278,19 @@ class SearchInput(BaseModel):
 @tool(args_schema=SearchInput)
 def search_tool(query: str) -> str:
-    """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
     if not isinstance(query, str) or not query.strip():
         return "Error: Invalid input. 'query' must be a non-empty string."
-    print(f"--- Calling Search Tool with query: {query} ---")
     try:
         search = DuckDuckGoSearchRun()
         result = search.run(query)
@@ -161,25 +301,70 @@ def search_tool(query: str) -> str:
         return f"Error running search for '{query}': {str(e)}"
 class CodeInput(BaseModel):
-    code: str = Field(description="The Python code to execute, which must include a print() statement for output.")
 @tool(args_schema=CodeInput)
 def code_interpreter(code: str) -> str:
     """
-    Executes a string of Python code and returns its stdout, stderr, and any error.
-    CRITICAL RULES:
-    1. ALWAYS use print() to output your final answer.
-    2. Write simple, focused code. One task per execution.
-    3. Add comments (#) to explain your logic.
-    4. SCOPE RULE: Import all necessary libraries inside any function you define.
-    Available: pandas as pd, basic Python libraries.
     """
     if not isinstance(code, str):
         return "Error: Invalid input. 'code' must be a string."
-    # Basic safety checks
-    dangerous_patterns = ['__import__', 'eval(', 'compile(', 'subprocess', 'os.system']
     code_lower = code.lower()
     for pattern in dangerous_patterns:
         if pattern in code_lower:
@@ -188,7 +373,7 @@ def code_interpreter(code: str) -> str:
     if 'open(' in code_lower and any(mode in code for mode in ["'w'", '"w"', "'a'", '"a"', "'wb'", '"wb"']):
         return "Error: Writing files is not allowed in code_interpreter. Use write_file tool instead."
-    print(f"--- Calling Code Interpreter ---\nCode:\n{code}\n---")
     output_stream = io.StringIO()
     error_stream = io.StringIO()
@@ -196,6 +381,9 @@ def code_interpreter(code: str) -> str:
         with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
             safe_globals = {
                 "pd": pd,
                 "__builtins__": __builtins__
             }
             exec(code, safe_globals, {})
@@ -209,9 +397,9 @@ def code_interpreter(code: str) -> str:
         if stdout:
             if len(stdout) > MAX_MESSAGE_LENGTH:
                 stdout = stdout[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(stdout)} total chars]"
-            return f"Success:\n{stdout}"
-        return "Success: Code executed without error but produced no output.\n⚠️ Remember to use print() to output your results!"
     except Exception as e:
         tb_str = traceback.format_exc()
@@ -219,15 +407,15 @@ def code_interpreter(code: str) -> str:
 class ReadFileInput(BaseModel):
-    path: str = Field(description="The path to the file to read.")
 @tool(args_schema=ReadFileInput)
 def read_file(path: str) -> str:
-    """Reads the content of a file at the specified path."""
     if not isinstance(path, str) or not path.strip():
         return "Error: Invalid input. 'path' must be a non-empty string."
-    print(f"--- Calling Read File Tool: {path} ---")
     file_path = find_file(path)
     if not file_path:
@@ -249,18 +437,18 @@ def read_file(path: str) -> str:
 class WriteFileInput(BaseModel):
-    path: str = Field(description="The path of the file to write to.")
-    content: str = Field(description="The content to write into the file.")
 @tool(args_schema=WriteFileInput)
 def write_file(path: str, content: str) -> str:
-    """Writes content to a file at the specified path."""
     if not isinstance(path, str) or not path.strip():
         return "Error: Invalid input. 'path' must be a non-empty string."
     if not isinstance(content, str):
         return "Error: Invalid input. 'content' must be a string."
-    print(f"--- Calling Write File Tool: {path} ---")
     try:
         file_path = Path.cwd() / path
@@ -272,12 +460,12 @@ def write_file(path: str, content: str) -> str:
 class ListDirInput(BaseModel):
-    path: str = Field(description="The directory path to list.", default=".")
 @tool(args_schema=ListDirInput)
 def list_directory(path: str = ".") -> str:
-    """Lists the contents of a directory."""
-    print(f"--- Calling List Directory Tool: {path} ---")
     try:
         dir_path = Path.cwd() / path if path != "." else Path.cwd()
@@ -311,15 +499,15 @@ def list_directory(path: str = ".") -> str:
 class AudioInput(BaseModel):
-    file_path: str = Field(description="The file path of the audio to transcribe.")
 @tool(args_schema=AudioInput)
 def audio_transcription_tool(file_path: str) -> str:
-    """Transcribes an audio file to text using Whisper."""
     if not isinstance(file_path, str) or not file_path.strip():
         return "Error: Invalid input. 'file_path' must be a non-empty string."
-    print(f"--- Calling Audio Transcription: {file_path} ---")
     if asr_pipeline is None:
         return "Error: ASR pipeline is not available."
@@ -339,17 +527,16 @@ def audio_transcription_tool(file_path: str) -> str:
     except Exception as e:
         return f"Error transcribing '{file_path}': {str(e)}"
 class YoutubeInput(BaseModel):
-    video_url: str = Field(description="The URL of the YouTube video.")
 @tool(args_schema=YoutubeInput)
 def get_youtube_transcript(video_url: str) -> str:
-    """Fetches the transcript/captions for a YouTube video."""
     if not isinstance(video_url, str) or not video_url.strip():
         return "Error: Invalid input. 'video_url' must be a non-empty string."
-    print(f"--- Calling YouTube Transcript: {video_url} ---")
     try:
         video_id = None
@@ -373,94 +560,76 @@ def get_youtube_transcript(video_url: str) -> str:
 class ScrapeInput(BaseModel):
-    url: str = Field(description="The URL to scrape (must start with http:// or https://).")
-    query: str = Field(description="The specific question to answer or information to find on the page.")
 @tool(args_schema=ScrapeInput)
 def scrape_and_retrieve(url: str, query: str) -> str:
     """
-    Scrapes a webpage, embeds its content using RAG, and retrieves relevant sections based on the query.
-    Use this to extract specific information from web pages.
     """
     if not (url.lower().startswith(('http://', 'https://'))):
         return f"Error: Invalid URL. Must start with http:// or https://. Got: '{url}'"
     if not query or not query.strip():
         return "Error: A query is required to search the page content."
-    # Check if RAG components are initialized
     if global_embeddings is None or global_text_splitter is None:
         if not initialize_rag_components():
             return "Error: RAG components could not be initialized."
-    print(f"--- Calling RAG Scraper: {url} for query: '{query}' ---")
     try:
-        # Fetch the webpage
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
-        print(f"Fetching URL: {url}")
         response = requests.get(url, headers=headers, timeout=20)
         response.raise_for_status()
-        # Parse HTML
         soup = BeautifulSoup(response.text, 'html.parser')
-        # Remove unwanted tags
         for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe", "noscript"]):
             tag.extract()
-        # Try to find main content
         main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile('content|main|article', re.I)) or soup.body
         if not main_content:
             return "Error: Could not find main content on the page."
-        # Extract text
         text = main_content.get_text(separator='\n', strip=True)
-        # Clean up text - remove extra whitespace and empty lines
         lines = [line.strip() for line in text.splitlines()]
         text = '\n'.join(line for line in lines if line)
         if not text or len(text) < 50:
             return f"Error: Scraped content was too short or empty (length: {len(text)})."
-        print(f"Scraped text length: {len(text)} characters")
-        # Split text into chunks
         chunks = global_text_splitter.split_text(text)
         if not chunks:
             return "Error: Text could not be split into chunks."
-        print(f"Created {len(chunks)} chunks")
-        # Create Document objects
         docs = [Document(page_content=chunk, metadata={"source": url}) for chunk in chunks]
-        # Create FAISS vector store
-        print("Creating embeddings and vector store...")
         db = FAISS.from_documents(docs, global_embeddings)
-        # Retrieve relevant chunks
-        print(f"Searching for: '{query}'")
         retriever = db.as_retriever(search_kwargs={"k": 5})
         retrieved_docs = retriever.invoke(query)
         if not retrieved_docs:
             return f"No relevant information found on {url} for query: '{query}'\n\nThe page was successfully scraped but doesn't seem to contain information matching your query."
-        print(f"Retrieved {len(retrieved_docs)} relevant chunks")
-        # Combine retrieved chunks
         context_parts = []
         for i, doc in enumerate(retrieved_docs, 1):
             context_parts.append(f"[Chunk {i}]\n{doc.page_content}")
         context = "\n\n---\n\n".join(context_parts)
-        result = f"Successfully retrieved relevant information from {url}\n\nQuery: {query}\n\n{context}"
         return truncate_if_needed(result)
@@ -472,13 +641,24 @@ def scrape_and_retrieve(url: str, query: str) -> str:
 class FinalAnswerInput(BaseModel):
-    answer: str = Field(description="The final, definitive answer to the question.")
 @tool(args_schema=FinalAnswerInput)
 def final_answer_tool(answer: str) -> str:
     """
-    Call this tool ONLY when you have the final, definitive answer.
-    The 'answer' must be EXACTLY what was asked for, with no extra text.
     """
     if not isinstance(answer, str):
         try:
@@ -486,24 +666,60 @@ def final_answer_tool(answer: str) -> str:
         except:
             return "Error: Invalid input. 'answer' must be a string."
-    print(f"--- FINAL ANSWER TOOL CALLED ---")
-    print(f"Answer: {answer}")
     return answer
 # =============================================================================
 # FALLBACK PARSER
 # =============================================================================
 def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
-    """
-    Parses malformed tool call strings from an LLM response.
-    """
-    print(f"Original LLM content for fallback parsing:\n---\n{content[:500]}\n---")
     tool_name = None
     tool_input = None
     cleaned_str = None
-    # STRATEGY 1: Try to parse <function(tool_name)>...{json_string}...
     func_match = re.search(
         r"<function[(=]\s*([^)]+)\s*[)>](.*)",
         content,
@@ -523,28 +739,26 @@ def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
                 cleaned_str = cleaned_str.strip().rstrip(',')
                 tool_input = json.loads(cleaned_str)
-                print(f"🔧 Fallback (Format 1 - json.loads): Parsed tool call for '{tool_name}'")
             else:
-                print(f"⚠️ Fallback (Format 1): Found <function> but no JSON blob.")
                 tool_name = None
         except json.JSONDecodeError as e:
-            print(f"⚠️ Fallback (Format 1): json.loads failed: {e}. Trying ast.literal_eval.")
             try:
                 if cleaned_str:
                     potential_input = ast.literal_eval(cleaned_str)
                     if isinstance(potential_input, dict):
                         tool_input = potential_input
-                        print(f"🔧 Fallback (Format 1 - ast.literal_eval): Parsed tool call for '{tool_name}'")
                     else:
-                        print(f"⚠️ Fallback (Format 1): ast.literal_eval did not produce a dict.")
                         tool_name = None
                 else:
                     tool_name = None
             except:
                 tool_name = None
-    # FINAL VALIDATION
     if tool_name and tool_input is not None:
         if any(t.name == tool_name for t in tools):
             tool_call = ToolCall(
@@ -556,79 +770,52 @@ def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
             return [tool_call]
         else:
             print(f"❌ Tool '{tool_name}' not found in available tools")
-            print(f"   Available: {[t.name for t in tools]}")
     print("❌ Failed to parse any valid tool call from content")
     return []
-# =============================================================================
-# DEFINED TOOLS LIST
-# =============================================================================
-defined_tools = [
-    search_tool,
-    code_interpreter,
-    read_file,
-    write_file,
-    list_directory,
-    audio_transcription_tool,
-    get_youtube_transcript,
-    scrape_and_retrieve,
-    final_answer_tool
-]
-# =============================================================================
-# AGENT STATE
-# =============================================================================
-class AgentState(TypedDict):
-    messages: Annotated[List[AnyMessage], add_messages]
-    turn: int
 # =============================================================================
 # CONDITIONAL EDGE FUNCTION
 # =============================================================================
 def should_continue(state: AgentState):
-    """
-    Decide whether to continue, call tools, or end.
-    """
     last_message = state['messages'][-1]
     current_turn = state.get('turn', 0)
-    # 1. Check for final_answer_tool
     if isinstance(last_message, AIMessage) and last_message.tool_calls:
         for tool_call in last_message.tool_calls:
             if tool_call.get("name") == "final_answer_tool":
                 print("--- Condition: final_answer_tool called, ending. ---")
                 return END
-    # 2. Check turn limit
     if current_turn >= MAX_TURNS:
         print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
         return END
-    # 3. Route to tools if tool calls exist
     if isinstance(last_message, AIMessage) and last_message.tool_calls:
         print("--- Condition: Tools called, routing to tools node. ---")
         return "tools"
-    # 4. Loop prevention
     if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
         print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
         return END
-    # 5. Loop back to agent
     print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
     return "agent"
 # =============================================================================
-# BASIC AGENT CLASS
 # =============================================================================
-class BasicAgent:
     def __init__(self):
-        print("BasicAgent (Single LLM) initializing...")
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY:
@@ -638,7 +825,7 @@ class BasicAgent:
         # Initialize RAG Components
         if not initialize_rag_components():
-            print("⚠️ Warning: RAG components failed to initialize. scrape_and_retrieve may not work.")
         # Build tool descriptions
         tool_desc_list = []
@@ -656,31 +843,104 @@ class BasicAgent:
             tool_desc_list.append(desc)
         tool_descriptions = "\n".join(tool_desc_list)
-        # System Prompt
-        self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
-Your goal: Provide the EXACT answer in the EXACT format requested.
-**PROTOCOL:**
-1. **ANALYZE:** Read the question and history. What is the next logical step?
-2. **ACT:** Call ONE tool to get information or perform a calculation.
-3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
-   - **If NO:** Go back to Step 1 and decide the *next* step.
-   - **If YES:** Call final_answer_tool immediately with the answer.
-**CRITICAL RULES:**
-- **TOOL USE:** You MUST use tools to find the answer. Do NOT use your own knowledge.
-- **FINAL ANSWER:** When you have the answer, use final_answer_tool. The 'answer' argument must be the answer ONLY (e.g., "42", "red, blue, green").
-- **NO CONVERSATIONAL TEXT:** Never add phrases like "The answer is" or "Based on the information". Just the answer.
-**TOOLS:**
 {tool_descriptions}
-**REMEMBER:** One step at a time. Use tools. Call final_answer_tool when done.
 """
         print("Initializing Groq LLM...")
         try:
-            # Changed from tool_choice="any" to "auto" for better flexibility
             self.llm_with_tools = ChatGroq(
                 temperature=0,
                 groq_api_key=GROQ_API_KEY,
@@ -688,27 +948,51 @@ Your goal: Provide the EXACT answer in the EXACT format requested.
                 max_tokens=4096,
                 timeout=60
             ).bind_tools(self.tools, tool_choice="auto")
-            print("✅ Main LLM (llama-3.3-70b-versatile with tools) initialized.")
         except Exception as e:
             print(f"❌ Error initializing Groq: {e}")
             raise
-        # Agent Node
         def agent_node(state: AgentState):
             current_turn = state.get('turn', 0) + 1
-            print(f"\n{'='*60}")
-            print(f"AGENT TURN {current_turn}/{MAX_TURNS}")
-            print('='*60)
             if current_turn > MAX_TURNS:
-                return {"messages": [SystemMessage(content="Max turns reached.")], "turn": current_turn}
             max_retries = 3
             ai_message = None
             for attempt in range(max_retries):
                 try:
-                    ai_message = self.llm_with_tools.invoke(state["messages"])
                     break
                 except Exception as e:
                     print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {e}")
@@ -718,32 +1002,58 @@ Your goal: Provide the EXACT answer in the EXACT format requested.
                         )
                     time.sleep(2 ** attempt)
-            # Fallback Parsing Logic
             if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
                 parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
                 if parsed_tool_calls:
-                    print("🔧 Fallback SUCCESS: Rebuilding tool call(s).")
                     ai_message.tool_calls = parsed_tool_calls
                     ai_message.content = ""
-                else:
-                    print(f"⚠️ Fallback FAILED: Could not parse any tool call from content:\n{ai_message.content[:200]}...")
             if ai_message.tool_calls:
-                print(f"🔧 Agent Tool Call: {ai_message.tool_calls[0]['name']}")
             else:
-                print(f"💭 Agent Reasoning: {ai_message.content[:200]}...")
-            return {"messages": [ai_message], "turn": current_turn}
-        # Tool Node
-        tool_node = ToolNode(self.tools)
         # Build Graph
-        print("Building Single-Agent graph...")
         graph_builder = StateGraph(AgentState)
         graph_builder.add_node("agent", agent_node)
-        graph_builder.add_node("tools", tool_node)
         graph_builder.add_edge(START, "agent")
@@ -760,87 +1070,119 @@ Your goal: Provide the EXACT answer in the EXACT format requested.
         graph_builder.add_edge("tools", "agent")
         self.graph = graph_builder.compile()
-        print("✅ Single-Agent graph compiled successfully.")
     def __call__(self, question: str) -> str:
-        print(f"\n--- Starting Agent Run for Question ---")
-        print(f"Agent received question (first 100 chars): {question[:100]}...")
         graph_input = {
             "messages": [
                 SystemMessage(content=self.system_prompt),
                 HumanMessage(content=question)
             ],
-            "turn": 0
         }
         final_answer = "AGENT FAILED TO PRODUCE ANSWER"
         try:
-            config = {"recursion_limit": MAX_TURNS + 5}
             for event in self.graph.stream(graph_input, stream_mode="values", config=config):
-                if event.get('messages'): # Ensure messages exist
-                    last_message = event["messages"][-1]
-                else:
-                    continue # Skip if no messages yet
                 # Check for final answer extraction
                 if isinstance(last_message, AIMessage) and last_message.tool_calls:
                     if last_message.tool_calls[0].get("name") == "final_answer_tool":
                         final_answer_args = last_message.tool_calls[0].get('args', {})
                         if 'answer' in final_answer_args:
-                             final_answer = final_answer_args['answer']
-                             print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
-                             break
                         else:
-                             print(f"⚠️ Final Answer tool called without 'answer' argument: {final_answer_args}")
-                             final_answer = "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER"
-                             break
                 elif isinstance(last_message, ToolMessage):
-                    print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
                 elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
-                    print(f"AI Message (Reasoning): {last_message.content[:500]}...")
-                elif isinstance(last_message, SystemMessage):
-                     print(f"System Message: {last_message.content[:500]}...")
-            # --- Final Answer Cleaning ---
             cleaned_answer = str(final_answer).strip()
-            prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
-            original_cleaned = cleaned_answer
             for prefix in prefixes_to_remove:
                 if cleaned_answer.lower().startswith(prefix.lower()):
                     potential_answer = cleaned_answer[len(prefix):].strip()
-                    if potential_answer:
                         cleaned_answer = potential_answer
-                        break
             cleaned_answer = remove_fences_simple(cleaned_answer)
-            if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
-                    cleaned_answer = cleaned_answer[1:-1].strip()
-            print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
             return cleaned_answer
         except Exception as e:
-            print(f"Error running agent graph: {e}")
             tb_str = traceback.format_exc()
             print(tb_str)
             return f"AGENT GRAPH ERROR: {e}"
-# ====================================================
-# --- Global Agent Instantiation ---
 try:
-    agent = BasicAgent()
-    print("✅ Global BasicAgent instantiated successfully.")
-    if asr_pipeline is None: print("⚠️ Global ASR Pipeline failed load.")
 except Exception as e:
     print(f"❌ FATAL: Could not instantiate global agent: {e}")
     traceback.print_exc()
     agent = None
 # ====================================================
 # --- (Original Template Code - Mock Questions Version) ---

 import os
 import io
 import json
 import uuid
 import time
 import ast
+from typing import List, Optional, TypedDict, Annotated, Dict
 from pathlib import Path
+from collections import Counter
 import pandas as pd
+import numpy as np
 import torch
 from pydantic import BaseModel, Field
 # CONFIGURATION
 # =============================================================================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MAX_TURNS = 25  # Increased for planning/reflection
 MAX_MESSAGE_LENGTH = 8000
+REFLECT_EVERY_N_TURNS = 5
 # =============================================================================
+# GLOBAL RAG COMPONENTS
 # =============================================================================
 global_embeddings = None
 global_text_splitter = None
     return None
 # =============================================================================
+# PLANNING & REFLECTION TOOLS
+# =============================================================================
+class PlanInput(BaseModel):
+    question: str = Field(description="The question to create a plan for")
+@tool(args_schema=PlanInput)
+def create_plan(question: str) -> str:
+    """
+    Creates a step-by-step plan for answering a question.
+    CRITICAL: Call this FIRST for any multi-step or complex question.
+    This helps you think through:
+    1. What information do you need?
+    2. In what order should you gather it?
+    3. What tools will you use?
+    After calling this, execute the plan step-by-step.
+    """
+    print(f"📋 Planning phase initiated for: {question[:100]}...")
+    return f"""✅ Plan Created. Now execute these steps methodically:
+PLANNING FRAMEWORK:
+1. GOAL: What exact answer format is needed?
+2. REQUIREMENTS: What data/information is required?
+3. STRATEGY: What's the most efficient path?
+4. EXECUTION: List concrete actions in order
+Now proceed with Step 1 of your plan."""
+class ReflectInput(BaseModel):
+    current_situation: str = Field(description="Brief summary of what you've tried and where you are stuck")
+@tool(args_schema=ReflectInput)
+def reflect_on_progress(current_situation: str) -> str:
+    """
+    Reflects on your progress and suggests what to do next.
+    Call this when:
+    - You feel stuck or uncertain
+    - Tools keep failing
+    - You're not making progress
+    - You've taken 5+ steps without getting closer to the answer
+    This helps you step back and reconsider your approach.
+    """
+    print(f"🤔 Reflection initiated: {current_situation[:100]}...")
+    return f"""🔍 REFLECTION ANALYSIS:
+Current situation: {current_situation}
+CRITICAL QUESTIONS TO ASK YOURSELF:
+1. Have I gathered the information I actually need?
+2. Am I using the right tools for this task?
+3. Am I going in circles (repeating similar actions)?
+4. Should I try a completely different approach?
+5. Do I have enough information to answer now?
+NEXT STEPS:
+- If stuck: Try a different tool or search query
+- If missing info: Identify exactly what's missing
+- If have info: Proceed to final_answer_tool
+- If uncertain: Break problem into smaller pieces
+Take a different approach now."""
+class ValidateInput(BaseModel):
+    proposed_answer: str = Field(description="The answer you plan to submit")
+    original_question: str = Field(description="The original question")
+@tool(args_schema=ValidateInput)
+def validate_answer(proposed_answer: str, original_question: str) -> str:
+    """
+    Validates your proposed answer before submission.
+    CRITICAL: ALWAYS call this before final_answer_tool.
+    Checks:
+    - Does the answer match what was asked?
+    - Is it in the correct format?
+    - Are there any obvious issues?
+    If validation passes, then call final_answer_tool.
+    If validation fails, gather more information or correct the format.
+    """
+    print(f"✓ Validating answer: '{proposed_answer[:50]}...'")
+    issues = []
+    warnings = []
+    # Check for conversational fluff
+    fluff_phrases = ["the answer is", "based on", "according to", "i found that", "here is", "final answer"]
+    if any(phrase in proposed_answer.lower() for phrase in fluff_phrases):
+        issues.append("❌ Remove conversational text. Provide ONLY the answer.")
+    # Check for number format if question asks for numbers
+    number_keywords = ["how many", "what number", "count", "total", "sum"]
+    if any(kw in original_question.lower() for kw in number_keywords):
+        if not any(char.isdigit() for char in proposed_answer):
+            warnings.append("⚠️ Question seems to ask for a number, but answer contains no digits.")
+    # Check for list format
+    if "list" in original_question.lower() and "," not in proposed_answer:
+        warnings.append("⚠️ Question asks for a list, consider comma-separated format.")
+    # Check for yes/no questions
+    if original_question.lower().strip().startswith(("is ", "are ", "was ", "were ", "do ", "does ", "did ", "can ", "will ")):
+        if proposed_answer.lower() not in ["yes", "no", "true", "false"]:
+            warnings.append("⚠️ This looks like a yes/no question. Consider simple yes/no answer.")
+    # Check for code fences or markdown
+    if "```" in proposed_answer:
+        issues.append("❌ Remove code fences (```) from the answer.")
+    # Check length
+    if len(proposed_answer) > 500:
+        warnings.append("⚠️ Answer is quite long. Are you sure this is just the answer and not an explanation?")
+    if issues:
+        return "🚫 VALIDATION FAILED:\n" + "\n".join(issues) + "\n\nFix these issues before calling final_answer_tool."
+    if warnings:
+        return "⚠️ VALIDATION WARNINGS:\n" + "\n".join(warnings) + "\n\nConsider these points, but you may proceed if confident."
+    return "✅ VALIDATION PASSED: Answer looks good! Proceed with final_answer_tool now."
+# =============================================================================
+# CORE TOOLS
 # =============================================================================
 class SearchInput(BaseModel):
 @tool(args_schema=SearchInput)
 def search_tool(query: str) -> str:
+    """
+    Searches the web using DuckDuckGo.
+    Use for: recent information, facts, general web searches.
+    Tips:
+    - Keep queries concise and specific
+    - Include year for time-sensitive queries (e.g., "GDP Brazil 2016")
+    - Try different phrasings if first search doesn't help
+    """
     if not isinstance(query, str) or not query.strip():
         return "Error: Invalid input. 'query' must be a non-empty string."
+    print(f"🔍 Searching: {query}")
     try:
         search = DuckDuckGoSearchRun()
         result = search.run(query)
         return f"Error running search for '{query}': {str(e)}"
+class CalcInput(BaseModel):
+    expression: str = Field(description="Mathematical expression to evaluate (e.g., '2 + 2', 'sqrt(16)', '45 * 1.2')")
+@tool(args_schema=CalcInput)
+def calculator(expression: str) -> str:
+    """
+    Evaluates mathematical expressions.
+    Use this for ANY calculations instead of code_interpreter.
+    Supports: +, -, *, /, **, sqrt, sin, cos, tan, log, exp, pi, e, abs, round
+    Examples:
+    - calculator("127 * 83")
+    - calculator("sqrt(144)")
+    - calculator("(45 + 23) / 2")
+    """
+    if not isinstance(expression, str) or not expression.strip():
+        return "Error: Invalid expression."
+    print(f"🧮 Calculating: {expression}")
+    try:
+        # Create safe namespace with math functions
+        import math
+        safe_dict = {
+            'sqrt': math.sqrt, 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
+            'log': math.log, 'log10': math.log10, 'exp': math.exp,
+            'pi': math.pi, 'e': math.e, 'abs': abs, 'round': round,
+            'pow': pow, 'sum': sum, 'min': min, 'max': max
+        }
+        result = eval(expression, {"__builtins__": {}}, safe_dict)
+        return f"{result}"
+    except Exception as e:
+        return f"Error evaluating '{expression}': {str(e)}\nMake sure to use proper syntax (e.g., sqrt(16), not sqrt 16)"
 class CodeInput(BaseModel):
+    # Argument schema bound to code_interpreter via @tool(args_schema=CodeInput).
+    code: str = Field(description="Python code to execute. MUST include print() for output.")
 @tool(args_schema=CodeInput)
 def code_interpreter(code: str) -> str:
     """
+    Executes Python code for complex data processing.
+    WHEN TO USE:
+    - Data analysis (CSV, Excel files)
+    - Complex calculations with loops/conditionals
+    - String manipulation
+    - Date/time calculations
+    WHEN NOT TO USE:
+    - Simple math (use calculator instead)
+    - Web searches (use search_tool)
+    Available libraries: pandas as pd, numpy as np, json, re, datetime
+    CRITICAL: Always use print() to output results!
     """
     if not isinstance(code, str):
         return "Error: Invalid input. 'code' must be a string."
+    # Safety checks
+    dangerous_patterns = ['__import__', 'eval(', 'compile(', 'subprocess', 'os.system', 'exec(']
     code_lower = code.lower()
     for pattern in dangerous_patterns:
         if pattern in code_lower:
     if 'open(' in code_lower and any(mode in code for mode in ["'w'", '"w"', "'a'", '"a"', "'wb'", '"wb"']):
         return "Error: Writing files is not allowed in code_interpreter. Use write_file tool instead."
+    print(f"💻 Executing code...")
     output_stream = io.StringIO()
     error_stream = io.StringIO()
         with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
             safe_globals = {
                 "pd": pd,
+                "np": np,
+                "json": json,
+                "re": re,
                 "__builtins__": __builtins__
             }
             exec(code, safe_globals, {})
         if stdout:
             if len(stdout) > MAX_MESSAGE_LENGTH:
                 stdout = stdout[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(stdout)} total chars]"
+            return f"{stdout}"
+        return "Code executed but produced no output. Remember to use print() to display results!"
     except Exception as e:
         tb_str = traceback.format_exc()
 class ReadFileInput(BaseModel):
+    # Argument schema bound to read_file via @tool(args_schema=ReadFileInput).
+    path: str = Field(description="Path to the file to read")
 @tool(args_schema=ReadFileInput)
 def read_file(path: str) -> str:
+    """Reads a file from the filesystem."""
     if not isinstance(path, str) or not path.strip():
         return "Error: Invalid input. 'path' must be a non-empty string."
+    print(f"📄 Reading file: {path}")
     file_path = find_file(path)
     if not file_path:
 class WriteFileInput(BaseModel):
+    # Argument schema bound to write_file via @tool(args_schema=WriteFileInput).
+    path: str = Field(description="Path where file should be written")
+    content: str = Field(description="Content to write to the file")
 @tool(args_schema=WriteFileInput)
 def write_file(path: str, content: str) -> str:
+    """Writes content to a file."""
     if not isinstance(path, str) or not path.strip():
         return "Error: Invalid input. 'path' must be a non-empty string."
     if not isinstance(content, str):
         return "Error: Invalid input. 'content' must be a string."
+    print(f"✍️ Writing file: {path}")
     try:
         file_path = Path.cwd() / path
 class ListDirInput(BaseModel):
+    # Argument schema bound to list_directory via @tool(args_schema=ListDirInput).
+    # The "." default mirrors the default of list_directory's own `path` parameter.
+    path: str = Field(description="Directory path to list", default=".")
 @tool(args_schema=ListDirInput)
 def list_directory(path: str = ".") -> str:
+    """Lists files and directories in a path."""
+    print(f"📁 Listing directory: {path}")
     try:
         dir_path = Path.cwd() / path if path != "." else Path.cwd()
 class AudioInput(BaseModel):
+    # Argument schema bound to audio_transcription_tool via @tool(args_schema=AudioInput).
+    file_path: str = Field(description="Path to audio file to transcribe")
 @tool(args_schema=AudioInput)
 def audio_transcription_tool(file_path: str) -> str:
+    """Transcribes audio files to text using Whisper."""
     if not isinstance(file_path, str) or not file_path.strip():
         return "Error: Invalid input. 'file_path' must be a non-empty string."
+    print(f"🎤 Transcribing audio: {file_path}")
     if asr_pipeline is None:
         return "Error: ASR pipeline is not available."
     except Exception as e:
         return f"Error transcribing '{file_path}': {str(e)}"
 class YoutubeInput(BaseModel):
+    # Argument schema bound to get_youtube_transcript via @tool(args_schema=YoutubeInput).
+    video_url: str = Field(description="YouTube video URL")
 @tool(args_schema=YoutubeInput)
 def get_youtube_transcript(video_url: str) -> str:
+    """Fetches transcript/captions from a YouTube video."""
     if not isinstance(video_url, str) or not video_url.strip():
         return "Error: Invalid input. 'video_url' must be a non-empty string."
+    print(f"📺 Getting YouTube transcript: {video_url}")
     try:
         video_id = None
 class ScrapeInput(BaseModel):
+    # Argument schema bound to scrape_and_retrieve via @tool(args_schema=ScrapeInput).
+    url: str = Field(description="URL to scrape (must start with http:// or https://)")
+    query: str = Field(description="Specific question or information to find on the page")
 @tool(args_schema=ScrapeInput)
 def scrape_and_retrieve(url: str, query: str) -> str:
     """
+    Scrapes a webpage and uses RAG to find relevant information.
+    Use when:
+    - You need specific information from a known webpage
+    - Search results give you a URL that contains the answer
+    - You need to extract data from a specific website
     """
     if not (url.lower().startswith(('http://', 'https://'))):
         return f"Error: Invalid URL. Must start with http:// or https://. Got: '{url}'"
     if not query or not query.strip():
         return "Error: A query is required to search the page content."
     if global_embeddings is None or global_text_splitter is None:
         if not initialize_rag_components():
             return "Error: RAG components could not be initialized."
+    print(f"🌐 Scraping & retrieving from: {url}")
     try:
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
         response = requests.get(url, headers=headers, timeout=20)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
         for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe", "noscript"]):
             tag.extract()
         main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile('content|main|article', re.I)) or soup.body
         if not main_content:
             return "Error: Could not find main content on the page."
         text = main_content.get_text(separator='\n', strip=True)
         lines = [line.strip() for line in text.splitlines()]
         text = '\n'.join(line for line in lines if line)
         if not text or len(text) < 50:
             return f"Error: Scraped content was too short or empty (length: {len(text)})."
         chunks = global_text_splitter.split_text(text)
         if not chunks:
             return "Error: Text could not be split into chunks."
         docs = [Document(page_content=chunk, metadata={"source": url}) for chunk in chunks]
         db = FAISS.from_documents(docs, global_embeddings)
         retriever = db.as_retriever(search_kwargs={"k": 5})
         retrieved_docs = retriever.invoke(query)
         if not retrieved_docs:
             return f"No relevant information found on {url} for query: '{query}'\n\nThe page was successfully scraped but doesn't seem to contain information matching your query."
         context_parts = []
         for i, doc in enumerate(retrieved_docs, 1):
             context_parts.append(f"[Chunk {i}]\n{doc.page_content}")
         context = "\n\n---\n\n".join(context_parts)
+        result = f"Relevant information from {url}:\n\n{context}"
         return truncate_if_needed(result)
 class FinalAnswerInput(BaseModel):
+    # Argument schema bound to final_answer_tool via @tool(args_schema=FinalAnswerInput).
+    answer: str = Field(description="The final answer - EXACTLY what was asked for, nothing more")
 @tool(args_schema=FinalAnswerInput)
 def final_answer_tool(answer: str) -> str:
     """
+    Submit your final answer.
+    CRITICAL RULES:
+    1. ALWAYS call validate_answer() before this
+    2. The answer must be EXACTLY what was asked for
+    3. NO conversational text (no "The answer is...", etc.)
+    4. NO explanations
+    5. Match the requested format exactly
+    Examples:
+    - If asked for a number: "42" (not "The answer is 42")
+    - If asked for a list: "red, blue, green" (not "The colors are: red, blue, green")
+    - If asked yes/no: "yes" (not "Yes, it is true")
     """
     if not isinstance(answer, str):
         try:
         except:
             return "Error: Invalid input. 'answer' must be a string."
+    print(f"✅ FINAL ANSWER SUBMITTED: {answer}")
     return answer
+# =============================================================================
+# DEFINED TOOLS LIST
+# =============================================================================
+defined_tools = [
+    # Every tool object this module exposes, grouped by purpose.
+    # NOTE(review): presumably this list is what the agent stores as
+    # self.tools and binds to the LLM - confirm in PlanningReflectionAgent.__init__.
+    # Planning & Reflection (use these first!)
+    create_plan,
+    reflect_on_progress,
+    validate_answer,
+    # Core tools
+    search_tool,
+    calculator,
+    code_interpreter,
+    # File operations
+    read_file,
+    write_file,
+    list_directory,
+    # Specialized tools
+    audio_transcription_tool,
+    get_youtube_transcript,
+    scrape_and_retrieve,
+    # Final answer
+    final_answer_tool
+]
+# =============================================================================
+# AGENT STATE
+# =============================================================================
+class AgentState(TypedDict):
+    # State dictionary handed between the "agent" and "tools" graph nodes.
+    # Conversation history; annotated with add_messages, which LangGraph uses
+    # to merge messages returned by a node into the existing list.
+    messages: Annotated[List[AnyMessage], add_messages]
+    # Agent turns taken so far; agent_node returns state['turn'] + 1 on each run.
+    turn: int
+    # Flipped to True by agent_node when the model's first tool call is create_plan.
+    has_plan: bool
+    # Back-to-back tool results whose content contains "Error"; tool_node_wrapper
+    # increments it on such a result and resets it to 0 otherwise.
+    consecutive_errors: int
+    # Name of the first tool call of each agent turn, appended by agent_node.
+    tool_history: List[str]
 # =============================================================================
 # FALLBACK PARSER
 # =============================================================================
 def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
+    """Parses malformed tool call strings from an LLM response."""
+    print(f"Fallback parsing LLM content (first 500 chars):\n{content[:500]}")
     tool_name = None
     tool_input = None
     cleaned_str = None
     func_match = re.search(
         r"<function[(=]\s*([^)]+)\s*[)>](.*)",
         content,
                 cleaned_str = cleaned_str.strip().rstrip(',')
                 tool_input = json.loads(cleaned_str)
+                print(f"🔧 Fallback: Parsed tool call for '{tool_name}'")
             else:
+                print(f"⚠️ Fallback: Found <function> but no JSON blob.")
                 tool_name = None
         except json.JSONDecodeError as e:
+            print(f"⚠️ Fallback: json.loads failed, trying ast.literal_eval.")
             try:
                 if cleaned_str:
                     potential_input = ast.literal_eval(cleaned_str)
                     if isinstance(potential_input, dict):
                         tool_input = potential_input
+                        print(f"🔧 Fallback: Parsed with ast.literal_eval for '{tool_name}'")
                     else:
                         tool_name = None
                 else:
                     tool_name = None
             except:
                 tool_name = None
     if tool_name and tool_input is not None:
         if any(t.name == tool_name for t in tools):
             tool_call = ToolCall(
             return [tool_call]
         else:
             print(f"❌ Tool '{tool_name}' not found in available tools")
     print("❌ Failed to parse any valid tool call from content")
     return []
 # =============================================================================
 # CONDITIONAL EDGE FUNCTION
 # =============================================================================
 def should_continue(state: AgentState):
+    """Decide whether to continue, call tools, or end."""
     last_message = state['messages'][-1]
     current_turn = state.get('turn', 0)
+    # Check for final_answer_tool
     if isinstance(last_message, AIMessage) and last_message.tool_calls:
         for tool_call in last_message.tool_calls:
             if tool_call.get("name") == "final_answer_tool":
                 print("--- Condition: final_answer_tool called, ending. ---")
                 return END
+    # Check turn limit
     if current_turn >= MAX_TURNS:
         print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
         return END
+    # Route to tools if tool calls exist
     if isinstance(last_message, AIMessage) and last_message.tool_calls:
         print("--- Condition: Tools called, routing to tools node. ---")
         return "tools"
+    # Loop prevention
     if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
         print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
         return END
+    # Loop back to agent
     print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
     return "agent"
 # =============================================================================
+# ENHANCED AGENT CLASS WITH PLANNING & REFLECTION
 # =============================================================================
+class PlanningReflectionAgent:
     def __init__(self):
+        print("🧠 PlanningReflectionAgent initializing...")
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY:
         # Initialize RAG Components
         if not initialize_rag_components():
+            print("⚠️ Warning: RAG components failed to initialize.")
         # Build tool descriptions
         tool_desc_list = []
             tool_desc_list.append(desc)
         tool_descriptions = "\n".join(tool_desc_list)
+        # Enhanced System Prompt with Planning & Reflection
+        self.system_prompt = f"""You are an elite AI agent designed for the GAIA benchmark - the most challenging question-answering tasks.
+🎯 YOUR MISSION: Provide the EXACT answer in the EXACT format requested.
+═══════════════════════════════════════════════════════════════
+📋 MANDATORY PROTOCOL - FOLLOW THIS RELIGIOUSLY:
+═══════════════════════════════════════════════════════════════
+**PHASE 1: PLANNING (For complex/multi-step questions)**
+├─ 1. Call create_plan() to think through your approach
+├─ 2. Identify what information you need
+└─ 3. Determine the sequence of steps
+**PHASE 2: EXECUTION (One step at a time)**
+├─ 1. Take ONE action per turn
+├─ 2. Use the RIGHT tool for each task:
+│     • Simple math → calculator()
+│     • Complex data → code_interpreter()
+│     • Web info → search_tool()
+│     • Specific page → scrape_and_retrieve()
+│     • Files → read_file()
+├─ 3. After EACH tool, evaluate the result
+└─ 4. Ask: "Do I have enough to answer now?"
+**PHASE 3: REFLECTION (If stuck)**
+├─ If no progress after 3-5 turns → call reflect_on_progress()
+├─ If tools keep failing → try different approach
+└─ If going in circles → step back and reconsider
+**PHASE 4: VALIDATION & SUBMISSION**
+├─ 1. When you have the answer → call validate_answer()
+├─ 2. If validation passes → call final_answer_tool()
+└─ 3. If validation fails → fix the issue first
+═══════════════════════════════════════════════════════════════
+🎓 EXAMPLES - LEARN FROM THESE:
+═══════════════════════════════════════════════════════════════
+**Example 1: Simple Math**
+Q: What is 127 × 83?
+Turn 1: calculator("127 * 83") → 10541
+Turn 2: validate_answer("10541", "What is 127 × 83?") → ✅ Pass
+Turn 3: final_answer_tool("10541")
+**Example 2: Multi-step Research**
+Q: What was the population of Einstein's birthplace in 1900?
+Turn 1: create_plan("What was the population of Einstein's birthplace in 1900?")
+Turn 2: search_tool("Albert Einstein birthplace") → Ulm, Germany
+Turn 3: search_tool("Ulm Germany population 1900") → approximately 50,000
+Turn 4: validate_answer("50000", "What was the population...") → ✅ Pass
+Turn 5: final_answer_tool("50000")
+**Example 3: File + Calculation**
+Q: What's the average of the 'score' column in data.csv?
+Turn 1: list_directory(".") → [files shown]
+Turn 2: read_file("data.csv") → [content]
+Turn 3: code_interpreter("import pandas as pd; df = pd.read_csv('data.csv'); print(df['score'].mean())")
+        → 78.5
+Turn 4: validate_answer("78.5", "What's the average...") → ✅ Pass
+Turn 5: final_answer_tool("78.5")
+**Example 4: Getting Unstuck**
+Q: What's the GDP of the 2016 Olympics host?
+Turn 1: search_tool("2016 Olympics") → [general info, no clear answer]
+Turn 2: search_tool("Olympics 2016 location") → [still unclear]
+Turn 3: reflect_on_progress("Tried searching but not getting clear host country")
+        → Try: "2016 Summer Olympics host country"
+Turn 4: search_tool("2016 Summer Olympics host country") → Brazil
+Turn 5: search_tool("Brazil GDP 2016") → $1.796 trillion
+Turn 6: validate_answer("1.796 trillion", original_q) → ✅ Pass
+Turn 7: final_answer_tool("1.796 trillion")
+═══════════════════════════════════════════════════════════════
+⚠️ CRITICAL RULES - NEVER VIOLATE THESE:
+═══════════════════════════════════════════════════════════════
+1. **NO GUESSING**: Always use tools. Never use your own knowledge.
+2. **ONE STEP AT A TIME**: Don't try to do multiple things in one turn.
+3. **EXACT FORMAT**: Answer must be EXACTLY what was asked for.
+4. **NO FLUFF**: Never add "The answer is" or explanations in final answer.
+5. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool().
+6. **PLAN COMPLEX TASKS**: Multi-step questions need create_plan() first.
+7. **REFLECT WHEN STUCK**: If no progress after 5 turns, call reflect_on_progress().
+═══════════════════════════════════════════════════════════════
+📚 AVAILABLE TOOLS:
+═══════════════════════════════════════════════════════════════
 {tool_descriptions}
+═══════════════════════════════════════════════════════════════
+🎯 REMEMBER: Quality over speed. Think carefully, plan ahead, execute methodically.
+═══════════════════════════════════════════════════════════════
 """
         print("Initializing Groq LLM...")
         try:
             self.llm_with_tools = ChatGroq(
                 temperature=0,
                 groq_api_key=GROQ_API_KEY,
                 max_tokens=4096,
                 timeout=60
             ).bind_tools(self.tools, tool_choice="auto")
+            print("✅ LLM initialized.")
         except Exception as e:
             print(f"❌ Error initializing Groq: {e}")
             raise
+        # Agent Node with Enhanced Logic
         def agent_node(state: AgentState):
             current_turn = state.get('turn', 0) + 1
+            print(f"\n{'='*70}")
+            print(f"🤖 AGENT TURN {current_turn}/{MAX_TURNS}")
+            print('='*70)
             if current_turn > MAX_TURNS:
+                return {
+                    "messages": [SystemMessage(content="Max turns reached. Submitting best available answer.")],
+                    "turn": current_turn
+                }
+            # Check if we should auto-trigger reflection
+            should_reflect = False
+            consecutive_errors = state.get('consecutive_errors', 0)
+            if current_turn > 5 and current_turn % REFLECT_EVERY_N_TURNS == 0:
+                should_reflect = True
+                print("🤔 Auto-triggering reflection (periodic check)")
+            if consecutive_errors >= 3:
+                should_reflect = True
+                print("🤔 Auto-triggering reflection (multiple errors)")
+            # Add reflection hint if needed
+            messages_to_send = state["messages"].copy()
+            if should_reflect and not state.get('has_plan', False):
+                hint = SystemMessage(
+                    content="⚠️ SYSTEM HINT: You've been working for several turns. Consider calling reflect_on_progress() to evaluate your approach."
+                )
+                messages_to_send.append(hint)
+            # Invoke LLM
             max_retries = 3
             ai_message = None
             for attempt in range(max_retries):
                 try:
+                    ai_message = self.llm_with_tools.invoke(messages_to_send)
                     break
                 except Exception as e:
                     print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {e}")
                         )
                     time.sleep(2 ** attempt)
+            # Fallback Parsing
             if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
                 parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
                 if parsed_tool_calls:
+                    print("🔧 Fallback: Successfully rebuilt tool call")
                     ai_message.tool_calls = parsed_tool_calls
                     ai_message.content = ""
+            # Track tool usage
+            tool_history = state.get('tool_history', [])
+            has_plan = state.get('has_plan', False)
             if ai_message.tool_calls:
+                tool_name = ai_message.tool_calls[0]['name']
+                print(f"🔧 Tool Call: {tool_name}")
+                tool_history.append(tool_name)
+                if tool_name == "create_plan":
+                    has_plan = True
             else:
+                print(f"💭 Reasoning: {ai_message.content[:200]}...")
+            return {
+                "messages": [ai_message],
+                "turn": current_turn,
+                "has_plan": has_plan,
+                "tool_history": tool_history
+            }
+        # Tool Node with Error Tracking
+        def tool_node_wrapper(state: AgentState):
+            """Wraps tool execution to track errors"""
+            tool_node = ToolNode(self.tools)
+            result = tool_node(state)
+            # Check if last message is a tool error
+            if result['messages']:
+                last_msg = result['messages'][-1]
+                if isinstance(last_msg, ToolMessage) and "Error" in last_msg.content:
+                    consecutive_errors = state.get('consecutive_errors', 0) + 1
+                    result['consecutive_errors'] = consecutive_errors
+                else:
+                    result['consecutive_errors'] = 0
+            return result
         # Build Graph
+        print("Building Planning & Reflection Agent graph...")
         graph_builder = StateGraph(AgentState)
         graph_builder.add_node("agent", agent_node)
+        graph_builder.add_node("tools", tool_node_wrapper)
         graph_builder.add_edge(START, "agent")
         graph_builder.add_edge("tools", "agent")
         self.graph = graph_builder.compile()
+        print("✅ Planning & Reflection Agent graph compiled successfully.")
     def __call__(self, question: str) -> str:
+        print(f"\n{'='*70}")
+        print(f"🎯 NEW QUESTION")
+        print(f"{'='*70}")
+        print(f"Q: {question[:200]}{'...' if len(question) > 200 else ''}")
+        print(f"{'='*70}\n")
         graph_input = {
             "messages": [
                 SystemMessage(content=self.system_prompt),
                 HumanMessage(content=question)
             ],
+            "turn": 0,
+            "has_plan": False,
+            "consecutive_errors": 0,
+            "tool_history": []
         }
         final_answer = "AGENT FAILED TO PRODUCE ANSWER"
         try:
+            config = {"recursion_limit": MAX_TURNS + 10}
             for event in self.graph.stream(graph_input, stream_mode="values", config=config):
+                if not event.get('messages'):
+                    continue
+                last_message = event["messages"][-1]
                 # Check for final answer extraction
                 if isinstance(last_message, AIMessage) and last_message.tool_calls:
                     if last_message.tool_calls[0].get("name") == "final_answer_tool":
                         final_answer_args = last_message.tool_calls[0].get('args', {})
                         if 'answer' in final_answer_args:
+                            final_answer = final_answer_args['answer']
+                            print(f"\n{'='*70}")
+                            print(f"✅ FINAL ANSWER CAPTURED: '{final_answer}'")
+                            print(f"{'='*70}\n")
+                            break
                         else:
+                            print(f"⚠️ final_answer_tool called without 'answer' argument")
+                            final_answer = "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER"
+                            break
                 elif isinstance(last_message, ToolMessage):
+                    result_preview = last_message.content[:300].replace('\n', ' ')
+                    print(f"📊 Tool Result: {result_preview}...")
                 elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
+                    print(f"💭 AI Reasoning: {last_message.content[:300]}...")
+            # Final Answer Cleaning
             cleaned_answer = str(final_answer).strip()
+            # Remove common prefixes
+            prefixes_to_remove = [
+                "The answer is:", "Here is the answer:", "Based on the information:",
+                "Final Answer:", "Answer:", "The final answer is:", "My answer is:",
+                "According to", "I found that", "The result is:"
+            ]
             for prefix in prefixes_to_remove:
                 if cleaned_answer.lower().startswith(prefix.lower()):
                     potential_answer = cleaned_answer[len(prefix):].strip()
+                    if potential_answer:
                         cleaned_answer = potential_answer
+                        break
+            # Remove code fences
             cleaned_answer = remove_fences_simple(cleaned_answer)
+            # Remove surrounding backticks
+            while cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
+                cleaned_answer = cleaned_answer[1:-1].strip()
+            # Remove quotes if they wrap the entire answer
+            if (cleaned_answer.startswith('"') and cleaned_answer.endswith('"')) or \
+               (cleaned_answer.startswith("'") and cleaned_answer.endswith("'")):
+                cleaned_answer = cleaned_answer[1:-1].strip()
+            # Remove trailing periods for non-sentence answers
+            if cleaned_answer.endswith('.') and len(cleaned_answer.split()) < 10:
+                cleaned_answer = cleaned_answer[:-1]
+            print(f"\n{'='*70}")
+            print(f"🎉 FINAL CLEANED ANSWER")
+            print(f"{'='*70}")
+            print(f"{cleaned_answer}")
+            print(f"{'='*70}\n")
             return cleaned_answer
         except Exception as e:
+            print(f"❌ Error running agent graph: {e}")
             tb_str = traceback.format_exc()
             print(tb_str)
             return f"AGENT GRAPH ERROR: {e}"
+# =============================================================================
+# GLOBAL AGENT INSTANTIATION
+# =============================================================================
 try:
+    # Build the shared RAG components and the single module-level agent at
+    # import time so every request reuses the same instance.
+    initialize_rag_components()
+    agent = PlanningReflectionAgent()
+    print("✅ Global PlanningReflectionAgent instantiated successfully.")
+    # A missing ASR pipeline is reported but does not stop start-up.
+    if asr_pipeline is None:
+        print("⚠️ Global ASR Pipeline failed to load.")
 except Exception as e:
     print(f"❌ FATAL: Could not instantiate global agent: {e}")
     traceback.print_exc()
     # Keep the module importable; NOTE(review): presumably callers check
     # `agent is None` before using it - confirm in the Gradio handlers.
     agent = None
 # ====================================================
 # --- (Original Template Code - Mock Questions Version) ---