Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Community

gabejavitt committed on Nov 4, 2025

Commit

1d49677

verified ·

1 Parent(s): 2541381

Update app.py

Browse files

Files changed (1) hide show

app.py +520 -601

app.py CHANGED Viewed

@@ -41,7 +41,7 @@ from langchain_core.documents import Document
 # CONFIGURATION
 # =============================================================================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MAX_TURNS = 25  # Increased for planning/reflection
 MAX_MESSAGE_LENGTH = 8000
 REFLECT_EVERY_N_TURNS = 5
@@ -143,7 +143,7 @@ def find_file(path: str) -> Optional[Path]:
 # =============================================================================
 class ThinkInput(BaseModel):
-    reasoning: str = Field(description="Your step-by-step reasoning for a logic puzzle (keep under 200 chars)")
 @tool(args_schema=ThinkInput)
 def think_through_logic(reasoning: str) -> str:
@@ -155,150 +155,101 @@ def think_through_logic(reasoning: str) -> str:
     - You need to reason through a logical problem
     - No external information is needed, just thinking
-    After thinking through the logic, use calculator if math is involved,
-    then validate_answer and final_answer_tool.
-    NOTE: Keep reasoning summary brief (under 200 chars).
     """
-    print(f"🧠 Thinking through logic: {reasoning[:100]}...")
-    return f"""✅ Logic reasoning recorded: {reasoning}
-Now:
-1. If there's any math to calculate, use calculator()
-2. Once you have the answer, call validate_answer()
-3. Then call final_answer_tool() with just the answer"""
 class PlanInput(BaseModel):
-    question: str = Field(description="Brief summary of the task (keep under 100 chars)")
 @tool(args_schema=PlanInput)
-def create_plan(question: str) -> str:
     """
-    Creates a step-by-step plan for answering a question.
-    CRITICAL: Call this FIRST for any multi-step or complex question.
-    This helps you think through:
-    1. What information do you need?
-    2. In what order should you gather it?
-    3. What tools will you use?
-    After calling this, execute the plan step-by-step.
-    NOTE: Keep the question summary brief (under 100 chars) to avoid errors.
     """
-    print(f"📋 Planning phase initiated for: {question[:100]}...")
-    return f"""✅ Plan Created. Now execute these steps methodically:
-PLANNING FRAMEWORK:
-1. GOAL: What exact answer format is needed?
-2. REQUIREMENTS: What data/information is required?
-3. STRATEGY: What's the most efficient path?
-4. EXECUTION: List concrete actions in order
-Now proceed with Step 1 of your plan."""
 class ReflectInput(BaseModel):
-    current_situation: str = Field(description="What you've tried so far (keep brief, under 100 chars)")
 @tool(args_schema=ReflectInput)
-def reflect_on_progress(current_situation: str) -> str:
     """
-    Reflects on your progress and suggests what to do next.
-    Call this when:
-    - You feel stuck or uncertain
-    - Tools keep failing
-    - You're not making progress
-    - You've taken 5+ steps without getting closer to the answer
-    This helps you step back and reconsider your approach.
-    NOTE: Keep the situation summary brief (under 100 chars).
     """
-    print(f"🤔 Reflection initiated: {current_situation[:100]}...")
-    return f"""🔍 REFLECTION ANALYSIS:
-Current situation: {current_situation}
-CRITICAL QUESTIONS TO ASK YOURSELF:
-1. Have I gathered the information I actually need?
-2. Am I using the right tools for this task?
-3. Am I going in circles (repeating similar actions)?
-4. Should I try a completely different approach?
-5. Do I have enough information to answer now?
-NEXT STEPS:
-- If stuck: Try a different tool or search query
-- If missing info: Identify exactly what's missing
-- If have info: Proceed to final_answer_tool
-- If uncertain: Break problem into smaller pieces
-Take a different approach now."""
 class ValidateInput(BaseModel):
-    proposed_answer: str = Field(description="The answer you plan to submit")
-    original_question: str = Field(description="The original question")
 @tool(args_schema=ValidateInput)
 def validate_answer(proposed_answer: str, original_question: str) -> str:
     """
-    Validates your proposed answer before submission.
-    CRITICAL: ALWAYS call this before final_answer_tool.
-    Checks:
-    - Does the answer match what was asked?
-    - Is it in the correct format?
-    - Are there any obvious issues?
-    If validation passes, then call final_answer_tool.
-    If validation fails, gather more information or correct the format.
     """
-    print(f"✓ Validating answer: '{proposed_answer[:50]}...'")
     issues = []
     warnings = []
     # Check for conversational fluff
-    fluff_phrases = ["the answer is", "based on", "according to", "i found that", "here is", "final answer"]
-    if any(phrase in proposed_answer.lower() for phrase in fluff_phrases):
-        issues.append("❌ Remove conversational text. Provide ONLY the answer.")
-    # Check for number format if question asks for numbers
-    number_keywords = ["how many", "what number", "count", "total", "sum"]
-    if any(kw in original_question.lower() for kw in number_keywords):
-        if not any(char.isdigit() for char in proposed_answer):
-            warnings.append("⚠️ Question seems to ask for a number, but answer contains no digits.")
-    # Check for list format
-    if "list" in original_question.lower() and "," not in proposed_answer:
-        warnings.append("⚠️ Question asks for a list, consider comma-separated format.")
-    # Check for yes/no questions
-    if original_question.lower().strip().startswith(("is ", "are ", "was ", "were ", "do ", "does ", "did ", "can ", "will ")):
-        if proposed_answer.lower() not in ["yes", "no", "true", "false"]:
-            warnings.append("⚠️ This looks like a yes/no question. Consider simple yes/no answer.")
-    # Check for code fences or markdown
     if "```" in proposed_answer:
-        issues.append("❌ Remove code fences (```) from the answer.")
     # Check length
     if len(proposed_answer) > 500:
-        warnings.append("⚠️ Answer is quite long. Are you sure this is just the answer and not an explanation?")
     if issues:
-        return "🚫 VALIDATION FAILED:\n" + "\n".join(issues) + "\n\nFix these issues before calling final_answer_tool."
     if warnings:
-        return "⚠️ VALIDATION WARNINGS:\n" + "\n".join(warnings) + "\n\nConsider these points, but you may proceed if confident."
-    return "✅ VALIDATION PASSED: Answer looks good! Proceed with final_answer_tool now."
 # =============================================================================
@@ -306,48 +257,31 @@ def validate_answer(proposed_answer: str, original_question: str) -> str:
 # =============================================================================
 class SearchInput(BaseModel):
-    query: str = Field(description="The search query.")
 @tool(args_schema=SearchInput)
 def search_tool(query: str) -> str:
-    """
-    Searches the web using DuckDuckGo.
-    Use for: recent information, facts, general web searches.
-    Tips:
-    - Keep queries concise and specific
-    - Include year for time-sensitive queries (e.g., "GDP Brazil 2016")
-    - Try different phrasings if first search doesn't help
-    """
     if not isinstance(query, str) or not query.strip():
-        return "Error: Invalid input. 'query' must be a non-empty string."
     print(f"🔍 Searching: {query}")
     try:
         search = DuckDuckGoSearchRun()
         result = search.run(query)
-        if len(result) > MAX_MESSAGE_LENGTH:
-            result = result[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(result)} total chars]"
-        return result
     except Exception as e:
-        return f"Error running search for '{query}': {str(e)}"
 class CalcInput(BaseModel):
-    expression: str = Field(description="Mathematical expression to evaluate (e.g., '2 + 2', 'sqrt(16)', '45 * 1.2')")
 @tool(args_schema=CalcInput)
 def calculator(expression: str) -> str:
     """
-    Evaluates mathematical expressions.
-    Use this for ANY calculations instead of code_interpreter.
-    Supports: +, -, *, /, **, sqrt, sin, cos, tan, log, exp, pi, e, abs, round
-    Examples:
-    - calculator("127 * 83")
-    - calculator("sqrt(144)")
-    - calculator("(45 + 23) / 2")
     """
     if not isinstance(expression, str) or not expression.strip():
         return "Error: Invalid expression."
@@ -355,7 +289,6 @@ def calculator(expression: str) -> str:
     print(f"🧮 Calculating: {expression}")
     try:
-        # Create safe namespace with math functions
         import math
         safe_dict = {
             'sqrt': math.sqrt, 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
@@ -365,47 +298,33 @@ def calculator(expression: str) -> str:
         }
         result = eval(expression, {"__builtins__": {}}, safe_dict)
-        return f"{result}"
     except Exception as e:
-        return f"Error evaluating '{expression}': {str(e)}\nMake sure to use proper syntax (e.g., sqrt(16), not sqrt 16)"
 class CodeInput(BaseModel):
-    code: str = Field(description="Python code to execute. MUST include print() for output.")
 @tool(args_schema=CodeInput)
 def code_interpreter(code: str) -> str:
     """
-    Executes Python code for complex data processing.
-    WHEN TO USE:
-    - Data analysis (CSV, Excel files)
-    - Complex calculations with loops/conditionals
-    - String manipulation
-    - Date/time calculations
-    WHEN NOT TO USE:
-    - Simple math (use calculator instead)
-    - Web searches (use search_tool)
-    Available libraries: pandas as pd, numpy as np, json, re, datetime
     CRITICAL: Always use print() to output results!
     """
     if not isinstance(code, str):
-        return "Error: Invalid input. 'code' must be a string."
     # Safety checks
-    dangerous_patterns = ['__import__', 'eval(', 'compile(', 'subprocess', 'os.system', 'exec(']
-    code_lower = code.lower()
-    for pattern in dangerous_patterns:
-        if pattern in code_lower:
-            return f"Error: Potentially dangerous operation '{pattern}' is not allowed."
-    if 'open(' in code_lower and any(mode in code for mode in ["'w'", '"w"', "'a'", '"a"', "'wb'", '"wb"']):
-        return "Error: Writing files is not allowed in code_interpreter. Use write_file tool instead."
-    print(f"💻 Executing code...")
     output_stream = io.StringIO()
     error_stream = io.StringIO()
@@ -424,126 +343,113 @@ def code_interpreter(code: str) -> str:
         stderr = error_stream.getvalue()
         if stderr:
-            return f"Error in execution:\n{stderr}\n\nStdout (if any):\n{stdout}"
         if stdout:
-            if len(stdout) > MAX_MESSAGE_LENGTH:
-                stdout = stdout[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(stdout)} total chars]"
-            return f"{stdout}"
-        return "Code executed but produced no output. Remember to use print() to display results!"
     except Exception as e:
-        tb_str = traceback.format_exc()
-        return f"Execution failed:\n{tb_str}"
 class ReadFileInput(BaseModel):
-    path: str = Field(description="Path to the file to read")
 @tool(args_schema=ReadFileInput)
 def read_file(path: str) -> str:
-    """Reads a file from the filesystem."""
     if not isinstance(path, str) or not path.strip():
-        return "Error: Invalid input. 'path' must be a non-empty string."
-    print(f"📄 Reading file: {path}")
     file_path = find_file(path)
     if not file_path:
-        cwd_files = os.listdir(".")
-        return (f"Error: File not found: '{path}'\n"
-                f"Files in current directory: {cwd_files}")
     try:
         content = file_path.read_text(encoding='utf-8')
         return truncate_if_needed(content)
     except UnicodeDecodeError:
-        size = file_path.stat().st_size
-        ext = file_path.suffix
-        return (f"File appears to be binary ({size} bytes). Cannot display as text.\n"
-                f"File type: {ext}\n"
-                f"Consider using audio_transcription_tool for audio files.")
     except Exception as e:
-        return f"Error reading file: {str(e)}"
 class WriteFileInput(BaseModel):
-    path: str = Field(description="Path where file should be written")
-    content: str = Field(description="Content to write to the file")
 @tool(args_schema=WriteFileInput)
 def write_file(path: str, content: str) -> str:
-    """Writes content to a file."""
-    if not isinstance(path, str) or not path.strip():
-        return "Error: Invalid input. 'path' must be a non-empty string."
-    if not isinstance(content, str):
-        return "Error: Invalid input. 'content' must be a string."
-    print(f"✍️ Writing file: {path}")
     try:
         file_path = Path.cwd() / path
         file_path.parent.mkdir(parents=True, exist_ok=True)
         file_path.write_text(content, encoding='utf-8')
-        return f"Successfully wrote {len(content)} characters to '{path}'."
     except Exception as e:
-        return f"Error writing file '{path}': {str(e)}"
 class ListDirInput(BaseModel):
-    path: str = Field(description="Directory path to list", default=".")
 @tool(args_schema=ListDirInput)
 def list_directory(path: str = ".") -> str:
-    """Lists files and directories in a path."""
-    print(f"📁 Listing directory: {path}")
     try:
         dir_path = Path.cwd() / path if path != "." else Path.cwd()
         if not dir_path.is_dir():
-            return f"Error: '{path}' is not a valid directory."
         items = sorted(dir_path.iterdir())
         if not items:
             return f"Directory '{path}' is empty."
-        files, directories = [], []
         for item in items:
             if item.is_dir():
-                directories.append(f"📁 {item.name}/")
             else:
-                size = item.stat().st_size
-                files.append(f"📄 {item.name} ({size} bytes)")
         result = f"Contents of '{path}':\n\n"
-        if directories:
-            result += "Directories:\n" + "\n".join(directories) + "\n\n"
         if files:
             result += "Files:\n" + "\n".join(files)
         return result
     except Exception as e:
-        return f"Error listing directory '{path}': {str(e)}"
 class AudioInput(BaseModel):
-    file_path: str = Field(description="Path to audio file to transcribe")
 @tool(args_schema=AudioInput)
 def audio_transcription_tool(file_path: str) -> str:
-    """Transcribes audio files to text using Whisper."""
-    if not isinstance(file_path, str) or not file_path.strip():
-        return "Error: Invalid input. 'file_path' must be a non-empty string."
-    print(f"🎤 Transcribing audio: {file_path}")
     if asr_pipeline is None:
-        return "Error: ASR pipeline is not available."
     audio_path = find_file(file_path)
     if not audio_path:
@@ -554,23 +460,23 @@ def audio_transcription_tool(file_path: str) -> str:
         result_text = transcription.get("text", "")
         if not result_text:
-            return "Error: Transcription produced no text."
         return f"Transcription:\n{truncate_if_needed(result_text)}"
     except Exception as e:
-        return f"Error transcribing '{file_path}': {str(e)}"
 class YoutubeInput(BaseModel):
-    video_url: str = Field(description="YouTube video URL")
 @tool(args_schema=YoutubeInput)
 def get_youtube_transcript(video_url: str) -> str:
-    """Fetches transcript/captions from a YouTube video."""
-    if not isinstance(video_url, str) or not video_url.strip():
-        return "Error: Invalid input. 'video_url' must be a non-empty string."
-    print(f"📺 Getting YouTube transcript: {video_url}")
     try:
         video_id = None
@@ -580,125 +486,101 @@ def get_youtube_transcript(video_url: str) -> str:
             video_id = video_url.split("youtu.be/")[1].split("?")[0]
         if not video_id:
-            return f"Error: Could not extract YouTube video ID from '{video_url}'."
         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
         if not transcript_list:
-            return "Error: No transcript found for this video."
         full_transcript = " ".join([item["text"] for item in transcript_list])
-        return f"YouTube Transcript:\n{truncate_if_needed(full_transcript)}"
     except Exception as e:
-        return f"Error getting transcript for '{video_url}': {str(e)}"
 class ScrapeInput(BaseModel):
-    url: str = Field(description="URL to scrape (must start with http:// or https://)")
-    query: str = Field(description="Specific question or information to find on the page")
 @tool(args_schema=ScrapeInput)
 def scrape_and_retrieve(url: str, query: str) -> str:
     """
-    Scrapes a webpage and uses RAG to find relevant information.
-    Use when:
-    - You need specific information from a known webpage
-    - Search results give you a URL that contains the answer
-    - You need to extract data from a specific website
     """
-    if not (url.lower().startswith(('http://', 'https://'))):
-        return f"Error: Invalid URL. Must start with http:// or https://. Got: '{url}'"
-    if not query or not query.strip():
-        return "Error: A query is required to search the page content."
     if global_embeddings is None or global_text_splitter is None:
         if not initialize_rag_components():
-            return "Error: RAG components could not be initialized."
-    print(f"🌐 Scraping & retrieving from: {url}")
     try:
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-        }
         response = requests.get(url, headers=headers, timeout=20)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
-        for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe", "noscript"]):
             tag.extract()
-        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile('content|main|article', re.I)) or soup.body
-        if not main_content:
-            return "Error: Could not find main content on the page."
-        text = main_content.get_text(separator='\n', strip=True)
-        lines = [line.strip() for line in text.splitlines()]
-        text = '\n'.join(line for line in lines if line)
-        if not text or len(text) < 50:
-            return f"Error: Scraped content was too short or empty (length: {len(text)})."
         chunks = global_text_splitter.split_text(text)
         if not chunks:
-            return "Error: Text could not be split into chunks."
-        docs = [Document(page_content=chunk, metadata={"source": url}) for chunk in chunks]
         db = FAISS.from_documents(docs, global_embeddings)
         retriever = db.as_retriever(search_kwargs={"k": 5})
-        retrieved_docs = retriever.invoke(query)
-        if not retrieved_docs:
-            return f"No relevant information found on {url} for query: '{query}'\n\nThe page was successfully scraped but doesn't seem to contain information matching your query."
-        context_parts = []
-        for i, doc in enumerate(retrieved_docs, 1):
-            context_parts.append(f"[Chunk {i}]\n{doc.page_content}")
-        context = "\n\n---\n\n".join(context_parts)
-        result = f"Relevant information from {url}:\n\n{context}"
-        return truncate_if_needed(result)
     except requests.RequestException as e:
-        return f"Error fetching URL {url}: {str(e)}\n\nThe website may be blocking requests or may be temporarily unavailable."
     except Exception as e:
-        tb_str = traceback.format_exc()
-        return f"Error processing {url}: {str(e)}\n\nDetails:\n{tb_str}"
 class FinalAnswerInput(BaseModel):
-    answer: str = Field(description="The final answer - EXACTLY what was asked for, nothing more")
 @tool(args_schema=FinalAnswerInput)
 def final_answer_tool(answer: str) -> str:
     """
-    Submit your final answer.
-    CRITICAL RULES:
-    1. ALWAYS call validate_answer() before this
-    2. The answer must be EXACTLY what was asked for
-    3. NO conversational text (no "The answer is...", etc.)
     4. NO explanations
-    5. Match the requested format exactly
-    Examples:
-    - If asked for a number: "42" (not "The answer is 42")
-    - If asked for a list: "red, blue, green" (not "The colors are: red, blue, green")
-    - If asked yes/no: "yes" (not "Yes, it is true")
     """
     if not isinstance(answer, str):
-        try:
-            answer = str(answer)
-        except:
-            return "Error: Invalid input. 'answer' must be a string."
     print(f"✅ FINAL ANSWER SUBMITTED: {answer}")
     return answer
@@ -708,8 +590,8 @@ def final_answer_tool(answer: str) -> str:
 # DEFINED TOOLS LIST
 # =============================================================================
 defined_tools = [
-    # Planning & Reflection (use these strategically!)
-    think_through_logic,  # NEW: For logic puzzles
     create_plan,
     reflect_on_progress,
     validate_answer,
@@ -724,17 +606,16 @@ defined_tools = [
     write_file,
     list_directory,
-    # Specialized tools
     audio_transcription_tool,
     get_youtube_transcript,
     scrape_and_retrieve,
-    # Final answer
     final_answer_tool
 ]
 # =============================================================================
 # AGENT STATE
 # =============================================================================
@@ -744,152 +625,155 @@ class AgentState(TypedDict):
     has_plan: bool
     consecutive_errors: int
     tool_history: List[str]
 # =============================================================================
-# FALLBACK PARSER
 # =============================================================================
 def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
-    """Parses malformed tool call strings from an LLM response."""
-    print(f"Fallback parsing LLM content (first 500 chars):\n{content[:500]}")
     tool_name = None
     tool_input = None
-    cleaned_str = None
-    # STRATEGY 1: Parse Groq's <function=name{...}> format
-    groq_match = re.search(
-        r"<function=(\w+)\s*(\{.*?\})\s*(?:>|</function>)",
-        content,
-        re.DOTALL
-    )
     if groq_match:
         try:
             tool_name = groq_match.group(1).strip()
             json_str = groq_match.group(2).strip()
-            # Unescape unicode and clean up
             json_str = json_str.encode().decode('unicode_escape')
             tool_input = json.loads(json_str)
-            print(f"🔧 Fallback: Parsed Groq format for '{tool_name}'")
-        except Exception as e:
-            print(f"⚠️ Fallback: Failed to parse Groq format: {e}")
             tool_name = None
-    # STRATEGY 2: Try original <function(tool_name)>...{json_string}... format
     if not tool_name:
-        func_match = re.search(
-            r"<function[(=]\s*([^)]+)\s*[)>](.*)",
-            content,
-            re.DOTALL | re.IGNORECASE
-        )
         if func_match:
             try:
                 tool_name = func_match.group(1).strip().replace("'", "").replace('"', '')
-                remaining_content = func_match.group(2)
-                json_start_index = remaining_content.find('{')
-                if json_start_index != -1:
-                    json_str = remaining_content[json_start_index:]
-                    cleaned_str = json_str.strip()
-                    cleaned_str = ''.join(c for c in cleaned_str if c.isprintable() or c in '\n\r\t')
-                    cleaned_str = cleaned_str.strip().rstrip(',')
-                    tool_input = json.loads(cleaned_str)
-                    print(f"🔧 Fallback: Parsed standard format for '{tool_name}'")
-                else:
-                    print(f"⚠️ Fallback: Found <function> but no JSON blob.")
-                    tool_name = None
-            except json.JSONDecodeError as e:
-                print(f"⚠️ Fallback: json.loads failed, trying ast.literal_eval.")
-                try:
-                    if cleaned_str:
-                        potential_input = ast.literal_eval(cleaned_str)
-                        if isinstance(potential_input, dict):
-                            tool_input = potential_input
-                            print(f"🔧 Fallback: Parsed with ast.literal_eval for '{tool_name}'")
-                        else:
-                            tool_name = None
-                    else:
-                        tool_name = None
-                except:
-                    tool_name = None
-    # STRATEGY 3: Look for simple tool mentions and create default calls
-    if not tool_name and content:
-        # Look for tool name mentions
         for tool in tools:
-            if tool.name in content.lower():
                 tool_name = tool.name
-                # Create minimal valid input
                 tool_input = {}
                 if tool.args_schema:
                     schema = tool.args_schema.model_json_schema()
-                    for prop, details in schema.get('properties', {}).items():
                         if prop in schema.get('required', []):
-                            # Extract value from content if possible
-                            tool_input[prop] = "summarized_input"
-                print(f"🔧 Fallback: Created default call for mentioned tool '{tool_name}'")
                 break
-    # FINAL VALIDATION
     if tool_name and tool_input is not None:
-        if any(t.name == tool_name for t in tools):
-            tool_call = ToolCall(
-                name=tool_name,
-                args=tool_input,
-                id=str(uuid.uuid4())
-            )
-            print(f"✅ Successfully created tool call: {tool_name}")
-            return [tool_call]
         else:
-            print(f"❌ Tool '{tool_name}' not found in available tools")
-    print("❌ Failed to parse any valid tool call from content")
     return []
 # =============================================================================
-# CONDITIONAL EDGE FUNCTION
 # =============================================================================
 def should_continue(state: AgentState):
-    """Decide whether to continue, call tools, or end."""
-    last_message = state['messages'][-1]
     current_turn = state.get('turn', 0)
-    # Check for final_answer_tool
-    if isinstance(last_message, AIMessage) and last_message.tool_calls:
-        for tool_call in last_message.tool_calls:
-            if tool_call.get("name") == "final_answer_tool":
-                print("--- Condition: final_answer_tool called, ending. ---")
-                return END
-    # Check turn limit
     if current_turn >= MAX_TURNS:
-        print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
         return END
-    # Route to tools if tool calls exist
     if isinstance(last_message, AIMessage) and last_message.tool_calls:
-        print("--- Condition: Tools called, routing to tools node. ---")
         return "tools"
-    # Loop prevention
-    if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
-        print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
-        return END
-    # Loop back to agent
-    print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
     return "agent"
 # =============================================================================
-# ENHANCED AGENT CLASS WITH PLANNING & REFLECTION
 # =============================================================================
 class PlanningReflectionAgent:
     def __init__(self):
@@ -897,148 +781,100 @@ class PlanningReflectionAgent:
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY:
-            raise ValueError("GROQ_API_KEY environment variable is not set!")
         self.tools = defined_tools
-        # Initialize RAG Components
         if not initialize_rag_components():
-            print("⚠️ Warning: RAG components failed to initialize.")
         # Build tool descriptions
         tool_desc_list = []
         for tool in self.tools:
             if tool.args_schema:
                 schema = tool.args_schema.model_json_schema()
-                args_desc = []
-                for prop, details in schema.get('properties', {}).items():
-                    desc = details.get('description', '')
-                    args_desc.append(f"  - {prop}: {desc}")
-                args_str = "\n".join(args_desc)
-                desc = f"- {tool.name}:\n  {tool.description}\n  Args:\n{args_str}"
             else:
                 desc = f"- {tool.name}: {tool.description}"
             tool_desc_list.append(desc)
         tool_descriptions = "\n".join(tool_desc_list)
-        # Enhanced System Prompt with Planning & Reflection
-        self.system_prompt = f"""You are an elite AI agent designed for the GAIA benchmark - the most challenging question-answering tasks.
-🎯 YOUR MISSION: Provide the EXACT answer in the EXACT format requested.
 ═══════════════════════════════════════════════════════════════
-📋 QUESTION TYPES & STRATEGIES:
 ═══════════════════════════════════════════════════════════════
-**TYPE 1: LOGIC PUZZLES / RIDDLES** (No tools needed)
-- Riddles, brain teasers, logical reasoning problems
-- Strategy: Think through the logic, use calculator for any math
-- Example: "If all but 30 of 200 coins are face-up, make equal face-down piles"
-  → This is pure logic. Think it through, then use final_answer_tool
-**TYPE 2: FACTUAL QUESTIONS** (Need web search)
-- Who, what, when, where questions about real world
-- Strategy: search_tool → scrape_and_retrieve if needed
-- Example: "What was Einstein's birthplace population in 1900?"
-**TYPE 3: DATA ANALYSIS** (Need files + code)
-- Questions about CSV, Excel, or other data files
-- Strategy: list_directory → read_file → code_interpreter
-- Example: "What's the average of column X in data.csv?"
-**TYPE 4: CALCULATIONS** (Need calculator/code)
-- Math problems, computations
-- Strategy: calculator for simple math, code_interpreter for complex
-- Example: "What is 127 × 83 + sqrt(144)?"
 ═══════════════════════════════════════════════════════════════
-📋 MANDATORY PROTOCOL:
 ═══════════════════════════════════════════════════════════════
-**PHASE 1: IDENTIFY QUESTION TYPE**
-├─ Is this a logic puzzle? → Think through it, use calculator if needed
-├─ Need real-world facts? → Use search/scrape tools
-├─ Need to analyze files? → Use file/code tools
-└─ Just math? → Use calculator
-**PHASE 2: FOR TOOL-BASED QUESTIONS**
-├─ 1. Call create_plan() for multi-step questions
-├─ 2. Execute ONE step at a time
-├─ 3. After EACH tool, evaluate the result
-└─ 4. Ask: "Do I have enough to answer now?"
-**PHASE 3: FOR LOGIC PUZZLES**
-├─ 1. Think through the logic step-by-step
-├─ 2. Use calculator ONLY if there's arithmetic
-├─ 3. Once you've solved it, call validate_answer()
-└─ 4. Then call final_answer_tool()
-**PHASE 4: REFLECTION (If stuck)**
-├─ If no progress after 3-5 turns → call reflect_on_progress()
-├─ If tools keep failing → try different approach
-└─ If going in circles → step back and reconsider
-**PHASE 5: VALIDATION & SUBMISSION**
-├─ 1. When you have the answer → call validate_answer()
-├─ 2. If validation passes → call final_answer_tool()
-└─ 3. If validation fails → fix the issue first
-═══════════════════════════════════════════════════════════════
-🎓 EXAMPLES - LEARN FROM THESE:
-═══════════════════════════════════════════════════════════════
-**Example 1: Logic Puzzle (NO TOOLS EXCEPT CALCULATOR/FINAL)**
-Q: If you have 200 coins with 30 face-down, and divide into 2 piles with equal face-down...
-Turn 1: Think through: If I take 30 coins and flip them all, one pile has X face-down...
-Turn 2: calculator("30") → 30
-Turn 3: validate_answer("30", original_q) → ✅ Pass
-Turn 4: final_answer_tool("30")
-**Example 2: Simple Math**
-Q: What is 127 × 83?
-Turn 1: calculator("127 * 83") → 10541
-Turn 2: validate_answer("10541", "What is 127 × 83?") → ✅ Pass
-Turn 3: final_answer_tool("10541")
-**Example 3: Multi-step Research**
-Q: What was the population of Einstein's birthplace in 1900?
-Turn 1: create_plan("Brief: Einstein birthplace pop 1900")
-Turn 2: search_tool("Albert Einstein birthplace") → Ulm, Germany
-Turn 3: search_tool("Ulm Germany population 1900") → approximately 50,000
-Turn 4: validate_answer("50000", "What was the population...") → ✅ Pass
-Turn 5: final_answer_tool("50000")
-**Example 4: File + Calculation**
-Q: What's the average of the 'score' column in data.csv?
-Turn 1: list_directory(".") → [files shown]
-Turn 2: read_file("data.csv") → [content]
-Turn 3: code_interpreter("import pandas as pd; df = pd.read_csv('data.csv'); print(df['score'].mean())")
-        → 78.5
-Turn 4: validate_answer("78.5", "What's the average...") → ✅ Pass
-Turn 5: final_answer_tool("78.5")
-**Example 5: Getting Unstuck**
-Q: What's the GDP of the 2016 Olympics host?
-Turn 1: search_tool("2016 Olympics") → [general info, no clear answer]
-Turn 2: search_tool("Olympics 2016 location") → [still unclear]
-Turn 3: reflect_on_progress("Searching but not getting host country")
-        → Try: "2016 Summer Olympics host country"
-Turn 4: search_tool("2016 Summer Olympics host country") → Brazil
-Turn 5: search_tool("Brazil GDP 2016") → $1.796 trillion
-Turn 6: validate_answer("1.796 trillion", original_q) → ✅ Pass
-Turn 7: final_answer_tool("1.796 trillion")
 ═══════════════════════════════════════════════════════════════
-⚠️ CRITICAL RULES - NEVER VIOLATE THESE:
 ═══════════════════════════════════════════════════════════════
-1. **IDENTIFY QUESTION TYPE FIRST**: Logic puzzle vs. factual vs. data vs. math
-2. **LOGIC PUZZLES**: Don't use search/file tools. Just think + validate + final_answer
-3. **ONE STEP AT A TIME**: Don't try to do multiple things in one turn
-4. **EXACT FORMAT**: Answer must be EXACTLY what was asked for
-5. **NO FLUFF**: Never add "The answer is" or explanations in final answer
-6. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool()
-7. **DON'T LOOP**: If 2 consecutive turns produce no tool calls, you're stuck - call a tool!
 ═══════════════════════════════════════════════════════════════
 📚 AVAILABLE TOOLS:
@@ -1047,29 +883,36 @@ Turn 7: final_answer_tool("1.796 trillion")
 {tool_descriptions}
 ═══════════════════════════════════════════════════════════════
-🎯 REMEMBER:
-- Logic puzzles: Think → Calculator (if needed) → Validate → Final Answer
-- Factual questions: Plan → Search → Validate → Final Answer
-- Always call a tool - never just output reasoning text!
 ═══════════════════════════════════════════════════════════════
 """
         print("Initializing Groq LLM...")
         try:
             self.llm_with_tools = ChatGroq(
                 temperature=0,
                 groq_api_key=GROQ_API_KEY,
                 model_name="llama-3.3-70b-versatile",
                 max_tokens=4096,
                 timeout=60
-            ).bind_tools(self.tools, tool_choice="auto")
-            print("✅ LLM initialized.")
         except Exception as e:
             print(f"❌ Error initializing Groq: {e}")
             raise
-        # Agent Node with Enhanced Logic
         def agent_node(state: AgentState):
             current_turn = state.get('turn', 0) + 1
             print(f"\n{'='*70}")
@@ -1078,46 +921,57 @@ Turn 7: final_answer_tool("1.796 trillion")
             if current_turn > MAX_TURNS:
                 return {
-                    "messages": [SystemMessage(content="Max turns reached. Submitting best available answer.")],
                     "turn": current_turn
                 }
-            # Check if we should auto-trigger reflection
-            should_reflect = False
             consecutive_errors = state.get('consecutive_errors', 0)
-            if current_turn > 5 and current_turn % REFLECT_EVERY_N_TURNS == 0:
-                should_reflect = True
-                print("🤔 Auto-triggering reflection (periodic check)")
-            if consecutive_errors >= 3:
-                should_reflect = True
-                print("🤔 Auto-triggering reflection (multiple errors)")
             # Add reflection hint if needed
-            messages_to_send = state["messages"].copy()
-            if should_reflect and not state.get('has_plan', False):
                 hint = SystemMessage(
-                    content="⚠️ SYSTEM HINT: You've been working for several turns. Consider calling reflect_on_progress() to evaluate your approach."
                 )
                 messages_to_send.append(hint)
-            # Invoke LLM with better error handling
             max_retries = 3
             ai_message = None
             for attempt in range(max_retries):
                 try:
                     ai_message = self.llm_with_tools.invoke(messages_to_send)
-                    break
                 except Exception as e:
                     error_str = str(e)
                     print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {error_str[:200]}")
-                    # If it's a tool_use_failed error, try without forcing tools
                     if "tool_use_failed" in error_str and attempt < max_retries - 1:
-                        print("🔧 Retrying without strict tool enforcement...")
                         try:
-                            # Try with a simpler LLM call
                             simple_llm = ChatGroq(
                                 temperature=0,
                                 groq_api_key=os.getenv("GROQ_API_KEY"),
@@ -1125,32 +979,56 @@ Turn 7: final_answer_tool("1.796 trillion")
                                 max_tokens=4096,
                                 timeout=60
                             )
-                            ai_message = simple_llm.invoke(messages_to_send)
-                            # Manually parse for tool calls
-                            if ai_message.content:
-                                parsed_calls = parse_tool_call_from_string(ai_message.content, self.tools)
-                                if parsed_calls:
-                                    ai_message.tool_calls = parsed_calls
                                     ai_message.content = ""
-                            break
                         except Exception as e2:
-                            print(f"⚠️ Simple LLM also failed: {e2}")
                     if attempt == max_retries - 1:
-                        # Last resort: return a message asking to proceed differently
                         ai_message = AIMessage(
-                            content="I need to approach this differently. Let me try a more direct method."
                         )
                     else:
                         time.sleep(2 ** attempt)
-            # Fallback Parsing
-            if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
-                parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
-                if parsed_tool_calls:
-                    print("🔧 Fallback: Successfully rebuilt tool call")
-                    ai_message.tool_calls = parsed_tool_calls
-                    ai_message.content = ""
             # Track tool usage
             tool_history = state.get('tool_history', [])
@@ -1164,34 +1042,45 @@ Turn 7: final_answer_tool("1.796 trillion")
                 if tool_name == "create_plan":
                     has_plan = True
             else:
-                print(f"💭 Reasoning: {ai_message.content[:200]}...")
             return {
                 "messages": [ai_message],
                 "turn": current_turn,
                 "has_plan": has_plan,
-                "tool_history": tool_history
             }
-        # Tool Node with Error Tracking
         def tool_node_wrapper(state: AgentState):
-            """Wraps tool execution to track errors"""
-            tool_node = ToolNode(self.tools)
-            result = tool_node(state)
-            # Check if last message is a tool error
-            if result['messages']:
                 last_msg = result['messages'][-1]
-                if isinstance(last_msg, ToolMessage) and "Error" in last_msg.content:
-                    consecutive_errors = state.get('consecutive_errors', 0) + 1
-                    result['consecutive_errors'] = consecutive_errors
-                else:
-                    result['consecutive_errors'] = 0
             return result
-# Build Graph
-        print("Building Planning & Reflection Agent graph...")
         graph_builder = StateGraph(AgentState)
         graph_builder.add_node("agent", agent_node)
@@ -1212,10 +1101,10 @@ Turn 7: final_answer_tool("1.796 trillion")
         graph_builder.add_edge("tools", "agent")
         self.graph = graph_builder.compile()
-        print("✅ Planning & Reflection Agent graph compiled successfully.")
     def __call__(self, question: str) -> str:
         print(f"\n{'='*70}")
         print(f"🎯 NEW QUESTION")
         print(f"{'='*70}")
@@ -1230,99 +1119,129 @@ Turn 7: final_answer_tool("1.796 trillion")
             "turn": 0,
             "has_plan": False,
             "consecutive_errors": 0,
-            "tool_history": []
         }
         final_answer = "AGENT FAILED TO PRODUCE ANSWER"
         try:
             config = {"recursion_limit": MAX_TURNS + 10}
             for event in self.graph.stream(graph_input, stream_mode="values", config=config):
                 if not event.get('messages'):
                     continue
-                last_message = event["messages"][-1]
-                # Check for final answer extraction
                 if isinstance(last_message, AIMessage) and last_message.tool_calls:
-                    if last_message.tool_calls[0].get("name") == "final_answer_tool":
-                        final_answer_args = last_message.tool_calls[0].get('args', {})
-                        if 'answer' in final_answer_args:
-                            final_answer = final_answer_args['answer']
-                            print(f"\n{'='*70}")
-                            print(f"✅ FINAL ANSWER CAPTURED: '{final_answer}'")
-                            print(f"{'='*70}\n")
-                            break
-                        else:
-                            print(f"⚠️ final_answer_tool called without 'answer' argument")
-                            final_answer = "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER"
-                            break
                 elif isinstance(last_message, ToolMessage):
-                    result_preview = last_message.content[:300].replace('\n', ' ')
-                    print(f"📊 Tool Result: {result_preview}...")
                 elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
-                    print(f"💭 AI Reasoning: {last_message.content[:300]}...")
-            # Final Answer Cleaning
-            cleaned_answer = str(final_answer).strip()
-            # Remove common prefixes
-            prefixes_to_remove = [
-                "The answer is:", "Here is the answer:", "Based on the information:",
-                "Final Answer:", "Answer:", "The final answer is:", "My answer is:",
-                "According to", "I found that", "The result is:"
             ]
-            for prefix in prefixes_to_remove:
-                if cleaned_answer.lower().startswith(prefix.lower()):
-                    potential_answer = cleaned_answer[len(prefix):].strip()
-                    if potential_answer:
-                        cleaned_answer = potential_answer
                         break
-            # Remove code fences
-            cleaned_answer = remove_fences_simple(cleaned_answer)
-            # Remove surrounding backticks
-            while cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
-                cleaned_answer = cleaned_answer[1:-1].strip()
-            # Remove quotes if they wrap the entire answer
-            if (cleaned_answer.startswith('"') and cleaned_answer.endswith('"')) or \
-               (cleaned_answer.startswith("'") and cleaned_answer.endswith("'")):
-                cleaned_answer = cleaned_answer[1:-1].strip()
-            # Remove trailing periods for non-sentence answers
-            if cleaned_answer.endswith('.') and len(cleaned_answer.split()) < 10:
-                cleaned_answer = cleaned_answer[:-1]
             print(f"\n{'='*70}")
-            print(f"🎉 FINAL CLEANED ANSWER")
             print(f"{'='*70}")
-            print(f"{cleaned_answer}")
             print(f"{'='*70}\n")
-            return cleaned_answer
         except Exception as e:
-            print(f"❌ Error running agent graph: {e}")
-            tb_str = traceback.format_exc()
-            print(tb_str)
-            return f"AGENT GRAPH ERROR: {e}"
 # =============================================================================
 # GLOBAL AGENT INSTANTIATION
 # =============================================================================
 try:
     initialize_rag_components()
     agent = PlanningReflectionAgent()
-    print("✅ Global PlanningReflectionAgent instantiated successfully.")
     if asr_pipeline is None:
-        print("⚠️ Global ASR Pipeline failed to load.")
 except Exception as e:
-    print(f"❌ FATAL: Could not instantiate global agent: {e}")
     traceback.print_exc()
     agent = None

 # CONFIGURATION
 # =============================================================================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MAX_TURNS = 25
 MAX_MESSAGE_LENGTH = 8000
 REFLECT_EVERY_N_TURNS = 5
 # =============================================================================
 class ThinkInput(BaseModel):
+    reasoning: str = Field(description="Brief reasoning summary (under 150 chars)")
 @tool(args_schema=ThinkInput)
 def think_through_logic(reasoning: str) -> str:
     - You need to reason through a logical problem
     - No external information is needed, just thinking
+    After thinking, use calculator if math is involved, then validate and submit answer.
     """
+    print(f"🧠 Thinking: {reasoning[:100]}...")
+    return f"""✅ Logic reasoning recorded.
+Next steps:
+1. If math needed → use calculator()
+2. Once you have answer → use validate_answer()
+3. Then → use final_answer_tool()
+Remember: You MUST call another tool. Do not output reasoning text."""
 class PlanInput(BaseModel):
+    task_summary: str = Field(description="Very brief task summary (under 80 chars)")
 # Planning tool: performs no computation of its own. It logs the summary and
 # returns a fixed three-question checklist that echoes the summary back.
 @tool(args_schema=PlanInput)
+def create_plan(task_summary: str) -> str:
     """
+    Creates a plan for multi-step questions. Use for complex tasks only.
+    Keep the summary VERY brief to avoid errors.
     """
     # Only the first 80 characters of the summary are written to the log.
+    print(f"📋 Planning: {task_summary[:80]}...")
     # The full (untruncated) summary is interpolated into the reply text.
+    return f"""✅ Plan created for: {task_summary}
+FRAMEWORK:
+1. What info do I need?
+2. What tools will I use?
+3. In what order?
+Now execute step 1. You MUST call a tool next."""
 class ReflectInput(BaseModel):
+    situation: str = Field(description="Brief situation summary (under 80 chars)")
 # Reflection tool: logs the situation and returns a fixed set of
 # self-check questions; it does not inspect any agent state itself.
 @tool(args_schema=ReflectInput)
+def reflect_on_progress(situation: str) -> str:
     """
+    Reflects on progress when stuck. Use after 5+ turns without progress.
+    Keep situation summary VERY brief.
     """
     # Only the first 80 characters of the situation are written to the log.
+    print(f"🤔 Reflecting: {situation[:80]}...")
     # The full (untruncated) situation text is echoed in the reply.
+    return f"""🔍 REFLECTION on: {situation}
+QUESTIONS:
+1. Am I using the right approach?
+2. Should I try a different tool?
+3. Do I actually have the answer already?
+Take a DIFFERENT approach now. You MUST call a tool next."""
 class ValidateInput(BaseModel):
+    proposed_answer: str = Field(description="The answer to validate")
+    original_question: str = Field(description="Original question (first 100 chars)")
 @tool(args_schema=ValidateInput)
 def validate_answer(proposed_answer: str, original_question: str) -> str:
     """
     Validates answer format before submission. ALWAYS use before final_answer_tool.
     """
     # Returns one of three messages:
     #   * "🚫 VALIDATION FAILED..."  when a hard formatting issue is found,
     #   * "⚠️ WARNINGS..."           when only soft concerns are found,
     #   * "✅ VALIDATION PASSED..."  otherwise.
     # Hard issues take precedence: warnings are not reported alongside them.
     import re  # local import keeps the function self-contained

     print(f"✓ Validating: '{proposed_answer[:50]}...'")
     issues = []
     warnings = []
     answer_lower = proposed_answer.lower()

     # Check for conversational fluff
     fluff = ["the answer is", "based on", "according to", "i found", "here is"]
     if any(p in answer_lower for p in fluff):
         issues.append("❌ Remove conversational text. Answer only.")

     # Check for code fences
     if "```" in proposed_answer:
         issues.append("❌ Remove code fences (```).")

     # Check length
     if len(proposed_answer) > 500:
         warnings.append("⚠️ Answer very long. Just the answer?")

     # Check for number questions. Whole-word matching is required here: a
     # plain substring test for "count" also fires on "country", "account",
     # "discount", ... and produced spurious warnings for non-numeric questions.
     asks_for_number = re.search(
         r"\b(?:how many|what number|count(?:s|ed|ing)?)\b",
         original_question.lower(),
     )
     if asks_for_number and not any(c.isdigit() for c in proposed_answer):
         warnings.append("⚠️ Question asks for number but answer has no digits.")

     if issues:
         return "🚫 VALIDATION FAILED:\n" + "\n".join(issues) + "\n\nFix then retry."
     if warnings:
         return "⚠️ WARNINGS:\n" + "\n".join(warnings) + "\n\nConsider fixing, or proceed if confident."
     return "✅ VALIDATION PASSED! Now call final_answer_tool() with this answer."
 # =============================================================================
 # =============================================================================
 class SearchInput(BaseModel):
+    query: str = Field(description="Search query (concise)")
 # Web-search tool. Every failure mode is reported as a returned string
 # (never raised), so the caller always receives text.
 @tool(args_schema=SearchInput)
 def search_tool(query: str) -> str:
+    """Searches web via DuckDuckGo. Use for facts, recent info."""
     # Reject non-string, empty, or whitespace-only queries up front.
     if not isinstance(query, str) or not query.strip():
+        return "Error: Invalid query."
     print(f"🔍 Searching: {query}")
     try:
         # A new DuckDuckGoSearchRun instance is created on every call.
         search = DuckDuckGoSearchRun()
         result = search.run(query)
         # truncate_if_needed is defined elsewhere in this file.
+        return truncate_if_needed(result)
     except Exception as e:
+        return f"Search error: {str(e)}"
 class CalcInput(BaseModel):
+    expression: str = Field(description="Math expression (e.g., '2+2', 'sqrt(16)')")
 @tool(args_schema=CalcInput)
 def calculator(expression: str) -> str:
     """
+    Evaluates math expressions. Use for ANY calculations.
+    Supports: +, -, *, /, **, sqrt, sin, cos, log, pi, e, etc.
     """
     if not isinstance(expression, str) or not expression.strip():
         return "Error: Invalid expression."
     print(f"🧮 Calculating: {expression}")
     try:
         import math
         safe_dict = {
             'sqrt': math.sqrt, 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
         }
         result = eval(expression, {"__builtins__": {}}, safe_dict)
+        return str(result)
     except Exception as e:
+        return f"Calculation error for '{expression}': {str(e)}"
 class CodeInput(BaseModel):
+    code: str = Field(description="Python code (MUST include print() for output)")
 @tool(args_schema=CodeInput)
 def code_interpreter(code: str) -> str:
     """
+    Executes Python code. Use for data processing, complex logic.
+    Available: pandas, numpy, json, re, datetime
     CRITICAL: Always use print() to output results!
     """
     if not isinstance(code, str):
+        return "Error: code must be string."
     # Safety checks
+    dangerous = ['__import__', 'eval(', 'compile(', 'subprocess', 'os.system', 'exec(']
+    if any(d in code.lower() for d in dangerous):
+        return f"Error: Dangerous operation not allowed."
+    if 'open(' in code.lower() and any(m in code for m in ["'w'", '"w"', "'a'", '"a"']):
+        return "Error: File writing not allowed. Use write_file tool."
+    print(f"💻 Executing code ({len(code)} chars)...")
     output_stream = io.StringIO()
     error_stream = io.StringIO()
         stderr = error_stream.getvalue()
         if stderr:
+            return f"Error:\n{stderr}\n\nStdout:\n{stdout}"
         if stdout:
+            return truncate_if_needed(stdout)
+        return "Code executed but no output. Remember to use print()!"
     except Exception as e:
+        return f"Execution failed:\n{traceback.format_exc()}"
 class ReadFileInput(BaseModel):
+    path: str = Field(description="File path")
 # File-reading tool. The path is resolved via find_file (defined elsewhere in
 # this file); all failures are returned as "Error: ..." style strings.
 @tool(args_schema=ReadFileInput)
 def read_file(path: str) -> str:
+    """Reads file content."""
     # Reject non-string, empty, or whitespace-only paths.
     if not isinstance(path, str) or not path.strip():
+        return "Error: Invalid path."
+    print(f"📄 Reading: {path}")
     file_path = find_file(path)
     if not file_path:
         # Include the working-directory listing so the caller can pick a valid name.
+        return f"Error: File not found: '{path}'\nCWD files: {os.listdir('.')}"
     try:
         content = file_path.read_text(encoding='utf-8')
         return truncate_if_needed(content)
     except UnicodeDecodeError:
         # Content that is not valid UTF-8 is reported as binary, with its size.
+        return f"Error: Binary file. Size: {file_path.stat().st_size} bytes. Try audio_transcription_tool for audio."
     except Exception as e:
+        return f"Read error: {str(e)}"
 class WriteFileInput(BaseModel):
+    path: str = Field(description="File path")
+    content: str = Field(description="Content to write")
 # File-writing tool: writes UTF-8 text to a path under the current working
 # directory, creating missing parent directories. Failures are returned as strings.
 @tool(args_schema=WriteFileInput)
 def write_file(path: str, content: str) -> str:
+    """Writes content to file."""
     # An empty path or non-string content is rejected; empty content ("") is allowed.
+    if not path or not isinstance(content, str):
+        return "Error: Invalid inputs."
+    print(f"✍️ Writing: {path}")
     try:
         # NOTE(review): joining onto Path.cwd() does not confine the write --
         # an absolute `path` replaces the base and ".." segments climb out of it.
         # Confirm whether writes outside the working directory are intended.
         file_path = Path.cwd() / path
         file_path.parent.mkdir(parents=True, exist_ok=True)
         file_path.write_text(content, encoding='utf-8')
+        return f"Wrote {len(content)} chars to '{path}'."
     except Exception as e:
+        return f"Write error: {str(e)}"
 class ListDirInput(BaseModel):
+    path: str = Field(description="Directory path", default=".")
 # Directory-listing tool: returns a text report with sub-directories first,
 # then files with their sizes in bytes. Failures are returned as strings.
 @tool(args_schema=ListDirInput)
 def list_directory(path: str = ".") -> str:
+    """Lists directory contents."""
+    print(f"📁 Listing: {path}")
     try:
         # "." maps to the working directory itself; anything else is joined onto it.
         dir_path = Path.cwd() / path if path != "." else Path.cwd()
         if not dir_path.is_dir():
+            return f"Error: '{path}' not a directory."
         # Sorted so the output order is stable between calls.
         items = sorted(dir_path.iterdir())
         if not items:
             return f"Directory '{path}' is empty."
+        files, dirs = [], []
         for item in items:
             if item.is_dir():
+                dirs.append(f"📁 {item.name}/")
             else:
                 # stat() can raise (e.g. for an unreadable entry); that is
                 # caught by the outer handler and aborts the whole listing.
+                files.append(f"📄 {item.name} ({item.stat().st_size} bytes)")
         result = f"Contents of '{path}':\n\n"
+        if dirs:
+            result += "Directories:\n" + "\n".join(dirs) + "\n\n"
         if files:
             result += "Files:\n" + "\n".join(files)
         return result
     except Exception as e:
+        return f"List error: {str(e)}"
 class AudioInput(BaseModel):
+    file_path: str = Field(description="Audio file path")
 @tool(args_schema=AudioInput)
 def audio_transcription_tool(file_path: str) -> str:
+    """Transcribes audio using Whisper."""
+    if not file_path:
+        return "Error: Invalid file path."
+    print(f"🎤 Transcribing: {file_path}")
     if asr_pipeline is None:
+        return "Error: ASR not available."
     audio_path = find_file(file_path)
     if not audio_path:
         result_text = transcription.get("text", "")
         if not result_text:
+            return "Error: Transcription empty."
         return f"Transcription:\n{truncate_if_needed(result_text)}"
     except Exception as e:
+        return f"Transcription error: {str(e)}"
 class YoutubeInput(BaseModel):
+    video_url: str = Field(description="YouTube URL")
 @tool(args_schema=YoutubeInput)
 def get_youtube_transcript(video_url: str) -> str:
+    """Fetches YouTube video transcript."""
+    if not video_url:
+        return "Error: Invalid URL."
+    print(f"📺 YouTube transcript: {video_url}")
     try:
         video_id = None
             video_id = video_url.split("youtu.be/")[1].split("?")[0]
         if not video_id:
+            return f"Error: Could not extract video ID."
         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
         if not transcript_list:
+            return "Error: No transcript found."
         full_transcript = " ".join([item["text"] for item in transcript_list])
+        return f"Transcript:\n{truncate_if_needed(full_transcript)}"
     except Exception as e:
+        return f"Transcript error: {str(e)}"
 class ScrapeInput(BaseModel):
+    url: str = Field(description="URL (must start with http:// or https://)")
+    query: str = Field(description="What to find on the page")
 # Scrape-and-retrieve tool. Pipeline: validate inputs -> fetch the page ->
 # strip boilerplate tags -> split the text into chunks -> build an in-memory
 # FAISS index -> return the 5 chunks retrieved for `query`.
 # All failures are returned as strings rather than raised.
 @tool(args_schema=ScrapeInput)
 def scrape_and_retrieve(url: str, query: str) -> str:
     """
+    Scrapes webpage and uses RAG to find relevant info.
+    Use when you need specific info from a known URL.
     """
+    if not url.startswith(('http://', 'https://')):
+        return f"Error: Invalid URL format."
+    if not query:
+        return "Error: Query required."
     # Lazily (re)initialise the shared embedding model / splitter if either is missing.
     if global_embeddings is None or global_text_splitter is None:
         if not initialize_rag_components():
+            return "Error: RAG not initialized."
+    print(f"🌐 Scraping: {url}")
     try:
         # Send a browser-like User-Agent header with the request.
+        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
         response = requests.get(url, headers=headers, timeout=20)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
         # Drop non-content elements before extracting text.
+        for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe"]):
             tag.extract()
         # Prefer <main>, then <article>, then fall back to the whole <body>.
+        main = soup.find('main') or soup.find('article') or soup.body
+        if not main:
+            return "Error: No main content found."
+        text = main.get_text(separator='\n', strip=True)
         # Collapse to non-empty, stripped lines.
+        lines = [l.strip() for l in text.splitlines() if l.strip()]
+        text = '\n'.join(lines)
         # Pages with fewer than 50 characters of text are treated as failures.
+        if len(text) < 50:
+            return f"Error: Content too short ({len(text)} chars)."
         chunks = global_text_splitter.split_text(text)
         if not chunks:
+            return "Error: Could not chunk text."
+        docs = [Document(page_content=c, metadata={"source": url}) for c in chunks]
         # The index is built fresh on every call and is not kept afterwards.
         db = FAISS.from_documents(docs, global_embeddings)
         retriever = db.as_retriever(search_kwargs={"k": 5})
+        retrieved = retriever.invoke(query)
+        if not retrieved:
+            return f"No relevant info found for: '{query}'"
+        context = "\n\n---\n\n".join([f"[Chunk {i+1}]\n{d.page_content}" for i, d in enumerate(retrieved)])
+        return truncate_if_needed(f"From {url}:\n\n{context}")
     except requests.RequestException as e:
         # Network / HTTP-status failures (including raise_for_status) land here.
+        return f"Fetch error: {str(e)}"
     except Exception as e:
         # Anything else: include the traceback text in the returned message.
+        return f"Scrape error: {str(e)}\n{traceback.format_exc()}"
 class FinalAnswerInput(BaseModel):
+    answer: str = Field(description="Final answer - EXACTLY what was asked, nothing more")
 # Final-answer tool: logs the answer and returns it unchanged (after coercing
 # a non-string value to str). No cleaning or validation happens here.
 @tool(args_schema=FinalAnswerInput)
 def final_answer_tool(answer: str) -> str:
     """
+    Submit final answer. CRITICAL RULES:
+    1. ALWAYS call validate_answer() first
+    2. Answer must be EXACTLY what was asked
+    3. NO conversational text
     4. NO explanations
+    5. Match requested format exactly
     """
     # Defensive coercion in case a non-string value is passed.
     if not isinstance(answer, str):
+        answer = str(answer)
     print(f"✅ FINAL ANSWER SUBMITTED: {answer}")
     return answer
 # DEFINED TOOLS LIST
 # =============================================================================
 defined_tools = [
+    # Planning & Reflection
+    think_through_logic,
     create_plan,
     reflect_on_progress,
     validate_answer,
     write_file,
     list_directory,
+    # Specialized
     audio_transcription_tool,
     get_youtube_transcript,
     scrape_and_retrieve,
+    # Final
     final_answer_tool
 ]
 # =============================================================================
 # AGENT STATE
 # =============================================================================
     has_plan: bool
     consecutive_errors: int
     tool_history: List[str]
+    last_tool_was_thinking: bool
 # =============================================================================
+# ENHANCED FALLBACK PARSER
 # =============================================================================
 def _extract_json_object(text: str):
     """Return the first JSON object embedded in *text*, or None if there is none.

     Decoding starts at the first '{' and stops at the end of that JSON value,
     so nested objects are handled and trailing markup such as '</function>'
     is ignored. Only a JSON object (dict) is accepted.
     """
     start = text.find('{')
     if start == -1:
         return None
     candidate = text[start:]
     # strict=False tolerates raw newlines/tabs inside string values.
     decoder = json.JSONDecoder(strict=False)
     try:
         parsed, _ = decoder.raw_decode(candidate)
     except ValueError:
         # Fallback for payloads whose escapes were escaped a second time.
         # Encoding with 'backslashreplace' first keeps non-ASCII text intact;
         # a plain utf-8 encode + unicode_escape decode would turn it into mojibake.
         try:
             unescaped = candidate.encode('latin-1', 'backslashreplace').decode('unicode_escape')
             parsed, _ = decoder.raw_decode(unescaped)
         except ValueError:
             return None
     return parsed if isinstance(parsed, dict) else None

 def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
     """Rebuild a tool call from free-form model text.

     Strategies are tried in order until one yields both a tool name and an
     argument dict:
       1. Groq style       ``<function=name{...}>``
       2. Standard style   ``<function(name)>{...}`` / ``<function=name>{...}``
       3. A fenced Python block, wrapped as a ``code_interpreter`` call
       4. A bare mention of a known tool name (required args get a placeholder)
       5. Longer reasoning-like text, wrapped as ``think_through_logic``

     Args:
         content: Raw text produced by the model.
         tools: Available tool objects; each must expose ``name`` and ``args_schema``.

     Returns:
         A one-element list with the reconstructed ToolCall, or an empty list
         when nothing usable is found or the named tool is not in *tools*.
     """
     print(f"🔧 Fallback parsing (first 300 chars):\n{content[:300]}")
     tool_name = None
     tool_input = None

     # STRATEGY 1: Groq's <function=name{...}> format
     groq_match = re.search(r"<function=(\w+)\s*(?=\{)", content)
     if groq_match:
         tool_input = _extract_json_object(content[groq_match.end():])
         if tool_input is not None:
             tool_name = groq_match.group(1).strip()
             print(f"✓ Parsed Groq format: {tool_name}")

     # STRATEGY 2: Standard <function(name)>{...} format
     if not tool_name:
         # The name is limited to identifier-like characters so the match cannot
         # swallow the JSON payload when the closing delimiter is '>'.
         func_match = re.search(
             r"<function[(=]\s*['\"]?([\w.-]+)['\"]?\s*[)>](.*)",
             content,
             re.DOTALL | re.IGNORECASE,
         )
         if func_match:
             tool_input = _extract_json_object(func_match.group(2))
             if tool_input is not None:
                 tool_name = func_match.group(1).strip()
                 print(f"✓ Parsed standard format: {tool_name}")
             # A name without arguments is left unset so later strategies still run.

     # STRATEGY 3: Tool mention with code block → wrap in code_interpreter
     if not tool_name and "```python" in content:
         code_match = re.search(r"```python\n(.*?)```", content, re.DOTALL)
         if code_match:
             tool_name = "code_interpreter"
             tool_input = {"code": code_match.group(1).strip()}
             print(f"✓ Extracted Python code → code_interpreter")

     # STRATEGY 4: Direct tool mention → create minimal valid call
     if not tool_name:
         content_lower = content.lower()
         for tool in tools:
             if tool.name.lower() in content_lower:
                 tool_name = tool.name
                 tool_input = {}
                 if tool.args_schema:
                     schema = tool.args_schema.model_json_schema()
                     required = schema.get('required', [])
                     # Real argument values are not recoverable here; required
                     # fields receive a placeholder so the call is well-formed.
                     for prop in schema.get('properties', {}):
                         if prop in required:
                             tool_input[prop] = "auto_extracted"
                 print(f"✓ Found mention of '{tool_name}' → creating default call")
                 break

     # STRATEGY 5: Emergency - if no tool detected, force a reasonable one
     if not tool_name:
         looks_like_error = any(kw in content.lower() for kw in ["error", "failed", "invalid"])
         if len(content) > 50 and not looks_like_error:
             tool_name = "think_through_logic"
             tool_input = {"reasoning": content[:150]}
             print(f"⚠️ No tool detected → forcing think_through_logic")

     # Validate and create tool call
     if tool_name and tool_input is not None:
         if any(t.name == tool_name for t in tools):
             return [ToolCall(name=tool_name, args=tool_input, id=str(uuid.uuid4()))]
         print(f"❌ Tool '{tool_name}' not in available tools")
     print("❌ All parsing strategies failed")
     return []
 # =============================================================================
+# CONDITIONAL EDGE FUNCTION (FIXED)
 # =============================================================================
 def should_continue(state: AgentState):
     """Pick the next graph node ("agent", "tools" or END) from the latest message."""
     history = state.get('messages', [])
     if not history:
         return "agent"

     latest = history[-1]
     current_turn = state.get('turn', 0)
     print(f"📍 Conditional check - Turn {current_turn}, Last msg type: {type(latest).__name__}")

     # Hard stop once the turn budget is used up.
     if current_turn >= MAX_TURNS:
         print(f"🛑 Max turns ({MAX_TURNS}) reached")
         return END

     # A tool result always goes back to the agent for processing.
     if isinstance(latest, ToolMessage):
         print(f"📨 Tool result received from '{latest.name}' → back to agent")
         return "agent"

     # Anything that is not an AI message falls through to the agent.
     if not isinstance(latest, AIMessage):
         print(f"🔄 Default → continuing to agent")
         return "agent"

     if latest.tool_calls:
         # Finish only when final_answer_tool is among the requested calls.
         for call in latest.tool_calls:
             requested = call.get("name", "")
             print(f"🔧 Tool call detected: '{requested}'")
             if requested == "final_answer_tool":
                 print(f"✅ final_answer_tool confirmed → ending")
                 return END
         print(f"🔄 Routing to tools node")
         return "tools"

     # Plain reasoning text: stop if the previous message was also a tool-less AI message.
     if len(history) >= 2:
         previous = history[-2]
         if isinstance(previous, AIMessage) and not previous.tool_calls:
             print(f"⚠️ Loop detected: 2 consecutive AI messages without tools")
             return END
     print(f"💭 AI message without tool call → continuing to agent (will force tool)")
     return "agent"
 # =============================================================================
+# ENHANCED AGENT CLASS
 # =============================================================================
 class PlanningReflectionAgent:
     def __init__(self):
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         if not GROQ_API_KEY:
+            raise ValueError("GROQ_API_KEY not set!")
         self.tools = defined_tools
+        # Initialize RAG
         if not initialize_rag_components():
+            print("⚠️ RAG components failed to initialize.")
         # Build tool descriptions
         tool_desc_list = []
         for tool in self.tools:
             if tool.args_schema:
                 schema = tool.args_schema.model_json_schema()
+                args_desc = [f"  - {p}: {d.get('description', '')}"
+                            for p, d in schema.get('properties', {}).items()]
+                desc = f"- {tool.name}:\n  {tool.description}\n" + "\n".join(args_desc)
             else:
                 desc = f"- {tool.name}: {tool.description}"
             tool_desc_list.append(desc)
         tool_descriptions = "\n".join(tool_desc_list)
+        # ULTRA-AGGRESSIVE SYSTEM PROMPT
+        self.system_prompt = f"""You are an elite AI agent for GAIA benchmark. Your ONLY job: provide the EXACT answer requested.
 ═══════════════════════════════════════════════════════════════
+⚠️ ABSOLUTE RULES - VIOLATE THESE AND YOU FAIL:
 ═══════════════════════════════════════════════════════════════
+1. **EVERY TURN MUST CALL EXACTLY ONE TOOL** - No exceptions
+2. **NEVER OUTPUT REASONING TEXT WITHOUT A TOOL CALL** - You will fail
+3. **IDENTIFY QUESTION TYPE FIRST** - Logic? Factual? Data? Math?
+4. **LOGIC PUZZLES**: think_through_logic → calculator (if needed) → validate → final_answer
+5. **FACTUAL QUESTIONS**: search_tool → validate → final_answer
+6. **DATA QUESTIONS**: read_file → code_interpreter → validate → final_answer
+7. **ALWAYS VALIDATE**: Call validate_answer() before final_answer_tool()
+8. **FINAL ANSWER FORMAT**: EXACTLY what was asked. NO "The answer is..." or explanations
 ═══════════════════════════════════════════════════════════════
+📋 QUESTION TYPE GUIDE:
 ═══════════════════════════════════════════════════════════════
+**RIDDLES/LOGIC PUZZLES** (No web search needed):
+- Brain teasers, puzzles, logical deduction
+- Strategy: think_through_logic → calculator (if math) → validate → final_answer
+- Example: "If 200 coins, 30 face-down, divide into equal piles..."
+  Turn 1: think_through_logic("Adventurer takes 30 coins and flips them")
+  Turn 2: calculator("30") [if needed]
+  Turn 3: validate_answer("30", question)
+  Turn 4: final_answer_tool("30")
+**FACTUAL/RESEARCH** (Need web):
+- Who, what, when, where questions
+- Strategy: search_tool → scrape_and_retrieve → validate → final_answer
+- Example: "What was Einstein's birthplace population in 1900?"
+  Turn 1: search_tool("Albert Einstein birthplace")
+  Turn 2: search_tool("Ulm Germany population 1900")
+  Turn 3: validate_answer("50000", question)
+  Turn 4: final_answer_tool("50000")
+**DATA ANALYSIS** (Need files):
+- CSV/Excel questions
+- Strategy: list_directory → read_file → code_interpreter → validate → final_answer
+**SIMPLE MATH**:
+- Calculations
+- Strategy: calculator() → validate_answer() → final_answer_tool()
 ═══════════════════════════════════════════════════════════════
+🎓 CRITICAL EXAMPLES:
 ═══════════════════════════════════════════════════════════════
+Example 1: Logic Puzzle
+Q: "Coin riddle with 200 coins, 30 face-down..."
+✅ CORRECT:
+  Turn 1: think_through_logic("Take 30 coins, flip all")
+  Turn 2: validate_answer("30", "coin riddle...")
+  Turn 3: final_answer_tool("30")
+❌ WRONG:
+  Turn 1: [reasoning text without tool] ← FAILS!
+Example 2: Letter Bank Puzzle
+Q: "Use letters to spell sentences, which letters need changing?"
+✅ CORRECT:
+  Turn 1: code_interpreter("code to count letters...")
+  Turn 2: validate_answer("A, B, C", question)
+  Turn 3: final_answer_tool("A, B, C")
+Example 3: Math Problem
+Q: "System of equations to solve..."
+✅ CORRECT:
+  Turn 1: code_interpreter("import numpy; solve equations...")
+  Turn 2: validate_answer("0, 1, 2", question)
+  Turn 3: final_answer_tool("0, 1, 2")
 ═══════════════════════════════════════════════════════════════
 📚 AVAILABLE TOOLS:
 {tool_descriptions}
 ═══════════════════════════════════════════════════════════════
+⚡ EXECUTION RULES:
+═══════════════════════════════════════════════════════════════
+- If you output text without a tool call, you have FAILED
+- If you're unsure, use think_through_logic() to organize thoughts
+- ALWAYS call a tool - preferably the right one for the question type
+- After EVERY tool result, decide: "Do I have the answer? → validate → submit"
+- If stuck after 3 turns: call reflect_on_progress()
+REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
 ═══════════════════════════════════════════════════════════════
 """
         print("Initializing Groq LLM...")
         try:
+            # Use tool_choice="any" to FORCE tool usage
             self.llm_with_tools = ChatGroq(
                 temperature=0,
                 groq_api_key=GROQ_API_KEY,
                 model_name="llama-3.3-70b-versatile",
                 max_tokens=4096,
                 timeout=60
+            ).bind_tools(self.tools, tool_choice="any")  # FORCE tool calls
+            print("✅ LLM initialized with FORCED tool usage.")
         except Exception as e:
             print(f"❌ Error initializing Groq: {e}")
             raise
+        # Agent Node with AGGRESSIVE tool forcing
         def agent_node(state: AgentState):
             """Run one agent turn and return the state update for the graph.

             Increments the turn counter, optionally appends tool-forcing and
             reflection hints to the outgoing messages, invokes the tool-bound
             LLM with retries, and then tries to make sure the resulting
             AIMessage carries at least one tool call.
             """
             current_turn = state.get('turn', 0) + 1
             print(f"\n{'='*70}")
             # Past the turn budget: reply with a SystemMessage and skip the LLM call.
             if current_turn > MAX_TURNS:
                 return {
+                    "messages": [SystemMessage(content="Max turns reached.")],
                     "turn": current_turn
                 }
+            # Check if we should force reflection
             consecutive_errors = state.get('consecutive_errors', 0)
             # Reflect on every REFLECT_EVERY_N_TURNS-th turn after turn 5, or once
             # three or more consecutive tool errors have been recorded.
+            should_reflect = (current_turn > 5 and current_turn % REFLECT_EVERY_N_TURNS == 0) or consecutive_errors >= 3
             # Work on a shallow copy so the hints below are sent to the LLM
             # without being appended to the state's own message list.
+            messages_to_send = state["messages"].copy()
+            # Add tool-forcing message if last turn had no tool call
+            if len(messages_to_send) >= 2:
+                last_msg = messages_to_send[-1]
+                if isinstance(last_msg, AIMessage) and not last_msg.tool_calls:
+                    force_msg = SystemMessage(
+                        content="⚠️ CRITICAL: You MUST call a tool this turn. NO reasoning text. Pick the most appropriate tool and call it now."
+                    )
+                    messages_to_send.append(force_msg)
+                    print("🚨 Injecting tool-forcing message")
             # Add reflection hint if needed
+            if should_reflect:
                 hint = SystemMessage(
+                    content="⚠️ HINT: Multiple turns without progress. Consider calling reflect_on_progress() or try a different approach."
                 )
                 messages_to_send.append(hint)
+                print("🤔 Injecting reflection hint")
+            # Invoke LLM with retries and fallback
             max_retries = 3
             ai_message = None
             for attempt in range(max_retries):
                 try:
                     ai_message = self.llm_with_tools.invoke(messages_to_send)
+                    # If we got a valid response with tool calls, break
+                    if ai_message.tool_calls:
+                        break
+                    # If no tool calls, this is a problem
                     # NOTE(review): this path loops straight into the next attempt
                     # with the same messages and no sleep; the backoff below only
                     # runs in the exception branch.
+                    print(f"⚠️ LLM returned no tool calls on attempt {attempt+1}")
                 except Exception as e:
                     error_str = str(e)
                     print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {error_str[:200]}")
+                    # If tool_use_failed, try without strict binding
                     if "tool_use_failed" in error_str and attempt < max_retries - 1:
+                        print("🔧 Trying without strict tool enforcement...")
                         try:
                             # NOTE(review): as shown here the fallback client is built
                             # without bind_tools and without a model_name argument —
                             # confirm that is intended.
                             simple_llm = ChatGroq(
                                 temperature=0,
                                 groq_api_key=os.getenv("GROQ_API_KEY"),
                                 max_tokens=4096,
                                 timeout=60
                             )
+                            # Add explicit tool forcing to the message
+                            force_tool_msg = SystemMessage(
+                                content="You MUST call a tool. Respond with a tool call, not reasoning text."
+                            )
+                            ai_message = simple_llm.invoke(messages_to_send + [force_tool_msg])
+                            # Try to parse tool calls from content
+                            if ai_message.content and not ai_message.tool_calls:
+                                parsed = parse_tool_call_from_string(ai_message.content, self.tools)
+                                if parsed:
+                                    ai_message.tool_calls = parsed
                                     ai_message.content = ""
+                                    print("✓ Fallback parsing succeeded")
                                     # Leaves the retry loop with the parsed tool call.
+                                    break
                         except Exception as e2:
+                            print(f"⚠️ Fallback also failed: {e2}")
                     if attempt == max_retries - 1:
+                        # Last resort: inject a default tool call
+                        print("🚨 All attempts failed - forcing think_through_logic")
                         ai_message = AIMessage(
+                            content="",
+                            tool_calls=[ToolCall(
+                                name="think_through_logic",
+                                args={"reasoning": "Processing question"},
+                                id=str(uuid.uuid4())
+                            )]
                         )
                     else:
                         # Exponential backoff between failed attempts: 1s, then 2s.
                         time.sleep(2 ** attempt)
+            # If still no tool calls after all attempts, force one
             # NOTE(review): the forcing below only happens when content is a
             # non-blank string; a reply with empty content and no tool calls
             # passes through without any tool call.
+            if not ai_message.tool_calls:
+                if isinstance(ai_message.content, str) and ai_message.content.strip():
+                    # Try one more parse
+                    parsed = parse_tool_call_from_string(ai_message.content, self.tools)
+                    if parsed:
+                        ai_message.tool_calls = parsed
+                        ai_message.content = ""
+                        print("✓ Final parse succeeded")
+                    else:
+                        # Absolute last resort
+                        print("🚨 EMERGENCY: Forcing think_through_logic")
+                        ai_message.tool_calls = [ToolCall(
+                            name="think_through_logic",
+                            args={"reasoning": "analyzing question"},
+                            id=str(uuid.uuid4())
+                        )]
+                        ai_message.content = ""
             # Track tool usage
             tool_history = state.get('tool_history', [])
                 if tool_name == "create_plan":
                     has_plan = True
             else:
+                print(f"⚠️ No tool call (this shouldn't happen!)")
+                print(f"💭 Content: {ai_message.content[:200]}...")
             # NOTE(review): with no tool calls, the `and` expression below yields
             # the empty list rather than False.
             return {
                 "messages": [ai_message],
                 "turn": current_turn,
                 "has_plan": has_plan,
+                "tool_history": tool_history,
+                "last_tool_was_thinking": ai_message.tool_calls and ai_message.tool_calls[0]['name'] == 'think_through_logic'
             }
+        # Tool Node with Error Tracking (FIXED)
         def tool_node_wrapper(state: AgentState):
             """Execute the pending tool calls and maintain the error streak counter.

             Args:
                 state: Current graph state; passed unchanged to ``ToolNode``.

             Returns:
                 The ``ToolNode`` result with ``consecutive_errors`` added. The
                 counter is incremented when the last tool message mentions an
                 error (case-insensitive substring match), reset to 0 when it
                 does not, and carried over unchanged when there is no trailing
                 ``ToolMessage`` to inspect.
             """
             print("🔧 Executing tools...")
             # Build a fresh ToolNode for this invocation, as the original did.
             result = ToolNode(self.tools).invoke(state)

             consecutive_errors = state.get('consecutive_errors', 0)
             produced = result.get('messages') or []
             last_msg = produced[-1] if produced else None
             if isinstance(last_msg, ToolMessage):
                 # ToolMessage.content may be a list of content blocks rather than
                 # a plain string; coerce it so the substring test cannot raise.
                 raw = last_msg.content
                 text = raw if isinstance(raw, str) else str(raw)
                 # A single case-insensitive check covers both "Error" and "error".
                 if "error" in text.lower():
                     consecutive_errors += 1
                     print(f"⚠️ Tool error detected (consecutive: {consecutive_errors})")
                 else:
                     consecutive_errors = 0
             result['consecutive_errors'] = consecutive_errors
             return result
+        # Build Graph
+        print("Building graph...")
         graph_builder = StateGraph(AgentState)
         graph_builder.add_node("agent", agent_node)
         graph_builder.add_edge("tools", "agent")
         self.graph = graph_builder.compile()
+        print("✅ Graph compiled successfully.")
     def __call__(self, question: str) -> str:
+        """Execute agent on a question.

         Streams the compiled graph, records the ``answer`` argument of any
         ``final_answer_tool`` call, falls back to a short tool result when no
         such call was seen, and normalises the text before returning it.

         Args:
             question: The question to answer.

         Returns:
             The cleaned answer string. When nothing could be extracted, the
             placeholder "AGENT FAILED TO PRODUCE ANSWER" goes through the same
             cleaning and is returned. If an exception escapes the run, the
             string "AGENT ERROR: <exception>" is returned instead of raising.
         """
         print(f"\n{'='*70}")
         print(f"🎯 NEW QUESTION")
         print(f"{'='*70}")
             "turn": 0,
             "has_plan": False,
             "consecutive_errors": 0,
+            "tool_history": [],
+            "last_tool_was_thinking": False
         }
         final_answer = "AGENT FAILED TO PRODUCE ANSWER"
+        all_messages = []
         try:
             # Allow the graph 10 steps more than the agent's own turn budget.
             config = {"recursion_limit": MAX_TURNS + 10}
             for event in self.graph.stream(graph_input, stream_mode="values", config=config):
                 if not event.get('messages'):
                     continue
                 # Keep the latest message list for the fallback scan below.
+                all_messages = event["messages"]
+                last_message = all_messages[-1]
+                # Check for final answer
                 if isinstance(last_message, AIMessage) and last_message.tool_calls:
+                    for tool_call in last_message.tool_calls:
+                        if tool_call.get("name") == "final_answer_tool":
+                            args = tool_call.get('args', {})
+                            if 'answer' in args:
+                                final_answer = args['answer']
+                                print(f"\n{'='*70}")
+                                print(f"✅ FINAL ANSWER: '{final_answer}'")
+                                print(f"{'='*70}\n")
                                 # Exits only the tool_call loop; streaming continues.
+                                break
                 elif isinstance(last_message, ToolMessage):
+                    preview = last_message.content[:200].replace('\n', ' ')
+                    print(f"📊 Tool '{last_message.name}' result: {preview}...")
                 elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
+                    print(f"💭 AI: {last_message.content[:200]}...")
+            # If no final answer, try to extract from tool messages
+            if final_answer == "AGENT FAILED TO PRODUCE ANSWER":
+                print("⚠️ No final_answer_tool called. Checking tool results...")
                 # Newest first: the scan stops at the first short, non-error result
                 # from one of the three listed tools, even if no line is extracted.
+                for msg in reversed(all_messages):
+                    if isinstance(msg, ToolMessage):
+                        if msg.name in ["calculator", "think_through_logic", "code_interpreter"]:
+                            content = msg.content.strip()
+                            # Look for short, answer-like content
+                            if content and len(content) < 200 and not content.startswith("Error"):
+                                # Extract just the result part
+                                lines = content.split('\n')
+                                for line in reversed(lines):
                                     # NOTE(review): startswith runs on the unstripped line,
                                     # so an indented marker line is not filtered out.
+                                    if line.strip() and not line.startswith(('✅', '⚠️', 'Next', 'Remember')):
+                                        final_answer = line.strip()
+                                        print(f"📝 Extracted from {msg.name}: '{final_answer}'")
+                                        break
+                                break
+            # Clean the answer
+            cleaned = str(final_answer).strip()
+            # Remove prefixes
             # NOTE(review): "based on" and "according to" are stripped as bare
             # phrases, leaving the rest of the sentence as the answer — confirm
             # this is wanted.
+            prefixes = [
+                "the answer is:", "here is the answer:", "based on",
+                "final answer:", "answer:", "the final answer is:",
+                "my answer is:", "according to", "i found that",
+                "the result is:", "result:"
             ]
             # Case-insensitive match; the loop stops after the first prefix that
             # leaves a non-empty remainder.
+            for prefix in prefixes:
+                if cleaned.lower().startswith(prefix.lower()):
+                    potential = cleaned[len(prefix):].strip()
+                    if potential:
+                        cleaned = potential
                         break
+            # Remove code fences and quotes
             # remove_fences_simple is defined elsewhere in the file; presumably it
             # strips Markdown code fences — verify against its definition.
+            cleaned = remove_fences_simple(cleaned)
+            while cleaned.startswith("`") and cleaned.endswith("`"):
+                cleaned = cleaned[1:-1].strip()
+            if (cleaned.startswith('"') and cleaned.endswith('"')) or \
+               (cleaned.startswith("'") and cleaned.endswith("'")):
+                cleaned = cleaned[1:-1].strip()
+            # Remove trailing period for short answers
             # "Short" means fewer than 10 whitespace-separated words.
+            if cleaned.endswith('.') and len(cleaned.split()) < 10:
+                cleaned = cleaned[:-1]
             print(f"\n{'='*70}")
+            print(f"🎉 RETURNING ANSWER")
             print(f"{'='*70}")
+            print(f"{cleaned}")
             print(f"{'='*70}\n")
+            return cleaned
         except Exception as e:
             # Failures are reported in the returned string rather than raised.
+            print(f"❌ Graph error: {e}")
+            print(traceback.format_exc())
+            return f"AGENT ERROR: {e}"
 # =============================================================================
 # GLOBAL AGENT INSTANTIATION
 # =============================================================================
+agent = None
 try:
     initialize_rag_components()
     agent = PlanningReflectionAgent()
+    print("✅ Global PlanningReflectionAgent instantiated.")
+    # Verify it's callable
+    if not callable(agent):
+        print("❌ ERROR: Agent not callable!")
+        agent = None
+    else:
+        print("✅ Agent is callable.")
     if asr_pipeline is None:
+        print("⚠️ ASR Pipeline not loaded.")
 except Exception as e:
+    print(f"❌ FATAL: Agent initialization failed: {e}")
     traceback.print_exc()
     agent = None