Spaces:

Csuarezg
/

Final_Assignment_Template_hf-course

Sleeping

App Files Files Community

Csuarezg committed on May 28, 2025

Commit

b56c671

verified ·

1 Parent(s): d15e45c

Update app.py

Browse files

Files changed (1) hide show

app.py +957 -188

app.py CHANGED Viewed

@@ -8,9 +8,10 @@ import tempfile
 import logging
 from typing import List, Dict, Optional, TypedDict, Annotated
 import numpy as np
 # Core ML/AI imports
-from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
 from langchain_openai import ChatOpenAI
 from langchain_core.tools import tool
 from langchain_community.tools.tavily_search import TavilySearchResults
@@ -22,10 +23,10 @@ from langgraph.checkpoint.memory import MemorySaver
 # File processing
 import wikipedia
-from youtube_transcript_api import YouTubeTranscriptApi
 import speech_recognition as sr
-# Computer vision (will be downloaded at runtime)
 try:
     from ultralytics import YOLO
     import cv2
@@ -49,65 +50,106 @@ os.environ['YOLO_VERBOSE'] = 'false'
 logging.getLogger("ultralytics").setLevel(logging.ERROR)
 # --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# System prompt for the agent
 SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
 CRITICAL ANSWER FORMAT RULES:
-- ALWAYS end with: FINAL ANSWER: [answer]
-- READ THE QUESTION CAREFULLY - answer EXACTLY what is asked for, nothing more, nothing less
 SPECIFIC FORMATTING BY QUESTION TYPE:
-- Numbers: ONLY the number, no units, no text
-  Example: "FINAL ANSWER: 2" NOT "FINAL ANSWER: 2 albums"
-- First name only: ONLY the first name
-  Example: If person is "John Smith" → "FINAL ANSWER: John"
-- Country codes, IOC codes, abbreviations, symbols: ONLY the code/abbreviation, no country name or brackets
-  Example: If asked for IOC country code → "FINAL ANSWER: PHI" NOT "FINAL ANSWER: PHILIPPINES [PHI]"
-- Lists/Sets: Exactly as requested format
-  Example: "FINAL ANSWER: a, b, d, e" (comma-separated, alphabetical order)
 CRITICAL TOOL SELECTION:
-- Wikipedia questions → wikipedia_tool ONLY
-- File questions → file_analyzer_tool FIRST to inspect contents, then reason based on structure
-- Current events → web_search_tool ONLY
-- Mathematical analysis/calculations → wolfram_alpha_tool or python_repl_tool ONLY
-- Tables, matrices, systematic checking → python_repl_tool ONLY
 FOR MATHEMATICAL PROBLEMS:
-ALWAYS use python_repl_tool when:
-- Analyzing mathematical tables or matrices
-- Checking properties like commutativity, associativity
-- Systematic verification of mathematical statements
-- Complex calculations that need precision
-- ANY problem involving tables, sets, or systematic checking
 FILE HANDLING:
-- You HAVE the ability to read and analyze uploaded files
-- ALWAYS use file_analyzer_tool when questions mention files
-- The tool automatically finds and analyzes Excel, CSV, images, and audio files
-- For Excel/CSV: Returns columns, data types, sample rows, and numeric totals
-- NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
-- Example: "The attached Excel file..." → Use file_analyzer_tool immediately
 REASONING PROCESS:
-1. Carefully read what the question is asking for
-2. Identify if it needs systematic/mathematical analysis
-3. Use appropriate tool (python_repl_tool for math problems)
-4. Extract ONLY the specific part requested
-5. Format according to the rules above
 """
 class GAIAAgent:
     def __init__(self):
         print("🚀 Initializing GAIA Agent...")
         # API Keys from HF Secrets
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
-        self.tavily_api_key = os.getenv("TAVILY_API_KEY")
         self.wolfram_api_key = os.getenv("WOLFRAM_API_KEY")
         self.hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
         if not self.openai_api_key:
             raise ValueError("OPENAI_API_KEY not found in environment variables")
@@ -135,7 +177,7 @@ class GAIAAgent:
         print("✅ GAIA Agent initialized successfully!")
     def _setup_tools(self):
-        """Setup all the tools for the agent"""
         # Store reference to self for use in nested functions
         agent_instance = self
@@ -143,56 +185,313 @@ class GAIAAgent:
         # Wikipedia tool
         @tool
         def wikipedia_tool(query: str) -> str:
-            """Search Wikipedia for encyclopedic information"""
             try:
                 wikipedia.set_lang("en")
-                summary = wikipedia.summary(query, sentences=3)
-                page = wikipedia.page(query)
-                return f"WIKIPEDIA: {page.title}\n\n{summary}\n\nURL: {page.url}"
-            except wikipedia.DisambiguationError as e:
-                summary = wikipedia.summary(e.options[0], sentences=3)
-                page = wikipedia.page(e.options[0])
-                return f"WIKIPEDIA: {page.title}\n\n{summary}\n\nURL: {page.url}"
             except Exception as e:
                 return f"Wikipedia error: {str(e)}"
         # Web search tool
-        @tool
-        def web_search_tool(query: str) -> str:
-            """Web search for current information"""
             if not agent_instance.tavily_api_key:
-                return "Tavily API key not available"
             try:
-                tavily_search = TavilySearchResults(api_key=agent_instance.tavily_api_key, max_results=5)
-                results = tavily_search.invoke(query)
-                formatted_results = []
-                for i, res in enumerate(results, 1):
-                    formatted_results.append(f"RESULT {i}:\nTitle: {res.get('title', 'N/A')}\nContent: {res.get('content', 'N/A')}")
-                return "\n\n".join(formatted_results)
             except Exception as e:
-                return f"Search error: {str(e)}"
         # Wolfram Alpha tool
         @tool
         def wolfram_alpha_tool(query: str) -> str:
-            """Use Wolfram Alpha for computational questions"""
             if not agent_instance.wolfram_api_key:
-                return "Wolfram API key not available"
             params = {
                 'appid': agent_instance.wolfram_api_key,
                 'input': query,
                 'format': 'plaintext',
-                'output': 'JSON'
             }
             try:
                 resp = requests.get("http://api.wolframalpha.com/v2/query", params=params, timeout=30)
                 resp.raise_for_status()
                 data = resp.json().get('queryresult', {})
                 if not data.get('success'):
-                    return f"Wolfram Alpha couldn't process: {query}"
                 results = []
                 for pod in data.get('pods', []):
                     pod_title = pod.get('title', 'Unknown')
@@ -200,67 +499,63 @@ class GAIAAgent:
                         plaintext = subpod.get('plaintext')
                         if plaintext and plaintext.strip():
                             results.append(f"{pod_title}: {plaintext}")
-                return " | ".join(results[:5]) if results else "No readable results"
-            except Exception as e:
                 return f"Wolfram Alpha error: {e}"
-        # File analyzer tool
         @tool
-        def file_analyzer_tool(file_description: str = "uploaded file") -> str:
-            """Analyze uploaded files (Excel, CSV, images, audio)"""
             try:
-                search_paths = ["./", "./uploads", "./files", "./data"]
-                data_exts = ['.xlsx', '.xls', '.csv']
-                found_files = []
-                for path in search_paths:
-                    if os.path.exists(path):
-                        for file in os.listdir(path):
-                            if any(file.lower().endswith(ext) for ext in data_exts):
-                                found_files.append(os.path.join(path, file))
-                if not found_files:
-                    return "No supported data files found"
-                results = []
-                for file_path in found_files:
-                    try:
-                        ext = os.path.splitext(file_path)[1].lower()
-                        if ext in ['.xlsx', '.xls']:
-                            df = pd.read_excel(file_path)
-                        elif ext == '.csv':
-                            df = pd.read_csv(file_path)
-                        else:
-                            continue
-                        result = f"📄 FILE: {file_path}\n"
-                        result += f"🔢 SHAPE: {df.shape}\n"
-                        result += f"🧠 COLUMNS: {list(df.columns)}\n"
-                        result += f"📊 FIRST 5 ROWS:\n{df.head().to_string(index=False)}\n"
-                        numeric_cols = df.select_dtypes(include=['number']).columns
-                        if len(numeric_cols) > 0:
-                            totals = df[numeric_cols].sum().round(2)
-                            result += f"💰 NUMERIC TOTALS:\n{totals.to_string()}\n"
-                        results.append(result)
-                    except Exception as e:
-                        results.append(f"Error processing {file_path}: {e}")
-                return "\n\n".join(results)
             except Exception as e:
-                return f"File analysis error: {e}"
         # Python REPL tool
         python_repl_tool = PythonREPLTool()
         tools = [
             wikipedia_tool,
-            web_search_tool,
-            wolfram_alpha_tool,
             file_analyzer_tool,
             python_repl_tool
         ]
@@ -279,7 +574,11 @@ class GAIAAgent:
             if not messages or not isinstance(messages[0], SystemMessage):
                 messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages
             response = model_with_tools.invoke(messages)
             return {"messages": [response]}
         tool_node = ToolNode(self.tools)
@@ -289,39 +588,390 @@ class GAIAAgent:
         builder.add_node("tools", tool_node)
         builder.add_edge(START, "agent")
-        builder.add_conditional_edges("agent", tools_condition, {"tools": "tools", END: END})
         builder.add_edge("tools", "agent")
         memory = MemorySaver()
         return builder.compile(checkpointer=memory)
     def _extract_final_answer(self, response_text: str) -> str:
         """Extract the final answer from agent response"""
         match = re.search(r"FINAL ANSWER:\s*(.*)", response_text, re.DOTALL | re.IGNORECASE)
         if match:
             raw_answer = match.group(1).strip()
-            if "\n" in raw_answer:
-                raw_answer = raw_answer.split("\n", 1)[0].strip()
             if raw_answer.endswith('.') and not raw_answer[:-1].replace('.', '').isdigit():
                 raw_answer = raw_answer[:-1]
             return raw_answer.strip()
         lines = [line.strip() for line in response_text.strip().split('\n') if line.strip()]
         return lines[-1] if lines else response_text.strip()
-    def __call__(self, question: str) -> str:
-        """Main method called by Gradio interface"""
-        print(f"🤖 Processing question: {question[:100]}...")
         try:
-            config = {"configurable": {"thread_id": "gaia_session"}}
-            # Run the agent
             final_state = None
             max_iterations = 0
             events = self.agent_runner.stream(
-                {"messages": [HumanMessage(content=question)]},
                 config=config,
                 stream_mode="values"
             )
@@ -329,28 +979,45 @@ class GAIAAgent:
             for event in events:
                 final_state = event
                 max_iterations += 1
-                if max_iterations > 8:  # Prevent infinite loops
                     break
             if not final_state or not final_state['messages']:
-                return "Agent execution failed - no response generated"
             last_message = final_state['messages'][-1]
-            full_response = last_message.content
-            print(f"📝 Agent response: {full_response[:200]}...")
-            # Extract final answer
             final_answer = self._extract_final_answer(full_response)
-            print(f"🎯 Final answer: {final_answer}")
-            return final_answer
         except Exception as e:
-            print(f"❌ Error processing question: {e}")
             import traceback
             traceback.print_exc()
-            return f"Error: {str(e)}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
@@ -366,10 +1033,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
     # 1. Instantiate GAIA Agent
     try:
         agent = GAIAAgent()
@@ -377,84 +1040,172 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"Error instantiating GAIA agent: {e}")
         return f"Error initializing GAIA agent: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code URL: {agent_code}")
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
             return "Fetched questions list is empty.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
-    # 3. Run GAIA Agent on questions
     results_log = []
     answers_payload = []
-    print(f"Running GAIA agent on {len(questions_data)} questions...")
-    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
-        question_text = item.get("question") or item.get("Question")
-        if not task_id or question_text is None:
-            print(f"Skipping item {i} with missing data")
             continue
-        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
         try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": submitted_answer
-            })
-            print(f"✅ Question {i+1} completed: {submitted_answer}")
         except Exception as e:
-            print(f"❌ Error on question {i+1}: {e}")
-            error_msg = f"AGENT ERROR: {str(e)}"
-            answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                "Submitted Answer": error_msg
             })
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare and Submit
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }
-    print(f"Submitting {len(answers_payload)} answers...")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
-            f"🎉 Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("✅ Submission successful!")
         return final_status, pd.DataFrame(results_log)
     except Exception as e:
-        error_msg = f"❌ Submission Failed: {str(e)}"
         print(error_msg)
         return error_msg, pd.DataFrame(results_log)
@@ -463,16 +1214,24 @@ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
     gr.Markdown("# 🤖 GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
-        **Advanced GAIA Benchmark Agent**
         This agent uses:
-        - 🧠 GPT-4 Turbo with specialized tools
-        - 📚 Wikipedia search for encyclopedic information
-        - 🌐 Web search for current events
         - 🧮 Wolfram Alpha for computational tasks
-        - 📊 File analysis for Excel/CSV data
         - 🐍 Python REPL for mathematical analysis
-        - 🎯 Specialized prompt engineering for GAIA benchmark
         **Instructions:**
         1. Log in to your Hugging Face account
@@ -488,15 +1247,15 @@ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
     run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(
-        label="📊 Run Status / Submission Result",
-        lines=8,
         interactive=False
     )
     results_table = gr.DataFrame(
-        label="📝 Questions and Agent Answers",
         wrap=True,
-        max_height=400
     )
     run_button.click(
@@ -521,6 +1280,16 @@ if __name__ == "__main__":
         print(f"✅ SPACE_ID: {space_id}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
     print("="*50 + "\n")
     print("🌟 Launching GAIA Agent Interface...")
     demo.launch(debug=True, share=False)

 import logging
 from typing import List, Dict, Optional, TypedDict, Annotated
 import numpy as np
+import base64
 # Core ML/AI imports
+from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage, ToolMessage
 from langchain_openai import ChatOpenAI
 from langchain_core.tools import tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 # File processing
 import wikipedia
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 import speech_recognition as sr
+# Computer vision
 try:
     from ultralytics import YOLO
     import cv2
 logging.getLogger("ultralytics").setLevel(logging.ERROR)
 # --- Constants ---
+HF_API_BASE_URL = "https://agents-course-unit4-scoring.hf.space"
+USERNAME = "YOUR_USERNAME"  # Will be replaced with OAuth profile username
+AGENT_CODE = "langgraph_gaia_agent"
+# System prompt - EXACTLY as in gaia_agent.py
 SYSTEM_PROMPT = """You are a precision research assistant for the GAIA benchmark. Your mission is EXTREME ACCURACY.
 CRITICAL ANSWER FORMAT RULES:
+# - ALWAYS end with: FINAL ANSWER: [answer]
+# - READ THE QUESTION CAREFULLY - answer EXACTLY what is asked for, nothing more, nothing less
 SPECIFIC FORMATTING BY QUESTION TYPE:
+# - Numbers: ONLY the number, no units, no text
+# Example: "FINAL ANSWER: 2" NOT "FINAL ANSWER: 2 albums"
+# - First name only: ONLY the first name
+# Example: If person is "John Smith" → "FINAL ANSWER: John"
+# - Country codes, IOC codes, abbreviations, symbols: ONLY the code/abbreviation, no country name or brackets
+# Example: If asked for IOC country code → "FINAL ANSWER: PHI" NOT "FINAL ANSWER: PHILIPPINES [PHI]"
+# - When asked for a specific type of identifier (code, abbreviation, symbol):
+#   Give ONLY that identifier, strip all explanatory text, brackets, or full names
+# - Lists/Sets: Exactly as requested format
+# Example: "FINAL ANSWER: a, b, d, e" (comma-separated, alphabetical order)
 CRITICAL TOOL SELECTION:
+# - Wikipedia questions → wikipedia_tool ONLY
+# - File questions → file_analyzer_tool FIRST to inspect contents, then reason based on structure
+# - Current events → web_search_tool ONLY
+# - Mathematical analysis/calculations → wolfram_alpha_tool or python_repl_tool ONLY
+# - Tables, matrices, systematic checking → python_repl_tool ONLY
 FOR MATHEMATICAL PROBLEMS:
+# ALWAYS use python_repl_tool when:
+# - Analyzing mathematical tables or matrices
+# - Checking properties like commutativity, associativity
+# - Systematic verification of mathematical statements
+# - Complex calculations that need precision
+# - ANY problem involving tables, sets, or systematic checking
+MATHEMATICAL ANALYSIS PROCESS:
+# 1. Use python_repl_tool to parse data systematically
+# 2. Write code to check ALL cases (don't rely on manual inspection)
+# 3. Collect results programmatically
+# 4. Verify your logic with multiple approaches
+# 5. Format answer exactly as requested
+# Example for commutativity checking:
+# - Parse the operation table into a data structure
+# - Check ALL pairs (x,y) to see if x*y = y*x
+# - Collect ALL elements involved in ANY counter-example
+# - Return in requested format (e.g., comma-separated, alphabetical)
 FILE HANDLING:
+# - You HAVE the ability to read and analyze uploaded files
+# - ALWAYS use file_analyzer_tool when questions mention files
+# - The tool automatically finds and analyzes Excel, CSV, images, and audio files
+# - For Excel/CSV: Returns columns, data types, sample rows, and numeric totals
+# - NEVER say "I can't access files" - you CAN access them via file_analyzer_tool
+# - Example: "The attached Excel file..." → Use file_analyzer_tool immediately
+SPECIAL CASES TO HANDLE:
+# - If the question appears reversed or encoded, decode it first.
+# - If the question includes an instruction (e.g., "write the opposite of..."), follow the instruction precisely.
+# - DO NOT repeat or paraphrase the question in your answer.
+# - NEVER answer with the full sentence unless explicitly asked to.
+# - If the decoded question asks for a word, give ONLY the word, in the required format.
 REASONING PROCESS:
+# 1. Carefully read what the question is asking for
+# 2. Identify if it needs systematic/mathematical analysis
+# 3. Use appropriate tool (python_repl_tool for math problems)
+# 4. Extract ONLY the specific part requested
+# 5. Format according to the rules above
+# 6. For file questions:
+# a. First use file_analyzer_tool to inspect column names, types, and sample data
+# b. Identify relevant columns based on the question
+# c. Reason using the data (e.g., by counting, filtering, or identifying patterns)
+# d. Only use python_repl_tool if additional computation is necessary
+# 7. If the Wikipedia tool is used but fails to provide an answer (no relevant entry or content), automatically attempt a web search using the same query or a refined version of it
 """
+# YOLO detectable classes
+DETECTABLE_CLASSES = {
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+    'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
+    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
+    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
+    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
+    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+    'potted plant', 'bed', 'dining table', 'toilet', 'tv',
+    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
+    'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush'
+}
 class GAIAAgent:
     def __init__(self):
         print("🚀 Initializing GAIA Agent...")
         # API Keys from HF Secrets
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
+        self.tavily_api_key = os.getenv("TAVILY_API_KEY")
         self.wolfram_api_key = os.getenv("WOLFRAM_API_KEY")
         self.hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
+        self.openweather_api_key = os.getenv("OPENWEATHER_API_KEY")
         if not self.openai_api_key:
             raise ValueError("OPENAI_API_KEY not found in environment variables")
         print("✅ GAIA Agent initialized successfully!")
     def _setup_tools(self):
+        """Setup all the tools for the agent - EXACTLY as in gaia_agent.py"""
         # Store reference to self for use in nested functions
         agent_instance = self
         # Wikipedia tool
         @tool
         def wikipedia_tool(query: str) -> str:
+            """
+            Tool: Search Wikipedia for encyclopedic, historical, and biographical information.
+            ⭐ PREFERRED TOOL when the question mentions:
+            - "Wikipedia" explicitly
+            - Historical information, biographies, encyclopedic topics
+            - Facts about people, places, concepts, events from the past
+            - Scientific concepts, country information, cultural topics
+            - Any information that would typically be found in an encyclopedia
+            🎯 ALWAYS USE THIS TOOL when:
+            - Question explicitly mentions "Wikipedia" or "encyclopedia"
+            - Looking for biographical information about notable people
+            - Need historical data, timelines, or established facts
+            - Question is about scientific concepts, countries, or cultural topics
+            Args:
+                query: Topic to search for (be specific, e.g., "Mercedes Sosa discography")
+                Note: summary length is fixed internally (3 sentences, or 30 for the first disambiguation option); there is no `sentences` argument.
+            Examples of when to use:
+            - "Mercedes Sosa studio albums" ✅
+            - "Albert Einstein biography" ✅
+            - "World War II timeline" ✅
+            - "Photosynthesis process" ✅
+            This tool accesses Wikipedia's comprehensive, well-sourced encyclopedia content.
+            """
+            print(f"📚 USING WIKIPEDIA TOOL")
             try:
                 wikipedia.set_lang("en")
+                try:
+                    summary = wikipedia.summary(query, sentences=3)
+                    page = wikipedia.page(query)
+                    return f"WIKIPEDIA: {page.title}\n\n{summary}\n\nURL: {page.url}"
+                except wikipedia.DisambiguationError as e:
+                    # Take first option
+                    summary = wikipedia.summary(e.options[0], sentences=30)
+                    page = wikipedia.page(e.options[0])
+                    return f"WIKIPEDIA: {page.title}\n\n{summary}\n\nURL: {page.url}"
+                except wikipedia.PageError:
+                    search_results = wikipedia.search(query, results=30)
+                    if search_results:
+                        return f"No exact match. Similar topics: {', '.join(search_results)}"
+                    return f"No Wikipedia results for '{query}'"
             except Exception as e:
                 return f"Wikipedia error: {str(e)}"
+        # File analyzer tool
+        @tool
+        def file_analyzer_tool(file_description: str = "uploaded file") -> str:
+            """
+            Analyzes uploaded files including Excel, CSV, images, and audio (e.g., .mp3).
+            For data files: returns column summary and numeric stats.
+            For images: returns visual attributes and OCR text.
+            For audio files: transcribes speech and extracts structured data (e.g., ingredients).
+            """
+            try:
+                print(f"🔍 Searching for files related to: {file_description}")
+                search_paths = ["./", "./uploads", "./files", "./data", "./images", "./audio"]
+                data_exts = ['.xlsx', '.xls', '.csv']
+                image_exts = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp']
+                audio_exts = ['.mp3', '.wav']
+                all_exts = data_exts + image_exts + audio_exts
+                found_files = []
+                for path in search_paths:
+                    if os.path.exists(path):
+                        for file in os.listdir(path):
+                            if any(file.lower().endswith(ext) for ext in all_exts):
+                                found_files.append(os.path.join(path, file))
+                if not found_files:
+                    return f"No supported files found. Looking for: {', '.join(all_exts)}"
+                results = []
+                for file_path in found_files:
+                    ext = os.path.splitext(file_path)[1].lower()
+                    try:
+                        if ext in data_exts:
+                            results.append(agent_instance._analyze_data_file(file_path, ext))
+                        elif ext in image_exts:
+                            results.append(agent_instance._analyze_image_file(file_path))
+                        elif ext in audio_exts:
+                            results.append(agent_instance._analyze_audio_file(file_path))
+                    except Exception as e:
+                        results.append(f"⚠️ Error processing {file_path}: {e}")
+                return "\n\n".join(results)
+            except Exception as error:
+                return f"❌ Unexpected error: {error}"
+        # Computer vision analyzer
+        @tool
+        def computer_vision_analyzer(video_url: str, frames_per_second: int = 0.5) -> str:
+            """
+            Tool: Analyzes a YouTube video and returns object detection counts for each extracted frame.
+            Args:
+                video_url: YouTube video URL to analyze
+                frames_per_second: How many frames to extract per second (default: 0.5, i.e. one frame every two seconds)
+            Returns:
+                Plain-text report (one section per frame, listing each detected object type and its count) that can be used for various analyses.
+            """
+            if not VISION_AVAILABLE or not agent_instance.yolo_model:
+                return "Computer vision libraries not available"
+            try:
+                with tempfile.TemporaryDirectory() as temp_dir:
+                    print("📥 Downloading video...")
+                    video_path = agent_instance._download_youtube_video(video_url, temp_dir)
+                    print("📸 Extracting frames...")
+                    frames = agent_instance._extract_frames(video_path, frame_rate=frames_per_second)
+                    if not frames:
+                        return "❌ No frames could be extracted from the video."
+                    print("🔍 Detecting objects in each frame...")
+                    frame_results = agent_instance._detect_objects_per_frame(frames)
+                    # Format results as a readable string that the LLM can parse and analyze
+                    output_lines = []
+                    output_lines.append(f"FRAME_ANALYSIS_RESULTS:")
+                    output_lines.append(f"Total frames analyzed: {len(frame_results)}")
+                    output_lines.append(f"Extraction rate: {frames_per_second} frame(s) per second")
+                    output_lines.append("")
+                    for frame_data in frame_results:
+                        frame_num = frame_data['frame_number']
+                        timestamp = frame_data['timestamp_seconds']
+                        detections = frame_data['detections']
+                        if detections:
+                            output_lines.append(f"Frame {frame_num} (t={timestamp}s):")
+                            for obj_type, count in sorted(detections.items()):
+                                output_lines.append(f"  {obj_type}: {count}")
+                        else:
+                            output_lines.append(f"Frame {frame_num} (t={timestamp}s): No objects detected")
+                    return "\n".join(output_lines)
+            except Exception as e:
+                return f"❌ Error processing video: {e}"
         # Web search tool
+        @tool
+        def web_search_tool(query: str, search_mode: str = "comprehensive") -> str:
+            """
+            Tool: Web search for CURRENT, REAL-TIME information and recent events.
+            """
+            # The docstring above doubles as the tool description, so it is kept verbatim.
+            # search_mode == "simple": a single Tavily query (up to 8 results).
+            # Any other value: the query is expanded into at most 4 variations and each
+            # one is searched (5 results each when the mode is exactly "comprehensive").
+            # The return value is always a string: formatted results, or a message
+            # describing what went wrong.
+            print(f"🌐 USING WEB SEARCH TOOL with query: '{query}', mode: '{search_mode}'")
+            if not agent_instance.tavily_api_key:
+                return "Error: TAVILY_API_KEY environment variable not set."
+            def _format_hit(res) -> str:
+                # One search hit rendered as Title/URL/Content lines.
+                return (
+                    f"Title: {res.get('title', 'N/A')}\n"
+                    f"URL: {res.get('url', 'N/A')}\n"
+                    f"Content: {res.get('content', 'N/A')}"
+                )
+            def _build_variations(base_query: str) -> list:
+                # Original query first, then pattern-specific rewrites; at most 4 unique entries.
+                query_lower = base_query.lower()
+                quantity_phrases = ("how many", "how much", "number of")
+                prefix_suffixes = (
+                    (("who is", "who was", "who are"), ("biography", "wiki", "profile")),
+                    (("where is", "where are", "where was"), ("location", "address", "map")),
+                    (("what is", "what are", "what was"), ("definition", "explanation", "facts")),
+                    (("when is", "when was", "when did"), ("date", "timeline", "history")),
+                )
+                if any(phrase in query_lower for phrase in quantity_phrases):
+                    # Strip every trigger phrase (previously "number of" was left in place,
+                    # producing queries such as "count of number of ...").
+                    clean_query = query_lower
+                    for phrase in quantity_phrases:
+                        clean_query = clean_query.replace(phrase, ' ')
+                    clean_query = " ".join(clean_query.split())
+                    extras = [
+                        f"count of {clean_query}",
+                        f"total {clean_query}",
+                        f"list of {clean_query}",
+                    ]
+                else:
+                    suffixes = next(
+                        (sfx for prefixes, sfx in prefix_suffixes if query_lower.startswith(prefixes)),
+                        ("information", "facts details", "overview"),
+                    )
+                    extras = [f"{base_query} {suffix}" for suffix in suffixes]
+                # dict.fromkeys removes duplicates while preserving order.
+                return list(dict.fromkeys([base_query, *extras]))[:4]
+            try:
+                tavily_search = TavilySearchResults(max_results=5 if search_mode == "comprehensive" else 8)
+                if search_mode == "simple":
+                    print(f"🔍 Executing simple search: '{query}'")
+                    results = tavily_search.invoke(query)
+                    if not results:
+                        return "No search results found."
+                    return "\n\n".join(
+                        f"RESULT {i}:\n{_format_hit(res)}" for i, res in enumerate(results, 1)
+                    )
+                all_results = []
+                for i, variation in enumerate(_build_variations(query.strip()), 1):
+                    search_header = f"=== SEARCH {i}: \"{variation}\" ==="
+                    # A failure in one variation must not discard the others.
+                    try:
+                        print(f"🔍 Search variation {i}: '{variation}'")
+                        results = tavily_search.invoke(variation)
+                        if results:
+                            section = "\n---\n".join(_format_hit(res) for res in results)
+                        else:
+                            section = "No results found."
+                    except Exception as e:
+                        section = f"Error: {e}"
+                    all_results.append(f"{search_header}\n{section}")
+                return "\n\n".join(all_results)
+            except Exception as e:
+                return f"Search error: {e}"
+        # Reverse text tool
+        @tool
+        def reverse_text_tool(text: str) -> str:
+            """Tool: Reverses text for handling backwards questions."""
+            # Walk the characters back-to-front and glue them together again.
+            mirrored = "".join(reversed(text))
+            return mirrored
         # Wolfram Alpha tool
         @tool
         def wolfram_alpha_tool(query: str) -> str:
+            """Tool: Use Wolfram Alpha for fact-based, computational questions like math, science, data lookups, or unit conversions,
+            but not for opinions, real-time updates, or creative tasks"""
+            # Always returns a string: the joined pod results or a message describing the failure.
             if not agent_instance.wolfram_api_key:
+                return "Error: WOLFRAM_API_KEY environment variable not set."
+            # Ask for plaintext pod content, delivered as JSON, in metric units.
             params = {
                 'appid': agent_instance.wolfram_api_key,
                 'input': query,
                 'format': 'plaintext',
+                'output': 'JSON',
+                'units': 'metric',
             }
             try:
+                print(f"🧠 Wolfram Alpha query: '{query}'")
                 resp = requests.get("http://api.wolframalpha.com/v2/query", params=params, timeout=30)
                 resp.raise_for_status()
                 data = resp.json().get('queryresult', {})
                 if not data.get('success'):
+                    return f"Wolfram Alpha couldn't process: {query}. Try rephrasing the query."
                 results = []
                 for pod in data.get('pods', []):
                     pod_title = pod.get('title', 'Unknown')
+                    # NOTE(review): the loop header that binds `subpod` is not visible in this
+                    # diff view (it falls between hunks); presumably it iterates the pod's subpods.
                         plaintext = subpod.get('plaintext')
                         if plaintext and plaintext.strip():
                             results.append(f"{pod_title}: {plaintext}")
+                if not results:
+                    return "Wolfram Alpha returned no readable results."
+                return " | ".join(results[:5])  # Limit results
+            except requests.exceptions.RequestException as e:
                 return f"Wolfram Alpha error: {e}"
+            # NOTE(review): presumably reached when resp.json() cannot parse the body; confirm
+            # against the installed requests version, whose own JSON error may match the handler above.
+            except json.JSONDecodeError:
+                return "Wolfram Alpha returned invalid data."
+        # YouTube transcript tool
         @tool
+        def youtube_transcript_tool(url: str, question: str) -> str:
+            """
+            tool: Use this to transcript and answer questions about specific phrases in YouTube videos.
+            Args:
+                url: YouTube video URL
+                question: The question or phrase to search for in the transcript
+            Returns:
+                A string with the response found after the question in the transcript.
+            """
             try:
+                if not url or not question:
+                    return "Both 'url' and 'question' are required."
+                video_id = agent_instance._extract_video_id(url)
+                transcript = agent_instance._get_transcript(video_id)
+                if not transcript:
+                    return "No transcript available for this video."
+                response = agent_instance._find_response(transcript, question)
+                return response
+            except TranscriptsDisabled:
+                return "Transcripts are disabled for this video."
+            except NoTranscriptFound:
+                return "No transcript found for this video."
+            except ValueError as e:
+                return str(e)
             except Exception as e:
+                return f"Error during transcript analysis: {str(e)}"
         # Python REPL tool
         python_repl_tool = PythonREPLTool()
         tools = [
             wikipedia_tool,
+            youtube_transcript_tool,
             file_analyzer_tool,
+            computer_vision_analyzer,
+            web_search_tool,
+            wolfram_alpha_tool,
+            reverse_text_tool,
             python_repl_tool
         ]
             if not messages or not isinstance(messages[0], SystemMessage):
                 messages = [SystemMessage(content=SYSTEM_PROMPT)] + messages
+            print("\n🤖 Agent analyzing question...")
             response = model_with_tools.invoke(messages)
+            print(f"🤖 Response type: {type(response)}")
+            print(f"🤖 Content preview: {response.content[:200]}...")
+            print(f"🤖 Tool calls: {len(response.tool_calls) if response.tool_calls else 0}")
             return {"messages": [response]}
         tool_node = ToolNode(self.tools)
         builder.add_node("tools", tool_node)
         builder.add_edge(START, "agent")
+        builder.add_conditional_edges(
+            "agent",
+            tools_condition,
+            {
+                "tools": "tools",
+                END: END
+            }
+        )
         builder.add_edge("tools", "agent")
         memory = MemorySaver()
         return builder.compile(checkpointer=memory)
+    # Helper methods for file analysis
+    def _analyze_data_file(self, file_path: str, ext: str) -> str:
+        """Load an Excel/CSV file with pandas and return a text summary of it.
+
+        The summary lists shape, column names, dtypes, the first five rows and,
+        when numeric columns exist, their column sums rounded to 2 decimals.
+        Any failure is returned as an error string rather than raised.
+        """
+        try:
+            if ext == '.csv':
+                df = pd.read_csv(file_path)
+            elif ext in ('.xlsx', '.xls'):
+                df = pd.read_excel(file_path)
+            else:
+                return f"Unsupported data file type: {ext}"
+            sections = [
+                f"📄 DATA FILE: {file_path}\n",
+                f"🔢 SHAPE: {df.shape}\n",
+                f"🧠 COLUMNS: {list(df.columns)}\n",
+                f"🔍 COLUMN TYPES:\n{df.dtypes.to_string()}\n",
+                f"\n📊 FIRST 5 ROWS:\n{df.head().to_string(index=False)}\n",
+            ]
+            numeric_cols = df.select_dtypes(include=['number']).columns
+            if len(numeric_cols) > 0:
+                totals = df[numeric_cols].sum().round(2)
+                sections.append(f"\n💰 NUMERIC TOTALS:\n{totals.to_string()}\n")
+            return "".join(sections)
+        except Exception as e:
+            return f"Error analyzing data file {file_path}: {e}"
+    def _analyze_image_file(self, file_path: str) -> str:
+        """Return a text report describing an image file.
+
+        With OpenCV available the report covers dimensions, channel count, edge
+        density, contour count, colour statistics, a chess-board heuristic and,
+        when OCR_AVAILABLE is set, text found by pytesseract. Without OpenCV it
+        falls back to the size/format/mode reported by PIL. Errors are appended
+        to the report instead of being raised.
+        """
+        result = f"🖼️ IMAGE FILE: {file_path}\n"
+        try:
+            if cv2 is not None:
+                # Read image with OpenCV
+                img = cv2.imread(file_path)
+                if img is None:
+                    return result + "Error: Could not read image file"
+                height, width = img.shape[:2]
+                channels = img.shape[2] if len(img.shape) > 2 else 1
+                result += f"📐 DIMENSIONS: {width}x{height} pixels\n"
+                result += f"🎨 CHANNELS: {channels} ({'Color' if channels > 1 else 'Grayscale'})\n"
+                # Convert to grayscale for analysis
+                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if channels > 1 else img
+                # Edge detection to understand structure
+                edges = cv2.Canny(gray, 50, 150)
+                edge_pixels = np.count_nonzero(edges)
+                edge_percentage = (edge_pixels / (width * height)) * 100
+                result += f"📏 EDGE DENSITY: {edge_percentage:.1f}% (complexity indicator)\n"
+                # Detect basic shapes/contours
+                contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+                result += f"🔷 DETECTED CONTOURS: {len(contours)}\n"
+                # Analyze color distribution
+                if channels > 1:
+                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+                    pixels = img_rgb.reshape(-1, 3)
+                    unique_colors = len(np.unique(pixels, axis=0))
+                    result += f"🎨 UNIQUE COLORS: {unique_colors}\n"
+                    avg_color = pixels.mean(axis=0).astype(int)
+                    result += f"🎨 AVERAGE COLOR (RGB): {tuple(avg_color)}\n"
+                # Detect if it's likely a chess board (8x8 grid pattern)
+                result += self._analyze_chess_pattern(gray)
+                # OCR text detection if available
+                if OCR_AVAILABLE:
+                    try:
+                        # Uses the module-level `Image`; the fallback branch below imports
+                        # PIL under an alias so that `Image` is NOT a function-local name.
+                        # (A plain local `from PIL import Image` made this line raise
+                        # UnboundLocalError, so OCR never ran.)
+                        pil_image = Image.open(file_path)
+                        text = pytesseract.image_to_string(pil_image).strip()
+                        if text:
+                            result += f"\n📝 DETECTED TEXT:\n{text[:500]}{'...' if len(text) > 500 else ''}\n"
+                    except Exception as ocr_error:
+                        result += f"\n⚠️ OCR failed: {ocr_error}\n"
+            else:
+                # Basic analysis without OpenCV
+                result += "⚠️ OpenCV not available. Limited analysis:\n"
+                try:
+                    from PIL import Image as _PILImage
+                    img = _PILImage.open(file_path)
+                    result += f"📐 DIMENSIONS: {img.size[0]}x{img.size[1]} pixels\n"
+                    result += f"📄 FORMAT: {img.format}\n"
+                    result += f"🎨 MODE: {img.mode}\n"
+                except Exception:
+                    # Best effort: PIL missing or file unreadable.
+                    result += "Unable to analyze image without proper libraries installed.\n"
+            return result
+        except Exception as e:
+            return result + f"Error analyzing image: {e}"
+    def _analyze_chess_pattern(self, gray_img):
+        """Return report lines if the grayscale image looks like a chess board.
+
+        First tries OpenCV's chessboard-corner detector with a 7x7 inner-corner
+        grid. If that fails, counts near-horizontal and near-vertical Hough
+        line segments and reports a possible grid when both exceed 5. Returns
+        an empty string when nothing is detected or when detection fails
+        (this check is deliberately best-effort).
+        """
+        result = ""
+        try:
+            # Try to detect chessboard corners (typical 8x8 pattern)
+            ret, corners = cv2.findChessboardCorners(gray_img, (7, 7), None)
+            if ret:
+                result += "\n♟️ CHESS BOARD DETECTED: Yes (found corner pattern)\n"
+                result += "♟️ This appears to be a chess position image.\n"
+                return result
+            # Alternative: look for a grid-like structure via the Hough transform
+            edges = cv2.Canny(gray_img, 50, 150)
+            lines = cv2.HoughLinesP(edges, 1, np.pi/180, 100, minLineLength=100, maxLineGap=10)
+            if lines is not None and len(lines) > 20:
+                horizontal_lines = 0
+                vertical_lines = 0
+                for line in lines:
+                    x1, y1, x2, y2 = line[0]
+                    # Absolute segment angle in degrees, 0..180
+                    angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi)
+                    if angle < 10 or angle > 170:
+                        horizontal_lines += 1
+                    elif 80 < angle < 100:
+                        vertical_lines += 1
+                if horizontal_lines > 5 and vertical_lines > 5:
+                    result += "\nGRID PATTERN DETECTED: Possible chess board\n"
+                    result += f"♟️ Horizontal lines: {horizontal_lines}, Vertical lines: {vertical_lines}\n"
+        except Exception:
+            # Best effort only; narrowed from a bare `except:` so that
+            # KeyboardInterrupt/SystemExit are no longer swallowed.
+            pass
+        return result
+    def _analyze_audio_file(self, file_path: str) -> str:
+        """Transcribe an audio file and, for recipe-like text, list its ingredients.
+
+        The transcription comes from the recognizer's Google backend. When the
+        text mentions "ingredient" or "filling", the ingredients found by
+        _extract_ingredients are appended. Failures are reported in the result.
+        """
+        report = [f"🔊 AUDIO FILE: {file_path}\n"]
+        recognizer = sr.Recognizer()
+        try:
+            with sr.AudioFile(file_path) as source:
+                audio_data = recognizer.record(source)
+                text = recognizer.recognize_google(audio_data)
+                report.append(f"📝 TRANSCRIPTION:\n{text}\n")
+                lowered = text.lower()
+                if any(marker in lowered for marker in ("ingredient", "filling")):
+                    ingredients = self._extract_ingredients(text)
+                    report.append(
+                        f"\n🍓 EXTRACTED INGREDIENTS (filling only, alphabetized):\n{', '.join(ingredients)}\n"
+                    )
+        except Exception as e:
+            report.append(f"⚠️ Audio processing failed: {e}")
+        return "".join(report)
+    def _extract_ingredients(self, text: str) -> list:
+        """
+        Pull candidate ingredient phrases out of a recipe transcription.
+        Only lines containing a cooking keyword are scanned; leading articles or
+        quantities are skipped and the unique, lower-cased phrases are returned
+        in alphabetical order.
+        """
+        keywords = ("filling", "add", "mix", "combine", "put", "use", "for the filling")
+        phrase_re = re.compile(r"(?:a\s|an\s|some\s|[0-9]+[\/0-9\s]*)?([a-zA-Z\s\-]+?)(?=[\.,]|$)")
+        found = set()
+        for line in text.split('\n'):
+            lowered = line.lower()
+            if not any(k in lowered for k in keywords):
+                continue
+            for candidate in phrase_re.findall(line):
+                name = candidate.strip().lower()
+                if name:
+                    found.add(name)
+        return sorted(found)
+    # Video processing helpers
+    def _download_youtube_video(self, video_url: str, output_dir: str) -> str:
+        """Download a video with yt-dlp into output_dir and return the file path.
+
+        The path is the one yt-dlp prepares for the download, with any ".webm"
+        in it rewritten to ".mp4".
+        """
+        ydl_opts = {
+            'outtmpl': os.path.join(output_dir, "downloaded_video.%(ext)s"),
+            'format': 'mp4',
+            'quiet': True,
+            'no_warnings': True,
+        }
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(video_url, download=True)
+            return ydl.prepare_filename(info).replace(".webm", ".mp4")
+    def _extract_frames(self, video_path: str, frame_rate: int = 1) -> list:
+        """Sample frames from a video file at a fixed interval.
+
+        Args:
+            video_path: Path of the video to open with OpenCV.
+            frame_rate: Multiplied by the video's FPS to get the sampling step in
+                frames, so it acts as "seconds between kept frames".
+
+        Returns:
+            The kept frames in order; empty if the video cannot be opened or read.
+        """
+        cap = cv2.VideoCapture(video_path)
+        frames = []
+        try:
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            try:
+                interval = int(fps * frame_rate)
+            except (ValueError, OverflowError):
+                # FPS reported as NaN/inf
+                interval = 0
+            # A step of 0 (FPS unreported, or fps * frame_rate < 1) used to raise
+            # ZeroDivisionError in the modulo below; fall back to keeping every frame.
+            interval = max(1, interval)
+            count = 0
+            while cap.isOpened():
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                if count % interval == 0:
+                    frames.append(frame)
+                count += 1
+        finally:
+            # Release the capture even if reading fails part-way through.
+            cap.release()
+        return frames
+    def _detect_objects_per_frame(self, frames: list) -> list:
+        """
+        Run the YOLO model on every frame and tally the detected classes.
+        Returns one dict per frame with its index, a timestamp equal to that
+        index, and a {label: count} mapping limited to DETECTABLE_CLASSES.
+        """
+        per_frame = []
+        for idx, image in enumerate(frames):
+            prediction = self.yolo_model(image, verbose=False)
+            # Translate class ids from the first result into readable labels.
+            labels = (self.yolo_model.names[int(cls_id)] for cls_id in prediction[0].boxes.cls)
+            tally = {}
+            for label in labels:
+                if label not in DETECTABLE_CLASSES:
+                    continue
+                tally[label] = tally.get(label, 0) + 1
+            per_frame.append({
+                'frame_number': idx,
+                'timestamp_seconds': idx,  # assuming 1 frame per second
+                'detections': tally,
+            })
+        return per_frame
+    # YouTube transcript helpers
+    def _extract_video_id(self, url: str) -> str:
+        """Return the 11-character YouTube video ID found in a URL.
+
+        Raises:
+            ValueError: if none of the known URL shapes match.
+        """
+        patterns = (
+            r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/v\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})',
+            r'youtube\.com\/watch\?.*&v=([a-zA-Z0-9_-]{11})',
+        )
+        # Try the patterns in order and keep the first one that matches.
+        attempts = (re.search(pattern, url) for pattern in patterns)
+        hit = next((m for m in attempts if m), None)
+        if hit is None:
+            raise ValueError("Invalid YouTube URL format. Could not extract video ID.")
+        return hit.group(1)
+    def _get_transcript(self, video_id: str) -> List[dict]:
+        """Fetch a transcript, preferring English and falling back to any language.
+
+        Raises:
+            NoTranscriptFound: if the video has no transcript at all.
+            Other youtube_transcript_api errors (e.g. TranscriptsDisabled) propagate
+            unchanged so the caller can report them.
+        """
+        try:
+            return YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
+        except NoTranscriptFound:
+            # English is unavailable: take the first transcript the video offers.
+            # (The previous fallback asked for ['en'] again, so it could never succeed.)
+            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+            for candidate in transcript_list:
+                return candidate.fetch()
+            raise
+    def _find_response(self, transcript: List[dict], question: str) -> Optional[str]:
+        """Return the transcript text that follows the entry matching a question.
+
+        An entry matches when at least 70% of the question's words (case and
+        punctuation ignored) appear in it. The reply is made of the following
+        entries: at most five, stopping early once their durations reach 30s.
+        A fixed "not found" message is returned when nothing matches.
+        """
+        def words_of(raw: str) -> set:
+            # Lower-case, drop punctuation, split into a set of words.
+            return set(re.sub(r'[^\w\s]', '', raw.strip().lower()).split())
+        wanted = words_of(question)
+        total_entries = len(transcript)
+        for idx, entry in enumerate(transcript):
+            spoken = words_of(entry["text"])
+            if not wanted or len(wanted & spoken) / len(wanted) < 0.7:
+                continue
+            reply = []
+            elapsed = 0
+            for follow in transcript[idx + 1:min(idx + 6, total_entries)]:
+                reply.append(follow["text"])
+                if "duration" in follow:
+                    elapsed += follow["duration"]
+                    if elapsed >= 30:  # Stop after 30 seconds
+                        break
+            if reply:
+                return " ".join(reply)
+        return "Could not find a response to the question in the transcript."
     def _extract_final_answer(self, response_text: str) -> str:
         """Extract the final answer from agent response"""
         match = re.search(r"FINAL ANSWER:\s*(.*)", response_text, re.DOTALL | re.IGNORECASE)
         if match:
             raw_answer = match.group(1).strip()
+            if "\n" in raw_answer and not (',' in raw_answer and '\n' not in raw_answer.split(',', 1)[0]):
+                 raw_answer = raw_answer.split("\n", 1)[0].strip()
             if raw_answer.endswith('.') and not raw_answer[:-1].replace('.', '').isdigit():
                 raw_answer = raw_answer[:-1]
+            common_phrases = ["which is", "because", " as ", " since "]
+            for phrase in common_phrases:
+                if phrase in raw_answer.lower():
+                    raw_answer = raw_answer.split(phrase)[0].strip()
             return raw_answer.strip()
         lines = [line.strip() for line in response_text.strip().split('\n') if line.strip()]
         return lines[-1] if lines else response_text.strip()
+    def _preprocess_question(self, question: str) -> str:
+        """Annotate a question with hints before it is sent to the agent.
+
+        Checks, in order: a heuristic for reversed text, mentions of files or
+        attachments, and video URLs/extensions. The first check that fires
+        decides the returned text; otherwise the stripped question is returned.
+        """
+        q = question.strip()
+        # Reversed-text heuristic
+        looks_like_sentence = q.endswith(('.', '?')) and len(q) > 10 and q[0].islower() and ' ' in q
+        if looks_like_sentence:
+            words = q.split()
+            capitalised = sum(1 for w in words[1:] if len(w) > 1 and w[0].isupper())
+            if capitalised > len(words) / 3:
+                reversed_q = q[::-1]
+                print(f"👀 Question appears reversed. Reversed: '{reversed_q}'")
+                return f"[This question *might* be reversed. Original: '{q}'. Reversed: '{reversed_q}'] {reversed_q}"
+        # File / attachment mentions
+        file_indicators = (
+            "attached", "attachment", "file", "excel", "mp3", "audio", "image",
+            "recording", "python code", ".py", ".xlsx", ".mp3", ".wav", ".jpg",
+            ".png", ".pdf", "listen to", "analyze the", "review the", "examine the",
+        )
+        lowered = q.lower()
+        for indicator in file_indicators:
+            if indicator in lowered:
+                print("📎 File/attachment detected in question.")
+                return f"{q}\n[NOTE: This question mentions files/attachments. Use file_analyzer_tool to read and analyze any uploaded files.]"
+        # Video URLs or video file extensions
+        video_patterns = (
+            r'youtube\.com/watch\?v=',
+            r'youtu\.be/',
+            r'\.mp4', r'\.avi', r'\.mov', r'\.mkv',
+        )
+        if any(re.search(pattern, q, re.IGNORECASE) for pattern in video_patterns):
+            print("📹 Video URL detected in question.")
+            return f"{q}\n[NOTE: Video detected. Use youtube_transcript_tool for dialogue or search tools for video content analysis.]"
+        return q
+    def process_question(self, task_id: str, question_text: str) -> Dict:
+        """Process a single question.
+
+        Pre-processes the question, streams it through the agent graph on a
+        per-task thread id, and extracts the final answer from the last message.
+
+        Returns:
+            {"success": True, "answer": ..., "full_response": ...} on success,
+            {"success": False, "error": ...} otherwise. Exceptions are caught
+            and reported in the returned dict.
+        """
+        print(f"\n{'='*80}")
+        print(f"⚡ Processing Task ID: {task_id}")
+        print(f"❓ Question: {question_text}")
+        print(f"{'='*80}")
+        processed_question = self._preprocess_question(question_text)
+        config = {"configurable": {"thread_id": f"gaia_task_{task_id}"}}
+        try:
+            final_state = None
+            iterations = 0
+            # Stream events with iteration limit
+            events = self.agent_runner.stream(
+                {"messages": [HumanMessage(content=processed_question)]},
+                config=config,
+                stream_mode="values"
+            )
+            for event in events:
+                final_state = event
+                iterations += 1
+                if iterations > 10:  # Prevent infinite loops
+                    print("⚠️ Max iterations reached, stopping...")
+                    break
+            if not final_state or not final_state['messages']:
+                print("❌ Agent did not return a final state.")
+                return {"success": False, "error": "Agent execution failed."}
+            last_message = final_state['messages'][-1]
+            # If last message has tool calls, try one more time.
+            # getattr: the last message may be a type without a tool_calls attribute.
+            if getattr(last_message, "tool_calls", None) and iterations < 10:
+                print("🔄 Getting final answer from agent...")
+                try:
+                    final_state = self.agent_runner.invoke({"messages": []}, config=config)
+                    last_message = final_state['messages'][-1]
+                except Exception as retry_error:
+                    # Best effort: continue with the state we already have, but say why.
+                    print(f"⚠️ Follow-up invocation failed: {retry_error}")
+            full_response = last_message.content
+            print(f"\n📝 Full Agent Response:\n{full_response}")
+            final_answer = self._extract_final_answer(full_response)
+            print(f"\n🎯 Extracted Final Answer: '{final_answer}'")
+            if not final_answer or final_answer == full_response:
+                print("⚠️ Could not extract a 'FINAL ANSWER:' block.")
+            return {
+                "success": True,
+                "answer": final_answer,
+                "full_response": full_response
+            }
+        except Exception as e:
+            print(f"❌ CRITICAL ERROR processing question {task_id}: {e}")
+            import traceback
+            traceback.print_exc()
+            return {"success": False, "error": str(e)}
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
     # 1. Instantiate GAIA Agent
     try:
         agent = GAIAAgent()
         print(f"Error instantiating GAIA agent: {e}")
         return f"Error initializing GAIA agent: {e}", None
+    agent_code = AGENT_CODE if space_id else f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code: {agent_code}")
     # 2. Fetch Questions
+    hf_token = os.getenv("HUGGING_FACE_API_TOKEN")
+    headers = {}
+    if hf_token:
+        headers["Authorization"] = f"Bearer {hf_token}"
+    questions_url = f"{HF_API_BASE_URL}/questions"
     print(f"Fetching questions from: {questions_url}")
     try:
+        response = requests.get(questions_url, headers=headers, timeout=60)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
             return "Fetched questions list is empty.", None
+        print(f"✅ Retrieved {len(questions_data)} questions.")
     except Exception as e:
+        print(f"❌ Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
+    # 3. Filter for Level 1 questions
+    level_1_questions = [q for q in questions_data if q.get('level', 1) == 1]
+    print(f"📋 Processing {len(level_1_questions)} Level 1 questions.")
+    # 4. Run GAIA Agent on questions
     results_log = []
     answers_payload = []
+    stats = {
+        "total": len(level_1_questions),
+        "attempted": 0,
+        "processed": 0,
+        "failed": 0
+    }
+    for i, item in enumerate(level_1_questions):
         task_id = item.get("task_id")
+        question_text = item.get('Question', item.get('question'))
+        if not task_id or not question_text:
+            print(f"⚠️ Question {i+1} missing data, skipping...")
             continue
+        stats["attempted"] += 1
+        print(f"\n🔄 Processing question {i+1}/{len(level_1_questions)}: {task_id}")
         try:
+            result = agent.process_question(task_id, question_text)
+            if result.get("success"):
+                submitted_answer = result.get("answer", "")
+                # Attempt to convert to number if it looks like one
+                try:
+                    if re.fullmatch(r"-?\d+", submitted_answer):
+                        submitted_value = int(submitted_answer)
+                    elif re.fullmatch(r"-?\d+\.\d+", submitted_answer):
+                        submitted_value = float(submitted_answer)
+                    else:
+                        submitted_value = submitted_answer
+                except ValueError:
+                    submitted_value = submitted_answer
+                answers_payload.append({
+                    "task_id": task_id,
+                    "submitted_answer": submitted_value
+                })
+                results_log.append({
+                    "Task ID": task_id,
+                    "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                    "Submitted Answer": submitted_answer,
+                    "Status": "✅ Success"
+                })
+                stats["processed"] += 1
+                print(f"✅ Question {i+1} completed: {submitted_answer}")
+            else:
+                error_msg = result.get("error", "Unknown error")
+                results_log.append({
+                    "Task ID": task_id,
+                    "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                    "Submitted Answer": f"ERROR: {error_msg}",
+                    "Status": "❌ Failed"
+                })
+                stats["failed"] += 1
+                print(f"❌ Question {i+1} failed: {error_msg}")
         except Exception as e:
+            print(f"❌ Critical error on question {i+1}: {e}")
+            import traceback
+            traceback.print_exc()
             results_log.append({
                 "Task ID": task_id,
                 "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Submitted Answer": f"CRITICAL ERROR: {str(e)}",
+                "Status": "💥 Critical Error"
             })
+            stats["failed"] += 1
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 5. Submit answers
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }
+    print(f"\n📤 Submitting {len(answers_payload)} answers...")
+    print(f"Submission payload: {json.dumps(submission_data, indent=2)}")
     try:
+        response = requests.post(
+            f"{HF_API_BASE_URL}/submit",
+            headers=headers,
+            json=submission_data,
+            timeout=120
+        )
         response.raise_for_status()
         result_data = response.json()
+        print(f"📦 API Response: {json.dumps(result_data, indent=2)}")
+        score = result_data.get('score', 0)
+        correct_count = result_data.get('correct_count', 0)
+        total_attempted = result_data.get('total_attempted', len(answers_payload))
         final_status = (
+            f"{'='*50}\n"
+            f"📊 SUBMISSION RESULTS\n"
+            f"{'='*50}\n"
+            f"✅ Submission Successful!\n"
+            f"👤 User: {result_data.get('username', username)}\n"
+            f"🎯 Overall Score: {score}%\n"
+            f"📊 Correct Answers: {correct_count}/{total_attempted}\n"
+            f"💬 Message: {result_data.get('message', 'No message received.')}\n"
+            f"\n📈 PROCESSING STATS:\n"
+            f"   Total Level 1 Questions: {stats['total']}\n"
+            f"   Questions Attempted: {stats['attempted']}\n"
+            f"   Successfully Processed: {stats['processed']}\n"
+            f"   Failed to Process: {stats['failed']}\n"
+            f"{'='*50}"
         )
         print("✅ Submission successful!")
+        print(final_status)
         return final_status, pd.DataFrame(results_log)
     except Exception as e:
+        error_msg = (
+            f"❌ SUBMISSION FAILED\n"
+            f"Error: {str(e)}\n"
+            f"\nProcessing Stats:\n"
+            f"   Questions Attempted: {stats['attempted']}\n"
+            f"   Successfully Processed: {stats['processed']}\n"
+            f"   Failed to Process: {stats['failed']}"
+        )
+        if hasattr(e, 'response') and e.response:
+            error_msg += f"\n\nAPI Response: {e.response.text}"
         print(error_msg)
         return error_msg, pd.DataFrame(results_log)
     gr.Markdown("# 🤖 GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
+        **Advanced GAIA Benchmark Agent (Exact Match with gaia_agent.py)**
         This agent uses:
+        - 🧠 GPT-4 Turbo with specialized GAIA prompt engineering
+        - 📚 Wikipedia search for encyclopedic information
+        - 🌐 Tavily web search for current events
         - 🧮 Wolfram Alpha for computational tasks
+        - 📊 File analysis for Excel/CSV/Image/Audio data
+        - 🎥 YouTube transcript analysis
+        - 👁️ Computer vision with YOLO for video analysis
         - 🐍 Python REPL for mathematical analysis
+        - 🔄 Text reversal tool for encoded questions
+        **Features:**
+        - Processes only Level 1 questions
+        - Exact answer extraction with FINAL ANSWER format
+        - Comprehensive error handling and retry logic
+        - Detailed processing statistics
         **Instructions:**
         1. Log in to your Hugging Face account
     run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(
+        label="📊 Run Status / Submission Result",
+        lines=15,
         interactive=False
     )
     results_table = gr.DataFrame(
+        label="📝 Questions and Agent Answers",
         wrap=True,
+        max_height=600
     )
     run_button.click(
         print(f"✅ SPACE_ID: {space_id}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
+    # Check for required API keys
+    required_keys = ["OPENAI_API_KEY", "TAVILY_API_KEY", "WOLFRAM_API_KEY"]
+    missing_keys = [key for key in required_keys if not os.getenv(key)]
+    if missing_keys:
+        print(f"\n⚠️ WARNING: Missing API keys: {', '.join(missing_keys)}")
+        print("   Please set these in your HuggingFace Space secrets!")
+    else:
+        print("\n✅ All required API keys found!")
     print("="*50 + "\n")
     print("🌟 Launching GAIA Agent Interface...")
     demo.launch(debug=True, share=False)