Final_Assignment_AGENT_GAIA

Sleeping

App Files Files Community

Isateles committed on May 30, 2025

Commit

591a8d1

1 Parent(s): a7b80a9

Update GAIA agent: changed to ReAct architecture

Browse files

Files changed (2) hide show

app.py +63 -80
tools.py +4 -10

app.py CHANGED Viewed

@@ -71,32 +71,39 @@ def setup_llm():
 def extract_final_answer(response_text: str) -> str:
-    """Extract answer aligned with GAIA scoring rules - FIXED VERSION"""
-    # First, remove any "assistant:" prefix that might have been added
     response_text = re.sub(r'^assistant:\s*', '', response_text, flags=re.IGNORECASE)
     # Look for FINAL ANSWER pattern
     match = re.search(r"FINAL ANSWER:\s*(.+?)(?:\n|$)", response_text, re.IGNORECASE | re.DOTALL)
     if not match:
-        logger.warning("No FINAL ANSWER found in response")
-        return ""
-    answer = match.group(1).strip()
-    # CRITICAL: Stop processing if we hit "assistant:" or any reasoning text
     if 'assistant:' in answer:
         answer = answer.split('assistant:')[0].strip()
-    # Remove any trailing explanatory text (usually starts with lowercase after answer)
-    sentences = answer.split('.')
-    if len(sentences) > 1:
-        # Check if second sentence starts with lowercase (indicates explanation)
-        first_sentence = sentences[0].strip()
-        if first_sentence and (not sentences[1].strip() or sentences[1].strip()[0].islower()):
-            answer = first_sentence
     # Clean for GAIA scoring
     # 1. Handle pure numbers
@@ -142,7 +149,7 @@ def extract_final_answer(response_text: str) -> str:
     return answer
 class GAIAAgent:
-    """GAIA RAG Agent using LlamaIndex AgentWorkflow"""
     def __init__(self):
         logger.info("Initializing GAIA RAG Agent...")
@@ -161,14 +168,18 @@ class GAIAAgent:
         for tool in self.tools:
             logger.info(f"  - {tool.metadata.name}: {tool.metadata.description}")
-        # Create agent with GAIA prompt
-        from llama_index.core.agent.workflow import AgentWorkflow
-        self.agent = AgentWorkflow.from_tools_or_functions(
-            tools_or_functions=self.tools,
             llm=self.llm,
             system_prompt=GAIA_SYSTEM_PROMPT,
-            verbose=True
         )
         logger.info("GAIA RAG Agent ready!")
@@ -177,70 +188,42 @@ class GAIAAgent:
         """Process a question and return clean answer for course submission"""
         logger.info(f"Processing question: {question[:100]}...")
-        import warnings
-        warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*Event loop is closed.*")
         try:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            try:
-                async def run_agent():
-                    try:
-                        handler = self.agent.run(user_msg=question)
-                        # Wait for the result
-                        result = await handler
-                        # Extract response text more carefully
-                        response_text = ""
-                        # Try different ways to get the response
-                        if hasattr(result, 'response'):
-                            if hasattr(result.response, 'message'):
-                                if hasattr(result.response.message, 'content'):
-                                    response_text = result.response.message.content
-                                else:
-                                    response_text = str(result.response.message)
-                            else:
-                                response_text = str(result.response)
-                        elif hasattr(result, 'content'):
-                            response_text = result.content
-                        elif hasattr(result, 'output'):
-                            response_text = result.output
-                        else:
-                            response_text = str(result)
-                        # Clean up any streaming artifacts
-                        response_text = re.sub(r'assistant:\s*', '', response_text, flags=re.IGNORECASE)
-                        return response_text
-                    except Exception as e:
-                        logger.error(f"Agent execution error: {e}")
-                        import traceback
-                        logger.error(traceback.format_exc())
-                        return "FINAL ANSWER: "
-                response_text = loop.run_until_complete(
-                    asyncio.wait_for(run_agent(), timeout=60)
-                )
-                # Extract clean answer
-                clean_answer = extract_final_answer(response_text)
-                logger.info(f"Full response preview: {response_text[:200]}...")
-                logger.info(f"Extracted answer: '{clean_answer}'")
-                return clean_answer
-            finally:
-                loop.close()
         except Exception as e:
             logger.error(f"Error processing question: {e}")
             return ""
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """Run GAIA evaluation following course template structure"""

 def extract_final_answer(response_text: str) -> str:
+    """Extract answer aligned with GAIA scoring rules"""
+    # Remove any ReAct thinking patterns
+    response_text = re.sub(r'Thought:.*?\n', '', response_text, flags=re.DOTALL)
+    response_text = re.sub(r'Action:.*?\n', '', response_text, flags=re.DOTALL)
+    response_text = re.sub(r'Observation:.*?\n', '', response_text, flags=re.DOTALL)
+    # Remove assistant prefix
     response_text = re.sub(r'^assistant:\s*', '', response_text, flags=re.IGNORECASE)
     # Look for FINAL ANSWER pattern
     match = re.search(r"FINAL ANSWER:\s*(.+?)(?:\n|$)", response_text, re.IGNORECASE | re.DOTALL)
     if not match:
+        # Try to find answer at the end of response
+        lines = response_text.strip().split('\n')
+        if lines:
+            last_line = lines[-1].strip()
+            # If last line is short and doesn't look like reasoning
+            if last_line and len(last_line) < 50:
+                answer = last_line
+            else:
+                logger.warning("No FINAL ANSWER found")
+                return ""
+        else:
+            return ""
+    else:
+        answer = match.group(1).strip()
+    # Stop at any continuation
     if 'assistant:' in answer:
         answer = answer.split('assistant:')[0].strip()
     # Clean for GAIA scoring
     # 1. Handle pure numbers
     return answer
 class GAIAAgent:
+    """GAIA RAG Agent using ReActAgent for better compatibility"""
     def __init__(self):
         logger.info("Initializing GAIA RAG Agent...")
         for tool in self.tools:
             logger.info(f"  - {tool.metadata.name}: {tool.metadata.description}")
+        # Create ReActAgent instead of AgentWorkflow
+        from llama_index.core.agent import ReActAgent
+        self.agent = ReActAgent.from_tools(
+            tools=self.tools,
             llm=self.llm,
+            verbose=True,
             system_prompt=GAIA_SYSTEM_PROMPT,
+            max_iterations=10,
+            # ReAct specific settings
+            react_chat_formatter=None,  # Use default ReAct formatter
+            output_parser=None,  # Use default output parser
         )
         logger.info("GAIA RAG Agent ready!")
         """Process a question and return clean answer for course submission"""
         logger.info(f"Processing question: {question[:100]}...")
         try:
+            # Much simpler with ReActAgent - just call chat
+            response = self.agent.chat(question)
+            # Get the response text
+            response_text = str(response)
+            # Clean any artifacts
+            response_text = re.sub(r'assistant:\s*', '', response_text, flags=re.IGNORECASE)
+            # Extract clean answer
+            clean_answer = extract_final_answer(response_text)
+            if not clean_answer:
+                # Fallback: try to extract from response directly
+                logger.warning("Primary extraction failed, trying fallback")
+                # Look for short answers at the end
+                lines = response_text.strip().split('\n')
+                for line in reversed(lines):
+                    line = line.strip()
+                    if line and len(line) < 100 and not line.startswith(('Thought:', 'Action:', 'Observation:')):
+                        clean_answer = extract_final_answer(f"FINAL ANSWER: {line}")
+                        if clean_answer:
+                            break
+            logger.info(f"Full response: {response_text[:200]}...")
+            logger.info(f"Extracted answer: '{clean_answer}'")
+            return clean_answer
         except Exception as e:
             logger.error(f"Error processing question: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
             return ""
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """Run GAIA evaluation following course template structure"""

tools.py CHANGED Viewed

@@ -556,28 +556,22 @@ def get_gaia_tools(llm=None):
         FunctionTool.from_defaults(
             fn=search_web,
             name="web_search",
-            description="""Use ONLY for:
-        1. Current events after January 2025
-        2. Real-time data (stock prices, weather, sports scores)
-        3. When question explicitly asks to "search" or "look up"
-        4. To verify facts you're uncertain about
-        Do NOT use for general knowledge, historical facts, or math."""
         ),
         FunctionTool.from_defaults(
             fn=calculate,
             name="calculator",
-            description="ALWAYS use for ANY math calculation, including simple arithmetic like 2+2. Required for all numbers."
         ),
         FunctionTool.from_defaults(
             fn=analyze_file,
             name="file_analyzer",
-            description="Analyze file contents, especially CSV files. Returns statistics and data insights."
         ),
         FunctionTool.from_defaults(
             fn=get_weather,
             name="weather",
-            description="Get current weather information for any location. Use when asked about weather conditions."
         )
     ]

         FunctionTool.from_defaults(
             fn=search_web,
             name="web_search",
+            description="""Search the web for information. Use when you need current information, real-time data, or to verify facts. Input should be a search query string."""
         ),
         FunctionTool.from_defaults(
             fn=calculate,
             name="calculator",
+            description="""Perform mathematical calculations. Use for any math problem. Input should be the mathematical expression to evaluate."""
         ),
         FunctionTool.from_defaults(
             fn=analyze_file,
             name="file_analyzer",
+            description="""Analyze file contents, especially CSV files. Input should be the file content and file type."""
         ),
         FunctionTool.from_defaults(
             fn=get_weather,
             name="weather",
+            description="""Get current weather for a location. Input should be the location name."""
         )
     ]