Spaces:

schoolkithub
/

multi-agent-gaia-system

Runtime error

App Files Files Community

Omachoko committed on Jun 29, 2025

Commit

2d0e062

1 Parent(s): 50f18bd

GAIA agent: strict output normalization, reasoning planner, RAG, modular tool chaining, robust error handling

Browse files

Files changed (1) hide show

app.py +85 -53

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import requests
 import inspect
 import pandas as pd
 from typing import Any
 # (Keep Constants as is)
 # --- Constants ---
@@ -281,13 +282,81 @@ Question:
 Answer:
 """
 # --- Refactored ModularGAIAAgent ---
 class ModularGAIAAgent:
-    def __init__(self, api_url=DEFAULT_API_URL, tool_registry=None):
         self.api_url = api_url
-        self.tools = tool_registry or TOOL_REGISTRY
         self.reasoning_trace = []
         self.file_cache = set(os.listdir('.'))
     def fetch_questions(self, from_api=True, questions_path="Hugging Face Questions"):
         """Fetch questions from API or local file."""
@@ -357,15 +426,15 @@ class ModularGAIAAgent:
         """Analyze file and return context for the question."""
         try:
             if file_type == 'audio':
-                transcript = self.tools['asr_transcribe'](file_name)
                 self.reasoning_trace.append(f"Transcribed audio: {transcript[:100]}...")
                 return transcript
             elif file_type == 'image':
-                caption = self.tools['image_caption'](file_name)
                 self.reasoning_trace.append(f"Image caption: {caption}")
                 return caption
             elif file_type == 'code':
-                result = self.tools['code_analysis'](file_name)
                 self.reasoning_trace.append(f"Code analysis result: {result}")
                 return result
             elif file_type == 'excel':
@@ -400,41 +469,7 @@ class ModularGAIAAgent:
             self.reasoning_trace.append(f"Analyze file error: {e}")
             return None
-    def smart_tool_select(self, question, file_type=None):
-        """Select the best tool(s) for the question, optionally using GPT-4.1 for planning."""
-        api_key = os.environ.get("OPENAI_API_KEY", "")
-        try:
-            if api_key:
-                plan_prompt = f"""
-You are an expert AI agent. Given the following question and file type, suggest the best tool(s) to use from this list: {list(self.tools.keys())}.
-Question: {question}
-File type: {file_type}
-Respond with a comma-separated list of tool names only, in order of use. If unsure, start with web_search_duckduckgo.
-"""
-                plan = gpt4_chat(plan_prompt, api_key=api_key)
-                tool_names = [t.strip() for t in plan.split(',') if t.strip() in self.tools]
-                if tool_names:
-                    return tool_names
-        except Exception as e:
-            logger.error(f"smart_tool_select planning error: {e}")
-        # Fallback: heuristic
-        if file_type == 'audio':
-            return ['asr_transcribe']
-        elif file_type == 'image':
-            return ['image_caption']
-        elif file_type == 'code':
-            return ['code_analysis']
-        elif file_type in ['excel', 'csv']:
-            return ['table_qa']
-        elif 'youtube.com' in question or 'youtu.be' in question:
-            return ['youtube_video_qa']
-        elif any(w in question.lower() for w in ['wikipedia', 'who', 'when', 'where', 'what', 'how', 'find', 'search']):
-            return ['web_search_duckduckgo']
-        else:
-            return ['llama3_chat']
     def answer_question(self, question_obj):
-        """Answer a question using the best tool(s) and context."""
         self.reasoning_trace = []
         q = question_obj["question"]
         file_name = question_obj.get("file_name", "")
@@ -446,19 +481,23 @@ Respond with a comma-separated list of tool names only, in order of use. If unsu
             if local_file:
                 file_type = self.detect_file_type(local_file)
                 file_content = self.analyze_file(local_file, file_type)
-        # Smart tool selection
-        tool_names = self.smart_tool_select(q, file_type)
         answer = None
-        context = file_content
         for tool_name in tool_names:
-            tool = self.tools[tool_name]
             try:
                 logger.info(f"Using tool: {tool_name} | Question: {q} | Context: {str(context)[:200]}")
                 if tool_name == 'web_search_duckduckgo':
                     context = tool(q)
                     answer = llama3_chat(build_prompt(context, q))
-                elif tool_name == 'gpt4_chat':
-                    answer = tool(build_prompt(context, q))
                 elif tool_name == 'table_qa' and file_content:
                     answer = tool(q, file_content)
                 elif tool_name in ['asr_transcribe', 'image_caption', 'code_analysis'] and file_content:
@@ -466,7 +505,6 @@ Respond with a comma-separated list of tool names only, in order of use. If unsu
                 elif tool_name == 'youtube_video_qa':
                     answer = tool(q, q)
                 else:
-                    # Always pass context if available
                     if context:
                         answer = llama3_chat(build_prompt(context, q))
                     else:
@@ -479,13 +517,7 @@ Respond with a comma-separated list of tool names only, in order of use. If unsu
                 continue
         self.reasoning_trace.append(f"Tools used: {tool_names}")
         self.reasoning_trace.append(f"Final answer: {answer}")
-        return self.format_answer(answer), self.reasoning_trace
-    def format_answer(self, answer):
-        """Strict GAIA: only the answer, no extra text, no prefix."""
-        if isinstance(answer, str):
-            return answer.strip().split('\n')[0]
-        return str(answer)
 # --- Basic Agent Definition (now wraps ModularGAIAAgent) ---
 class BasicAgent:

 import inspect
 import pandas as pd
 from typing import Any
+import re
 # (Keep Constants as is)
 # --- Constants ---
 Answer:
 """
+# --- Centralized Output Formatting & Normalization ---
def gaia_normalize_answer(answer):
    """Normalize a raw answer into a short, bare string for GAIA submission.

    The normalization removes English articles, currency/percent markers,
    redundant whitespace, and leading/trailing punctuation.

    Args:
        answer: The raw answer. Non-string values are converted with
            ``str()``; ``None`` yields an empty string rather than the
            literal text ``"None"``.

    Returns:
        The normalized answer string. If stripping articles/units would
        erase the whole answer (e.g. the multiple-choice answer ``"A"``),
        the trimmed original text is returned instead.
    """
    if answer is None:
        return ""
    text = answer if isinstance(answer, str) else str(answer)
    edge_chars = " .,:;\n\t"

    # Articles are removed only as whole words.
    cleaned = re.sub(r"\b(?:the|a|an)\b", "", text, flags=re.IGNORECASE)

    # Unit words must not be glued to other letters, otherwise "eur" would be
    # cut out of words such as "Fleur" or "entrepreneur". Digits may still be
    # adjacent ("100USD", "EUR50"), hence letter lookarounds instead of \b.
    cleaned = re.sub(
        r"[$%]|(?<![A-Za-z])(?:USD|dollars|euros|eur|percent)(?![A-Za-z])",
        "",
        cleaned,
        flags=re.IGNORECASE,
    )

    # Collapse whitespace last so gaps left by the removals above disappear.
    cleaned = re.sub(r"\s+", " ", cleaned).strip(edge_chars)

    # Never return an empty answer when the input actually had content.
    return cleaned or text.strip(edge_chars)
+# --- Reasoning Planner for Tool Chaining ---
def reasoning_planner(question, file_type, tools):
    """Plan the ordered list of tool names to run for a question.

    The plan is purely heuristic: a known file type selects its dedicated
    tool chain; otherwise the question text decides between YouTube QA,
    web search followed by the LLM, or the LLM alone.

    Args:
        question: The question text.
        file_type: Detected type of the attached file (e.g. ``'audio'``,
            ``'image'``, ``'code'``, ``'excel'``, ``'csv'``) or ``None``.
        tools: Names of the available tools. Accepted for interface
            compatibility; the heuristic does not consult it.

    Returns:
        A list of tool names, in the order they should be used.
    """
    file_plans = {
        'audio': ['asr_transcribe', 'llama3_chat'],
        'image': ['image_caption', 'llama3_chat'],
        'code': ['code_analysis', 'llama3_chat'],
        'excel': ['table_qa'],
        'csv': ['table_qa'],
    }
    if file_type in file_plans:
        # Return a copy so callers cannot mutate the plan table.
        return list(file_plans[file_type])

    if 'youtube.com' in question or 'youtu.be' in question:
        return ['youtube_video_qa']

    # Interrogatives must match as whole words: plain substring matching made
    # "shown" trigger "how" and "whole" trigger "who". Action keywords are
    # matched as word prefixes so inflections ("searching", "finds") count.
    lookup_pattern = (
        r"\b(?:who|when|where|what|how)\b"
        r"|\b(?:wikipedia|find|search)"
    )
    if re.search(lookup_pattern, question, flags=re.IGNORECASE):
        return ['web_search_duckduckgo', 'llama3_chat']

    return ['llama3_chat']
+# --- Improved RAG: Context Retrieval & Chunking ---
def retrieve_context(question, context_files, max_chunks=3, chunk_size=2000):
    """Retrieve keyword-matching text chunks from context files for RAG.

    Each file is read as text, cut into fixed-size character chunks, and a
    chunk is kept when it contains any whitespace-separated word of the
    question (case-insensitive substring match).

    Args:
        question: The question whose words are used as keywords.
        context_files: Iterable of file paths to scan, in order.
        max_chunks: Upper bound on the total number of chunks returned
            across all files.
        chunk_size: Chunk length in characters.

    Returns:
        The selected chunks joined with newlines, or ``''`` when nothing
        matches. Files that cannot be read are logged and skipped.
    """
    if not question or not context_files:
        return ''

    # Lower-case the keywords once instead of once per chunk per word.
    keywords = {word.lower() for word in question.split()}
    relevant_chunks = []

    for file_path in context_files:
        # Enforce the limit across files, not only within a single file.
        if len(relevant_chunks) >= max_chunks:
            break
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
        except Exception as e:
            # Best effort: an unreadable file must not abort retrieval.
            logger.error(f"retrieve_context error: {e}")
            continue

        for start in range(0, len(text), chunk_size):
            chunk = text[start:start + chunk_size]
            lowered = chunk.lower()
            if any(keyword in lowered for keyword in keywords):
                relevant_chunks.append(chunk)
                if len(relevant_chunks) >= max_chunks:
                    break

    return '\n'.join(relevant_chunks)
+# --- Modular Tool Registry & Chaining ---
class ToolRegistry:
    """Central name-to-callable registry for agent tools."""

    def __init__(self, tools):
        """Create a registry from a mapping of tool name to callable.

        The mapping is copied so that ``add`` on this registry does not
        mutate the caller's dict (for example a shared module-level
        registry). ``None`` or an empty mapping yields an empty registry.
        """
        self.tools = dict(tools) if tools else {}

    def get(self, name):
        """Return the tool registered under ``name``, or ``None`` if absent."""
        return self.tools.get(name)

    def add(self, name, func):
        """Register ``func`` under ``name``, replacing any existing entry."""
        self.tools[name] = func

    def list(self):
        """Return the registered tool names in insertion order."""
        return list(self.tools)
 # --- Refactored ModularGAIAAgent ---
 class ModularGAIAAgent:
+    """GAIA-compliant agent with robust reasoning, tool chaining, RAG, and output normalization."""
+    def __init__(self, api_url=DEFAULT_API_URL, tool_registry=None, context_files=None):
         self.api_url = api_url
+        self.tools = ToolRegistry(tool_registry or TOOL_REGISTRY)
         self.reasoning_trace = []
         self.file_cache = set(os.listdir('.'))
+        self.context_files = context_files or []
     def fetch_questions(self, from_api=True, questions_path="Hugging Face Questions"):
         """Fetch questions from API or local file."""
         """Analyze file and return context for the question."""
         try:
             if file_type == 'audio':
+                transcript = self.tools.get('asr_transcribe')(file_name)
                 self.reasoning_trace.append(f"Transcribed audio: {transcript[:100]}...")
                 return transcript
             elif file_type == 'image':
+                caption = self.tools.get('image_caption')(file_name)
                 self.reasoning_trace.append(f"Image caption: {caption}")
                 return caption
             elif file_type == 'code':
+                result = self.tools.get('code_analysis')(file_name)
                 self.reasoning_trace.append(f"Code analysis result: {result}")
                 return result
             elif file_type == 'excel':
             self.reasoning_trace.append(f"Analyze file error: {e}")
             return None
     def answer_question(self, question_obj):
         self.reasoning_trace = []
         q = question_obj["question"]
         file_name = question_obj.get("file_name", "")
             if local_file:
                 file_type = self.detect_file_type(local_file)
                 file_content = self.analyze_file(local_file, file_type)
+        # RAG: retrieve context if needed
+        rag_context = ''
+        if not file_content and self.context_files:
+            rag_context = retrieve_context(q, self.context_files)
+            if rag_context:
+                self.reasoning_trace.append(f"RAG context used: {rag_context[:200]}...")
+        # Reasoning planner: decide tool chain
+        tool_names = reasoning_planner(q, file_type, self.tools.list())
         answer = None
+        context = file_content or rag_context
         for tool_name in tool_names:
+            tool = self.tools.get(tool_name)
             try:
                 logger.info(f"Using tool: {tool_name} | Question: {q} | Context: {str(context)[:200]}")
                 if tool_name == 'web_search_duckduckgo':
                     context = tool(q)
                     answer = llama3_chat(build_prompt(context, q))
                 elif tool_name == 'table_qa' and file_content:
                     answer = tool(q, file_content)
                 elif tool_name in ['asr_transcribe', 'image_caption', 'code_analysis'] and file_content:
                 elif tool_name == 'youtube_video_qa':
                     answer = tool(q, q)
                 else:
                     if context:
                         answer = llama3_chat(build_prompt(context, q))
                     else:
                 continue
         self.reasoning_trace.append(f"Tools used: {tool_names}")
         self.reasoning_trace.append(f"Final answer: {answer}")
+        return gaia_normalize_answer(answer), self.reasoning_trace
 # --- Basic Agent Definition (now wraps ModularGAIAAgent) ---
 class BasicAgent: