|
|
|
|
|
"""
Synthesizer Agent for GAIA Agent System
GAIA-Compliant Final Answer Generation for Exact Match Evaluation
"""
|
|
|
|
|
import logging
import re
from statistics import mean
from typing import Dict, List, Optional, Any

from agents.state import GAIAAgentState, AgentRole, AgentResult
from models.qwen_client import QwenClient, ModelTier
from tools.final_answer_tool import FinalAnswerTool
|
|
|
|
|
# Module-level logger, named after this module so handlers/levels can be
# configured per module by the application.
logger = logging.getLogger(__name__)
|
|
|
|
|
class SynthesizerAgent:
    """
    GAIA-compliant synthesizer that produces EXACT MATCH answers.

    Uses the complex-tier (72B) model and the final answer tool for precise
    extraction.
    """

    # Answers longer than this are logged as suspicious and have their
    # confidence multiplied by ``_LONG_ANSWER_PENALTY``.
    _MAX_ANSWER_CHARS = 100
    _LONG_ANSWER_PENALTY = 0.7

    # Failed agent output is only quoted as context when it is longer than
    # ``_MIN_FAILED_CHARS``; at most ``_FAILED_PREVIEW_CHARS`` are quoted.
    _MIN_FAILED_CHARS = 10
    _FAILED_PREVIEW_CHARS = 200

    # Token budget for the complex-model synthesis call.
    _SYNTHESIS_MAX_TOKENS = 100

    # (question_type, keywords) in priority order: the first row with a
    # matching keyword wins, so earlier rows shadow later ones.
    # "whom"/"whose" are listed explicitly because keywords are matched as
    # whole words and would otherwise no longer be covered by "who".
    _QUESTION_TYPE_KEYWORDS = (
        ("mathematical", ("how many", "count", "number of", "calculate", "sum", "total")),
        ("text_manipulation", ("opposite", "reverse", "backwards", "decode")),
        ("yes_no", ("yes or no", "true or false", "is it", "does it", "can it")),
        ("name", ("who", "whom", "whose", "name", "first name", "last name", "surname")),
        ("location", ("where", "city", "country", "location", "place")),
        ("file_processing", ("file", "image", "code", "python", "attached", "excel")),
    )

    def __init__(self, llm_client: QwenClient):
        """
        Store the LLM client and build the final answer tool on top of it.

        Args:
            llm_client: Client used both for synthesis and by the final
                answer tool.
        """
        self.llm_client = llm_client
        self.final_answer_tool = FinalAnswerTool(llm_client)

    def process(self, state: GAIAAgentState) -> GAIAAgentState:
        """
        Synthesize GAIA-compliant final answer from agent results.

        Fills ``final_answer``, ``final_confidence``, ``final_reasoning`` and
        ``is_complete`` on *state* and returns that same object.

        Any exception raised while synthesizing is recorded on the state and
        replaced by a zero-confidence "Processing error" answer; it is not
        propagated to the caller.

        Args:
            state: Shared agent state holding the question and agent results.

        Returns:
            The mutated *state*.
        """
        logger.info("🎯 Synthesizer: Starting GAIA-compliant synthesis")
        state.add_processing_step("Synthesizer: Generating GAIA-compliant final answer")

        try:
            # Nothing to synthesize: finish immediately with a placeholder.
            if not state.agent_results:
                logger.warning("No agent results available for synthesis")
                state.final_answer = "No results available"
                state.final_confidence = 0.0
                state.final_reasoning = "No agent results to synthesize"
                state.is_complete = True
                return state

            combined_analysis = self._combine_agent_results(state)
            question_type = self._determine_question_type(state.question)

            # Several results (or a state that asks for the complex model)
            # go through the 72B model; a lone result is passed on directly.
            if len(state.agent_results) > 1 or state.should_use_complex_model():
                synthesis_result = self._synthesize_with_72b(state, combined_analysis, question_type)
            else:
                synthesis_result = self._synthesize_simple(state, combined_analysis, question_type)

            final_answer_result = self.final_answer_tool.extract_final_answer(
                question=state.question,
                agent_results=synthesis_result["analysis"],
                question_type=question_type,
            )

            state.final_answer = final_answer_result["answer"]
            state.final_confidence = final_answer_result["confidence"]
            state.final_reasoning = (
                f"Synthesis: {synthesis_result['reasoning']} | "
                f"Extraction: {final_answer_result['reasoning']}"
            )
            state.answer_source = "gaia_compliant_synthesis"
            state.is_complete = True

            # Exact-match answers are expected to be short; a long one is
            # kept but trusted less.
            if len(state.final_answer) > self._MAX_ANSWER_CHARS:
                logger.warning(
                    "Answer may be too long for GAIA: %d chars", len(state.final_answer)
                )
                state.final_confidence *= self._LONG_ANSWER_PENALTY

            logger.info(
                "✅ GAIA synthesis complete: '%s' (conf: %.2f)",
                state.final_answer,
                state.final_confidence,
            )
            state.add_processing_step(
                f"Synthesizer: GAIA answer generated - '{state.final_answer}'"
            )
            return state

        except Exception as e:  # boundary: always hand back a completed state
            error_msg = f"GAIA synthesis failed: {e}"
            state.add_error(error_msg)
            # ``exception`` logs at ERROR level and keeps the traceback.
            logger.exception(error_msg)

            state.final_answer = "Processing error"
            state.final_confidence = 0.0
            state.final_reasoning = error_msg
            state.answer_source = "error_fallback"
            state.is_complete = True
            return state

    def _combine_agent_results(self, state: GAIAAgentState) -> str:
        """
        Combine all agent results into one text block for the synthesis prompt.

        Successful results are listed first with confidence, result and
        reasoning. Failed results follow under an "ADDITIONAL CONTEXT" header,
        but only those whose output is longer than ``_MIN_FAILED_CHARS``;
        their output is cut to ``_FAILED_PREVIEW_CHARS`` characters.

        Args:
            state: State whose ``agent_results`` are rendered.

        Returns:
            The sections joined by newlines ("" when there are no results).
        """
        analysis_parts = []

        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            analysis_parts.append("=== SUCCESSFUL AGENT RESULTS ===")
            for result in successful_results:
                analysis_parts.append(
                    f"\n{result.agent_role.value.upper()} (Confidence: {result.confidence:.2f}):\n"
                    f"Result: {result.result}\n"
                    f"Reasoning: {result.reasoning}\n"
                )

        failed_results = [r for r in state.agent_results if not r.success]
        if failed_results:
            analysis_parts.append("\n=== ADDITIONAL CONTEXT ===")
            for result in failed_results:
                # A failed agent may carry no output at all; treat None as
                # empty instead of letting len() raise and abort synthesis.
                attempted = result.result if result.result is not None else ""
                if len(attempted) > self._MIN_FAILED_CHARS:
                    analysis_parts.append(
                        f"\n{result.agent_role.value.upper()} (Failed):\n"
                        f"Attempted: {attempted[:self._FAILED_PREVIEW_CHARS]}...\n"
                    )

        return "\n".join(analysis_parts)

    @staticmethod
    def _mentions(text: str, keyword: str) -> bool:
        """
        Return True when *keyword* occurs in *text* as a whole word or phrase.

        Multi-word phrases must match exactly (any whitespace run between the
        words). Single words may carry a simple inflection suffix, so
        "counts" or "reversed" still match while "country" or "whole" do not.
        """
        words = keyword.split()
        pattern = r"\s+".join(re.escape(word) for word in words)
        if len(words) == 1:
            pattern += r"(?:s|es|d|ed|ing)?"
        return re.search(rf"\b{pattern}\b", text) is not None

    def _determine_question_type(self, question: str) -> str:
        """
        Determine question type for specialized answer extraction.

        This is a keyword heuristic: the lower-cased question is checked
        against ``_QUESTION_TYPE_KEYWORDS`` in order and the first category
        with a whole-word hit is returned.

        Args:
            question: The original question text.

        Returns:
            One of "mathematical", "text_manipulation", "yes_no", "name",
            "location", "file_processing", or "general" when nothing matches.
        """
        question_lower = question.lower()

        for question_type, keywords in self._QUESTION_TYPE_KEYWORDS:
            if any(self._mentions(question_lower, keyword) for keyword in keywords):
                return question_type

        return "general"

    def _synthesize_with_72b(self, state: GAIAAgentState, combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """
        Use 72B model for complex synthesis.

        Sends the question, the combined agent analysis and the GAIA answer
        rules to the complex model tier. When the call reports failure, falls
        back to ``_synthesize_simple``.

        Args:
            state: State providing the question and agent results.
            combined_analysis: Text produced by ``_combine_agent_results``.
            question_type: Category from ``_determine_question_type``.

        Returns:
            Dict with "analysis" (text handed to answer extraction) and
            "reasoning" (short provenance note).
        """
        synthesis_prompt = (
            "\n"
            "CRITICAL: This is GAIA benchmark evaluation requiring EXACT MATCH answers.\n"
            "\n"
            f"Question: {state.question}\n"
            "\n"
            "Agent Analysis Results:\n"
            f"{combined_analysis}\n"
            "\n"
            "Your task: Analyze all agent results and provide the most accurate answer.\n"
            "\n"
            "GAIA COMPLIANCE RULES:\n"
            "- Your answer must be concise and precise for exact match comparison\n"
            '- No explanations, no "FINAL ANSWER:" prefix, no extra text\n'
            '- For numbers: just the number (e.g., "5")\n'
            '- For yes/no: just "yes" or "no"\n'
            "- For names: just the name requested\n"
            "- For locations: just the location name\n"
            "\n"
            f"Question Type: {question_type}\n"
            "\n"
            "Based on all the agent results above, what is the precise answer to the original question?\n"
            "Think carefully but respond with ONLY the answer:"
        )

        result = self.llm_client.generate(
            synthesis_prompt,
            tier=ModelTier.COMPLEX,
            max_tokens=self._SYNTHESIS_MAX_TOKENS,
        )

        if result.success:
            return {
                "analysis": result.response,
                "reasoning": f"72B synthesis of {len(state.agent_results)} agent results",
            }

        # Model call failed: degrade to the best single agent result.
        return self._synthesize_simple(state, combined_analysis, question_type)

    def _synthesize_simple(self, state: GAIAAgentState, combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """
        Simple synthesis for single agent results or fallback.

        Picks the successful result with the highest confidence; without any
        successful result, the first result of any kind; without results at
        all, a fixed placeholder. ``combined_analysis`` and ``question_type``
        are accepted for signature parity with ``_synthesize_with_72b`` and
        are not used.

        Returns:
            Dict with "analysis" and "reasoning" strings.
        """
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            best_result = max(successful_results, key=lambda r: r.confidence)
            return {
                "analysis": f"Primary result from {best_result.agent_role.value}: {best_result.result}",
                "reasoning": f"Single agent result from {best_result.agent_role.value}",
            }

        # No success anywhere: surface whatever the first agent produced.
        all_results = list(state.agent_results)
        if all_results:
            fallback_result = all_results[0]
            return {
                "analysis": f"Fallback from {fallback_result.agent_role.value}: {fallback_result.result}",
                "reasoning": f"Fallback synthesis from {fallback_result.agent_role.value}",
            }

        return {
            "analysis": "No agent results available",
            "reasoning": "No synthesis possible - no results",
        }
|
|
|
|
|
|
|
|
import re |