# Chris
# Final 7.6.3
# b55bafd
#!/usr/bin/env python3
"""
Synthesizer Agent for GAIA Agent System
GAIA-Compliant Final Answer Generation for Exact Match Evaluation
"""
import logging
from typing import Dict, List, Optional, Any
from statistics import mean
from agents.state import GAIAAgentState, AgentRole, AgentResult
from models.qwen_client import QwenClient, ModelTier
from tools.final_answer_tool import FinalAnswerTool
logger = logging.getLogger(__name__)
class SynthesizerAgent:
    """
    GAIA-compliant synthesizer that produces EXACT MATCH answers.

    Aggregates the results of all upstream agents, chooses between a
    72B-model synthesis and a simple best-result synthesis, then runs
    the final answer tool to extract a short, exact-match-ready answer.
    """

    # (question_type, keywords) pairs checked in priority order by
    # _determine_question_type; the first rule with a matching keyword wins,
    # so e.g. "how many people" classifies as "mathematical", not "name".
    _QUESTION_TYPE_RULES = [
        ("mathematical", ("how many", "count", "number of", "calculate", "sum", "total")),
        ("text_manipulation", ("opposite", "reverse", "backwards", "decode")),
        ("yes_no", ("yes or no", "true or false", "is it", "does it", "can it")),
        ("name", ("who", "name", "first name", "last name", "surname")),
        ("location", ("where", "city", "country", "location", "place")),
        ("file_processing", ("file", "image", "code", "python", "attached", "excel")),
    ]

    def __init__(self, llm_client: QwenClient):
        """
        Args:
            llm_client: LLM client used both for direct synthesis calls
                and (indirectly) by the final answer extraction tool.
        """
        self.llm_client = llm_client
        self.final_answer_tool = FinalAnswerTool(llm_client)

    def process(self, state: GAIAAgentState) -> GAIAAgentState:
        """
        Synthesize a GAIA-compliant final answer from agent results.

        Mutates and returns ``state``: sets final_answer, final_confidence,
        final_reasoning and answer_source, and marks the state complete.
        Never raises — any failure is recorded on the state and replaced
        by an error fallback answer.
        """
        logger.info("🎯 Synthesizer: Starting GAIA-compliant synthesis")
        state.add_processing_step("Synthesizer: Generating GAIA-compliant final answer")
        try:
            # No upstream results at all: nothing to synthesize.
            if not state.agent_results:
                logger.warning("No agent results available for synthesis")
                state.final_answer = "No results available"
                state.final_confidence = 0.0
                state.final_reasoning = "No agent results to synthesize"
                state.is_complete = True
                return state

            # Combine all agent results into a comprehensive analysis.
            combined_analysis = self._combine_agent_results(state)

            # Classify the question so extraction can be specialized.
            question_type = self._determine_question_type(state.question)

            # Multiple results or a complex question warrant the 72B model.
            if len(state.agent_results) > 1 or state.should_use_complex_model():
                synthesis_result = self._synthesize_with_72b(state, combined_analysis, question_type)
            else:
                synthesis_result = self._synthesize_simple(state, combined_analysis, question_type)

            # Extract the GAIA-compliant final answer from the synthesis.
            final_answer_result = self.final_answer_tool.extract_final_answer(
                question=state.question,
                agent_results=synthesis_result["analysis"],
                question_type=question_type
            )

            # Update state with final results.
            state.final_answer = final_answer_result["answer"]
            state.final_confidence = final_answer_result["confidence"]
            state.final_reasoning = (
                f"Synthesis: {synthesis_result['reasoning']} | "
                f"Extraction: {final_answer_result['reasoning']}"
            )
            state.answer_source = "gaia_compliant_synthesis"
            state.is_complete = True

            # GAIA compliance check: exact-match answers should be short.
            # str() guards against a non-string answer from the tool.
            answer_text = str(state.final_answer)
            if len(answer_text) > 100:
                logger.warning("Answer may be too long for GAIA: %d chars", len(answer_text))
                state.final_confidence *= 0.7  # Reduce confidence for long answers

            logger.info(
                "✅ GAIA synthesis complete: '%s' (conf: %.2f)",
                state.final_answer, state.final_confidence,
            )
            state.add_processing_step(f"Synthesizer: GAIA answer generated - '{state.final_answer}'")
            return state
        except Exception as e:
            # Top-level boundary: the pipeline must always get a completed
            # state back, so record the error and fall back.
            error_msg = f"GAIA synthesis failed: {str(e)}"
            state.add_error(error_msg)
            logger.error(error_msg)
            state.final_answer = "Processing error"
            state.final_confidence = 0.0
            state.final_reasoning = error_msg
            state.answer_source = "error_fallback"
            state.is_complete = True
            return state

    def _combine_agent_results(self, state: GAIAAgentState) -> str:
        """Combine all agent results into one comprehensive analysis string."""
        analysis_parts = []

        # Successful results first — these carry the primary signal.
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            analysis_parts.append("=== SUCCESSFUL AGENT RESULTS ===")
            for result in successful_results:
                analysis_parts.append(f"""
{result.agent_role.value.upper()} (Confidence: {result.confidence:.2f}):
Result: {result.result}
Reasoning: {result.reasoning}
""")

        # Failed results may still carry useful partial context.
        failed_results = [r for r in state.agent_results if not r.success]
        if failed_results:
            analysis_parts.append("\n=== ADDITIONAL CONTEXT ===")
            for result in failed_results:
                # Failed agents may leave result=None; coerce before
                # measuring/slicing. Only include non-trivial content.
                attempted = "" if result.result is None else str(result.result)
                if len(attempted) > 10:
                    analysis_parts.append(f"""
{result.agent_role.value.upper()} (Failed):
Attempted: {attempted[:200]}...
""")
        return "\n".join(analysis_parts)

    def _determine_question_type(self, question: str) -> str:
        """Classify the question for specialized answer extraction.

        Returns the first matching type from _QUESTION_TYPE_RULES, or
        "general" when no keyword matches.
        """
        question_lower = question.lower()
        for question_type, keywords in self._QUESTION_TYPE_RULES:
            if any(word in question_lower for word in keywords):
                return question_type
        return "general"

    def _synthesize_with_72b(self, state: GAIAAgentState, combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """Use the 72B model to synthesize across multiple agent results.

        Returns a dict with "analysis" (text for answer extraction) and
        "reasoning". Falls back to _synthesize_simple if the model fails.
        """
        synthesis_prompt = f"""
CRITICAL: This is GAIA benchmark evaluation requiring EXACT MATCH answers.
Question: {state.question}
Agent Analysis Results:
{combined_analysis}
Your task: Analyze all agent results and provide the most accurate answer.
GAIA COMPLIANCE RULES:
- Your answer must be concise and precise for exact match comparison
- No explanations, no "FINAL ANSWER:" prefix, no extra text
- For numbers: just the number (e.g., "5")
- For yes/no: just "yes" or "no"
- For names: just the name requested
- For locations: just the location name
Question Type: {question_type}
Based on all the agent results above, what is the precise answer to the original question?
Think carefully but respond with ONLY the answer:"""
        result = self.llm_client.generate(
            synthesis_prompt,
            tier=ModelTier.COMPLEX,  # 72B model
            max_tokens=100
        )
        if result.success:
            return {
                "analysis": result.response,
                "reasoning": f"72B synthesis of {len(state.agent_results)} agent results"
            }
        # Model call failed: degrade gracefully to simple synthesis.
        return self._synthesize_simple(state, combined_analysis, question_type)

    def _synthesize_simple(self, state: GAIAAgentState, combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """Synthesize from the single best result, or any result as fallback.

        Note: combined_analysis and question_type are unused here; the
        parameters are kept for signature parity with _synthesize_with_72b.
        """
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            # Highest-confidence successful result wins.
            best_result = max(successful_results, key=lambda r: r.confidence)
            return {
                "analysis": f"Primary result from {best_result.agent_role.value}: {best_result.result}",
                "reasoning": f"Single agent result from {best_result.agent_role.value}"
            }
        # No successes: salvage what we can from the first available result.
        all_results = list(state.agent_results)
        if all_results:
            fallback_result = all_results[0]
            return {
                "analysis": f"Fallback from {fallback_result.agent_role.value}: {fallback_result.result}",
                "reasoning": f"Fallback synthesis from {fallback_result.agent_role.value}"
            }
        return {
            "analysis": "No agent results available",
            "reasoning": "No synthesis possible - no results"
        }
# NOTE(review): `re` appears unused in this module — confirm before removing
import re