Spaces:
Sleeping
Sleeping
File size: 9,346 Bytes
225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b b55bafd 5ec1e1b 225a75e 5ec1e1b b55bafd 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b b55bafd 225a75e 5ec1e1b b55bafd 5ec1e1b 225a75e 5ec1e1b 225a75e 5ec1e1b a248c93 5ec1e1b a248c93 225a75e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
#!/usr/bin/env python3
"""
Synthesizer Agent for GAIA Agent System
GAIA-Compliant Final Answer Generation for Exact Match Evaluation
"""
import logging
from typing import Dict, List, Optional, Any
from statistics import mean
from agents.state import GAIAAgentState, AgentRole, AgentResult
from models.qwen_client import QwenClient, ModelTier
from tools.final_answer_tool import FinalAnswerTool
logger = logging.getLogger(__name__)
class SynthesizerAgent:
    """
    GAIA-compliant synthesizer that produces EXACT MATCH answers.

    Combines all upstream agent results into a single analysis text,
    optionally runs a 72B-model synthesis pass for complex questions, and
    delegates final-answer extraction to FinalAnswerTool so the output is
    short enough for GAIA's exact-match scoring.
    """

    def __init__(self, llm_client: QwenClient):
        """
        Args:
            llm_client: Shared Qwen client, used both for the synthesis
                prompt and by the final-answer extraction tool.
        """
        self.llm_client = llm_client
        self.final_answer_tool = FinalAnswerTool(llm_client)

    def process(self, state: GAIAAgentState) -> GAIAAgentState:
        """
        Synthesize a GAIA-compliant final answer from agent results.

        Mutates and returns ``state``: sets ``final_answer``,
        ``final_confidence``, ``final_reasoning``, ``answer_source`` and
        marks the state complete. Never propagates an exception — any
        failure is recorded on the state and a fallback answer is produced.
        """
        logger.info("🎯 Synthesizer: Starting GAIA-compliant synthesis")
        state.add_processing_step("Synthesizer: Generating GAIA-compliant final answer")
        try:
            # Nothing to synthesize — short-circuit with an explicit answer.
            if not state.agent_results:
                logger.warning("No agent results available for synthesis")
                state.final_answer = "No results available"
                state.final_confidence = 0.0
                state.final_reasoning = "No agent results to synthesize"
                state.is_complete = True
                return state

            # Combine all agent results into a comprehensive analysis text.
            combined_analysis = self._combine_agent_results(state)

            # Classify the question so extraction can use specialized rules.
            question_type = self._determine_question_type(state.question)

            # Use the 72B model when multiple agents contributed or the state
            # flags the question as complex; otherwise take the cheap path.
            if len(state.agent_results) > 1 or state.should_use_complex_model():
                synthesis_result = self._synthesize_with_72b(state, combined_analysis, question_type)
            else:
                synthesis_result = self._synthesize_simple(state, combined_analysis, question_type)

            # Extract the GAIA-compliant (short, exact-match) final answer.
            final_answer_result = self.final_answer_tool.extract_final_answer(
                question=state.question,
                agent_results=synthesis_result["analysis"],
                question_type=question_type
            )

            # Update state with final results.
            state.final_answer = final_answer_result["answer"]
            state.final_confidence = final_answer_result["confidence"]
            state.final_reasoning = f"Synthesis: {synthesis_result['reasoning']} | Extraction: {final_answer_result['reasoning']}"
            state.answer_source = "gaia_compliant_synthesis"
            state.is_complete = True

            # GAIA compliance check: exact-match scoring makes long answers
            # almost certainly wrong, so penalize their confidence.
            # Guard against a None answer from the extraction tool.
            if state.final_answer and len(state.final_answer) > 100:
                logger.warning(f"Answer may be too long for GAIA: {len(state.final_answer)} chars")
                state.final_confidence *= 0.7  # Reduce confidence for long answers

            logger.info(f"✅ GAIA synthesis complete: '{state.final_answer}' (conf: {state.final_confidence:.2f})")
            state.add_processing_step(f"Synthesizer: GAIA answer generated - '{state.final_answer}'")
            return state
        except Exception as e:
            # Top-level boundary: record the failure and return a safe
            # fallback state instead of letting the pipeline crash.
            error_msg = f"GAIA synthesis failed: {str(e)}"
            state.add_error(error_msg)
            logger.error(error_msg)
            state.final_answer = "Processing error"
            state.final_confidence = 0.0
            state.final_reasoning = error_msg
            state.answer_source = "error_fallback"
            state.is_complete = True
            return state

    def _combine_agent_results(self, state: GAIAAgentState) -> str:
        """Combine all agent results into one analysis text for synthesis.

        Successful results come first with full detail; failed results are
        appended (truncated to 200 chars) only when they carry some content.
        """
        analysis_parts = []

        # Add successful results first.
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            analysis_parts.append("=== SUCCESSFUL AGENT RESULTS ===")
            for result in successful_results:
                analysis_parts.append(f"""
{result.agent_role.value.upper()} (Confidence: {result.confidence:.2f}):
Result: {result.result}
Reasoning: {result.reasoning}
""")

        # Add failed results that still contain useful information.
        failed_results = [r for r in state.agent_results if not r.success]
        if failed_results:
            analysis_parts.append("\n=== ADDITIONAL CONTEXT ===")
            for result in failed_results:
                # Truthiness guard: a failed agent may carry result=None,
                # which would make len() raise. Include only real content.
                if result.result and len(result.result) > 10:
                    analysis_parts.append(f"""
{result.agent_role.value.upper()} (Failed):
Attempted: {result.result[:200]}...
""")

        return "\n".join(analysis_parts)

    def _determine_question_type(self, question: str) -> str:
        """Classify the question for specialized answer extraction.

        Checks are ordered by specificity; the first matching category wins.
        Returns one of: "mathematical", "text_manipulation", "yes_no",
        "name", "location", "file_processing", "general".
        """
        question_lower = question.lower()

        # Mathematical/counting questions
        if any(word in question_lower for word in ["how many", "count", "number of", "calculate", "sum", "total"]):
            return "mathematical"

        # Text manipulation (reversed text, opposites, etc.)
        if any(word in question_lower for word in ["opposite", "reverse", "backwards", "decode"]):
            return "text_manipulation"

        # Yes/no questions
        if any(word in question_lower for word in ["yes or no", "true or false", "is it", "does it", "can it"]):
            return "yes_no"

        # Name/person questions
        if any(word in question_lower for word in ["who", "name", "first name", "last name", "surname"]):
            return "name"

        # Location questions
        if any(word in question_lower for word in ["where", "city", "country", "location", "place"]):
            return "location"

        # File/code questions
        if any(word in question_lower for word in ["file", "image", "code", "python", "attached", "excel"]):
            return "file_processing"

        return "general"

    def _synthesize_with_72b(self, state: GAIAAgentState, combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """Run the 72B model over the combined analysis for complex synthesis.

        Falls back to :meth:`_synthesize_simple` when the model call fails.
        Returns a dict with "analysis" (text for extraction) and "reasoning".
        """
        synthesis_prompt = f"""
CRITICAL: This is GAIA benchmark evaluation requiring EXACT MATCH answers.
Question: {state.question}
Agent Analysis Results:
{combined_analysis}
Your task: Analyze all agent results and provide the most accurate answer.
GAIA COMPLIANCE RULES:
- Your answer must be concise and precise for exact match comparison
- No explanations, no "FINAL ANSWER:" prefix, no extra text
- For numbers: just the number (e.g., "5")
- For yes/no: just "yes" or "no"
- For names: just the name requested
- For locations: just the location name
Question Type: {question_type}
Based on all the agent results above, what is the precise answer to the original question?
Think carefully but respond with ONLY the answer:"""

        # Use 72B model for synthesis; max_tokens is small because the answer
        # must be a short exact-match string.
        result = self.llm_client.generate(
            synthesis_prompt,
            tier=ModelTier.COMPLEX,  # 72B model
            max_tokens=100
        )

        if result.success:
            return {
                "analysis": result.response,
                "reasoning": f"72B synthesis of {len(state.agent_results)} agent results"
            }
        # Fallback to simple synthesis when the model call fails.
        return self._synthesize_simple(state, combined_analysis, question_type)

    def _synthesize_simple(self, state: GAIAAgentState, combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """Cheap synthesis: pick the best single agent result, no model call.

        ``combined_analysis`` and ``question_type`` are accepted to mirror
        :meth:`_synthesize_with_72b`'s signature but are not used here.
        Returns a dict with "analysis" and "reasoning" keys.
        """
        # Prefer the highest-confidence successful result.
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            best_result = max(successful_results, key=lambda r: r.confidence)
            return {
                "analysis": f"Primary result from {best_result.agent_role.value}: {best_result.result}",
                "reasoning": f"Single agent result from {best_result.agent_role.value}"
            }

        # No successes: fall back to the first available (failed) result,
        # which may still contain partial information.
        all_results = list(state.agent_results)
        if all_results:
            fallback_result = all_results[0]  # Use first available result
            return {
                "analysis": f"Fallback from {fallback_result.agent_role.value}: {fallback_result.result}",
                "reasoning": f"Fallback synthesis from {fallback_result.agent_role.value}"
            }

        return {
            "analysis": "No agent results available",
            "reasoning": "No synthesis possible - no results"
        }
# NOTE(review): `re` is not referenced anywhere in this module's visible code,
# and PEP 8 places imports at the top of the file — confirm it is actually
# needed (e.g. by code outside this view) before removing or relocating it.
import re