Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Synthesizer Agent for GAIA Agent System | |
| GAIA-Compliant Final Answer Generation for Exact Match Evaluation | |
| """ | |
| import logging | |
| from typing import Dict, List, Optional, Any | |
| from statistics import mean | |
| from agents.state import GAIAAgentState, AgentRole, AgentResult | |
| from models.qwen_client import QwenClient, ModelTier | |
| from tools.final_answer_tool import FinalAnswerTool | |
| logger = logging.getLogger(__name__) | |
class SynthesizerAgent:
    """
    GAIA-compliant synthesizer that produces EXACT MATCH answers.

    Combines the results stored on the agent state, synthesizes them
    (with the 72B model when there are several results or the state asks
    for the complex model) and hands the synthesis to the final answer
    tool for extraction.
    """

    # Answers longer than this many characters are treated as suspicious
    # for exact-match grading and have their confidence scaled down.
    MAX_GAIA_ANSWER_CHARS = 100
    LONG_ANSWER_CONFIDENCE_FACTOR = 0.7

    # Failed results are only quoted as context when they carry more than
    # MIN_FAILED_CONTENT_CHARS characters; quotes are capped in length.
    MIN_FAILED_CONTENT_CHARS = 10
    FAILED_SNIPPET_CHARS = 200

    # Token budget for the synthesis completion.
    SYNTHESIS_MAX_TOKENS = 100

    # Suffixes accepted after a single-word keyword so that "counted" or
    # "names" still match, while "country" no longer matches "count".
    _WORD_INFLECTIONS = ("", "s", "es", "d", "ed", "ing")

    # Checked in order; the first category with a matching keyword wins.
    _QUESTION_TYPE_KEYWORDS = (
        ("mathematical", ("how many", "count", "number of", "calculate", "sum", "total")),
        ("text_manipulation", ("opposite", "reverse", "backwards", "decode")),
        ("yes_no", ("yes or no", "true or false", "is it", "does it", "can it")),
        ("name", ("who", "name", "first name", "last name", "surname")),
        ("location", ("where", "city", "country", "location", "place")),
        ("file_processing", ("file", "image", "code", "python", "attached", "excel")),
    )

    def __init__(self, llm_client: "QwenClient"):
        """Store the LLM client and build the final answer tool on top of it."""
        self.llm_client = llm_client
        self.final_answer_tool = FinalAnswerTool(llm_client)

    def process(self, state: "GAIAAgentState") -> "GAIAAgentState":
        """
        Synthesize GAIA-compliant final answer from agent results.

        Args:
            state: Agent state; its ``agent_results`` and ``question`` are
                read, and the ``final_*`` fields, ``answer_source`` and
                ``is_complete`` are written.

        Returns:
            The same ``state`` object. Any exception raised during
            synthesis is recorded on the state and turned into a
            "Processing error" answer with zero confidence.
        """
        logger.info("🎯 Synthesizer: Starting GAIA-compliant synthesis")
        state.add_processing_step("Synthesizer: Generating GAIA-compliant final answer")
        try:
            # Nothing to synthesize: finish with a placeholder answer.
            if not state.agent_results:
                logger.warning("No agent results available for synthesis")
                state.final_answer = "No results available"
                state.final_confidence = 0.0
                state.final_reasoning = "No agent results to synthesize"
                state.is_complete = True
                return state

            combined_analysis = self._combine_agent_results(state)
            question_type = self._determine_question_type(state.question)

            # Use the 72B model when there are multiple results or the
            # state flags the question as complex.
            needs_complex_model = (
                len(state.agent_results) > 1 or state.should_use_complex_model()
            )
            synthesize = (
                self._synthesize_with_72b if needs_complex_model else self._synthesize_simple
            )
            synthesis_result = synthesize(state, combined_analysis, question_type)

            # Extract GAIA-compliant final answer from the synthesis text.
            final_answer_result = self.final_answer_tool.extract_final_answer(
                question=state.question,
                agent_results=synthesis_result["analysis"],
                question_type=question_type,
            )

            state.final_answer = final_answer_result["answer"]
            state.final_confidence = final_answer_result["confidence"]
            state.final_reasoning = (
                f"Synthesis: {synthesis_result['reasoning']} | "
                f"Extraction: {final_answer_result['reasoning']}"
            )
            state.answer_source = "gaia_compliant_synthesis"
            state.is_complete = True

            # GAIA compliance check: long answers get reduced confidence.
            if len(state.final_answer) > self.MAX_GAIA_ANSWER_CHARS:
                logger.warning(
                    "Answer may be too long for GAIA: %d chars", len(state.final_answer)
                )
                state.final_confidence *= self.LONG_ANSWER_CONFIDENCE_FACTOR

            logger.info(
                "✅ GAIA synthesis complete: '%s' (conf: %.2f)",
                state.final_answer,
                state.final_confidence,
            )
            state.add_processing_step(
                f"Synthesizer: GAIA answer generated - '{state.final_answer}'"
            )
            return state
        except Exception as e:
            error_msg = f"GAIA synthesis failed: {str(e)}"
            state.add_error(error_msg)
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception(error_msg)
            # Fallback to simple answer
            state.final_answer = "Processing error"
            state.final_confidence = 0.0
            state.final_reasoning = error_msg
            state.answer_source = "error_fallback"
            state.is_complete = True
            return state

    def _combine_agent_results(self, state: "GAIAAgentState") -> str:
        """
        Combine all agent results into one analysis text.

        Successful results are listed first with confidence and reasoning.
        Failed results are appended as additional context only when they
        carry more than ``MIN_FAILED_CONTENT_CHARS`` characters of content.

        Args:
            state: Agent state whose ``agent_results`` are combined.

        Returns:
            The sections joined with newlines; empty when there is nothing
            to report.
        """
        analysis_parts = []

        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            analysis_parts.append("=== SUCCESSFUL AGENT RESULTS ===")
            for result in successful_results:
                analysis_parts.append(
                    f"\n{result.agent_role.value.upper()} (Confidence: {result.confidence:.2f}):\n"
                    f"Result: {result.result}\n"
                    f"Reasoning: {result.reasoning}\n"
                )

        # Collect failed attempts worth quoting before emitting the header,
        # so the header never appears without any content under it. A
        # missing (None) payload counts as empty instead of raising.
        failed_context = []
        for result in state.agent_results:
            if result.success:
                continue
            attempted = "" if result.result is None else str(result.result)
            if len(attempted) > self.MIN_FAILED_CONTENT_CHARS:
                failed_context.append((result, attempted))

        if failed_context:
            analysis_parts.append("\n=== ADDITIONAL CONTEXT ===")
            for result, attempted in failed_context:
                snippet = attempted[: self.FAILED_SNIPPET_CHARS]
                # Only mark the quote as truncated when it actually was.
                if len(attempted) > self.FAILED_SNIPPET_CHARS:
                    snippet += "..."
                analysis_parts.append(
                    f"\n{result.agent_role.value.upper()} (Failed):\n"
                    f"Attempted: {snippet}\n"
                )

        return "\n".join(analysis_parts)

    def _mentions_any(self, padded_text: str, keywords) -> bool:
        """Return True when any keyword occurs in ``padded_text`` as whole word(s).

        ``padded_text`` must be lower-case words separated by single spaces
        with one leading and one trailing space. Single-word keywords also
        match with one of ``_WORD_INFLECTIONS`` appended; multi-word phrases
        must match exactly.
        """
        for keyword in keywords:
            suffixes = ("",) if " " in keyword else self._WORD_INFLECTIONS
            if any(f" {keyword}{suffix} " in padded_text for suffix in suffixes):
                return True
        return False

    def _determine_question_type(self, question: str) -> str:
        """
        Determine question type for specialized answer extraction.

        Keywords are matched on word boundaries rather than as raw
        substrings, so e.g. "country" is not mistaken for "count" and
        "summary" is not mistaken for "sum".

        Args:
            question: The question text; ``None`` or empty is accepted.

        Returns:
            One of "mathematical", "text_manipulation", "yes_no", "name",
            "location", "file_processing" or "general".
        """
        # Replace punctuation with spaces and collapse whitespace so that
        # every word is delimited by exactly one space on both sides.
        lowered = (question or "").lower()
        words = "".join(ch if ch.isalnum() else " " for ch in lowered).split()
        padded_text = f" {' '.join(words)} "

        for question_type, keywords in self._QUESTION_TYPE_KEYWORDS:
            if self._mentions_any(padded_text, keywords):
                return question_type
        return "general"

    def _synthesize_with_72b(
        self, state: "GAIAAgentState", combined_analysis: str, question_type: str
    ) -> Dict[str, Any]:
        """
        Use 72B model for complex synthesis.

        Falls back to ``_synthesize_simple`` when the model call raises,
        reports failure, or returns an empty response.

        Args:
            state: Agent state providing the question and agent results.
            combined_analysis: Text produced by ``_combine_agent_results``.
            question_type: Category from ``_determine_question_type``.

        Returns:
            Dict with "analysis" (text handed to answer extraction) and
            "reasoning" (short description of how it was produced).
        """
        synthesis_prompt = "\n".join(
            [
                "",
                "CRITICAL: This is GAIA benchmark evaluation requiring EXACT MATCH answers.",
                f"Question: {state.question}",
                "Agent Analysis Results:",
                f"{combined_analysis}",
                "Your task: Analyze all agent results and provide the most accurate answer.",
                "GAIA COMPLIANCE RULES:",
                "- Your answer must be concise and precise for exact match comparison",
                '- No explanations, no "FINAL ANSWER:" prefix, no extra text',
                '- For numbers: just the number (e.g., "5")',
                '- For yes/no: just "yes" or "no"',
                "- For names: just the name requested",
                "- For locations: just the location name",
                f"Question Type: {question_type}",
                "Based on all the agent results above, what is the precise answer to the original question?",
                "Think carefully but respond with ONLY the answer:",
            ]
        )

        try:
            result = self.llm_client.generate(
                synthesis_prompt,
                tier=ModelTier.COMPLEX,  # 72B model
                max_tokens=self.SYNTHESIS_MAX_TOKENS,
            )
        except Exception:
            # A failing model call should degrade to the simple synthesis
            # rather than abort the whole run.
            logger.exception("72B synthesis call failed; falling back to simple synthesis")
            return self._synthesize_simple(state, combined_analysis, question_type)

        # An empty response gives the extractor nothing to work with, so it
        # is treated like a failed call.
        if result.success and (result.response or "").strip():
            return {
                "analysis": result.response,
                "reasoning": f"72B synthesis of {len(state.agent_results)} agent results",
            }

        # Fallback to simple synthesis
        return self._synthesize_simple(state, combined_analysis, question_type)

    def _synthesize_simple(
        self, state: "GAIAAgentState", combined_analysis: str, question_type: str
    ) -> Dict[str, Any]:
        """
        Simple synthesis for single agent results or fallback.

        Picks the successful result with the highest confidence; when none
        succeeded, uses the first result of any kind.

        Args:
            state: Agent state whose ``agent_results`` are inspected.
            combined_analysis: Unused here; kept for a signature parallel
                to ``_synthesize_with_72b``.
            question_type: Unused here; kept for the same reason.

        Returns:
            Dict with "analysis" and "reasoning" strings.
        """
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            best_result = max(successful_results, key=lambda r: r.confidence)
            return {
                "analysis": f"Primary result from {best_result.agent_role.value}: {best_result.result}",
                "reasoning": f"Single agent result from {best_result.agent_role.value}",
            }

        # No success: try to extract useful info from the first failure.
        fallback_result = next(iter(state.agent_results), None)
        if fallback_result is not None:
            return {
                "analysis": f"Fallback from {fallback_result.agent_role.value}: {fallback_result.result}",
                "reasoning": f"Fallback synthesis from {fallback_result.agent_role.value}",
            }

        return {
            "analysis": "No agent results available",
            "reasoning": "No synthesis possible - no results",
        }
| # Import regex for LLM response parsing | |
| import re |