File size: 9,346 Bytes
225a75e
 
 
5ec1e1b
225a75e
 
 
 
 
 
 
 
5ec1e1b
225a75e
 
 
 
 
5ec1e1b
 
225a75e
 
 
 
5ec1e1b
225a75e
 
 
5ec1e1b
225a75e
5ec1e1b
 
225a75e
 
5ec1e1b
225a75e
5ec1e1b
 
225a75e
5ec1e1b
225a75e
 
 
5ec1e1b
 
225a75e
5ec1e1b
 
 
 
 
 
225a75e
5ec1e1b
 
 
 
 
 
 
 
225a75e
 
5ec1e1b
 
 
 
225a75e
 
5ec1e1b
 
 
 
225a75e
5ec1e1b
 
225a75e
 
 
 
5ec1e1b
225a75e
 
 
5ec1e1b
 
225a75e
 
 
 
 
 
 
5ec1e1b
 
225a75e
5ec1e1b
225a75e
5ec1e1b
b55bafd
5ec1e1b
 
 
 
 
 
 
 
225a75e
5ec1e1b
b55bafd
5ec1e1b
 
 
 
 
 
 
 
225a75e
5ec1e1b
225a75e
5ec1e1b
 
225a75e
5ec1e1b
225a75e
5ec1e1b
 
 
225a75e
5ec1e1b
 
 
225a75e
5ec1e1b
 
 
225a75e
5ec1e1b
 
 
225a75e
5ec1e1b
 
 
225a75e
5ec1e1b
 
 
225a75e
5ec1e1b
225a75e
5ec1e1b
 
225a75e
 
5ec1e1b
 
225a75e
 
5ec1e1b
 
225a75e
5ec1e1b
225a75e
5ec1e1b
 
 
 
 
 
 
225a75e
5ec1e1b
 
 
 
225a75e
5ec1e1b
 
 
 
 
 
225a75e
5ec1e1b
225a75e
5ec1e1b
 
225a75e
 
5ec1e1b
 
225a75e
5ec1e1b
 
225a75e
5ec1e1b
b55bafd
225a75e
5ec1e1b
 
 
 
 
 
 
 
b55bafd
5ec1e1b
 
225a75e
5ec1e1b
 
225a75e
5ec1e1b
a248c93
5ec1e1b
 
a248c93
225a75e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#!/usr/bin/env python3
"""
Synthesizer Agent for GAIA Agent System
GAIA-Compliant Final Answer Generation for Exact Match Evaluation
"""

import logging
import re
from statistics import mean
from typing import Dict, List, Optional, Any

from agents.state import GAIAAgentState, AgentRole, AgentResult
from models.qwen_client import QwenClient, ModelTier
from tools.final_answer_tool import FinalAnswerTool

logger = logging.getLogger(__name__)

class SynthesizerAgent:
    """
    GAIA-compliant synthesizer that produces EXACT MATCH answers.

    Combines the results collected on the state, optionally asks the
    COMPLEX-tier model (the 72B model, per the original author's notes) to
    reconcile them, and then passes the synthesis to ``FinalAnswerTool`` for
    the final answer extraction.
    """

    # Answers longer than this many characters are flagged as probably too
    # verbose for exact-match scoring, and their confidence is scaled down.
    _MAX_ANSWER_CHARS = 100
    _LONG_ANSWER_PENALTY = 0.7

    # Failed agent output is only quoted when it is longer than the minimum,
    # and at most the first `_FAILED_PREVIEW_CHARS` characters are kept.
    _MIN_FAILED_CONTENT_CHARS = 10
    _FAILED_PREVIEW_CHARS = 200

    # Ordered (question_type, compiled pattern) pairs; the first match wins.
    # Keywords are matched on word boundaries so that e.g. "country" does not
    # trigger "count" and "excellent" does not trigger "excel". Single-word
    # keywords also accept simple inflections ("reversed", "files", "counting").
    _QUESTION_TYPE_PATTERNS = tuple(
        (
            question_type,
            re.compile(
                r"\b(?:"
                + "|".join(
                    re.escape(keyword)
                    + ("" if " " in keyword else r"(?:s|es|d|ed|ing)?")
                    for keyword in keywords
                )
                + r")\b"
            ),
        )
        for question_type, keywords in (
            # Mathematical/counting questions
            ("mathematical", ("how many", "count", "number of", "calculate", "sum", "total")),
            # Text manipulation (reversed text, opposites, etc.)
            ("text_manipulation", ("opposite", "reverse", "backwards", "decode")),
            # Yes/no questions
            ("yes_no", ("yes or no", "true or false", "is it", "does it", "can it")),
            # Name/person questions
            ("name", ("who", "whom", "whose", "name", "first name", "last name", "surname")),
            # Location questions
            ("location", ("where", "city", "country", "location", "place")),
            # File/code questions
            ("file_processing", ("file", "image", "code", "python", "attached", "excel")),
        )
    )

    def __init__(self, llm_client: "QwenClient"):
        """Store the LLM client and build the final answer tool on top of it."""
        self.llm_client = llm_client
        self.final_answer_tool = FinalAnswerTool(llm_client)

    def process(self, state: "GAIAAgentState") -> "GAIAAgentState":
        """
        Synthesize GAIA-compliant final answer from agent results.

        Fills ``final_answer``, ``final_confidence``, ``final_reasoning`` and
        ``is_complete`` on ``state`` and returns the same object. Any exception
        raised during synthesis is recorded on the state and replaced by a
        zero-confidence "Processing error" answer, so this method does not
        propagate errors to the caller.
        """
        logger.info("🎯 Synthesizer: Starting GAIA-compliant synthesis")
        state.add_processing_step("Synthesizer: Generating GAIA-compliant final answer")

        try:
            # Nothing to synthesize: finish immediately with a placeholder answer.
            if not state.agent_results:
                logger.warning("No agent results available for synthesis")
                state.final_answer = "No results available"
                state.final_confidence = 0.0
                state.final_reasoning = "No agent results to synthesize"
                state.is_complete = True
                return state

            # Combine all agent results into comprehensive analysis
            combined_analysis = self._combine_agent_results(state)

            # Determine question type for specialized extraction
            question_type = self._determine_question_type(state.question)

            # Use 72B model for synthesis if we have multiple results or complex question
            if len(state.agent_results) > 1 or state.should_use_complex_model():
                synthesis_result = self._synthesize_with_72b(state, combined_analysis, question_type)
            else:
                synthesis_result = self._synthesize_simple(state, combined_analysis, question_type)

            # Extract GAIA-compliant final answer
            final_answer_result = self.final_answer_tool.extract_final_answer(
                question=state.question,
                agent_results=synthesis_result["analysis"],
                question_type=question_type
            )

            # Update state with final results
            state.final_answer = final_answer_result["answer"]
            state.final_confidence = final_answer_result["confidence"]
            state.final_reasoning = f"Synthesis: {synthesis_result['reasoning']} | Extraction: {final_answer_result['reasoning']}"
            state.answer_source = "gaia_compliant_synthesis"
            state.is_complete = True

            # GAIA compliance check
            if len(state.final_answer) > self._MAX_ANSWER_CHARS:
                logger.warning(f"Answer may be too long for GAIA: {len(state.final_answer)} chars")
                state.final_confidence *= self._LONG_ANSWER_PENALTY  # Reduce confidence for long answers

            logger.info(f"✅ GAIA synthesis complete: '{state.final_answer}' (conf: {state.final_confidence:.2f})")
            state.add_processing_step(f"Synthesizer: GAIA answer generated - '{state.final_answer}'")

            return state

        except Exception as e:
            # Top-level boundary: never let synthesis crash the pipeline, but
            # keep the traceback in the log so the failure can be diagnosed.
            error_msg = f"GAIA synthesis failed: {str(e)}"
            state.add_error(error_msg)
            logger.exception(error_msg)

            # Fallback to simple answer
            state.final_answer = "Processing error"
            state.final_confidence = 0.0
            state.final_reasoning = error_msg
            state.answer_source = "error_fallback"
            state.is_complete = True

            return state

    def _combine_agent_results(self, state: "GAIAAgentState") -> str:
        """
        Combine all agent results into comprehensive analysis.

        Successful results are listed first with their confidence, result and
        reasoning. Failed results are appended as additional context only when
        they carry more than ``_MIN_FAILED_CONTENT_CHARS`` characters of
        output; that output is cut to ``_FAILED_PREVIEW_CHARS`` characters and
        marked with "..." only when something was actually cut.

        Returns:
            The sections joined with newlines ("" when there is nothing to report).
        """
        analysis_parts = []

        # Add successful results first
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            analysis_parts.append("=== SUCCESSFUL AGENT RESULTS ===")
            for result in successful_results:
                analysis_parts.append(
                    f"\n{result.agent_role.value.upper()} (Confidence: {result.confidence:.2f}):\n"
                    f"Result: {result.result}\n"
                    f"Reasoning: {result.reasoning}\n"
                )

        # Add failed results with useful information
        failed_entries = []
        for result in state.agent_results:
            if result.success:
                continue
            # A failed agent may have produced no output at all (None); treat
            # that as empty instead of letting len() raise and abort synthesis.
            attempted = "" if result.result is None else str(result.result)
            if len(attempted) <= self._MIN_FAILED_CONTENT_CHARS:
                continue  # Only include if has some content
            preview = attempted[:self._FAILED_PREVIEW_CHARS]
            if len(attempted) > self._FAILED_PREVIEW_CHARS:
                preview += "..."
            failed_entries.append(
                f"\n{result.agent_role.value.upper()} (Failed):\n"
                f"Attempted: {preview}\n"
            )

        # Emit the section header only when there is something under it.
        if failed_entries:
            analysis_parts.append("\n=== ADDITIONAL CONTEXT ===")
            analysis_parts.extend(failed_entries)

        return "\n".join(analysis_parts)

    def _determine_question_type(self, question: str) -> str:
        """
        Determine question type for specialized answer extraction.

        The question is lower-cased and tested against
        ``_QUESTION_TYPE_PATTERNS`` in order; the first category whose keyword
        appears as a whole word or phrase wins.

        Returns:
            One of "mathematical", "text_manipulation", "yes_no", "name",
            "location", "file_processing", or "general" when nothing matches
            (including an empty or missing question).
        """
        question_lower = (question or "").lower()

        for question_type, pattern in self._QUESTION_TYPE_PATTERNS:
            if pattern.search(question_lower):
                return question_type

        return "general"

    def _synthesize_with_72b(self, state: "GAIAAgentState", combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """
        Use 72B model for complex synthesis.

        Sends the combined agent analysis to the COMPLEX model tier and returns
        its response as the analysis. If the call raises, reports failure, or
        yields a blank response, the result of ``_synthesize_simple`` is
        returned instead.

        Returns:
            Dict with "analysis" and "reasoning" keys.
        """
        synthesis_prompt = f"""
CRITICAL: This is GAIA benchmark evaluation requiring EXACT MATCH answers.

Question: {state.question}

Agent Analysis Results:
{combined_analysis}

Your task: Analyze all agent results and provide the most accurate answer.

GAIA COMPLIANCE RULES:
- Your answer must be concise and precise for exact match comparison
- No explanations, no "FINAL ANSWER:" prefix, no extra text
- For numbers: just the number (e.g., "5")
- For yes/no: just "yes" or "no"
- For names: just the name requested
- For locations: just the location name

Question Type: {question_type}

Based on all the agent results above, what is the precise answer to the original question?
Think carefully but respond with ONLY the answer:"""

        # Use 72B model for synthesis
        try:
            result = self.llm_client.generate(
                synthesis_prompt,
                tier=ModelTier.COMPLEX,  # 72B model
                max_tokens=100
            )
        except Exception as exc:
            # A client failure should degrade to the simple path, exactly like
            # an unsuccessful response, rather than failing the whole synthesis.
            logger.warning("72B synthesis raised %s; falling back to simple synthesis", exc)
            return self._synthesize_simple(state, combined_analysis, question_type)

        # A "successful" but blank response carries nothing to extract from.
        if result.success and str(result.response or "").strip():
            return {
                "analysis": result.response,
                "reasoning": f"72B synthesis of {len(state.agent_results)} agent results"
            }

        # Fallback to simple synthesis
        return self._synthesize_simple(state, combined_analysis, question_type)

    def _synthesize_simple(self, state: "GAIAAgentState", combined_analysis: str, question_type: str) -> Dict[str, Any]:
        """
        Simple synthesis for single agent results or fallback.

        Picks the successful result with the highest confidence; if none
        succeeded, uses the first recorded result; if there are no results at
        all, returns a placeholder. ``combined_analysis`` and ``question_type``
        are accepted for signature parity with ``_synthesize_with_72b`` and are
        not used here.

        Returns:
            Dict with "analysis" and "reasoning" keys.
        """
        # Find the best available result
        successful_results = [r for r in state.agent_results if r.success]
        if successful_results:
            best_result = max(successful_results, key=lambda r: r.confidence)
            return {
                "analysis": f"Primary result from {best_result.agent_role.value}: {best_result.result}",
                "reasoning": f"Single agent result from {best_result.agent_role.value}"
            }

        # Try to extract useful info from failures
        all_results = list(state.agent_results)
        if all_results:
            fallback_result = all_results[0]  # Use first available result
            return {
                "analysis": f"Fallback from {fallback_result.agent_role.value}: {fallback_result.result}",
                "reasoning": f"Fallback synthesis from {fallback_result.agent_role.value}"
            }

        return {
            "analysis": "No agent results available",
            "reasoning": "No synthesis possible - no results"
        }

# Import regex for LLM response parsing
import re