File size: 7,992 Bytes
d7fb055
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ced7ae9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# Reflection (Peer-Review) Agent for the AI Co-Scientist system

import logging
import re
from typing import Any, Dict, List

from .base_agent import BaseAgent

class ReflectionAgent(BaseAgent):
    """Agent responsible for critically reviewing hypotheses, similar to a peer reviewer.

    Wraps an LLM call (via BaseAgent.get_response) with a peer-review system
    prompt, then derives a crude structured assessment and numeric score from
    the free-text review.
    """

    # Sentiment vocabularies for the naive scoring heuristic in _estimate_score.
    # Terms are matched as whole words: the previous substring check meant
    # "invalid" also counted as a hit for "valid", "implausible" for
    # "plausible", and "inconsistent" for "consistent", double-counting every
    # negative finding as a positive one and dragging scores toward 0.5.
    _POSITIVE_TERMS = ('strong', 'valid', 'plausible', 'consistent', 'novel', 'innovative')
    _NEGATIVE_TERMS = ('weak', 'invalid', 'implausible', 'inconsistent', 'contradicts', 'flawed')

    def __init__(self, model=None, temperature=None):
        """Initialize the Reflection Agent.

        Args:
            model: Optional model override
            temperature: Optional temperature override; defaults to 0.2 for
                more deterministic, analytical reviewing
        """
        system_prompt = """
        You are a Reflection Agent in an AI Co-Scientist system, responsible for critically reviewing 
        scientific hypotheses. You have expertise across multiple scientific disciplines at a PhD level 
        and act as a rigorous peer reviewer.
        
        Your role is to:
        1. Critically evaluate hypotheses for scientific soundness and plausibility
        2. Cross-check hypotheses against known facts and literature
        3. Identify weaknesses, inconsistencies, or logical flaws
        4. Flag any ethical concerns or practical limitations
        5. Assess the novelty and potential impact of each hypothesis
        6. Run simulation reviews to envision how the hypothesis would play out
        7. Suggest specific improvements or alternative approaches
        
        For each hypothesis you review, provide:
        - Overall assessment of scientific validity and plausibility
        - Specific strengths identified in the hypothesis
        - Critical weaknesses or inconsistencies found
        - Potential contradictions with established knowledge
        - Ethical considerations or concerning implications
        - Practical limitations for testing or implementation
        - Suggested modifications to strengthen the hypothesis
        
        Be fair but rigorous in your assessment. Your goal is not to dismiss hypotheses but to 
        strengthen them through critical feedback, just as a constructive peer reviewer would.
        """
        
        super().__init__(
            name="Reflection",
            system_prompt=system_prompt,
            model=model,
            temperature=temperature if temperature is not None else 0.2  # Lower temperature for analytical thinking
        )
        
        self.logger = logging.getLogger("agent.reflection")
    
    def process(self, hypotheses: List[Dict[str, str]], research_goal: str) -> List[Dict[str, Any]]:
        """Critically review each hypothesis provided.
        
        Args:
            hypotheses: List of hypothesis dictionaries to review. Each is
                expected to carry 'statement', 'rationale', 'evidence',
                'assumptions' and 'validation' keys; missing keys are rendered
                as "Not provided" rather than raising KeyError.
            research_goal: The original research goal for context
            
        Returns:
            A list of reviewed hypothesis dictionaries with added 'review'
            (full review text) and 'assessment_summary' (structured dict) keys
        """
        # Lazy %-style args: the message is only formatted if the level is enabled.
        self.logger.info("Reviewing %d hypotheses", len(hypotheses))
        
        reviewed_hypotheses = []
        
        for idx, hypothesis in enumerate(hypotheses, start=1):
            self.logger.info("Reviewing hypothesis %d", idx)
            
            # Create a prompt for reviewing this specific hypothesis.
            # .get() keeps a partially-populated hypothesis from aborting the
            # whole batch with a KeyError.
            prompt = f"""
            RESEARCH GOAL: {research_goal}
            
            HYPOTHESIS TO REVIEW:
            Statement: {hypothesis.get('statement', 'Not provided')}
            Rationale: {hypothesis.get('rationale', 'Not provided')}
            Evidence: {hypothesis.get('evidence', 'Not provided')}
            Assumptions: {hypothesis.get('assumptions', 'Not provided')}
            Validation Approach: {hypothesis.get('validation', 'Not provided')}
            
            Please conduct a thorough peer review of this hypothesis. Assess:
            1. Scientific validity and plausibility
            2. Strengths of the hypothesis
            3. Weaknesses, inconsistencies, or logical flaws
            4. Potential contradictions with established knowledge
            5. Ethical considerations or concerning implications
            6. Practical limitations for testing or implementation
            7. Suggested modifications to strengthen the hypothesis
            
            Structure your review clearly with sections for each aspect of the assessment.
            """
            
            review_response = self.get_response(prompt)
            
            # Copy so the caller's input dicts are not mutated.
            reviewed_hypothesis = hypothesis.copy()
            reviewed_hypothesis['review'] = review_response
            
            # Extract a summary assessment (simplified implementation)
            reviewed_hypothesis['assessment_summary'] = self._extract_assessment_summary(review_response)
            
            reviewed_hypotheses.append(reviewed_hypothesis)
            
            # Clear conversation history for the next hypothesis to reduce context length
            self.clear_history()
        
        return reviewed_hypotheses
    
    @staticmethod
    def _has_term(text: str, term: str) -> bool:
        """Return True if *term* occurs in *text* as a whole word/phrase.

        Word-boundary matching prevents false hits from embedded substrings
        (e.g. 'valid' inside 'invalid', 'plausible' inside 'implausible').

        Args:
            text: Haystack, expected to be pre-lowercased by the caller
            term: Lowercase word or phrase to look for
        """
        return re.search(r'\b' + re.escape(term) + r'\b', text) is not None
    
    def _extract_assessment_summary(self, review: str) -> Dict[str, Any]:
        """Extract a structured summary from the review text.
        
        This is a placeholder implementation. In a real system, this would use more 
        sophisticated parsing to extract structured data from the review.
        
        Args:
            review: The full review text
            
        Returns:
            A dictionary with keys 'valid', 'strengths', 'weaknesses',
            'ethical_concerns', 'practical_limitations' and 'overall_score'
        """
        # Lowercase once instead of on every membership test.
        lowered = review.lower()
        
        # Simple implementation - would be more sophisticated in a real system.
        # NOTE(review): the prompt itself asks about "Practical limitations",
        # so reviews will often echo that phrase and 'practical_limitations'
        # may be True more often than warranted — heuristic only.
        return {
            'valid': not self._has_term(lowered, 'invalid') and not self._has_term(lowered, 'implausible'),
            'strengths': [],      # placeholder: not parsed yet
            'weaknesses': [],     # placeholder: not parsed yet
            'ethical_concerns': 'ethical concerns' in lowered or 'ethical issues' in lowered,
            'practical_limitations': 'impractical' in lowered or 'limitation' in lowered,
            'overall_score': self._estimate_score(review)
        }
    
    def _estimate_score(self, review: str) -> float:
        """Estimate a numeric score based on the sentiment of the review.
        
        This is a very simplified implementation. In a real system, this would use
        more sophisticated NLP techniques.
        
        Args:
            review: The review text
            
        Returns:
            A score between 0.0 and 1.0; 0.5 when no sentiment terms are found
        """
        lowered = review.lower()
        
        # Count which sentiment terms are present (0/1 each, whole-word match).
        positive_count = sum(self._has_term(lowered, term) for term in self._POSITIVE_TERMS)
        negative_count = sum(self._has_term(lowered, term) for term in self._NEGATIVE_TERMS)
        
        total = positive_count + negative_count
        if total == 0:
            return 0.5  # Neutral if no terms found
        
        return positive_count / total

    def review_hypothesis(self, hypothesis: str, research_goal: str) -> str:
        """Review a single free-text hypothesis and return the review as a string.
        
        Args:
            hypothesis: The hypothesis text to review
            research_goal: The original research goal for context
            
        Returns:
            The full peer-review text produced by the model
        """
        prompt = f"""
        RESEARCH GOAL: {research_goal}
        
        HYPOTHESIS TO REVIEW:
        {hypothesis}
        
        Please conduct a thorough peer review of this hypothesis. Assess:
        1. Scientific validity and plausibility
        2. Strengths of the hypothesis
        3. Weaknesses, inconsistencies, or logical flaws
        4. Potential contradictions with established knowledge
        5. Ethical considerations or concerning implications
        6. Practical limitations for testing or implementation
        7. Suggested modifications to strengthen the hypothesis
        
        Structure your review clearly with sections for each aspect of the assessment.
        """
        return self.get_response(prompt)