File size: 7,992 Bytes
d7fb055 ced7ae9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# Reflection (Peer-Review) Agent for the AI Co-Scientist system
import logging
import re
from typing import Any, Dict, List

from .base_agent import BaseAgent
class ReflectionAgent(BaseAgent):
    """Agent responsible for critically reviewing hypotheses, similar to a peer reviewer."""

    # Review checklist shared verbatim by process() and review_hypothesis() so the
    # two prompt-building code paths cannot drift apart.
    _REVIEW_CHECKLIST = (
        "Please conduct a thorough peer review of this hypothesis. Assess:\n"
        "1. Scientific validity and plausibility\n"
        "2. Strengths of the hypothesis\n"
        "3. Weaknesses, inconsistencies, or logical flaws\n"
        "4. Potential contradictions with established knowledge\n"
        "5. Ethical considerations or concerning implications\n"
        "6. Practical limitations for testing or implementation\n"
        "7. Suggested modifications to strengthen the hypothesis\n"
        "Structure your review clearly with sections for each aspect of the assessment."
    )

    def __init__(self, model=None, temperature=None):
        """Initialize the Reflection Agent.

        Args:
            model: Optional model override.
            temperature: Optional temperature override. Defaults to 0.2 because
                peer review benefits from analytical, low-variance output.
        """
        system_prompt = """
You are a Reflection Agent in an AI Co-Scientist system, responsible for critically reviewing
scientific hypotheses. You have expertise across multiple scientific disciplines at a PhD level
and act as a rigorous peer reviewer.
Your role is to:
1. Critically evaluate hypotheses for scientific soundness and plausibility
2. Cross-check hypotheses against known facts and literature
3. Identify weaknesses, inconsistencies, or logical flaws
4. Flag any ethical concerns or practical limitations
5. Assess the novelty and potential impact of each hypothesis
6. Run simulation reviews to envision how the hypothesis would play out
7. Suggest specific improvements or alternative approaches
For each hypothesis you review, provide:
- Overall assessment of scientific validity and plausibility
- Specific strengths identified in the hypothesis
- Critical weaknesses or inconsistencies found
- Potential contradictions with established knowledge
- Ethical considerations or concerning implications
- Practical limitations for testing or implementation
- Suggested modifications to strengthen the hypothesis
Be fair but rigorous in your assessment. Your goal is not to dismiss hypotheses but to
strengthen them through critical feedback, just as a constructive peer reviewer would.
"""
        super().__init__(
            name="Reflection",
            system_prompt=system_prompt,
            model=model,
            # Lower temperature for analytical thinking.
            temperature=temperature if temperature is not None else 0.2,
        )
        self.logger = logging.getLogger("agent.reflection")

    def process(self, hypotheses: List[Dict[str, str]], research_goal: str) -> List[Dict[str, Any]]:
        """Critically review each hypothesis provided.

        Args:
            hypotheses: List of hypothesis dictionaries to review. Each is
                expected to carry 'statement', 'rationale', 'evidence',
                'assumptions' and 'validation' keys.
            research_goal: The original research goal for context.

        Returns:
            A list of reviewed hypothesis dictionaries with added 'review' and
            'assessment_summary' entries.
        """
        self.logger.info("Reviewing %d hypotheses", len(hypotheses))
        reviewed_hypotheses: List[Dict[str, Any]] = []
        for idx, hypothesis in enumerate(hypotheses, start=1):
            self.logger.info("Reviewing hypothesis %d", idx)
            # Build a prompt for reviewing this specific hypothesis.
            prompt = f"""
RESEARCH GOAL: {research_goal}
HYPOTHESIS TO REVIEW:
Statement: {hypothesis['statement']}
Rationale: {hypothesis['rationale']}
Evidence: {hypothesis['evidence']}
Assumptions: {hypothesis['assumptions']}
Validation Approach: {hypothesis['validation']}
{self._REVIEW_CHECKLIST}
"""
            review_response = self.get_response(prompt)
            # Attach results to a copy so the caller's dictionaries are not mutated.
            reviewed_hypothesis = hypothesis.copy()
            reviewed_hypothesis['review'] = review_response
            # Extract a summary assessment (simplified implementation).
            reviewed_hypothesis['assessment_summary'] = self._extract_assessment_summary(review_response)
            reviewed_hypotheses.append(reviewed_hypothesis)
            # Clear conversation history for the next hypothesis to reduce context length.
            self.clear_history()
        return reviewed_hypotheses

    def _extract_assessment_summary(self, review: str) -> Dict[str, Any]:
        """Extract a structured summary from the review text.

        This is a placeholder implementation. In a real system, this would use
        more sophisticated parsing to extract structured data from the review.

        Args:
            review: The full review text.

        Returns:
            A dictionary with summary assessment information.
        """
        # Lowercase once instead of per keyword test.
        text = review.lower()
        return {
            'valid': 'invalid' not in text and 'implausible' not in text,
            'strengths': [],   # not parsed yet (placeholder)
            'weaknesses': [],  # not parsed yet (placeholder)
            'ethical_concerns': 'ethical concerns' in text or 'ethical issues' in text,
            'practical_limitations': 'impractical' in text or 'limitation' in text,
            'overall_score': self._estimate_score(review),
        }

    def _estimate_score(self, review: str) -> float:
        """Estimate a numeric score based on the sentiment of the review.

        This is a very simplified implementation. In a real system, this would
        use more sophisticated NLP techniques.

        Args:
            review: The review text.

        Returns:
            A score between 0.0 and 1.0 (0.5 when no sentiment terms appear).
        """
        positive_terms = ['strong', 'valid', 'plausible', 'consistent', 'novel', 'innovative']
        negative_terms = ['weak', 'invalid', 'implausible', 'inconsistent', 'contradicts', 'flawed']
        text = review.lower()

        def present(term: str) -> bool:
            # Anchor the match at a word start: a plain substring test counted
            # 'valid', 'plausible' and 'consistent' as POSITIVE hits inside their
            # own negations ('invalid', 'implausible', 'inconsistent'), inflating
            # the score of negative reviews. Suffix forms ('weaknesses',
            # 'strongly') still match, as they did before.
            return re.search(r"\b" + re.escape(term), text) is not None

        positive_count = sum(present(term) for term in positive_terms)
        negative_count = sum(present(term) for term in negative_terms)
        total = positive_count + negative_count
        if total == 0:
            return 0.5  # Neutral if no sentiment terms found.
        return positive_count / total

    def review_hypothesis(self, hypothesis: str, research_goal: str) -> str:
        """Review a single hypothesis in the context of the research goal.

        Args:
            hypothesis: Free-text hypothesis to review.
            research_goal: The original research goal for context.

        Returns:
            The review feedback as a string.
        """
        prompt = f"""
RESEARCH GOAL: {research_goal}
HYPOTHESIS TO REVIEW:
{hypothesis}
{self._REVIEW_CHECKLIST}
"""
        return self.get_response(prompt)
|