| |
|
|
| import logging |
| from typing import List, Dict, Any |
|
|
| from .base_agent import BaseAgent |
|
|
| class ReflectionAgent(BaseAgent): |
| """Agent responsible for critically reviewing hypotheses, similar to a peer reviewer.""" |
| |
| def __init__(self, model=None, temperature=None): |
| """Initialize the Reflection Agent. |
| |
| Args: |
| model: Optional model override |
| temperature: Optional temperature override |
| """ |
| system_prompt = """ |
| You are a Reflection Agent in an AI Co-Scientist system, responsible for critically reviewing |
| scientific hypotheses. You have expertise across multiple scientific disciplines at a PhD level |
| and act as a rigorous peer reviewer. |
| |
| Your role is to: |
| 1. Critically evaluate hypotheses for scientific soundness and plausibility |
| 2. Cross-check hypotheses against known facts and literature |
| 3. Identify weaknesses, inconsistencies, or logical flaws |
| 4. Flag any ethical concerns or practical limitations |
| 5. Assess the novelty and potential impact of each hypothesis |
| 6. Run simulation reviews to envision how the hypothesis would play out |
| 7. Suggest specific improvements or alternative approaches |
| |
| For each hypothesis you review, provide: |
| - Overall assessment of scientific validity and plausibility |
| - Specific strengths identified in the hypothesis |
| - Critical weaknesses or inconsistencies found |
| - Potential contradictions with established knowledge |
| - Ethical considerations or concerning implications |
| - Practical limitations for testing or implementation |
| - Suggested modifications to strengthen the hypothesis |
| |
| Be fair but rigorous in your assessment. Your goal is not to dismiss hypotheses but to |
| strengthen them through critical feedback, just as a constructive peer reviewer would. |
| """ |
| |
| super().__init__( |
| name="Reflection", |
| system_prompt=system_prompt, |
| model=model, |
| temperature=temperature if temperature is not None else 0.2 |
| ) |
| |
| self.logger = logging.getLogger("agent.reflection") |
| |
| def process(self, hypotheses: List[Dict[str, str]], research_goal: str) -> List[Dict[str, Any]]: |
| """Critically review each hypothesis provided. |
| |
| Args: |
| hypotheses: List of hypothesis dictionaries to review |
| research_goal: The original research goal for context |
| |
| Returns: |
| A list of reviewed hypothesis dictionaries with added review information |
| """ |
| self.logger.info(f"Reviewing {len(hypotheses)} hypotheses") |
| |
| reviewed_hypotheses = [] |
| |
| for idx, hypothesis in enumerate(hypotheses): |
| self.logger.info(f"Reviewing hypothesis {idx+1}") |
| |
| |
| prompt = f""" |
| RESEARCH GOAL: {research_goal} |
| |
| HYPOTHESIS TO REVIEW: |
| Statement: {hypothesis['statement']} |
| Rationale: {hypothesis['rationale']} |
| Evidence: {hypothesis['evidence']} |
| Assumptions: {hypothesis['assumptions']} |
| Validation Approach: {hypothesis['validation']} |
| |
| Please conduct a thorough peer review of this hypothesis. Assess: |
| 1. Scientific validity and plausibility |
| 2. Strengths of the hypothesis |
| 3. Weaknesses, inconsistencies, or logical flaws |
| 4. Potential contradictions with established knowledge |
| 5. Ethical considerations or concerning implications |
| 6. Practical limitations for testing or implementation |
| 7. Suggested modifications to strengthen the hypothesis |
| |
| Structure your review clearly with sections for each aspect of the assessment. |
| """ |
| |
| review_response = self.get_response(prompt) |
| |
| |
| reviewed_hypothesis = hypothesis.copy() |
| reviewed_hypothesis['review'] = review_response |
| |
| |
| assessment_summary = self._extract_assessment_summary(review_response) |
| reviewed_hypothesis['assessment_summary'] = assessment_summary |
| |
| reviewed_hypotheses.append(reviewed_hypothesis) |
| |
| |
| self.clear_history() |
| |
| return reviewed_hypotheses |
| |
| def _extract_assessment_summary(self, review: str) -> Dict[str, Any]: |
| """Extract a structured summary from the review text. |
| |
| This is a placeholder implementation. In a real system, this would use more |
| sophisticated parsing to extract structured data from the review. |
| |
| Args: |
| review: The full review text |
| |
| Returns: |
| A dictionary with summary assessment information |
| """ |
| |
| assessment = { |
| 'valid': 'invalid' not in review.lower() and 'implausible' not in review.lower(), |
| 'strengths': [], |
| 'weaknesses': [], |
| 'ethical_concerns': 'ethical concerns' in review.lower() or 'ethical issues' in review.lower(), |
| 'practical_limitations': 'impractical' in review.lower() or 'limitation' in review.lower(), |
| 'overall_score': self._estimate_score(review) |
| } |
| |
| return assessment |
| |
| def _estimate_score(self, review: str) -> float: |
| """Estimate a numeric score based on the sentiment of the review. |
| |
| This is a very simplified implementation. In a real system, this would use |
| more sophisticated NLP techniques. |
| |
| Args: |
| review: The review text |
| |
| Returns: |
| A score between 0.0 and 1.0 |
| """ |
| |
| positive_terms = ['strong', 'valid', 'plausible', 'consistent', 'novel', 'innovative'] |
| negative_terms = ['weak', 'invalid', 'implausible', 'inconsistent', 'contradicts', 'flawed'] |
| |
| |
| positive_count = sum(term in review.lower() for term in positive_terms) |
| negative_count = sum(term in review.lower() for term in negative_terms) |
| |
| |
| total = positive_count + negative_count |
| if total == 0: |
| return 0.5 |
| |
| score = positive_count / total |
| return score |
|
|
| def review_hypothesis(self, hypothesis: str, research_goal: str) -> str: |
| """Review a single hypothesis in the context of the research goal and return feedback as a string.""" |
| prompt = f""" |
| RESEARCH GOAL: {research_goal} |
| |
| HYPOTHESIS TO REVIEW: |
| {hypothesis} |
| |
| Please conduct a thorough peer review of this hypothesis. Assess: |
| 1. Scientific validity and plausibility |
| 2. Strengths of the hypothesis |
| 3. Weaknesses, inconsistencies, or logical flaws |
| 4. Potential contradictions with established knowledge |
| 5. Ethical considerations or concerning implications |
| 6. Practical limitations for testing or implementation |
| 7. Suggested modifications to strengthen the hypothesis |
| |
| Structure your review clearly with sections for each aspect of the assessment. |
| """ |
| return self.get_response(prompt) |
|
|