# Proximity (Relevance) Agent for ensuring hypotheses remain on-topic

import logging
import re
from typing import List, Dict, Any

from .base_agent import BaseAgent

class ProximityAgent(BaseAgent):
    """Agent responsible for ensuring hypotheses remain on-topic and relevant to research goals."""
    
    def __init__(self, model=None, temperature=None):
        """Set up the Proximity Agent with its relevance-focused system prompt.

        Args:
            model: Optional model override, forwarded to the base agent
            temperature: Optional temperature override; 0.2 is used when this is None
        """
        # Lower temperature for consistency unless the caller picked one explicitly
        if temperature is None:
            temperature = 0.2

        proximity_instructions = """
        You are a Proximity Agent in an AI Co-Scientist system, responsible for evaluating how closely 
        scientific hypotheses align with the original research goals. You have expertise across 
        multiple scientific disciplines at a PhD level.
        
        Your role is to:
        1. Assess the semantic and conceptual relatedness of hypotheses to the research objectives
        2. Identify and filter out tangential or irrelevant suggestions
        3. Ensure that hypotheses address the core questions posed in the research goal
        4. Evaluate whether hypotheses maintain appropriate scope (neither too broad nor too narrow)
        5. Provide specific feedback on how to increase relevance when needed
        
        When evaluating proximity, consider:
        - Conceptual alignment: Does the hypothesis address the same fundamental concepts as the research goal?
        - Problem-solution fit: Does the hypothesis potentially solve the problem outlined in the goal?
        - Scope appropriateness: Is the hypothesis at the right level of specificity for the goal?
        - Scientific domain match: Does the hypothesis stay within the relevant scientific domains?
        - Practical applicability: Would findings based on this hypothesis be useful for the stated objective?
        
        Your evaluations should be precise and constructive, focusing on relevance rather than 
        scientific validity (which is handled by other agents).
        """

        super().__init__(
            name="Proximity",
            system_prompt=proximity_instructions,
            model=model,
            temperature=temperature,
        )

        # Dedicated logger for this agent's messages
        self.logger = logging.getLogger("agent.proximity")
    
    def process(self, hypotheses: List[Dict[str, Any]], research_goal: str) -> List[Dict[str, Any]]:
        """Evaluate how closely each hypothesis aligns with the research goal.
        
        Args:
            hypotheses: List of hypothesis dictionaries to evaluate
            research_goal: The original research goal for context
            
        Returns:
            A list of hypothesis dictionaries with added proximity evaluation
        """
        self.logger.info(f"Evaluating proximity of {len(hypotheses)} hypotheses to research goal")
        
        evaluated_hypotheses = []
        
        for idx, hypothesis in enumerate(hypotheses):
            self.logger.info(f"Evaluating proximity of hypothesis {idx+1}")
            
            # Prepare a concise version of the hypothesis for evaluation
            hypothesis_statement = hypothesis['statement']
            hypothesis_rationale = hypothesis.get('rationale', '')[:300] + "..." if len(hypothesis.get('rationale', '')) > 300 else hypothesis.get('rationale', '')
            
            prompt = f"""
            RESEARCH GOAL: {research_goal}
            
            HYPOTHESIS TO EVALUATE:
            Statement: {hypothesis_statement}
            Rationale: {hypothesis_rationale}
            
            Please evaluate how closely this hypothesis aligns with the research goal.
            Focus specifically on:
            
            1. Conceptual alignment: Does the hypothesis address the same fundamental concepts as the research goal?
            2. Problem-solution fit: Does the hypothesis potentially solve the problem outlined in the goal?
            3. Scope appropriateness: Is the hypothesis at the right level of specificity for the goal?
            4. Scientific domain match: Does the hypothesis stay within the relevant scientific domains?
            5. Practical applicability: Would findings based on this hypothesis be useful for the stated objective?
            
            For each criterion, provide a score from 1-10 and brief justification.
            Then provide an overall proximity score (1-10) and a summary assessment of relevance.
            
            Finally, offer specific suggestions for how the hypothesis could be modified to increase its 
            relevance to the research goal, if needed.
            """
            
            proximity_evaluation = self.get_response(prompt)
            
            # Add the evaluation to the hypothesis
            evaluated_hypothesis = hypothesis.copy()
            evaluated_hypothesis['proximity_evaluation'] = proximity_evaluation
            
            # Extract proximity score and determine if hypothesis passes relevance threshold
            proximity_score, is_relevant = self._extract_proximity_info(proximity_evaluation)
            evaluated_hypothesis['proximity_score'] = proximity_score
            evaluated_hypothesis['is_relevant'] = is_relevant
            
            evaluated_hypotheses.append(evaluated_hypothesis)
            
            # Clear conversation history for the next hypothesis
            self.clear_history()
        
        # Filter out irrelevant hypotheses if specified
        return evaluated_hypotheses
    
    def filter_relevant_hypotheses(self, evaluated_hypotheses: List[Dict[str, Any]], threshold: float = 5.0) -> List[Dict[str, Any]]:
        """Filter hypotheses to keep only those above the relevance threshold.
        
        Args:
            evaluated_hypotheses: List of hypotheses with proximity evaluations
            threshold: Minimum proximity score to be considered relevant (1-10 scale)
            
        Returns:
            A filtered list of relevant hypotheses
        """
        self.logger.info(f"Filtering hypotheses with proximity threshold {threshold}")
        
        relevant_hypotheses = [h for h in evaluated_hypotheses if h.get('proximity_score', 0) >= threshold]
        self.logger.info(f"Kept {len(relevant_hypotheses)} out of {len(evaluated_hypotheses)} hypotheses")
        
        return relevant_hypotheses
    
    def _extract_proximity_info(self, evaluation: str) -> tuple[float, bool]:
        """Extract proximity score and relevance decision from evaluation text.
        
        This is a placeholder implementation. In a real system, this would use more 
        sophisticated parsing to extract structured data reliably.
        
        Args:
            evaluation: The proximity evaluation text
            
        Returns:
            A tuple of (proximity_score, is_relevant)
        """
        # Simple parsing implementation - would be more sophisticated in a real system
        proximity_score = 5.0  # Default middle score
        
        # Look for overall proximity score patterns
        score_patterns = [
            "overall proximity score: {}",
            "overall proximity score of {}",
            "proximity score: {}",
            "overall score: {}"
        ]
        
        for pattern in score_patterns:
            for i in range(1, 11):
                search_pattern = pattern.format(i)
                if search_pattern.lower() in evaluation.lower():
                    proximity_score = float(i)
                    break
        
        # Determine relevance based on score and keywords
        is_relevant = proximity_score >= 6.0
        
        # Override based on explicit statements in text
        if "not relevant" in evaluation.lower() or "irrelevant" in evaluation.lower():
            is_relevant = False
        if "highly relevant" in evaluation.lower() or "very relevant" in evaluation.lower():
            is_relevant = True
        
        return proximity_score, is_relevant

    def evaluate_proximity(self, hypothesis: str, research_goal: str) -> dict:
        """Evaluate the proximity of a single hypothesis to the research goal and return a score and assessment."""
        prompt = f"""
        RESEARCH GOAL: {research_goal}
        
        HYPOTHESIS TO EVALUATE:
        {hypothesis}
        
        Please evaluate how closely this hypothesis aligns with the research goal.
        Focus specifically on:
        1. Conceptual alignment: Does the hypothesis address the same fundamental concepts as the research goal?
        2. Problem-solution fit: Does the hypothesis potentially solve the problem outlined in the goal?
        3. Scope appropriateness: Is the hypothesis at the right level of specificity for the goal?
        4. Scientific domain match: Does the hypothesis stay within the relevant scientific domains?
        5. Practical applicability: Would findings based on this hypothesis be useful for the stated objective?
        
        For each criterion, provide a score from 1-10 and brief justification.
        Then provide an overall proximity score (1-10) and a summary assessment of relevance.
        """
        evaluation = self.get_response(prompt)
        proximity_score, is_relevant = self._extract_proximity_info(evaluation)
        return {
            "proximity_score": proximity_score,
            "is_relevant": is_relevant,
            "evaluation": evaluation
        }