Update src/agents/generation_agent.py
Browse files- src/agents/generation_agent.py +49 -65
src/agents/generation_agent.py
CHANGED
|
@@ -57,20 +57,8 @@ class GenerationAgent(BaseAgent):
|
|
| 57 |
self.logger.info(f"Generating {count} hypotheses for research goal: {research_goal}")
|
| 58 |
return self.process(research_goal)
|
| 59 |
|
| 60 |
-
def process(self, research_goal: str) ->
|
| 61 |
-
"""Generate
|
| 62 |
-
|
| 63 |
-
Args:
|
| 64 |
-
research_goal: The research goal or question
|
| 65 |
-
|
| 66 |
-
Returns:
|
| 67 |
-
A list of hypothesis dictionaries with keys:
|
| 68 |
-
- hypothesis: The hypothesis statement
|
| 69 |
-
- rationale: Scientific rationale
|
| 70 |
-
- evidence: Known evidence or references
|
| 71 |
-
- assumptions: Underlying assumptions
|
| 72 |
-
- validation: Potential validation approaches
|
| 73 |
-
"""
|
| 74 |
self.logger.info(f"Generating hypotheses for research goal: {research_goal}")
|
| 75 |
|
| 76 |
prompt = f"""
|
|
@@ -85,56 +73,52 @@ class GenerationAgent(BaseAgent):
|
|
| 85 |
"""
|
| 86 |
|
| 87 |
response = self.get_response(prompt)
|
|
|
|
| 88 |
|
| 89 |
-
#
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
return hypotheses
|
| 94 |
-
|
| 95 |
-
def _parse_hypotheses(self, response: str) -> List[Dict[str, str]]:
|
| 96 |
-
"""Parse the raw response into structured hypotheses.
|
| 97 |
-
|
| 98 |
-
This is a placeholder implementation. In a real system, this would use more
|
| 99 |
-
sophisticated parsing to extract the structured data.
|
| 100 |
-
|
| 101 |
-
Args:
|
| 102 |
-
response: The raw response from the LLM
|
| 103 |
-
|
| 104 |
-
Returns:
|
| 105 |
-
A list of hypothesis dictionaries
|
| 106 |
"""
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
self.logger.info(f"Generating {count} hypotheses for research goal: {research_goal}")
|
| 58 |
return self.process(research_goal)
|
| 59 |
|
| 60 |
+
def process(self, research_goal: str) -> list:
|
| 61 |
+
"""Generate areas of interest and research questions based on the research goal."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
self.logger.info(f"Generating hypotheses for research goal: {research_goal}")
|
| 63 |
|
| 64 |
prompt = f"""
|
|
|
|
| 73 |
"""
|
| 74 |
|
| 75 |
response = self.get_response(prompt)
|
| 76 |
+
self.logger.info(f"Raw LLM response: {response}")
|
| 77 |
|
| 78 |
+
# Use the new robust parser
|
| 79 |
+
return self._parse_areas_of_interest(response)
|
| 80 |
+
|
| 81 |
+
def _parse_areas_of_interest(self, response: str) -> list:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
"""
|
| 83 |
+
Robustly parse LLM output for areas of interest and their research questions.
|
| 84 |
+
Handles numbered/bulleted lists, headings, and flexible formats.
|
| 85 |
+
Returns a list of dicts: { 'statement': ..., 'research_questions': [...] }
|
| 86 |
+
"""
|
| 87 |
+
import re
|
| 88 |
+
areas = []
|
| 89 |
+
current_area = None
|
| 90 |
+
current_questions = []
|
| 91 |
+
lines = response.splitlines()
|
| 92 |
+
area_pattern = re.compile(r"^(?:\d+\.|[-*])?\s*(Area of Interest|Area|Direction|Topic)?\s*:?\s*(.+)$", re.IGNORECASE)
|
| 93 |
+
question_pattern = re.compile(r"^(?:[-*]|\d+\.|\d+\))\s*(What|How|Why|Which|Could|Is|Are|Does|Do|Can|To what extent|In what ways|Where|When|Who|Should|Would|Might|Will|Has|Have|Did|Does)\b.+", re.IGNORECASE)
|
| 94 |
+
|
| 95 |
+
for line in lines:
|
| 96 |
+
line = line.strip()
|
| 97 |
+
if not line:
|
| 98 |
+
continue
|
| 99 |
+
# Detect area of interest
|
| 100 |
+
if area_pattern.match(line) and not question_pattern.match(line):
|
| 101 |
+
# Save previous area
|
| 102 |
+
if current_area:
|
| 103 |
+
areas.append({
|
| 104 |
+
'statement': current_area,
|
| 105 |
+
'research_questions': current_questions
|
| 106 |
+
})
|
| 107 |
+
# Start new area
|
| 108 |
+
match = area_pattern.match(line)
|
| 109 |
+
area_text = match.group(2).strip()
|
| 110 |
+
current_area = area_text
|
| 111 |
+
current_questions = []
|
| 112 |
+
# Detect research question
|
| 113 |
+
elif question_pattern.match(line):
|
| 114 |
+
current_questions.append(line)
|
| 115 |
+
# Sometimes questions are indented or bulleted without a clear marker
|
| 116 |
+
elif line.endswith('?') and len(line) < 200:
|
| 117 |
+
current_questions.append(line)
|
| 118 |
+
# Add last area
|
| 119 |
+
if current_area:
|
| 120 |
+
areas.append({
|
| 121 |
+
'statement': current_area,
|
| 122 |
+
'research_questions': current_questions
|
| 123 |
+
})
|
| 124 |
+
return areas
|