Leon4gr45 committed on
Commit
72ee4a9
·
verified ·
1 Parent(s): dc6e87c

Update src/agents/generation_agent.py

Browse files
Files changed (1) hide show
  1. src/agents/generation_agent.py +49 -65
src/agents/generation_agent.py CHANGED
@@ -57,20 +57,8 @@ class GenerationAgent(BaseAgent):
57
  self.logger.info(f"Generating {count} hypotheses for research goal: {research_goal}")
58
  return self.process(research_goal)
59
 
60
- def process(self, research_goal: str) -> List[Dict[str, str]]:
61
- """Generate initial hypotheses based on the research goal.
62
-
63
- Args:
64
- research_goal: The research goal or question
65
-
66
- Returns:
67
- A list of hypothesis dictionaries with keys:
68
- - hypothesis: The hypothesis statement
69
- - rationale: Scientific rationale
70
- - evidence: Known evidence or references
71
- - assumptions: Underlying assumptions
72
- - validation: Potential validation approaches
73
- """
74
  self.logger.info(f"Generating hypotheses for research goal: {research_goal}")
75
 
76
  prompt = f"""
@@ -85,56 +73,52 @@ class GenerationAgent(BaseAgent):
85
  """
86
 
87
  response = self.get_response(prompt)
 
88
 
89
- # Process the response into structured hypotheses
90
- # For now, we'll just return the raw response, but this would be parsed in a real implementation
91
- hypotheses = self._parse_hypotheses(response)
92
-
93
- return hypotheses
94
-
95
- def _parse_hypotheses(self, response: str) -> List[Dict[str, str]]:
96
- """Parse the raw response into structured hypotheses.
97
-
98
- This is a placeholder implementation. In a real system, this would use more
99
- sophisticated parsing to extract the structured data.
100
-
101
- Args:
102
- response: The raw response from the LLM
103
-
104
- Returns:
105
- A list of hypothesis dictionaries
106
  """
107
- # Simple parsing implementation - would be more sophisticated in a real system
108
- hypotheses = []
109
- current_hypothesis = {}
110
- current_section = None
111
-
112
- # Add placeholder parsing logic
113
- # This is a simplified implementation - would need more robust parsing
114
- sections = response.split("\n\n")
115
- for section in sections:
116
- if "Hypothesis" in section and "hypothesis" not in current_hypothesis:
117
- # Start a new hypothesis
118
- if current_hypothesis:
119
- hypotheses.append(current_hypothesis)
120
- current_hypothesis = {
121
- "hypothesis": section.split(":\n", 1)[-1] if ":\n" in section else section,
122
- "rationale": "",
123
- "evidence": "",
124
- "assumptions": "",
125
- "validation": ""
126
- }
127
- elif "Rationale" in section:
128
- current_hypothesis["rationale"] = section.split(":\n", 1)[-1] if ":\n" in section else section
129
- elif "Evidence" in section:
130
- current_hypothesis["evidence"] = section.split(":\n", 1)[-1] if ":\n" in section else section
131
- elif "Assumptions" in section:
132
- current_hypothesis["assumptions"] = section.split(":\n", 1)[-1] if ":\n" in section else section
133
- elif "Validation" in section or "Testing" in section:
134
- current_hypothesis["validation"] = section.split(":\n", 1)[-1] if ":\n" in section else section
135
-
136
- # Add the last hypothesis if it exists
137
- if current_hypothesis:
138
- hypotheses.append(current_hypothesis)
139
-
140
- return hypotheses
 
 
 
 
 
 
 
 
 
57
  self.logger.info(f"Generating {count} hypotheses for research goal: {research_goal}")
58
  return self.process(research_goal)
59
 
60
+ def process(self, research_goal: str) -> list:
61
+ """Generate areas of interest and research questions based on the research goal."""
 
 
 
 
 
 
 
 
 
 
 
 
62
  self.logger.info(f"Generating hypotheses for research goal: {research_goal}")
63
 
64
  prompt = f"""
 
73
  """
74
 
75
  response = self.get_response(prompt)
76
+ self.logger.info(f"Raw LLM response: {response}")
77
 
78
+ # Use the new robust parser
79
+ return self._parse_areas_of_interest(response)
80
+
81
+ def _parse_areas_of_interest(self, response: str) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  """
83
+ Robustly parse LLM output for areas of interest and their research questions.
84
+ Handles numbered/bulleted lists, headings, and flexible formats.
85
+ Returns a list of dicts: { 'statement': ..., 'research_questions': [...] }
86
+ """
87
+ import re
88
+ areas = []
89
+ current_area = None
90
+ current_questions = []
91
+ lines = response.splitlines()
92
+ area_pattern = re.compile(r"^(?:\d+\.|[-*])?\s*(Area of Interest|Area|Direction|Topic)?\s*:?\s*(.+)$", re.IGNORECASE)
93
+ question_pattern = re.compile(r"^(?:[-*]|\d+\.|\d+\))\s*(What|How|Why|Which|Could|Is|Are|Does|Do|Can|To what extent|In what ways|Where|When|Who|Should|Would|Might|Will|Has|Have|Did|Does)\b.+", re.IGNORECASE)
94
+
95
+ for line in lines:
96
+ line = line.strip()
97
+ if not line:
98
+ continue
99
+ # Detect area of interest
100
+ if area_pattern.match(line) and not question_pattern.match(line):
101
+ # Save previous area
102
+ if current_area:
103
+ areas.append({
104
+ 'statement': current_area,
105
+ 'research_questions': current_questions
106
+ })
107
+ # Start new area
108
+ match = area_pattern.match(line)
109
+ area_text = match.group(2).strip()
110
+ current_area = area_text
111
+ current_questions = []
112
+ # Detect research question
113
+ elif question_pattern.match(line):
114
+ current_questions.append(line)
115
+ # Sometimes questions are indented or bulleted without a clear marker
116
+ elif line.endswith('?') and len(line) < 200:
117
+ current_questions.append(line)
118
+ # Add last area
119
+ if current_area:
120
+ areas.append({
121
+ 'statement': current_area,
122
+ 'research_questions': current_questions
123
+ })
124
+ return areas