Spaces:

Leon4gr45
/

hyp

Sleeping

App Files Files Community

Leon4gr45 committed on May 10, 2025

Commit

72ee4a9

verified ·

1 Parent(s): dc6e87c

Update src/agents/generation_agent.py

Browse files

Files changed (1) hide show

src/agents/generation_agent.py +49 -65

src/agents/generation_agent.py CHANGED Viewed

@@ -57,20 +57,8 @@ class GenerationAgent(BaseAgent):
         self.logger.info(f"Generating {count} hypotheses for research goal: {research_goal}")
         return self.process(research_goal)
-    def process(self, research_goal: str) -> List[Dict[str, str]]:
-        """Generate initial hypotheses based on the research goal.
-        Args:
-            research_goal: The research goal or question
-        Returns:
-            A list of hypothesis dictionaries with keys:
-            - hypothesis: The hypothesis statement
-            - rationale: Scientific rationale
-            - evidence: Known evidence or references
-            - assumptions: Underlying assumptions
-            - validation: Potential validation approaches
-        """
         self.logger.info(f"Generating hypotheses for research goal: {research_goal}")
         prompt = f"""
@@ -85,56 +73,52 @@ class GenerationAgent(BaseAgent):
         """
         response = self.get_response(prompt)
-        # Process the response into structured hypotheses
-        # For now, we'll just return the raw response, but this would be parsed in a real implementation
-        hypotheses = self._parse_hypotheses(response)
-        return hypotheses
-    def _parse_hypotheses(self, response: str) -> List[Dict[str, str]]:
-        """Parse the raw response into structured hypotheses.
-        This is a placeholder implementation. In a real system, this would use more
-        sophisticated parsing to extract the structured data.
-        Args:
-            response: The raw response from the LLM
-        Returns:
-            A list of hypothesis dictionaries
         """
-        # Simple parsing implementation - would be more sophisticated in a real system
-        hypotheses = []
-        current_hypothesis = {}
-        current_section = None
-        # Add placeholder parsing logic
-        # This is a simplified implementation - would need more robust parsing
-        sections = response.split("\n\n")
-        for section in sections:
-            if "Hypothesis" in section and "hypothesis" not in current_hypothesis:
-                # Start a new hypothesis
-                if current_hypothesis:
-                    hypotheses.append(current_hypothesis)
-                current_hypothesis = {
-                    "hypothesis": section.split(":\n", 1)[-1] if ":\n" in section else section,
-                    "rationale": "",
-                    "evidence": "",
-                    "assumptions": "",
-                    "validation": ""
-                }
-            elif "Rationale" in section:
-                current_hypothesis["rationale"] = section.split(":\n", 1)[-1] if ":\n" in section else section
-            elif "Evidence" in section:
-                current_hypothesis["evidence"] = section.split(":\n", 1)[-1] if ":\n" in section else section
-            elif "Assumptions" in section:
-                current_hypothesis["assumptions"] = section.split(":\n", 1)[-1] if ":\n" in section else section
-            elif "Validation" in section or "Testing" in section:
-                current_hypothesis["validation"] = section.split(":\n", 1)[-1] if ":\n" in section else section
-        # Add the last hypothesis if it exists
-        if current_hypothesis:
-            hypotheses.append(current_hypothesis)
-        return hypotheses

         self.logger.info(f"Generating {count} hypotheses for research goal: {research_goal}")
         return self.process(research_goal)
+    def process(self, research_goal: str) -> list:
+        """Generate areas of interest and research questions based on the research goal."""
         self.logger.info(f"Generating hypotheses for research goal: {research_goal}")
         prompt = f"""
         """
         response = self.get_response(prompt)
+        self.logger.info(f"Raw LLM response: {response}")
+        # Use the new robust parser
+        return self._parse_areas_of_interest(response)
+    def _parse_areas_of_interest(self, response: str) -> list:
         """
+        Parse LLM output into areas of interest and their research questions.
+        Each non-blank line is classified as either a research question or the
+        start of a new area. A line is a question when it is a bulleted or
+        numbered line opening with an interrogative word, or when it is an
+        unlabelled line shorter than 200 characters ending in '?'. Every other
+        line starts a new area; an optional list marker and an optional
+        "Area of Interest" / "Area" / "Direction" / "Topic" label are stripped
+        from its statement.
+        Args:
+            response: Raw text returned by the LLM. Empty or None yields [].
+        Returns:
+            A list of dicts: { 'statement': ..., 'research_questions': [...] }.
+            Questions are stored as the stripped line, list marker included.
+            Questions that appear before the first area are discarded.
+        """
+        import re
+        if not response:
+            return []
+        # The label must end on a word boundary so "Topical issues" is not read
+        # as label "Topic" + statement "al issues". A number after the label is
+        # consumed only when a colon follows it ("Area 2: ..."), so "Area 51
+        # research" keeps its number.
+        area_pattern = re.compile(
+            r"^(?:\d+[.)]|[-*])?\s*"
+            r"(?:(Area of Interest|Area|Direction|Topic)\b(?:\s*\d+(?=\s*:))?)?"
+            r"\s*:?\s*(.+)$",
+            re.IGNORECASE,
+        )
+        question_pattern = re.compile(
+            r"^(?:[-*]|\d+\.|\d+\))\s*"
+            r"(What|How|Why|Which|Could|Is|Are|Does|Do|Can|To what extent|In what ways"
+            r"|Where|When|Who|Should|Would|Might|Will|Has|Have|Did)\b.+",
+            re.IGNORECASE,
+        )
+        areas = []
+        current_area = None
+        current_questions = []
+        for raw_line in response.splitlines():
+            line = raw_line.strip()
+            if not line:
+                continue
+            # area_pattern accepts any non-empty line (all prefixes are optional),
+            # so the question checks must run BEFORE a line is treated as an area;
+            # otherwise the '?' fallback can never fire.
+            match = area_pattern.match(line)
+            has_label = match.group(1) is not None
+            is_question = bool(question_pattern.match(line)) or (
+                not has_label and line.endswith('?') and len(line) < 200
+            )
+            if is_question:
+                current_questions.append(line)
+                continue
+            # Any other line starts a new area: flush the previous one first.
+            if current_area is not None:
+                areas.append({
+                    'statement': current_area,
+                    'research_questions': current_questions
+                })
+            current_area = match.group(2).strip()
+            current_questions = []
+        # Add last area
+        if current_area is not None:
+            areas.append({
+                'statement': current_area,
+                'research_questions': current_questions
+            })
+        return areas