|
|
|
|
|
|
|
|
import logging |
|
|
from typing import List, Dict, Any |
|
|
|
|
|
from .base_agent import BaseAgent |
|
|
|
|
|
class GenerationAgent(BaseAgent):
    """Agent responsible for generating initial hypotheses based on the research goal.

    The agent sends the research goal to the underlying model (through
    ``get_response`` inherited from ``BaseAgent``) and parses the free-text
    answer into a list of areas of interest, each with its research questions.
    """

    # Sampling temperature used when the caller does not override it.
    _DEFAULT_TEMPERATURE = 0.7

    def __init__(self, model=None, temperature=None):
        """Initialize the Generation Agent.

        Args:
            model: Optional model override
            temperature: Optional temperature override; ``None`` selects the
                agent default (0.7). An explicit ``0`` is honoured.
        """
        system_prompt = """
        You are a Generation Agent in an AI Co-Scientist system, responsible for proposing novel areas of interest and potential research directions based on the user's research goal. You have expertise across multiple scientific disciplines at a PhD level.

        Your role is to:
        1. Suggest diverse, novel, and relevant areas of interest or research directions that the user might not have considered, based on the research goal provided.
        2. Leverage your broad knowledge to surface surprising or emerging topics, interdisciplinary connections, and new angles for investigation.
        3. For each area of interest, generate multiple specific research questions that could guide further investigation into new aspects of the topic.
        4. Ensure each area of interest is clearly described and the research questions are actionable and thought-provoking.

        Your output should include:
        - A list of distinct areas of interest (not just hypotheses), each with:
          - A clear description of the area or direction
          - 2-3 research questions that could be explored within this area

        Remember:
        - Areas of interest should be relevant to the research goal, but can be tangential or surprising if they open up new avenues for discovery.
        - Research questions should be specific, actionable, and designed to inspire further investigation.
        - Avoid repeating the research goal verbatim; instead, expand on it with new perspectives.
        """

        super().__init__(
            name="Generation",
            system_prompt=system_prompt,
            model=model,
            temperature=(
                temperature if temperature is not None else self._DEFAULT_TEMPERATURE
            ),
        )

        self.logger = logging.getLogger("agent.generation")

    def generate_hypotheses(self, research_goal: str, count: int = 5) -> List[Dict[str, Any]]:
        """Generate initial hypotheses based on the research goal.

        Args:
            research_goal: The research goal or question
            count: Number of hypotheses to generate. ``None`` means "no
                preference" and behaves like :meth:`process`.

        Returns:
            A list of at most ``count`` hypothesis dictionaries (see
            :meth:`_parse_areas_of_interest` for their shape). Empty when
            ``count`` is smaller than 1 or nothing could be parsed.
        """
        if count is None:
            return self.process(research_goal)

        if count < 1:
            # Nothing was asked for: skip the (costly) model call entirely.
            self.logger.warning(
                "Requested %s hypotheses; nothing to generate", count
            )
            return []

        self.logger.info(
            "Generating %s hypotheses for research goal: %s", count, research_goal
        )
        areas = self._request_areas(research_goal, f"exactly {count}")
        # The model may still over-deliver; never return more than requested.
        return areas[:count]

    def process(self, research_goal: str) -> list:
        """Generate areas of interest and research questions based on the research goal."""
        self.logger.info("Generating hypotheses for research goal: %s", research_goal)
        return self._request_areas(research_goal, "at least 3-5")

    def _request_areas(self, research_goal: str, quantity: str) -> List[Dict[str, Any]]:
        """Ask the model for ``quantity`` areas of interest and parse its reply.

        Args:
            research_goal: The research goal or question
            quantity: Wording inserted into the prompt to state how many
                areas are wanted (e.g. ``"at least 3-5"`` or ``"exactly 4"``).

        Returns:
            The parsed list of area dictionaries.
        """
        prompt = f"""
        RESEARCH GOAL: {research_goal}

        Based on the research goal above, suggest {quantity} distinct AREAS OF INTEREST or potential research directions that the user might not have considered. For each area of interest:
        - Provide a clear description of the area or direction
        - Generate 2-3 specific research questions that could be explored within this area

        Format your response as a structured list, with each area of interest clearly separated, and each research question listed under its area.
        Be creative, leverage your broad knowledge, and focus on novelty and relevance.
        """

        response = self.get_response(prompt)
        self.logger.info("Raw LLM response: %s", response)

        return self._parse_areas_of_interest(response)

    def _parse_areas_of_interest(self, response: str) -> List[Dict[str, Any]]:
        """
        Robustly parse LLM output for areas of interest and their research questions.

        Handles numbered/bulleted lists, markdown headings and emphasis, and
        ``Area:`` / ``Area of Interest:`` / ``Direction:`` / ``Topic:`` labels.

        A line starts a new area when it is a markdown heading, a numbered
        item that is not a question, or carries one of the labels above.
        When the response contains no such line at all, a plain line starts
        an area if no area is open yet or the open area already has
        questions (areas are requested as title, description, questions).

        Args:
            response: Raw text returned by the model; ``None`` or an empty
                string yields an empty list.

        Returns:
            A list of dicts:
            { 'statement': ..., 'description': ..., 'research_questions': [...] }
            where ``description`` joins the free-text lines found under the
            area (empty string when there are none).
        """
        import re  # function-local, as in the original implementation

        if not response:
            return []

        heading_re = re.compile(r"^#{1,6}\s+")
        # Asterisks glued to a word are emphasis; "* " list bullets are kept.
        emphasis_re = re.compile(r"\*{1,3}(?=\S)|(?<=\S)\*{1,3}")
        bullet_re = re.compile(r"^[-*+•]\s+")
        number_re = re.compile(r"^\d+[.)]\s+")
        # \b and the mandatory colon keep titles such as "Topical ..." or
        # "Direction-finding ..." from being mistaken for a label.
        label_re = re.compile(
            r"^(?:Area of Interest|Area|Direction|Topic)\b\s*#?\d*\s*:\s*",
            re.IGNORECASE,
        )
        # Stand-alone sub-section captions that carry no content themselves.
        section_re = re.compile(
            r"^(?:(?:[\w-]+\s+){0,2}questions?|description)\s*:?$",
            re.IGNORECASE,
        )
        description_re = re.compile(r"^description\s*:\s*", re.IGNORECASE)
        question_re = re.compile(
            r"^(?:[-*]|\d+\.|\d+\))\s*"
            r"(?:What|How|Why|Which|Could|Is|Are|Does|Do|Can|To what extent|"
            r"In what ways|Where|When|Who|Should|Would|Might|Will|Has|Have|Did)"
            r"\b.+",
            re.IGNORECASE,
        )

        def classify(raw_line):
            """Return ``(kind, text)`` for a line, or ``None`` to ignore it."""
            text = raw_line.strip()
            if not text:
                return None

            is_heading = heading_re.match(text) is not None
            text = heading_re.sub("", text)
            text = emphasis_re.sub("", text).strip()
            # Horizontal rules and other decoration-only lines.
            if not any(ch.isalnum() for ch in text):
                return None

            body = bullet_re.sub("", text)
            if section_re.match(body):
                return None

            if not is_heading and (
                question_re.match(text)
                or (text.endswith("?") and len(text) < 200)
            ):
                return ("question", text)

            numbered = number_re.match(body)
            rest = body[numbered.end():] if numbered else body
            labelled = label_re.match(rest)
            if labelled or numbered or is_heading:
                title = rest[labelled.end():].strip() if labelled else rest.strip()
                if not title:
                    # e.g. "Area 1:" with nothing after the colon.
                    title = rest.rstrip(": ").strip()
                return ("header", title) if title else None

            detail = description_re.sub("", body).strip()
            return ("text", detail) if detail else None

        entries = [
            entry
            for entry in (classify(raw) for raw in response.splitlines())
            if entry is not None
        ]
        has_explicit_headers = any(kind == "header" for kind, _ in entries)

        areas: List[Dict[str, Any]] = []
        current = None
        for kind, text in entries:
            if (
                kind == "text"
                and not has_explicit_headers
                and (current is None or current["research_questions"])
            ):
                # Unstructured reply: infer the title from its position.
                kind = "header"

            if kind == "header":
                current = {
                    "statement": text,
                    "description": "",
                    "research_questions": [],
                }
                areas.append(current)
            elif current is None:
                # Preamble or stray question before the first area.
                continue
            elif kind == "question":
                current["research_questions"].append(text)
            else:
                current["description"] = f"{current['description']} {text}".strip()

        return areas
|
|
|