# Generation Agent for proposing initial hypotheses
import logging
import re
from typing import Any, Dict, List, Optional, Tuple

from .base_agent import BaseAgent

# Patterns used to classify individual lines of the LLM response.
# They are compiled once at import time instead of on every parse call.

# Horizontal rules such as "---", "***" or "===" carry no content.
_RULE_LINE = re.compile(r"^([-*_=])\1{2,}$")
# Markdown heading prefix ("# ", "## ", ...).
_HEADING_PREFIX = re.compile(r"^#{1,6}\s+")
# Ordered-list marker ("1. " or "1) "). Whitespace is required after the
# marker so that decimals such as "1.5" are not mistaken for list items.
_NUMBER_MARKER = re.compile(r"^\d+[.)]\s+")
# Unordered-list marker. Whitespace is required so "**bold**" is not a bullet.
_BULLET_MARKER = re.compile(r"^[-*+\u2022]\s+")
# Explicit area label, e.g. "Area of Interest 2: Title" or "Topic - Title".
_AREA_LABEL = re.compile(
    r"^(?:Area of Interest|Area|Direction|Topic)(?:\s*#?\d+)?"
    r"\s*(?::|\s[-\u2013\u2014]\s)\s*(.+)$",
    re.IGNORECASE,
)
# "Research Questions:" section header, or "Research Question 1: <text>".
_QUESTIONS_LABEL = re.compile(
    r"^(?:Research\s+)?Questions?(?:\s*#?\d+)?\s*(?::\s*(.*))?$",
    re.IGNORECASE,
)
# "Description:" header, optionally followed by the description text.
_DESCRIPTION_LABEL = re.compile(r"^Description\s*(?::\s*(.*))?$", re.IGNORECASE)
# Words that typically open a research question.
_QUESTION_START = re.compile(
    r"^(?:What|How|Why|Which|Could|Is|Are|Does|Do|Can|To what extent|"
    r"In what ways|Where|When|Who|Should|Would|Might|Will|Has|Have|Did)\b",
    re.IGNORECASE,
)
# Unmarked lines ending in "?" are only treated as questions below this
# length (same limit the original heuristic used).
_MAX_UNMARKED_QUESTION_LENGTH = 200


class GenerationAgent(BaseAgent):
    """Agent responsible for generating initial hypotheses based on the research goal."""

    def __init__(self, model=None, temperature=None):
        """Initialize the Generation Agent.

        Args:
            model: Optional model override
            temperature: Optional temperature override (defaults to 0.7)
        """
        system_prompt = """
        You are a Generation Agent in an AI Co-Scientist system, responsible for proposing novel areas of interest and potential research directions based on the user's research goal.
        You have expertise across multiple scientific disciplines at a PhD level.

        Your role is to:
        1. Suggest diverse, novel, and relevant areas of interest or research directions that the user might not have considered, based on the research goal provided.
        2. Leverage your broad knowledge to surface surprising or emerging topics, interdisciplinary connections, and new angles for investigation.
        3. For each area of interest, generate multiple specific research questions that could guide further investigation into new aspects of the topic.
        4. Ensure each area of interest is clearly described and the research questions are actionable and thought-provoking.

        Your output should include:
        - A list of distinct areas of interest (not just hypotheses), each with:
            - A clear description of the area or direction
            - 2-3 research questions that could be explored within this area

        Remember:
        - Areas of interest should be relevant to the research goal, but can be tangential or surprising if they open up new avenues for discovery.
        - Research questions should be specific, actionable, and designed to inspire further investigation.
        - Avoid repeating the research goal verbatim; instead, expand on it with new perspectives.
        """

        super().__init__(
            name="Generation",
            system_prompt=system_prompt,
            model=model,
            # Higher temperature for creativity
            temperature=temperature if temperature is not None else 0.7,
        )
        self.logger = logging.getLogger("agent.generation")

    def generate_hypotheses(self, research_goal: str, count: int = 5) -> List[Dict[str, Any]]:
        """Generate initial hypotheses based on the research goal.

        Args:
            research_goal: The research goal or question
            count: Number of hypotheses to generate. The value is passed to the
                prompt and also used as an upper bound on the returned list.

        Returns:
            A list of at most ``count`` hypothesis dictionaries (see
            ``_parse_areas_of_interest`` for their keys). Returns an empty
            list, without querying the model, when ``count`` is less than 1.
        """
        self.logger.info("Generating %s hypotheses for research goal: %s", count, research_goal)
        if count < 1:
            # Nothing was asked for, so do not spend a model call on it.
            self.logger.warning("Requested hypothesis count %s is not positive; returning no hypotheses", count)
            return []
        # The model may return more areas than requested; honor the contract.
        return self.process(research_goal, count=count)[:count]

    def process(self, research_goal: str, count: Optional[int] = None) -> list:
        """Generate areas of interest and research questions based on the research goal.

        Args:
            research_goal: The research goal or question
            count: Optional number of areas to request from the model. When
                omitted (or not positive) the prompt asks for "at least 3-5".

        Returns:
            The parsed areas of interest, as produced by
            ``_parse_areas_of_interest``.
        """
        self.logger.info("Generating hypotheses for research goal: %s", research_goal)
        quantity = f"exactly {count}" if count is not None and count > 0 else "at least 3-5"
        prompt = f"""
        RESEARCH GOAL: {research_goal}

        Based on the research goal above, suggest {quantity} distinct AREAS OF INTEREST or potential research directions that the user might not have considered.

        For each area of interest:
        - Provide a clear description of the area or direction
        - Generate 2-3 specific research questions that could be explored within this area

        Format your response as a structured list, with each area of interest clearly separated, and each research question listed under its area.

        Be creative, leverage your broad knowledge, and focus on novelty and relevance.
        """
        response = self.get_response(prompt)
        self.logger.info("Raw LLM response: %s", response)
        return self._parse_areas_of_interest(response)

    @staticmethod
    def _classify_line(line: str, has_area: bool) -> Tuple[str, str]:
        """Classify one response line as area heading, question, description or noise.

        Args:
            line: A single raw line of the LLM response.
            has_area: Whether an area heading has already been seen; used to
                decide what an otherwise unlabeled line belongs to.

        Returns:
            A ``(kind, text)`` tuple where ``kind`` is one of ``"area"``,
            ``"question"``, ``"description"`` or ``"skip"`` and ``text`` is the
            line content with list markers and ``**`` emphasis removed.
        """
        text = line.strip()
        if not text or _RULE_LINE.match(text):
            return "skip", ""

        heading = _HEADING_PREFIX.match(text)
        if heading:
            text = text[heading.end():]

        # A line that is bold from start to end is most likely a title.
        fully_bold = len(text) > 4 and text.startswith("**") and text.endswith("**")
        text = text.replace("**", "").strip()

        numbered = _NUMBER_MARKER.match(text)
        bulleted = None if numbered else _BULLET_MARKER.match(text)
        marker = numbered or bulleted
        body = text[marker.end():].strip() if marker else text
        if not body:
            return "skip", ""

        # Explicit labels are the strongest signal, so they are checked first.
        label = _AREA_LABEL.match(body)
        if label:
            return "area", label.group(1)

        label = _QUESTIONS_LABEL.match(body)
        if label:
            rest = (label.group(1) or "").strip()
            # A bare "Research Questions:" header only introduces what follows.
            return ("question", rest) if rest else ("skip", "")

        label = _DESCRIPTION_LABEL.match(body)
        if label:
            rest = (label.group(1) or "").strip()
            return ("description", rest) if rest else ("skip", "")

        if heading:
            return "area", body

        # Questions are tested before generic numbered items so that a
        # numbered list of questions is not mistaken for a list of areas.
        starts_like_question = marker is not None and _QUESTION_START.match(body) is not None
        short_question = body.endswith("?") and len(body) < _MAX_UNMARKED_QUESTION_LENGTH
        if starts_like_question or short_question:
            return "question", body

        if numbered or fully_bold:
            return "area", body

        if bulleted:
            # A bullet under an area elaborates on it; a bullet seen before
            # any area is taken to be an area itself.
            return ("description" if has_area else "area"), body

        # Unmarked text: "intro:" style lines and preamble before the first
        # area are noise; anything else describes the current area.
        if body.endswith(":") or not has_area:
            return "skip", ""
        return "description", body

    def _parse_areas_of_interest(self, response: str) -> list:
        """
        Robustly parse LLM output for areas of interest and their research questions.

        Handles numbered/bulleted lists, markdown headings, bold titles and
        explicit labels ("Area of Interest:", "Description:", "Research
        Questions:"). Only lines that look like a heading start a new area;
        other lines are attached to the most recent area. Content that appears
        before the first area is ignored.

        Args:
            response: Raw text returned by the model. May be empty or None.

        Returns:
            A list of dicts, in order of appearance:
            {
                'statement': area title,
                'description': free text found under the area ('' if none),
                'research_questions': [question, ...]
            }
        """
        if not response:
            return []

        areas: List[Dict[str, Any]] = []
        current: Optional[Dict[str, Any]] = None

        for line in response.splitlines():
            kind, text = self._classify_line(line, current is not None)

            if kind == "area":
                # Titles are often written as "1. Title:"; drop the colon.
                title = text.strip().rstrip(":").strip()
                if title:
                    current = {
                        'statement': title,
                        'description': '',
                        'research_questions': [],
                    }
                    areas.append(current)
            elif current is None:
                # Nothing to attach questions or descriptions to yet.
                continue
            elif kind == "question":
                current['research_questions'].append(text)
            elif kind == "description":
                current['description'] = f"{current['description']} {text}".strip()

        return areas