Spaces:

Samarth0710
/

PaperBanana

Running

App Files Files Community

PaperBanana / agents /planner.py

Samarth0710

Deploy PaperBanana app

572d3da verified 25 days ago

raw

history blame contribute delete

4.6 kB

	"""
	Planner Agent for PaperBanana framework.

	Serves as the cognitive core: translates unstructured methodology data
	into a comprehensive textual description of the target illustration.
	"""
	import os
	from typing import List, Dict, Any
	from google import genai
	from google.genai import types
	import config


	class PlannerAgent:
	    """
	    Planner Agent: translates a methodology into an illustration description.

	    Takes the source context S (methodology text) and communicative intent C
	    (the caption), optionally with retrieved reference examples E, and asks the
	    configured Gemini model for a detailed textual description P of the target
	    illustration.
	    """

	    # Upper bound on how many retrieved reference examples are quoted in the prompt.
	    MAX_REFERENCE_EXAMPLES = 3

	    def __init__(self):
	        """Create the Gemini client and record the model name, both read from ``config``."""
	        self.client = genai.Client(api_key=config.GEMINI_API_KEY)
	        self.model = config.VLM_MODEL

	    def plan(self,
	             methodology_text: str,
	             caption: str,
	             reference_examples: List[Dict[str, Any]] = None) -> str:
	        """
	        Generate a comprehensive textual description of the target illustration.

	        Args:
	            methodology_text: Source methodology description (S).
	            caption: Diagram caption (part of C).
	            reference_examples: Retrieved reference examples (E); may be None or empty.

	        Returns:
	            Detailed textual description P of the illustration, with surrounding
	            whitespace stripped. Empty string if the model streamed no text.
	        """
	        prompt = self._create_planning_prompt(methodology_text, caption, reference_examples)

	        contents = [
	            types.Content(
	                role="user",
	                parts=[types.Part.from_text(text=prompt)],
	            )
	        ]

	        generate_config = types.GenerateContentConfig(
	            thinking_config=types.ThinkingConfig(
	                thinking_level=config.THINKING_LEVEL
	            )
	        )

	        stream = self.client.models.generate_content_stream(
	            model=self.model,
	            contents=contents,
	            config=generate_config,
	        )
	        return self._join_stream_text(stream).strip()

	    @staticmethod
	    def _join_stream_text(chunks) -> str:
	        """Concatenate the ``text`` of each streamed chunk, skipping chunks without text."""
	        pieces = []
	        for chunk in chunks:
	            # The SDK's ``text`` accessor can be None for a chunk that carries no
	            # text parts; concatenating it blindly would raise TypeError mid-stream.
	            # Read it once per chunk and keep only non-empty values.
	            text = chunk.text
	            if text:
	                pieces.append(text)
	        return "".join(pieces)

	    def _create_planning_prompt(self,
	                                methodology_text: str,
	                                caption: str,
	                                reference_examples: List[Dict[str, Any]] = None) -> str:
	        """
	        Build the prompt that asks the model for an illustration description.

	        Args:
	            methodology_text: Methodology text embedded verbatim in the prompt.
	            caption: Target caption embedded verbatim in the prompt.
	            reference_examples: Optional list of dicts; the ``domain``,
	                ``diagram_type`` and ``description`` keys of the first
	                ``MAX_REFERENCE_EXAMPLES`` entries are quoted, with ``N/A``
	                substituted for any missing key.

	        Returns:
	            The complete prompt string.
	        """
	        # Include reference examples if available; otherwise the section is omitted.
	        reference_context = ""
	        if reference_examples:
	            sections = ["\n\nREFERENCE EXAMPLES (for inspiration):\n"]
	            top_examples = reference_examples[:self.MAX_REFERENCE_EXAMPLES]
	            for i, ref in enumerate(top_examples, 1):
	                sections.append(f"\nExample {i}:\n")
	                sections.append(f"Domain: {ref.get('domain', 'N/A')}\n")
	                sections.append(f"Type: {ref.get('diagram_type', 'N/A')}\n")
	                sections.append(f"Description: {ref.get('description', 'N/A')}\n")
	            reference_context = "".join(sections)

	        prompt = f"""You are an expert at designing academic methodology diagrams for scientific publications.

	Your task is to create a COMPREHENSIVE and DETAILED textual description of an illustration that would
	effectively visualize the given methodology. This description will be used to generate the actual diagram.

	METHODOLOGY TO VISUALIZE:
	{methodology_text}

	TARGET DIAGRAM CAPTION:
	{caption}
	{reference_context}

	REQUIREMENTS:
	1. Layout Structure: Specify the overall layout (left-to-right, top-to-bottom, circular, etc.)
	2. Components: List all visual elements needed (boxes, arrows, icons, labels, etc.)
	3. Content: What text/symbols should appear in each component
	4. Connections: How components connect (arrows, lines, groupings)
	5. Hierarchy: Which elements are primary vs secondary
	6. Grouping: How to group related components (containers, background colors)
	7. Flow: The logical flow of information through the diagram
	8. Key Details: Important technical details, equations, or annotations

	IMPORTANT GUIDELINES:
	- Be specific about spatial relationships and positioning
	- Describe the logical flow clearly (input → process → output)
	- Include any mathematical notation or technical terminology
	- Consider the target audience (academic researchers)
	- Focus on clarity and information density
	- Think about how this supports the paper's narrative

	OUTPUT FORMAT:
	Provide a detailed paragraph-form description that covers all aspects above.
	Be thorough - this description should be sufficient for someone to create the diagram without seeing the original methodology.
	"""
	        return prompt