Spaces:

Boobs00
/

use

Configuration error

App Files Files Community

use / browser_use /agent /prompts.py

Boobs00

Upload folder using huggingface_hub

db4810d verified 10 months ago

raw

history blame contribute delete

5.38 kB

	import datetime
	import importlib.resources
	from datetime import datetime
	from typing import TYPE_CHECKING, List, Optional

	from langchain_core.messages import HumanMessage, SystemMessage

	if TYPE_CHECKING:
	from browser_use.agent.views import ActionResult, AgentStepInfo
	from browser_use.browser.views import BrowserState


	class SystemPrompt:
		"""Build the system message that is handed to the agent.

		The prompt text is either the caller-supplied override or the packaged
		``system_prompt.md`` template formatted with ``max_actions``. In both
		cases an optional extension is appended on its own line.
		"""

		def __init__(
			self,
			action_description: str,
			max_actions_per_step: int = 10,
			override_system_message: Optional[str] = None,
			extend_system_message: Optional[str] = None,
		):
			"""
			Assemble the prompt and wrap it in a ``SystemMessage``.

			Args:
				action_description: Stored as ``default_action_description``; it is not
					inserted into the prompt by this class.
				max_actions_per_step: Value substituted for ``{max_actions}`` in the template.
				override_system_message: If non-empty, used verbatim instead of the template.
				extend_system_message: If non-empty, appended to the prompt after a newline.

			Raises:
				RuntimeError: If the packaged template cannot be loaded.
			"""
			self.default_action_description = action_description
			self.max_actions_per_step = max_actions_per_step

			if override_system_message:
				prompt = override_system_message
			else:
				# The template is only read when no override is supplied.
				self._load_prompt_template()
				prompt = self.prompt_template.format(max_actions=self.max_actions_per_step)

			if extend_system_message:
				prompt += f'\n{extend_system_message}'

			self.system_message = SystemMessage(content=prompt)

		def _load_prompt_template(self) -> None:
			"""Load the prompt template from the markdown file into ``self.prompt_template``.

			Raises:
				RuntimeError: If the resource cannot be located, opened or decoded.
			"""
			try:
				# This works both in development and when installed as a package.
				template = importlib.resources.files('browser_use.agent').joinpath('system_prompt.md')
				# Decode explicitly as UTF-8 so the result does not depend on the
				# platform's default locale encoding.
				with template.open('r', encoding='utf-8') as f:
					self.prompt_template = f.read()
			except Exception as e:
				# Chain the original exception so the root cause stays in the traceback.
				raise RuntimeError(f'Failed to load system prompt template: {e}') from e

		def get_system_message(self) -> SystemMessage:
			"""
			Get the system prompt for the agent.

			Returns:
				SystemMessage: The message built once in ``__init__``.
			"""
			return self.system_message


	# Functions:
	# {self.default_action_description}

	# Example:
	# {self.example_response()}
	# Your AVAILABLE ACTIONS:
	# {self.default_action_description}


	class AgentMessagePrompt:
		"""Render the current browser state and previous action results as a user message."""

		def __init__(
			self,
			state: 'BrowserState',
			result: Optional[List['ActionResult']] = None,
			include_attributes: Optional[list[str]] = None,
			step_info: Optional['AgentStepInfo'] = None,
		):
			"""
			Store the inputs used later by ``get_user_message``.

			Args:
				state: Browser state; its ``element_tree``, ``pixels_above``, ``pixels_below``,
					``url``, ``tabs`` and ``screenshot`` attributes are read.
				result: Results of earlier actions; their ``extracted_content`` and ``error``
					are appended to the message when set.
				include_attributes: Forwarded to ``clickable_elements_to_string``.
					Defaults to an empty list.
				step_info: If given, ``step_number`` and ``max_steps`` are reported.
			"""
			self.state = state
			self.result = result
			# Create a fresh list per instance instead of sharing one mutable default
			# object between every AgentMessagePrompt.
			self.include_attributes = include_attributes if include_attributes is not None else []
			self.step_info = step_info

		def _describe_elements(self) -> str:
			"""Return the interactive-elements text framed by scroll/page-boundary markers."""
			elements_text = self.state.element_tree.clickable_elements_to_string(include_attributes=self.include_attributes)
			if elements_text == '':
				return 'empty page'

			# A missing (None) pixel count is treated the same as zero.
			if (self.state.pixels_above or 0) > 0:
				header = f'... {self.state.pixels_above} pixels above - scroll or extract content to see more ...'
			else:
				header = '[Start of page]'

			if (self.state.pixels_below or 0) > 0:
				footer = f'... {self.state.pixels_below} pixels below - scroll or extract content to see more ...'
			else:
				footer = '[End of page]'

			return f'{header}\n{elements_text}\n{footer}'

		def _describe_step(self) -> str:
			"""Return the step counter (when available) and the current local date/time."""
			lines = []
			if self.step_info:
				# step_number is shown one higher than stored.
				lines.append(f'Current step: {self.step_info.step_number + 1}/{self.step_info.max_steps}')
			time_str = datetime.now().strftime('%Y-%m-%d %H:%M')
			lines.append(f'Current date and time: {time_str}')
			# One entry per line, so the step counter and the date do not run together.
			return '\n'.join(lines)

		def _describe_results(self) -> str:
			"""Return one line per extracted content / error of the previous action results."""
			if not self.result:
				return ''

			total = len(self.result)
			description = ''
			for i, result in enumerate(self.result):
				if result.extracted_content:
					description += f'\nAction result {i + 1}/{total}: {result.extracted_content}'
				if result.error:
					# Only use the last line of the error. Trailing whitespace is dropped
					# first so an error text ending in a newline does not yield an empty line.
					error = result.error.rstrip().split('\n')[-1]
					description += f'\nAction error {i + 1}/{total}: ...{error}'
			return description

		def get_user_message(self, use_vision: bool = True) -> HumanMessage:
			"""
			Build the user message describing the current state.

			Args:
				use_vision: When ``True`` and the state has a screenshot, the message content is
					a list with a text part and a base64 PNG ``image_url`` part.

			Returns:
				HumanMessage: Text-only content, or text plus screenshot as described above.
			"""
			elements_text = self._describe_elements()
			step_info_description = self._describe_step()

			state_description = f"""
	[Task history memory ends]
	[Current state starts here]
	The following is one-time information - if you need to remember it write it to memory:
	Current url: {self.state.url}
	Available tabs:
	{self.state.tabs}
	Interactive elements from top layer of the current page inside the viewport:
	{elements_text}
	{step_info_description}
	"""

			state_description += self._describe_results()

			# Explicit comparison kept on purpose: truthy non-bool values other than
			# ones equal to True must not enable the vision format.
			if self.state.screenshot and use_vision == True:
				# Format message for vision model
				return HumanMessage(
					content=[
						{'type': 'text', 'text': state_description},
						{
							'type': 'image_url',
							'image_url': {'url': f'data:image/png;base64,{self.state.screenshot}'},  # , 'detail': 'low'
						},
					]
				)

			return HumanMessage(content=state_description)


	class PlannerPrompt(SystemPrompt):
		"""System prompt variant that carries the planning agent's instructions."""

		def get_system_message(self) -> SystemMessage:
			"""
			Get the system prompt for the planner.

			Returns:
				SystemMessage: A newly created message holding the fixed planner instructions.
			"""
			planner_instructions = """You are a planning agent that helps break down tasks into smaller steps and reason about the current state.
	Your role is to:
	1. Analyze the current state and history
	2. Evaluate progress towards the ultimate goal
	3. Identify potential challenges or roadblocks
	4. Suggest the next high-level steps to take

	Inside your messages, there will be AI messages from different agents with different formats.

	Your output format should be always a JSON object with the following fields:
	{
	"state_analysis": "Brief analysis of the current state and what has been done so far",
	"progress_evaluation": "Evaluation of progress towards the ultimate goal (as percentage and description)",
	"challenges": "List any potential challenges or roadblocks",
	"next_steps": "List 2-3 concrete next steps to take",
	"reasoning": "Explain your reasoning for the suggested next steps"
	}

	Ignore the other AI messages output structures.

	Keep your responses concise and focused on actionable insights."""
			return SystemMessage(content=planner_instructions)