|
|
import datetime |
|
|
import importlib.resources |
|
|
from datetime import datetime |
|
|
from typing import TYPE_CHECKING, List, Optional |
|
|
|
|
|
from langchain_core.messages import HumanMessage, SystemMessage |
|
|
|
|
|
if TYPE_CHECKING: |
|
|
from browser_use.agent.views import ActionResult, AgentStepInfo |
|
|
from browser_use.browser.views import BrowserState |
|
|
|
|
|
|
|
|
class SystemPrompt:
    """Builds the system message handed to the agent.

    The prompt text is either the caller-supplied override or the
    ``system_prompt.md`` template shipped in the ``browser_use.agent``
    package, formatted with ``max_actions``. An optional extension is
    appended on a new line to whichever text was chosen.
    """

    def __init__(
        self,
        action_description: str,
        max_actions_per_step: int = 10,
        override_system_message: Optional[str] = None,
        extend_system_message: Optional[str] = None,
    ):
        """Assemble the prompt and wrap it in a ``SystemMessage``.

        Args:
            action_description: Stored as ``default_action_description``.
            max_actions_per_step: Substituted for ``{max_actions}`` in the
                packaged template.
            override_system_message: When non-empty, used verbatim instead of
                the packaged template (the template is not loaded at all).
            extend_system_message: When non-empty, appended after a newline.

        Raises:
            RuntimeError: If the packaged template cannot be loaded.
        """
        self.default_action_description = action_description
        self.max_actions_per_step = max_actions_per_step

        if override_system_message:
            prompt = override_system_message
        else:
            self._load_prompt_template()
            prompt = self.prompt_template.format(max_actions=self.max_actions_per_step)

        if extend_system_message:
            prompt += f'\n{extend_system_message}'

        self.system_message = SystemMessage(content=prompt)

    def _load_prompt_template(self) -> None:
        """Load the prompt template from the markdown file.

        Reads ``system_prompt.md`` from the ``browser_use.agent`` package into
        ``self.prompt_template``.

        Raises:
            RuntimeError: If the resource cannot be located or read; the
                underlying exception is chained as ``__cause__``.
        """
        try:
            template_file = importlib.resources.files('browser_use.agent').joinpath('system_prompt.md')
            # Explicit UTF-8 so decoding does not depend on the platform locale.
            with template_file.open('r', encoding='utf-8') as f:
                self.prompt_template = f.read()
        except Exception as e:
            # Chain the cause so the original traceback stays attached.
            raise RuntimeError(f'Failed to load system prompt template: {e}') from e

    def get_system_message(self) -> 'SystemMessage':
        """
        Get the system prompt for the agent.

        Returns:
            SystemMessage: Formatted system prompt
        """
        return self.system_message
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AgentMessagePrompt:
    """Renders the current browser state and prior action results as a user message."""

    def __init__(
        self,
        state: 'BrowserState',
        result: Optional[List['ActionResult']] = None,
        include_attributes: Optional[list[str]] = None,
        step_info: Optional['AgentStepInfo'] = None,
    ):
        """Store the inputs used to render the message.

        Args:
            state: Browser state; this class reads its ``element_tree``,
                ``pixels_above``, ``pixels_below``, ``url``, ``tabs`` and
                ``screenshot`` attributes.
            result: Results of earlier actions, appended to the message text.
            include_attributes: Forwarded to
                ``element_tree.clickable_elements_to_string``. Defaults to a
                new empty list.
            step_info: When given, a ``Current step: n/max`` line is emitted.
        """
        self.state = state
        self.result = result
        # A fresh list per instance: a literal `[]` default would be one
        # object shared by every instance created without this argument.
        self.include_attributes = include_attributes if include_attributes is not None else []
        self.step_info = step_info

    def _describe_elements(self) -> str:
        """Return the clickable-elements listing framed by scroll-position markers."""
        elements_text = self.state.element_tree.clickable_elements_to_string(
            include_attributes=self.include_attributes
        )
        if elements_text == '':
            return 'empty page'

        # pixels_above / pixels_below may be None; treat that as zero.
        if (self.state.pixels_above or 0) > 0:
            header = f'... {self.state.pixels_above} pixels above - scroll or extract content to see more ...'
        else:
            header = '[Start of page]'

        if (self.state.pixels_below or 0) > 0:
            footer = f'... {self.state.pixels_below} pixels below - scroll or extract content to see more ...'
        else:
            footer = '[End of page]'

        return f'{header}\n{elements_text}\n{footer}'

    def _describe_step(self) -> str:
        """Return the optional step counter followed by the current local date and time."""
        time_str = datetime.now().strftime('%Y-%m-%d %H:%M')
        time_line = f'Current date and time: {time_str}'
        if self.step_info:
            step_line = f'Current step: {self.step_info.step_number + 1}/{self.step_info.max_steps}'
            # Separate the two facts; previously they were fused into
            # "Current step: 1/10Current date and time: ...".
            return f'{step_line}\n{time_line}'
        return time_line

    def _describe_results(self) -> str:
        """Return one line per extracted content / error of the previous actions."""
        if not self.result:
            return ''

        total = len(self.result)
        parts = []
        for position, result in enumerate(self.result, start=1):
            if result.extracted_content:
                parts.append(f'\nAction result {position}/{total}: {result.extracted_content}')
            if result.error:
                # Only the last line of the error text is reported.
                error = result.error.split('\n')[-1]
                parts.append(f'\nAction error {position}/{total}: ...{error}')
        return ''.join(parts)

    def get_user_message(self, use_vision: bool = True) -> 'HumanMessage':
        """Build the message describing the current state.

        Args:
            use_vision: When truthy and the state has a screenshot, the
                screenshot is attached as a base64 PNG data URL.

        Returns:
            HumanMessage: Plain-text content, or a two-part list (text and
            ``image_url``) when the screenshot is attached.
        """
        elements_text = self._describe_elements()
        step_info_description = self._describe_step()

        state_description = f"""
[Task history memory ends]
[Current state starts here]
The following is one-time information - if you need to remember it write it to memory:
Current url: {self.state.url}
Available tabs:
{self.state.tabs}
Interactive elements from top layer of the current page inside the viewport:
{elements_text}
{step_info_description}
"""
        state_description += self._describe_results()

        if self.state.screenshot and use_vision:
            return HumanMessage(
                content=[
                    {'type': 'text', 'text': state_description},
                    {
                        'type': 'image_url',
                        'image_url': {'url': f'data:image/png;base64,{self.state.screenshot}'},
                    },
                ]
            )

        return HumanMessage(content=state_description)
|
|
|
|
|
|
|
|
class PlannerPrompt(SystemPrompt):
    """System prompt variant whose message is the fixed planner instructions."""

    def get_system_message(self) -> SystemMessage:
        """Return the planner instructions wrapped in a ``SystemMessage``.

        The text is a constant; nothing set up by ``SystemPrompt.__init__``
        is read here.
        """
        planner_instructions = """You are a planning agent that helps break down tasks into smaller steps and reason about the current state.
Your role is to:
1. Analyze the current state and history
2. Evaluate progress towards the ultimate goal
3. Identify potential challenges or roadblocks
4. Suggest the next high-level steps to take

Inside your messages, there will be AI messages from different agents with different formats.

Your output format should be always a JSON object with the following fields:
{
"state_analysis": "Brief analysis of the current state and what has been done so far",
"progress_evaluation": "Evaluation of progress towards the ultimate goal (as percentage and description)",
"challenges": "List any potential challenges or roadblocks",
"next_steps": "List 2-3 concrete next steps to take",
"reasoning": "Explain your reasoning for the suggested next steps"
}

Ignore the other AI messages output structures.

Keep your responses concise and focused on actionable insights."""
        return SystemMessage(content=planner_instructions)
|
|
|