# Scraped listing header — Spaces: Runtime error; File size: 3,912 Bytes; revision marker 6a42990 (presumably a commit hash).
from tinytroupe.agent.mental_faculty import TinyMentalFaculty
from tinytroupe.tools import browser
import textwrap
class BrowserFaculty(TinyMentalFaculty):
    """
    Mental faculty through which an agent can operate a web browser.

    Each supported action type is forwarded to a function of the ``browser``
    tools module, and a textual description of what happened is handed back
    to the agent via ``agent.see``.
    """

    def __init__(self):
        # Hand the label "Browser Navigation" to the base-class initializer.
        super().__init__("Browser Navigation")

    def process_action(self, agent, action: dict) -> bool:
        """
        Perform one browser action on behalf of ``agent``.

        The ``type`` entry of ``action`` selects the operation, while the
        ``target`` and ``content`` entries supply its arguments. Entries that
        are absent are read as ``None``.

        Returns ``True`` after a recognized action has been executed and
        reported to the agent, and ``False`` (without touching the browser or
        the agent) when the action type is not one handled here.
        """
        kind = action.get("type")
        text = action.get("content")
        selector = action.get("target")

        # Every branch performs the browser call first and then prepares the
        # observation; the agent is notified once, after the chain.
        if kind == "See":
            saved_to = browser.screenshot()
            observation = f"Took a screenshot and saved it to {saved_to}. I will now analyze the screenshot."
        elif kind == "Click":
            browser.click(selector)
            observation = f"Clicked on element with selector: {selector}"
        elif kind == "Write":
            browser.fill(selector, text)
            observation = f"Typed '{text}' into element with selector: {selector}"
        elif kind == "Submit":
            browser.submit_form(selector)
            observation = f"Submitted form with element: {selector}"
        elif kind == "Wait":
            browser.wait_for_element(selector)
            observation = f"Waited for element: {selector}"
        elif kind == "Scroll":
            browser.scroll_page(text)
            observation = f"Scrolled page {text}"
        elif kind == "Hover":
            browser.hover_element(selector)
            observation = f"Hovered over element: {selector}"
        elif kind == "Keyboard_Key":
            browser.press_key(text)
            observation = f"Pressed key: {text}"
        elif kind == "ScanPage":
            details = browser.get_page_info()
            observation = f"Scanned page and found the following information: {details}"
        else:
            # Not a browser action: report it as unhandled.
            return False

        agent.see(observation)
        return True

    def actions_definitions_prompt(self) -> str:
        """
        Build the prompt text that lists the browser actions and the meaning
        of their ``target`` / ``content`` fields.
        """
        # dedent removes the indentation shared by the lines below, so the
        # returned text starts each bullet at column 0.
        return textwrap.dedent("""
        - See: Take a screenshot of the current page. The `content` will be a placeholder for vision.
        - Click: Click on an element on the page. The `target` should be a CSS selector for the element.
        - Write: Type text into an element on the page. The `target` should be a CSS selector for the element, and `content` should be the text to type.
        - Submit: Submit a form on the page. The `target` should be a CSS selector for a form or an element within a form.
        - Wait: Wait for an element to appear on the page. The `target` should be a CSS selector for the element.
        - Scroll: Scroll the page. The `content` should be 'up' or 'down'.
        - Hover: Hover over an element on the page. The `target` should be a CSS selector for the element.
        - Keyboard_Key: Press a key on the keyboard. The `content` should be the key to press (e.g., 'Enter', 'ArrowDown').
        - ScanPage: Get information about the current page, such as links and form elements.
        """)

    def actions_constraints_prompt(self) -> str:
        """
        Build the prompt text that gives guidance on when to use the browser
        actions.
        """
        # dedent removes the indentation shared by the lines below, so the
        # returned text starts each bullet at column 0.
        return textwrap.dedent("""
        - Use See to get a visual representation of the page to help you decide on the next action.
        - Use ScanPage to get a list of interactive elements to help you decide on the next action.
        - Use Click, Write, and other actions to interact with elements on the page to accomplish the task.
        """)
|