Spaces:

AUXteam
/

tiny_factory

Runtime error

tiny_factory / tinytroupe /agent /browser_faculty.py

root

Import from HF Space harvesthealth/tiny_factory

6a42990 about 2 months ago

3.91 kB

	from tinytroupe.agent.mental_faculty import TinyMentalFaculty
	from tinytroupe.tools import browser
	import textwrap

	class BrowserFaculty(TinyMentalFaculty):
	    """
	    Mental faculty that lets an agent drive a web browser.

	    Every supported action is forwarded to the matching function of the
	    ``browser`` tools module, and a textual description of what was done is
	    handed back to the agent through ``agent.see``.
	    """

	    def __init__(self):
	        # The string is the name this faculty registers with its base class.
	        super().__init__("Browser Navigation")

	    def process_action(self, agent, action: dict) -> bool:
	        """
	        Carry out a browser action on behalf of ``agent``.

	        Args:
	            agent: Object whose ``see`` method receives a description of
	                the performed action.
	            action: Mapping read for the keys ``"type"`` (action name),
	                ``"content"`` and ``"target"``; missing keys read as
	                ``None``.

	        Returns:
	            ``True`` when ``action["type"]`` names one of the browser
	            actions handled here, ``False`` otherwise (nothing is done in
	            that case).
	        """
	        kind = action.get("type")
	        text = action.get("content")
	        selector = action.get("target")

	        # Each branch performs the browser call first and only then builds
	        # the observation; the agent is notified once, after the chain.
	        if kind == "See":
	            shot_path = browser.screenshot()
	            observation = f"Took a screenshot and saved it to {shot_path}. I will now analyze the screenshot."
	        elif kind == "Click":
	            browser.click(selector)
	            observation = f"Clicked on element with selector: {selector}"
	        elif kind == "Write":
	            browser.fill(selector, text)
	            observation = f"Typed '{text}' into element with selector: {selector}"
	        elif kind == "Submit":
	            browser.submit_form(selector)
	            observation = f"Submitted form with element: {selector}"
	        elif kind == "Wait":
	            browser.wait_for_element(selector)
	            observation = f"Waited for element: {selector}"
	        elif kind == "Scroll":
	            browser.scroll_page(text)
	            observation = f"Scrolled page {text}"
	        elif kind == "Hover":
	            browser.hover_element(selector)
	            observation = f"Hovered over element: {selector}"
	        elif kind == "Keyboard_Key":
	            browser.press_key(text)
	            observation = f"Pressed key: {text}"
	        elif kind == "ScanPage":
	            scan_result = browser.get_page_info()
	            observation = f"Scanned page and found the following information: {scan_result}"
	        else:
	            # Not a browser action: report it as unhandled.
	            return False

	        agent.see(observation)
	        return True

	    def actions_definitions_prompt(self) -> str:
	        """
	        Return the prompt text listing the browser actions and the meaning
	        of their ``target`` / ``content`` fields.
	        """
	        # dedent strips the indentation the literal picks up from the source.
	        return textwrap.dedent("""
	        - See: Take a screenshot of the current page. The `content` will be a placeholder for vision.
	        - Click: Click on an element on the page. The `target` should be a CSS selector for the element.
	        - Write: Type text into an element on the page. The `target` should be a CSS selector for the element, and `content` should be the text to type.
	        - Submit: Submit a form on the page. The `target` should be a CSS selector for a form or an element within a form.
	        - Wait: Wait for an element to appear on the page. The `target` should be a CSS selector for the element.
	        - Scroll: Scroll the page. The `content` should be 'up' or 'down'.
	        - Hover: Hover over an element on the page. The `target` should be a CSS selector for the element.
	        - Keyboard_Key: Press a key on the keyboard. The `content` should be the key to press (e.g., 'Enter', 'ArrowDown').
	        - ScanPage: Get information about the current page, such as links and form elements.
	        """)

	    def actions_constraints_prompt(self) -> str:
	        """
	        Return the prompt text with usage guidance for the browser actions.
	        """
	        # dedent strips the indentation the literal picks up from the source.
	        return textwrap.dedent("""
	        - Use See to get a visual representation of the page to help you decide on the next action.
	        - Use ScanPage to get a list of interactive elements to help you decide on the next action.
	        - Use Click, Write, and other actions to interact with elements on the page to accomplish the task.
	        """)