Spaces:

frankystew1
/

AutomatedBrowserAgent

Build error

App Files Files Community

AutomatedBrowserAgent / app.py

frankystew1

Upload app.py with huggingface_hub

afc1327 verified 5 months ago

raw

history blame contribute delete

9.5 kB

	import json
	import time
	from typing import Dict, List, Optional, Tuple
	from dataclasses import dataclass
	from enum import Enum

	# Assuming these libraries are available for browser automation and LLM interaction
	# from playwright.sync_api import sync_playwright
	# import openai # or another LLM API client

	# Define enums for status tracking
	class Status(Enum):
	    """Outcome marker shared by sub-goals and task results."""

	    # The step or task finished as intended.
	    SUCCESS = "success"
	    # The step or task did not finish as intended.
	    FAILURE = "failure"
	    # Nothing has been attempted yet; used as the SubGoal default.
	    PENDING = "pending"

	# Data classes for structured data handling
	@dataclass
	class SubGoal:
	    """A single step of a browser-automation plan."""

	    # Identifier of the step within its plan (the mock plans use "1", "2a", ...).
	    id: str
	    # Natural-language description of the action to perform.
	    description: str
	    # What the page is expected to look like once the step is done.
	    expected_state: str
	    # Progress marker; every new sub-goal starts out pending.
	    status: Status = Status.PENDING

	@dataclass
	class TaskResult:
	    """Outcome of a single action or of a whole task run."""

	    # Whether the action/task succeeded or failed.
	    status: Status
	    # Optional payload produced along the way (e.g. extracted data).
	    output_data: Optional[Dict] = None
	    # Optional human-readable reason accompanying a failure.
	    error_message: Optional[str] = None

	# Core modules of the agent
	class PlannerModule:
	    """
	    Planner Module: turns a user command into an ordered list of sub-goals
	    and proposes alternative steps when one of them fails.

	    The LLM calls are not wired up yet; both planning methods build their
	    prompt and then return fixed mock steps.
	    """

	    def __init__(self, llm_client):
	        self.llm_client = llm_client
	        # Scratch state for the current task.
	        self.working_memory = dict(
	            current_state="",
	            completed_subgoals=[],
	            pending_subgoals=[],
	        )

	    def create_plan(self, user_command: str) -> List[SubGoal]:
	        """
	        Build the initial plan for ``user_command``.

	        The returned list is also stored (the very same list object) under
	        ``working_memory["pending_subgoals"]``.
	        """
	        prompt = f"""
	        Convert the following user command into discrete browser automation steps:
	        "{user_command}"

	        Return a JSON array of steps with these keys:
	        - id: unique identifier for the step
	        - description: detailed description of the action to take
	        - expected_state: what the page should look like after completion

	        Example format:
	        [
	        {{
	        "id": "1",
	        "description": "Navigate to https://example.com",
	        "expected_state": "Homepage with logo visible"
	        }},
	        {{
	        "id": "2",
	        "description": "Click on the 'Login' button",
	        "expected_state": "Login form appears with username and password fields"
	        }}
	        ]
	        """

	        # response = self.llm_client.generate_response(prompt)
	        # Demonstration only: a canned three-step plan stands in for the LLM.
	        step_specs = (
	            ("1", "Navigate to website", "Page loaded"),
	            ("2", "Click sign-in button", "Login form visible"),
	            ("3", "Enter credentials", "User logged in"),
	        )
	        steps = [
	            SubGoal(id=step_id, description=action, expected_state=outcome)
	            for step_id, action, outcome in step_specs
	        ]
	        self.working_memory["pending_subgoals"] = steps
	        return steps

	    def replan(self, failed_subgoal: SubGoal, error_reason: str) -> List[SubGoal]:
	        """
	        Propose replacement steps for ``failed_subgoal`` given ``error_reason``.

	        Working memory is left untouched.
	        """
	        prompt = f"""
	        The following step failed: "{failed_subgoal.description}"
	        Reason: "{error_reason}"

	        Generate alternative steps to achieve the same goal.
	        """

	        # response = self.llm_client.generate_response(prompt)
	        # Demonstration only: two canned alternatives stand in for the LLM.
	        fallback_specs = (
	            ("2a", "Click alternative sign-in button"),
	            ("2b", "Wait for 5 seconds and retry click"),
	        )
	        return [
	            SubGoal(id=step_id, description=action, expected_state="Login form visible")
	            for step_id, action in fallback_specs
	        ]


	class ActorModule:
	    """
	    Actor Module: executes browser actions based on sub-goals.

	    The action is chosen by keyword-matching the sub-goal description
	    ("navigate", "click", "enter", checked in that order).
	    """

	    def __init__(self, browser_controller):
	        self.browser = browser_controller

	    def execute_action(self, subgoal: SubGoal) -> TaskResult:
	        """
	        Perform the action described by ``subgoal`` in the browser.

	        Returns a SUCCESS result when a recognised action ran without raising.
	        Returns a FAILURE result carrying the reason when the browser call
	        raised, or when the description matches no known action (previously
	        such a step was silently reported as successful).
	        """
	        try:
	            description = subgoal.description.lower()
	            if "navigate" in description:
	                # The target is taken to be the last whitespace-separated
	                # token; split() (not split(" ")) ignores trailing blanks.
	                url = subgoal.description.split()[-1]
	                self.browser.navigate(url)
	            elif "click" in description:
	                # NOTE: the click target is a hard-coded placeholder.
	                element = self.browser.find_element_by_text("Sign In")
	                self.browser.click(element)
	            elif "enter" in description:
	                # NOTE: the field label and typed value are hard-coded placeholders.
	                input_field = self.browser.find_element_by_label("username")
	                self.browser.type(input_field, "user@example.com")
	            else:
	                # Nothing was executed, so the step must not count as done.
	                return TaskResult(
	                    status=Status.FAILURE,
	                    error_message=f"Unsupported action: {subgoal.description!r}",
	                )
	        except Exception as e:
	            # Boundary handler: any browser error becomes a FAILURE result so
	            # the caller can replan instead of crashing.
	            return TaskResult(status=Status.FAILURE, error_message=str(e))

	        return TaskResult(status=Status.SUCCESS)


	class ValidatorModule:
	    """
	    Validator Module: decides whether a sub-goal was actually achieved.

	    The LLM call is not wired up yet; the verdict below is a fixed mock.
	    """

	    def __init__(self, llm_client):
	        self.llm_client = llm_client

	    def validate(self, subgoal: SubGoal, browser_state: Dict) -> Tuple[Status, str]:
	        """
	        Judge ``subgoal`` against ``browser_state``.

	        Returns a ``(status, message)`` pair. ``browser_state`` must be
	        JSON-serialisable because it is embedded in the prompt.
	        """
	        prompt = f"""
	        Goal: {subgoal.description}
	        Expected State: {subgoal.expected_state}
	        Current State: {json.dumps(browser_state)}

	        Has the goal been successfully achieved? Respond with YES or NO followed by reason.
	        """

	        # response = self.llm_client.generate_response(prompt)
	        # Demonstration only: every step except the one with id "2" is rejected.
	        if subgoal.id != "2":
	            return (Status.FAILURE, "Element not found or page not loaded as expected")
	        return (Status.SUCCESS, "Login form is visible")


	# Main automation agent using planner-actor-validator loop
	class SkyvernAgent:
	    """
	    Main automation agent implementing the Planner-Actor-Validator loop.

	    All collaborators are currently constructed without an LLM client or a
	    browser controller, and the browser-state / data-extraction helpers
	    return mock values.
	    """

	    def __init__(self):
	        # TODO: create a real LLM client and browser controller and pass them
	        # to the modules below instead of None.
	        self.planner = PlannerModule(llm_client=None)
	        self.actor = ActorModule(browser_controller=None)
	        self.validator = ValidatorModule(llm_client=None)
	        self.settings = {}

	    def run_task(self, user_command: str, settings: Dict) -> TaskResult:
	        """
	        Plan ``user_command`` and work through the plan step by step.

	        Each step is executed by the actor and, if that succeeds, checked by
	        the validator. A failed step is replaced by the planner's alternative
	        steps, which are put at the front of the remaining plan.

	        Settings read here: ``max_steps`` (default 100) bounds the number of
	        executed steps; ``data_schema``, when present, triggers data
	        extraction after every validated step (the last extraction wins).
	        ``None`` is accepted and treated as empty settings.

	        Returns SUCCESS once every step has been handled. Returns FAILURE,
	        with ``error_message`` set, when the step limit is hit with steps
	        still pending or when the planner has no alternative for a failed
	        step.
	        """
	        settings = settings if settings is not None else {}
	        self.settings = settings
	        memory = self.planner.working_memory
	        # Work on a copy so popping steps does not silently mutate the list
	        # the planner stored in its working memory.
	        plan = list(self.planner.create_plan(user_command))
	        output_data = {}

	        step_count = 0
	        max_steps = settings.get("max_steps", 100)
	        last_error = None

	        while plan and step_count < max_steps:
	            current_subgoal = plan.pop(0)
	            step_count += 1

	            # Actor executes the action
	            result = self.actor.execute_action(current_subgoal)
	            failed = True
	            reason = result.error_message

	            if result.status == Status.SUCCESS:
	                # Validator checks if the action was successful
	                browser_state = self._get_browser_state()
	                validation_status, reason = self.validator.validate(
	                    current_subgoal, browser_state
	                )
	                failed = validation_status != Status.SUCCESS
	                if not failed:
	                    memory["completed_subgoals"].append(current_subgoal)
	                    # Extract data if defined in schema
	                    if "data_schema" in settings:
	                        output_data = self._extract_data(
	                            browser_state, settings["data_schema"]
	                        )

	            if failed:
	                last_error = reason
	                alternatives = self.planner.replan(current_subgoal, reason)
	                if not alternatives:
	                    # Without a replacement the failed step would simply
	                    # vanish and the run could end up reported as a success.
	                    memory["pending_subgoals"] = list(plan)
	                    return TaskResult(
	                        status=Status.FAILURE,
	                        output_data=output_data,
	                        error_message=(
	                            f"No alternative plan for failed step "
	                            f"{current_subgoal.id!r}: {reason}"
	                        ),
	                    )
	                # Prepend new steps to existing plan
	                plan = list(alternatives) + plan

	            # Keep the planner's view of outstanding work current.
	            memory["pending_subgoals"] = list(plan)

	        if not plan:
	            return TaskResult(status=Status.SUCCESS, output_data=output_data)

	        # The loop only exits with steps left over when the step limit is hit.
	        summary = (
	            f"Step limit of {max_steps} reached with "
	            f"{len(plan)} sub-goal(s) still pending"
	        )
	        if last_error:
	            summary = f"{summary}; last error: {last_error}"
	        return TaskResult(
	            status=Status.FAILURE,
	            output_data=output_data,
	            error_message=summary,
	        )

	    def _get_browser_state(self) -> Dict:
	        """
	        Return the current browser state (URL, HTML, screenshot).

	        Mock implementation: always returns the same placeholder values.
	        """
	        return {
	            "url": "https://example.com",
	            "html": "<html>...</html>",
	            "screenshot": "base64-encoded-screenshot"
	        }

	    def _extract_data(self, browser_state: Dict, schema: Dict) -> Dict:
	        """
	        Extract data from ``browser_state`` according to ``schema``.

	        Both arguments must be JSON-serialisable because they are embedded
	        in the prompt. Mock implementation: the prompt is built but not
	        sent, and a fixed placeholder dict is returned.
	        """
	        # Using LLM to extract structured data according to schema
	        prompt = f"""
	        Extract data from the following browser state according to this schema:
	        Schema: {json.dumps(schema)}
	        State: {json.dumps(browser_state)}

	        Return a JSON object with extracted data.
	        """
	        # TODO: send the prompt to an LLM client and return the parsed JSON
	        # reply; note the agent does not store an LLM client yet.
	        return {"extracted": "data"}  # Mock response

	# Example usage
	if __name__ == "__main__":
	    # Demo run: execute one command with a sample settings dict and print the
	    # result as JSON.
	    agent = SkyvernAgent()
	    command = "Add a product to the cart"
	    settings = {
	        "webhook_url": "https://your-webhook-url.com",
	        "proxy_type": "residential",
	        "session_id": "session_12345",
	        "two_factor_id": "2fa_67890",
	        "http_headers": {"User-Agent": "Custom Browser"},
	        "publish_workflow": True,
	        "max_steps": 50,
	        "data_schema": {"product_name": "string", "price": "number"},
	        "max_scrolls": 5,
	    }

	    result = agent.run_task(command, settings)
	    # result.status is an Enum member, which json.dumps cannot encode
	    # (TypeError); emit its plain string value instead.
	    printable = {**vars(result), "status": result.status.value}
	    print(json.dumps(printable, indent=2))