# Spaces: Build error
| import json | |
| import time | |
| from typing import Dict, List, Optional, Tuple | |
| from dataclasses import dataclass | |
| from enum import Enum | |
# Optional third-party dependencies for browser automation and LLM access.
# They are assumed to be installed but are not imported yet:
# from playwright.sync_api import sync_playwright
# import openai  # or another LLM API client
# Enum used for status tracking
class Status(Enum):
    """Status values shared by sub-goals, action results and validation."""

    SUCCESS = "success"
    FAILURE = "failure"
    PENDING = "pending"
# Data classes for structured data handling
@dataclass
class SubGoal:
    """One discrete step of a plan.

    Declared as a dataclass so that it can be built with keyword arguments
    (``SubGoal(id=..., description=..., expected_state=...)``); without the
    decorator the class has no matching ``__init__`` and construction raises
    ``TypeError``.

    Attributes:
        id: Identifier of the step within its plan.
        description: Text describing the action to perform.
        expected_state: Text describing the page after the step completes.
        status: Tracking status of the step; starts as ``Status.PENDING``.
    """

    id: str
    description: str
    expected_state: str
    status: Status = Status.PENDING
@dataclass
class TaskResult:
    """Outcome of a single action or of a whole task run.

    Declared as a dataclass so that it can be built with keyword arguments
    (``TaskResult(status=..., error_message=...)``); without the decorator
    construction raises ``TypeError``.

    Attributes:
        status: Whether the action or task succeeded or failed.
        output_data: Optional dictionary of extracted data.
        error_message: Optional description of what went wrong.
    """

    status: Status
    output_data: Optional[Dict] = None
    error_message: Optional[str] = None
# Core modules of the agent: planner, actor and validator
class PlannerModule:
    """Planner module: turns a user command into executable sub-goals.

    The LLM call is not wired in yet, so both planning methods build their
    prompt and then return a fixed mock list of steps.
    """

    def __init__(self, llm_client):
        self.llm_client = llm_client
        self.working_memory = {
            "current_state": "",
            "completed_subgoals": [],
            "pending_subgoals": []
        }

    def create_plan(self, user_command: str) -> List[SubGoal]:
        """Return the step-by-step plan for ``user_command``.

        The returned list is also stored, as the same list object, under
        ``working_memory["pending_subgoals"]``.
        """
        prompt = f"""
        Convert the following user command into discrete browser automation steps:
        "{user_command}"
        Return a JSON array of steps with these keys:
        - id: unique identifier for the step
        - description: detailed description of the action to take
        - expected_state: what the page should look like after completion
        Example format:
        [
        {{
        "id": "1",
        "description": "Navigate to https://example.com",
        "expected_state": "Homepage with logo visible"
        }},
        {{
        "id": "2",
        "description": "Click on the 'Login' button",
        "expected_state": "Login form appears with username and password fields"
        }}
        ]
        """
        # The prompt above is not sent anywhere yet; a mock plan stands in for
        # the LLM response.
        step_specs = (
            ("1", "Navigate to website", "Page loaded"),
            ("2", "Click sign-in button", "Login form visible"),
            ("3", "Enter credentials", "User logged in"),
        )
        steps = [
            SubGoal(id=step_id, description=action, expected_state=outcome)
            for step_id, action, outcome in step_specs
        ]
        self.working_memory["pending_subgoals"] = steps
        return steps

    def replan(self, failed_subgoal: SubGoal, error_reason: str) -> List[SubGoal]:
        """Return alternative steps for a sub-goal that failed.

        ``failed_subgoal`` and ``error_reason`` only feed the prompt, which
        is not sent yet; the returned steps are a fixed mock.
        """
        prompt = f"""
        The following step failed: "{failed_subgoal.description}"
        Reason: "{error_reason}"
        Generate alternative steps to achieve the same goal.
        """
        # The prompt above is not sent anywhere yet; a mock re-plan stands in
        # for the LLM response.
        alternative_specs = (
            ("2a", "Click alternative sign-in button", "Login form visible"),
            ("2b", "Wait for 5 seconds and retry click", "Login form visible"),
        )
        return [
            SubGoal(id=step_id, description=action, expected_state=outcome)
            for step_id, action, outcome in alternative_specs
        ]
class ActorModule:
    """Actor module: carries out the browser action behind a sub-goal."""

    def __init__(self, browser_controller):
        self.browser = browser_controller

    def execute_action(self, subgoal: SubGoal) -> TaskResult:
        """Run the browser action selected by keywords in the description.

        The lower-cased description is checked for "navigate", "click" and
        "enter", in that order, and only the first match is executed. A
        description matching none of them performs no browser call and still
        yields a SUCCESS result. Any exception is converted into a FAILURE
        result carrying the exception text.
        """
        try:
            wording = subgoal.description.lower()
            handlers = (
                ("navigate", self._navigate),
                ("click", self._click_sign_in),
                ("enter", self._enter_username),
            )
            for keyword, handler in handlers:
                if keyword in wording:
                    handler(subgoal)
                    break
            return TaskResult(status=Status.SUCCESS)
        except Exception as exc:
            return TaskResult(status=Status.FAILURE, error_message=str(exc))

    def _navigate(self, subgoal: SubGoal) -> None:
        """Navigate to the last space-separated token of the description."""
        target_url = subgoal.description.split(" ")[-1]
        self.browser.navigate(target_url)

    def _click_sign_in(self, subgoal: SubGoal) -> None:
        """Click the element found by the fixed text "Sign In"."""
        button = self.browser.find_element_by_text("Sign In")
        self.browser.click(button)

    def _enter_username(self, subgoal: SubGoal) -> None:
        """Type the fixed demo address into the field labelled "username"."""
        field = self.browser.find_element_by_label("username")
        self.browser.type(field, "user@example.com")
class ValidatorModule:
    """Validator module: decides whether a sub-goal was achieved.

    The LLM call is not wired in yet, so the verdict is a fixed mock.
    """

    def __init__(self, llm_client):
        self.llm_client = llm_client

    def validate(self, subgoal: SubGoal, browser_state: Dict) -> Tuple[Status, str]:
        """Return a ``(status, message)`` verdict for ``subgoal``.

        ``browser_state`` is serialised with ``json.dumps`` into the prompt,
        so it must be JSON-serialisable. The prompt is not sent yet: the mock
        verdict is SUCCESS only for the sub-goal whose id is "2" and FAILURE
        for every other id.
        """
        prompt = f"""
        Goal: {subgoal.description}
        Expected State: {subgoal.expected_state}
        Current State: {json.dumps(browser_state)}
        Has the goal been successfully achieved? Respond with YES or NO followed by reason.
        """
        # The prompt above is not sent anywhere yet; a mock verdict stands in
        # for the LLM response.
        if subgoal.id != "2":
            return (Status.FAILURE, "Element not found or page not loaded as expected")
        return (Status.SUCCESS, "Login form is visible")
# Main automation agent built on the planner-actor-validator loop
class SkyvernAgent:
    """Main automation agent implementing the Planner-Actor-Validator loop."""

    def __init__(self, llm_client=None, browser_controller=None):
        """Wire up the three modules.

        Args:
            llm_client: Client handed to the planner and the validator.
                Defaults to ``None``, which matches the previous hard-wired
                behaviour.
            browser_controller: Controller handed to the actor. Defaults to
                ``None``, which matches the previous hard-wired behaviour.
        """
        self.planner = PlannerModule(llm_client=llm_client)
        self.actor = ActorModule(browser_controller=browser_controller)
        self.validator = ValidatorModule(llm_client=llm_client)
        self.settings = {}

    def run_task(self, user_command: str, settings: Optional[Dict]) -> TaskResult:
        """Plan ``user_command`` and execute the plan step by step.

        Each step is executed by the actor and, if the action succeeded,
        checked by the validator. A step that fails either stage is replaced
        by the planner's alternative steps, which are placed in front of the
        remaining plan.

        Args:
            user_command: Natural-language description of the task.
            settings: Run options; ``None`` is treated as an empty dict.
                ``max_steps`` (default 100) bounds the number of executed
                steps, and ``data_schema``, when present, triggers data
                extraction after every validated step.

        Returns:
            A ``TaskResult`` whose status is SUCCESS when the plan was fully
            consumed and FAILURE when steps were still pending at the step
            limit. ``output_data`` holds the most recent extraction, and a
            failed run carries an ``error_message``.
        """
        settings = settings or {}
        self.settings = settings
        max_steps = settings.get("max_steps", 100)
        memory = self.planner.working_memory

        # Work on a private copy so popping steps does not silently mutate
        # the list the planner keeps in its working memory.
        plan = list(self.planner.create_plan(user_command))
        output_data = {}
        last_error = None
        step_count = 0

        while plan and step_count < max_steps:
            current_subgoal = plan.pop(0)
            step_count += 1

            result = self.actor.execute_action(current_subgoal)
            if result.status == Status.SUCCESS:
                # Only a successful action is worth validating.
                browser_state = self._get_browser_state()
                validation_status, failure_reason = self.validator.validate(
                    current_subgoal, browser_state
                )
                succeeded = validation_status == Status.SUCCESS
            else:
                succeeded = False
                failure_reason = result.error_message

            if succeeded:
                current_subgoal.status = Status.SUCCESS
                memory["completed_subgoals"].append(current_subgoal)
                # Extract data if a schema was supplied in the settings.
                if "data_schema" in settings:
                    output_data = self._extract_data(
                        browser_state, settings["data_schema"]
                    )
            else:
                current_subgoal.status = Status.FAILURE
                last_error = failure_reason
                # Alternative steps run before the rest of the plan.
                alternatives = self.planner.replan(current_subgoal, failure_reason)
                plan = [*alternatives, *plan]

            # Keep the planner's view of the outstanding work up to date.
            memory["pending_subgoals"] = list(plan)

        if not plan:
            return TaskResult(status=Status.SUCCESS, output_data=output_data)

        # Steps are only left over when the step limit stopped the loop.
        detail = (
            f"Stopped after {step_count} step(s) with "
            f"{len(plan)} sub-goal(s) still pending"
        )
        if last_error:
            detail = f"{detail}; last error: {last_error}"
        return TaskResult(
            status=Status.FAILURE, output_data=output_data, error_message=detail
        )

    def _get_browser_state(self) -> Dict:
        """Return the current browser state.

        Currently a fixed mock with ``url``, ``html`` and ``screenshot``
        entries; no browser is queried.
        """
        return {
            "url": "https://example.com",
            "html": "<html>...</html>",
            "screenshot": "base64-encoded-screenshot"
        }

    def _extract_data(self, browser_state: Dict, schema: Dict) -> Dict:
        """Return data extracted from ``browser_state`` according to ``schema``.

        Both arguments are serialised with ``json.dumps`` into the prompt, so
        they must be JSON-serialisable. The prompt is not sent yet and a
        fixed mock dictionary is returned.
        """
        prompt = f"""
        Extract data from the following browser state according to this schema:
        Schema: {json.dumps(schema)}
        State: {json.dumps(browser_state)}
        Return a JSON object with extracted data.
        """
        # TODO: send `prompt` to the LLM client and return the parsed JSON
        # response instead of the mock below.
        return {"extracted": "data"}  # Mock response
# Example usage
if __name__ == "__main__":
    agent = SkyvernAgent()
    command = "Add a product to the cart"
    settings = {
        "webhook_url": "https://your-webhook-url.com",
        "proxy_type": "residential",
        "session_id": "session_12345",
        "two_factor_id": "2fa_67890",
        "http_headers": {"User-Agent": "Custom Browser"},
        "publish_workflow": True,
        "max_steps": 50,
        "data_schema": {"product_name": "string", "price": "number"},
        "max_scrolls": 5,
    }
    result = agent.run_task(command, settings)
    # `status` is an Enum member, which json.dumps cannot serialise; emit its
    # string value instead so printing the result does not raise TypeError.
    printable = {**vars(result), "status": result.status.value}
    print(json.dumps(printable, indent=2))