Spaces:

frankystew1
/

AutomatedBrowserAgent

Build error

App Files Files Community

frankystew1 committed on Aug 4, 2025

Commit

afc1327

verified ·

1 Parent(s): 88e83ab

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +256 -0

app.py ADDED Viewed

	@@ -0,0 +1,256 @@

+import json
+import time
+from typing import Dict, List, Optional, Tuple
+from dataclasses import dataclass
+from enum import Enum
+# Assuming these libraries are available for browser automation and LLM interaction
+# from playwright.sync_api import sync_playwright
+# import openai  # or another LLM API client
# Status values shared by sub-goals, action results and validation results.
class Status(str, Enum):
    """Outcome of a sub-goal, a browser action, or a whole task.

    The ``str`` mixin makes every member a real string equal to its value.
    That keeps objects carrying a ``Status`` JSON-serializable: a plain
    ``Enum`` member makes ``json.dumps`` raise
    ``TypeError: Object of type Status is not JSON serializable``.
    """

    SUCCESS = "success"
    FAILURE = "failure"
    PENDING = "pending"
# Structured records passed between the planner, actor and validator.
@dataclass
class SubGoal:
    """A single step of a plan.

    Attributes are documented inline; ``status`` starts out as
    ``Status.PENDING`` when not given explicitly.
    """

    # Identifier of the step within its plan.
    id: str
    # Description of the action to take.
    description: str
    # What the page should look like once the step is complete.
    expected_state: str
    # Current outcome of the step.
    status: Status = Status.PENDING
@dataclass
class TaskResult:
    """Result of executing an action or a whole task.

    Only ``status`` is required; the two optional fields default to ``None``.
    """

    # Outcome of the action or task.
    status: Status
    # Optional payload produced along the way.
    output_data: Optional[Dict] = None
    # Optional explanation, used when reporting a failure.
    error_message: Optional[str] = None
# Core modules of the agent
class PlannerModule:
    """
    Planner Module: Breaks down user commands into executable sub-goals.

    The LLM calls are still stubbed out: both ``create_plan`` and ``replan``
    build the prompt they would send and then return a fixed mock plan.
    """

    def __init__(self, llm_client):
        """Store the LLM client and initialise an empty working memory."""
        self.llm_client = llm_client
        self.working_memory = {
            "current_state": "",
            "completed_subgoals": [],
            "pending_subgoals": []
        }

    def create_plan(self, user_command: str) -> List[SubGoal]:
        """
        Generate a step-by-step plan for the user command.

        Args:
            user_command: Natural-language description of the task.

        Returns:
            A list of sub-goals (currently a fixed mock plan). The list is
            owned by the caller; working memory keeps its own copy.
        """
        # Built now so the LLM call below can be enabled without other changes.
        prompt = f"""
        Convert the following user command into discrete browser automation steps:
        "{user_command}"
        Return a JSON array of steps with these keys:
        - id: unique identifier for the step
        - description: detailed description of the action to take
        - expected_state: what the page should look like after completion
        Example format:
        [
            {{
                "id": "1",
                "description": "Navigate to https://example.com",
                "expected_state": "Homepage with logo visible"
            }},
            {{
                "id": "2",
                "description": "Click on the 'Login' button",
                "expected_state": "Login form appears with username and password fields"
            }}
        ]
        """
        # TODO: response = self.llm_client.generate_response(prompt)
        # For demonstration, returning a mock plan
        mock_plan = [
            SubGoal(id="1", description="Navigate to website", expected_state="Page loaded"),
            SubGoal(id="2", description="Click sign-in button", expected_state="Login form visible"),
            SubGoal(id="3", description="Enter credentials", expected_state="User logged in")
        ]
        # Store a copy: callers consume the returned list destructively
        # (pop/prepend), which must not silently rewrite working memory.
        self.working_memory["pending_subgoals"] = list(mock_plan)
        return mock_plan

    def replan(self, failed_subgoal: SubGoal, error_reason: str) -> List[SubGoal]:
        """
        Produce alternative steps for a sub-goal that failed.

        Args:
            failed_subgoal: The step that could not be completed.
            error_reason: Failure explanation from the actor or validator.

        Returns:
            Replacement sub-goals (currently a fixed mock pair) whose ids are
            derived from the failed step's id, e.g. "2" -> "2a", "2b".
        """
        # Built now so the LLM call below can be enabled without other changes.
        prompt = f"""
        The following step failed: "{failed_subgoal.description}"
        Reason: "{error_reason}"
        Generate alternative steps to achieve the same goal.
        """
        # TODO: response = self.llm_client.generate_response(prompt)
        # For demonstration, returning a mock replan. Ids are derived from the
        # failed step instead of being hard-coded to "2a"/"2b", so replacements
        # stay traceable to the step they replace.
        base_id = failed_subgoal.id
        mock_replan = [
            SubGoal(id=f"{base_id}a", description="Click alternative sign-in button", expected_state="Login form visible"),
            SubGoal(id=f"{base_id}b", description="Wait for 5 seconds and retry click", expected_state="Login form visible")
        ]
        return mock_replan
class ActorModule:
    """
    Actor Module: Executes browser actions based on sub-goals.
    """

    def __init__(self, browser_controller):
        """Store the browser controller used to carry out actions."""
        self.browser = browser_controller

    def execute_action(self, subgoal: SubGoal) -> TaskResult:
        """
        Perform the action described by the sub-goal in the browser.

        The action is chosen by keyword match on the description
        ("navigate", "click", "enter"), checked in that order.

        Args:
            subgoal: The step to execute.

        Returns:
            ``TaskResult`` with ``Status.SUCCESS`` when an action ran without
            raising. Otherwise ``Status.FAILURE`` with an ``error_message``:
            no browser configured, unrecognised action, or the text of the
            exception raised by the browser controller.
        """
        if self.browser is None:
            # Fail with a readable reason instead of an AttributeError text.
            return TaskResult(
                status=Status.FAILURE,
                error_message="No browser controller configured",
            )
        try:
            action = subgoal.description.lower()
            if "navigate" in action:
                # The target is taken to be the last whitespace-separated
                # token; split() ignores trailing spaces, unlike split(" ").
                tokens = subgoal.description.split()
                self.browser.navigate(tokens[-1])
            elif "click" in action:
                # NOTE: the click target is still hard-coded.
                element = self.browser.find_element_by_text("Sign In")
                self.browser.click(element)
            elif "enter" in action:
                # NOTE: the field label and typed value are still hard-coded.
                input_field = self.browser.find_element_by_label("username")
                self.browser.type(input_field, "user@example.com")
            else:
                # Nothing was executed, so reporting success would be wrong.
                return TaskResult(
                    status=Status.FAILURE,
                    error_message=f"Unsupported action: {subgoal.description!r}",
                )
        except Exception as e:
            # Any controller error becomes a failure result so the caller
            # can replan instead of crashing.
            return TaskResult(status=Status.FAILURE, error_message=str(e))
        return TaskResult(status=Status.SUCCESS)
class ValidatorModule:
    """
    Validator Module: Verifies if actions were successful.

    The LLM call is stubbed out; the verdict is currently a fixed mock.
    """

    def __init__(self, llm_client):
        """Keep a reference to the LLM client."""
        self.llm_client = llm_client

    def validate(self, subgoal: SubGoal, browser_state: Dict) -> Tuple[Status, str]:
        """
        Judge whether the sub-goal was achieved given the browser state.

        Returns a ``(status, message)`` pair. In the current mock, only the
        sub-goal whose id is "2" is reported as successful.
        """
        # The prompt is assembled even though the LLM call is disabled.
        prompt = f"""
        Goal: {subgoal.description}
        Expected State: {subgoal.expected_state}
        Current State: {json.dumps(browser_state)}
        Has the goal been successfully achieved? Respond with YES or NO followed by reason.
        """
        # response = self.llm_client.generate_response(prompt)
        # Mock verdict used for demonstration.
        if subgoal.id != "2":
            return (Status.FAILURE, "Element not found or page not loaded as expected")
        return (Status.SUCCESS, "Login form is visible")
# Main automation agent using planner-actor-validator loop
class SkyvernAgent:
    """
    Main automation agent implementing the Planner-Actor-Validator loop.
    """

    def __init__(self):
        """Create the three modules; LLM and browser clients are not wired up yet."""
        # self.llm_client = openai.Client(api_key="YOUR_API_KEY")
        # self.browser_controller = sync_playwright()
        self.planner = PlannerModule(llm_client=None)
        self.actor = ActorModule(browser_controller=None)
        self.validator = ValidatorModule(llm_client=None)
        self.settings = {}

    def run_task(self, user_command: str, settings: Dict) -> TaskResult:
        """
        Run the plan/act/validate loop for a user command.

        Args:
            user_command: Natural-language description of the task.
            settings: Run options; ``None`` is treated as empty. Keys read
                here are ``max_steps`` (default 100) and ``data_schema``.

        Returns:
            ``TaskResult`` with ``Status.SUCCESS`` when every planned step was
            executed and the last one validated. Otherwise ``Status.FAILURE``
            with an ``error_message`` saying why the run stopped.
            ``output_data`` holds the most recent extraction, if any.
        """
        settings = settings or {}
        self.settings = settings
        # Work on a private copy so popping/prepending never mutates a list
        # the planner may still hold.
        plan = list(self.planner.create_plan(user_command))
        output_data = {}
        last_error = None  # reason for the most recent failed step, if any
        step_count = 0
        max_steps = settings.get("max_steps", 100)
        while plan and step_count < max_steps:
            current_subgoal = plan.pop(0)
            step_count += 1
            # Actor executes the action
            result = self.actor.execute_action(current_subgoal)
            if result.status == Status.SUCCESS:
                # Validator checks if the action was successful
                browser_state = self._get_browser_state()
                validation_status, message = self.validator.validate(current_subgoal, browser_state)
                if validation_status == Status.SUCCESS:
                    current_subgoal.status = Status.SUCCESS
                    self.planner.working_memory["completed_subgoals"].append(current_subgoal)
                    # Extract data if defined in schema
                    if "data_schema" in settings:
                        output_data = self._extract_data(browser_state, settings["data_schema"])
                    last_error = None
                    continue
                failure_reason = message
            else:
                failure_reason = result.error_message
            # Either the action or its validation failed: record it and
            # prepend the replacement steps to the remaining plan.
            current_subgoal.status = Status.FAILURE
            last_error = failure_reason
            new_plan = self.planner.replan(current_subgoal, failure_reason)
            plan = list(new_plan) + plan
        # Steps left over means the step budget ran out.
        if plan:
            error = (
                f"Stopped after {step_count} step(s) (max_steps={max_steps}) "
                f"with {len(plan)} sub-goal(s) still pending"
            )
            if last_error:
                error += f"; last error: {last_error}"
            return TaskResult(status=Status.FAILURE, output_data=output_data, error_message=error)
        # Plan exhausted but the final step failed and replanning offered
        # nothing further: that is not a success.
        if last_error is not None:
            return TaskResult(status=Status.FAILURE, output_data=output_data, error_message=last_error)
        return TaskResult(status=Status.SUCCESS, output_data=output_data)

    def _get_browser_state(self) -> Dict:
        """
        Return the current browser state (URL, HTML, screenshot).

        Currently a fixed mock; no browser is queried.
        """
        return {
            "url": "https://example.com",
            "html": "<html>...</html>",
            "screenshot": "base64-encoded-screenshot"
        }

    def _extract_data(self, browser_state: Dict, schema: Dict) -> Dict:
        """
        Extract data from the browser state according to the schema.

        Currently builds the extraction prompt and returns a fixed mock dict.
        """
        # Prompt for LLM-based structured extraction (call not wired up yet).
        prompt = f"""
        Extract data from the following browser state according to this schema:
        Schema: {json.dumps(schema)}
        State: {json.dumps(browser_state)}
        Return a JSON object with extracted data.
        """
        # TODO: send `prompt` to the LLM client once one is configured on the
        # agent, then return json.loads(response).
        return {"extracted": "data"}  # Mock response
# Example usage
if __name__ == "__main__":
    agent = SkyvernAgent()
    command = "Add a product to the cart"
    settings = {
        "webhook_url": "https://your-webhook-url.com",
        "proxy_type": "residential",
        "session_id": "session_12345",
        "two_factor_id": "2fa_67890",
        "http_headers": {"User-Agent": "Custom Browser"},
        "publish_workflow": True,
        "max_steps": 50,
        "data_schema": {"product_name": "string", "price": "number"},
        "max_scrolls": 5
    }
    result = agent.run_task(command, settings)
    # json.dumps cannot encode an Enum member, so dumping result.__dict__
    # directly raises TypeError; replace the status with its string value.
    printable = dict(result.__dict__, status=result.status.value)
    print(json.dumps(printable, indent=2))