frankystew1 committed on
Commit
afc1327
·
verified ·
1 Parent(s): 88e83ab

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +256 -0
app.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+ from typing import Dict, List, Optional, Tuple
4
+ from dataclasses import dataclass
5
+ from enum import Enum
6
+
7
+ # Assuming these libraries are available for browser automation and LLM interaction
8
+ # from playwright.sync_api import sync_playwright
9
+ # import openai # or another LLM API client
10
+
11
+ # Define enums for status tracking
12
class Status(Enum):
    """Outcome of a sub-goal, an action, a validation, or a whole task."""

    SUCCESS = "success"
    FAILURE = "failure"
    PENDING = "pending"
16
+
17
+ # Data classes for structured data handling
18
@dataclass
class SubGoal:
    """A single step of a plan.

    Attributes:
        id: Identifier of the step within the plan.
        description: Text describing the action to perform.
        expected_state: Text describing the page after the step completes.
        status: Current outcome of the step; starts as ``Status.PENDING``.
    """

    id: str
    description: str
    expected_state: str
    status: Status = Status.PENDING
24
+
25
@dataclass
class TaskResult:
    """Result of executing an action or a whole task.

    Attributes:
        status: Whether the action or task succeeded or failed.
        output_data: Optional dictionary of data produced by the task.
        error_message: Optional description of what went wrong.
    """

    status: Status
    output_data: Optional[Dict] = None
    error_message: Optional[str] = None
30
+
31
+ # Core modules of the agent
32
class PlannerModule:
    """Planner module: breaks user commands down into executable sub-goals.

    The LLM integration is still stubbed out. ``create_plan`` and ``replan``
    build the prompt they would send, but return hard-coded mock sub-goals;
    ``llm_client`` is stored and not called yet.
    """

    def __init__(self, llm_client):
        """Store the LLM client and initialise an empty working memory."""
        self.llm_client = llm_client
        self.working_memory = {
            "current_state": "",
            "completed_subgoals": [],
            "pending_subgoals": [],
        }

    def create_plan(self, user_command: str) -> List[SubGoal]:
        """Return a step-by-step plan for ``user_command``.

        Currently returns a fixed three-step mock plan regardless of the
        command. A copy of the plan is recorded under
        ``working_memory["pending_subgoals"]``.

        Args:
            user_command: Natural-language instruction from the user.

        Returns:
            The list of sub-goals to execute, in order.
        """
        # Built for the pending LLM call; unused while the plan is mocked.
        prompt = f"""
        Convert the following user command into discrete browser automation steps:
        "{user_command}"

        Return a JSON array of steps with these keys:
        - id: unique identifier for the step
        - description: detailed description of the action to take
        - expected_state: what the page should look like after completion

        Example format:
        [
        {{
        "id": "1",
        "description": "Navigate to https://example.com",
        "expected_state": "Homepage with logo visible"
        }},
        {{
        "id": "2",
        "description": "Click on the 'Login' button",
        "expected_state": "Login form appears with username and password fields"
        }}
        ]
        """

        # TODO: response = self.llm_client.generate_response(prompt)
        # For demonstration, return a mock plan.
        mock_plan = [
            SubGoal(id="1", description="Navigate to website", expected_state="Page loaded"),
            SubGoal(id="2", description="Click sign-in button", expected_state="Login form visible"),
            SubGoal(id="3", description="Enter credentials", expected_state="User logged in"),
        ]
        # Store a copy: callers consume the returned list destructively
        # (pop), which must not silently rewrite the planner's memory.
        self.working_memory["pending_subgoals"] = list(mock_plan)
        return mock_plan

    def replan(self, failed_subgoal: SubGoal, error_reason: str) -> List[SubGoal]:
        """Return alternative steps for a sub-goal that failed.

        Currently returns a fixed two-step mock replan regardless of the
        inputs, and does not touch ``working_memory``.

        Args:
            failed_subgoal: The sub-goal that failed.
            error_reason: Failure description from the actor or validator.

        Returns:
            Replacement sub-goals to run in place of the failed one.
        """
        # Built for the pending LLM call; unused while the replan is mocked.
        prompt = f"""
        The following step failed: "{failed_subgoal.description}"
        Reason: "{error_reason}"

        Generate alternative steps to achieve the same goal.
        """

        # TODO: response = self.llm_client.generate_response(prompt)
        # For demonstration, return a mock replan.
        mock_replan = [
            SubGoal(id="2a", description="Click alternative sign-in button", expected_state="Login form visible"),
            SubGoal(id="2b", description="Wait for 5 seconds and retry click", expected_state="Login form visible"),
        ]
        return mock_replan
101
+
102
+
103
class ActorModule:
    """Actor module: executes browser actions based on sub-goals."""

    def __init__(self, browser_controller):
        """Store the browser controller used to perform actions."""
        self.browser = browser_controller

    def execute_action(self, subgoal: SubGoal) -> TaskResult:
        """Perform the action described by ``subgoal`` in the browser.

        The action is chosen by keyword match on the lower-cased description:
        "navigate" (the last space-separated word is used as the URL),
        "click", or "enter". The click target and the typed credentials are
        hard-coded demo values.

        Args:
            subgoal: The step to execute.

        Returns:
            ``TaskResult`` with ``Status.SUCCESS`` when an action ran without
            raising, otherwise ``Status.FAILURE`` with ``error_message`` set.
            A description that matches no known action is a failure rather
            than a silent success.
        """
        if self.browser is None:
            # Fail with a clear reason instead of an AttributeError on None.
            return TaskResult(
                status=Status.FAILURE,
                error_message="No browser controller configured",
            )

        try:
            action = subgoal.description.lower()
            if "navigate" in action:
                url = subgoal.description.split(" ")[-1]
                self.browser.navigate(url)
            elif "click" in action:
                element = self.browser.find_element_by_text("Sign In")
                self.browser.click(element)
            elif "enter" in action:
                input_field = self.browser.find_element_by_label("username")
                self.browser.type(input_field, "user@example.com")
            else:
                # Nothing was executed, so this must not be reported as success.
                return TaskResult(
                    status=Status.FAILURE,
                    error_message=f"Unsupported action: {subgoal.description!r}",
                )
        except Exception as e:
            # The browser API is opaque here; any error becomes a failed result
            # so the caller can replan instead of crashing.
            return TaskResult(status=Status.FAILURE, error_message=str(e))

        return TaskResult(status=Status.SUCCESS)
129
+
130
+
131
class ValidatorModule:
    """Validator module: verifies whether an action achieved its sub-goal.

    The LLM call is still stubbed out; ``llm_client`` is stored and not
    called yet.
    """

    def __init__(self, llm_client):
        """Store the LLM client intended for state comparison."""
        self.llm_client = llm_client

    def validate(self, subgoal: SubGoal, browser_state: Dict) -> Tuple[Status, str]:
        """Judge whether ``subgoal`` was achieved given ``browser_state``.

        Currently a mock: only the sub-goal whose ``id`` is ``"2"`` is
        reported as successful; every other sub-goal fails.

        Args:
            subgoal: The step that was just executed.
            browser_state: Snapshot of the browser; must be JSON-serializable
                because it is embedded in the prompt via ``json.dumps``.

        Returns:
            A ``(status, message)`` tuple.
        """
        # Built for the pending LLM call; unused while validation is mocked.
        prompt = f"""
        Goal: {subgoal.description}
        Expected State: {subgoal.expected_state}
        Current State: {json.dumps(browser_state)}

        Has the goal been successfully achieved? Respond with YES or NO followed by reason.
        """

        # TODO: response = self.llm_client.generate_response(prompt)
        # For demonstration, return a mock validation.
        if subgoal.id == "2":
            return (Status.SUCCESS, "Login form is visible")
        return (Status.FAILURE, "Element not found or page not loaded as expected")
157
+
158
+
159
+ # Main automation agent using planner-actor-validator loop
160
class SkyvernAgent:
    """Main automation agent implementing the Planner-Actor-Validator loop."""

    def __init__(self):
        """Create the three modules with no LLM client or browser attached."""
        # TODO: wire a real LLM client and browser controller, e.g.
        #   openai.Client(api_key=...) and a Playwright-based controller.
        self.planner = PlannerModule(llm_client=None)
        self.actor = ActorModule(browser_controller=None)
        self.validator = ValidatorModule(llm_client=None)
        self.settings = {}

    def run_task(self, user_command: str, settings: Dict) -> TaskResult:
        """Plan ``user_command`` and execute it step by step.

        Each sub-goal is executed by the actor and, if that succeeds, checked
        by the validator. When either stage fails, the planner is asked for
        replacement steps, which are placed at the front of the plan. The
        loop ends when the plan is empty or ``max_steps`` iterations ran.

        Args:
            user_command: Natural-language instruction to automate.
            settings: Run options. ``max_steps`` (default 100) bounds the
                number of executed steps; ``data_schema``, when present,
                triggers data extraction after each validated step. ``None``
                is treated as an empty dict.

        Returns:
            ``TaskResult`` with ``Status.SUCCESS`` if every planned step was
            consumed, otherwise ``Status.FAILURE`` with ``error_message``
            describing what was left. ``output_data`` holds the most recent
            extraction (an empty dict if none happened).
        """
        settings = settings or {}
        self.settings = settings

        # Copy the plan: steps are popped below, and the planner may keep its
        # own reference to the list it returned.
        plan = list(self.planner.create_plan(user_command))
        output_data = {}
        last_error = None

        step_count = 0
        max_steps = settings.get("max_steps", 100)

        while plan and step_count < max_steps:
            current_subgoal = plan.pop(0)
            step_count += 1

            # Actor executes the action.
            result = self.actor.execute_action(current_subgoal)

            if result.status == Status.SUCCESS:
                # Validator checks whether the action had the expected effect.
                browser_state = self._get_browser_state()
                validation_status, message = self.validator.validate(
                    current_subgoal, browser_state
                )

                if validation_status == Status.SUCCESS:
                    current_subgoal.status = Status.SUCCESS
                    self.planner.working_memory["completed_subgoals"].append(
                        current_subgoal
                    )
                    # Extract data if a schema was supplied.
                    if "data_schema" in settings:
                        output_data = self._extract_data(
                            browser_state, settings["data_schema"]
                        )
                    continue

                failure_reason = message
            else:
                failure_reason = result.error_message

            # Either the action or its validation failed: record it and put
            # the planner's replacement steps ahead of the remaining plan.
            current_subgoal.status = Status.FAILURE
            last_error = failure_reason
            plan = self.planner.replan(current_subgoal, failure_reason) + plan

        if not plan:
            return TaskResult(status=Status.SUCCESS, output_data=output_data)

        error_message = (
            f"Stopped after {step_count} step(s) with "
            f"{len(plan)} sub-goal(s) still pending"
        )
        if last_error:
            error_message += f"; last error: {last_error}"
        return TaskResult(
            status=Status.FAILURE,
            output_data=output_data,
            error_message=error_message,
        )

    def _get_browser_state(self) -> Dict:
        """Return the current browser state.

        Currently a mock returning fixed placeholder values for the URL,
        HTML and screenshot.
        """
        return {
            "url": "https://example.com",
            "html": "<html>...</html>",
            "screenshot": "base64-encoded-screenshot",
        }

    def _extract_data(self, browser_state: Dict, schema: Dict) -> Dict:
        """Extract data from ``browser_state`` according to ``schema``.

        Currently a mock: the prompt is built but a fixed dictionary is
        returned. Both arguments must be JSON-serializable because they are
        embedded in the prompt via ``json.dumps``.
        """
        # Built for the pending LLM call; unused while extraction is mocked.
        prompt = f"""
        Extract data from the following browser state according to this schema:
        Schema: {json.dumps(schema)}
        State: {json.dumps(browser_state)}

        Return a JSON object with extracted data.
        """
        # TODO: send the prompt to an LLM client and return
        # json.loads(response); the agent holds no LLM client attribute yet.
        return {"extracted": "data"}  # Mock response
238
+
239
+ # Example usage
240
if __name__ == "__main__":
    # Example usage: run one demo task and print its result as JSON.
    agent = SkyvernAgent()
    command = "Add a product to the cart"
    settings = {
        "webhook_url": "https://your-webhook-url.com",
        "proxy_type": "residential",
        "session_id": "session_12345",
        "two_factor_id": "2fa_67890",
        "http_headers": {"User-Agent": "Custom Browser"},
        "publish_workflow": True,
        "max_steps": 50,
        "data_schema": {"product_name": "string", "price": "number"},
        "max_scrolls": 5,
    }

    result = agent.run_task(command, settings)
    # Status is an Enum, which the json encoder rejects with a TypeError;
    # emit its string value instead of the member itself.
    printable = {**vars(result), "status": result.status.value}
    print(json.dumps(printable, indent=2))