frdel committed on
Commit
a6d3079
·
1 Parent(s): 7a28f98

browser agent fixes

Browse files
Files changed (1) hide show
  1. python/tools/browser_agent.py +58 -41
python/tools/browser_agent.py CHANGED
@@ -41,9 +41,13 @@ class State:
41
  headless=True,
42
  disable_security=True,
43
  chromium_sandbox=False,
 
 
44
  minimum_wait_page_load_time=1.0,
45
  wait_for_network_idle_page_load_time=2.0,
46
  maximum_wait_page_load_time=10.0,
 
 
47
  args=['--headless=new'],
48
  )
49
  )
@@ -126,17 +130,18 @@ class State:
126
 
127
  self.iter_no = get_iter_no(self.agent)
128
 
129
- try:
130
- result = await self.use_agent.run(max_steps=50)
131
- return result
132
- finally:
133
- if self.browser_session:
134
- try:
135
- await self.browser_session.close()
136
- except Exception as e:
137
- PrintStyle().error(f"Error closing browser session in task cleanup: {e}")
138
- finally:
139
- self.browser_session = None
 
140
 
141
  def override_hooks(self):
142
  def override_hook(func):
@@ -188,7 +193,9 @@ class BrowserAgent(Tool):
188
 
189
  await self.agent.handle_intervention()
190
  await asyncio.sleep(1)
191
- try:
 
 
192
  update = await self.get_update()
193
  log = update.get("log")
194
  if log:
@@ -196,26 +203,26 @@ class BrowserAgent(Tool):
196
  screenshot = update.get("screenshot", None)
197
  if screenshot:
198
  self.log.update(screenshot=screenshot)
199
- except Exception:
200
- pass
201
 
202
  # collect result with error handling
203
  try:
204
  result = await task.result()
205
- PrintStyle().debug(f"Browser agent task completed, is_done: {result['is_done']}")
206
  except Exception as e:
207
  PrintStyle().error(f"Error getting browser agent task result: {str(e)}")
208
  # Return a timeout response if task.result() fails
209
  answer_text = f"Browser agent task failed to return result: {str(e)}"
210
  self.log.update(answer=answer_text)
211
  return Response(message=answer_text, break_loop=False)
212
- finally:
213
- # Stop any further browser access after task completion
214
- self.state.kill_task()
 
215
 
216
  # Check if task completed successfully
217
- if result['is_done']:
218
- answer = result['final_result']
219
  try:
220
  if answer and isinstance(answer, str) and answer.strip():
221
  answer_data = DirtyJson.parse_string(answer)
@@ -226,7 +233,7 @@ class BrowserAgent(Tool):
226
  answer_text = str(answer) if answer else f"Task completed with parse error: {str(e)}"
227
  else:
228
  # Task hit max_steps without calling done()
229
- urls = result['urls']
230
  current_url = urls[-1] if urls else "unknown"
231
  answer_text = (f"Task reached step limit without completion. Last page: {current_url}. "
232
  f"The browser agent may need clearer instructions on when to finish.")
@@ -258,26 +265,34 @@ class BrowserAgent(Tool):
258
  await agent.wait_if_paused()
259
 
260
  log = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
- for message in ua.message_manager.get_messages():
263
- if message.type == "system":
264
- continue
265
- if message.type == "ai":
266
- try:
267
- data = json.loads(message.content) # type: ignore
268
- cs = data.get("current_state")
269
- if cs:
270
- log.append("AI:" + cs["memory"])
271
- log.append("AI:" + cs["next_goal"])
272
- except Exception:
273
- pass
274
- if message.type == "human":
275
- content = str(message.content).strip()
276
- part = content.split("\n", 1)[0].split(",", 1)[0]
277
- if part:
278
- if len(part) > 150:
279
- part = part[:150] + "..."
280
- log.append("FW:" + part)
281
  result["log"] = log
282
 
283
  path = files.get_abs_path(
@@ -290,7 +305,7 @@ class BrowserAgent(Tool):
290
  await page.screenshot(path=path, full_page=False, timeout=3000)
291
  result["screenshot"] = f"img://{path}&t={str(time.time())}"
292
 
293
- if self.state.task:
294
  await self.state.task.execute_inside(_get_update)
295
 
296
  except Exception:
@@ -300,6 +315,8 @@ class BrowserAgent(Tool):
300
 
301
  async def prepare_state(self, reset=False):
302
  self.state = self.agent.get_data("_browser_agent_state")
 
 
303
  if not self.state or reset:
304
  self.state = await State.create(self.agent)
305
  self.agent.set_data("_browser_agent_state", self.state)
 
41
  headless=True,
42
  disable_security=True,
43
  chromium_sandbox=False,
44
+ accept_downloads=True,
45
+ keep_alive=True,
46
  minimum_wait_page_load_time=1.0,
47
  wait_for_network_idle_page_load_time=2.0,
48
  maximum_wait_page_load_time=10.0,
49
+ screen={'width': 1024, 'height': 1024},
50
+ viewport={'width': 1024, 'height': 1024},
51
  args=['--headless=new'],
52
  )
53
  )
 
130
 
131
  self.iter_no = get_iter_no(self.agent)
132
 
133
+ # try:
134
+ result = await self.use_agent.run(max_steps=50)
135
+ return result
136
+ # finally:
137
+ # # if self.browser_session:
138
+ # # try:
139
+ # # await self.browser_session.close()
140
+ # # except Exception as e:
141
+ # # PrintStyle().error(f"Error closing browser session in task cleanup: {e}")
142
+ # # finally:
143
+ # # self.browser_session = None
144
+ # pass
145
 
146
  def override_hooks(self):
147
  def override_hook(func):
 
193
 
194
  await self.agent.handle_intervention()
195
  await asyncio.sleep(1)
196
+ try:
197
+ if task.is_ready(): # otherwise get_update hangs
198
+ break
199
  update = await self.get_update()
200
  log = update.get("log")
201
  if log:
 
203
  screenshot = update.get("screenshot", None)
204
  if screenshot:
205
  self.log.update(screenshot=screenshot)
206
+ except Exception as e:
207
+ PrintStyle().error(f"Error getting update: {str(e)}")
208
 
209
  # collect result with error handling
210
  try:
211
  result = await task.result()
 
212
  except Exception as e:
213
  PrintStyle().error(f"Error getting browser agent task result: {str(e)}")
214
  # Return a timeout response if task.result() fails
215
  answer_text = f"Browser agent task failed to return result: {str(e)}"
216
  self.log.update(answer=answer_text)
217
  return Response(message=answer_text, break_loop=False)
218
+ # finally:
219
+ # # Stop any further browser access after task completion
220
+ # # self.state.kill_task()
221
+ # pass
222
 
223
  # Check if task completed successfully
224
+ if result.is_done():
225
+ answer = result.final_result()
226
  try:
227
  if answer and isinstance(answer, str) and answer.strip():
228
  answer_data = DirtyJson.parse_string(answer)
 
233
  answer_text = str(answer) if answer else f"Task completed with parse error: {str(e)}"
234
  else:
235
  # Task hit max_steps without calling done()
236
+ urls = result.urls()
237
  current_url = urls[-1] if urls else "unknown"
238
  answer_text = (f"Task reached step limit without completion. Last page: {current_url}. "
239
  f"The browser agent may need clearer instructions on when to finish.")
 
265
  await agent.wait_if_paused()
266
 
267
  log = []
268
+
269
+
270
+ # for message in ua.message_manager.get_messages():
271
+ # if message.type == "system":
272
+ # continue
273
+ # if message.type == "ai":
274
+ # try:
275
+ # data = json.loads(message.content) # type: ignore
276
+ # cs = data.get("current_state")
277
+ # if cs:
278
+ # log.append("AI:" + cs["memory"])
279
+ # log.append("AI:" + cs["next_goal"])
280
+ # except Exception:
281
+ # pass
282
+ # if message.type == "human":
283
+ # content = str(message.content).strip()
284
+ # part = content.split("\n", 1)[0].split(",", 1)[0]
285
+ # if part:
286
+ # if len(part) > 150:
287
+ # part = part[:150] + "..."
288
+ # log.append("FW:" + part)
289
+
290
+ # for hist in ua.state.history.history:
291
+ # for res in hist.result:
292
+ # log.append(res.extracted_content)
293
+ log = ua.state.history.extracted_content()
294
+
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  result["log"] = log
297
 
298
  path = files.get_abs_path(
 
305
  await page.screenshot(path=path, full_page=False, timeout=3000)
306
  result["screenshot"] = f"img://{path}&t={str(time.time())}"
307
 
308
+ if self.state.task and not self.state.task.is_ready():
309
  await self.state.task.execute_inside(_get_update)
310
 
311
  except Exception:
 
315
 
316
  async def prepare_state(self, reset=False):
317
  self.state = self.agent.get_data("_browser_agent_state")
318
+ if reset and self.state:
319
+ self.state.kill_task()
320
  if not self.state or reset:
321
  self.state = await State.create(self.agent)
322
  self.agent.set_data("_browser_agent_state", self.state)