Vision and Browsing Tools
#2
by
mmichiels13 - opened
tools.py
CHANGED
|
@@ -89,3 +89,22 @@ def close_popups() -> str:
|
|
| 89 |
Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! This does not work on cookie consent banners.
|
| 90 |
"""
|
| 91 |
webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! This does not work on cookie consent banners.
|
| 90 |
"""
|
| 91 |
webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def save_screenshot(step_log: ActionStep, agent: CodeAgent) -> None:
    """Attach a fresh browser screenshot and the current URL to ``step_log``.

    Screenshots stored on steps that are two or more steps older than
    ``step_log`` are dropped from ``agent.logs`` so that only the most recent
    images are kept.

    Args:
        step_log: The step that has just finished; it receives the new
            screenshot in ``observations_images`` and the current URL
            appended to ``observations``.
        agent: The agent whose ``logs`` are scanned for outdated screenshots.

    Returns:
        None. ``step_log`` and older entries of ``agent.logs`` are mutated in
        place. If helium reports no active driver, nothing is modified.
    """
    sleep(1.0)  # Let JavaScript animations happen before taking the screenshot
    driver = helium.get_driver()
    if driver is None:
        # No browser session: there is nothing to capture and no URL to report.
        return

    current_step = step_log.step_number

    # Remove previous screenshots from logs for lean processing. The check must
    # be made on each *previous* step, not on the current ``step_log``.
    for previous_step in agent.logs:
        if isinstance(previous_step, ActionStep) and previous_step.step_number <= current_step - 2:
            previous_step.observations_images = None

    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))
    print(f"Captured a browser screenshot: {image.size} pixels")
    step_log.observations_images = [image.copy()]  # Create a copy to ensure it persists, important!

    # Update observations with current URL
    url_info = f"Current url: {driver.current_url}"
    if step_log.observations is None:
        step_log.observations = url_info
    else:
        step_log.observations = step_log.observations + "\n" + url_info
|