Spaces:
Sleeping
Sleeping
| from io import BytesIO | |
| from time import sleep | |
| import os | |
| import sys | |
| # Add the parent directory to the Python path so modules can be found | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| import helium | |
| from dotenv import load_dotenv | |
| from PIL import Image | |
| from selenium import webdriver | |
| from smolagents import CodeAgent | |
| from smolagents.agents import ActionStep | |
| from agents.agent import MyAgent | |
| from prompts.helium import HELIUM_PROMPT | |
| load_dotenv() | |
# Build the Chrome options; the switches are applied in the order listed.
chrome_options = webdriver.ChromeOptions()
for chrome_flag in (
    "--force-device-scale-factor=1",
    "--window-size=1000,1350",
    "--disable-pdf-viewer",
    "--window-position=0,0",
):
    chrome_options.add_argument(chrome_flag)

# Start a visible (non-headless) Chrome session through helium.
driver = helium.start_chrome(headless=False, options=chrome_options)
def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
    """Attach a fresh browser screenshot and the current URL to ``memory_step``.

    Registered as an agent step callback (see ``step_callbacks``).

    Args:
        memory_step: The step being recorded. Its ``observations_images`` and
            ``observations`` attributes are updated in place.
        agent: The running agent. Screenshots stored on ``ActionStep`` entries
            in ``agent.memory.steps`` that are two or more steps behind
            ``memory_step`` are cleared.

    If ``helium.get_driver()`` returns ``None`` the step is left unchanged.
    """
    sleep(1.0)  # Let JavaScript animations happen before taking the screenshot

    browser = helium.get_driver()
    if browser is None:
        # No browser to capture from; bail out before anything dereferences
        # the driver (``None.current_url`` would raise AttributeError).
        return

    # Remove previous screenshots for lean processing: only the most recent
    # steps keep their images.
    stale_cutoff = memory_step.step_number - 2
    for earlier_step in agent.memory.steps:
        if not isinstance(earlier_step, ActionStep):
            continue
        if earlier_step.step_number <= stale_cutoff:
            earlier_step.observations_images = None

    png_bytes = browser.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))
    print(f"Captured a browser screenshot: {image.size} pixels")
    # Create a copy to ensure it persists
    memory_step.observations_images = [image.copy()]

    # Update observations with current URL
    url_info = f"Current url: {browser.current_url}"
    if memory_step.observations is None:
        memory_step.observations = url_info
    else:
        memory_step.observations = memory_step.observations + "\n" + url_info
# Build the project agent wrapper; save_screenshot runs as a step callback.
video_agent = MyAgent(
    api_key=os.getenv("GEMINI_API_KEY"),
    temperature=0.0,
    add_base_tools=False,
    additional_authorized_imports=["helium"],
    step_callbacks=[save_screenshot],
    max_steps=20,
    verbosity_level=2,
)

# Run the helium star-import inside the agent's Python executor before the task starts.
video_agent.agent.python_executor("from helium import *", video_agent.agent.state)

search_request = """
Please navigate to https://en.wikipedia.org/wiki/Chicago and give me a sentence containing the word "1992" that mentions a construction accident.
"""

# The task text is followed by the helium usage instructions.
agent_output = video_agent(search_request + HELIUM_PROMPT)
print("Final output:", agent_output, sep="\n")