from app.core.config import settings import platform import argparse import time import json import re import os import shutil import logging from PIL import Image, ImageDraw from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.action_chains import ActionChains from prompts import SYSTEM_PROMPT, SYSTEM_PROMPT_TEXT_ONLY, PERSONA_SYSTEM_PROMPT from openai import OpenAI, APIError import httpx from utils import get_web_element_rect, encode_image, extract_information, print_message,\ get_webarena_accessibility_tree, get_pdf_retrieval_ans_from_assistant, clip_message_and_obs, clip_message_and_obs_text_only def setup_logger(folder_path): log_file_path = os.path.join(folder_path, 'agent.log') logger = logging.getLogger() for handler in logger.handlers[:]: logger.removeHandler(handler) handler.close() handler = logging.FileHandler(log_file_path) formatter = logging.Formatter('%(message)s') # Keep logs clean handler.setFormatter(formatter) logger.addHandler(handler) logger.setLevel(logging.INFO) def driver_config(args): options = webdriver.ChromeOptions() if args.save_accessibility_tree: args.force_device_scale = True if args.force_device_scale: options.add_argument("--force-device-scale-factor=1") if args.headless: options.add_argument("--headless") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") options.add_argument( "--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" ) options.add_experimental_option( "prefs", { "download.default_directory": args.download_dir, "plugins.always_open_pdf_externally": True } ) return options def format_msg(it, init_msg, pdf_obs, warn_obs, web_img_b64, web_text): if it == 1: init_msg += f"I've provided the tag name of each element and the text it contains (if text exists). Note that