"""Gradio front end that runs a single WebVoyager task and streams its progress."""

import argparse
import json
import os
import re
import sys
import tempfile
import threading
import traceback

import gradio as gr

from run import webvoyager_run

# Opening of a JSON list whose first element is an object, e.g. ``[{`` .
# The lookahead leaves the match ending exactly on the ``{`` so decoding can
# start there.
_JSON_LIST_START_RE = re.compile(r'\[\s*(?=\{)')
_TASK_RE = re.compile(r'Now given a task: (.*?)\s+Please interact with')
_THOUGHT_RE = re.compile(r'Thought: (.*?)\nAction:', re.DOTALL)
_ACTION_RE = re.compile(r'Action: (.*)', re.DOTALL)


def _message_text(content):
    """Return the text carried by a message ``content`` field, or ``""``.

    ``content`` may be a plain string or a list of parts; for a list, the text
    of the first part that is a dict with a string ``"text"`` value is used.
    Any other shape (including a missing field, i.e. ``None``) yields ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        for part in content:
            if isinstance(part, dict) and isinstance(part.get('text'), str):
                return part['text']
    return ""


def format_log_for_gradio(log_content):
    """Format a raw log entry into a more readable form for the Gradio UI.

    The entry is searched for a JSON list that starts with an object
    (``[{...``). That first object is decoded and summarised by role:

    * ``user``: an "Initial Prompt" header plus the task sentence, if found.
    * ``assistant``: the ``Thought:`` and ``Action:`` parts, if found.

    Args:
        log_content: Raw log text for one step.

    Returns:
        The formatted summary (possibly an empty string when the message has
        another role or nothing recognisable), or ``log_content`` unchanged
        when no decodable JSON object is present.
    """
    list_start = _JSON_LIST_START_RE.search(log_content)
    if not list_start:
        return log_content

    try:
        # raw_decode parses exactly one complete JSON value starting at the
        # given index, so nested lists/objects and braces inside strings do
        # not truncate the message the way a non-greedy regex capture does.
        log_data, _ = json.JSONDecoder().raw_decode(
            log_content, list_start.end()
        )
    except json.JSONDecodeError:
        return log_content  # Return raw log if JSON parsing fails

    role = log_data.get('role')
    text = _message_text(log_data.get('content'))
    formatted_output = ""

    if role == 'user':
        # This is the initial prompt, so only summarise it.
        formatted_output += "--- Initial Prompt ---\n"
        task_match = _TASK_RE.search(text)
        if task_match:
            formatted_output += f"Task: {task_match.group(1)}\n\n"
    elif role == 'assistant':
        # This is the agent's response.
        thought_match = _THOUGHT_RE.search(text)
        action_match = _ACTION_RE.search(text)
        if thought_match:
            formatted_output += f"Thought: {thought_match.group(1).strip()}\n"
        if action_match:
            formatted_output += f"Action: {action_match.group(1).strip()}\n"

    return formatted_output


def run_script_for_gradio(url, task):
    """Run the webvoyager script for Gradio, streaming logs and screenshots.

    This is a generator: after every entry produced by ``webvoyager_run`` it
    yields the screenshot for that step (if the file exists) together with the
    log accumulated so far.

    Args:
        url: Address of the website the agent should start from.
        task: Natural-language description of the task to perform.

    Yields:
        ``(screenshot_path_or_None, full_log)`` tuples. If the run raises, a
        final tuple is yielded whose log ends with the error and traceback.
    """
    task_spec = {"id": "custom_task", "web": url, "ques": task}

    # All artefacts live in a temporary directory that is removed once the
    # generator finishes.
    with tempfile.TemporaryDirectory() as temp_dir:
        task_file_path = os.path.join(temp_dir, 'task.jsonl')
        with open(task_file_path, 'w', encoding='utf-8') as f:
            # json.dumps escapes quotes, backslashes and newlines in the
            # user-supplied values, so the file is always valid JSON.
            f.write(json.dumps(task_spec))

        # NOTE(review): these fields presumably mirror the command-line
        # options that run.py parses -- confirm against webvoyager_run.
        args = argparse.Namespace(
            test_file=task_file_path,
            max_iter=5,
            api_key=os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
            api_model="gpt-4-turbo",
            output_dir=os.path.join(temp_dir, 'results'),
            seed=None,
            max_attached_imgs=1,
            temperature=1.0,
            download_dir=os.path.join(temp_dir, 'downloads'),
            text_only=False,
            headless=True,
            save_accessibility_tree=False,
            force_device_scale=False,
            window_width=1024,
            window_height=768,
            fix_box_color=False,
        )

        os.makedirs(args.output_dir, exist_ok=True)
        os.makedirs(args.download_dir, exist_ok=True)

        task_dir = os.path.join(args.output_dir, 'taskcustom_task')
        os.makedirs(task_dir, exist_ok=True)

        full_log = ""

        try:
            # Iterating the run function gives real-time updates per step.
            for step, log_entry in enumerate(
                webvoyager_run(args, task_spec, task_dir), start=1
            ):
                formatted_log = format_log_for_gradio(log_entry)
                if formatted_log:
                    full_log += f"--- Step {step} ---\n{formatted_log}\n"

                screenshot_path = os.path.join(
                    task_dir, f'screenshot{step}.png'
                )
                if os.path.exists(screenshot_path):
                    yield screenshot_path, full_log
                else:
                    yield None, full_log
        except Exception as e:
            # Top-level UI boundary: show the failure in the output box
            # instead of letting the request fail without any detail.
            tb = traceback.format_exc()
            full_log += f"An error occurred: {e}\n\nFull Traceback:\n{tb}"
            yield None, full_log


iface = gr.Interface(
    fn=run_script_for_gradio,
    inputs=[
        gr.Textbox(label="URL", placeholder="Enter the URL of the website"),
        gr.Textbox(label="Task", placeholder="Describe the task to perform"),
    ],
    outputs=[
        gr.Image(label="Agent's View", type="filepath"),
        gr.Textbox(label="Agent Output", lines=20, interactive=False),
    ],
    title="WebVoyager",
    description="An LMM-powered web agent that can complete user instructions end-to-end.",
)

if __name__ == "__main__":
    iface.launch()