Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import argparse | |
| import os | |
| import sys | |
| import tempfile | |
| import threading | |
| import json | |
| import traceback | |
| from run import webvoyager_run | |
| import re | |
| def format_log_for_gradio(log_content): | |
| """ | |
| Formats the raw log content into a more readable format for the Gradio UI. | |
| """ | |
| # Extract the JSON part of the log | |
| json_match = re.search(r'\[(\{.*?\})\]', log_content, re.DOTALL) | |
| if not json_match: | |
| return log_content | |
| try: | |
| log_data = json.loads(json_match.group(1)) | |
| formatted_output = "" | |
| if 'role' in log_data and log_data['role'] == 'user': | |
| # This is the initial prompt, let's summarize it | |
| formatted_output += "--- Initial Prompt ---\n" | |
| content = log_data['content'][0]['text'] | |
| task_match = re.search(r'Now given a task: (.*?)\s+Please interact with', content) | |
| if task_match: | |
| formatted_output += f"Task: {task_match.group(1)}\n\n" | |
| elif 'role' in log_data and log_data['role'] == 'assistant': | |
| # This is the agent's response | |
| content = log_data['content'] | |
| thought_match = re.search(r'Thought: (.*?)\nAction:', content, re.DOTALL) | |
| action_match = re.search(r'Action: (.*)', content, re.DOTALL) | |
| if thought_match: | |
| formatted_output += f"Thought: {thought_match.group(1).strip()}\n" | |
| if action_match: | |
| formatted_output += f"Action: {action_match.group(1).strip()}\n" | |
| return formatted_output | |
| except json.JSONDecodeError: | |
| return log_content # Return raw log if JSON parsing fails | |
| def run_script_for_gradio(url, task): | |
| """ | |
| A wrapper to run the webvoyager script for Gradio, capturing output and screenshots. | |
| """ | |
| with tempfile.TemporaryDirectory() as temp_dir: | |
| task_file_path = os.path.join(temp_dir, 'task.jsonl') | |
| with open(task_file_path, 'w') as f: | |
| f.write(f'{{"id": "custom_task", "web": "{url}", "ques": "{task}"}}') | |
| args = argparse.Namespace( | |
| test_file=task_file_path, | |
| max_iter=5, | |
| api_key=os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"), | |
| api_model="gpt-4-turbo", | |
| output_dir=os.path.join(temp_dir, 'results'), | |
| seed=None, | |
| max_attached_imgs=1, | |
| temperature=1.0, | |
| download_dir=os.path.join(temp_dir, 'downloads'), | |
| text_only=False, | |
| headless=True, | |
| save_accessibility_tree=False, | |
| force_device_scale=False, | |
| window_width=1024, | |
| window_height=768, | |
| fix_box_color=False | |
| ) | |
| os.makedirs(args.output_dir, exist_ok=True) | |
| os.makedirs(args.download_dir, exist_ok=True) | |
| task_dir = os.path.join(args.output_dir, 'taskcustom_task') | |
| os.makedirs(task_dir, exist_ok=True) | |
| full_log = "" | |
| try: | |
| # We'll get real-time updates by iterating through the run function | |
| for i, log_entry in enumerate(webvoyager_run(args, {"id": "custom_task", "web": url, "ques": task}, task_dir)): | |
| formatted_log = format_log_for_gradio(log_entry) | |
| if formatted_log: | |
| full_log += f"--- Step {i+1} ---\n{formatted_log}\n" | |
| screenshot_path = os.path.join(task_dir, f'screenshot{i+1}.png') | |
| if os.path.exists(screenshot_path): | |
| yield screenshot_path, full_log | |
| else: | |
| yield None, full_log | |
| except Exception as e: | |
| tb = traceback.format_exc() | |
| full_log += f"An error occurred: {e}\n\nFull Traceback:\n{tb}" | |
| yield None, full_log | |
| iface = gr.Interface( | |
| fn=run_script_for_gradio, | |
| inputs=[ | |
| gr.Textbox(label="URL", placeholder="Enter the URL of the website"), | |
| gr.Textbox(label="Task", placeholder="Describe the task to perform"), | |
| ], | |
| outputs=[ | |
| gr.Image(label="Agent's View", type="filepath"), | |
| gr.Textbox(label="Agent Output", lines=20, interactive=False), | |
| ], | |
| title="WebVoyager", | |
| description="An LMM-powered web agent that can complete user instructions end-to-end.", | |
| ) | |
| if __name__ == "__main__": | |
| iface.launch() | |