File size: 4,356 Bytes
076c26d
 
 
 
 
 
0379bb1
94ac9b4
076c26d
663480b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
076c26d
 
 
663480b
076c26d
 
 
 
 
 
 
 
 
 
663480b
076c26d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663480b
 
 
076c26d
663480b
076c26d
663480b
 
 
 
 
 
076c26d
663480b
 
 
 
 
076c26d
 
94ac9b4
663480b
 
076c26d
 
 
 
 
 
 
 
663480b
 
 
 
076c26d
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
import argparse
import os
import sys
import tempfile
import threading
import json
import traceback
from run import webvoyager_run
import re

def format_log_for_gradio(log_content):
    """
    Formats the raw log content into a more readable format for the Gradio UI.
    """
    # Extract the JSON part of the log
    json_match = re.search(r'\[(\{.*?\})\]', log_content, re.DOTALL)
    if not json_match:
        return log_content

    try:
        log_data = json.loads(json_match.group(1))
        formatted_output = ""
        
        if 'role' in log_data and log_data['role'] == 'user':
            # This is the initial prompt, let's summarize it
            formatted_output += "--- Initial Prompt ---\n"
            content = log_data['content'][0]['text']
            task_match = re.search(r'Now given a task: (.*?)\s+Please interact with', content)
            if task_match:
                formatted_output += f"Task: {task_match.group(1)}\n\n"

        elif 'role' in log_data and log_data['role'] == 'assistant':
            # This is the agent's response
            content = log_data['content']
            thought_match = re.search(r'Thought: (.*?)\nAction:', content, re.DOTALL)
            action_match = re.search(r'Action: (.*)', content, re.DOTALL)
            
            if thought_match:
                formatted_output += f"Thought: {thought_match.group(1).strip()}\n"
            if action_match:
                formatted_output += f"Action: {action_match.group(1).strip()}\n"

        return formatted_output

    except json.JSONDecodeError:
        return log_content # Return raw log if JSON parsing fails


def run_script_for_gradio(url, task):
    """
    A wrapper to run the webvoyager script for Gradio, capturing output and screenshots.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        task_file_path = os.path.join(temp_dir, 'task.jsonl')
        with open(task_file_path, 'w') as f:
            f.write(f'{{"id": "custom_task", "web": "{url}", "ques": "{task}"}}')

        args = argparse.Namespace(
            test_file=task_file_path,
            max_iter=5,
            api_key=os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
            api_model="gpt-4-turbo",
            output_dir=os.path.join(temp_dir, 'results'),
            seed=None,
            max_attached_imgs=1,
            temperature=1.0,
            download_dir=os.path.join(temp_dir, 'downloads'),
            text_only=False,
            headless=True,
            save_accessibility_tree=False,
            force_device_scale=False,
            window_width=1024,
            window_height=768,
            fix_box_color=False
        )

        os.makedirs(args.output_dir, exist_ok=True)
        os.makedirs(args.download_dir, exist_ok=True)
        
        task_dir = os.path.join(args.output_dir, 'taskcustom_task')
        os.makedirs(task_dir, exist_ok=True)

        full_log = ""
        try:
            # We'll get real-time updates by iterating through the run function
            for i, log_entry in enumerate(webvoyager_run(args, {"id": "custom_task", "web": url, "ques": task}, task_dir)):
                
                formatted_log = format_log_for_gradio(log_entry)
                if formatted_log:
                    full_log += f"--- Step {i+1} ---\n{formatted_log}\n"
                
                screenshot_path = os.path.join(task_dir, f'screenshot{i+1}.png')
                if os.path.exists(screenshot_path):
                    yield screenshot_path, full_log
                else:
                    yield None, full_log

        except Exception as e:
            tb = traceback.format_exc()
            full_log += f"An error occurred: {e}\n\nFull Traceback:\n{tb}"
            yield None, full_log


iface = gr.Interface(
    fn=run_script_for_gradio,
    inputs=[
        gr.Textbox(label="URL", placeholder="Enter the URL of the website"),
        gr.Textbox(label="Task", placeholder="Describe the task to perform"),
    ],
    outputs=[
        gr.Image(label="Agent's View", type="filepath"),
        gr.Textbox(label="Agent Output", lines=20, interactive=False),
    ],
    title="WebVoyager",
    description="An LMM-powered web agent that can complete user instructions end-to-end.",
)

if __name__ == "__main__":
    iface.launch()