webvoyager_backup

Sleeping

App Files Files Community

webvoyager_backup / app.py

harvesthealth

Upload folder using huggingface_hub

663480b verified 3 months ago

raw

history blame contribute delete

4.36 kB

	import gradio as gr
	import argparse
	import os
	import sys
	import tempfile
	import threading
	import json
	import traceback
	from run import webvoyager_run
	import re

	def format_log_for_gradio(log_content):
	    """Condense one raw agent log entry into readable text for the Gradio UI.

	    The entry is searched for a logged message list of the form ``[{...}]``.
	    When that list holds exactly one message object, a short summary is built
	    from its ``role`` and ``content`` fields.

	    Args:
	        log_content: Raw log text for a single step.

	    Returns:
	        * ``log_content`` unchanged when no ``[{`` is present, when the text
	          from that point is not valid JSON, or when the list does not hold
	          exactly one message.
	        * For a ``user`` message: an "Initial Prompt" header followed by the
	          task text, if the task can be located in the prompt.
	        * For an ``assistant`` message: the ``Thought:`` and ``Action:`` parts
	          that could be found.
	        * An empty string when the message decodes but has nothing to show.
	    """
	    # Locate the opening of the logged message list.
	    start = log_content.find('[{')
	    if start == -1:
	        return log_content

	    try:
	        # raw_decode parses exactly one JSON value and ignores trailing text,
	        # so brackets nested inside the message (e.g. a list-valued "content")
	        # are handled correctly. A lazy regex would cut the object short at the
	        # first "}]" it meets.
	        messages, _ = json.JSONDecoder().raw_decode(log_content[start:])
	    except json.JSONDecodeError:
	        return log_content  # Return raw log if JSON parsing fails

	    if len(messages) != 1:
	        return log_content
	    log_data = messages[0]

	    # Normalise "content" to plain text: it may be a string or a list of
	    # parts, of which the first part carrying a string "text" field is used.
	    content = log_data.get('content')
	    if isinstance(content, str):
	        text = content
	    elif isinstance(content, list):
	        text = next(
	            (
	                part['text']
	                for part in content
	                if isinstance(part, dict) and isinstance(part.get('text'), str)
	            ),
	            "",
	        )
	    else:
	        text = ""

	    role = log_data.get('role')
	    formatted_output = ""

	    if role == 'user':
	        # This is the initial prompt; only the task sentence is surfaced.
	        formatted_output += "--- Initial Prompt ---\n"
	        # DOTALL lets a task that spans several lines still be captured.
	        task_match = re.search(
	            r'Now given a task: (.*?)\s+Please interact with', text, re.DOTALL
	        )
	        if task_match:
	            formatted_output += f"Task: {task_match.group(1)}\n\n"

	    elif role == 'assistant':
	        # This is the agent's response: show its reasoning and chosen action.
	        thought_match = re.search(r'Thought: (.*?)\nAction:', text, re.DOTALL)
	        action_match = re.search(r'Action: (.*)', text, re.DOTALL)

	        if thought_match:
	            formatted_output += f"Thought: {thought_match.group(1).strip()}\n"
	        if action_match:
	            formatted_output += f"Action: {action_match.group(1).strip()}\n"

	    return formatted_output


	def run_script_for_gradio(url, task):
	    """Run the WebVoyager agent on one task and stream progress to Gradio.

	    A temporary working directory is created for the task file, results and
	    downloads. ``webvoyager_run`` is then iterated, and after each log entry
	    it produces the accumulated, formatted log is yielded together with that
	    step's screenshot, if one exists.

	    Args:
	        url: Address of the website the agent should start from.
	        task: Natural-language description of what the agent should do.

	    Yields:
	        ``(screenshot_path, full_log)`` tuples. ``screenshot_path`` is the path
	        of ``screenshot<step>.png`` inside the task directory, or ``None`` when
	        that file does not exist. ``full_log`` is the log text accumulated so
	        far. If the run raises, a final ``(None, full_log)`` is yielded with
	        the error message and traceback appended.
	    """
	    with tempfile.TemporaryDirectory() as temp_dir:
	        task_record = {"id": "custom_task", "web": url, "ques": task}

	        task_file_path = os.path.join(temp_dir, 'task.jsonl')
	        with open(task_file_path, 'w', encoding='utf-8') as f:
	            # Serialise with json.dumps so that quotes, backslashes or newlines
	            # in the user-supplied url/task cannot produce an invalid JSON
	            # line. The default ASCII-only output keeps the file readable
	            # under any text encoding.
	            f.write(json.dumps(task_record))

	        args = argparse.Namespace(
	            test_file=task_file_path,
	            max_iter=5,
	            api_key=os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
	            api_model="gpt-4-turbo",
	            output_dir=os.path.join(temp_dir, 'results'),
	            seed=None,
	            max_attached_imgs=1,
	            temperature=1.0,
	            download_dir=os.path.join(temp_dir, 'downloads'),
	            text_only=False,
	            headless=True,
	            save_accessibility_tree=False,
	            force_device_scale=False,
	            window_width=1024,
	            window_height=768,
	            fix_box_color=False,
	        )

	        os.makedirs(args.output_dir, exist_ok=True)
	        os.makedirs(args.download_dir, exist_ok=True)

	        task_dir = os.path.join(args.output_dir, 'taskcustom_task')
	        os.makedirs(task_dir, exist_ok=True)

	        full_log = ""
	        try:
	            # We'll get real-time updates by iterating through the run function
	            for i, log_entry in enumerate(webvoyager_run(args, task_record, task_dir)):
	                formatted_log = format_log_for_gradio(log_entry)
	                if formatted_log:
	                    full_log += f"--- Step {i+1} ---\n{formatted_log}\n"

	                # NOTE(review): assumes the run saves per-step screenshots as
	                # screenshot<step>.png in task_dir - confirm against run.py.
	                screenshot_path = os.path.join(task_dir, f'screenshot{i+1}.png')
	                yield (
	                    screenshot_path if os.path.exists(screenshot_path) else None,
	                    full_log,
	                )

	        except Exception as e:
	            # Top-level UI boundary: report the failure in the output box
	            # instead of letting the generator die without explanation.
	            tb = traceback.format_exc()
	            full_log += f"An error occurred: {e}\n\nFull Traceback:\n{tb}"
	            yield None, full_log


	# Input widgets: the start URL and the task description.
	_url_box = gr.Textbox(label="URL", placeholder="Enter the URL of the website")
	_task_box = gr.Textbox(label="Task", placeholder="Describe the task to perform")

	# Output widgets: the screenshot path and the accumulated log text yielded
	# by run_script_for_gradio.
	_view_image = gr.Image(label="Agent's View", type="filepath")
	_log_box = gr.Textbox(label="Agent Output", lines=20, interactive=False)

	# Gradio interface wiring the two inputs to the two outputs.
	iface = gr.Interface(
	    fn=run_script_for_gradio,
	    inputs=[_url_box, _task_box],
	    outputs=[_view_image, _log_box],
	    title="WebVoyager",
	    description="An LMM-powered web agent that can complete user instructions end-to-end.",
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()