Spaces:

Speedofmastery
/

HMM

Sleeping

App Files Files Community

HMM / browser-use-main /examples /cloud /03_structured_output.py

Speedofmastery

Merge Landrun + Browser-Use + Chromium with AI agent support (without binary files)

d7b3d84 3 months ago

raw

history blame contribute delete

11.7 kB

	"""
	Cloud Example 3: Structured JSON Output 📋
	==========================================

	This example demonstrates how to get structured, validated JSON output:
	- Define Pydantic schemas for type safety
	- Extract structured data from websites
	- Validate and parse JSON responses
	- Handle different data types and nested structures

	Perfect for: Data extraction, API integration, structured analysis

	Cost: ~$0.06 (1 task + 5-6 steps with GPT-4.1 mini)
	"""

	import argparse
	import json
	import os
	import time
	from typing import Any

	import requests
	from pydantic import BaseModel, Field, ValidationError
	from requests.exceptions import RequestException

	# Configuration
	# The API key is mandatory: the module refuses to load without it.
	API_KEY = os.environ.get('BROWSER_USE_API_KEY')
	if not API_KEY:
	    raise ValueError(
	        'Please set BROWSER_USE_API_KEY environment variable. You can also create an API key at https://cloud.browser-use.com/new-api-key'
	    )

	# Optional overrides, each with a built-in default.
	BASE_URL = os.environ.get('BROWSER_USE_BASE_URL', 'https://api.browser-use.com/api/v1')
	TIMEOUT = int(os.environ.get('BROWSER_USE_TIMEOUT', '30'))  # passed as the requests timeout (seconds)
	HEADERS = {
	    'Authorization': f'Bearer {API_KEY}',
	    'Content-Type': 'application/json',
	}


	def _request_with_retry(method: str, url: str, **kwargs) -> requests.Response:
	    """Make an HTTP request with a default timeout and bounded retries.

	    Args:
	        method: HTTP method name handed to ``requests.request``.
	        url: Target URL.
	        **kwargs: Extra keyword arguments forwarded to ``requests.request``;
	            ``timeout`` falls back to ``TIMEOUT`` when the caller omits it.

	    Returns:
	        The response, once ``raise_for_status()`` has accepted it.

	    Raises:
	        RequestException: Immediately for a 4xx response other than 408/429,
	            otherwise after the third failed attempt.
	    """
	    kwargs.setdefault('timeout', TIMEOUT)
	    max_attempts = 3

	    for attempt in range(max_attempts):
	        try:
	            response = requests.request(method, url, **kwargs)
	            response.raise_for_status()
	            return response
	        except RequestException as e:
	            # A 4xx answer (bad key, unknown task, invalid payload) will not
	            # change on a retry; only request timeouts (408) and rate limits
	            # (429) are worth waiting for. Connection errors carry no response.
	            status_code = getattr(getattr(e, 'response', None), 'status_code', None)
	            client_error = status_code is not None and 400 <= status_code < 500 and status_code not in (408, 429)
	            if client_error or attempt == max_attempts - 1:
	                raise
	            sleep_time = 2**attempt  # exponential backoff: 1s, then 2s
	            print(f'⚠️ Request failed (attempt {attempt + 1}/{max_attempts}), retrying in {sleep_time}s: {e}')
	            time.sleep(sleep_time)

	    # Unreachable: the loop either returns or re-raises on its last pass.
	    raise RuntimeError('Unexpected error in retry logic')


	# Define structured output schemas using Pydantic
	class NewsArticle(BaseModel):
	    """Schema for a single news article.

	    ``published_date`` and ``category`` accept ``None`` but declare no
	    default value.
	    """

	    title: str = Field(description='The headline of the article')
	    summary: str = Field(description='Brief summary of the article')
	    url: str = Field(description='Direct link to the article')
	    published_date: str | None = Field(description='Publication date if available')
	    category: str | None = Field(description='Article category/section')


	class NewsResponse(BaseModel):
	    """Container schema: a batch of articles plus where and when they were gathered."""

	    # Each entry is validated against NewsArticle.
	    articles: list[NewsArticle] = Field(
	        description='List of news articles',
	    )
	    source_website: str = Field(
	        description='The website where articles were found',
	    )
	    # Kept as free-form text; no date parsing is applied.
	    extracted_at: str = Field(
	        description='When the data was extracted',
	    )


	class ProductInfo(BaseModel):
	    """Schema for product information.

	    ``rating`` accepts ``None`` (no rating found) but declares no default value.
	    """

	    name: str = Field(description='Product name')
	    price: float = Field(description='Product price in USD')
	    rating: float | None = Field(description='Average rating (0-5 scale)')
	    availability: str = Field(description='Stock status (in stock, out of stock, etc.)')
	    description: str = Field(description='Product description')


	class CompanyInfo(BaseModel):
	    """Schema for company information.

	    ``stock_symbol``, ``market_cap`` and ``founded_year`` accept ``None``
	    but declare no default value.
	    """

	    name: str = Field(description='Company name')
	    stock_symbol: str | None = Field(description='Stock ticker symbol')
	    # Free-form text rather than a number.
	    market_cap: str | None = Field(description='Market capitalization')
	    industry: str = Field(description='Primary industry')
	    headquarters: str = Field(description='Headquarters location')
	    founded_year: int | None = Field(description='Year founded')


	def create_structured_task(instructions: str, schema_model: type[BaseModel], **kwargs) -> str:
	    """
	    Submit a task whose result must follow a Pydantic-derived JSON schema.

	    Args:
	        instructions: Task description
	        schema_model: Pydantic model whose JSON schema describes the expected output
	        **kwargs: Extra payload entries; these override the defaults set below

	    Returns:
	        The ``id`` field of the JSON response from the run-task endpoint
	    """
	    print(f'📝 Creating structured task: {instructions}')
	    print(f'🏗️ Expected schema: {schema_model.__name__}')

	    # The schema is sent as a JSON-encoded string, not as a nested object
	    schema_text = json.dumps(schema_model.model_json_schema())

	    body = {
	        'task': instructions,
	        'structured_output_json': schema_text,
	        'llm_model': 'gpt-4.1-mini',
	        'max_agent_steps': 15,
	        'enable_public_share': True,  # Enable shareable execution URLs
	    }
	    # Caller-supplied entries win over the defaults above
	    body.update(kwargs)

	    created = _request_with_retry('post', f'{BASE_URL}/run-task', headers=HEADERS, json=body).json()
	    task_id = created['id']
	    print(f'✅ Structured task created: {task_id}')
	    return task_id


	def wait_for_structured_completion(task_id: str, max_wait_time: int = 300) -> dict[str, Any]:
	    """Poll a task until it reaches a terminal status or the wait budget runs out.

	    Every poll reads the status endpoint and the full task details; the step
	    count shown in the progress line comes from the details.

	    Args:
	        task_id: Identifier of the task to poll.
	        max_wait_time: Maximum number of seconds to keep polling.

	    Returns:
	        The task details from the most recent poll. This is returned in every
	        case (finished, failed, stopped, or timed out), so callers must
	        inspect it rather than assume success.
	    """
	    print(f'⏳ Waiting for structured output (max {max_wait_time}s)...')

	    start_time = time.time()

	    while True:
	        # The comparisons below treat the status payload as a bare JSON string.
	        status = _request_with_retry('get', f'{BASE_URL}/task/{task_id}/status', headers=HEADERS).json()
	        details = _request_with_retry('get', f'{BASE_URL}/task/{task_id}', headers=HEADERS).json()
	        elapsed = time.time() - start_time
	        # `or []` also covers a details payload whose 'steps' is null.
	        steps = len(details.get('steps') or [])

	        # Terminal states are checked before the timeout so that a task which
	        # completed on the final poll is reported as completed, not timed out.
	        if status == 'finished':
	            print(f'\r✅ Structured data extracted! ({steps} steps in {elapsed:.1f}s)' + ' ' * 20)
	            return details

	        if status in ('failed', 'stopped'):
	            print(f'\r❌ Task {status} after {steps} steps' + ' ' * 30)
	            return details

	        if elapsed > max_wait_time:
	            print(f'\r⏰ Task timeout after {max_wait_time}s - stopping wait' + ' ' * 30)
	            return details

	        # Build status message
	        if status == 'running':
	            status_msg = f'📋 Structured task | Step {steps} | ⏱️ {elapsed:.0f}s | 🔄 Extracting...'
	        else:
	            status_msg = f'📋 Structured task | Step {steps} | ⏱️ {elapsed:.0f}s | Status: {status}'

	        # '\r' plus padding to 80 columns overwrites the previous progress line
	        print(f'\r{status_msg:<80}', end='', flush=True)

	        time.sleep(3)


	def validate_and_display_output(output: str, schema_model: type[BaseModel]) -> BaseModel | None:
	    """
	    Validate the JSON output against the schema and display results.

	    Args:
	        output: Raw JSON string from the task
	        schema_model: Pydantic model for validation

	    Returns:
	        The validated model instance, or None when the output is not valid
	        JSON or does not match the schema (the problem is printed, not raised)
	    """
	    print('\n📊 Structured Output Analysis')
	    print('=' * 40)

	    try:
	        # Parse and validate the JSON
	        parsed_data = schema_model.model_validate_json(output)
	    except ValidationError as e:
	        # Pydantic v2 reports malformed JSON as a ValidationError whose error
	        # type is 'json_invalid', so both failure modes are handled here.
	        if any(err.get('type') == 'json_invalid' for err in e.errors()):
	            print('❌ Invalid JSON format!')
	            print(f'Error: {e}')
	        else:
	            print('❌ JSON validation failed!')
	            print(f'Errors: {e}')
	        print(f'\nRaw output: {output[:500]}...')
	        return None

	    print('✅ JSON validation successful!')

	    # Pretty print the structured data
	    print('\n📋 Parsed Data:')
	    print('-' * 20)
	    print(parsed_data.model_dump_json(indent=2))

	    # Display specific fields based on model type
	    if isinstance(parsed_data, NewsResponse):
	        print(f'\n📰 Found {len(parsed_data.articles)} articles from {parsed_data.source_website}')
	        # Only the first three articles are summarised
	        for i, article in enumerate(parsed_data.articles[:3], 1):
	            print(f'\n{i}. {article.title}')
	            print(f' Summary: {article.summary[:100]}...')
	            print(f' URL: {article.url}')

	    elif isinstance(parsed_data, ProductInfo):
	        print(f'\n🛍️ Product: {parsed_data.name}')
	        print(f' Price: ${parsed_data.price}')
	        # Compare against None so that a genuine 0.0 rating is still shown
	        print(f' Rating: {parsed_data.rating}/5' if parsed_data.rating is not None else ' Rating: N/A')
	        print(f' Status: {parsed_data.availability}')

	    elif isinstance(parsed_data, CompanyInfo):
	        print(f'\n🏢 Company: {parsed_data.name}')
	        print(f' Industry: {parsed_data.industry}')
	        print(f' Headquarters: {parsed_data.headquarters}')
	        if parsed_data.founded_year is not None:
	            print(f' Founded: {parsed_data.founded_year}')

	    return parsed_data


	def demo_news_extraction():
	    """Demo: run the news task and validate its output as a NewsResponse.

	    Returns the validation result, or None when the task produced no output.
	    """
	    print('\n📰 Demo 1: News Article Extraction')
	    print('-' * 40)

	    prompt = """
	Go to a major news website (like BBC, CNN, or Reuters) and extract information
	about the top 3 news articles. For each article, get the title, summary, URL,
	and any other available metadata.
	"""

	    details = wait_for_structured_completion(create_structured_task(prompt, NewsResponse))

	    raw_output = details.get('output')
	    if not raw_output:
	        print('❌ No structured output received')
	        return None

	    news = validate_and_display_output(raw_output, NewsResponse)

	    # Show execution URLs
	    live_url = details.get('live_url')
	    if live_url:
	        print(f'\n🔗 Live Preview: {live_url}')
	    # 'public_share_url' takes precedence; 'share_url' is the fallback key
	    share_url = details.get('public_share_url') or details.get('share_url')
	    if share_url:
	        print(f'🌐 Share URL: {share_url}')

	    return news


	def demo_product_extraction():
	    """Demo: run the product task and validate its output as a ProductInfo.

	    Returns the validation result, or None when the task produced no output.
	    """
	    print('\n🛍️ Demo 2: Product Information Extraction')
	    print('-' * 40)

	    prompt = """
	Go to Amazon and search for 'wireless headphones'. Find the first product result
	and extract detailed information including name, price, rating, availability,
	and description.
	"""

	    details = wait_for_structured_completion(create_structured_task(prompt, ProductInfo))

	    raw_output = details.get('output')
	    if not raw_output:
	        print('❌ No structured output received')
	        return None

	    product = validate_and_display_output(raw_output, ProductInfo)

	    # Show execution URLs
	    live_url = details.get('live_url')
	    if live_url:
	        print(f'\n🔗 Live Preview: {live_url}')
	    # 'public_share_url' takes precedence; 'share_url' is the fallback key
	    share_url = details.get('public_share_url') or details.get('share_url')
	    if share_url:
	        print(f'🌐 Share URL: {share_url}')

	    return product


	def demo_company_extraction():
	    """Demo: run the company task and validate its output as a CompanyInfo.

	    Returns the validation result, or None when the task produced no output.
	    """
	    print('\n🏢 Demo 3: Company Information Extraction')
	    print('-' * 40)

	    prompt = """
	Go to a financial website and look up information about Apple Inc.
	Extract company details including name, stock symbol, market cap,
	industry, headquarters, and founding year.
	"""

	    details = wait_for_structured_completion(create_structured_task(prompt, CompanyInfo))

	    raw_output = details.get('output')
	    if not raw_output:
	        print('❌ No structured output received')
	        return None

	    company = validate_and_display_output(raw_output, CompanyInfo)

	    # Show execution URLs
	    live_url = details.get('live_url')
	    if live_url:
	        print(f'\n🔗 Live Preview: {live_url}')
	    # 'public_share_url' takes precedence; 'share_url' is the fallback key
	    share_url = details.get('public_share_url') or details.get('share_url')
	    if share_url:
	        print(f'🌐 Share URL: {share_url}')

	    return company


	def main():
	    """Print the feature banner, read ``--demo`` and run the selected demo(s).

	    Request failures and any other exception raised while running are
	    printed rather than propagated.
	    """
	    print('📋 Browser Use Cloud - Structured JSON Output')
	    print('=' * 50)

	    print('🎯 Features:')
	    for feature in (
	        'Type-safe Pydantic schemas',
	        'Automatic JSON validation',
	        'Structured data extraction',
	        'Multiple output formats',
	    ):
	        print(f'• {feature}')

	    try:
	        # Parse command line arguments
	        cli = argparse.ArgumentParser(description='Structured output extraction demo')
	        cli.add_argument('--demo', choices=['news', 'product', 'company', 'all'], default='news', help='Which demo to run')
	        selection = cli.parse_args().demo

	        print(f'\n🔍 Running {selection} demo(s)...')

	        demos = {
	            'news': demo_news_extraction,
	            'product': demo_product_extraction,
	            'company': demo_company_extraction,
	        }
	        # 'all' selects every entry, in the order declared above
	        for demo_name, run_demo in demos.items():
	            if selection in (demo_name, 'all'):
	                run_demo()

	    except requests.exceptions.RequestException as e:
	        print(f'❌ API Error: {e}')
	    except Exception as e:
	        print(f'❌ Error: {e}')


	# Run the demos only when executed as a script, not when imported
	if __name__ == '__main__':
	    main()