Spaces:
Sleeping
Sleeping
File size: 11,728 Bytes
d7b3d84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 |
"""
Cloud Example 3: Structured JSON Output 📋
==========================================
This example demonstrates how to get structured, validated JSON output:
- Define Pydantic schemas for type safety
- Extract structured data from websites
- Validate and parse JSON responses
- Handle different data types and nested structures
Perfect for: Data extraction, API integration, structured analysis
Cost: ~$0.06 (1 task + 5-6 steps with GPT-4.1 mini)
"""
import argparse
import json
import os
import time
from typing import Any
import requests
from pydantic import BaseModel, Field, ValidationError
from requests.exceptions import RequestException
# Configuration -- every value below is read from the environment at import time.

# The API key is mandatory: abort the import when it is unset or empty.
API_KEY = os.environ.get('BROWSER_USE_API_KEY')
if not API_KEY:
    raise ValueError(
        'Please set BROWSER_USE_API_KEY environment variable. '
        'You can also create an API key at https://cloud.browser-use.com/new-api-key'
    )

# Root URL that all endpoint paths are appended to; overridable via BROWSER_USE_BASE_URL.
BASE_URL = os.environ.get('BROWSER_USE_BASE_URL', 'https://api.browser-use.com/api/v1')

# Default `timeout` handed to each HTTP request (a non-integer value raises ValueError here).
TIMEOUT = int(os.environ.get('BROWSER_USE_TIMEOUT', '30'))

# Headers attached to every API call: bearer authentication plus a JSON content type.
HEADERS = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json',
}
def _request_with_retry(method: str, url: str, **kwargs) -> requests.Response:
    """Make an HTTP request, retrying transient failures with exponential backoff.

    Args:
        method: HTTP method name, passed straight to ``requests.request``.
        url: Target URL.
        **kwargs: Forwarded to ``requests.request``. ``timeout`` defaults to
            the module-level ``TIMEOUT`` when the caller does not supply it.

    Returns:
        The response of the first attempt that completes without an error status.

    Raises:
        RequestException: Re-raised after the last failed attempt, or
            immediately for a 4xx response other than 408/429.
    """
    max_attempts = 3
    kwargs.setdefault('timeout', TIMEOUT)

    for attempt in range(max_attempts):
        try:
            response = requests.request(method, url, **kwargs)
            response.raise_for_status()
            return response
        except RequestException as e:
            # A client error (bad API key, unknown task id, invalid payload)
            # fails identically on every retry, so surface it right away.
            # 408 (request timeout) and 429 (rate limit) are worth retrying.
            status_code = getattr(getattr(e, 'response', None), 'status_code', None)
            is_client_error = status_code is not None and 400 <= status_code < 500 and status_code not in (408, 429)
            if is_client_error or attempt == max_attempts - 1:
                raise
            sleep_time = 2**attempt  # 1s, then 2s
            print(f'β οΈ Request failed (attempt {attempt + 1}/{max_attempts}), retrying in {sleep_time}s: {e}')
            time.sleep(sleep_time)

    # Unreachable: the loop either returns or raises on its final iteration.
    raise RuntimeError('Unexpected error in retry logic')
# Define structured output schemas using Pydantic
class NewsArticle(BaseModel):
    """Schema for a news article."""

    # NOTE: the class docstring and every `description` are emitted into the
    # generated JSON schema, so they are part of what is sent with the task.
    # `Field(...)` marks a field as required (no default value).

    title: str = Field(..., description='The headline of the article')
    summary: str = Field(..., description='Brief summary of the article')
    url: str = Field(..., description='Direct link to the article')

    # Nullable, but the key itself must still be present in the payload.
    published_date: str | None = Field(..., description='Publication date if available')
    category: str | None = Field(..., description='Article category/section')
class NewsResponse(BaseModel):
    """Schema for multiple news articles."""

    # NOTE: the class docstring and every `description` are emitted into the
    # generated JSON schema. `Field(...)` marks a field as required.

    # Each element is validated against the NewsArticle schema.
    articles: list[NewsArticle] = Field(..., description='List of news articles')
    source_website: str = Field(..., description='The website where articles were found')
    # Kept as a free-form string; no date parsing is applied to it.
    extracted_at: str = Field(..., description='When the data was extracted')
class ProductInfo(BaseModel):
    """Schema for product information."""

    # NOTE: the class docstring and every `description` are emitted into the
    # generated JSON schema. `Field(...)` marks a field as required.

    name: str = Field(..., description='Product name')
    price: float = Field(..., description='Product price in USD')
    # Nullable, but the key itself must still be present in the payload.
    rating: float | None = Field(..., description='Average rating (0-5 scale)')
    availability: str = Field(..., description='Stock status (in stock, out of stock, etc.)')
    description: str = Field(..., description='Product description')
class CompanyInfo(BaseModel):
    """Schema for company information."""

    # NOTE: the class docstring and every `description` are emitted into the
    # generated JSON schema. `Field(...)` marks a field as required.

    name: str = Field(..., description='Company name')
    industry: str = Field(..., description='Primary industry')
    headquarters: str = Field(..., description='Headquarters location')

    # Nullable fields: the keys must be present, but the values may be null.
    stock_symbol: str | None = Field(..., description='Stock ticker symbol')
    market_cap: str | None = Field(..., description='Market capitalization')
    founded_year: int | None = Field(..., description='Year founded')
def create_structured_task(instructions: str, schema_model: type[BaseModel], **kwargs) -> str:
    """
    Create a task that returns structured JSON output.

    Args:
        instructions: Task description
        schema_model: Pydantic model defining the expected output structure
        **kwargs: Additional task parameters; these are merged last, so they
            override the defaults set below (model, step limit, public sharing)

    Returns:
        task_id: Unique identifier for the created task

    Raises:
        RequestException: If the create request ultimately fails
        KeyError: If the response body has no 'id' field
    """
    print(f'π Creating structured task: {instructions}')
    print(f'ποΈ Expected schema: {schema_model.__name__}')

    # Generate JSON schema from Pydantic model
    json_schema = schema_model.model_json_schema()

    payload = {
        'task': instructions,
        # The schema is sent as a JSON-encoded string, not as a nested object.
        'structured_output_json': json.dumps(json_schema),
        'llm_model': 'gpt-4.1-mini',
        'max_agent_steps': 15,
        'enable_public_share': True,  # Enable shareable execution URLs
        **kwargs,
    }

    response = _request_with_retry('post', f'{BASE_URL}/run-task', headers=HEADERS, json=payload)
    task_id = response.json()['id']
    print(f'✅ Structured task created: {task_id}')
    return task_id
def wait_for_structured_completion(task_id: str, max_wait_time: int = 300) -> dict[str, Any]:
    """Poll a task until it reaches a terminal state or the wait budget runs out.

    Args:
        task_id: Identifier of the task to watch.
        max_wait_time: Maximum number of seconds to keep polling.

    Returns:
        The task-details JSON from the most recent poll. It is returned in
        every case (finished, failed, stopped or timed out), so callers must
        inspect it themselves, e.g. for an 'output' key.

    Raises:
        RequestException: If a status or details request ultimately fails.
    """
    poll_interval = 3  # seconds between polls

    print(f'β³ Waiting for structured output (max {max_wait_time}s)...')

    # A monotonic clock keeps the elapsed time immune to wall-clock adjustments.
    start_time = time.monotonic()

    while True:
        # The status endpoint's JSON body is compared directly against status
        # names below -- assumes it is a bare string such as "running".
        status = _request_with_retry('get', f'{BASE_URL}/task/{task_id}/status', headers=HEADERS).json()
        elapsed = time.monotonic() - start_time

        # Full details are fetched once per poll; they supply the step count
        # for the progress line and are the value handed back to the caller.
        details = _request_with_retry('get', f'{BASE_URL}/task/{task_id}', headers=HEADERS).json()
        steps = len(details.get('steps') or [])  # tolerate a missing or null step list

        # Terminal states are checked before the timeout so that a task which
        # completes on the final poll is not reported as timed out.
        if status == 'finished':
            print(f'\r✅ Structured data extracted! ({steps} steps in {elapsed:.1f}s)' + ' ' * 20)
            return details
        if status in ('failed', 'stopped'):
            print(f'\rβ Task {status} after {steps} steps' + ' ' * 30)
            return details
        if elapsed > max_wait_time:
            print(f'\rβ° Task timeout after {max_wait_time}s - stopping wait' + ' ' * 30)
            return details

        # Build status message
        if status == 'running':
            status_msg = f'π Structured task | Step {steps} | β±οΈ {elapsed:.0f}s | π Extracting...'
        else:
            status_msg = f'π Structured task | Step {steps} | β±οΈ {elapsed:.0f}s | Status: {status}'

        # '\r' plus padding to 80 columns redraws the progress line in place.
        print(f'\r{status_msg:<80}', end='', flush=True)

        time.sleep(poll_interval)
def validate_and_display_output(output: str, schema_model: type[BaseModel]) -> BaseModel | None:
    """
    Validate the JSON output against the schema and display results.

    Args:
        output: Raw JSON string from the task
        schema_model: Pydantic model for validation

    Returns:
        The validated model instance, or None when the output is not valid
        JSON or does not match the schema (the error is printed, not raised)
    """
    print('\nπ Structured Output Analysis')
    print('=' * 40)

    # Only the parse step can raise the errors handled here, so the try is
    # kept to that single call.
    try:
        # Parse and validate the JSON
        parsed_data = schema_model.model_validate_json(output)
    except ValidationError as e:
        print('β JSON validation failed!')
        print(f'Errors: {e}')
        print(f'\nRaw output: {output[:500]}...')
        return None
    except json.JSONDecodeError as e:
        print('β Invalid JSON format!')
        print(f'Error: {e}')
        print(f'\nRaw output: {output[:500]}...')
        return None

    print('✅ JSON validation successful!')

    # Pretty print the structured data
    print('\nπ Parsed Data:')
    print('-' * 20)
    print(parsed_data.model_dump_json(indent=2))

    # Display specific fields based on model type
    if isinstance(parsed_data, NewsResponse):
        print(f'\nπ° Found {len(parsed_data.articles)} articles from {parsed_data.source_website}')
        # Only the first three articles are summarised.
        for i, article in enumerate(parsed_data.articles[:3], 1):
            print(f'\n{i}. {article.title}')
            print(f' Summary: {article.summary[:100]}...')
            print(f' URL: {article.url}')

    elif isinstance(parsed_data, ProductInfo):
        print(f'\nποΈ Product: {parsed_data.name}')
        print(f' Price: ${parsed_data.price}')
        # Compare against None explicitly: a rating of 0.0 is a real value
        # and must not be displayed as "N/A".
        if parsed_data.rating is not None:
            print(f' Rating: {parsed_data.rating}/5')
        else:
            print(' Rating: N/A')
        print(f' Status: {parsed_data.availability}')

    elif isinstance(parsed_data, CompanyInfo):
        print(f'\nπ’ Company: {parsed_data.name}')
        print(f' Industry: {parsed_data.industry}')
        print(f' Headquarters: {parsed_data.headquarters}')
        if parsed_data.founded_year is not None:
            print(f' Founded: {parsed_data.founded_year}')

    return parsed_data
def _print_execution_urls(result: dict[str, Any]) -> None:
    """Print the live-preview and share links found in a task-details dict, if any."""
    live_url = result.get('live_url')
    if live_url:
        print(f'\nπ Live Preview: {live_url}')

    # Prefer 'public_share_url'; fall back to 'share_url' when it is absent or empty.
    share_url = result.get('public_share_url') or result.get('share_url')
    if share_url:
        print(f'π Share URL: {share_url}')


def _run_structured_demo(task: str, schema_model: type[BaseModel]):
    """Create a structured task, wait for it, then validate and display its output.

    Args:
        task: Natural-language instructions for the agent.
        schema_model: Pydantic model that the output is validated against.

    Returns:
        Whatever ``validate_and_display_output`` returns for the task output,
        or None when the task produced no output at all.
    """
    task_id = create_structured_task(task, schema_model)
    result = wait_for_structured_completion(task_id)

    output = result.get('output')
    if not output:
        print('β No structured output received')
        return None

    parsed_result = validate_and_display_output(output, schema_model)
    # Show execution URLs
    _print_execution_urls(result)
    return parsed_result


def demo_news_extraction():
    """Demo: Extract structured news data."""
    print('\nπ° Demo 1: News Article Extraction')
    print('-' * 40)

    task = """
Go to a major news website (like BBC, CNN, or Reuters) and extract information
about the top 3 news articles. For each article, get the title, summary, URL,
and any other available metadata.
"""
    return _run_structured_demo(task, NewsResponse)


def demo_product_extraction():
    """Demo: Extract structured product data."""
    print('\nποΈ Demo 2: Product Information Extraction')
    print('-' * 40)

    task = """
Go to Amazon and search for 'wireless headphones'. Find the first product result
and extract detailed information including name, price, rating, availability,
and description.
"""
    return _run_structured_demo(task, ProductInfo)


def demo_company_extraction():
    """Demo: Extract structured company data."""
    print('\nπ’ Demo 3: Company Information Extraction')
    print('-' * 40)

    task = """
Go to a financial website and look up information about Apple Inc.
Extract company details including name, stock symbol, market cap,
industry, headquarters, and founding year.
"""
    return _run_structured_demo(task, CompanyInfo)
def main():
    """Print the banner, parse ``--demo`` and run the selected demo(s).

    Request failures and any other exception raised while parsing or running
    are printed rather than propagated.
    """
    banner_lines = (
        'π Browser Use Cloud - Structured JSON Output',
        '=' * 50,
        'π― Features:',
        'β’ Type-safe Pydantic schemas',
        'β’ Automatic JSON validation',
        'β’ Structured data extraction',
        'β’ Multiple output formats',
    )
    for line in banner_lines:
        print(line)

    try:
        # Parse command line arguments
        parser = argparse.ArgumentParser(description='Structured output extraction demo')
        parser.add_argument(
            '--demo',
            choices=['news', 'product', 'company', 'all'],
            default='news',
            help='Which demo to run',
        )
        args = parser.parse_args()

        print(f'\nπ Running {args.demo} demo(s)...')

        # Each choice maps to the demos it runs, in execution order.
        demos_by_choice = {
            'news': (demo_news_extraction,),
            'product': (demo_product_extraction,),
            'company': (demo_company_extraction,),
            'all': (demo_news_extraction, demo_product_extraction, demo_company_extraction),
        }
        for run_demo in demos_by_choice.get(args.demo, ()):
            run_demo()

    except requests.exceptions.RequestException as e:
        print(f'β API Error: {e}')
    except Exception as e:
        print(f'β Error: {e}')


if __name__ == '__main__':
    main()
|