Spaces:
Sleeping
Sleeping
| """ | |
| Cloud Example 3: Structured JSON Output π | |
| ========================================== | |
| This example demonstrates how to get structured, validated JSON output: | |
| - Define Pydantic schemas for type safety | |
| - Extract structured data from websites | |
| - Validate and parse JSON responses | |
| - Handle different data types and nested structures | |
| Perfect for: Data extraction, API integration, structured analysis | |
| Cost: ~$0.06 (1 task + 5-6 steps with GPT-4.1 mini) | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import time | |
| from typing import Any | |
| import requests | |
| from pydantic import BaseModel, Field, ValidationError | |
| from requests.exceptions import RequestException | |
| # Configuration | |
| API_KEY = os.getenv('BROWSER_USE_API_KEY') | |
| if not API_KEY: | |
| raise ValueError( | |
| 'Please set BROWSER_USE_API_KEY environment variable. You can also create an API key at https://cloud.browser-use.com/new-api-key' | |
| ) | |
| BASE_URL = os.getenv('BROWSER_USE_BASE_URL', 'https://api.browser-use.com/api/v1') | |
| TIMEOUT = int(os.getenv('BROWSER_USE_TIMEOUT', '30')) | |
| HEADERS = {'Authorization': f'Bearer {API_KEY}', 'Content-Type': 'application/json'} | |
| def _request_with_retry(method: str, url: str, **kwargs) -> requests.Response: | |
| """Make HTTP request with timeout and retry logic.""" | |
| kwargs.setdefault('timeout', TIMEOUT) | |
| for attempt in range(3): | |
| try: | |
| response = requests.request(method, url, **kwargs) | |
| response.raise_for_status() | |
| return response | |
| except RequestException as e: | |
| if attempt == 2: # Last attempt | |
| raise | |
| sleep_time = 2**attempt | |
| print(f'β οΈ Request failed (attempt {attempt + 1}/3), retrying in {sleep_time}s: {e}') | |
| time.sleep(sleep_time) | |
| raise RuntimeError('Unexpected error in retry logic') | |
| # Define structured output schemas using Pydantic | |
| class NewsArticle(BaseModel): | |
| """Schema for a news article.""" | |
| title: str = Field(description='The headline of the article') | |
| summary: str = Field(description='Brief summary of the article') | |
| url: str = Field(description='Direct link to the article') | |
| published_date: str | None = Field(description='Publication date if available') | |
| category: str | None = Field(description='Article category/section') | |
| class NewsResponse(BaseModel): | |
| """Schema for multiple news articles.""" | |
| articles: list[NewsArticle] = Field(description='List of news articles') | |
| source_website: str = Field(description='The website where articles were found') | |
| extracted_at: str = Field(description='When the data was extracted') | |
| class ProductInfo(BaseModel): | |
| """Schema for product information.""" | |
| name: str = Field(description='Product name') | |
| price: float = Field(description='Product price in USD') | |
| rating: float | None = Field(description='Average rating (0-5 scale)') | |
| availability: str = Field(description='Stock status (in stock, out of stock, etc.)') | |
| description: str = Field(description='Product description') | |
| class CompanyInfo(BaseModel): | |
| """Schema for company information.""" | |
| name: str = Field(description='Company name') | |
| stock_symbol: str | None = Field(description='Stock ticker symbol') | |
| market_cap: str | None = Field(description='Market capitalization') | |
| industry: str = Field(description='Primary industry') | |
| headquarters: str = Field(description='Headquarters location') | |
| founded_year: int | None = Field(description='Year founded') | |
| def create_structured_task(instructions: str, schema_model: type[BaseModel], **kwargs) -> str: | |
| """ | |
| Create a task that returns structured JSON output. | |
| Args: | |
| instructions: Task description | |
| schema_model: Pydantic model defining the expected output structure | |
| **kwargs: Additional task parameters | |
| Returns: | |
| task_id: Unique identifier for the created task | |
| """ | |
| print(f'π Creating structured task: {instructions}') | |
| print(f'ποΈ Expected schema: {schema_model.__name__}') | |
| # Generate JSON schema from Pydantic model | |
| json_schema = schema_model.model_json_schema() | |
| payload = { | |
| 'task': instructions, | |
| 'structured_output_json': json.dumps(json_schema), | |
| 'llm_model': 'gpt-4.1-mini', | |
| 'max_agent_steps': 15, | |
| 'enable_public_share': True, # Enable shareable execution URLs | |
| **kwargs, | |
| } | |
| response = _request_with_retry('post', f'{BASE_URL}/run-task', headers=HEADERS, json=payload) | |
| task_id = response.json()['id'] | |
| print(f'β Structured task created: {task_id}') | |
| return task_id | |
| def wait_for_structured_completion(task_id: str, max_wait_time: int = 300) -> dict[str, Any]: | |
| """Wait for task completion and return the result.""" | |
| print(f'β³ Waiting for structured output (max {max_wait_time}s)...') | |
| start_time = time.time() | |
| while True: | |
| response = _request_with_retry('get', f'{BASE_URL}/task/{task_id}/status', headers=HEADERS) | |
| status = response.json() | |
| elapsed = time.time() - start_time | |
| # Check for timeout | |
| if elapsed > max_wait_time: | |
| print(f'\rβ° Task timeout after {max_wait_time}s - stopping wait' + ' ' * 30) | |
| # Get final details before timeout | |
| details_response = _request_with_retry('get', f'{BASE_URL}/task/{task_id}', headers=HEADERS) | |
| details = details_response.json() | |
| return details | |
| # Get step count from full details for better progress tracking | |
| details_response = _request_with_retry('get', f'{BASE_URL}/task/{task_id}', headers=HEADERS) | |
| details = details_response.json() | |
| steps = len(details.get('steps', [])) | |
| # Build status message | |
| if status == 'running': | |
| status_msg = f'π Structured task | Step {steps} | β±οΈ {elapsed:.0f}s | π Extracting...' | |
| else: | |
| status_msg = f'π Structured task | Step {steps} | β±οΈ {elapsed:.0f}s | Status: {status}' | |
| # Clear line and show status | |
| print(f'\r{status_msg:<80}', end='', flush=True) | |
| if status == 'finished': | |
| print(f'\rβ Structured data extracted! ({steps} steps in {elapsed:.1f}s)' + ' ' * 20) | |
| return details | |
| elif status in ['failed', 'stopped']: | |
| print(f'\rβ Task {status} after {steps} steps' + ' ' * 30) | |
| return details | |
| time.sleep(3) | |
| def validate_and_display_output(output: str, schema_model: type[BaseModel]): | |
| """ | |
| Validate the JSON output against the schema and display results. | |
| Args: | |
| output: Raw JSON string from the task | |
| schema_model: Pydantic model for validation | |
| """ | |
| print('\nπ Structured Output Analysis') | |
| print('=' * 40) | |
| try: | |
| # Parse and validate the JSON | |
| parsed_data = schema_model.model_validate_json(output) | |
| print('β JSON validation successful!') | |
| # Pretty print the structured data | |
| print('\nπ Parsed Data:') | |
| print('-' * 20) | |
| print(parsed_data.model_dump_json(indent=2)) | |
| # Display specific fields based on model type | |
| if isinstance(parsed_data, NewsResponse): | |
| print(f'\nπ° Found {len(parsed_data.articles)} articles from {parsed_data.source_website}') | |
| for i, article in enumerate(parsed_data.articles[:3], 1): | |
| print(f'\n{i}. {article.title}') | |
| print(f' Summary: {article.summary[:100]}...') | |
| print(f' URL: {article.url}') | |
| elif isinstance(parsed_data, ProductInfo): | |
| print(f'\nποΈ Product: {parsed_data.name}') | |
| print(f' Price: ${parsed_data.price}') | |
| print(f' Rating: {parsed_data.rating}/5' if parsed_data.rating else ' Rating: N/A') | |
| print(f' Status: {parsed_data.availability}') | |
| elif isinstance(parsed_data, CompanyInfo): | |
| print(f'\nπ’ Company: {parsed_data.name}') | |
| print(f' Industry: {parsed_data.industry}') | |
| print(f' Headquarters: {parsed_data.headquarters}') | |
| if parsed_data.founded_year: | |
| print(f' Founded: {parsed_data.founded_year}') | |
| return parsed_data | |
| except ValidationError as e: | |
| print('β JSON validation failed!') | |
| print(f'Errors: {e}') | |
| print(f'\nRaw output: {output[:500]}...') | |
| return None | |
| except json.JSONDecodeError as e: | |
| print('β Invalid JSON format!') | |
| print(f'Error: {e}') | |
| print(f'\nRaw output: {output[:500]}...') | |
| return None | |
| def demo_news_extraction(): | |
| """Demo: Extract structured news data.""" | |
| print('\nπ° Demo 1: News Article Extraction') | |
| print('-' * 40) | |
| task = """ | |
| Go to a major news website (like BBC, CNN, or Reuters) and extract information | |
| about the top 3 news articles. For each article, get the title, summary, URL, | |
| and any other available metadata. | |
| """ | |
| task_id = create_structured_task(task, NewsResponse) | |
| result = wait_for_structured_completion(task_id) | |
| if result.get('output'): | |
| parsed_result = validate_and_display_output(result['output'], NewsResponse) | |
| # Show execution URLs | |
| if result.get('live_url'): | |
| print(f'\nπ Live Preview: {result["live_url"]}') | |
| if result.get('public_share_url'): | |
| print(f'π Share URL: {result["public_share_url"]}') | |
| elif result.get('share_url'): | |
| print(f'π Share URL: {result["share_url"]}') | |
| return parsed_result | |
| else: | |
| print('β No structured output received') | |
| return None | |
| def demo_product_extraction(): | |
| """Demo: Extract structured product data.""" | |
| print('\nποΈ Demo 2: Product Information Extraction') | |
| print('-' * 40) | |
| task = """ | |
| Go to Amazon and search for 'wireless headphones'. Find the first product result | |
| and extract detailed information including name, price, rating, availability, | |
| and description. | |
| """ | |
| task_id = create_structured_task(task, ProductInfo) | |
| result = wait_for_structured_completion(task_id) | |
| if result.get('output'): | |
| parsed_result = validate_and_display_output(result['output'], ProductInfo) | |
| # Show execution URLs | |
| if result.get('live_url'): | |
| print(f'\nπ Live Preview: {result["live_url"]}') | |
| if result.get('public_share_url'): | |
| print(f'π Share URL: {result["public_share_url"]}') | |
| elif result.get('share_url'): | |
| print(f'π Share URL: {result["share_url"]}') | |
| return parsed_result | |
| else: | |
| print('β No structured output received') | |
| return None | |
| def demo_company_extraction(): | |
| """Demo: Extract structured company data.""" | |
| print('\nπ’ Demo 3: Company Information Extraction') | |
| print('-' * 40) | |
| task = """ | |
| Go to a financial website and look up information about Apple Inc. | |
| Extract company details including name, stock symbol, market cap, | |
| industry, headquarters, and founding year. | |
| """ | |
| task_id = create_structured_task(task, CompanyInfo) | |
| result = wait_for_structured_completion(task_id) | |
| if result.get('output'): | |
| parsed_result = validate_and_display_output(result['output'], CompanyInfo) | |
| # Show execution URLs | |
| if result.get('live_url'): | |
| print(f'\nπ Live Preview: {result["live_url"]}') | |
| if result.get('public_share_url'): | |
| print(f'π Share URL: {result["public_share_url"]}') | |
| elif result.get('share_url'): | |
| print(f'π Share URL: {result["share_url"]}') | |
| return parsed_result | |
| else: | |
| print('β No structured output received') | |
| return None | |
| def main(): | |
| """Demonstrate structured output extraction.""" | |
| print('π Browser Use Cloud - Structured JSON Output') | |
| print('=' * 50) | |
| print('π― Features:') | |
| print('β’ Type-safe Pydantic schemas') | |
| print('β’ Automatic JSON validation') | |
| print('β’ Structured data extraction') | |
| print('β’ Multiple output formats') | |
| try: | |
| # Parse command line arguments | |
| parser = argparse.ArgumentParser(description='Structured output extraction demo') | |
| parser.add_argument('--demo', choices=['news', 'product', 'company', 'all'], default='news', help='Which demo to run') | |
| args = parser.parse_args() | |
| print(f'\nπ Running {args.demo} demo(s)...') | |
| if args.demo == 'news': | |
| demo_news_extraction() | |
| elif args.demo == 'product': | |
| demo_product_extraction() | |
| elif args.demo == 'company': | |
| demo_company_extraction() | |
| elif args.demo == 'all': | |
| demo_news_extraction() | |
| demo_product_extraction() | |
| demo_company_extraction() | |
| except requests.exceptions.RequestException as e: | |
| print(f'β API Error: {e}') | |
| except Exception as e: | |
| print(f'β Error: {e}') | |
| if __name__ == '__main__': | |
| main() | |