"""
Example: Using large blocklists (400k+ domains) with automatic optimization

This example demonstrates:
1. Loading a real-world blocklist (HaGeZi's Pro++ with 439k+ domains)
2. Automatic conversion to set for O(1) lookup performance
3. Testing that blocked domains are actually blocked

Performance: ~0.02ms per domain check (50,000+ checks/second!)
"""

import asyncio
import os
import sys
import urllib.request

# Put the directory three levels above this file on sys.path (presumably the
# repository root) so the `browser_use` import below resolves when the example
# is run straight from a checkout.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from dotenv import load_dotenv

# Load variables from a local .env file (e.g. API credentials) before the LLM
# client is constructed.
load_dotenv()

from browser_use import Agent, ChatOpenAI
from browser_use.browser import BrowserProfile, BrowserSession

llm = ChatOpenAI(model='gpt-4.1-mini')


def load_blocklist_from_url(url: str, *, timeout: float = 30.0) -> list[str]:
    """Load and parse a blocklist from a URL.

    Each line of the response is decoded as UTF-8 and stripped; blank lines
    and lines starting with ``#`` are discarded.

    Args:
        url: URL to the blocklist file.
        timeout: Seconds to wait on blocking network operations before
            giving up, so an unresponsive mirror cannot hang the demo.

    Returns:
        List of domain strings (comments and empty lines removed), in the
        order they appear in the file.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    print(f'📥 Downloading blocklist from {url}...')

    with urllib.request.urlopen(url, timeout=timeout) as response:
        # The response yields raw bytes per line; decode lazily, then filter.
        entries = (raw_line.decode('utf-8').strip() for raw_line in response)
        domains = [entry for entry in entries if entry and not entry.startswith('#')]

    print(f'✅ Loaded {len(domains):,} domains')
    return domains


def _pick_sample_domains(domains: list[str], middle_index: int = 1000) -> list[str]:
    """Return up to three distinct sample entries: first, a middle one, and last.

    The middle index is clamped to the list length so a short blocklist does
    not raise ``IndexError``. Duplicates (possible on very short lists) are
    dropped while preserving order.

    Raises:
        ValueError: If ``domains`` is empty.
    """
    if not domains:
        raise ValueError('Blocklist is empty - nothing to sample or block')

    middle = min(middle_index, len(domains) - 1)
    picks = [domains[0], domains[middle], domains[-1]]
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(picks))


async def main():
    """Download the blocklist, start a browser with it, and let an agent probe it.

    The agent is asked to visit one domain taken from the blocklist and one
    domain that is not on it, then report which navigation succeeded. The
    browser session is always killed on the way out, even if the agent run
    fails.
    """
    # Load HaGeZi's Pro++ blocklist (blocks ads, tracking, malware, etc.)
    # Source: https://github.com/hagezi/dns-blocklists
    blocklist_url = 'https://gitlab.com/hagezi/mirror/-/raw/main/dns-blocklists/domains/pro.plus.txt'

    print('=' * 70)
    print('🚀 Large Blocklist Demo - 439k+ Blocked Domains')
    print('=' * 70)
    print()

    # Load the blocklist
    prohibited_domains = load_blocklist_from_url(blocklist_url)

    # Sample some blocked domains to test (first, ~1000th, last)
    test_blocked = _pick_sample_domains(prohibited_domains)
    print(f'\n📋 Sample blocked domains: {", ".join(test_blocked)}')

    print(f'\n🔧 Creating browser with {len(prohibited_domains):,} blocked domains...')
    print(' (Auto-optimizing to set for O(1) lookup performance)')

    # Create browser with the blocklist.
    # NOTE(review): the list-to-set optimization advertised above happens
    # inside browser_use, not in this file - confirm against BrowserProfile.
    browser_session = BrowserSession(
        browser_profile=BrowserProfile(
            prohibited_domains=prohibited_domains,
            headless=False,
            user_data_dir='~/.config/browseruse/profiles/blocklist-demo',
        ),
    )

    # Task: Try to visit a blocked domain and a safe domain
    blocked_site = test_blocked[0]  # Will be blocked
    safe_site = 'github.com'  # Will be allowed

    task = f"""
    Try to navigate to these websites and report what happens:
    1. First, try to visit https://{blocked_site}
    2. Then, try to visit https://{safe_site}

    Tell me which sites you were able to access and which were blocked.
    """

    agent = Agent(
        task=task,
        llm=llm,
        browser_session=browser_session,
    )

    print(f'\n🤖 Agent task: Try to visit {blocked_site} (blocked) and {safe_site} (allowed)')
    print('\n' + '=' * 70)

    try:
        await agent.run(max_steps=5)

        print('\n' + '=' * 70)
        print('✅ Demo complete!')
        print(f'💡 The blocklist with {len(prohibited_domains):,} domains was optimized to a set')
        print(' for instant O(1) domain checking (vs slow O(n) pattern matching)')
        print('=' * 70)

        input('\nPress Enter to close the browser...')
    finally:
        # Always tear the browser down, including when the agent run raises.
        await browser_session.kill()


if __name__ == '__main__':
    asyncio.run(main())