# type: ignore
"""Manual driver that runs a browser-use agent against a live page.

The script launches a local (non-headless) browser, navigates to a target
URL, and lets a browser-use ``Agent`` execute ``colors_extract_task`` with the
project's custom ``tools``.  The ``task_old_*`` prompts are earlier iterations
that are kept for reference; nothing in this file uses them.

Importing this module installs SIGINT/SIGTERM handlers (see
``shutdown_browser``).
"""

# Standard library
import asyncio
import base64
import importlib
import json
import logging
import os
import signal
import sys
import time
import traceback
from datetime import datetime

# Project / third-party imports.  Their relative order is kept exactly as it
# was, in case any of them relies on import-time side effects of another.
from agent_dir.content_agent import content_agent
from agent_dir.media_agent import media_agent, client, post_schema
from agent_dir.web_inspector_agent import WebInspectorAgent
from agent_dir.browser_agent import (
    tools,
    # ElementScreenshotParams,
    # PageVisited,
    # WebsiteInfo,
    # ContentInfo,
    # Colors,
    # Typography,
    # ButtonStyles,
    # HeadingStyles,
    # Components,
    # DesignSystem,
    # Screenshot,
    BrowserAgentOutput,
)

import aiohttp
import requests
from playwright.async_api import TimeoutError as PlaywrightTimeoutError

# Browser setup imports
from browser_use import Agent as AgentBrowser, ChatGoogle, ChatOpenAI as ChatOpenAIBrowserUse
from browser_use.browser import BrowserSession, BrowserProfile
from utils.chrome_playwright import start_chrome_with_debug_port, connect_playwright_to_cdp

# Initialize LLM clients for browser usage
from model import get_model

# ---------------------------------------------------------------------------
# Task templates
# ---------------------------------------------------------------------------

# Earliest prompt: full content + brand identity extraction.  This is an
# f-string, so the date is fixed once, at import time.
task_old_1 = f"""
You are a Browser Intelligence Agent specialized in extracting website content and brand identity assets.
Your goal is to visit the given website URL and return a structured, comprehensive extraction.

Follow these steps strictly:

1. Website Navigation:
   - Open the provided URL.
   - If a user query is provided, search across multiple related internal pages (navigation links, relevant subpages) that may contain information about the query.
   - If no query is provided, focus on the landing page only.

2. Content Extraction:
   - If a query is provided:
     • Extract and summarize text relevant to the query from all visited pages.
     • Provide a coherent summary that highlights key points across pages.
   - If no query:
     • Extract the full visible text from the landing page.

3. Brand & Design Extraction:
   - Identify and extract the brand's visual identity, including:
     • Primary and secondary colors (hex codes).
     • Extended color palette if available.
     • Typography (fonts, weights, styles).
     • Design system or style guide elements.
     • Social media brand kit details (logos, icons, button styles, heading styles).

4. Screenshots (via custom tools):
   - Capture screenshots of **topic-related content** (e.g., pricing tables, signup buttons, hero sections if the query is "pricing plans").
   - Capture screenshots of **brand identity elements** (e.g., color swatches, typography samples, buttons, logos, icons, headings).
   - Save screenshots with clear, descriptive filenames (e.g., `pricing_table.png`, `signup_button.png`, `primary_colors.png`, `typography_styles.png`).

5. Output:
   - Return the extracted content, brand identity data, and screenshot metadata in a clean and structured JSON format.
   - Do not include free text or commentary outside the JSON.

Today is {datetime.now().strftime('%Y-%m-%d')}

User's query: Go to https://github.com/pricing and extract content and brand identity assets and screenshots for linkedin post, Topic is pricing plans.
"""

# Second iteration: selector discovery driven through the `execute_js` tool.
task_old_2 = """
###Selector Discovery, Verification & Screenshot Instructions

When identifying selectors for taking elements or sections screenshots:
Verify each selector's element or section, then capture its screenshot immediately after successful verification.

1. **Analyze** the HTML DOM structure of the page to identify potential selectors for the target elements or sections based on the query.
2. **Generate** a list of possible selectors that could uniquely identify each target element.
3. **Locate the Target Section or Element:**
   - Identify the element or section that visually and contextually matches the target.
   - Focus on the most relevant container or element that directly represents the intended target — not its parent or unrelated siblings.
4. For each candidate selector:
   - Use the `"execute_js"` tool to verify that the selector matches exactly the target.
   - **Highlight** the matched element by injecting a visible red border (`2px solid red`) or a temporary background color.
5. **Validate the Finalized Selector Against the Query:**
   - Once a selector is finalized, confirm that it accurately represents the element or section described in the query.
   - Ensure it precisely corresponds to the query intent and does not include unrelated, broader, or nested regions.
6. **Remove injected visual styles or modifications** from the DOM to restore the page to its original state before proceeding to the next selector.
7. **After verification**, immediately **capture a screenshot** of the verified element or section.
8. Continue this process until **all target selectors** have been verified and their screenshots captured.

After successful verification, remove all injected visual styles or temporary DOM modifications.

User's query: Go to https://github.com/pricing and take screenshot of header and pricing details
"""

# Third iteration: find -> highlight -> visually verify -> capture flow.
task_old_3 = """
You are a Browser Agent that must locate, visually verify, and capture a screenshot of a webpage section or element based on a natural language query.

### Steps to Follow

1. **Understand the Query**
   - Interpret the user's intent (e.g., "header", "footer", "main hero section", "signup form").
   - (Optional) gather page context if needed via `extract_content`.

2. **Find the Element**
   - Primary: `find_element_by_prompt(query)`
   - Fallback / extra probes: use page methods like `get_elements_by_css_selector` or `query_selector` if `find_element_by_prompt` is ambiguous.

3. **Get Element Details**
   - Retrieve coordinates and size with `get_bounding_box(selector)`.
   - Inspect returned element metadata (id, classes, backend_node_id) from `find_element_by_prompt`.

4. **Highlight for Verification**
   - Scroll into view and outline the element using `highlight_element(selector_or_obj)`.

5. **Visually Verify**
   - Take a temporary screenshot of the highlighted region with `element_screenshot_clip(clip)` (or `element_screenshot(selectors=[selector])`).
   - Ask the visual verifier to confirm with `verify_element_visual(query, screenshot_path)`.
   - If verification fails: refine and retry by re-calling `find_element_by_prompt` (or exploring parent/child/sibling via `get_elements_by_css_selector`) — repeat Steps 3–5.

6. **Capture Final Screenshot**
   - After verification, capture final image with `element_screenshot({ "selectors": [verified_selector], "highlight": False, "padding": 10 })`.
   - Remove temporary highlight (call `highlight_element({"selector": verified_selector, "remove": True})` or similar).

7. **Return Results**
   - Return structured output containing: `selector` (from `find_element_by_prompt` / derived), `bounding_box` (from `get_bounding_box`), `screenshot_path` (from `element_screenshot`), and `confidence` (derived from `verify_element_visual`).

### Rules (enforced by the flow)
- Always visually verify before finalizing: use `verify_element_visual`.
- Ensure element is scrolled into view (use `highlight_element`).
- Prefer precise selectors (id, `data-*`, unique class) returned or implied by `find_element_by_prompt`.
- If verification fails, retry up to 3 times by re-invoking `find_element_by_prompt` and refining selectors.

User's query: Go to https://github.com/pricing and take screenshot of header and pricing details
"""

# Fourth iteration: trimmed-down variant that assumes the page is already open.
task_old_4 = """
You are a Browser Agent that must locate, visually verify, and capture a screenshot of a webpage section or element based on a natural language query.

### Steps to Follow

1. **Understand the Query**
   - Interpret the user's intent (e.g., "header", "footer", "main hero section", "signup form").
   - The page is already loaded, so you don't need to navigate to any URL.

2. **Find the Element**
   - Primary: `find_element_by_prompt`
   - Pass a detailed natural language description of the element to find, including its visual appearance, position, and any visible text it contains (e.g., 'the login button with the text Sign In').

3. **Visually Verify**
   - After finding the element, visually confirm that the correct element was found before proceeding.

User's query: Take screenshot of header
"""

# Browser agent task for extracting color systems
colors_extract_task = """
Extract and verify the complete color system from this webpage.

## Process:

### 1. Scroll & Identify Elements
- Scroll the page to view all sections (header, hero, CTAs, footer)
- Identify the MOST VISUALLY DISTINCT elements for each color category

### 2. Extract Colors with Hints
Call `extract_color_system` with element hints for ALL color types you can identify:

```
extract_color_system({
    "elements_to_find": [
        # MANDATORY: Brand Colors (3 required)
        {"text": "Get Started", "tags": ["button", "a"], "priority": "primary"},
        {"text": "Learn More", "tags": ["button"], "priority": "secondary"},
        {"text": "New", "tags": ["span", "div"], "priority": "accent"},

        # OPTIONAL: Background Color (improve accuracy if hinted)
        {"text": "", "tags": ["body", "header", "main"], "priority": "background"},

        # OPTIONAL: Text Colors (improve accuracy if hinted)
        {"text": "Main Heading", "tags": ["h1", "h2"], "priority": "text-heading"},
        {"text": "Body paragraph text", "tags": ["p"], "priority": "text-body"},
        {"text": "Subtle caption", "tags": ["small", "span"], "priority": "text-subtle"}
    ]
})
```

**Priority Types:**

**MANDATORY (must verify):**
- `primary` = Main brand color (brightest CTA, most eye-catching button)
- `secondary` = Supporting color (less prominent actions, links)
- `accent` = Highlight color (small accents, badges, status indicators)

**OPTIONAL (auto-detected with fallback, hints improve accuracy):**
- `background` = Page background color (body, header, main sections)
- `text-heading` = Main heading text color (h1, h2)
- `text-body` = Body paragraph text color (p, span)
- `text-subtle` = Subtle/muted text color (small, captions)

**Tips for Better Results:**
- **Brand Colors (mandatory)**: Use EXACT text from interactive elements (buttons, links)
- **Background (optional)**: Leave text="" for container elements (body, header, main)
- **Text Colors (optional)**: Use sample text content from headings/paragraphs
- Focus on DISTINCT colors (not gray/white/black for brand colors)
- 3-7 hints total is optimal (3 mandatory brand + up to 4 optional background/text)

### 3. Verify Extraction
After extraction, verify the results:

**MANDATORY Checks:**
- ✓ Primary should be the most prominent brand color (main CTA background/color)
- ✓ Primary should NOT be a page background (#1b1f23, #ffffff, etc.)
- ✓ Secondary and accent should be visually distinct from primary
- ✓ All 3 mandatory colors (primary/secondary/accent) must be present

**OPTIONAL Checks (if auto-detected):**
- ✓ Background should be the main page container color
- ✓ Text hierarchy should show heading/body/subtle text colors
- ✓ Check "source" field: "agent-hint" (you provided it) or "auto-detected" (tool found it)

**If mandatory colors are incorrect:**
- Re-call extract_color_system with better element examples for primary/secondary/accent
- Focus on the brightest, most colorful interactive elements
- Avoid selecting text-only or container elements for brand colors

**Optional colors will auto-detect with fallback if not hinted.**

Execute the extraction and verification now.
"""

# Endpoint probed after cleanup to see whether a Chrome DevTools (CDP) server
# is still answering on the default debugging port.
CDP_VERSION_URL = 'http://localhost:9222/json/version'

# The browser currently owned by run_search(); read by shutdown_browser().
browser_instance = None


def _event_loop_is_running() -> bool:
    """Return True when called from inside a running asyncio event loop."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return False
    return True


def _raise_for_signal(signum) -> None:
    """Turn a received signal into the exception that terminates the script.

    Raises:
        KeyboardInterrupt: for SIGINT (same as Python's default handler).
        SystemExit: for any other signal, with the conventional 128+N status.
    """
    if signum == signal.SIGINT:
        raise KeyboardInterrupt
    raise SystemExit(128 + signum)


def shutdown_browser(*args):
    """Stop the globally tracked browser; usable directly or as a signal handler.

    Args:
        *args: Empty when called directly.  When installed through
            ``signal.signal`` the interpreter passes ``(signum, frame)``.

    Behaviour:
        * Inside a running event loop ``asyncio.run()`` cannot be used (it
          raises ``RuntimeError``).  For a signal, the matching exception is
          raised instead so the loop unwinds and ``run_search``'s ``finally``
          block stops the browser.  For a direct call, a warning is printed.
        * Outside a loop, the browser (if any) is stopped on a fresh loop and
          ``browser_instance`` is cleared, so repeated calls are harmless.
        * A signal always ends by raising, so SIGINT/SIGTERM still terminate
          the process instead of being silently swallowed.

    Raises:
        KeyboardInterrupt: when invoked for SIGINT.
        SystemExit: when invoked for any other signal.
    """
    global browser_instance

    signum = args[0] if args else None

    if _event_loop_is_running():
        if signum is None:
            print('⚠️ Cannot stop the browser synchronously while an event loop is running; await its stop() instead')
            return
        _raise_for_signal(signum)

    # Take ownership first so a second call cannot stop the same browser twice.
    browser, browser_instance = browser_instance, None
    if browser is not None:
        try:
            asyncio.run(browser.stop())
            print('✅ Browser stopped via signal handler')
        except Exception as e:
            # Best effort: report and carry on with shutdown.
            print(f'⚠️ Error stopping browser via signal handler: {type(e).__name__}: {e}')

    if signum is not None:
        _raise_for_signal(signum)


# Installed at import time (signal.signal only works from the main thread).
signal.signal(signal.SIGINT, shutdown_browser)
signal.signal(signal.SIGTERM, shutdown_browser)


def _report_environment() -> None:
    """Print which relevant packages and API-key environment variables exist."""
    # Check installed packages that might be relevant
    try:
        for package in ('browser_use', 'playwright', 'aiohttp'):
            try:
                mod = importlib.import_module(package)
                print(f"✅ {package} is installed: {getattr(mod, '__version__', 'unknown version')}")
            except ImportError:
                print(f"❌ {package} is NOT installed")
    except Exception as e:
        print(f"Error checking packages: {e}")

    # Check environment variables (only presence is reported, never the value)
    # NOTE(review): 'google_api_key' is lowercase; environment names are
    # case-sensitive on POSIX — confirm this matches what get_model() reads.
    for key in ('google_api_key', 'OPENROUTER_API_KEY'):
        if os.environ.get(key):
            print(f"✅ {key} environment variable is set")
        else:
            print(f"❌ {key} environment variable is NOT set")


async def _report_cdp_status() -> None:
    """Print whether a Chrome CDP endpoint still answers on localhost:9222."""
    try:
        print("Checking if Chrome CDP is still accessible...")
        async with aiohttp.ClientSession() as session:
            async with session.get(CDP_VERSION_URL, timeout=aiohttp.ClientTimeout(total=1)) as response:
                if response.status == 200:
                    print('⚠️ WARNING: Chrome with CDP is still running after cleanup!')
                else:
                    print('✅ Chrome CDP no longer accessible (status code != 200)')
    except Exception:
        # Any failure to connect is the expected outcome after a clean stop.
        print('✅ Chrome CDP no longer accessible (connection failed)')


async def run_search(
    target_url: str = "https://github.com/pricing",
    max_steps: int = 15,
) -> None:
    """Launch a browser, open ``target_url`` and run the colour-extraction agent.

    Args:
        target_url: Page to load before the agent starts.
        max_steps: Step budget passed to ``Agent.run``.

    Raises:
        Exception: Any error from browser start-up, navigation or the agent
            run is printed and re-raised.  The browser is stopped in all
            cases before the exception leaves this function.
    """
    global browser_instance

    print('====================================================')
    print('Starting run_search() function')
    print('====================================================')

    _report_environment()

    browser = None

    try:
        # Import Browser from browser_use
        from browser_use import Browser

        # Create browser profile
        print('🔄 Creating browser profile...')
        browser_profile = BrowserProfile(
            is_local=True,
            headless=False,
            launch_args=[
                '--no-first-run',
                '--no-default-browser-check',
                '--disable-extensions',
                '--disable-background-networking',
                '--disable-background-timer-throttling',
                '--disable-backgrounding-occluded-windows',
                '--disable-popup-blocking',
                '--disable-renderer-backgrounding',
                '--force-color-profile=srgb',
                '--metrics-recording-only',
                '--mute-audio',
            ],
        )

        # Create and start the browser
        print('🔄 Creating Browser instance...')
        browser = Browser(browser_profile=browser_profile)
        # Publish it so shutdown_browser() can reach it.
        browser_instance = browser

        print('🚀 Starting browser...')
        await browser.start()
        print("✅ Browser started successfully")

        # Use the already opened tab and navigate if needed
        print(f'🌐 Navigating to {target_url} in the first tab...')
        page = await browser.get_current_page()
        await page.goto(target_url)
        print(f"✅ Page loaded successfully: {target_url}")

        # Optional: Wait a moment for page to fully load
        await asyncio.sleep(2)

        # Build the Browser Agent using the browser instance
        print('🔄 Creating Browser Agent with pre-navigated browser...')
        browser_agent = AgentBrowser(
            task=colors_extract_task,
            # llm=get_model("browser_agent_openrouter:google/gemini-2.5-flash"),
            llm=get_model("llm_browser_google"),
            use_vision=True,
            generate_gif=False,
            max_failures=3,
            file_system_path="./browser_agent_data",
            tools=tools,
            # output_model_schema=BrowserAgentOutput,  # ⚠️ TEMPORARILY DISABLED for testing color extraction
            browser=browser,  # Pass the Browser instance instead of BrowserSession
        )
        print('✅ Browser Agent created with pre-navigated browser')

        print('🚀 Running browser agent...')
        try:
            print(f"Starting browser agent.run() with max_steps={max_steps}")
            history = await browser_agent.run(max_steps=max_steps)
            print("-------------Agent run completed---------------")
            print("Steps executed:", len(history.steps) if hasattr(history, 'steps') else "Unknown")
            print("-------------Final result---------------")
            # print(history.final_result)
        except Exception as run_error:
            print(f'❌ Error during browser agent run: {type(run_error).__name__}: {run_error}')
            print("Detailed traceback:")
            traceback.print_exc()
            raise

    except Exception as e:
        print(f'❌ Error: {e}')
        raise

    finally:
        # Clean up resources in proper order
        print('🧹 Cleaning up resources...')

        # Close browser
        try:
            if browser is not None:
                print(f"Attempting to stop browser: {browser}")
                await browser.stop()
                print('✅ Stopped browser')
                # Already stopped: make sure shutdown_browser() does not try
                # to stop the same instance again.
                if browser_instance is browser:
                    browser_instance = None
            else:
                print('ℹ️ No browser was created')
        except Exception as e:
            print(f'⚠️ Error stopping browser: {type(e).__name__}: {e}')
            traceback.print_exc()

        # Check if Chrome is still running via CDP
        await _report_cdp_status()

        print('✅ All cleanup complete')


if __name__ == "__main__":
    try:
        asyncio.run(run_search())
    finally:
        # No-op when run_search() already stopped the browser.
        shutdown_browser()

# NOTE(review): debug marker; kept at module level — confirm it was not meant
# to live inside the ``finally`` block above.
print('_agents file')