Spaces:
No application file
No application file
| #type: ignore | |
| from agent_dir.content_agent import content_agent | |
| from agent_dir.media_agent import media_agent, client, post_schema | |
| from agent_dir.web_inspector_agent import WebInspectorAgent | |
| from agent_dir.browser_agent import ( | |
| tools, | |
| # ElementScreenshotParams, | |
| # PageVisited, | |
| # WebsiteInfo, | |
| # ContentInfo, | |
| # Colors, | |
| # Typography, | |
| # ButtonStyles, | |
| # HeadingStyles, | |
| # Components, | |
| # DesignSystem, | |
| # Screenshot, | |
| BrowserAgentOutput, | |
| ) | |
| # Core imports | |
| import os | |
| import sys | |
| import time | |
| import json | |
| import logging | |
| import asyncio | |
| import aiohttp | |
| import requests | |
| import base64 | |
| from datetime import datetime | |
| from playwright.async_api import TimeoutError as PlaywrightTimeoutError | |
| import signal | |
| # Browser setup imports | |
| from browser_use import Agent as AgentBrowser, ChatGoogle, ChatOpenAI as ChatOpenAIBrowserUse | |
| from browser_use.browser import BrowserSession, BrowserProfile | |
| from utils.chrome_playwright import start_chrome_with_debug_port, connect_playwright_to_cdp | |
| # Initialize LLM clients for browser usage | |
| from model import get_model | |
# Task templates
#
# Legacy prompt: content + brand-identity extraction for a fixed GitHub pricing
# query. Not referenced by run_search() in the visible part of this file; kept
# because other modules may still import it.
# NOTE: this is an f-string, so the date is interpolated once, at import time.
task_old_1 = f"""
You are a Browser Intelligence Agent specialized in extracting website content and brand identity assets.
Your goal is to visit the given website URL and return a structured, comprehensive extraction.
Follow these steps strictly:
1. Website Navigation:
- Open the provided URL.
- If a user query is provided, search across multiple related internal pages (navigation links, relevant subpages) that may contain information about the query.
- If no query is provided, focus on the landing page only.
2. Content Extraction:
- If a query is provided:
• Extract and summarize text relevant to the query from all visited pages.
• Provide a coherent summary that highlights key points across pages.
- If no query:
• Extract the full visible text from the landing page.
3. Brand & Design Extraction:
- Identify and extract the brand's visual identity, including:
• Primary and secondary colors (hex codes).
• Extended color palette if available.
• Typography (fonts, weights, styles).
• Design system or style guide elements.
• Social media brand kit details (logos, icons, button styles, heading styles).
4. Screenshots (via custom tools):
- Capture screenshots of **topic-related content** (e.g., pricing tables, signup buttons, hero sections if the query is "pricing plans").
- Capture screenshots of **brand identity elements** (e.g., color swatches, typography samples, buttons, logos, icons, headings).
- Save screenshots with clear, descriptive filenames (e.g., `pricing_table.png`, `signup_button.png`, `primary_colors.png`, `typography_styles.png`).
5. Output:
- Return the extracted content, brand identity data, and screenshot metadata in a clean and structured JSON format.
- Do not include free text or commentary outside the JSON.
Today is {datetime.now().strftime('%Y-%m-%d')}
User's query: Go to https://github.com/pricing and extract content and brand identity assets and screenshots for linkedin post, Topic is pricing plans.
"""
# Legacy prompt: discover CSS selectors, verify each with `execute_js`, then
# screenshot the verified element. Not referenced by run_search() in the
# visible part of this file.
task_old_2 = """
###Selector Discovery, Verification & Screenshot Instructions
When identifying selectors for taking elements or sections screenshots:
Verify each selector's element or section, then capture its screenshot immediately after successful verification.
1. **Analyze** the HTML DOM structure of the page to identify potential selectors for the target elements or sections based on the query.
2. **Generate** a list of possible selectors that could uniquely identify each target element.
3. **Locate the Target Section or Element:**
- Identify the element or section that visually and contextually matches the target.
- Focus on the most relevant container or element that directly represents the intended target — not its parent or unrelated siblings.
4. For each candidate selector:
- Use the `"execute_js"` tool to verify that the selector matches exactly the target.
- **Highlight** the matched element by injecting a visible red border (`2px solid red`) or a temporary background color.
5. **Validate the Finalized Selector Against the Query:**
- Once a selector is finalized, confirm that it accurately represents the element or section described in the query.
- Ensure it precisely corresponds to the query intent and does not include unrelated, broader, or nested regions.
6. **Remove injected visual styles or modifications** from the DOM to restore the page to its original state before proceeding to the next selector.
7. **After verification**, immediately **capture a screenshot** of the verified element or section.
8. Continue this process until **all target selectors** have been verified and their screenshots captured.
After successful verification, remove all injected visual styles or temporary DOM modifications.
User's query: Go to https://github.com/pricing and take screenshot of header and pricing details
"""
# Legacy prompt: locate an element from a natural-language query, verify it
# visually, then screenshot it. Not referenced by run_search() in the visible
# part of this file.
# NOTE: plain (non-f) string on purpose; the text contains literal `{ ... }`.
task_old_3 = """
You are a Browser Agent that must locate, visually verify, and capture a screenshot of a webpage section or element based on a natural language query.
### Steps to Follow
1. **Understand the Query**
- Interpret the user's intent (e.g., "header", "footer", "main hero section", "signup form").
- (Optional) gather page context if needed via `extract_content`.
2. **Find the Element**
- Primary: `find_element_by_prompt(query)`
- Fallback / extra probes: use page methods like `get_elements_by_css_selector` or `query_selector` if `find_element_by_prompt` is ambiguous.
3. **Get Element Details**
- Retrieve coordinates and size with `get_bounding_box(selector)`.
- Inspect returned element metadata (id, classes, backend_node_id) from `find_element_by_prompt`.
4. **Highlight for Verification**
- Scroll into view and outline the element using `highlight_element(selector_or_obj)`.
5. **Visually Verify**
- Take a temporary screenshot of the highlighted region with `element_screenshot_clip(clip)` (or `element_screenshot(selectors=[selector])`).
- Ask the visual verifier to confirm with `verify_element_visual(query, screenshot_path)`.
- If verification fails: refine and retry by re-calling `find_element_by_prompt` (or exploring parent/child/sibling via `get_elements_by_css_selector`) — repeat Steps 3–5.
6. **Capture Final Screenshot**
- After verification, capture final image with `element_screenshot({ "selectors": [verified_selector], "highlight": False, "padding": 10 })`.
- Remove temporary highlight (call `highlight_element({"selector": verified_selector, "remove": True})` or similar).
7. **Return Results**
- Return structured output containing: `selector` (from `find_element_by_prompt` / derived), `bounding_box` (from `get_bounding_box`), `screenshot_path` (from `element_screenshot`), and `confidence` (derived from `verify_element_visual`).
### Rules (enforced by the flow)
- Always visually verify before finalizing: use `verify_element_visual`.
- Ensure element is scrolled into view (use `highlight_element`).
- Prefer precise selectors (id, `data-*`, unique class) returned or implied by `find_element_by_prompt`.
- If verification fails, retry up to 3 times by re-invoking `find_element_by_prompt` and refining selectors.
User's query: Go to https://github.com/pricing and take screenshot of header and pricing details
"""
# Legacy prompt: short variant for a page that is already loaded (no
# navigation step). Not referenced by run_search() in the visible part of this
# file.
task_old_4 = """
You are a Browser Agent that must locate, visually verify, and capture a screenshot of a webpage section or element based on a natural language query.
### Steps to Follow
1. **Understand the Query**
- Interpret the user's intent (e.g., "header", "footer", "main hero section", "signup form").
- The page is already loaded, so you don't need to navigate to any URL.
2. **Find the Element**
- Primary: `find_element_by_prompt`
- Pass a detailed natural language description of the element to find, including its visual appearance, position, and any visible text it contains (e.g., 'the login button with the text Sign In').
3. **Visually Verify**
- After finding the element, visually confirm that the correct element was found before proceeding.
User's query: Take screenshot of header
"""
# Browser agent task for extracting color systems.
#
# This is the prompt run_search() hands to the agent. It instructs the agent to
# call the `extract_color_system` tool with element hints and then sanity-check
# the returned palette.
# NOTE: plain (non-f) string on purpose; the text contains literal `{ ... }`.
colors_extract_task = """
Extract and verify the complete color system from this webpage.
## Process:
### 1. Scroll & Identify Elements
- Scroll the page to view all sections (header, hero, CTAs, footer)
- Identify the MOST VISUALLY DISTINCT elements for each color category
### 2. Extract Colors with Hints
Call `extract_color_system` with element hints for ALL color types you can identify:
```
extract_color_system({
"elements_to_find": [
# MANDATORY: Brand Colors (3 required)
{"text": "Get Started", "tags": ["button", "a"], "priority": "primary"},
{"text": "Learn More", "tags": ["button"], "priority": "secondary"},
{"text": "New", "tags": ["span", "div"], "priority": "accent"},
# OPTIONAL: Background Color (improve accuracy if hinted)
{"text": "", "tags": ["body", "header", "main"], "priority": "background"},
# OPTIONAL: Text Colors (improve accuracy if hinted)
{"text": "Main Heading", "tags": ["h1", "h2"], "priority": "text-heading"},
{"text": "Body paragraph text", "tags": ["p"], "priority": "text-body"},
{"text": "Subtle caption", "tags": ["small", "span"], "priority": "text-subtle"}
]
})
```
**Priority Types:**
**MANDATORY (must verify):**
- `primary` = Main brand color (brightest CTA, most eye-catching button)
- `secondary` = Supporting color (less prominent actions, links)
- `accent` = Highlight color (small accents, badges, status indicators)
**OPTIONAL (auto-detected with fallback, hints improve accuracy):**
- `background` = Page background color (body, header, main sections)
- `text-heading` = Main heading text color (h1, h2)
- `text-body` = Body paragraph text color (p, span)
- `text-subtle` = Subtle/muted text color (small, captions)
**Tips for Better Results:**
- **Brand Colors (mandatory)**: Use EXACT text from interactive elements (buttons, links)
- **Background (optional)**: Leave text="" for container elements (body, header, main)
- **Text Colors (optional)**: Use sample text content from headings/paragraphs
- Focus on DISTINCT colors (not gray/white/black for brand colors)
- 3-7 hints total is optimal (3 mandatory brand + up to 4 optional background/text)
### 3. Verify Extraction
After extraction, verify the results:
**MANDATORY Checks:**
- ✓ Primary should be the most prominent brand color (main CTA background/color)
- ✓ Primary should NOT be a page background (#1b1f23, #ffffff, etc.)
- ✓ Secondary and accent should be visually distinct from primary
- ✓ All 3 mandatory colors (primary/secondary/accent) must be present
**OPTIONAL Checks (if auto-detected):**
- ✓ Background should be the main page container color
- ✓ Text hierarchy should show heading/body/subtle text colors
- ✓ Check "source" field: "agent-hint" (you provided it) or "auto-detected" (tool found it)
**If mandatory colors are incorrect:**
- Re-call extract_color_system with better element examples for primary/secondary/accent
- Focus on the brightest, most colorful interactive elements
- Avoid selecting text-only or container elements for brand colors
**Optional colors will auto-detect with fallback if not hinted.**
Execute the extraction and verification now.
"""
# The Browser most recently started by run_search(); read by shutdown_browser()
# so a signal handler (which receives no user arguments) can reach it.
browser_instance = None


def shutdown_browser(*args):
    """Stop the browser held in ``browser_instance``; usable as a signal handler.

    Called with no arguments it is a plain "stop if still running" helper.
    Called as a signal handler (``signum, frame``) it additionally re-raises
    the interruption, so that installing it does not make the process immune
    to Ctrl+C / SIGTERM.

    Behavior:
      * No event loop running: the browser (if any) is stopped synchronously
        with ``asyncio.run()`` and ``browser_instance`` is cleared on success,
        which makes repeated calls harmless.
      * Event loop running + signal: nothing is stopped here, because
        ``asyncio.run()`` cannot be nested inside a running loop. The raised
        interruption unwinds the running ``asyncio.run()`` call, and cleanup
        is left to whatever ``finally`` blocks the interrupted coroutine has.
      * Event loop running + direct call: a warning is printed; async callers
        should ``await browser.stop()`` themselves.

    Raises:
        KeyboardInterrupt: when invoked as a handler for any signal but SIGTERM.
        SystemExit: when invoked as a handler for SIGTERM (code 128 + signum).
    """
    global browser_instance

    invoked_by_signal = bool(args)

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_running = False
    else:
        loop_running = True

    browser = browser_instance
    if browser is not None:
        if not loop_running:
            try:
                asyncio.run(browser.stop())
            except Exception as e:
                print(f'β οΈ Error stopping browser via signal handler: {type(e).__name__}: {e}')
            else:
                # Cleared only on success so a later call can retry a failed stop.
                browser_instance = None
                print('β Browser stopped via signal handler')
        elif not invoked_by_signal:
            # Creating the stop() coroutine here would only leak it un-awaited.
            reason = RuntimeError('asyncio.run() cannot be called from a running event loop')
            print(f'β οΈ Error stopping browser via signal handler: {type(reason).__name__}: {reason}')

    if invoked_by_signal:
        # A handler that returns normally swallows the signal; re-raise the
        # conventional exception so the program actually terminates.
        if args[0] == signal.SIGTERM:
            raise SystemExit(128 + int(signal.SIGTERM))
        raise KeyboardInterrupt


# Module-level side effect: importing this file replaces the process-wide
# SIGINT and SIGTERM handlers.
signal.signal(signal.SIGINT, shutdown_browser)
signal.signal(signal.SIGTERM, shutdown_browser)
def _report_environment() -> None:
    """Print which packages are importable and which API-key variables are set."""
    import importlib

    try:
        for package in ('browser_use', 'playwright', 'aiohttp'):
            try:
                mod = importlib.import_module(package)
                print(f"β {package} is installed: {getattr(mod, '__version__', 'unknown version')}")
            except ImportError:
                print(f"β {package} is NOT installed")
    except Exception as e:
        # An import can fail with something other than ImportError; keep going.
        print(f"Error checking packages: {e}")

    # Only presence is reported; the values themselves are never printed.
    # NOTE(review): 'google_api_key' is lowercase while the other key is
    # uppercase, and environment names are case-sensitive on POSIX. Confirm
    # which spelling the model factory actually reads.
    for key in ['google_api_key', 'OPENROUTER_API_KEY']:
        if os.environ.get(key):
            print(f"β {key} environment variable is set")
        else:
            print(f"β {key} environment variable is NOT set")


async def _warn_if_cdp_reachable() -> None:
    """Probe the local Chrome DevTools endpoint and report whether it still answers."""
    # NOTE(review): assumes Chrome's debug port is 9222; nothing in
    # run_search() sets a port explicitly. TODO confirm.
    try:
        print("Checking if Chrome CDP is still accessible...")
        async with aiohttp.ClientSession() as session:
            async with session.get(
                'http://localhost:9222/json/version',
                timeout=aiohttp.ClientTimeout(total=1),
            ) as response:
                if response.status == 200:
                    print('β οΈ WARNING: Chrome with CDP is still running after cleanup!')
                else:
                    print('β Chrome CDP no longer accessible (status code != 200)')
    except Exception:
        # Connection refused / timeout is the expected outcome after a clean stop.
        print('β Chrome CDP no longer accessible (connection failed)')


async def run_search(target_url: str = "https://github.com/pricing", max_steps: int = 15) -> None:
    """Launch a browser, open ``target_url`` and run the color-extraction agent.

    Steps: print an environment report, start a non-headless ``Browser``,
    navigate its current page to ``target_url``, run an ``AgentBrowser`` with
    ``colors_extract_task`` for at most ``max_steps`` steps, then always stop
    the browser and probe whether Chrome's debug endpoint is still reachable.

    Args:
        target_url: Page to load before the agent starts. Defaults to the URL
            that was previously hard-coded.
        max_steps: Upper bound passed to ``browser_agent.run``. Defaults to the
            previously hard-coded 15.

    Raises:
        Exception: any error from browser start-up, navigation or the agent run
            is printed and re-raised after cleanup.

    Side effects:
        Publishes the started browser in the module-level ``browser_instance``
        and clears it again once the browser has been stopped successfully.
    """
    global browser_instance
    import traceback

    print('====================================================')
    print('Starting run_search() function')
    print('====================================================')

    _report_environment()

    browser = None
    try:
        # Imported lazily so a missing package is reported by the handler below.
        from browser_use import Browser

        print('π Creating browser profile...')
        browser_profile = BrowserProfile(
            is_local=True,
            headless=False,
            launch_args=[
                '--no-first-run',
                '--no-default-browser-check',
                '--disable-extensions',
                '--disable-background-networking',
                '--disable-background-timer-throttling',
                '--disable-backgrounding-occluded-windows',
                '--disable-popup-blocking',
                '--disable-renderer-backgrounding',
                '--force-color-profile=srgb',
                '--metrics-recording-only',
                '--mute-audio',
            ],
        )

        print('π Creating Browser instance...')
        browser = Browser(browser_profile=browser_profile)
        # Published before start() so a signal arriving mid-start can see it.
        browser_instance = browser
        print('π Starting browser...')
        await browser.start()
        print("β Browser started successfully")

        # Reuse the tab the browser opened instead of creating a new one.
        print(f'π Navigating to {target_url} in the first tab...')
        page = await browser.get_current_page()
        await page.goto(target_url)
        print(f"β Page loaded successfully: {target_url}")
        # Fixed grace period for late-loading content.
        await asyncio.sleep(2)

        print('π Creating Browser Agent with pre-navigated browser...')
        browser_agent = AgentBrowser(
            task=colors_extract_task,
            # Alternative model id: "browser_agent_openrouter:google/gemini-2.5-flash"
            llm=get_model("llm_browser_google"),
            use_vision=True,
            generate_gif=False,
            max_failures=3,
            file_system_path="./browser_agent_data",
            tools=tools,
            # output_model_schema=BrowserAgentOutput is temporarily disabled
            # while color extraction is being tested.
            browser=browser,  # a Browser instance, not a BrowserSession
        )
        print('β Browser Agent created with pre-navigated browser')

        print('π Running browser agent...')
        try:
            print(f"Starting browser agent.run() with max_steps={max_steps}")
            history = await browser_agent.run(max_steps=max_steps)
            print("-------------Agent run completed---------------")
            print("Steps executed:", len(history.steps) if hasattr(history, 'steps') else "Unknown")
            print("-------------Final result---------------")
            # print(history.final_result)
        except Exception as run_error:
            print(f'β Error during browser agent run: {type(run_error).__name__}: {run_error}')
            print("Detailed traceback:")
            traceback.print_exc()
            raise
    except Exception as e:
        print(f'β Error: {e}')
        raise
    finally:
        print('π§Ή Cleaning up resources...')
        try:
            if browser is not None:
                print(f"Attempting to stop browser: {browser}")
                await browser.stop()
                # Forget the stopped browser so module-level shutdown hooks do
                # not try to stop it a second time.
                if browser_instance is browser:
                    browser_instance = None
                print('β Stopped browser')
            else:
                print('βΉοΈ No browser was created')
        except Exception as e:
            # Best effort: a failed stop must not mask the original error.
            print(f'β οΈ Error stopping browser: {type(e).__name__}: {e}')
            traceback.print_exc()

        await _warn_if_cdp_reachable()
        print('β All cleanup complete')
# Script entry point: run the search once, then make sure the browser is down.
if __name__ == "__main__":
    try:
        asyncio.run(run_search())
    finally:
        # Fallback for the case where run_search() never reached (or failed in)
        # its own cleanup; runs after the event loop has been closed.
        shutdown_browser()

# NOTE(review): the source's indentation was lost; this is treated as a
# module-level debug marker that prints on every import/run. Confirm it was
# not meant to sit inside the `finally` block above.
print('_agents file')