ncolex committed on
Commit
c5f9050
·
verified ·
1 Parent(s): c66a865

Upload backend folder

Browse files
backend/agent.py ADDED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio, json, base64, re
2
+ from pathlib import Path
3
+ from typing import Literal
4
+ from backend.smart_browser_controller import SmartBrowserController
5
+ from backend.vision_model import decide
6
+ from backend.universal_extractor import UniversalExtractor
7
+
8
def detect_format_from_prompt(prompt: str, default_fmt: str) -> str:
    """Scan the prompt for an explicit output-format request; fall back to the default."""
    lowered = prompt.lower()

    # Ordered mapping of format name -> regex cues that signal the user wants it.
    cues = {
        'pdf': [r'\bpdf\b', r'pdf format', r'save.*pdf', r'as pdf', r'to pdf'],
        'csv': [r'\bcsv\b', r'csv format', r'save.*csv', r'as csv', r'to csv'],
        'json': [r'\bjson\b', r'json format', r'save.*json', r'as json', r'to json'],
        'html': [r'\bhtml\b', r'html format', r'save.*html', r'as html', r'to html'],
        'md': [r'\bmarkdown\b', r'md format', r'save.*markdown', r'as markdown', r'to md'],
        'txt': [r'\btext\b', r'txt format', r'save.*text', r'as text', r'to txt', r'plain text']
    }

    # First format whose cue matches wins (dict order above is the priority).
    for candidate, patterns in cues.items():
        if any(re.search(pattern, lowered) for pattern in patterns):
            print(f"🎯 Detected format '{candidate}' from prompt")
            return candidate

    print(f"📋 No specific format detected, using default: {default_fmt}")
    return default_fmt
31
+
32
def get_file_extension(fmt: str) -> str:
    """Return the file extension for a known format, or 'output' as a fallback."""
    # Every supported format maps to an extension identical to its own name.
    known_formats = ('txt', 'md', 'json', 'html', 'csv', 'pdf')
    return fmt if fmt in known_formats else 'output'
43
+
44
def get_content_type(fmt: str) -> str:
    """Return the MIME type used when serving a file of the given format."""
    # Unknown formats are served as opaque binary data.
    return {
        'txt': 'text/plain',
        'md': 'text/markdown',
        'json': 'application/json',
        'html': 'text/html',
        'csv': 'text/csv',
        'pdf': 'application/pdf',
    }.get(fmt, 'application/octet-stream')
55
+
56
async def run_agent(job_id: str, prompt: str, fmt: Literal["txt","md","json","html","csv","pdf"],
                    headless: bool, proxy: dict | None, enable_streaming: bool = False):
    """Enhanced agent with smart proxy rotation and vision-based anti-bot detection.

    Drives a browser toward the user's goal in a perceive -> decide -> act loop:
    each step snapshots page state plus a screenshot, asks the vision model for
    the next action, executes it, and finally extracts content in `fmt` and
    writes it to OUTPUT_DIR/<job_id>.<ext>.

    Args:
        job_id: Identifier used for websocket broadcasts, stored job info, and
            the output filename.
        prompt: Natural-language goal; may embed a start URL and/or a format word.
        fmt: Default output format; overridden when the prompt names a format.
        headless: Run the browser without a visible window.
        proxy: Optional proxy configuration forwarded to SmartBrowserController.
        enable_streaming: Register a live screen-streaming session for this job.
    """
    # Imported here (not at module top) — presumably to avoid a circular import
    # with backend.main; confirm before moving.
    from backend.main import broadcast, OUTPUT_DIR, register_streaming_session, store_job_info

    print(f"🚀 Starting smart agent with vision-based anti-bot detection")
    print(f"📋 Goal: {prompt}")
    print(f"🌐 Default Format: {fmt}")

    # Smart format detection from prompt: a format word in the prompt beats the
    # caller-supplied default.
    detected_fmt = detect_format_from_prompt(prompt, fmt)
    if detected_fmt != fmt:
        print(f"🔄 Format overridden: {fmt} → {detected_fmt}")
        fmt = detected_fmt

    # Initialize universal extractor
    extractor = UniversalExtractor()

    # Use SmartBrowserController instead of regular BrowserController
    async with SmartBrowserController(headless, proxy, enable_streaming) as browser:

        # Register streaming session
        if enable_streaming:
            await register_streaming_session(job_id, browser)

        # Store job info for later download
        await store_job_info(job_id, {
            "format": fmt,
            "content_type": get_content_type(fmt),
            "extension": get_file_extension(fmt),
            "prompt": prompt
        })

        # Show initial proxy stats
        proxy_stats = browser.get_proxy_stats()
        print(f"📊 Initial proxy stats: {proxy_stats}")
        await broadcast(job_id, {
            "type": "proxy_stats",
            "stats": proxy_stats
        })

        # Smart navigation to starting URL: prefer a URL embedded in the prompt,
        # otherwise pick a landing page from the goal's keywords.
        url_match = re.search(r"https?://[\w\-\.]+[^\s]*", prompt)
        if url_match:
            # Strip trailing punctuation that often clings to URLs in prose.
            start_url = url_match.group(0).rstrip('".,;')
            print(f"🔗 Found URL in prompt: {start_url}")
        else:
            start_url = determine_starting_url(prompt)
            # start_url = 'www.google.com'
            print(f"🔗 Starting at: {start_url}")

        try:
            # This now uses smart navigation with anti-bot detection and proxy rotation
            await browser.goto(start_url)
            print("✅ Successfully navigated with smart proxy rotation")
        except Exception as e:
            # Initial navigation is fatal for the job: report and bail out.
            print(f"❌ Smart navigation failed: {e}")
            await broadcast(job_id, {
                "type": "error",
                "message": f"Navigation failed: {str(e)}",
                "proxy_stats": browser.get_proxy_stats()
            })
            return

        await broadcast(job_id, {
            "status": "started",
            "initial_url": browser.page.url,
            "detected_format": fmt,
            "file_extension": get_file_extension(fmt),
            "proxy_stats": browser.get_proxy_stats()
        })

        # Dynamic limits based on task complexity
        max_steps = determine_max_steps(prompt)
        consecutive_scrolls = 0        # scrolls since the last non-scroll action
        max_consecutive_scrolls = 3
        extraction_attempts = 0        # how many times "extract" has been tried
        max_extraction_attempts = 2

        print(f"🎯 Running for max {max_steps} steps, output format: {fmt}")

        # Main enhanced agent loop with smart proxy rotation
        for step in range(max_steps):
            print(f"\n🔄 Step {step + 1}/{max_steps}")

            # Periodically check proxy health and broadcast stats (every 5 steps,
            # including step 0).
            if step % 5 == 0:
                proxy_stats = browser.get_proxy_stats()
                await broadcast(job_id, {
                    "type": "proxy_stats",
                    "stats": proxy_stats,
                    "step": step
                })
                print(f"📊 Proxy health check: {proxy_stats['available']}/{proxy_stats['total']} available")

            try:
                # Snapshot DOM + screenshot; the screenshot feeds the vision model.
                page_state = await browser.get_page_state(include_screenshot=True)
                print(f"📊 Found {len(page_state.selector_map)} interactive elements")
                print(f"📍 Current: {page_state.url}")

                await broadcast(job_id, {
                    "type": "page_info",
                    "step": step + 1,
                    "url": page_state.url,
                    "title": page_state.title,
                    "interactive_elements": len(page_state.selector_map),
                    "format": fmt
                })

                if page_state.screenshot:
                    await broadcast(job_id, {
                        "type": "screenshot",
                        "screenshot": page_state.screenshot
                    })

            except Exception as e:
                # A failed snapshot just skips this step; the loop retries.
                print(f"❌ Page state failed: {e}")
                continue

            # Handle empty pages: scroll a few times looking for content, then
            # give up and fall through to the final extraction.
            if len(page_state.selector_map) == 0:
                if consecutive_scrolls < max_consecutive_scrolls:
                    print("⚠️ No interactive elements, trying to scroll...")
                    await browser.scroll_page("down", 400)
                    consecutive_scrolls += 1
                    continue
                else:
                    print("⚠️ No elements found after scrolling")
                    break

            # AI decision making
            try:
                screenshot_bytes = base64.b64decode(page_state.screenshot)
                decision = await decide(screenshot_bytes, page_state, prompt)

                print(f"🤖 AI Decision: {decision.get('action')} - {decision.get('reason', 'No reason')}")

                await broadcast(job_id, {
                    "type": "decision",
                    "step": step + 1,
                    "decision": decision
                })

            except Exception as e:
                print(f"❌ AI decision failed: {e}")
                continue

            # Execute action with enhanced error handling
            action = decision.get("action")
            print(f"⚡ Executing: {action}")

            try:
                if action == "click":
                    # Index must resolve to a known element in this snapshot.
                    index = decision.get("index")
                    if index is not None and index in page_state.selector_map:
                        elem = page_state.selector_map[index]
                        print(f"🖱️ Clicking: {elem.text[:50]}...")
                        await browser.click_element_by_index(index, page_state)
                        consecutive_scrolls = 0
                        extraction_attempts = 0  # Reset on navigation
                        await asyncio.sleep(2)
                    else:
                        print(f"❌ Invalid click index: {index}")

                elif action == "type":
                    index = decision.get("index")
                    text = decision.get("text", "")
                    if index is not None and index in page_state.selector_map and text:
                        elem = page_state.selector_map[index]
                        print(f"⌨️ Typing '{text}' into: {elem.text[:30]}...")
                        await browser.input_text_by_index(index, text, page_state)
                        consecutive_scrolls = 0
                        await asyncio.sleep(1)
                    else:
                        print(f"❌ Invalid type parameters: index={index}, text='{text}'")

                elif action == "scroll":
                    direction = decision.get("direction", "down")
                    amount = decision.get("amount", 400)
                    print(f"📜 Scrolling {direction} by {amount}px")
                    await browser.scroll_page(direction, amount)
                    consecutive_scrolls += 1

                    # Break out of scroll loops by jumping straight to page end.
                    if consecutive_scrolls >= max_consecutive_scrolls:
                        print("⚠️ Too many scrolls, trying page end")
                        await browser.press_key("End")
                        consecutive_scrolls = 0

                elif action == "press_key":
                    key = decision.get("key", "Enter")
                    print(f"🔑 Pressing key: {key}")
                    await browser.press_key(key)
                    consecutive_scrolls = 0
                    await asyncio.sleep(2)

                elif action == "navigate":
                    url = decision.get("url", "")
                    if url and url.startswith("http"):
                        print(f"🔗 Navigating to: {url}")
                        # This will use smart navigation with anti-bot detection
                        try:
                            await browser.goto(url)
                            consecutive_scrolls = 0
                            extraction_attempts = 0
                            await asyncio.sleep(2)
                        except Exception as nav_error:
                            print(f"❌ Smart navigation failed: {nav_error}")
                            # Broadcast navigation failure with proxy stats
                            await broadcast(job_id, {
                                "type": "navigation_error",
                                "url": url,
                                "error": str(nav_error),
                                "proxy_stats": browser.get_proxy_stats()
                            })
                    else:
                        print(f"❌ Invalid navigation URL: {url}")

                elif action == "extract":
                    extraction_attempts += 1
                    if extraction_attempts <= max_extraction_attempts:
                        print(f"🔍 Starting intelligent extraction in {fmt} format...")
                        await broadcast(job_id, {
                            "type": "extraction",
                            "status": "starting",
                            "attempt": extraction_attempts,
                            "format": fmt
                        })

                        # Use universal extraction with specified format
                        content_result = await extractor.extract_intelligent_content(browser, prompt, fmt, job_id)

                        # Save content with proper extension
                        file_extension = get_file_extension(fmt)
                        output_file = OUTPUT_DIR / f"{job_id}.{file_extension}"

                        # Handle different content types
                        saved_successfully = await save_content(content_result, output_file, fmt, job_id)

                        if saved_successfully:
                            print(f"💾 Content saved successfully: {output_file}")
                            await broadcast(job_id, {
                                "type": "extraction",
                                "status": "completed",
                                "format": fmt,
                                "file_path": str(output_file),
                                "file_extension": file_extension,
                                "proxy_stats": browser.get_proxy_stats()
                            })
                        else:
                            print(f"❌ Failed to save content")

                        # Extraction (successful or not) ends the action loop.
                        break
                    else:
                        print("⚠️ Maximum extraction attempts reached")
                        break

                elif action == "done":
                    print("✅ Task marked as complete by AI")
                    break

                else:
                    print(f"⚠️ Unknown action: {action}")

            except Exception as e:
                # Action failures are non-fatal: pause briefly and take the
                # next step with a fresh page snapshot.
                print(f"❌ Action execution failed: {e}")
                await asyncio.sleep(1)

            # Small delay between actions
            await asyncio.sleep(0.5)

        # Final extraction if not done yet (loop ended without an "extract").
        if extraction_attempts == 0:
            print(f"🔍 Performing final extraction in {fmt} format...")
            try:
                content_result = await extractor.extract_intelligent_content(browser, prompt, fmt, job_id)

                file_extension = get_file_extension(fmt)
                output_file = OUTPUT_DIR / f"{job_id}.{file_extension}"

                await save_content(content_result, output_file, fmt, job_id)
                print(f"💾 Final content saved: {output_file}")
            except Exception as e:
                print(f"❌ Final extraction failed: {e}")

        # Final proxy statistics
        final_proxy_stats = browser.get_proxy_stats()
        print(f"📊 Final proxy stats: {final_proxy_stats}")

        await broadcast(job_id, {
            "status": "finished",
            "final_format": fmt,
            "final_proxy_stats": final_proxy_stats
        })
349
+
350
async def save_content(content_result: str, output_file: Path, fmt: str, job_id: str) -> bool:
    """Persist extracted content to disk according to the requested format.

    Args:
        content_result: Extracted text, or — for PDF — a status marker string:
            "PDF_DIRECT_SAVE:<path>" (already saved in place) or
            "PDF saved to: <path>" (legacy; must be copied to output_file).
        output_file: Target path, already carrying the correct extension.
        fmt: Output format name; "pdf" gets special handling, everything else
            is written verbatim as UTF-8 text.
        job_id: Job identifier (unused here; kept for interface compatibility).

    Returns:
        True when the content was saved (or verified present on disk),
        False on any failure.
    """
    try:
        if fmt == "pdf":
            if content_result.startswith("PDF_DIRECT_SAVE:"):
                # PDF was saved directly to the correct location.
                pdf_path = content_result.partition("PDF_DIRECT_SAVE:")[2].strip()
                print(f"📄 PDF saved directly: {pdf_path}")

                # Verify the file exists at expected location.
                if Path(pdf_path).exists():
                    return True
                print(f"❌ PDF file not found at expected location: {pdf_path}")
                return False

            if content_result.startswith("PDF saved to:"):
                # Legacy marker - PDF was saved elsewhere, copy it over.
                # partition() on the same prefix as startswith() tolerates a
                # missing space after the colon (the old split("PDF saved to: ")
                # raised IndexError for that case and mis-reported a failure).
                import shutil
                pdf_path = content_result.partition("PDF saved to:")[2].strip()
                shutil.copy2(pdf_path, output_file)
                print(f"📄 PDF copied to standard location: {output_file}")
                return True

            # PDF generation failed upstream; keep the text so nothing is lost.
            fallback_file = output_file.with_suffix('.txt')
            with open(fallback_file, "w", encoding="utf-8") as f:
                f.write("PDF GENERATION FAILED - TEXT FALLBACK\n")
                f.write("="*50 + "\n\n")
                f.write(content_result)
            print(f"📄 PDF fallback saved as text: {fallback_file}")
            return True

        # Handle text-based formats: write verbatim.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(content_result)
        print(f"📝 {fmt.upper()} content saved: {output_file}")
        return True

    except Exception as e:
        print(f"❌ Error saving content: {e}")
        return False
392
+
393
def determine_starting_url(prompt: str) -> str:
    """Pick a sensible landing page for the agent based on keywords in the goal."""
    lowered = prompt.lower()

    # Generic search / lookup goals start at a search engine.
    search_terms = ("search", "find", "look for", "google")
    if any(term in lowered for term in search_terms):
        return "https://duckduckgo.com/"

    # Source-code related goals start at GitHub.
    if "github" in lowered or "code repository" in lowered:
        return "https://www.github.com"

    # Shopping-flavoured goals start at Amazon.
    shopping_terms = ("buy", "purchase", "product", "price", "amazon")
    if any(term in lowered for term in shopping_terms):
        return "https://www.amazon.com"

    # Everything else falls back to the search engine as well.
    return "https://duckduckgo.com/"
411
+
412
def determine_max_steps(prompt: str) -> int:
    """Budget the agent's step count according to how complex the task sounds."""
    lowered = prompt.lower()

    # Keyword buckets checked in priority order; the first hit wins.
    budgets = (
        (("extract", "get info", "save", "download"), 15),          # simple extraction
        (("research", "analyze", "compare", "comprehensive"), 25),  # deep research
        (("fill", "submit", "register", "apply", "multiple"), 20),  # multi-step forms
        (("buy", "product", "price", "review"), 18),                # shopping
        (("job", "career", "position"), 20),                        # job hunting
    )
    for keywords, steps in budgets:
        if any(word in lowered for word in keywords):
            return steps

    # Nothing matched: use a middle-of-the-road budget.
    return 20
backend/anti_bot_detection.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import google.generativeai as genai
4
+ import json
5
+ import asyncio
6
+ import functools
7
+ from PIL import Image
8
+ import io
9
+
10
class AntiBotVisionModel:
    """Gemini-based vision helper for detecting and solving anti-bot challenges.

    Requires the GOOGLE_API_KEY environment variable for the Gemini API.
    """

    def __init__(self):
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        self.model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")

    async def analyze_anti_bot_page(self, screenshot_b64: str, detection_prompt: str, page_url: str) -> dict:
        """Analyze page screenshot to detect anti-bot systems.

        Args:
            screenshot_b64: Base64-encoded page screenshot.
            detection_prompt: Instructions sent to the vision model.
            page_url: URL of the page (used only by the fallback parser).

        Returns:
            dict with keys: is_anti_bot, detection_type, confidence,
            description, can_solve, suggested_action. Never raises; on any
            failure a "retry" result is returned instead.
        """
        try:
            # Convert base64 to PIL Image
            image_data = base64.b64decode(screenshot_b64)
            image = Image.open(io.BytesIO(image_data))

            # Compress image for token efficiency
            max_size = (1024, 768)
            image.thumbnail(max_size, Image.Resampling.LANCZOS)

            # Create content for analysis
            content = [detection_prompt, image]

            # generate_content is blocking, so run it off the event loop.
            response = await asyncio.to_thread(
                functools.partial(self.model.generate_content, content)
            )

            raw_text = response.text
            print(f"🔍 Anti-bot detection response: {raw_text[:200]}...")

            # The model is asked for JSON but may wrap it in prose: carve out
            # the outermost {...} span before parsing.
            try:
                start = raw_text.find('{')
                end = raw_text.rfind('}') + 1

                if start != -1 and end > start:
                    json_str = raw_text[start:end]
                    result = json.loads(json_str)
                    return result
                else:
                    # Fallback parsing
                    return self._parse_fallback_response(raw_text, page_url)

            except json.JSONDecodeError:
                return self._parse_fallback_response(raw_text, page_url)

        except Exception as e:
            print(f"❌ Error in anti-bot vision analysis: {e}")
            return {
                "is_anti_bot": False,
                "detection_type": "none",
                "confidence": 0.0,
                "description": f"Analysis failed: {str(e)}",
                "can_solve": False,
                "suggested_action": "retry"
            }

    def _parse_fallback_response(self, raw_text: str, page_url: str) -> dict:
        """Fallback parsing when JSON extraction fails: keyword heuristics.

        Args:
            raw_text: Free-form model output to scan for anti-bot keywords.
            page_url: Unused here; kept for interface symmetry with callers.
        """
        text_lower = raw_text.lower()

        # Simple keyword detection as fallback; list order sets priority for
        # the reported detection_type.
        anti_bot_keywords = [
            "cloudflare", "captcha", "verification", "access denied",
            "blocked", "rate limit", "checking your browser", "security check",
            "automated traffic", "unusual activity"
        ]

        detected_keywords = [kw for kw in anti_bot_keywords if kw in text_lower]

        if detected_keywords:
            return {
                "is_anti_bot": True,
                "detection_type": detected_keywords[0],
                "confidence": 0.7,
                "description": f"Detected keywords: {', '.join(detected_keywords)}",
                # Only CAPTCHAs are considered solvable by the vision model.
                "can_solve": "captcha" in detected_keywords,
                "suggested_action": "solve_captcha" if "captcha" in detected_keywords else "rotate_proxy"
            }

        return {
            "is_anti_bot": False,
            "detection_type": "none",
            "confidence": 0.5,
            "description": "No clear anti-bot indicators found",
            "can_solve": False,
            "suggested_action": "continue"
        }

    async def solve_captcha(self, screenshot_b64: str, page_url: str, captcha_type: str) -> dict:
        """Attempt to solve CAPTCHA using vision model.

        Returns:
            dict with keys: can_solve, solution_type, solution, confidence,
            instructions. Never raises; failures produce can_solve=False.
        """
        try:
            # Convert base64 to PIL Image
            image_data = base64.b64decode(screenshot_b64)
            image = Image.open(io.BytesIO(image_data))

            captcha_prompt = f"""
            CAPTCHA SOLVING TASK:

            You are looking at a CAPTCHA challenge on: {page_url}
            CAPTCHA Type: {captcha_type}

            Analyze the image and provide the solution:

            For text CAPTCHAs:
            - Read and transcribe the text/numbers exactly as shown

            For image selection CAPTCHAs:
            - Identify which images match the requested criteria
            - Provide grid positions or image descriptions

            For math CAPTCHAs:
            - Solve the mathematical expression

            Respond with JSON:
            {{
                "can_solve": true/false,
                "solution_type": "text|selection|math|unknown",
                "solution": "the answer or list of selections",
                "confidence": 0.0-1.0,
                "instructions": "step by step what to do"
            }}
            """

            content = [captcha_prompt, image]

            response = await asyncio.to_thread(
                functools.partial(self.model.generate_content, content)
            )

            raw_text = response.text

            # Parse response: extract the outermost JSON object if present.
            # Narrowed from a bare `except:` so genuine bugs are not hidden;
            # only a malformed JSON payload falls through to the default result.
            try:
                start = raw_text.find('{')
                end = raw_text.rfind('}') + 1

                if start != -1 and end > start:
                    json_str = raw_text[start:end]
                    return json.loads(json_str)
            except json.JSONDecodeError:
                pass

            return {
                "can_solve": False,
                "solution_type": "unknown",
                "solution": "",
                "confidence": 0.0,
                "instructions": "Could not parse CAPTCHA solution"
            }

        except Exception as e:
            print(f"❌ Error solving CAPTCHA: {e}")
            return {
                "can_solve": False,
                "solution_type": "error",
                "solution": "",
                "confidence": 0.0,
                "instructions": f"CAPTCHA solving failed: {str(e)}"
            }
backend/browser_controller.py ADDED
@@ -0,0 +1,757 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import subprocess
3
+ import os
4
+ import logging
5
+ import json
6
+ import base64
7
+ from typing import Optional, Dict, List, Any, Tuple
8
+ import hashlib
9
+ from dataclasses import dataclass, asdict
10
+ from pydantic import BaseModel
11
+ from pathlib import Path
12
+
13
+ # Set up logging
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
+ from playwright.async_api import async_playwright, Page, CDPSession
18
+
19
@dataclass
class ElementInfo:
    """DOM element information compatible with browser-use.

    One record per interactive/extracted element; `index` doubles as the key
    in PageState.selector_map.
    """
    index: int  # position in the interactive-element ordering (selector_map key)
    id: str  # identifier assigned during DOM extraction
    tag_name: str  # HTML tag name of the element
    xpath: str  # XPath locator for the element
    css_selector: str  # CSS locator for the element
    text: str  # visible text content
    attributes: Dict[str, str]  # raw HTML attribute name -> value
    is_clickable: bool  # element accepts click interactions
    is_input: bool  # element accepts text input
    is_visible: bool = True  # element is rendered (defaults optimistic)
    is_in_viewport: bool = True  # element lies within the current viewport
    input_type: Optional[str] = None  # <input type=...> value, when applicable
    placeholder: Optional[str] = None  # placeholder text for input elements
    bounding_box: Optional[Dict[str, float]] = None  # element box — presumably {x, y, width, height} in page coords; confirm against the DOM-extraction JS
    center_coordinates: Optional[Dict[str, float]] = None  # element center point
    viewport_coordinates: Optional[Dict[str, float]] = None  # position relative to the viewport
38
+
39
class PageState:
    """Snapshot of a rendered page: identity, extracted elements, optional screenshot.

    Compatible with browser-use's page-state shape.
    """

    def __init__(self, url: str, title: str, elements: List[ElementInfo], selector_map: Dict[int, ElementInfo], screenshot: Optional[str] = None):
        self.screenshot = screenshot
        self.selector_map = selector_map
        self.elements = elements
        self.title = title
        self.url = url
        # Pre-computed capability views over the element inventory.
        self.clickable_elements = [item for item in elements if item.is_clickable]
        self.input_elements = [item for item in elements if item.is_input]
49
+
50
+ class BrowserController:
51
    def __init__(self, headless: bool, proxy: dict | None, enable_streaming: bool = False):
        """Store configuration and initialize session state; no I/O happens here.

        Args:
            headless: Run Chromium without a visible window.
            proxy: Optional Playwright proxy settings dict, or None.
            enable_streaming: Wire up a CDP screencast session during __aenter__.
        """
        self.headless = headless
        self.proxy = proxy
        self.enable_streaming = enable_streaming
        # Playwright handles, populated by __aenter__.
        self.play = None
        self.browser = None
        self.page = None
        self.cdp_session = None
        # Live-streaming state.
        self.streaming_active = False
        self.stream_clients = set()
        # Page-state caching bookkeeping.
        self._cached_page_state = None
        self._cached_url = None
        self._last_action_timestamp = None
        self.input_enabled = False  # Track if Input domain is available
        # Xvfb bookkeeping for headful mode on display-less hosts.
        self._original_display = os.environ.get("DISPLAY")
        self._display_was_set = False
        self._xvfb_process: subprocess.Popen | None = None
        self._xvfb_display: str | None = None

        # Load the robust DOM extraction JavaScript
        self.dom_js = self._get_dom_extraction_js()
72
+
73
+ def _find_free_display(self, start: int = 99, end: int = 110) -> int:
74
+ """Locate a free X display number for Xvfb."""
75
+ for display in range(start, end):
76
+ lock_file = Path(f"/tmp/.X{display}-lock")
77
+ if not lock_file.exists():
78
+ return display
79
+ # Fall back to the starting display even if locked (Xvfb will fail clearly)
80
+ return start
81
+
82
+ def _terminate_xvfb(self):
83
+ """Stop the Xvfb process if it was started."""
84
+ if not self._xvfb_process:
85
+ return
86
+
87
+ self._xvfb_process.terminate()
88
+ try:
89
+ self._xvfb_process.wait(timeout=5)
90
+ except subprocess.TimeoutExpired:
91
+ self._xvfb_process.kill()
92
+ finally:
93
+ self._xvfb_process = None
94
+ self._xvfb_display = None
95
+
96
    async def _ensure_display(self):
        """Start a virtual X server when running in headful mode without DISPLAY.

        If Xvfb cannot be started (missing binary, premature exit, or startup
        timeout) the controller silently degrades to headless mode instead of
        failing. On success, DISPLAY is pointed at the new virtual display.
        """
        # Nothing to do when headless or a real display already exists.
        if self.headless or os.environ.get("DISPLAY"):
            return

        display_number = self._find_free_display()
        display = f":{display_number}"
        xvfb_cmd = [
            "Xvfb",
            display,
            "-screen",
            "0",
            "1280x800x24",
            "-nolisten",
            "tcp",
        ]

        try:
            self._xvfb_process = subprocess.Popen(
                xvfb_cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            self._xvfb_display = display
            logger.info("🖥️ Started Xvfb on display %s for headful browser session", display)
        except FileNotFoundError:
            # Xvfb binary not installed on this host.
            logger.warning("⚠️ Xvfb not available; falling back to headless mode")
            self.headless = True
            return

        # Wait briefly for Xvfb to be ready (up to ~3s: 30 polls x 0.1s),
        # using the appearance of its /tmp lock file as the readiness signal.
        ready = False
        for _ in range(30):
            if self._xvfb_process.poll() is not None:
                logger.error(
                    "❌ Xvfb exited prematurely with code %s", self._xvfb_process.returncode
                )
                self._terminate_xvfb()
                self.headless = True
                return
            if Path(f"/tmp/.X{display_number}-lock").exists():
                ready = True
                break
            await asyncio.sleep(0.1)

        if not ready:
            logger.warning("⚠️ Timed out waiting for Xvfb; falling back to headless mode")
            self._terminate_xvfb()
            self.headless = True
            return

        # Point the process at the virtual display; _restore_display undoes this.
        os.environ["DISPLAY"] = display
        self._display_was_set = True
149
+
150
+ def _restore_display(self):
151
+ """Restore the DISPLAY environment variable and stop Xvfb if needed."""
152
+ self._terminate_xvfb()
153
+
154
+ if not self._display_was_set:
155
+ return
156
+
157
+ if self._original_display is None:
158
+ os.environ.pop("DISPLAY", None)
159
+ else:
160
+ os.environ["DISPLAY"] = self._original_display
161
+
162
+ self._display_was_set = False
163
+
164
    async def __aenter__(self):
        """Initialize browser with CDP streaming support.

        Ensures a display exists (starting Xvfb in headful mode if needed),
        launches Chromium with anti-automation-detection flags, opens a
        1280x800 page, optionally sets up CDP streaming, and pins a desktop
        User-Agent header. Returns self for `async with` use.
        """
        await self._ensure_display()

        self.play = await async_playwright().start()

        launch_options = {
            "headless": self.headless,
            "args": [
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--disable-web-security",
                "--disable-features=VizDisplayCompositor",
                "--window-size=1280,800",
                "--window-position=0,0",
                # Hides the navigator.webdriver automation signal.
                "--disable-blink-features=AutomationControlled",
                "--disable-extensions",
                "--no-first-run",
                "--disable-default-apps",
                # Enable remote debugging for CDP
                "--remote-debugging-port=0"  # Use random port
            ]
        }

        if self.proxy:
            launch_options["proxy"] = self.proxy

        self.browser = await self.play.chromium.launch(**launch_options)
        self.page = await self.browser.new_page(viewport={"width": 1280, "height": 800})

        # Set up CDP session for streaming
        if self.enable_streaming:
            await self._setup_cdp_streaming()

        # Fixed desktop UA so the session looks less like an automated browser.
        await self.page.set_extra_http_headers({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

        return self
204
+
205
+ async def __aexit__(self, exc_type, exc, tb):
206
+ """Cleanup browser and CDP session"""
207
+ if self.streaming_active:
208
+ await self._stop_cdp_streaming()
209
+ if self.browser:
210
+ await self.browser.close()
211
+ if self.play:
212
+ await self.play.stop()
213
+ self._restore_display()
214
+
215
    async def _setup_cdp_streaming(self):
        """Setup CDP session for real-time streaming with proper error handling.

        Creates a page-scoped CDP session and enables the Runtime and Page
        domains (required); the Input domain is optional and its availability
        is recorded in self.input_enabled.

        Raises:
            Any exception from session creation or a required domain enable.
        """
        try:
            # Get CDP session - ensure it's for the page target
            self.cdp_session = await self.page.context.new_cdp_session(self.page)

            # Enable required domains with error handling
            await self._enable_cdp_domain('Runtime')
            await self._enable_cdp_domain('Page')

            # Try to enable Input domain (optional)
            self.input_enabled = await self._enable_cdp_domain('Input', optional=True)

            if self.input_enabled:
                logger.info("✅ Input domain enabled - full interaction support available")
            else:
                logger.warning("⚠️ Input domain not available - using Playwright for interactions")

            logger.info("✅ CDP session established for streaming")

        except Exception as e:
            logger.error(f"❌ Failed to setup CDP streaming: {e}")
            raise
238
+
239
+ async def _enable_cdp_domain(self, domain: str, optional: bool = False) -> bool:
240
+ """Enable a CDP domain with proper error handling"""
241
+ try:
242
+ await self.cdp_session.send(f'{domain}.enable')
243
+ logger.info(f"✅ {domain} domain enabled")
244
+ return True
245
+ except Exception as e:
246
+ if optional:
247
+ logger.warning(f"⚠️ {domain} domain not available: {e}")
248
+ return False
249
+ else:
250
+ logger.error(f"❌ Required {domain} domain failed: {e}")
251
+ raise
252
+
253
    async def start_streaming(self, quality: int = 80):
        """Start CDP screencast streaming with enhanced error handling.

        Args:
            quality: JPEG quality for screencast frames.

        Raises:
            RuntimeError: if no CDP session has been initialized.

        On screencast failure, degrades to screenshot polling instead of raising.
        """
        if not self.cdp_session:
            raise RuntimeError("CDP session not initialized")

        try:
            # Check if Page.startScreencast is available
            await self.cdp_session.send('Page.startScreencast', {
                'format': 'jpeg',
                'quality': quality,
                'maxWidth': 1280,
                'maxHeight': 800,
                'everyNthFrame': 1  # Stream every frame for real-time
            })

            # Set up frame listener
            self.cdp_session.on('Page.screencastFrame', self._handle_screencast_frame)

            self.streaming_active = True
            logger.info("🎥 CDP streaming started successfully")

        except Exception as e:
            logger.error(f"❌ Failed to start CDP streaming: {e}")
            # Try alternative approach with screenshots
            await self._start_screenshot_polling()
278
+
279
+ async def _start_screenshot_polling(self):
280
+ """Fallback: Use screenshot polling if screencast not available"""
281
+ logger.info("🔄 Starting screenshot polling as fallback")
282
+ self.streaming_active = True
283
+
284
+ async def screenshot_loop():
285
+ while self.streaming_active:
286
+ try:
287
+ screenshot_bytes = await self.page.screenshot(type='jpeg', quality=80)
288
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')
289
+
290
+ frame_data = {
291
+ 'type': 'frame',
292
+ 'data': screenshot_b64,
293
+ 'timestamp': asyncio.get_event_loop().time(),
294
+ 'method': 'polling'
295
+ }
296
+
297
+ await self._broadcast_to_clients(frame_data)
298
+ await asyncio.sleep(0.1) # 10 FPS
299
+
300
+ except Exception as e:
301
+ logger.error(f"Screenshot polling error: {e}")
302
+ await asyncio.sleep(1)
303
+
304
+ # Start screenshot polling in background
305
+ asyncio.create_task(screenshot_loop())
306
+
307
+ async def stop_streaming(self):
308
+ """Stop CDP screencast streaming"""
309
+ if self.cdp_session and self.streaming_active:
310
+ try:
311
+ await self.cdp_session.send('Page.stopScreencast')
312
+ logger.info("🛑 CDP streaming stopped")
313
+ except Exception as e:
314
+ logger.warning(f"⚠️ Error stopping screencast (may not have been active): {e}")
315
+ finally:
316
+ self.streaming_active = False
317
+
318
+ async def _stop_cdp_streaming(self):
319
+ """Internal cleanup for CDP streaming"""
320
+ await self.stop_streaming()
321
+ if self.cdp_session:
322
+ try:
323
+ await self.cdp_session.detach()
324
+ except Exception as e:
325
+ logger.warning(f"⚠️ Error detaching CDP session: {e}")
326
+
327
+ async def _handle_screencast_frame(self, params):
328
+ """Handle incoming screencast frames"""
329
+ try:
330
+ # Acknowledge frame immediately
331
+ await self.cdp_session.send('Page.screencastFrameAck', {
332
+ 'sessionId': params['sessionId']
333
+ })
334
+
335
+ # Broadcast frame to all connected clients
336
+ frame_data = {
337
+ 'type': 'frame',
338
+ 'data': params['data'], # Base64 encoded JPEG
339
+ 'timestamp': params.get('timestamp'),
340
+ 'method': 'screencast',
341
+ 'metadata': {
342
+ 'sessionId': params['sessionId']
343
+ }
344
+ }
345
+
346
+ # Send to all connected streaming clients
347
+ await self._broadcast_to_clients(frame_data)
348
+
349
+ except Exception as e:
350
+ logger.error(f"❌ Error handling screencast frame: {e}")
351
+
352
+ async def _broadcast_to_clients(self, data):
353
+ """Broadcast data to all connected streaming clients"""
354
+ if not self.stream_clients:
355
+ return
356
+
357
+ disconnected_clients = []
358
+ for client in self.stream_clients:
359
+ try:
360
+ await client.send_text(json.dumps(data))
361
+ except Exception:
362
+ disconnected_clients.append(client)
363
+
364
+ # Remove disconnected clients
365
+ for client in disconnected_clients:
366
+ self.stream_clients.discard(client)
367
+
368
    def add_stream_client(self, websocket):
        """Add a new streaming client"""
        # Set semantics: re-adding an already-connected socket is a no-op.
        self.stream_clients.add(websocket)
        logger.info(f"🔗 Stream client connected. Total clients: {len(self.stream_clients)}")
372
+
373
    def remove_stream_client(self, websocket):
        """Remove a streaming client"""
        # discard() (not remove()) so unknown/already-removed sockets don't raise.
        self.stream_clients.discard(websocket)
        logger.info(f"🔌 Stream client disconnected. Total clients: {len(self.stream_clients)}")
377
+
378
+ async def handle_mouse_event(self, event_data):
379
+ """Handle mouse events with fallback support"""
380
+ try:
381
+ if self.input_enabled and self.cdp_session:
382
+ # Use CDP Input domain if available
383
+ await self.cdp_session.send('Input.dispatchMouseEvent', {
384
+ 'type': event_data['eventType'],
385
+ 'x': event_data['x'],
386
+ 'y': event_data['y'],
387
+ 'button': event_data.get('button', 'left'),
388
+ 'clickCount': event_data.get('clickCount', 1)
389
+ })
390
+ else:
391
+ # Fallback to Playwright mouse actions
392
+ if event_data['eventType'] == 'mousePressed':
393
+ await self.page.mouse.click(event_data['x'], event_data['y'])
394
+ elif event_data['eventType'] == 'mouseMoved':
395
+ await self.page.mouse.move(event_data['x'], event_data['y'])
396
+
397
+ except Exception as e:
398
+ logger.error(f"❌ Error handling mouse event: {e}")
399
+
400
+ async def handle_keyboard_event(self, event_data):
401
+ """Handle keyboard events with fallback support"""
402
+ try:
403
+ if self.input_enabled and self.cdp_session:
404
+ # Use CDP Input domain if available
405
+ await self.cdp_session.send('Input.dispatchKeyEvent', {
406
+ 'type': event_data['eventType'],
407
+ 'text': event_data.get('text', ''),
408
+ 'key': event_data.get('key', ''),
409
+ 'code': event_data.get('code', ''),
410
+ 'keyCode': event_data.get('keyCode', 0)
411
+ })
412
+ else:
413
+ # Fallback to Playwright keyboard actions
414
+ if event_data['eventType'] == 'keyDown' and event_data.get('key'):
415
+ await self.page.keyboard.press(event_data['key'])
416
+ elif event_data.get('text'):
417
+ await self.page.keyboard.type(event_data['text'])
418
+
419
+ except Exception as e:
420
+ logger.error(f"❌ Error handling keyboard event: {e}")
421
+
422
+ def get_streaming_info(self):
423
+ """Get streaming connection information"""
424
+ if self.enable_streaming:
425
+ return {
426
+ "enabled": True,
427
+ "active": self.streaming_active,
428
+ "clients": len(self.stream_clients),
429
+ "websocket_url": "ws://localhost:8000/stream",
430
+ "input_enabled": self.input_enabled,
431
+ "method": "screencast" if self.input_enabled else "polling"
432
+ }
433
+ return {"enabled": False}
434
+
435
+ # Keep all your existing methods from the original code
436
    def _get_dom_extraction_js(self) -> str:
        """Return the JavaScript source injected via page.evaluate() to index the DOM.

        The script walks every element and keeps those that are visible in the
        viewport or interactive. Interactive/input elements get incrementing
        highlight indices (optionally drawn as red outlines plus numeric labels
        appended to document.body), and the script returns an object with
        `elements`, `selectorMap` (highlight index -> element data) and `stats`.
        Note: the `xpath` and `cssSelector` fields are currently always empty
        strings, and `elementMap`/`getClassName`/`debugMode` are declared but
        unused.
        """
        return """
        (args) => {
            const { doHighlightElements = true, debugMode = false } = args || {};

            // Performance tracking
            const startTime = performance.now();
            let nodeCount = 0;
            let processedCount = 0;

            // Results
            const elementMap = new Map();
            const selectorMap = {};
            let highlightIndex = 0;

            // Helper functions
            function getClassName(element) {
                if (!element.className) return '';
                if (typeof element.className === 'string') return element.className;
                if (element.className.toString) return element.className.toString();
                if (element.classList && element.classList.length > 0) {
                    return Array.from(element.classList).join(' ');
                }
                return '';
            }

            function isInteractive(element) {
                const tagName = element.tagName.toLowerCase();
                const interactiveTags = ['a', 'button', 'input', 'select', 'textarea', 'label'];
                if (interactiveTags.includes(tagName)) return true;
                if (element.onclick || element.getAttribute('onclick')) return true;
                if (element.getAttribute('role') === 'button') return true;
                if (element.getAttribute('role') === 'link') return true;
                if (element.hasAttribute('tabindex')) return true;
                if (element.contentEditable === 'true') return true;
                const style = window.getComputedStyle(element);
                if (style.cursor === 'pointer') return true;
                return false;
            }

            function isInput(element) {
                const tagName = element.tagName.toLowerCase();
                return ['input', 'textarea', 'select'].includes(tagName) ||
                       element.contentEditable === 'true';
            }

            function getTextContent(element) {
                let text = '';
                if (element.textContent) {
                    text = element.textContent.trim();
                }
                if (element.value) {
                    text = element.value;
                } else if (element.placeholder) {
                    text = element.placeholder;
                }
                if (element.tagName === 'IMG' && element.alt) {
                    text = element.alt;
                }
                return text.substring(0, 200);
            }

            function isVisibleAndInViewport(element) {
                const rect = element.getBoundingClientRect();
                const style = window.getComputedStyle(element);
                const hasDimensions = rect.width > 0 && rect.height > 0;
                const isVisible = style.visibility !== 'hidden' &&
                                  style.display !== 'none' &&
                                  style.opacity !== '0';
                const isInViewport = rect.top < window.innerHeight &&
                                     rect.bottom > 0 &&
                                     rect.left < window.innerWidth &&
                                     rect.right > 0;
                return hasDimensions && isVisible && isInViewport;
            }

            // Process elements
            const allElements = document.querySelectorAll('*');
            const elements = [];

            allElements.forEach(element => {
                nodeCount++;
                if (!element || element.nodeType !== 1) return;

                const isElementVisible = isVisibleAndInViewport(element);
                const isElementInteractive = isInteractive(element);
                const isElementInput = isInput(element);

                if (!isElementVisible && !isElementInteractive) return;

                processedCount++;
                const rect = element.getBoundingClientRect();
                const elementId = `element_${processedCount}`;
                let currentHighlightIndex = null;

                if (isElementInteractive || isElementInput) {
                    currentHighlightIndex = highlightIndex++;

                    if (doHighlightElements) {
                        element.style.outline = '2px solid red';
                        element.style.outlineOffset = '1px';

                        const label = document.createElement('div');
                        label.textContent = currentHighlightIndex.toString();
                        label.style.cssText = `
                            position: absolute;
                            top: ${rect.top + window.scrollY - 20}px;
                            left: ${rect.left + window.scrollX}px;
                            background: red;
                            color: white;
                            padding: 2px 6px;
                            font-size: 12px;
                            font-weight: bold;
                            z-index: 10000;
                            border-radius: 3px;
                            pointer-events: none;
                        `;
                        document.body.appendChild(label);
                    }
                }

                const elementData = {
                    index: currentHighlightIndex,
                    id: elementId,
                    tagName: element.tagName.toLowerCase(),
                    xpath: '',
                    cssSelector: '',
                    text: getTextContent(element),
                    attributes: {},
                    isClickable: isElementInteractive,
                    isInput: isElementInput,
                    isVisible: isElementVisible,
                    isInViewport: isElementVisible,
                    inputType: element.type || null,
                    placeholder: element.placeholder || null,
                    boundingBox: {
                        x: rect.x,
                        y: rect.y,
                        width: rect.width,
                        height: rect.height,
                        top: rect.top,
                        bottom: rect.bottom,
                        left: rect.left,
                        right: rect.right
                    },
                    centerCoordinates: {
                        x: rect.left + rect.width / 2,
                        y: rect.top + rect.height / 2
                    }
                };

                if (element.attributes) {
                    for (let attr of element.attributes) {
                        elementData.attributes[attr.name] = attr.value;
                    }
                }

                elements.push(elementData);

                if (currentHighlightIndex !== null) {
                    selectorMap[currentHighlightIndex] = elementData;
                }
            });

            const endTime = performance.now();
            return {
                elements: elements,
                selectorMap: selectorMap,
                stats: {
                    totalNodes: nodeCount,
                    processedNodes: processedCount,
                    interactiveElements: Object.keys(selectorMap).length,
                    executionTime: endTime - startTime
                }
            };
        }
        """
614
+
615
+ # Add all your existing methods here (goto, get_page_state, click_element_by_index, etc.)
616
+ async def goto(self, url: str, wait_until: str = "domcontentloaded", timeout: int = 30000):
617
+ """Navigate to a URL with proper waiting"""
618
+ try:
619
+ logger.info(f"Navigating to: {url}")
620
+ await self.page.goto(url, wait_until=wait_until, timeout=timeout)
621
+ await asyncio.sleep(2)
622
+ logger.info(f"Successfully navigated to: {url}")
623
+ except Exception as e:
624
+ logger.error(f"Failed to navigate to {url}: {e}")
625
+ raise
626
+
627
    async def get_page_state(self, include_screenshot: bool = True, highlight_elements: bool = True) -> PageState:
        """Snapshot the current page: URL, title, interactive elements and screenshot.

        Failure handling is layered: if DOM extraction fails, a PageState with
        url/title/screenshot but no elements is returned; if anything else
        fails, a completely empty PageState is returned. Never raises.
        """
        try:
            await self.page.wait_for_load_state("domcontentloaded", timeout=10000)
            await asyncio.sleep(1)  # settle delay for late-rendered content

            url = self.page.url
            title = await self.page.title()

            screenshot = None
            if include_screenshot:
                # Viewport-sized capture only; full-page screenshots are not taken.
                screenshot_bytes = await self.page.screenshot(
                    full_page=False,
                    clip={'x': 0, 'y': 0, 'width': 1250, 'height': 800}
                )
                screenshot = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Extract DOM elements
            try:
                dom_result = await self.page.evaluate(self.dom_js, {"doHighlightElements": highlight_elements})
                logger.info(f"Extracted {len(dom_result.get('elements', []))} interactive elements")
            except Exception as e:
                # Non-fatal: fall back to a state with url/title but no elements.
                logger.error(f"DOM extraction failed: {e}")
                return PageState(url, title, [], {}, screenshot)

            elements = []
            selector_map = {}

            for elem_data in dom_result.get('elements', []):
                element_info = ElementInfo(
                    index=elem_data.get('index'),
                    id=elem_data.get('id', ''),
                    tag_name=elem_data.get('tagName', ''),
                    xpath=elem_data.get('xpath', ''),
                    css_selector=elem_data.get('cssSelector', ''),
                    text=elem_data.get('text', ''),
                    attributes=elem_data.get('attributes', {}),
                    is_clickable=elem_data.get('isClickable', False),
                    is_input=elem_data.get('isInput', False),
                    center_coordinates=elem_data.get('centerCoordinates')
                )

                elements.append(element_info)
                # Only interactive/input elements carry a highlight index.
                if element_info.index is not None:
                    selector_map[element_info.index] = element_info

            return PageState(url, title, elements, selector_map, screenshot)

        except Exception as e:
            logger.error(f"Failed to get page state: {e}")
            return PageState("", "", [], {}, None)
678
+
679
+ async def click_element_by_index(self, index: int, page_state: PageState = None) -> bool:
680
+ """Click element by index"""
681
+ try:
682
+ if page_state is None:
683
+ page_state = await self.get_page_state(include_screenshot=False, highlight_elements=False)
684
+
685
+ if index not in page_state.selector_map:
686
+ logger.error(f"Element with index {index} not found")
687
+ return False
688
+
689
+ element = page_state.selector_map[index]
690
+ if not element.center_coordinates:
691
+ logger.error(f"Element at index {index} has no coordinates")
692
+ return False
693
+
694
+ x = element.center_coordinates['x']
695
+ y = element.center_coordinates['y']
696
+
697
+ logger.info(f"Clicking element {index}: {element.text[:50]}... at ({x}, {y})")
698
+
699
+ await self.page.mouse.click(x, y)
700
+ await asyncio.sleep(1)
701
+
702
+ logger.info(f"Successfully clicked element {index}")
703
+ return True
704
+
705
+ except Exception as e:
706
+ logger.error(f"Failed to click element at index {index}: {e}")
707
+ return False
708
+
709
+ async def input_text_by_index(self, index: int, text: str, page_state: PageState = None) -> bool:
710
+ """Input text into element by index"""
711
+ try:
712
+ if page_state is None:
713
+ page_state = await self.get_page_state(include_screenshot=False, highlight_elements=False)
714
+
715
+ if index not in page_state.selector_map:
716
+ logger.error(f"Element with index {index} not found")
717
+ return False
718
+
719
+ element = page_state.selector_map[index]
720
+ if not element.center_coordinates:
721
+ logger.error(f"Element at index {index} has no coordinates")
722
+ return False
723
+
724
+ x = element.center_coordinates['x']
725
+ y = element.center_coordinates['y']
726
+
727
+ logger.info(f"Typing '{text}' into element {index}")
728
+
729
+ await self.page.mouse.click(x, y)
730
+ await asyncio.sleep(0.5)
731
+ await self.page.keyboard.press('Control+a')
732
+ await self.page.keyboard.type(text)
733
+
734
+ logger.info(f"Successfully typed text into element {index}")
735
+ return True
736
+
737
+ except Exception as e:
738
+ logger.error(f"Failed to input text into element at index {index}: {e}")
739
+ return False
740
+
741
+ async def scroll_page(self, direction: str = "down", amount: int = 500):
742
+ """Scroll the page"""
743
+ if direction == "down":
744
+ await self.page.mouse.wheel(0, amount)
745
+ elif direction == "up":
746
+ await self.page.mouse.wheel(0, -amount)
747
+ await asyncio.sleep(1)
748
+
749
+ async def press_key(self, key: str) -> bool:
750
+ """Press a keyboard key"""
751
+ try:
752
+ await self.page.keyboard.press(key)
753
+ logger.info(f"Pressed key: {key}")
754
+ return True
755
+ except Exception as e:
756
+ logger.error(f"Failed to press key {key}: {e}")
757
+ return False
backend/cdp_streamer.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/cdp_streamer.py
2
+ import asyncio
3
+ import json
4
+ import websockets
5
+ from playwright.async_api import CDPSession
6
+
7
class CDPBrowserStreamer:
    """Streams live browser frames over a standalone WebSocket server using
    CDP `Page.startScreencast`, and relays client mouse/keyboard events back
    through the CDP `Input` domain.
    """

    def __init__(self, page):
        # Playwright Page whose content is streamed.
        self.page = page
        # BUGFIX: CDPSession must be obtained from page.context.new_cdp_session();
        # the original instantiated Playwright's internal CDPSession() directly,
        # which is not a public constructor. Start with None until streaming begins.
        self.cdp_session = None
        self.streaming = False

    async def start_streaming(self, websocket_port: int = 8080):
        """Start CDP-based streaming and serve frames on `websocket_port`.

        Errors are logged and swallowed — the caller can retry.
        """
        try:
            # Get CDP session from Playwright page
            self.cdp_session = await self.page.context.new_cdp_session(self.page)

            # Enable necessary CDP domains
            await self.cdp_session.send('Runtime.enable')
            await self.cdp_session.send('Page.enable')
            await self.cdp_session.send('Page.startScreencast', {
                'format': 'jpeg',
                'quality': 80,
                'maxWidth': 1280,
                'maxHeight': 800,
                'everyNthFrame': 1  # Stream every frame for real-time
            })

            # Start WebSocket server for streaming
            await websockets.serve(self.handle_client, "localhost", websocket_port)
            print(f"🎥 CDP Streaming started on port {websocket_port}")

        except Exception as e:
            print(f"❌ Failed to start CDP streaming: {e}")

    async def handle_client(self, websocket, path=None):
        """Handle one WebSocket client: push frames, accept input events.

        `path` defaults to None for compatibility with websockets>=11, which
        invokes connection handlers with a single argument.
        """
        print("🔗 Client connected to CDP stream")

        try:
            # NOTE(review): every connected client registers another
            # screencastFrame listener, so with multiple clients each frame is
            # acked once per client. Fine for a single viewer — revisit for many.
            self.cdp_session.on('Page.screencastFrame', lambda params:
                asyncio.create_task(self.send_frame(websocket, params)))

            # Keep connection alive and handle client messages
            async for message in websocket:
                data = json.loads(message)
                if data['type'] == 'mouse':
                    await self.handle_mouse_event(data)
                elif data['type'] == 'keyboard':
                    await self.handle_keyboard_event(data)

        except websockets.exceptions.ConnectionClosed:
            print("🔌 Client disconnected from CDP stream")

    async def send_frame(self, websocket, params):
        """Forward one screencast frame to `websocket`, then ack it to Chrome."""
        try:
            frame_data = {
                'type': 'frame',
                'data': params['data'],  # Base64 encoded JPEG
                'metadata': {
                    'sessionId': params['sessionId'],
                    'timestamp': params.get('timestamp')
                }
            }
            await websocket.send(json.dumps(frame_data))

            # Acknowledge frame so Chrome keeps streaming.
            await self.cdp_session.send('Page.screencastFrameAck', {
                'sessionId': params['sessionId']
            })
        except Exception as e:
            print(f"❌ Error sending frame: {e}")

    async def handle_mouse_event(self, data):
        """Dispatch a client mouse event through CDP Input."""
        await self.cdp_session.send('Input.dispatchMouseEvent', {
            'type': data['eventType'],  # 'mousePressed', 'mouseReleased', 'mouseMoved'
            'x': data['x'],
            'y': data['y'],
            'button': data.get('button', 'left'),
            'clickCount': data.get('clickCount', 1)
        })

    async def handle_keyboard_event(self, data):
        """Dispatch a client keyboard event through CDP Input."""
        await self.cdp_session.send('Input.dispatchKeyEvent', {
            'type': data['eventType'],  # 'keyDown', 'keyUp', 'char'
            'text': data.get('text', ''),
            'key': data.get('key', ''),
            'code': data.get('code', '')
        })
+ })
backend/database.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Neon PostgreSQL Database Integration
3
+ Stores job history, results, and metadata
4
+ """
5
import json
import os
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

import asyncpg
10
+
11
class Database:
    """Thin async wrapper around a Neon/PostgreSQL connection pool (asyncpg).

    All methods degrade gracefully: when DATABASE_URL is unset or connection
    failed, `self.pool` stays None and every operation is a no-op returning
    False / None / [] / {} as appropriate.
    """

    def __init__(self):
        # Pool is created lazily in connect(); None means "database disabled".
        self.pool: Optional[asyncpg.Pool] = None
        self.database_url = os.getenv("DATABASE_URL")

    async def connect(self):
        """Initialize the connection pool and create tables; never raises."""
        if not self.database_url:
            print("⚠️ DATABASE_URL not set, database features disabled")
            return

        try:
            self.pool = await asyncpg.create_pool(
                self.database_url,
                min_size=2,
                max_size=10,
                command_timeout=60
            )
            await self.init_tables()
            print("✅ Database connected successfully")
        except Exception as e:
            print(f"❌ Database connection failed: {e}")
            self.pool = None  # leave the instance in "disabled" mode

    async def disconnect(self):
        """Close database connections"""
        if self.pool:
            await self.pool.close()
            print("🔌 Database disconnected")

    async def init_tables(self):
        """Create the jobs / job_results / proxy_stats tables if missing."""
        if not self.pool:
            return

        async with self.pool.acquire() as conn:
            # Jobs table — one row per submitted agent job.
            await conn.execute("""
                CREATE TABLE IF NOT EXISTS jobs (
                    id TEXT PRIMARY KEY,
                    prompt TEXT NOT NULL,
                    format TEXT NOT NULL,
                    status TEXT DEFAULT 'pending',
                    created_at TIMESTAMPTZ DEFAULT NOW(),
                    completed_at TIMESTAMPTZ,
                    file_extension TEXT,
                    content_type TEXT,
                    proxy_server TEXT,
                    headless BOOLEAN DEFAULT FALSE,
                    streaming_enabled BOOLEAN DEFAULT FALSE,
                    error_message TEXT
                )
            """)

            # Job results table (stores extracted content metadata)
            await conn.execute("""
                CREATE TABLE IF NOT EXISTS job_results (
                    id SERIAL PRIMARY KEY,
                    job_id TEXT REFERENCES jobs(id) ON DELETE CASCADE,
                    content_length INTEGER,
                    extraction_time TIMESTAMPTZ DEFAULT NOW(),
                    format TEXT,
                    metadata JSONB
                )
            """)

            # Proxy usage tracking
            await conn.execute("""
                CREATE TABLE IF NOT EXISTS proxy_stats (
                    id SERIAL PRIMARY KEY,
                    job_id TEXT REFERENCES jobs(id) ON DELETE SET NULL,
                    proxy_server TEXT,
                    success BOOLEAN,
                    error_message TEXT,
                    recorded_at TIMESTAMPTZ DEFAULT NOW()
                )
            """)

            print("📊 Database tables initialized")

    async def create_job(self, job_id: str, prompt: str, format: str,
                         headless: bool = False, streaming_enabled: bool = False,
                         proxy_server: Optional[str] = None) -> bool:
        """Insert a new job row with status 'running'; return True on success."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO jobs (id, prompt, format, headless, streaming_enabled, proxy_server, status)
                    VALUES ($1, $2, $3, $4, $5, $6, 'running')
                """, job_id, prompt, format, headless, streaming_enabled, proxy_server)
            return True
        except Exception as e:
            print(f"❌ Failed to create job: {e}")
            return False

    async def update_job_status(self, job_id: str, status: str,
                                error_message: Optional[str] = None) -> bool:
        """Update a job's status; stamps completed_at for terminal states."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                # BUGFIX: use a timezone-aware UTC timestamp. The column is
                # TIMESTAMPTZ and datetime.utcnow() is naive (and deprecated
                # since Python 3.12).
                completed_at = datetime.now(timezone.utc) if status in ['completed', 'failed'] else None
                await conn.execute("""
                    UPDATE jobs
                    SET status = $2,
                        completed_at = $3,
                        error_message = $4
                    WHERE id = $1
                """, job_id, status, completed_at, error_message)
            return True
        except Exception as e:
            print(f"❌ Failed to update job status: {e}")
            return False

    async def update_job_info(self, job_id: str, file_extension: str,
                              content_type: str) -> bool:
        """Record the output file's extension and MIME type for a job."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    UPDATE jobs
                    SET file_extension = $2, content_type = $3
                    WHERE id = $1
                """, job_id, file_extension, content_type)
            return True
        except Exception as e:
            print(f"❌ Failed to update job info: {e}")
            return False

    async def save_job_result(self, job_id: str, content_length: int,
                              format: str, metadata: Dict[str, Any]) -> bool:
        """Store extraction metadata (serialised to JSONB) for a finished job."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO job_results (job_id, content_length, format, metadata)
                    VALUES ($1, $2, $3, $4)
                """, job_id, content_length, format, json.dumps(metadata))
            return True
        except Exception as e:
            print(f"❌ Failed to save job result: {e}")
            return False

    async def log_proxy_usage(self, job_id: str, proxy_server: str,
                              success: bool, error_message: Optional[str] = None) -> bool:
        """Log proxy usage for a job"""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO proxy_stats (job_id, proxy_server, success, error_message)
                    VALUES ($1, $2, $3, $4)
                """, job_id, proxy_server, success, error_message)
            return True
        except Exception as e:
            print(f"❌ Failed to log proxy usage: {e}")
            return False

    async def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Return a job row as a dict, or None if missing/disabled."""
        if not self.pool:
            return None

        try:
            async with self.pool.acquire() as conn:
                row = await conn.fetchrow("""
                    SELECT * FROM jobs WHERE id = $1
                """, job_id)
                return dict(row) if row else None
        except Exception as e:
            print(f"❌ Failed to get job: {e}")
            return None

    async def get_all_jobs(self, limit: int = 50, offset: int = 0) -> List[Dict[str, Any]]:
        """Return jobs newest-first with LIMIT/OFFSET pagination."""
        if not self.pool:
            return []

        try:
            async with self.pool.acquire() as conn:
                rows = await conn.fetch("""
                    SELECT * FROM jobs
                    ORDER BY created_at DESC
                    LIMIT $1 OFFSET $2
                """, limit, offset)
                return [dict(row) for row in rows]
        except Exception as e:
            print(f"❌ Failed to get jobs: {e}")
            return []

    async def get_job_stats(self) -> Dict[str, Any]:
        """Return aggregate counts: total / completed / failed / running."""
        if not self.pool:
            return {}

        try:
            async with self.pool.acquire() as conn:
                stats = await conn.fetchrow("""
                    SELECT
                        COUNT(*) as total_jobs,
                        COUNT(*) FILTER (WHERE status = 'completed') as completed,
                        COUNT(*) FILTER (WHERE status = 'failed') as failed,
                        COUNT(*) FILTER (WHERE status = 'running') as running
                    FROM jobs
                """)
                return dict(stats) if stats else {}
        except Exception as e:
            print(f"❌ Failed to get stats: {e}")
            return {}

    async def delete_job(self, job_id: str) -> bool:
        """Delete a job; job_results rows cascade via the FK constraint."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("DELETE FROM jobs WHERE id = $1", job_id)
            return True
        except Exception as e:
            print(f"❌ Failed to delete job: {e}")
            return False

# Global database instance
db = Database()
backend/main.py ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio, json, os, uuid, shutil, base64
2
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, BackgroundTasks, UploadFile, Form
3
+ from fastapi.responses import FileResponse
4
+ from pydantic import BaseModel
5
+ from pathlib import Path
6
+ from backend.smart_browser_controller import SmartBrowserController # Updated import
7
+ from backend.proxy_manager import SmartProxyManager # Updated import
8
+ from backend.agent import run_agent
9
+ from backend.database import db # Database integration
10
+ from backend.telegram_bot import bot_notifier, start_bot # Telegram integration
11
+ from fastapi.staticfiles import StaticFiles
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+
14
# FastAPI application instance; routes are registered via decorators below.
app = FastAPI()


# Allow cross-origin requests so a separately-hosted frontend can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm intended behavior.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # TODO add specific origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
24
+
25
# In-memory job bookkeeping (process-local; lost on restart).
tasks = {}               # job_id → asyncio.Task running the agent
ws_subscribers = {}      # job_id → set of WebSocket subscribers
streaming_sessions = {}  # job_id → browser_controller
job_info = {}            # job_id → { format, content_type, extension, prompt }

# Initialize global smart proxy manager
smart_proxy_manager = SmartProxyManager()

# Directory where job output files are written; created on startup.
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(exist_ok=True)
35
+
36
class JobRequest(BaseModel):
    """Request body for POST /job."""
    # Natural-language goal handed to the agent.
    prompt: str
    format: str = "txt"  # txt | md | json | html | csv | pdf
    # Run the browser without a visible window.
    headless: bool = False
    # Expose a live-frame WebSocket for this job.
    enable_streaming: bool = False
41
+
42
async def store_job_info(job_id: str, info: dict):
    """Record per-job metadata in the in-memory `job_info` map.

    Process-local only — presumably read back by download/status endpoints.
    """
    job_info[job_id] = info
    print(f"📊 Stored job info for {job_id}: {info}")
46
+
47
@app.post("/job")
async def create_job(req: JobRequest):
    """Create a new scraping job.

    Validates the requested output format, selects the best available proxy,
    records the job in the database, notifies Telegram, and schedules the
    agent as a background task. Returns the job id, format, and proxy stats
    (plus streaming connection details when streaming is enabled).
    """
    # Fall back to plain text when an unknown format is requested.
    valid_formats = ["txt", "md", "json", "html", "csv", "pdf"]
    if req.format not in valid_formats:
        print(f"⚠️ Invalid format '{req.format}', defaulting to 'txt'")
        req.format = "txt"

    job_id = str(uuid.uuid4())

    # Use smart proxy manager to get the best available proxy.
    proxy_info = smart_proxy_manager.get_best_proxy()
    proxy = proxy_info.to_playwright_dict() if proxy_info else None
    proxy_server = proxy.get("server", "None") if proxy else "None"

    print(f"🚀 Creating smart job {job_id}")
    print(f"📋 Goal: {req.prompt}")
    print(f"🌐 Format: {req.format}")
    print(f"🖥️ Headless: {req.headless}")
    print(f"📡 Streaming: {req.enable_streaming}")
    print(f"🔄 Selected proxy: {proxy_server}")

    # Get initial proxy stats for the response payload.
    proxy_stats = smart_proxy_manager.get_proxy_stats()
    print(f"📊 Proxy pool stats: {proxy_stats}")

    # Persist the job before starting it so its status is queryable at once.
    await db.create_job(
        job_id=job_id,
        prompt=req.prompt,
        format=req.format,
        headless=req.headless,
        streaming_enabled=req.enable_streaming,
        proxy_server=proxy_server
    )

    # Send Telegram notification (fire-and-forget).
    asyncio.create_task(bot_notifier.notify_job_started(job_id, req.prompt, req.format))

    # Create the agent task.
    coro = run_agent(job_id, req.prompt, req.format, req.headless, proxy, req.enable_streaming)
    task = asyncio.create_task(coro)

    def on_task_done(fut):
        # BUGFIX: remove the finished task from the registry so `tasks` does
        # not grow without bound, and don't report a *cancelled* job as failed
        # (fut.result() re-raises CancelledError, which the old code turned
        # into a failure notification).
        tasks.pop(job_id, None)
        if fut.cancelled():
            return
        exc = fut.exception()
        if exc is None:
            # Job completed successfully.
            download_url = f"/download/{job_id}"
            asyncio.create_task(bot_notifier.notify_job_completed(job_id, req.format, download_url))
        else:
            # Job failed.
            asyncio.create_task(bot_notifier.notify_job_failed(job_id, str(exc)))

    task.add_done_callback(on_task_done)
    tasks[job_id] = task

    response = {
        "job_id": job_id,
        "format": req.format,
        "proxy_stats": proxy_stats
    }

    if req.enable_streaming:
        response["streaming_enabled"] = True
        response["stream_url"] = f"ws://localhost:8000/stream/{job_id}"

    return response
115
+
116
@app.websocket("/ws/{job_id}")
async def job_ws(ws: WebSocket, job_id: str):
    """Per-job status WebSocket: pushes streaming info and proxy-pool stats."""
    await ws.accept()
    subscribers = ws_subscribers.setdefault(job_id, set())
    subscribers.add(ws)

    # If this job already has a live browser stream, tell the client how to attach.
    browser_ctrl = streaming_sessions.get(job_id)
    if browser_ctrl is not None:
        await ws.send_text(json.dumps({
            "type": "streaming_info",
            "streaming": browser_ctrl.get_streaming_info()
        }))

    # Initial snapshot of the proxy pool.
    await ws.send_text(json.dumps({
        "type": "proxy_stats",
        "stats": smart_proxy_manager.get_proxy_stats()
    }))

    try:
        # Drain incoming messages just to keep the connection alive.
        while True:
            await ws.receive_text()
    except WebSocketDisconnect:
        subscribers.discard(ws)
142
+
143
@app.websocket("/stream/{job_id}")
async def stream_ws(websocket: WebSocket, job_id: str):
    """WebSocket endpoint for real-time browser streaming.

    Waits (up to 30s) for the job's streaming session to appear, registers the
    client with the browser controller, then relays mouse/keyboard events from
    the client to the browser and answers pings.
    """
    await websocket.accept()

    # Wait for streaming session to be available (with timeout).
    max_wait = 30  # seconds
    wait_time = 0
    while job_id not in streaming_sessions and wait_time < max_wait:
        await asyncio.sleep(0.5)
        wait_time += 0.5

    if job_id not in streaming_sessions:
        await websocket.send_text(json.dumps({
            "type": "error",
            "message": "Streaming session not available - job may not have streaming enabled"
        }))
        await websocket.close()
        return

    browser_ctrl = streaming_sessions[job_id]
    browser_ctrl.add_stream_client(websocket)

    # Send initial connection confirmation.
    await websocket.send_text(json.dumps({
        "type": "connected",
        "message": "Connected to browser stream",
        "streaming_active": browser_ctrl.streaming_active
    }))

    try:
        while True:
            try:
                # BUGFIX: websocket.receive_text() never raises
                # asyncio.TimeoutError on its own, so the keepalive branch
                # below was dead code. Bound the wait explicitly so idle
                # clients get pinged every 30 seconds.
                message = await asyncio.wait_for(websocket.receive_text(), timeout=30)
                data = json.loads(message)

                if data['type'] == 'mouse':
                    await browser_ctrl.handle_mouse_event(data)
                elif data['type'] == 'keyboard':
                    await browser_ctrl.handle_keyboard_event(data)
                elif data['type'] == 'ping':
                    await websocket.send_text(json.dumps({"type": "pong"}))

            except asyncio.TimeoutError:
                # Idle connection: send a ping to keep it alive.
                await websocket.send_text(json.dumps({"type": "ping"}))

    except WebSocketDisconnect:
        browser_ctrl.remove_stream_client(websocket)
        print(f"Stream client disconnected from job {job_id}")
    except Exception as e:
        print(f"Error in stream WebSocket: {e}")
        browser_ctrl.remove_stream_client(websocket)
195
+
196
@app.post("/streaming/create/{job_id}")
async def create_streaming_session(job_id: str):
    """Create a streaming session without starting a job"""
    existing = streaming_sessions.get(job_id)
    if existing is not None:
        # Session already exists; just report its connection details.
        return existing.get_streaming_info()

    try:
        # Pick the healthiest proxy for the new browser.
        proxy_info = smart_proxy_manager.get_best_proxy()
        proxy = proxy_info.to_playwright_dict() if proxy_info else None

        print(f"🎥 Creating streaming session with proxy: {proxy.get('server', 'None') if proxy else 'None'}")

        # Launch a streaming-enabled browser and start pushing frames.
        browser_ctrl = SmartBrowserController(headless=False, proxy=proxy, enable_streaming=True)
        await browser_ctrl.__aenter__()
        await browser_ctrl.start_streaming(quality=80)
        streaming_sessions[job_id] = browser_ctrl

        stream_info = browser_ctrl.get_streaming_info()

        # Attach proxy details so clients can display pool health.
        stream_info["proxy_info"] = {
            "current_proxy": proxy.get("server", "None") if proxy else "None",
            "proxy_stats": smart_proxy_manager.get_proxy_stats()
        }

        # Let any listeners on the job channel know the stream exists.
        await broadcast(job_id, {
            "type": "streaming_info",
            "streaming": stream_info
        })

        return stream_info

    except Exception as e:
        print(f"❌ Failed to create streaming session: {e}")
        return {"enabled": False, "error": str(e)}
235
+
236
@app.get("/streaming/{job_id}")
async def get_streaming_info(job_id: str):
    """Get streaming connection information for a job"""
    browser_ctrl = streaming_sessions.get(job_id)
    if browser_ctrl is None:
        return {"enabled": False, "error": "Streaming not enabled for this job"}

    info = browser_ctrl.get_streaming_info()
    # Include a live snapshot of the proxy pool.
    info["proxy_stats"] = smart_proxy_manager.get_proxy_stats()
    return info
249
+
250
@app.delete("/streaming/{job_id}")
async def cleanup_streaming(job_id: str):
    """Clean up streaming session for a job"""
    if job_id not in streaming_sessions:
        return {"message": "No streaming session found"}

    browser_ctrl = streaming_sessions[job_id]
    try:
        # Close the browser via its async context-manager exit.
        await browser_ctrl.__aexit__(None, None, None)
    except Exception as e:
        print(f"Error cleaning up streaming session: {e}")
    finally:
        # Drop the registry entry even if teardown raised.
        del streaming_sessions[job_id]
    return {"message": "Streaming session cleaned up"}
263
+
264
@app.get("/download/{job_id}")
def download(job_id: str):
    """Serve a job's output file for download.

    Looks up the job's recorded extension/content-type; if the file is not at
    the expected path, probes common extensions before giving up with a 404.
    """
    print(f"📥 Download request for job {job_id}")

    # Get job information recorded when the job ran.
    if job_id in job_info:
        info = job_info[job_id]
        extension = info.get("extension", "output")
        content_type = info.get("content_type", "application/octet-stream")
        format_name = info.get("format", "unknown")

        print(f"📋 Job info found: {info}")
    else:
        # Fallback for jobs without stored info (e.g. after a restart).
        extension = "output"
        content_type = "application/octet-stream"
        format_name = "unknown"
        print(f"⚠️ No job info found for {job_id}, using fallback")

    # Try to find the file with the proper extension first.
    file_path = OUTPUT_DIR / f"{job_id}.{extension}"

    if not file_path.exists():
        # Fallback: try common extensions.
        for fallback_ext in ['txt', 'pdf', 'csv', 'json', 'html', 'md', 'output']:
            fallback_path = OUTPUT_DIR / f"{job_id}.{fallback_ext}"
            if fallback_path.exists():
                file_path = fallback_path
                extension = fallback_ext
                print(f"📁 Found file with fallback extension: {file_path}")
                break

    if not file_path.exists():
        print(f"❌ File not found: {file_path}")
        from fastapi import HTTPException
        raise HTTPException(status_code=404, detail="File not found")

    # Generate an appropriate filename for the client.
    safe_filename = f"extracted_data_{job_id}.{extension}"

    print(f"✅ Serving file: {file_path}")
    print(f"📄 Content-Type: {content_type}")
    print(f"📎 Filename: {safe_filename}")

    # Serve file with proper content type and filename.
    return FileResponse(
        path=file_path,
        filename=safe_filename,
        media_type=content_type,
        headers={
            # BUGFIX: quote the filename per RFC 6266; an unquoted value is
            # ambiguous to some clients.
            "Content-Disposition": f'attachment; filename="{safe_filename}"',
            "X-File-Format": format_name,
            "X-Original-Extension": extension
        }
    )
320
+
321
@app.get("/job/{job_id}/info")
def get_job_info(job_id: str):
    """Get job information including format and status"""
    stored = job_info.get(job_id)
    if stored is None:
        return {"error": "Job not found", "job_id": job_id}

    info = stored.copy()

    # Report whether the output file has been produced yet.
    output_file = OUTPUT_DIR / f"{job_id}.{info.get('extension', 'output')}"
    info["file_exists"] = output_file.exists()
    info["file_path"] = str(output_file) if output_file.exists() else None

    # Live proxy-pool snapshot.
    info["proxy_stats"] = smart_proxy_manager.get_proxy_stats()

    return info
339
+
340
@app.get("/proxy/stats")
def get_proxy_stats():
    """Get current proxy pool statistics plus a wall-clock timestamp."""
    import time  # local import keeps the fix self-contained

    stats = smart_proxy_manager.get_proxy_stats()
    return {
        "proxy_stats": stats,
        # BUGFIX: this sync endpoint runs in a worker thread where
        # asyncio.get_event_loop() has no running loop (RuntimeError /
        # DeprecationWarning on modern Python); use the wall clock instead.
        "timestamp": time.time()
    }
348
+
349
@app.post("/proxy/reload")
def reload_proxies():
    """Reload proxy list from environment"""
    global smart_proxy_manager
    try:
        # Rebuild the manager so it re-reads the proxy configuration.
        smart_proxy_manager = SmartProxyManager()
        return {
            "success": True,
            "message": "Proxy list reloaded successfully",
            "proxy_stats": smart_proxy_manager.get_proxy_stats()
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"Failed to reload proxies: {str(e)}"
        }
366
+
367
@app.get("/jobs")
async def get_all_jobs(limit: int = 50, offset: int = 0):
    """Get all jobs from database with pagination"""
    page = await db.get_all_jobs(limit, offset)
    summary = await db.get_job_stats()
    return {
        "jobs": page,
        "stats": summary,
        "pagination": {
            "limit": limit,
            "offset": offset
        }
    }
380
+
381
@app.get("/job/{job_id}")
async def get_job(job_id: str):
    """Get detailed job information from database"""
    job = await db.get_job(job_id)
    if not job:
        return {"error": "Job not found", "job_id": job_id}

    # Report whether the output file is present on disk.
    out_path = OUTPUT_DIR / f"{job_id}.{job.get('file_extension', 'output')}"
    present = out_path.exists()
    job["file_exists"] = present
    job["file_path"] = str(out_path) if present else None
    return {"job": job}
394
+
395
@app.delete("/job/{job_id}")
async def delete_job(job_id: str):
    """Delete a job from the database and remove its output file."""
    # BUGFIX: fetch the job record BEFORE deleting it. The original deleted
    # the row first and then queried it to learn the file extension, so the
    # lookup always came back empty and the output file was never removed.
    job = await db.get_job(job_id)

    # Delete from database.
    success = await db.delete_job(job_id)

    # Also delete the output file if it exists.
    if job:
        extension = job.get("file_extension", "output")
        file_path = OUTPUT_DIR / f"{job_id}.{extension}"
        if file_path.exists():
            file_path.unlink()

    if success:
        return {"message": f"Job {job_id} deleted successfully"}
    else:
        return {"error": "Failed to delete job"}
413
+
414
@app.get("/stats")
async def get_system_stats():
    """Get overall system statistics from database"""
    return {
        "database": await db.get_job_stats(),
        "proxy": smart_proxy_manager.get_proxy_stats(),
        "active_jobs": len(tasks),
        "active_streams": len(streaming_sessions)
    }
426
+
427
+ app.mount("/", StaticFiles(directory="frontend", html=True), name="static")
428
+
429
+ # Helper functions
430
async def broadcast(job_id: str, msg: dict):
    """Broadcast *msg* (JSON-encoded) to every WebSocket subscribed to *job_id*.

    Sockets that fail to send are treated as dead and dropped from the set.
    """
    subscribers = ws_subscribers.get(job_id)
    if not subscribers:
        return
    payload = json.dumps(msg)
    # Iterate over a copy: we may mutate the set while looping.
    for ws in list(subscribers):
        try:
            await ws.send_text(payload)
        except Exception:
            # BUGFIX: the original bare `except:` also swallowed
            # CancelledError/KeyboardInterrupt; only ordinary send failures
            # should mark a client as dead.
            subscribers.discard(ws)
438
+
439
async def register_streaming_session(job_id: str, browser_ctrl):
    """Register streaming session information"""
    streaming_sessions[job_id] = browser_ctrl

    # Start pushing frames only when the controller was built with streaming on.
    if browser_ctrl.enable_streaming:
        await browser_ctrl.start_streaming(quality=80)

    # Announce the stream to any connected status clients.
    await broadcast(job_id, {
        "type": "streaming_info",
        "streaming": browser_ctrl.get_streaming_info()
    })
451
+
452
+ # Cleanup on shutdown
453
@app.on_event("shutdown")
async def cleanup():
    """Cleanup resources on shutdown"""
    print("🧹 Cleaning up resources...")

    # Close every live browser/streaming session.
    for job_id, browser_ctrl in streaming_sessions.items():
        try:
            await browser_ctrl.__aexit__(None, None, None)
        except Exception as e:
            print(f"❌ Error cleaning up session {job_id}: {e}")
        else:
            print(f"✅ Cleaned up streaming session: {job_id}")

    streaming_sessions.clear()
    job_info.clear()

    # Release the database connection.
    await db.disconnect()

    # Print final proxy stats.
    final_stats = smart_proxy_manager.get_proxy_stats()
    print(f"📊 Final proxy stats: {final_stats}")

    print("✅ Cleanup completed")
477
+
478
@app.on_event("startup")
async def startup():
    """Initialize database connection and Telegram bot on startup"""
    print("🚀 Starting up BrowserPilot...")
    await db.connect()

    # Run the Telegram bot alongside the API without blocking startup.
    asyncio.create_task(start_bot())

    print("✅ Startup completed")
backend/proxy_manager.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, random, time, asyncio, logging
2
+ from typing import Dict, List, Optional, Tuple
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ import base64
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class ProxyHealth(Enum):
    """Lifecycle states of a proxy in the rotation pool."""
    HEALTHY = "healthy"
    DEGRADED = "degraded"
    BLOCKED = "blocked"
    FAILED = "failed"


@dataclass
class ProxyInfo:
    """Runtime record for a single proxy server and its performance metrics."""
    server: str
    username: Optional[str] = None
    password: Optional[str] = None
    location: str = "unknown"
    health: ProxyHealth = ProxyHealth.HEALTHY
    success_count: int = 0
    failure_count: int = 0
    last_used: float = 0
    blocked_sites: set = None  # created lazily in __post_init__ (mutable default)
    response_time: float = 0
    consecutive_failures: int = 0

    def __post_init__(self):
        # A mutable default cannot go in the field declaration; build the
        # per-instance set here instead.
        if self.blocked_sites is None:
            self.blocked_sites = set()

    @property
    def success_rate(self) -> float:
        """Fraction of successful uses; optimistically 1.0 when untried."""
        attempts = self.success_count + self.failure_count
        if attempts == 0:
            return 1.0
        return self.success_count / attempts

    def to_playwright_dict(self) -> Dict:
        """Render this proxy in the dict shape Playwright's launch() expects."""
        result = {"server": self.server}
        if self.username:
            result["username"] = self.username
        if self.password:
            result["password"] = self.password
        return result
45
+
46
class SmartProxyManager:
    """Proxy pool manager.

    Loads proxies from the SCRAPER_PROXIES environment variable, scores them
    by past success rate and speed, and (optionally) uses a vision model to
    recognize anti-bot walls so callers know when to rotate.
    """

    def __init__(self, vision_model=None):
        # Known proxies with their health/performance records.
        self.proxies: List[ProxyInfo] = []
        self.current_proxy_index = 0
        # Optional vision model used by detect_anti_bot_with_vision().
        self.vision_model = vision_model
        self.max_proxy_retries = 5
        # After this many consecutive failures a proxy is considered FAILED.
        self.max_consecutive_failures = 3

        self._load_proxies()

    def _load_proxies(self):
        """Load proxies from environment or config"""
        source = os.getenv("SCRAPER_PROXIES", "[]")
        # BUGFIX: malformed SCRAPER_PROXIES JSON used to raise during
        # construction and abort startup; degrade to an empty pool instead.
        try:
            proxy_data = json.loads(source)
        except json.JSONDecodeError as exc:
            logger.error(f"Invalid SCRAPER_PROXIES JSON ({exc}); no proxies loaded")
            proxy_data = []

        for proxy in proxy_data:
            if isinstance(proxy, str):
                # Bare "scheme://host:port" entry.
                self.proxies.append(ProxyInfo(server=proxy))
            elif isinstance(proxy, dict):
                self.proxies.append(ProxyInfo(
                    server=proxy.get("server", ""),
                    username=proxy.get("username"),
                    password=proxy.get("password"),
                    location=proxy.get("location", "unknown")
                ))

        logger.info(f"Loaded {len(self.proxies)} proxies for smart rotation")

    def get_best_proxy(self, exclude_blocked_for: str = None) -> Optional[ProxyInfo]:
        """Get the best available proxy based on performance metrics"""
        if not self.proxies:
            return None

        # Filter out failed and heavily blocked proxies.
        available_proxies = [
            p for p in self.proxies
            if p.health != ProxyHealth.FAILED and
            p.consecutive_failures < self.max_consecutive_failures and
            (not exclude_blocked_for or exclude_blocked_for not in p.blocked_sites)
        ]

        if not available_proxies:
            # Second chance: forgive consecutive failures and retry the filter.
            for proxy in self.proxies:
                proxy.consecutive_failures = 0
            available_proxies = [p for p in self.proxies if p.health != ProxyHealth.FAILED]

            if not available_proxies:
                logger.error("No available proxies found!")
                return None

        # Best first: highest success rate, then fastest, then least recently used.
        sorted_proxies = sorted(
            available_proxies,
            key=lambda p: (p.success_rate, -p.response_time, -p.last_used),
            reverse=True
        )

        return sorted_proxies[0]

    async def detect_anti_bot_with_vision(self, page, goal: str) -> Tuple[bool, str, Optional[str]]:
        """Use the vision model to detect anti-bot systems on the current page.

        Returns (is_anti_bot, detection_type, suggested_action); all-empty when
        no vision model is configured or analysis fails.
        """
        if not self.vision_model:
            return False, "", None

        try:
            # Take screenshot for vision analysis.
            screenshot_bytes = await page.screenshot(type='png')
            screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Get page content for context.
            page_title = await page.title()
            page_url = page.url

            # Create anti-bot detection prompt.
            detection_prompt = f"""
            ANTI-BOT DETECTION TASK:

            You are analyzing a webpage screenshot to detect if we've encountered an anti-bot system, CAPTCHA, or access restriction.

            Current URL: {page_url}
            Page Title: {page_title}
            Original Goal: {goal}

            Look for these indicators:
            1. **Cloudflare protection pages** - "Checking your browser", "Please wait", security checks
            2. **CAPTCHA challenges** - Image puzzles, reCAPTCHA, hCaptcha, text verification
            3. **Access denied pages** - "Access Denied", "Blocked", "Rate Limited"
            4. **Bot detection warnings** - "Automated traffic detected", "Unusual activity"
            5. **Verification pages** - Phone verification, email verification, identity checks
            6. **Error pages** - 403 Forbidden, 429 Rate Limited, 503 Service Unavailable
            7. **Loading/waiting pages** - Indefinite loading, "Please wait while we verify"

            Respond with JSON:
            {{
                "is_anti_bot": true/false,
                "detection_type": "cloudflare|captcha|access_denied|rate_limit|verification|error|none",
                "confidence": 0.0-1.0,
                "description": "Brief description of what you see",
                "can_solve": true/false,
                "suggested_action": "rotate_proxy|solve_captcha|wait|retry|abort"
            }}
            """

            # Use vision model to analyze.
            result = await self.vision_model.analyze_anti_bot_page(
                screenshot_b64, detection_prompt, page_url
            )

            if result.get("is_anti_bot", False):
                detection_type = result.get("detection_type", "unknown")
                suggested_action = result.get("suggested_action", "rotate_proxy")
                description = result.get("description", "Anti-bot system detected")

                logger.warning(f"🚫 Anti-bot detected: {detection_type} - {description}")
                return True, detection_type, suggested_action

            return False, "", None

        except Exception as e:
            # Best-effort detection: analysis failure must not break navigation.
            logger.error(f"Error in vision-based anti-bot detection: {e}")
            return False, "", None

    def mark_proxy_success(self, proxy: ProxyInfo, response_time: float = 0):
        """Mark proxy as successful"""
        proxy.success_count += 1
        proxy.consecutive_failures = 0
        proxy.last_used = time.time()
        proxy.response_time = response_time
        proxy.health = ProxyHealth.HEALTHY
        logger.debug(f"✅ Proxy {proxy.server} marked successful")

    def mark_proxy_failure(self, proxy: ProxyInfo, site_url: str = None, detection_type: str = None):
        """Mark proxy as failed"""
        proxy.failure_count += 1
        proxy.consecutive_failures += 1

        if detection_type in ["cloudflare", "rate_limit"]:
            # BUGFIX: only record the site when we actually know it; the old
            # code could add None to blocked_sites.
            if site_url:
                proxy.blocked_sites.add(site_url)
            proxy.health = ProxyHealth.BLOCKED
            logger.warning(f"🚫 Proxy {proxy.server} blocked by {detection_type} for {site_url}")
        else:
            proxy.health = ProxyHealth.DEGRADED

        # Mark as completely failed if too many consecutive failures.
        if proxy.consecutive_failures >= self.max_consecutive_failures:
            proxy.health = ProxyHealth.FAILED
            logger.error(f"❌ Proxy {proxy.server} marked as failed after {proxy.consecutive_failures} consecutive failures")

    def get_proxy_stats(self) -> Dict:
        """Get comprehensive proxy statistics"""
        if not self.proxies:
            return {"total": 0, "healthy": 0, "blocked": 0, "failed": 0, "available": 0}

        stats = {
            "total": len(self.proxies),
            "healthy": len([p for p in self.proxies if p.health == ProxyHealth.HEALTHY]),
            "degraded": len([p for p in self.proxies if p.health == ProxyHealth.DEGRADED]),
            "blocked": len([p for p in self.proxies if p.health == ProxyHealth.BLOCKED]),
            "failed": len([p for p in self.proxies if p.health == ProxyHealth.FAILED]),
            "available": len([p for p in self.proxies if p.health != ProxyHealth.FAILED and p.consecutive_failures < self.max_consecutive_failures])
        }
        return stats
backend/smart_browser_controller.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Manages browser navigation with intelligent anti-bot detection and proxy rotation.
2
+
3
+ import asyncio
4
+ import time
5
+ from urllib.parse import urlparse
6
+ from backend.browser_controller import BrowserController
7
+ from backend.proxy_manager import SmartProxyManager
8
+ from backend.anti_bot_detection import AntiBotVisionModel
9
+ import logging
10
+ import base64
11
+ logger = logging.getLogger(__name__)
12
+
13
class SmartBrowserController(BrowserController):
    """BrowserController with anti-bot awareness.

    Each navigation is screened by a vision model; on detection the controller
    either attempts to solve a CAPTCHA in place or rotates to a fresh proxy
    (restarting the browser) before retrying.
    """

    def __init__(self, headless: bool, proxy: dict | None, enable_streaming: bool = False):
        super().__init__(headless, proxy, enable_streaming)

        # Initialize smart proxy management
        self.vision_model = AntiBotVisionModel()
        self.proxy_manager = SmartProxyManager(self.vision_model)
        self.current_proxy = proxy
        self.max_proxy_retries = 5  # navigation attempts before giving up
        self.proxy_retry_count = 0
        self.max_captcha_solve_attempts = 3  # CAPTCHA tries before rotating away
        self.captcha_solve_count = 0

    async def smart_navigate(self, url: str, wait_until: str = "domcontentloaded", timeout: int = 30000) -> bool:
        """Navigate with intelligent anti-bot detection and proxy rotation.

        Returns True once the page loads without an anti-bot wall, False when
        all retries are exhausted or the wall is judged unresolvable.
        """
        site_domain = urlparse(url).netloc

        for attempt in range(self.max_proxy_retries):
            try:
                logger.info(f"🌐 Smart navigation attempt {attempt + 1}/{self.max_proxy_retries} to: {url}")
                start_time = time.time()

                # Navigate to the page
                response = await self.page.goto(url, wait_until=wait_until, timeout=timeout)
                response_time = time.time() - start_time

                # Wait a moment for page to fully load
                await asyncio.sleep(2)

                # Use vision model to detect anti-bot systems
                is_antibot, detection_type, suggested_action = await self.proxy_manager.detect_anti_bot_with_vision(
                    self.page, f"navigate to {url}"
                )

                if is_antibot:
                    logger.warning(f"🚫 Anti-bot detected: {detection_type}, suggested action: {suggested_action}")

                    # Handle based on suggested action
                    if suggested_action == "solve_captcha" and self.captcha_solve_count < self.max_captcha_solve_attempts:
                        success = await self._attempt_captcha_solve(url, detection_type)
                        if success:
                            logger.info("✅ CAPTCHA solved successfully!")
                            # Credit the current proxy for the successful visit.
                            if self.current_proxy:
                                proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                                if proxy_info:
                                    self.proxy_manager.mark_proxy_success(proxy_info, response_time)
                            return True
                        else:
                            self.captcha_solve_count += 1

                    # Rotate away either on explicit advice or after too many
                    # failed CAPTCHA attempts.
                    if suggested_action in ["rotate_proxy", "retry"] or self.captcha_solve_count >= self.max_captcha_solve_attempts:
                        # Mark current proxy as failed
                        if self.current_proxy:
                            proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                            if proxy_info:
                                self.proxy_manager.mark_proxy_failure(proxy_info, site_domain, detection_type)

                        # Try with new proxy
                        if attempt < self.max_proxy_retries - 1:
                            new_proxy_info = self.proxy_manager.get_best_proxy(exclude_blocked_for=site_domain)
                            if new_proxy_info:
                                new_proxy = new_proxy_info.to_playwright_dict()
                                logger.info(f"🔄 Rotating to new proxy: {new_proxy['server']}")
                                await self._restart_browser_with_proxy(new_proxy)
                                await asyncio.sleep(3)  # Wait before retry
                                continue
                            else:
                                logger.error("❌ No available proxies for rotation")
                                return False

                    if suggested_action == "abort":
                        logger.error(f"❌ Aborting navigation due to unresolvable anti-bot: {detection_type}")
                        return False

                else:
                    # Success! No anti-bot detected
                    logger.info(f"✅ Successfully navigated to: {url}")
                    if self.current_proxy:
                        proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                        if proxy_info:
                            self.proxy_manager.mark_proxy_success(proxy_info, response_time)
                    # Reset per-navigation counters for the next call.
                    self.proxy_retry_count = 0
                    self.captcha_solve_count = 0
                    return True

            except Exception as e:
                logger.error(f"❌ Navigation failed on attempt {attempt + 1}: {e}")

                # Mark proxy failure and try another
                if self.current_proxy:
                    proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                    if proxy_info:
                        self.proxy_manager.mark_proxy_failure(proxy_info, site_domain, "connection_error")

                if attempt < self.max_proxy_retries - 1:
                    new_proxy_info = self.proxy_manager.get_best_proxy(exclude_blocked_for=site_domain)
                    if new_proxy_info:
                        new_proxy = new_proxy_info.to_playwright_dict()
                        logger.info(f"🔄 Retrying with new proxy due to connection error")
                        await self._restart_browser_with_proxy(new_proxy)
                        await asyncio.sleep(3)
                        continue

        logger.error(f"❌ Failed to navigate to {url} after all retries")
        return False

    async def _attempt_captcha_solve(self, url: str, detection_type: str) -> bool:
        """Attempt to solve a CAPTCHA using the vision model.

        Returns True when a high-confidence solution was found and applied.
        """
        try:
            logger.info(f"🧩 Attempting to solve {detection_type} CAPTCHA...")

            # Take screenshot for CAPTCHA analysis
            screenshot_bytes = await self.page.screenshot(type='png')
            screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Use vision model to solve CAPTCHA
            solution = await self.vision_model.solve_captcha(screenshot_b64, url, detection_type)

            # Only act on confident solutions (> 0.7).
            if solution.get("can_solve", False) and solution.get("confidence", 0) > 0.7:
                logger.info(f"🎯 CAPTCHA solution found: {solution.get('solution', 'N/A')}")

                # Implement CAPTCHA solving logic based on solution type
                success = await self._apply_captcha_solution(solution)
                return success
            else:
                logger.warning(f"❌ Could not solve CAPTCHA: {solution.get('instructions', 'Unknown reason')}")
                return False

        except Exception as e:
            logger.error(f"❌ Error attempting CAPTCHA solve: {e}")
            return False

    async def _apply_captcha_solution(self, solution: dict) -> bool:
        """Apply the CAPTCHA solution to the page.

        Supports text and math answers (typed into the first visible text
        input, then submitted); image-selection CAPTCHAs are not implemented.
        """
        try:
            solution_type = solution.get("solution_type", "unknown")
            solution_value = solution.get("solution", "")

            if solution_type == "text":
                # Find text input and enter solution
                text_inputs = await self.page.query_selector_all('input[type="text"], input:not([type])')
                for input_elem in text_inputs:
                    if await input_elem.is_visible():
                        await input_elem.fill(solution_value)
                        await asyncio.sleep(1)

                        # Look for submit button
                        submit_buttons = await self.page.query_selector_all('button, input[type="submit"]')
                        for button in submit_buttons:
                            if await button.is_visible():
                                await button.click()
                                await asyncio.sleep(3)
                                return True

            elif solution_type == "selection":
                # Handle image selection CAPTCHAs
                logger.warning("🚧 Image selection CAPTCHA solving not fully implemented")
                return False

            elif solution_type == "math":
                # Similar to text but specifically for math solutions
                text_inputs = await self.page.query_selector_all('input[type="text"], input:not([type])')
                for input_elem in text_inputs:
                    if await input_elem.is_visible():
                        await input_elem.fill(str(solution_value))
                        await asyncio.sleep(1)

                        submit_buttons = await self.page.query_selector_all('button, input[type="submit"]')
                        for button in submit_buttons:
                            if await button.is_visible():
                                await button.click()
                                await asyncio.sleep(3)
                                return True

            return False

        except Exception as e:
            logger.error(f"❌ Error applying CAPTCHA solution: {e}")
            return False

    async def _restart_browser_with_proxy(self, new_proxy: dict):
        """Restart the browser with a new proxy.

        Closes the current browser, relaunches Chromium with the given proxy,
        and re-establishes the page, CDP streaming (when enabled), and headers.
        Raises on failure so the caller's retry loop can react.
        """
        try:
            # Close current browser
            if self.browser:
                await self.browser.close()

            # Update proxy
            self.current_proxy = new_proxy

            # Launch new browser with new proxy
            launch_options = {
                "headless": self.headless,
                "args": [
                    "--no-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-gpu",
                    "--disable-web-security",
                    "--disable-features=VizDisplayCompositor",
                    "--window-size=1280,800",
                    "--window-position=0,0",
                    "--disable-blink-features=AutomationControlled",
                    "--disable-extensions",
                    "--no-first-run",
                    "--disable-default-apps",
                    "--remote-debugging-port=0"
                ]
            }

            if new_proxy:
                launch_options["proxy"] = new_proxy

            self.browser = await self.play.chromium.launch(**launch_options)
            self.page = await self.browser.new_page(viewport={"width": 1280, "height": 800})

            # Re-setup CDP streaming if enabled
            if self.enable_streaming:
                await self._setup_cdp_streaming()

            # Set headers with randomization
            # NOTE(review): the User-Agent below is a fixed string, not
            # actually randomized — confirm whether randomization is intended.
            await self.page.set_extra_http_headers({
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            })

            logger.info("✅ Browser restarted with new proxy")

        except Exception as e:
            logger.error(f"❌ Failed to restart browser with new proxy: {e}")
            raise

    def get_proxy_stats(self) -> dict:
        """Get current proxy statistics, including this controller's counters."""
        stats = self.proxy_manager.get_proxy_stats()
        stats.update({
            "current_proxy": self.current_proxy.get("server", "None") if self.current_proxy else "None",
            "retry_count": self.proxy_retry_count,
            "captcha_solve_count": self.captcha_solve_count
        })
        return stats

    # Override the goto method to use smart navigation
    async def goto(self, url: str, wait_until: str = "domcontentloaded", timeout: int = 30000):
        """Navigate to a URL with smart anti-bot detection; raises on failure."""
        success = await self.smart_navigate(url, wait_until, timeout)
        if not success:
            raise Exception(f"Failed to navigate to {url} after intelligent retry attempts")
backend/telegram_bot.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Telegram Bot Integration for BrowserPilot
3
+ - Job completion notifications
4
+ - Remote control commands
5
+ - Keepalive alerts
6
+ """
7
+ import os
8
+ import asyncio
9
+ from typing import Optional
10
+ from telegram import Bot, Update
11
+ from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
12
+
13
class TelegramNotifier:
    """Pushes BrowserPilot job/keepalive notifications to one Telegram chat.

    Reads TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID from the environment.
    If either is missing, or initialization fails, the notifier stays
    disabled and every notify_*/send_message call is a silent no-op.
    """

    def __init__(self):
        # Credentials come from the environment; None when unset.
        self.token = os.getenv("TELEGRAM_BOT_TOKEN")
        self.chat_id = os.getenv("TELEGRAM_CHAT_ID")
        self.bot: Optional[Bot] = None
        # NOTE(review): self.app is never assigned anywhere else in this
        # file — possibly vestigial; confirm before removing.
        self.app = None
        self._initialized = False

    async def initialize(self):
        """Create the Bot and validate the token with a get_me() round-trip."""
        if not self.token or not self.chat_id:
            print("⚠️ Telegram not configured (missing TOKEN or CHAT_ID)")
            return

        try:
            self.bot = Bot(token=self.token)
            # get_me() fails fast on a bad token before any message is sent.
            await self.bot.get_me()
            self._initialized = True
            print(f"✅ Telegram bot initialized: @{self.bot.username}")
        except Exception as e:
            print(f"❌ Telegram init failed: {e}")
            self._initialized = False

    async def send_message(self, message: str, parse_mode: str = "HTML"):
        """Send *message* to the configured chat; no-op when uninitialized."""
        if not self._initialized:
            return

        try:
            await self.bot.send_message(
                chat_id=self.chat_id,
                text=message,
                parse_mode=parse_mode
            )
        except Exception as e:
            # Notification failures must never break the calling job flow.
            print(f"❌ Failed to send Telegram message: {e}")

    async def notify_job_started(self, job_id: str, prompt: str, format: str):
        """Notify that job *job_id* started (prompt truncated to 200 chars)."""
        message = (
            "🚀 <b>Job Started</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Task:</b> {prompt[:200]}\n"
            f"<b>Format:</b> {format}\n\n"
            "⏳ Processing..."
        )
        await self.send_message(message)

    async def notify_job_completed(self, job_id: str, format: str, download_url: str):
        """Notify that a job finished, linking to the downloadable result."""
        message = (
            "✅ <b>Job Completed!</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Format:</b> {format}\n\n"
            f"📥 <a href='{download_url}'>Download Result</a>"
        )
        await self.send_message(message)

    async def notify_job_failed(self, job_id: str, error: str):
        """Notify that a job failed (error text truncated to 500 chars)."""
        message = (
            "❌ <b>Job Failed</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Error:</b> {error[:500]}"
        )
        await self.send_message(message)

    async def notify_keepalive_failed(self, status_code: int):
        """Alert that the HF Space health check returned *status_code*."""
        message = (
            "⚠️ <b>KeepAlive Alert</b>\n\n"
            "🔴 HF Space health check failed!\n"
            f"<b>Status:</b> {status_code}\n\n"
            "The Space might be sleeping or down."
        )
        await self.send_message(message)

    async def notify_keepalive_restored(self):
        """Announce that the health check passes again after a failure."""
        message = (
            "✅ <b>KeepAlive Restored</b>\n\n"
            "🟢 HF Space is back online!\n\n"
            "Health check passed."
        )
        await self.send_message(message)
98
+
99
# Command handlers for bot control
async def start_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /start: reply with the command overview.

    Bug fix: the reply contains HTML tags (<b>...</b>) but was sent without
    parse_mode, so Telegram showed the literal markup to the user;
    parse_mode="HTML" renders it as formatting.
    """
    await update.message.reply_text(
        "🤖 <b>BrowserPilot Bot</b>\n\n"
        "Commands:\n"
        "/start - Show this help\n"
        "/status - Check system status\n"
        "/jobs - List recent jobs\n"
        "/ping - Check if bot is alive\n\n"
        "To create a job, send a message with your task.",
        parse_mode="HTML",
    )
111
+
112
async def ping_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /ping: simple liveness check."""
    reply = "🟢 Bot is alive!"
    await update.message.reply_text(reply)
115
+
116
async def status_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /status: report live job/stream/proxy counts.

    Bug fix: the message uses HTML tags, so it must be sent with
    parse_mode="HTML"; previously the raw tags were shown to the user.
    """
    # Imported lazily to avoid a circular import with backend.main.
    from backend.main import smart_proxy_manager, tasks, streaming_sessions

    proxy_stats = smart_proxy_manager.get_proxy_stats()

    message = (
        "📊 <b>System Status</b>\n\n"
        f"<b>Active Jobs:</b> {len(tasks)}\n"
        f"<b>Active Streams:</b> {len(streaming_sessions)}\n"
        f"<b>Proxies Available:</b> {proxy_stats.get('available', 0)}/{proxy_stats.get('total', 0)}\n\n"
        f"<b>Uptime:</b> Running"
    )
    await update.message.reply_text(message, parse_mode="HTML")
130
+
131
async def jobs_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /jobs: list the five most recent jobs with a status emoji.

    Bug fix: sends with parse_mode="HTML" so the <b>/<code> markup renders
    instead of appearing literally in the chat.
    """
    # Imported lazily to avoid a circular import with the backend package.
    from backend.database import db

    jobs = await db.get_all_jobs(limit=5)

    if not jobs:
        await update.message.reply_text("📋 No jobs found.")
        return

    message = "📋 <b>Recent Jobs</b>\n\n"
    for job in jobs[:5]:
        # Fall back to an hourglass for unknown/queued states.
        status_emoji = {"completed": "✅", "failed": "❌", "running": "🔄"}.get(job.get("status"), "⏳")
        message += (
            f"{status_emoji} <code>{job.get('id', 'unknown')[:8]}</code>\n"
            f" {job.get('prompt', 'No prompt')[:50]}...\n"
            f" Format: {job.get('format', 'unknown')} | Status: {job.get('status', 'unknown')}\n\n"
        )

    await update.message.reply_text(message, parse_mode="HTML")
151
+
152
async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Treat any plain text message as a new job prompt.

    Bug fix: the confirmation reply contains HTML tags but was sent without
    parse_mode, so users saw the literal markup; parse_mode="HTML" fixes it.
    """
    # Imported lazily to avoid a circular import with backend.main.
    from backend.main import create_job, JobRequest

    prompt = update.message.text

    if not prompt:
        return

    # Defaults for chat-initiated jobs: JSON output, headless, no streaming.
    req = JobRequest(prompt=prompt, format="json", headless=True, enable_streaming=False)

    try:
        result = await create_job(req)
        job_id = result["job_id"]

        await update.message.reply_text(
            f"✅ <b>Job Created!</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Task:</b> {prompt[:100]}...\n\n"
            "I'll notify you when it's done!",
            parse_mode="HTML",
        )

        # Mirror the notification through the notifier channel as well.
        notifier = TelegramNotifier()
        await notifier.initialize()
        await notifier.notify_job_started(job_id, prompt, "json")

    except Exception as e:
        await update.message.reply_text(f"❌ Failed to create job: {str(e)}")
183
+
184
# Global bot instance
# Module-level singleton shared with backend.main for job lifecycle
# notifications. start_bot() awaits its initialize(); until then every
# send is a no-op.
bot_notifier = TelegramNotifier()
186
+
187
async def start_bot():
    """Initialize the notifier and run the Telegram bot in polling mode.

    Bug fix: python-telegram-bot's Application has no start_polling method;
    the original `await application.start_polling(...)` raised
    AttributeError at startup. The correct async lifecycle (PTB v20+) is
    initialize() -> start() -> updater.start_polling().
    """
    await bot_notifier.initialize()

    if not bot_notifier._initialized:
        print("⚠️ Telegram bot not started (missing credentials)")
        return

    # Build the PTB application around the already-validated token.
    application = Application.builder().token(bot_notifier.token).build()

    # Register command and free-text handlers.
    application.add_handler(CommandHandler("start", start_command))
    application.add_handler(CommandHandler("ping", ping_command))
    application.add_handler(CommandHandler("status", status_command))
    application.add_handler(CommandHandler("jobs", jobs_command))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

    # Start polling without blocking the caller's event loop.
    print("🤖 Starting Telegram bot polling...")
    await application.initialize()
    await application.start()
    await application.updater.start_polling(allowed_updates=Update.ALL_TYPES)
backend/universal_extractor.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import asyncio
3
+ import functools
4
+ from typing import Dict, Any, List, Optional
5
+ import google.generativeai as genai
6
+ from backend.browser_controller import BrowserController
7
+ import base64
8
+ from bs4 import BeautifulSoup
9
+ import pandas as pd
10
+ from reportlab.lib.pagesizes import letter
11
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
12
+ from reportlab.lib.styles import getSampleStyleSheet
13
+ from pathlib import Path
14
+ import re
15
+
16
+ MODEL = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
17
+
18
+ UNIVERSAL_EXTRACTION_PROMPT = """
19
+ You are a universal data extraction specialist. Your task is to analyze any webpage and extract the most relevant information based on the user's specific goal.
20
+
21
+ USER'S GOAL: {goal}
22
+ CURRENT URL: {url}
23
+ PAGE TITLE: {title}
24
+ WEBSITE TYPE: {website_type}
25
+
26
+ EXTRACTION GUIDELINES:
27
+
28
+ **For PERSON/PROFILE information:**
29
+ - Full name and professional title
30
+ - Current position and company
31
+ - Professional background and experience
32
+ - Education and credentials
33
+ - Skills and expertise areas
34
+ - Contact information (if publicly available)
35
+ - Notable achievements or projects
36
+ - Social media profiles and professional links
37
+
38
+ **For COMPANY/ORGANIZATION information:**
39
+ - Company name and industry
40
+ - Mission, vision, and description
41
+ - Products or services offered
42
+ - Leadership team and key personnel
43
+ - Company size and locations
44
+ - Contact information and headquarters
45
+ - Recent news, funding, or updates
46
+ - Key statistics or metrics
47
+
48
+ **For PRODUCT/SERVICE information:**
49
+ - Product/service name and category
50
+ - Key features and specifications
51
+ - Pricing information
52
+ - User reviews and ratings
53
+ - Availability and purchasing options
54
+ - Technical requirements
55
+ - Comparison with alternatives
56
+
57
+ **For NEWS/CONTENT information:**
58
+ - Article headline and summary
59
+ - Publication date and source
60
+ - Key facts and main points
61
+ - Author information
62
+ - Related topics or tags
63
+ - Important quotes or statistics
64
+
65
+ **For DATA/RESEARCH information:**
66
+ - Main findings or conclusions
67
+ - Statistical data and metrics
68
+ - Methodology or sources
69
+ - Publication details
70
+ - Key insights and implications
71
+
72
+ **For GENERAL INFORMATION:**
73
+ - Extract the main facts relevant to the user's goal
74
+ - Include supporting details and context
75
+ - Provide sources and references when available
76
+ - Focus on actionable or useful information
77
+
78
+ IMPORTANT:
79
+ - Only extract information that is VISIBLE and RELEVANT to the user's goal
80
+ - Organize information in a clear, structured format
81
+ - Include metadata about the source and extraction context
82
+ - Be comprehensive but avoid irrelevant details
83
+ - If the page doesn't contain the requested information, clearly state what was found instead
84
+
85
+ WEBPAGE CONTENT:
86
+ {content}
87
+
88
+ Return a well-structured JSON object with the extracted information:
89
+ """
90
+
91
class UniversalExtractor:
    """Goal-driven extractor that turns any webpage into structured output."""

    def __init__(self):
        # Reserved for caching extraction results; not read or written by
        # any method visible in this file — possibly future use.
        self.extraction_cache = {}
94
+
95
async def extract_intelligent_content(self, browser: BrowserController, goal: str, fmt: str = "json", job_id: str = None) -> str:
    """Run the full extraction pipeline for the page loaded in *browser*.

    Steps: read URL/title, classify the site, build a text digest of the
    DOM, ask the LLM to pull out goal-relevant data, then render it as
    *fmt* (json/txt/md/html/csv/pdf). Falls back to raw-text extraction
    on any error.

    Args:
        browser: live BrowserController whose page is already navigated.
        goal: the user's extraction goal, passed verbatim to the LLM prompt.
        fmt: output format key understood by _format_output.
        job_id: optional id used to name the PDF output file.
    """
    try:
        # Page identity feeds both site-type detection and result metadata.
        url = browser.page.url
        title = await browser.page.title()

        # Heuristic classification tunes the extraction prompt per site type.
        website_type = self._detect_website_type(url, title)

        # Compact labelled digest of the DOM (headings/paragraphs/lists/tables).
        content = await self._get_structured_content(browser)

        # LLM pass: keep only information relevant to the user's goal.
        extracted_data = await self._ai_extract(goal, url, title, website_type, content)

        # job_id is forwarded so PDF output can be named after the job.
        return await self._format_output(extracted_data, fmt, goal, job_id)

    except Exception as e:
        print(f"❌ Universal extraction failed: {e}")
        return await self._fallback_extraction(browser, fmt, goal)
117
+
118
+ def _detect_website_type(self, url: str, title: str) -> str:
119
+ """Detect website type for better extraction strategy"""
120
+ url_lower = url.lower()
121
+ title_lower = title.lower()
122
+
123
+ # Professional networks
124
+ if "linkedin.com" in url_lower:
125
+ return "linkedin_profile"
126
+ if "github.com" in url_lower:
127
+ return "github_profile"
128
+
129
+ # Social media
130
+ if any(domain in url_lower for domain in ["twitter.com", "facebook.com", "instagram.com"]):
131
+ return "social_media"
132
+
133
+ # E-commerce
134
+ if any(domain in url_lower for domain in ["amazon", "ebay", "shopify", "etsy"]):
135
+ return "ecommerce"
136
+
137
+ # News and content
138
+ if any(word in title_lower for word in ["news", "article", "blog", "post"]):
139
+ return "news_content"
140
+
141
+ # Company websites
142
+ if any(word in title_lower for word in ["company", "corp", "about", "careers"]):
143
+ return "company_website"
144
+
145
+ # Search results
146
+ if "/search" in url_lower or "google.com" in url_lower:
147
+ return "search_results"
148
+
149
+ return "general_website"
150
+
151
async def _get_structured_content(self, browser: BrowserController) -> str:
    """Return a compact, labelled text digest of the page's main content.

    Strips non-content tags, then walks the first few main/article/section
    containers collecting headings, paragraphs, lists and tables as
    prefixed lines (HEADING:/TEXT:/LIST:/TABLE:). Downstream parsing
    (_create_simple_summary) relies on these prefixes. Output is truncated
    to stay within LLM token limits.

    Bug fix: the fallback previously did
    `await browser.page.inner_text("body")[:8000]`, which slices the
    coroutine object before awaiting it and raises TypeError, masking the
    original failure. The coroutine is now awaited before slicing.
    """
    try:
        html = await browser.page.content()
        soup = BeautifulSoup(html, 'html.parser')

        # Drop boilerplate / non-content elements before harvesting text.
        for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'advertisement']):
            tag.decompose()

        main_content = []

        # Prefer semantic containers; fall back to the whole body.
        main_containers = soup.find_all(['main', 'article', 'section']) or [soup.find('body')]

        for container in main_containers[:3]:  # cap containers to bound output size
            if not container:
                continue

            # Headings.
            for heading in container.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                if heading.get_text(strip=True):
                    main_content.append(f"HEADING: {heading.get_text(strip=True)}")

            # Paragraphs (skip trivially short fragments).
            for p in container.find_all('p')[:20]:
                text = p.get_text(strip=True)
                if len(text) > 20:
                    main_content.append(f"TEXT: {text}")

            # Bullet / numbered lists.
            for list_elem in container.find_all(['ul', 'ol'])[:5]:
                items = list_elem.find_all('li')
                if items:
                    main_content.append("LIST:")
                    for item in items[:10]:
                        text = item.get_text(strip=True)
                        if text:
                            main_content.append(f"  - {text}")

            # Tables, rendered as pipe-separated rows.
            for table in container.find_all('table')[:3]:
                rows = table.find_all('tr')
                if rows:
                    main_content.append("TABLE:")
                    for row in rows[:10]:
                        cells = row.find_all(['td', 'th'])
                        if cells:
                            row_text = " | ".join(cell.get_text(strip=True) for cell in cells)
                            if row_text.strip():
                                main_content.append(f"  {row_text}")

        # Overall token-budget cap.
        return "\n".join(main_content)[:12000]

    except Exception as e:
        print(f"❌ Error getting structured content: {e}")
        # Fallback: plain body text (await BEFORE slicing — see docstring).
        try:
            body_text = await browser.page.inner_text("body")
            return body_text[:8000]
        except Exception:
            return "Content extraction failed"
218
+
219
async def _ai_extract(self, goal: str, url: str, title: str, website_type: str, content: str) -> Dict[str, Any]:
    """Ask the Gemini model to extract goal-relevant data from the digest.

    Returns a dict parsed from the model's JSON output (with an added
    "_metadata" entry), a wrapped raw-text dict when no JSON is found in
    the reply, or a hand-built fallback structure if the call itself fails.
    """
    try:
        prompt = UNIVERSAL_EXTRACTION_PROMPT.format(
            goal=goal,
            url=url,
            title=title,
            website_type=website_type,
            content=content
        )

        # generate_content is blocking; run it in a worker thread so the
        # event loop stays responsive.
        response = await asyncio.to_thread(
            functools.partial(MODEL.generate_content, prompt)
        )

        raw_text = response.text

        # Locate the outermost {...} span — the model may wrap JSON in prose.
        start = raw_text.find('{')
        end = raw_text.rfind('}') + 1

        if start != -1 and end > start:
            json_str = raw_text[start:end]
            extracted_data = json.loads(json_str)

            # Provenance metadata travels with the result into every format.
            extracted_data["_metadata"] = {
                "source_url": url,
                "page_title": title,
                "website_type": website_type,
                "extraction_goal": goal,
                "extraction_timestamp": asyncio.get_event_loop().time(),
                "extraction_method": "ai_powered"
            }

            return extracted_data
        else:
            # No JSON found in the reply: keep the raw text so nothing is lost.
            return {
                "extracted_content": raw_text,
                "content_type": "unstructured_text",
                "_metadata": {
                    "source_url": url,
                    "page_title": title,
                    "website_type": website_type,
                    "extraction_goal": goal,
                    "extraction_timestamp": asyncio.get_event_loop().time(),
                    "extraction_method": "text_fallback"
                }
            }

    except Exception as e:
        print(f"❌ AI extraction failed: {e}")
        return self._create_fallback_structure(content, url, title, website_type, goal)
274
+
275
+ def _create_fallback_structure(self, content: str, url: str, title: str, website_type: str, goal: str) -> Dict[str, Any]:
276
+ """Create structured fallback when AI extraction fails"""
277
+ return {
278
+ "extraction_status": "fallback_mode",
279
+ "raw_content": content[:2000], # Truncated content
280
+ "content_summary": self._create_simple_summary(content),
281
+ "_metadata": {
282
+ "source_url": url,
283
+ "page_title": title,
284
+ "website_type": website_type,
285
+ "extraction_goal": goal,
286
+ "extraction_method": "fallback_structure",
287
+ "note": "AI extraction failed, using fallback method"
288
+ }
289
+ }
290
+
291
+ def _create_simple_summary(self, content: str) -> Dict[str, Any]:
292
+ """Create a simple summary of content without AI"""
293
+ lines = content.split('\n')
294
+
295
+ summary = {
296
+ "headings": [],
297
+ "key_text": [],
298
+ "lists": [],
299
+ "total_lines": len(lines)
300
+ }
301
+
302
+ current_list = []
303
+
304
+ for line in lines[:50]: # Limit processing
305
+ line = line.strip()
306
+ if not line:
307
+ continue
308
+
309
+ if line.startswith("HEADING:"):
310
+ summary["headings"].append(line[8:].strip())
311
+ elif line.startswith("TEXT:"):
312
+ text = line[5:].strip()
313
+ if len(text) > 30: # Only substantial text
314
+ summary["key_text"].append(text[:200])
315
+ elif line.startswith("LIST:"):
316
+ if current_list:
317
+ summary["lists"].append(current_list)
318
+ current_list = []
319
+ elif line.startswith(" -"):
320
+ current_list.append(line[4:].strip())
321
+
322
+ if current_list:
323
+ summary["lists"].append(current_list)
324
+
325
+ return summary
326
+
327
+ async def _format_output(self, data: Dict[str, Any], fmt: str, goal: str, job_id: str = None) -> str:
328
+ """Format extracted data in the requested format"""
329
+ if fmt == "json":
330
+ return json.dumps(data, indent=2, ensure_ascii=False)
331
+ elif fmt == "txt":
332
+ return self._format_as_text(data)
333
+ elif fmt == "md":
334
+ return self._format_as_markdown(data)
335
+ elif fmt == "html":
336
+ return self._format_as_html(data)
337
+ elif fmt == "csv":
338
+ return self._format_as_csv(data)
339
+ elif fmt == "pdf":
340
+ return await self._format_as_pdf(data, goal, job_id) # Pass job_id
341
+ else:
342
+ return json.dumps(data, indent=2, ensure_ascii=False)
343
+
344
+
345
+ def _format_as_text(self, data: Dict[str, Any]) -> str:
346
+ """Format as clean text"""
347
+ lines = []
348
+ metadata = data.get("_metadata", {})
349
+
350
+ if metadata:
351
+ lines.append(f"EXTRACTED INFORMATION")
352
+ lines.append(f"Source: {metadata.get('source_url', 'Unknown')}")
353
+ lines.append(f"Goal: {metadata.get('extraction_goal', 'Unknown')}")
354
+ lines.append(f"Website Type: {metadata.get('website_type', 'Unknown')}")
355
+ lines.append("-" * 60)
356
+ lines.append("")
357
+
358
+ def format_item(key: str, value, indent: int = 0):
359
+ spaces = " " * indent
360
+ if isinstance(value, dict):
361
+ if key != "_metadata":
362
+ lines.append(f"{spaces}{key.replace('_', ' ').title()}:")
363
+ for k, v in value.items():
364
+ format_item(k, v, indent + 1)
365
+ elif isinstance(value, list):
366
+ lines.append(f"{spaces}{key.replace('_', ' ').title()}:")
367
+ for item in value:
368
+ if isinstance(item, str):
369
+ lines.append(f"{spaces} • {item}")
370
+ else:
371
+ lines.append(f"{spaces} • {str(item)}")
372
+ else:
373
+ lines.append(f"{spaces}{key.replace('_', ' ').title()}: {value}")
374
+
375
+ for key, value in data.items():
376
+ format_item(key, value)
377
+
378
+ return "\n".join(lines)
379
+
380
+ def _format_as_markdown(self, data: Dict[str, Any]) -> str:
381
+ """Format as Markdown"""
382
+ lines = []
383
+ metadata = data.get("_metadata", {})
384
+
385
+ if metadata:
386
+ lines.append("# Extracted Information")
387
+ lines.append("")
388
+ lines.append(f"**Source:** {metadata.get('source_url', 'Unknown')}")
389
+ lines.append(f"**Goal:** {metadata.get('extraction_goal', 'Unknown')}")
390
+ lines.append(f"**Website Type:** {metadata.get('website_type', 'Unknown')}")
391
+ lines.append("")
392
+ lines.append("---")
393
+ lines.append("")
394
+
395
+ def format_item(key: str, value, level: int = 2):
396
+ if isinstance(value, dict):
397
+ if key != "_metadata":
398
+ lines.append(f"{'#' * level} {key.replace('_', ' ').title()}")
399
+ lines.append("")
400
+ for k, v in value.items():
401
+ format_item(k, v, level + 1)
402
+ elif isinstance(value, list):
403
+ lines.append(f"{'#' * level} {key.replace('_', ' ').title()}")
404
+ lines.append("")
405
+ for item in value:
406
+ lines.append(f"- {item}")
407
+ lines.append("")
408
+ else:
409
+ lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
410
+ lines.append("")
411
+
412
+ for key, value in data.items():
413
+ format_item(key, value)
414
+
415
+ return "\n".join(lines)
416
+
417
+ def _format_as_html(self, data: Dict[str, Any]) -> str:
418
+ """Format as HTML"""
419
+ html_parts = ["<!DOCTYPE html><html><head><title>Extracted Information</title>"]
420
+ html_parts.append("<style>body{font-family:Arial,sans-serif;margin:40px;} h1,h2,h3{color:#333;} .metadata{background:#f5f5f5;padding:15px;border-radius:5px;margin-bottom:20px;}</style>")
421
+ html_parts.append("</head><body>")
422
+
423
+ metadata = data.get("_metadata", {})
424
+ if metadata:
425
+ html_parts.append("<h1>Extracted Information</h1>")
426
+ html_parts.append("<div class='metadata'>")
427
+ html_parts.append(f"<p><strong>Source:</strong> <a href='{metadata.get('source_url', '#')}'>{metadata.get('source_url', 'Unknown')}</a></p>")
428
+ html_parts.append(f"<p><strong>Goal:</strong> {metadata.get('extraction_goal', 'Unknown')}</p>")
429
+ html_parts.append(f"<p><strong>Website Type:</strong> {metadata.get('website_type', 'Unknown')}</p>")
430
+ html_parts.append("</div>")
431
+
432
+ def format_item(key: str, value, level: int = 2):
433
+ if isinstance(value, dict):
434
+ if key != "_metadata":
435
+ html_parts.append(f"<h{level}>{key.replace('_', ' ').title()}</h{level}>")
436
+ for k, v in value.items():
437
+ format_item(k, v, min(level + 1, 6))
438
+ elif isinstance(value, list):
439
+ html_parts.append(f"<h{level}>{key.replace('_', ' ').title()}</h{level}>")
440
+ html_parts.append("<ul>")
441
+ for item in value:
442
+ html_parts.append(f"<li>{item}</li>")
443
+ html_parts.append("</ul>")
444
+ else:
445
+ html_parts.append(f"<p><strong>{key.replace('_', ' ').title()}:</strong> {value}</p>")
446
+
447
+ for key, value in data.items():
448
+ format_item(key, value)
449
+
450
+ html_parts.append("</body></html>")
451
+ return "\n".join(html_parts)
452
+
453
+ def _format_as_csv(self, data: Dict[str, Any]) -> str:
454
+ """Format as CSV"""
455
+ try:
456
+ # Flatten the nested structure
457
+ flattened = self._flatten_dict(data)
458
+
459
+ # Create DataFrame
460
+ df = pd.DataFrame([flattened])
461
+
462
+ return df.to_csv(index=False)
463
+
464
+ except Exception as e:
465
+ print(f"❌ CSV formatting failed: {e}")
466
+ # Simple fallback
467
+ csv_lines = ["Field,Value"]
468
+ for key, value in data.items():
469
+ if key != "_metadata":
470
+ clean_value = str(value).replace('"', '""').replace('\n', ' ')
471
+ csv_lines.append(f'"{key}","{clean_value}"')
472
+ return "\n".join(csv_lines)
473
+
474
async def _format_as_pdf(self, data: Dict[str, Any], goal: str, job_id: str = None) -> str:
    """Write the extraction result to outputs/<job_id>.pdf via ReportLab.

    Returns the sentinel string "PDF_DIRECT_SAVE:<path>" so the caller
    knows the payload is already on disk rather than inline content.

    Raises:
        RuntimeError: if the PDF could not be built.
        ImportError: if ReportLab is missing and auto-install fails.
    """
    try:
        # Local imports keep ReportLab optional until a PDF is requested.
        from reportlab.lib.pagesizes import letter
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
        from reportlab.lib.styles import getSampleStyleSheet
        import html

        output_dir = Path("outputs")
        output_dir.mkdir(exist_ok=True)

        # Prefer the job id for a stable filename; otherwise timestamp it.
        if job_id:
            filename = f"{job_id}.pdf"
        else:
            import time
            timestamp = int(time.time())
            filename = f"extracted_data_{timestamp}.pdf"

        filepath = output_dir / filename

        doc = SimpleDocTemplate(str(filepath), pagesize=letter, topMargin=72, bottomMargin=72)
        styles = getSampleStyleSheet()
        story = []

        # Title
        story.append(Paragraph("Extracted Information", styles['Title']))
        story.append(Spacer(1, 20))

        # Provenance block; values are HTML-escaped because Paragraph
        # interprets its text as markup.
        metadata = data.get("_metadata", {})
        if metadata:
            story.append(Paragraph(f"<b>Source:</b> {html.escape(str(metadata.get('source_url', 'Unknown')))}", styles['Normal']))
            story.append(Paragraph(f"<b>Goal:</b> {html.escape(str(metadata.get('extraction_goal', 'Unknown')))}", styles['Normal']))
            story.append(Paragraph(f"<b>Website Type:</b> {html.escape(str(metadata.get('website_type', 'Unknown')))}", styles['Normal']))
            story.append(Spacer(1, 20))

        def add_content(key: str, value, level: int = 0):
            # Recursively flows nested dicts/lists/scalars into the story,
            # truncating long strings so Paragraph layout stays manageable.
            if isinstance(value, dict):
                # _metadata is already rendered in the provenance block above.
                if key != "_metadata":
                    style = styles['Heading1'] if level == 0 else styles['Heading2']
                    clean_key = html.escape(key.replace('_', ' ').title())
                    story.append(Paragraph(clean_key, style))
                    story.append(Spacer(1, 10))
                    for k, v in value.items():
                        add_content(k, v, level + 1)
            elif isinstance(value, list):
                clean_key = html.escape(key.replace('_', ' ').title())
                story.append(Paragraph(f"<b>{clean_key}:</b>", styles['Normal']))
                story.append(Spacer(1, 6))
                for item in value:
                    # Truncate long items and escape markup characters.
                    item_str = html.escape(str(item))
                    if len(item_str) > 300:
                        item_str = item_str[:300] + "..."
                    story.append(Paragraph(f"• {item_str}", styles['Normal']))
                story.append(Spacer(1, 10))
            else:
                clean_key = html.escape(key.replace('_', ' ').title())
                value_str = html.escape(str(value))
                if len(value_str) > 800:
                    value_str = value_str[:800] + "..."
                story.append(Paragraph(f"<b>{clean_key}:</b> {value_str}", styles['Normal']))
                story.append(Spacer(1, 8))

        for key, value in data.items():
            add_content(key, value)

        # Build PDF with error handling.
        try:
            doc.build(story)
            print(f"✅ PDF successfully generated: {filepath}")
            # Sentinel: caller serves/copies the already-written file.
            return f"PDF_DIRECT_SAVE:{filepath}"
        except Exception as build_error:
            print(f"❌ PDF build error: {build_error}")
            raise build_error

    except ImportError:
        # NOTE(review): runtime pip install followed by a recursive retry —
        # if the import keeps failing after a successful install this could
        # recurse indefinitely; consider a single-retry guard.
        print("❌ ReportLab not installed. Installing...")
        import subprocess
        import sys
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", "reportlab"])
            # Try again after installation.
            return await self._format_as_pdf(data, goal, job_id)
        except subprocess.CalledProcessError:
            print("❌ Failed to install ReportLab")
            raise ImportError("ReportLab installation failed")

    except Exception as e:
        print(f"❌ PDF generation failed: {e}")
        # Propagate instead of writing a fallback file so callers see the error.
        raise RuntimeError(f"PDF generation failed: {str(e)}")
569
+
570
+
571
+ def _flatten_dict(self, d: Dict[str, Any], parent_key: str = '', sep: str = '_') -> Dict[str, Any]:
572
+ """Flatten nested dictionary for CSV export"""
573
+ items = []
574
+ for k, v in d.items():
575
+ new_key = f"{parent_key}{sep}{k}" if parent_key else k
576
+ if isinstance(v, dict):
577
+ items.extend(self._flatten_dict(v, new_key, sep=sep).items())
578
+ elif isinstance(v, list):
579
+ items.append((new_key, '; '.join(map(str, v))))
580
+ else:
581
+ items.append((new_key, v))
582
+ return dict(items)
583
+
584
async def _fallback_extraction(self, browser: BrowserController, fmt: str, goal: str) -> str:
    """Last-resort extraction: dump raw page text when AI processing fails."""
    try:
        body_text = await browser.page.inner_text("body")
        page_url = browser.page.url
        page_title = await browser.page.title()

        payload = {
            "content": body_text[:3000],  # keep the JSON payload bounded
            "source": page_url,
            "title": page_title,
            "extraction_method": "fallback",
            "note": "AI extraction failed, using basic text extraction"
        }

        if fmt == "json":
            return json.dumps(payload, indent=2)
        if fmt == "txt":
            return f"Title: {page_title}\nSource: {page_url}\n\nContent:\n{body_text}"
        return body_text
    except Exception as e:
        return f"Extraction completely failed: {str(e)}"
backend/vision_model.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import google.generativeai as genai
4
+ from dotenv import load_dotenv
5
+ import json
6
+ import asyncio
7
+ import functools
8
+ from PIL import Image
9
+ import io
10
+
11
+ load_dotenv()
12
+
13
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
14
+ MODEL = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
15
+
16
+ # Universal system prompt - works for ANY website
17
+ SYSTEM_PROMPT = """
18
+ You are a universal web automation agent that can navigate and interact with ANY website to accomplish user goals.
19
+
20
+ You will receive:
21
+ 1. A screenshot of the current webpage
22
+ 2. Interactive elements with indices
23
+ 3. The user's specific goal/task
24
+ 4. Current URL and page context
25
+
26
+ Your job is to analyze the current page and determine the BEST next action to accomplish the user's goal, regardless of what type of website this is.
27
+
28
+ AVAILABLE ACTIONS:
29
+
30
+ CLICK - Click on any interactive element:
31
+ {"action": "click", "index": N, "reason": "specific reason for clicking this element"}
32
+
33
+ TYPE - Input text into any input field:
34
+ {"action": "type", "index": N, "text": "text to enter", "reason": "reason for entering this text"}
35
+
36
+ SCROLL - Navigate the page vertically:
37
+ {"action": "scroll", "direction": "down|up", "amount": 300-800, "reason": "reason for scrolling"}
38
+
39
+ PRESS_KEY - Press any keyboard key:
40
+ {"action": "press_key", "key": "Enter|Tab|Escape|Space|etc", "reason": "reason for key press"}
41
+
42
+ NAVIGATE - Go to a specific URL (only if needed):
43
+ {"action": "navigate", "url": "https://example.com", "reason": "reason for navigation"}
44
+
45
+ EXTRACT - Save current page content (when goal is achieved):
46
+ {"action": "extract", "reason": "goal accomplished, extracting relevant information"}
47
+
48
+ DONE - Mark task as complete:
49
+ {"action": "done", "reason": "task successfully completed"}
50
+
51
+ DECISION RULES:
52
+ 1. **Analyze the user's goal** - understand what information/action they want
53
+ 2. **Assess current page** - what type of page is this? What can be done here?
54
+ 3. **Choose best action** - what single action moves closest to the goal?
55
+ 4. **Be adaptive** - different sites have different patterns, adapt accordingly
56
+
57
+ WEBSITE TYPE DETECTION:
58
+ - **Search engines**: Look for search boxes, enter queries, click results
59
+ - **E-commerce**: Find products, navigate categories, view details
60
+ - **Social media**: Look for profiles, posts, navigation menus
61
+ - **Forms/Applications**: Fill required fields, submit forms
62
+ - **Content sites**: Navigate articles, extract information
63
+ - **Databases/APIs**: Use search/filter features, extract data
64
+
65
+ INTERACTION STRATEGY:
66
+ - **First time on page**: Look for main navigation, search, or primary actions
67
+ - **Search results**: Click on most relevant results for user's goal
68
+ - **Product pages**: Look for details, specifications, reviews as needed
69
+ - **Profile/About pages**: Extract relevant information about person/entity
70
+ - **Forms**: Fill systematically, validate inputs
71
+ - **Lists/Tables**: Use pagination, sorting, filtering as needed
72
+
73
+ EXTRACTION TIMING:
74
+ - Extract when you have found the specific information the user requested
75
+ - Don't extract from search results - click through to detailed pages first
76
+ - For research tasks: navigate to authoritative sources before extracting
77
+ - For data collection: ensure you're on pages with comprehensive information
78
+
79
+ REMEMBER: Be universal - work with ANY website structure, ANY content type, ANY user goal.
80
+ """
81
+
82
async def decide(img_bytes: bytes, page_state, goal: str) -> dict:
    """Ask the vision model for the single best next browser action.

    Args:
        img_bytes: Raw screenshot bytes of the current page.
        page_state: Page snapshot exposing ``url``, ``title`` and
            ``selector_map`` (index -> element with tag_name/text/attributes
            and clickable/input flags). NOTE(review): concrete type is
            declared elsewhere in the project — confirm attribute set.
        goal: The user's natural-language task description.

    Returns:
        An action dict following SYSTEM_PROMPT's schema (click/type/scroll/
        press_key/navigate/extract/done) with a ``token_usage`` entry added.
        On any failure, returns a ``done`` action carrying ``error`` and
        zeroed token counts — callers never see an exception from here.
    """
    print(f"🤖 Universal AI decision")
    print(f"📊 Image size: {len(img_bytes)} bytes")
    print(f"🎯 Goal: {goal}")
    print(f"🖱️ Interactive elements: {len(page_state.selector_map)}")
    print(f"📍 Current URL: {page_state.url}")

    try:
        # Compress image efficiently: downscale then re-encode as JPEG so the
        # multimodal prompt stays small (and cheap in tokens).
        image = Image.open(io.BytesIO(img_bytes))
        max_size = (1280, 800)
        image.thumbnail(max_size, Image.Resampling.LANCZOS)

        compressed_buffer = io.BytesIO()
        image.save(compressed_buffer, format='JPEG', quality=75, optimize=True)
        compressed_image = Image.open(compressed_buffer)

        # Create comprehensive element information (dynamic based on content)
        interactive_elements = []
        max_elements = min(20, len(page_state.selector_map))  # Adaptive limit

        # Lowest indices first; only the first max_elements are described.
        for index in sorted(page_state.selector_map.keys())[:max_elements]:
            elem = page_state.selector_map[index]

            # Dynamic element description based on context.
            # elem.text may be None — guarded before slicing.
            element_data = {
                "index": index,
                "tag": elem.tag_name,
                "text": elem.text[:60] if elem.text else "",
                "clickable": elem.is_clickable,
                "input": elem.is_input,
            }

            # Add contextual attributes dynamically (all truncated to keep
            # the JSON blob compact).
            if elem.attributes.get("href"):
                element_data["link"] = elem.attributes["href"][:100]
            if elem.attributes.get("placeholder"):
                element_data["placeholder"] = elem.attributes["placeholder"][:30]
            if elem.attributes.get("type"):
                element_data["type"] = elem.attributes["type"]
            if elem.attributes.get("class"):
                # Extract meaningful class hints (only classes that suggest a
                # role like search/login/nav are forwarded to the model).
                classes = elem.attributes["class"].lower()
                if any(hint in classes for hint in ["search", "login", "submit", "button", "nav", "menu"]):
                    element_data["class_hint"] = classes[:50]
            if elem.attributes.get("id"):
                element_data["id"] = elem.attributes["id"][:30]

            interactive_elements.append(element_data)

        # Detect website type dynamically (heuristic, see detect_website_type)
        website_type = detect_website_type(page_state.url, page_state.title, interactive_elements)

        # Create dynamic context-aware prompt
        prompt = f"""
        USER GOAL: {goal}

        CURRENT CONTEXT:
        - URL: {page_state.url}
        - Page Title: {page_state.title}
        - Website Type: {website_type}
        - Available Elements: {len(interactive_elements)}

        INTERACTIVE ELEMENTS:
        {json.dumps(interactive_elements, indent=1)}

        Based on the user's goal and current page context, what is the BEST next action?
        Consider the website type and adapt your strategy accordingly.
        """

        content = [SYSTEM_PROMPT, prompt, compressed_image]

        # Count tokens and send request. Both SDK calls are blocking, so they
        # are pushed to a worker thread to keep the event loop responsive.
        token_count_response = await asyncio.to_thread(
            functools.partial(MODEL.count_tokens, content)
        )
        input_tokens = token_count_response.total_tokens

        response = await asyncio.to_thread(
            functools.partial(MODEL.generate_content, content)
        )

        raw_text = response.text
        response_tokens = await count_response_tokens(raw_text)
        total_tokens = input_tokens + response_tokens

        # Parse response with validation (falls back to heuristics on bad JSON)
        result = parse_ai_response(raw_text, page_state, goal, website_type)

        # Add token usage so the caller can track spend per decision.
        result['token_usage'] = {
            'prompt_tokens': input_tokens,
            'response_tokens': response_tokens,
            'total_tokens': total_tokens
        }

        print(f"🎯 Universal Result: {result}")
        return result

    except Exception as e:
        print(f"❌ Error: {e}")
        return {
            "action": "done",
            "error": str(e),
            "token_usage": {"prompt_tokens": 0, "response_tokens": 0, "total_tokens": 0}
        }
189
+
190
def detect_website_type(url: str, title: str, elements: list) -> str:
    """Heuristically classify the current page by URL, title and elements.

    Args:
        url: Current page URL.
        title: Current page title.
        elements: List of element-description dicts as built by ``decide``
            (each may carry "text" and "input" keys).

    Returns:
        One of: "search_engine", "search_results", "ecommerce",
        "social_profile", "form_application", "content_site",
        "company_site", "database_site", "general_website".
        Checks run in priority order; the first match wins.
    """
    url_lower = url.lower()
    title_lower = title.lower()

    # Search engines
    if any(domain in url_lower for domain in ["google.com", "bing.com", "duckduckgo.com", "yahoo.com"]):
        if "/search" in url_lower or any("search" in elem.get("text", "").lower() for elem in elements):
            return "search_results"
        return "search_engine"

    # E-commerce
    if any(domain in url_lower for domain in ["amazon", "ebay", "shopify", "etsy", "alibaba"]):
        return "ecommerce"
    if any(word in title_lower for word in ["shop", "store", "buy", "cart", "product"]):
        return "ecommerce"

    # Social media
    if any(domain in url_lower for domain in ["linkedin", "twitter", "facebook", "instagram", "github"]):
        return "social_profile"

    # Forms/Applications: more than 3 input elements suggests a form page.
    # (Rewritten from a per-element generator whose filter recomputed the
    # same input count for every element — O(n^2) and hard to read.)
    if sum(1 for elem in elements if elem.get("input")) > 3:
        return "form_application"

    # Content/News sites
    if any(word in title_lower for word in ["news", "article", "blog", "post"]):
        return "content_site"

    # Company/Organization
    if any(word in title_lower for word in ["company", "corp", "inc", "ltd", "about", "contact"]):
        return "company_site"

    # Database/Directory
    if any(word in url_lower for word in ["directory", "database", "catalog", "listing"]):
        return "database_site"

    return "general_website"
228
+
229
def parse_ai_response(raw_text: str, page_state, goal: str, website_type: str) -> dict:
    """Parse the model's raw reply into a validated action dict.

    Falls back to a heuristic action (``get_fallback_action``) whenever the
    reply contains no JSON object, the JSON is malformed, the action name is
    unknown, or a referenced element index is not in the selector map.
    """
    VALID_ACTIONS = ("click", "type", "scroll", "press_key", "navigate", "extract", "done")

    # Take the widest {...} span in the reply as the candidate JSON object.
    first = raw_text.find('{')
    last = raw_text.rfind('}') + 1
    if first == -1 or last <= first:
        return get_fallback_action(page_state, goal, website_type)

    try:
        parsed = json.loads(raw_text[first:last])
    except json.JSONDecodeError as err:
        print(f"❌ JSON error: {err}")
        return get_fallback_action(page_state, goal, website_type)

    # Reject unknown action names.
    if parsed.get("action") not in VALID_ACTIONS:
        return get_fallback_action(page_state, goal, website_type)

    # Reject element indices the page doesn't actually have.
    if "index" in parsed and parsed["index"] not in page_state.selector_map:
        print(f"❌ Invalid index {parsed['index']}")
        return get_fallback_action(page_state, goal, website_type)

    return parsed
257
+
258
def get_fallback_action(page_state, goal: str, website_type: str) -> dict:
    """Heuristic action chosen when the model's reply can't be used.

    Strategy, in order:
      1. For search-like goals, type the query into an obvious search box.
      2. Click the first element whose text overlaps the goal's first words.
      3. On a search-results page, click the first substantial result.
      4. Otherwise scroll down to reveal more of the page.

    Fix: ``elem.text`` can be None (bare inputs/links — the element builder
    in ``decide`` guards it the same way), so it is defaulted to "" before
    calling ``.lower()`` / ``len()`` instead of crashing.
    """
    goal_lower = goal.lower()

    # 1) Look for obvious search boxes
    for index, elem in page_state.selector_map.items():
        if elem.is_input:
            # elem.text may be None — guard before .lower().
            haystack = (elem.text or "").lower() + str(elem.attributes).lower()
            if any(word in haystack for word in ["search", "query", "find"]) and "search" in goal_lower:
                return {"action": "type", "index": index, "text": extract_search_query(goal),
                        "reason": "Found search box for user query"}

    # 2) Look for relevant links based on goal
    for index, elem in page_state.selector_map.items():
        if elem.is_clickable and elem.text:
            if any(word in elem.text.lower() for word in goal_lower.split()[:3]):
                return {"action": "click", "index": index,
                        "reason": f"Found relevant link: {elem.text[:30]}"}

    # 3) Default behaviors by website type
    if website_type == "search_results":
        # Click first meaningful result (None-safe length check).
        for index, elem in page_state.selector_map.items():
            if elem.is_clickable and len(elem.text or "") > 10:
                return {"action": "click", "index": index,
                        "reason": "Clicking search result for more details"}

    # 4) Generic fallback
    return {"action": "scroll", "direction": "down", "amount": 400,
            "reason": "Exploring page to find relevant content"}
288
+
289
def extract_search_query(goal: str) -> str:
    """Strip command-style filler words from *goal* and return a short query.

    At most the first six remaining words are kept so the query stays concise.
    """
    # Words that describe the command rather than the search subject.
    filler = frozenset(
        ("go", "to", "search", "for", "find", "get", "save", "extract", "info", "about")
    )
    kept = []
    for token in goal.split():
        if token.lower() not in filler:
            kept.append(token)
        if len(kept) == 6:  # Limit query length
            break
    return " ".join(kept)
296
+
297
async def count_response_tokens(response_text: str) -> int:
    """Count tokens in *response_text* using the model's tokenizer.

    The SDK call is blocking, so it runs in a worker thread. If it fails for
    any reason, fall back to a rough 4-characters-per-token estimate.
    """
    try:
        counted = await asyncio.to_thread(MODEL.count_tokens, response_text)
        return counted.total_tokens
    except Exception as err:  # API/network errors: degrade gracefully
        print(f"❌ Error counting response tokens: {err}")
        return len(response_text) // 4
307
+
308
+
309
# NOTE(review): kept for reference only — this probing approach does not work
# with the current response structure / generative model; decide() instead
# counts tokens explicitly via MODEL.count_tokens (see count_response_tokens).
# extract token usage
def extract_token_usage(response):
    """
    Extract token usage from various possible locations in the response.

    Tries, in order: the top-level ``usage_metadata`` attribute, a
    ``usage_metadata`` entry inside ``response.result``'s dict form, a
    ``token_count`` on the first candidate, and finally a ``token_count``
    on the first candidate of ``response.result``. Returns a dict with
    prompt/response/total token counts, or None if nothing is found or an
    error occurs (never raises).
    """
    try:
        # Method 1: Check usage_metadata attribute
        if hasattr(response, 'usage_metadata') and response.usage_metadata:
            print(f"📊 Found usage_metadata:")
            print(f" - Response object: {response.usage_metadata}")
            return {
                'prompt_tokens': getattr(response.usage_metadata, 'prompt_token_count', 0),
                'response_tokens': getattr(response.usage_metadata, 'candidates_token_count', 0),
                'total_tokens': getattr(response.usage_metadata, 'total_token_count', 0)
            }

        # Method 2: Check if it's in the result
        if hasattr(response, 'result') and response.result:
            result_dict = response.result.to_dict() if hasattr(response.result, 'to_dict') else {}
            print(f"📊 Checking result dict: {result_dict.keys() if isinstance(result_dict, dict) else 'Not a dict'}")

            if 'usage_metadata' in result_dict:
                usage = result_dict['usage_metadata']
                return {
                    'prompt_tokens': usage.get('prompt_token_count', 0),
                    'response_tokens': usage.get('candidates_token_count', 0),
                    'total_tokens': usage.get('total_token_count', 0)
                }

        # Method 3: Check candidates for token_count
        if hasattr(response, 'candidates') and response.candidates:
            candidate = response.candidates[0]
            if hasattr(candidate, 'token_count'):
                print(f"📊 Found token_count in candidate: {candidate.token_count}")
                # This might not give us the breakdown, but it's something:
                # only the candidate's own count is known, so prompt tokens
                # are reported as 0.
                return {
                    'prompt_tokens': 0,  # Not available separately
                    'response_tokens': candidate.token_count,
                    'total_tokens': candidate.token_count
                }

        # Method 4: Try to access through the internal result
        if hasattr(response, 'result') and hasattr(response.result, 'candidates'):
            candidates = response.result.candidates
            if candidates and len(candidates) > 0:
                candidate = candidates[0]
                if hasattr(candidate, 'token_count'):
                    return {
                        'prompt_tokens': 0,
                        'response_tokens': candidate.token_count,
                        'total_tokens': candidate.token_count
                    }

        print("❌ No token usage found in any expected location")
        return None

    except Exception as e:
        print(f"❌ Error extracting token usage: {e}")
        return None
backend/vnc_proxy.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/vnc_proxy.py
2
+ import asyncio
3
+ import websockets
4
+ import socket
5
+ import logging
6
+ from typing import Optional
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class VNCWebSocketProxy:
    """Bidirectional proxy between a WebSocket client (e.g. noVNC) and a raw
    VNC/RFB TCP server.

    One instance serves a single VNC endpoint; every incoming WebSocket
    connection gets its own TCP connection to that VNC server, with two pump
    coroutines copying bytes in each direction.
    """

    def __init__(self, vnc_host: str = "localhost", vnc_port: int = 5901):
        # Target VNC server endpoint.
        self.vnc_host = vnc_host
        self.vnc_port = vnc_port
        # websockets server handle; None until start_proxy() succeeds.
        self.server = None

    async def start_proxy(self, websocket_port: int):
        """Start the WebSocket to VNC proxy server.

        Returns True on success, False if the server could not be started.
        """
        try:
            self.server = await websockets.serve(
                self.handle_websocket,
                "localhost",
                websocket_port
            )
            logger.info(f"VNC WebSocket proxy started on port {websocket_port}")
            return True
        except Exception as e:
            logger.error(f"Failed to start VNC proxy: {e}")
            return False

    async def stop_proxy(self):
        """Stop the proxy server and wait for it to close."""
        if self.server:
            self.server.close()
            await self.server.wait_closed()

    async def handle_websocket(self, websocket, path=None):
        """Handle one WebSocket connection and proxy it to the VNC server.

        ``path`` is optional for compatibility with both old (handler(ws, path))
        and new (handler(ws)) calling conventions of the websockets library.
        """
        vnc_socket = None
        try:
            # Connect to the VNC server without blocking the event loop:
            # the previous blocking connect() stalled every other connection.
            loop = asyncio.get_running_loop()
            vnc_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            vnc_socket.setblocking(False)
            await loop.sock_connect(vnc_socket, (self.vnc_host, self.vnc_port))

            logger.info(f"Connected to VNC server at {self.vnc_host}:{self.vnc_port}")

            # Create tasks for bidirectional communication
            ws_to_vnc_task = asyncio.create_task(
                self.websocket_to_vnc(websocket, vnc_socket)
            )
            vnc_to_ws_task = asyncio.create_task(
                self.vnc_to_websocket(vnc_socket, websocket)
            )

            # Stop as soon as EITHER direction finishes (disconnection).
            # gather() waited for BOTH, leaving the surviving pump blocked
            # forever on a dead connection.
            done, pending = await asyncio.wait(
                {ws_to_vnc_task, vnc_to_ws_task},
                return_when=asyncio.FIRST_COMPLETED,
            )
            for task in pending:
                task.cancel()
            # Let the cancellation settle; swallow CancelledError.
            await asyncio.gather(*pending, return_exceptions=True)

        except Exception as e:
            logger.error(f"Error in VNC proxy: {e}")
        finally:
            if vnc_socket:
                vnc_socket.close()

    async def websocket_to_vnc(self, websocket, vnc_socket):
        """Forward binary WebSocket frames to the VNC TCP socket."""
        try:
            async for message in websocket:
                # Only binary frames carry RFB data; ignore text frames.
                if isinstance(message, bytes):
                    await asyncio.get_running_loop().sock_sendall(vnc_socket, message)
        except websockets.exceptions.ConnectionClosed:
            pass
        except Exception as e:
            logger.error(f"Error forwarding WebSocket to VNC: {e}")

    async def vnc_to_websocket(self, vnc_socket, websocket):
        """Forward VNC TCP data to the WebSocket as binary frames."""
        try:
            while True:
                data = await asyncio.get_running_loop().sock_recv(vnc_socket, 4096)
                if not data:  # EOF: VNC server closed the connection
                    break
                await websocket.send(data)
        except websockets.exceptions.ConnectionClosed:
            pass
        except Exception as e:
            logger.error(f"Error forwarding VNC to WebSocket: {e}")
87
+
88
# Global proxy manager: maps WebSocket port -> running VNCWebSocketProxy.
vnc_proxies = {}

async def start_vnc_proxy(vnc_port: int) -> Optional[int]:
    """Start (or reuse) a WebSocket proxy for the given VNC port.

    The WebSocket port is derived as ``vnc_port + 1000``. Returns that port
    on success (or when a proxy already listens there), None on failure.
    """
    ws_port = vnc_port + 1000  # Offset for WebSocket port

    # Already running? Reuse the existing proxy.
    if ws_port in vnc_proxies:
        return ws_port

    proxy = VNCWebSocketProxy("localhost", vnc_port)
    if not await proxy.start_proxy(ws_port):
        return None
    vnc_proxies[ws_port] = proxy
    return ws_port

async def stop_vnc_proxy(websocket_port: int):
    """Stop and forget the proxy listening on *websocket_port*, if any."""
    if websocket_port not in vnc_proxies:
        return
    await vnc_proxies[websocket_port].stop_proxy()
    del vnc_proxies[websocket_port]