Spaces:

fagun18
/

fagun-browser-testing

Sleeping

File size: 5,380 Bytes

2b89d68

"""
🤖 Fagun Browser Automation Testing Agent - Site Audit
======================================================

Site-wide intelligent audit: crawl pages, detect broken links, collect console/network errors,
run form tests where applicable, and generate an aggregated report.
"""

import asyncio
import logging
from typing import Any, Dict, List, Set, Tuple
from urllib.parse import urljoin, urlparse

from playwright.async_api import Page

from src.utils.intelligent_form_testing import IntelligentFormTester

logger = logging.getLogger(__name__)


class SiteAuditor:
    def __init__(self, page: Page, form_tester_factory):
        """page: a Playwright Page bound to a BrowserContext
        form_tester_factory: callable that returns IntelligentFormTester for a given page
        """
        self.page = page
        self.form_tester_factory = form_tester_factory

    async def audit(self, start_url: str, max_pages: int = 10, max_depth: int = 2) -> Dict[str, Any]:
        visited: Set[str] = set()
        queue: List[Tuple[str, int]] = [(start_url, 0)]

        origin = self._origin(start_url)

        pages_summary: List[Dict[str, Any]] = []
        broken_links: List[Dict[str, str]] = []

        while queue and len(visited) < max_pages:
            url, depth = queue.pop(0)
            if url in visited or depth > max_depth:
                continue
            visited.add(url)

            try:
                await self.page.goto(url, wait_until='domcontentloaded')
                await asyncio.sleep(0.5)

                page_result: Dict[str, Any] = {
                    "url": url,
                    "title": await self.page.title(),
                    "console_errors": await self._collect_console_errors(),
                    "network_issues": [],
                }

                # Basic broken link scan on current page (HEAD requests)
                links = await self._extract_links()
                same_origin_links = [l for l in links if self._origin(l) == origin]

                # Check a subset to keep runtime in bounds
                for link in same_origin_links[:50]:
                    status = await self._head_status(link)
                    if status >= 400:
                        broken = {"href": link, "status": str(status), "on_page": url}
                        broken_links.append(broken)

                # Run intelligent form testing if forms exist
                has_form = (await self.page.locator("form").count()) > 0
                if has_form:
                    tester: IntelligentFormTester = self.form_tester_factory(self.page)
                    try:
                        await tester.discover_form_fields()
                        scenarios = await tester.generate_test_scenarios()
                        await tester.execute_test_scenarios(scenarios)
                        form_report = await tester.generate_comprehensive_report()

                        # Add basic accessibility checks for the page
                        a11y = await tester.run_basic_accessibility_checks()

                        page_result["form_testing"] = form_report
                        page_result["accessibility"] = a11y
                    except Exception as e:
                        page_result["form_testing_error"] = str(e)

                pages_summary.append(page_result)

                # Enqueue next links
                for link in same_origin_links:
                    if link not in visited:
                        queue.append((link, depth + 1))

            except Exception as e:
                logger.warning(f"Audit navigation error at {url}: {e}")
                pages_summary.append({"url": url, "error": str(e)})

        return {
            "start_url": start_url,
            "total_pages_visited": len(visited),
            "pages": pages_summary,
            "broken_links": broken_links,
        }

    async def _extract_links(self) -> List[str]:
        anchors = await self.page.locator("a[href]").all()
        urls: List[str] = []
        base = self.page.url
        for a in anchors[:200]:
            try:
                href = await a.get_attribute("href")
                if href:
                    urls.append(urljoin(base, href))
            except Exception:
                continue
        return urls

    async def _head_status(self, url: str) -> int:
        try:
            # Use context.request for lightweight request
            resp = await self.page.context.request.get(url, max_redirects=2)
            return resp.status
        except Exception:
            return 599

    async def _collect_console_errors(self) -> List[str]:
        # Snapshot console errors present in DOM if any common containers exist
        errors: List[str] = []
        try:
            # Heuristic: look for aria role alert or typical error classes
            loc = self.page.locator(".error, .alert-danger, [role='alert']").all()
            for l in await loc:
                try:
                    txt = await l.text_content()
                    if txt:
                        errors.append(txt.strip())
                except Exception:
                    continue
        except Exception:
            pass
        return errors

    def _origin(self, url: str) -> str:
        u = urlparse(url)
        return f"{u.scheme}://{u.netloc}"