Spaces:

AkshayStark
/

webscrapper

Sleeping

File size: 2,048 Bytes

95f7828

import logging
from urllib.parse import urljoin, urlparse

from playwright.async_api import Locator, Page

from .config import settings

# Module-level logger registered under the "fastapi_cli" name (presumably so
# that messages share the FastAPI CLI's logging configuration -- confirm).
logger = logging.getLogger("fastapi_cli")

async def navigate_to_link(page: Page, link: Locator) -> bool:
    r"""
    Navigate the page to the destination of a link.

    The link is clicked when it opens in the current tab. If the link opens
    in a new tab (``target="_blank"``) or the click fails, the link's
    ``href`` is resolved against the current page URL and loaded directly
    with ``page.goto``. Failures are logged as warnings and never raised.

    Args:
        page (Page): The page to navigate.
        link (Locator): The link to navigate to.

    Returns:
        bool: True if navigated, False otherwise.
    """

    # If the link does not open in a new tab, click it.
    try:
        target = await link.get_attribute("target", timeout=settings.PLAYWRIGHT_DEFAULT_TIMEOUT)

        if target != "_blank":
            await link.click(timeout=settings.PLAYWRIGHT_DEFAULT_TIMEOUT)
            await page.wait_for_load_state("domcontentloaded")
            return True

    except Exception as e:
        # Best effort: a failed click falls through to direct navigation below.
        logger.warning("Failed to click the link: %s", e)

    # The link opens in a new tab (or could not be clicked): load the link's
    # url in the current page instead.
    try:
        href = await link.get_attribute("href", timeout=settings.PLAYWRIGHT_DEFAULT_TIMEOUT)
        href = href.strip() if href else ""
        if not href:
            logger.warning("No href attribute found while trying to navigate to the link.")
            return False

        # Resolve the href against the current page url, e.g.
        #   /contact                    -> https://example.com/contact
        #   otoiawase/                  -> relative to the current page's directory
        #   //cdn.example.com/x         -> https://cdn.example.com/x
        #   https://example.com/contact -> unchanged
        # NOTE: a <base href> element in the document is not taken into account.
        destination = urljoin(page.url, href)

        # These schemes do not load a document, so there is nothing to navigate to.
        if urlparse(destination).scheme in ("mailto", "tel", "javascript"):
            logger.warning("Skipping non-navigable link: %s", destination)
            return False

        await page.goto(destination, wait_until="domcontentloaded")
        return True

    except Exception as e:
        logger.warning("Failed to navigate to the link: %s", e)
        return False