Karim shoair commited on
Commit ·
42a1f3d
1
Parent(s): 4f7700a
feat(fetchers): Improve StealthyFetcher + Adding StealthySession/AsyncStealthySession classes
Browse files
scrapling/engines/__init__.py
CHANGED
|
@@ -1,7 +1,16 @@
|
|
| 1 |
-
from .camo import CamoufoxEngine
|
| 2 |
from .constants import DEFAULT_DISABLED_RESOURCES, DEFAULT_STEALTH_FLAGS
|
| 3 |
from .static import FetcherSession, FetcherClient, AsyncFetcherClient
|
| 4 |
-
from .
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
__all__ = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from .constants import DEFAULT_DISABLED_RESOURCES, DEFAULT_STEALTH_FLAGS
|
| 2 |
from .static import FetcherSession, FetcherClient, AsyncFetcherClient
|
| 3 |
+
from ._browsers import (
|
| 4 |
+
DynamicSession,
|
| 5 |
+
AsyncDynamicSession,
|
| 6 |
+
StealthySession,
|
| 7 |
+
AsyncStealthySession,
|
| 8 |
+
)
|
| 9 |
|
| 10 |
+
__all__ = [
|
| 11 |
+
"FetcherSession",
|
| 12 |
+
"DynamicSession",
|
| 13 |
+
"AsyncDynamicSession",
|
| 14 |
+
"StealthySession",
|
| 15 |
+
"AsyncStealthySession",
|
| 16 |
+
]
|
scrapling/engines/_browsers/__init__.py
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
from ._controllers import DynamicSession, AsyncDynamicSession
|
|
|
|
|
|
| 1 |
from ._controllers import DynamicSession, AsyncDynamicSession
|
| 2 |
+
from ._camoufox import StealthySession, AsyncStealthySession
|
scrapling/engines/{camo.py → _browsers/_camoufox.py}
RENAMED
|
@@ -1,37 +1,93 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
from
|
| 4 |
-
|
| 5 |
-
from camoufox
|
| 6 |
-
from
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
from scrapling.core._types import (
|
| 10 |
-
Callable,
|
| 11 |
Dict,
|
| 12 |
-
List,
|
| 13 |
-
Literal,
|
| 14 |
Optional,
|
| 15 |
-
SelectorWaitStates,
|
| 16 |
Union,
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
| 18 |
)
|
| 19 |
-
from scrapling.core.utils import log
|
| 20 |
from scrapling.engines.toolbelt import (
|
| 21 |
Response,
|
| 22 |
ResponseFactory,
|
| 23 |
async_intercept_route,
|
| 24 |
-
check_type_validity,
|
| 25 |
-
construct_proxy_dict,
|
| 26 |
generate_convincing_referer,
|
| 27 |
get_os_name,
|
| 28 |
intercept_route,
|
| 29 |
)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
class CamoufoxEngine:
|
| 33 |
def __init__(
|
| 34 |
self,
|
|
|
|
| 35 |
headless: Union[bool, Literal["virtual"]] = True, # noqa: F821
|
| 36 |
block_images: bool = False,
|
| 37 |
disable_resources: bool = False,
|
|
@@ -39,29 +95,29 @@ class CamoufoxEngine:
|
|
| 39 |
allow_webgl: bool = True,
|
| 40 |
network_idle: bool = False,
|
| 41 |
humanize: Union[bool, float] = True,
|
| 42 |
-
solve_cloudflare:
|
| 43 |
-
wait:
|
| 44 |
-
timeout:
|
| 45 |
-
page_action: Callable = None,
|
| 46 |
wait_selector: Optional[str] = None,
|
| 47 |
addons: Optional[List[str]] = None,
|
| 48 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 49 |
-
cookies: Optional[
|
| 50 |
google_search: bool = True,
|
| 51 |
extra_headers: Optional[Dict[str, str]] = None,
|
| 52 |
proxy: Optional[Union[str, Dict[str, str]]] = None,
|
| 53 |
os_randomize: bool = False,
|
| 54 |
disable_ads: bool = False,
|
| 55 |
geoip: bool = False,
|
| 56 |
-
adaptor_arguments: Dict = None,
|
| 57 |
-
additional_arguments: Dict = None,
|
| 58 |
):
|
| 59 |
-
"""
|
| 60 |
|
| 61 |
:param headless: Run the browser in headless/hidden (default), virtual screen mode, or headful/visible mode.
|
| 62 |
:param block_images: Prevent the loading of images through Firefox preferences.
|
| 63 |
This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
|
| 64 |
-
:param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends but it made requests ~25% faster in my tests for some websites.
|
| 65 |
Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
|
| 66 |
This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
|
| 67 |
:param block_webrtc: Blocks WebRTC entirely.
|
|
@@ -76,65 +132,90 @@ class CamoufoxEngine:
|
|
| 76 |
:param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
|
| 77 |
:param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
|
| 78 |
:param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
|
| 79 |
-
:param wait_selector: Wait for a specific
|
| 80 |
:param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
|
| 81 |
It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
|
| 82 |
:param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
|
| 83 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
|
| 84 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 85 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
|
|
|
| 86 |
:param adaptor_arguments: The arguments that will be passed in the end while creating the final Adaptor's class.
|
| 87 |
:param additional_arguments: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 88 |
"""
|
| 89 |
-
self.headless = headless
|
| 90 |
-
self.block_images = bool(block_images)
|
| 91 |
-
self.disable_resources = bool(disable_resources)
|
| 92 |
-
self.block_webrtc = bool(block_webrtc)
|
| 93 |
-
self.allow_webgl = bool(allow_webgl)
|
| 94 |
-
self.network_idle = bool(network_idle)
|
| 95 |
-
self.google_search = bool(google_search)
|
| 96 |
-
self.os_randomize = bool(os_randomize)
|
| 97 |
-
self.disable_ads = bool(disable_ads)
|
| 98 |
-
self.geoip = bool(geoip)
|
| 99 |
-
self.extra_headers = extra_headers or {}
|
| 100 |
-
self.additional_arguments = additional_arguments or {}
|
| 101 |
-
self.proxy = construct_proxy_dict(proxy)
|
| 102 |
-
self.addons = addons or []
|
| 103 |
-
self.cookies = cookies or []
|
| 104 |
-
self.humanize = humanize
|
| 105 |
-
self.solve_cloudflare = solve_cloudflare
|
| 106 |
-
self.timeout = check_type_validity(timeout, [int, float], 30_000)
|
| 107 |
-
self.wait = check_type_validity(wait, [int, float], 0)
|
| 108 |
-
|
| 109 |
-
if self.solve_cloudflare and self.timeout < 60_000:
|
| 110 |
-
self.timeout = 60_000
|
| 111 |
-
|
| 112 |
-
# Page action callable validation
|
| 113 |
-
self.page_action = None
|
| 114 |
-
if page_action is not None:
|
| 115 |
-
if callable(page_action):
|
| 116 |
-
self.page_action = page_action
|
| 117 |
-
else:
|
| 118 |
-
log.error('[Ignored] Argument "page_action" must be callable')
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
humanize
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
"geoip": self.geoip,
|
| 132 |
"proxy": self.proxy,
|
| 133 |
"enable_cache": True,
|
| 134 |
"addons": self.addons,
|
| 135 |
"exclude_addons": [] if self.disable_ads else [DefaultAddons.UBO],
|
| 136 |
"headless": self.headless,
|
| 137 |
-
"humanize": humanize,
|
| 138 |
"i_know_what_im_doing": True, # To turn warnings off with the user configurations
|
| 139 |
"allow_webgl": self.allow_webgl,
|
| 140 |
"block_webrtc": self.block_webrtc,
|
|
@@ -142,9 +223,76 @@ class CamoufoxEngine:
|
|
| 142 |
"os": None if self.os_randomize else get_os_name(),
|
| 143 |
**self.additional_arguments,
|
| 144 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
@staticmethod
|
| 147 |
-
def
|
| 148 |
"""
|
| 149 |
Detect the type of Cloudflare challenge present in the provided page content.
|
| 150 |
|
|
@@ -179,8 +327,7 @@ class CamoufoxEngine:
|
|
| 179 |
:param page: The targeted page
|
| 180 |
:return:
|
| 181 |
"""
|
| 182 |
-
|
| 183 |
-
challenge_type = self.__detect_cloudflare(page_content)
|
| 184 |
if not challenge_type:
|
| 185 |
log.error("No Cloudflare challenge found.")
|
| 186 |
return
|
|
@@ -199,11 +346,7 @@ class CamoufoxEngine:
|
|
| 199 |
# Waiting for the verify spinner to disappear, checking every 1s if it disappeared
|
| 200 |
page.wait_for_timeout(500)
|
| 201 |
|
| 202 |
-
iframe = page.frame(
|
| 203 |
-
url=re.compile(
|
| 204 |
-
"challenges.cloudflare.com/cdn-cgi/challenge-platform/.*"
|
| 205 |
-
)
|
| 206 |
-
)
|
| 207 |
if iframe is None:
|
| 208 |
log.info("Didn't find Cloudflare iframe!")
|
| 209 |
return
|
|
@@ -224,14 +367,261 @@ class CamoufoxEngine:
|
|
| 224 |
log.info("Cloudflare captcha is solved")
|
| 225 |
return
|
| 226 |
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
"""Solve the cloudflare challenge displayed on the playwright page passed. The async version
|
| 229 |
|
| 230 |
:param page: The async targeted page
|
| 231 |
:return:
|
| 232 |
"""
|
| 233 |
-
|
| 234 |
-
challenge_type = self.__detect_cloudflare(page_content)
|
| 235 |
if not challenge_type:
|
| 236 |
log.error("No Cloudflare challenge found.")
|
| 237 |
return
|
|
@@ -250,11 +640,7 @@ class CamoufoxEngine:
|
|
| 250 |
# Waiting for the verify spinner to disappear, checking every 1s if it disappeared
|
| 251 |
await page.wait_for_timeout(500)
|
| 252 |
|
| 253 |
-
iframe = page.frame(
|
| 254 |
-
url=re.compile(
|
| 255 |
-
"challenges.cloudflare.com/cdn-cgi/challenge-platform/.*"
|
| 256 |
-
)
|
| 257 |
-
)
|
| 258 |
if iframe is None:
|
| 259 |
log.info("Didn't find Cloudflare iframe!")
|
| 260 |
return
|
|
@@ -277,90 +663,19 @@ class CamoufoxEngine:
|
|
| 277 |
log.info("Cloudflare captcha is solved")
|
| 278 |
return
|
| 279 |
|
| 280 |
-
def fetch(self, url: str) -> Response:
|
| 281 |
"""Opens up the browser and do your request based on your chosen options.
|
| 282 |
|
| 283 |
-
:param url: Target url.
|
| 284 |
-
:return: A `Response` object
|
| 285 |
"""
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
def handle_response(finished_response):
|
| 290 |
-
nonlocal final_response
|
| 291 |
-
if (
|
| 292 |
-
finished_response.request.resource_type == "document"
|
| 293 |
-
and finished_response.request.is_navigation_request()
|
| 294 |
-
):
|
| 295 |
-
final_response = finished_response
|
| 296 |
-
|
| 297 |
-
with Camoufox(**self._get_camoufox_options()) as browser:
|
| 298 |
-
context = browser.new_context()
|
| 299 |
-
if self.cookies:
|
| 300 |
-
context.add_cookies(self.cookies)
|
| 301 |
-
|
| 302 |
-
page = context.new_page()
|
| 303 |
-
page.set_default_navigation_timeout(self.timeout)
|
| 304 |
-
page.set_default_timeout(self.timeout)
|
| 305 |
-
page.on("response", handle_response)
|
| 306 |
-
|
| 307 |
-
if self.disable_resources:
|
| 308 |
-
page.route("**/*", intercept_route)
|
| 309 |
-
|
| 310 |
-
if self.extra_headers:
|
| 311 |
-
page.set_extra_http_headers(self.extra_headers)
|
| 312 |
-
|
| 313 |
-
first_response = page.goto(url, referer=referer)
|
| 314 |
-
page.wait_for_load_state(state="domcontentloaded")
|
| 315 |
-
|
| 316 |
-
if self.network_idle:
|
| 317 |
-
page.wait_for_load_state("networkidle")
|
| 318 |
-
|
| 319 |
-
if self.solve_cloudflare:
|
| 320 |
-
self._solve_cloudflare(page)
|
| 321 |
-
# Make sure the page is fully loaded after the captcha
|
| 322 |
-
page.wait_for_load_state(state="load")
|
| 323 |
-
page.wait_for_load_state(state="domcontentloaded")
|
| 324 |
-
if self.network_idle:
|
| 325 |
-
page.wait_for_load_state("networkidle")
|
| 326 |
-
|
| 327 |
-
if self.page_action is not None:
|
| 328 |
-
try:
|
| 329 |
-
page = self.page_action(page)
|
| 330 |
-
except Exception as e:
|
| 331 |
-
log.error(f"Error executing page_action: {e}")
|
| 332 |
-
|
| 333 |
-
if self.wait_selector and type(self.wait_selector) is str:
|
| 334 |
-
try:
|
| 335 |
-
waiter = page.locator(self.wait_selector)
|
| 336 |
-
waiter.first.wait_for(state=self.wait_selector_state)
|
| 337 |
-
# Wait again after waiting for the selector, helpful with protections like Cloudflare
|
| 338 |
-
page.wait_for_load_state(state="load")
|
| 339 |
-
page.wait_for_load_state(state="domcontentloaded")
|
| 340 |
-
if self.network_idle:
|
| 341 |
-
page.wait_for_load_state("networkidle")
|
| 342 |
-
except Exception as e:
|
| 343 |
-
log.error(f"Error waiting for selector {self.wait_selector}: {e}")
|
| 344 |
|
| 345 |
-
page.wait_for_timeout(self.wait)
|
| 346 |
-
response = ResponseFactory.from_playwright_response(
|
| 347 |
-
page, first_response, final_response, self.adaptor_arguments
|
| 348 |
-
)
|
| 349 |
-
page.close()
|
| 350 |
-
context.close()
|
| 351 |
-
|
| 352 |
-
return response
|
| 353 |
-
|
| 354 |
-
async def async_fetch(self, url: str) -> Response:
|
| 355 |
-
"""Opens up the browser and do your request based on your chosen options.
|
| 356 |
-
|
| 357 |
-
:param url: Target url.
|
| 358 |
-
:return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
|
| 359 |
-
"""
|
| 360 |
final_response = None
|
| 361 |
referer = generate_convincing_referer(url) if self.google_search else None
|
| 362 |
|
| 363 |
-
async def handle_response(finished_response):
|
| 364 |
nonlocal final_response
|
| 365 |
if (
|
| 366 |
finished_response.request.resource_type == "document"
|
|
@@ -368,59 +683,59 @@ class CamoufoxEngine:
|
|
| 368 |
):
|
| 369 |
final_response = finished_response
|
| 370 |
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
if self.cookies:
|
| 374 |
-
await context.add_cookies(self.cookies)
|
| 375 |
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
page.
|
| 379 |
-
page.
|
| 380 |
-
|
| 381 |
-
if self.disable_resources:
|
| 382 |
-
await page.route("**/*", async_intercept_route)
|
| 383 |
-
|
| 384 |
-
if self.extra_headers:
|
| 385 |
-
await page.set_extra_http_headers(self.extra_headers)
|
| 386 |
-
|
| 387 |
-
first_response = await page.goto(url, referer=referer)
|
| 388 |
-
await page.wait_for_load_state(state="domcontentloaded")
|
| 389 |
|
| 390 |
if self.network_idle:
|
| 391 |
-
await page.wait_for_load_state("networkidle")
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
if self.solve_cloudflare:
|
| 394 |
-
await self.
|
| 395 |
# Make sure the page is fully loaded after the captcha
|
| 396 |
-
await page.wait_for_load_state(state="load")
|
| 397 |
-
await page.wait_for_load_state(state="domcontentloaded")
|
| 398 |
if self.network_idle:
|
| 399 |
-
await page.wait_for_load_state("networkidle")
|
| 400 |
|
| 401 |
if self.page_action is not None:
|
| 402 |
try:
|
| 403 |
-
page = await self.page_action(page)
|
| 404 |
except Exception as e:
|
| 405 |
-
log.error(f"Error executing
|
| 406 |
|
| 407 |
-
if self.wait_selector
|
| 408 |
try:
|
| 409 |
-
waiter = page.locator(self.wait_selector)
|
| 410 |
await waiter.first.wait_for(state=self.wait_selector_state)
|
| 411 |
# Wait again after waiting for the selector, helpful with protections like Cloudflare
|
| 412 |
-
await page.wait_for_load_state(state="load")
|
| 413 |
-
await page.wait_for_load_state(state="domcontentloaded")
|
| 414 |
if self.network_idle:
|
| 415 |
-
await page.wait_for_load_state("networkidle")
|
| 416 |
except Exception as e:
|
| 417 |
log.error(f"Error waiting for selector {self.wait_selector}: {e}")
|
| 418 |
|
| 419 |
-
await page.wait_for_timeout(self.wait)
|
|
|
|
|
|
|
| 420 |
response = await ResponseFactory.from_async_playwright_response(
|
| 421 |
-
page, first_response, final_response, self.adaptor_arguments
|
| 422 |
)
|
| 423 |
-
await page.close()
|
| 424 |
-
await context.close()
|
| 425 |
|
| 426 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from time import time, sleep
|
| 2 |
+
from re import compile as re_compile
|
| 3 |
+
from asyncio import sleep as asyncio_sleep, Lock
|
| 4 |
+
|
| 5 |
+
from camoufox import AsyncNewBrowser, NewBrowser, DefaultAddons
|
| 6 |
+
from playwright.sync_api import (
|
| 7 |
+
Response as SyncPlaywrightResponse,
|
| 8 |
+
sync_playwright,
|
| 9 |
+
BrowserType,
|
| 10 |
+
Browser,
|
| 11 |
+
BrowserContext,
|
| 12 |
+
Playwright,
|
| 13 |
+
Locator,
|
| 14 |
+
Page,
|
| 15 |
+
)
|
| 16 |
+
from playwright.async_api import (
|
| 17 |
+
async_playwright,
|
| 18 |
+
Response as AsyncPlaywrightResponse,
|
| 19 |
+
BrowserType as AsyncBrowserType,
|
| 20 |
+
Browser as AsyncBrowser,
|
| 21 |
+
BrowserContext as AsyncBrowserContext,
|
| 22 |
+
Playwright as AsyncPlaywright,
|
| 23 |
+
Locator as AsyncLocator,
|
| 24 |
+
Page as async_Page,
|
| 25 |
+
)
|
| 26 |
|
| 27 |
+
from scrapling.core.utils import log
|
| 28 |
+
from ._page import PageInfo, PagePool
|
| 29 |
+
from ._validators import validate, CamoufoxConfig
|
| 30 |
from scrapling.core._types import (
|
|
|
|
| 31 |
Dict,
|
|
|
|
|
|
|
| 32 |
Optional,
|
|
|
|
| 33 |
Union,
|
| 34 |
+
Callable,
|
| 35 |
+
Literal,
|
| 36 |
+
List,
|
| 37 |
+
SelectorWaitStates,
|
| 38 |
)
|
|
|
|
| 39 |
from scrapling.engines.toolbelt import (
|
| 40 |
Response,
|
| 41 |
ResponseFactory,
|
| 42 |
async_intercept_route,
|
|
|
|
|
|
|
| 43 |
generate_convincing_referer,
|
| 44 |
get_os_name,
|
| 45 |
intercept_route,
|
| 46 |
)
|
| 47 |
|
| 48 |
+
__CF_PATTERN__ = re_compile("challenges.cloudflare.com/cdn-cgi/challenge-platform/.*")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class StealthySession:
|
| 52 |
+
"""A Stealthy session manager with page pooling."""
|
| 53 |
+
|
| 54 |
+
__slots__ = (
|
| 55 |
+
"max_pages",
|
| 56 |
+
"headless",
|
| 57 |
+
"block_images",
|
| 58 |
+
"disable_resources",
|
| 59 |
+
"block_webrtc",
|
| 60 |
+
"allow_webgl",
|
| 61 |
+
"network_idle",
|
| 62 |
+
"humanize",
|
| 63 |
+
"solve_cloudflare",
|
| 64 |
+
"wait",
|
| 65 |
+
"timeout",
|
| 66 |
+
"page_action",
|
| 67 |
+
"wait_selector",
|
| 68 |
+
"addons",
|
| 69 |
+
"wait_selector_state",
|
| 70 |
+
"cookies",
|
| 71 |
+
"google_search",
|
| 72 |
+
"extra_headers",
|
| 73 |
+
"proxy",
|
| 74 |
+
"os_randomize",
|
| 75 |
+
"disable_ads",
|
| 76 |
+
"geoip",
|
| 77 |
+
"adaptor_arguments",
|
| 78 |
+
"additional_arguments",
|
| 79 |
+
"playwright",
|
| 80 |
+
"browser",
|
| 81 |
+
"context",
|
| 82 |
+
"page_pool",
|
| 83 |
+
"_closed",
|
| 84 |
+
"launch_options",
|
| 85 |
+
"context_options",
|
| 86 |
+
)
|
| 87 |
|
|
|
|
| 88 |
def __init__(
|
| 89 |
self,
|
| 90 |
+
max_pages: int = 1,
|
| 91 |
headless: Union[bool, Literal["virtual"]] = True, # noqa: F821
|
| 92 |
block_images: bool = False,
|
| 93 |
disable_resources: bool = False,
|
|
|
|
| 95 |
allow_webgl: bool = True,
|
| 96 |
network_idle: bool = False,
|
| 97 |
humanize: Union[bool, float] = True,
|
| 98 |
+
solve_cloudflare: bool = False,
|
| 99 |
+
wait: Union[int, float] = 0,
|
| 100 |
+
timeout: Union[int, float] = 30000,
|
| 101 |
+
page_action: Optional[Callable] = None,
|
| 102 |
wait_selector: Optional[str] = None,
|
| 103 |
addons: Optional[List[str]] = None,
|
| 104 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 105 |
+
cookies: Optional[List[Dict]] = None,
|
| 106 |
google_search: bool = True,
|
| 107 |
extra_headers: Optional[Dict[str, str]] = None,
|
| 108 |
proxy: Optional[Union[str, Dict[str, str]]] = None,
|
| 109 |
os_randomize: bool = False,
|
| 110 |
disable_ads: bool = False,
|
| 111 |
geoip: bool = False,
|
| 112 |
+
adaptor_arguments: Optional[Dict] = None,
|
| 113 |
+
additional_arguments: Optional[Dict] = None,
|
| 114 |
):
|
| 115 |
+
"""A Browser session manager with page pooling
|
| 116 |
|
| 117 |
:param headless: Run the browser in headless/hidden (default), virtual screen mode, or headful/visible mode.
|
| 118 |
:param block_images: Prevent the loading of images through Firefox preferences.
|
| 119 |
This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
|
| 120 |
+
:param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
|
| 121 |
Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
|
| 122 |
This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
|
| 123 |
:param block_webrtc: Blocks WebRTC entirely.
|
|
|
|
| 132 |
:param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
|
| 133 |
:param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
|
| 134 |
:param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
|
| 135 |
+
:param wait_selector: Wait for a specific CSS selector to be in a specific state.
|
| 136 |
:param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
|
| 137 |
It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
|
| 138 |
:param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
|
| 139 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
|
| 140 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 141 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 142 |
+
:param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
|
| 143 |
:param adaptor_arguments: The arguments that will be passed in the end while creating the final Adaptor's class.
|
| 144 |
:param additional_arguments: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 145 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
+
params = {
|
| 148 |
+
"max_pages": max_pages,
|
| 149 |
+
"headless": headless,
|
| 150 |
+
"block_images": block_images,
|
| 151 |
+
"disable_resources": disable_resources,
|
| 152 |
+
"block_webrtc": block_webrtc,
|
| 153 |
+
"allow_webgl": allow_webgl,
|
| 154 |
+
"network_idle": network_idle,
|
| 155 |
+
"humanize": humanize,
|
| 156 |
+
"solve_cloudflare": solve_cloudflare,
|
| 157 |
+
"wait": wait,
|
| 158 |
+
"timeout": timeout,
|
| 159 |
+
"page_action": page_action,
|
| 160 |
+
"wait_selector": wait_selector,
|
| 161 |
+
"addons": addons,
|
| 162 |
+
"wait_selector_state": wait_selector_state,
|
| 163 |
+
"cookies": cookies,
|
| 164 |
+
"google_search": google_search,
|
| 165 |
+
"extra_headers": extra_headers,
|
| 166 |
+
"proxy": proxy,
|
| 167 |
+
"os_randomize": os_randomize,
|
| 168 |
+
"disable_ads": disable_ads,
|
| 169 |
+
"geoip": geoip,
|
| 170 |
+
"adaptor_arguments": adaptor_arguments,
|
| 171 |
+
"additional_arguments": additional_arguments,
|
| 172 |
+
}
|
| 173 |
+
config = validate(params, CamoufoxConfig)
|
| 174 |
+
|
| 175 |
+
self.max_pages = config.max_pages
|
| 176 |
+
self.headless = config.headless
|
| 177 |
+
self.block_images = config.block_images
|
| 178 |
+
self.disable_resources = config.disable_resources
|
| 179 |
+
self.block_webrtc = config.block_webrtc
|
| 180 |
+
self.allow_webgl = config.allow_webgl
|
| 181 |
+
self.network_idle = config.network_idle
|
| 182 |
+
self.humanize = config.humanize
|
| 183 |
+
self.solve_cloudflare = config.solve_cloudflare
|
| 184 |
+
self.wait = config.wait
|
| 185 |
+
self.timeout = config.timeout
|
| 186 |
+
self.page_action = config.page_action
|
| 187 |
+
self.wait_selector = config.wait_selector
|
| 188 |
+
self.addons = config.addons
|
| 189 |
+
self.wait_selector_state = config.wait_selector_state
|
| 190 |
+
self.cookies = config.cookies
|
| 191 |
+
self.google_search = config.google_search
|
| 192 |
+
self.extra_headers = config.extra_headers
|
| 193 |
+
self.proxy = config.proxy
|
| 194 |
+
self.os_randomize = config.os_randomize
|
| 195 |
+
self.disable_ads = config.disable_ads
|
| 196 |
+
self.geoip = config.geoip
|
| 197 |
+
self.adaptor_arguments = config.adaptor_arguments
|
| 198 |
+
self.additional_arguments = config.additional_arguments
|
| 199 |
+
|
| 200 |
+
self.playwright: Optional[Playwright] = None
|
| 201 |
+
self.browser: Optional[Union[BrowserType, Browser]] = None
|
| 202 |
+
self.context: Optional[BrowserContext] = None
|
| 203 |
+
self.page_pool = PagePool(self.max_pages)
|
| 204 |
+
self._closed = False
|
| 205 |
+
self.adaptor_arguments = config.adaptor_arguments
|
| 206 |
+
self.page_action = config.page_action
|
| 207 |
+
self.__initiate_browser_options__()
|
| 208 |
+
|
| 209 |
+
def __initiate_browser_options__(self):
|
| 210 |
+
"""Initiate browser options."""
|
| 211 |
+
self.launch_options = {
|
| 212 |
"geoip": self.geoip,
|
| 213 |
"proxy": self.proxy,
|
| 214 |
"enable_cache": True,
|
| 215 |
"addons": self.addons,
|
| 216 |
"exclude_addons": [] if self.disable_ads else [DefaultAddons.UBO],
|
| 217 |
"headless": self.headless,
|
| 218 |
+
"humanize": True if self.solve_cloudflare else self.humanize,
|
| 219 |
"i_know_what_im_doing": True, # To turn warnings off with the user configurations
|
| 220 |
"allow_webgl": self.allow_webgl,
|
| 221 |
"block_webrtc": self.block_webrtc,
|
|
|
|
| 223 |
"os": None if self.os_randomize else get_os_name(),
|
| 224 |
**self.additional_arguments,
|
| 225 |
}
|
| 226 |
+
self.context_options = {}
|
| 227 |
+
|
| 228 |
+
def __create__(self):
|
| 229 |
+
"""Create a browser for this instance and context."""
|
| 230 |
+
self.playwright = sync_playwright().start()
|
| 231 |
+
self.browser = NewBrowser(self.playwright, **self.launch_options)
|
| 232 |
+
self.context = self.browser.new_context(**self.context_options)
|
| 233 |
+
if self.cookies:
|
| 234 |
+
self.context.add_cookies(self.cookies)
|
| 235 |
+
|
| 236 |
+
def __enter__(self):
|
| 237 |
+
self.__create__()
|
| 238 |
+
return self
|
| 239 |
+
|
| 240 |
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
| 241 |
+
self.close()
|
| 242 |
+
|
| 243 |
+
def close(self):
|
| 244 |
+
"""Close all resources"""
|
| 245 |
+
if self._closed:
|
| 246 |
+
return
|
| 247 |
+
|
| 248 |
+
if self.context:
|
| 249 |
+
self.context.close()
|
| 250 |
+
self.context = None
|
| 251 |
+
|
| 252 |
+
if self.browser:
|
| 253 |
+
self.browser.close()
|
| 254 |
+
self.browser = None
|
| 255 |
+
|
| 256 |
+
if self.playwright:
|
| 257 |
+
self.playwright.stop()
|
| 258 |
+
self.playwright = None
|
| 259 |
+
|
| 260 |
+
self._closed = True
|
| 261 |
+
|
| 262 |
+
def _get_or_create_page(self) -> PageInfo:
|
| 263 |
+
"""Get an available page or create a new one"""
|
| 264 |
+
# Try to get a ready page first
|
| 265 |
+
page_info = self.page_pool.get_ready_page()
|
| 266 |
+
if page_info:
|
| 267 |
+
return page_info
|
| 268 |
+
|
| 269 |
+
# Create a new page if under limit
|
| 270 |
+
if self.page_pool.pages_count < self.max_pages:
|
| 271 |
+
page = self.context.new_page()
|
| 272 |
+
page.set_default_navigation_timeout(self.timeout)
|
| 273 |
+
page.set_default_timeout(self.timeout)
|
| 274 |
+
if self.extra_headers:
|
| 275 |
+
page.set_extra_http_headers(self.extra_headers)
|
| 276 |
+
|
| 277 |
+
if self.disable_resources:
|
| 278 |
+
page.route("**/*", intercept_route)
|
| 279 |
+
|
| 280 |
+
return self.page_pool.add_page(page)
|
| 281 |
+
|
| 282 |
+
# Wait for a page to become available
|
| 283 |
+
max_wait = 30
|
| 284 |
+
start_time = time()
|
| 285 |
+
|
| 286 |
+
while time() - start_time < max_wait:
|
| 287 |
+
page_info = self.page_pool.get_ready_page()
|
| 288 |
+
if page_info:
|
| 289 |
+
return page_info
|
| 290 |
+
sleep(0.05)
|
| 291 |
+
|
| 292 |
+
raise TimeoutError("No pages available within timeout period")
|
| 293 |
|
| 294 |
@staticmethod
|
| 295 |
+
def _detect_cloudflare(page_content):
|
| 296 |
"""
|
| 297 |
Detect the type of Cloudflare challenge present in the provided page content.
|
| 298 |
|
|
|
|
| 327 |
:param page: The targeted page
|
| 328 |
:return:
|
| 329 |
"""
|
| 330 |
+
challenge_type = self._detect_cloudflare(page.content())
|
|
|
|
| 331 |
if not challenge_type:
|
| 332 |
log.error("No Cloudflare challenge found.")
|
| 333 |
return
|
|
|
|
| 346 |
# Waiting for the verify spinner to disappear, checking every 1s if it disappeared
|
| 347 |
page.wait_for_timeout(500)
|
| 348 |
|
| 349 |
+
iframe = page.frame(url=__CF_PATTERN__)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
if iframe is None:
|
| 351 |
log.info("Didn't find Cloudflare iframe!")
|
| 352 |
return
|
|
|
|
| 367 |
log.info("Cloudflare captcha is solved")
|
| 368 |
return
|
| 369 |
|
| 370 |
+
def fetch(self, url: str) -> Response:
    """Opens up the browser and do your request based on your chosen options.

    :param url: The Target url.
    :return: A `Response` object.
    :raises RuntimeError: If the session was already closed, or no response
        could be obtained for the navigation.
    """
    if self._closed:
        raise RuntimeError("Context manager has been closed")

    final_response = None
    referer = generate_convincing_referer(url) if self.google_search else None

    def handle_response(finished_response: SyncPlaywrightResponse):
        # Track the last top-level document response; with redirects the
        # response returned by `goto()` is not necessarily the final one.
        nonlocal final_response
        if (
            finished_response.request.resource_type == "document"
            and finished_response.request.is_navigation_request()
        ):
            final_response = finished_response

    page_info = self._get_or_create_page()
    page_info.mark_busy(url=url)

    try:
        # Navigate to URL and wait for a specified state
        page_info.page.on("response", handle_response)
        first_response = page_info.page.goto(url, referer=referer)
        page_info.page.wait_for_load_state(state="domcontentloaded")

        if self.network_idle:
            page_info.page.wait_for_load_state("networkidle")

        if not first_response:
            raise RuntimeError(f"Failed to get response for {url}")

        if self.solve_cloudflare:
            self._solve_cloudflare(page_info.page)
            # Make sure the page is fully loaded after the captcha
            page_info.page.wait_for_load_state(state="load")
            page_info.page.wait_for_load_state(state="domcontentloaded")
            if self.network_idle:
                page_info.page.wait_for_load_state("networkidle")

        if self.page_action is not None:
            try:
                page_info.page = self.page_action(page_info.page)
            except Exception as e:
                log.error(f"Error executing page_action: {e}")

        if self.wait_selector:
            try:
                waiter: Locator = page_info.page.locator(self.wait_selector)
                waiter.first.wait_for(state=self.wait_selector_state)
                # Wait again after waiting for the selector, helpful with protections like Cloudflare
                page_info.page.wait_for_load_state(state="load")
                page_info.page.wait_for_load_state(state="domcontentloaded")
                if self.network_idle:
                    page_info.page.wait_for_load_state("networkidle")
            except Exception as e:
                log.error(f"Error waiting for selector {self.wait_selector}: {e}")

        page_info.page.wait_for_timeout(self.wait)
        response = ResponseFactory.from_playwright_response(
            page_info.page, first_response, final_response, self.adaptor_arguments
        )

        # Mark the page as ready for next use
        page_info.mark_ready()

        return response

    except Exception:
        page_info.mark_error()
        # Bare `raise` preserves the original traceback cleanly.
        raise
    finally:
        # Pages are pooled and reused across fetches; without detaching the
        # handler here, every fetch on a reused page would stack one more
        # "response" listener on top of the previous ones.
        page_info.page.remove_listener("response", handle_response)
|
| 444 |
+
|
| 445 |
+
def get_pool_stats(self) -> Dict[str, int]:
    """Report a snapshot of the page pool's occupancy.

    :return: Counts of total/ready/busy pages plus the configured maximum.
    """
    pool = self.page_pool
    return dict(
        total_pages=pool.pages_count,
        ready_pages=pool.ready_count,
        busy_pages=pool.busy_count,
        max_pages=self.max_pages,
    )
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
class AsyncStealthySession(StealthySession):
|
| 456 |
+
"""A Stealthy session manager with page pooling."""
|
| 457 |
+
|
| 458 |
+
def __init__(
    self,
    max_pages: int = 1,
    headless: Union[bool, Literal["virtual"]] = True,  # noqa: F821
    block_images: bool = False,
    disable_resources: bool = False,
    block_webrtc: bool = False,
    allow_webgl: bool = True,
    network_idle: bool = False,
    humanize: Union[bool, float] = True,
    solve_cloudflare: bool = False,
    wait: Union[int, float] = 0,
    timeout: Union[int, float] = 30000,
    page_action: Optional[Callable] = None,
    wait_selector: Optional[str] = None,
    addons: Optional[List[str]] = None,
    wait_selector_state: SelectorWaitStates = "attached",
    cookies: Optional[List[Dict]] = None,
    google_search: bool = True,
    extra_headers: Optional[Dict[str, str]] = None,
    proxy: Optional[Union[str, Dict[str, str]]] = None,
    os_randomize: bool = False,
    disable_ads: bool = False,
    geoip: bool = False,
    adaptor_arguments: Optional[Dict] = None,
    additional_arguments: Optional[Dict] = None,
):
    """A Browser session manager with page pooling

    :param headless: Run the browser in headless/hidden (default), virtual screen mode, or headful/visible mode.
    :param block_images: Prevent the loading of images through Firefox preferences.
        This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
    :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
        Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
        This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
    :param block_webrtc: Blocks WebRTC entirely.
    :param cookies: Set cookies for the next request.
    :param addons: List of Firefox addons to use. Must be paths to extracted addons.
    :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
    :param solve_cloudflare: Solves all 3 types of the Cloudflare's Turnstile wait page before returning the response to you.
    :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
    :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
    :param disable_ads: Disabled by default, this installs the `uBlock Origin` addon on the browser if enabled.
    :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
    :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
    :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
    :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
    :param wait_selector: Wait for a specific CSS selector to be in a specific state.
    :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
        It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
    :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
    :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
    :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
    :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
    :param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
    :param adaptor_arguments: The arguments that will be passed in the end while creating the final Adaptor's class.
    :param additional_arguments: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
    """
    # Delegate all shared configuration/validation to the sync parent; the
    # arguments are positional, so the order must mirror the parent signature.
    super().__init__(
        max_pages,
        headless,
        block_images,
        disable_resources,
        block_webrtc,
        allow_webgl,
        network_idle,
        humanize,
        solve_cloudflare,
        wait,
        timeout,
        page_action,
        wait_selector,
        addons,
        wait_selector_state,
        cookies,
        google_search,
        extra_headers,
        proxy,
        os_randomize,
        disable_ads,
        geoip,
        adaptor_arguments,
        additional_arguments,
    )
    # Async driver/browser/context handles; populated later in `__create__`,
    # torn down in `close`.
    self.playwright: Optional[AsyncPlaywright] = None
    self.browser: Optional[Union[AsyncBrowserType, AsyncBrowser]] = None
    self.context: Optional[AsyncBrowserContext] = None
    # Serializes page acquisition/creation across concurrent fetches.
    self._lock = Lock()
    # NOTE(review): assigning instance attributes does not disable the
    # inherited sync context-manager protocol — `with` looks dunders up on
    # the type, not the instance. Presumably meant to block sync usage of
    # this async session; confirm intent.
    self.__enter__ = None
    self.__exit__ = None
|
| 548 |
+
|
| 549 |
+
async def __create__(self):
    """Boot the async Playwright driver, launch Camoufox, and open a context."""
    driver = await async_playwright().start()
    self.playwright = driver
    browser = await AsyncNewBrowser(driver, **self.launch_options)
    self.browser = browser
    context = await browser.new_context(**self.context_options)
    self.context = context
    # Seed the context with any cookies configured at construction time.
    if self.cookies:
        await context.add_cookies(self.cookies)
|
| 558 |
+
|
| 559 |
+
async def __aenter__(self):
    """Async context entry: launch the browser stack, then hand back the session."""
    await self.__create__()
    return self
|
| 562 |
+
|
| 563 |
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Async context exit: delegate all cleanup to `close()`."""
    await self.close()
|
| 565 |
+
|
| 566 |
+
async def close(self):
    """Release the browser context, the browser itself, and the Playwright
    driver — in that order — then flag the session as closed. Idempotent:
    repeated calls after the first are no-ops."""
    if self._closed:
        return

    context = self.context
    if context:
        await context.close()
        self.context = None

    browser = self.browser
    if browser:
        await browser.close()
        self.browser = None

    driver = self.playwright
    if driver:
        await driver.stop()
        self.playwright = None

    self._closed = True
|
| 584 |
+
|
| 585 |
+
async def _get_or_create_page(self) -> PageInfo:
    """Get an available page or create a new one"""
    # NOTE(review): the lock appears to span the whole body, including the
    # polling loop below — confirm that holding it while waiting cannot
    # starve concurrent fetches (pages are marked ready outside this lock).
    async with self._lock:
        # Try to get a ready page first
        page_info = self.page_pool.get_ready_page()
        if page_info:
            return page_info

        # Create a new page if under limit
        if self.page_pool.pages_count < self.max_pages:
            page = await self.context.new_page()
            # Apply the session-wide timeout to navigation and all other waits.
            page.set_default_navigation_timeout(self.timeout)
            page.set_default_timeout(self.timeout)
            if self.extra_headers:
                await page.set_extra_http_headers(self.extra_headers)

            if self.disable_resources:
                # Route everything through the interceptor that drops
                # non-essential resource types.
                await page.route("**/*", async_intercept_route)

            return self.page_pool.add_page(page)

        # Wait for a page to become available
        max_wait = 30  # seconds; poll every 50 ms below
        start_time = time()

        while time() - start_time < max_wait:
            page_info = self.page_pool.get_ready_page()
            if page_info:
                return page_info
            await asyncio_sleep(0.05)

        raise TimeoutError("No pages available within timeout period")
|
| 617 |
+
|
| 618 |
+
async def _solve_cloudflare(self, page: async_Page):
|
| 619 |
"""Solve the cloudflare challenge displayed on the playwright page passed. The async version
|
| 620 |
|
| 621 |
:param page: The async targeted page
|
| 622 |
:return:
|
| 623 |
"""
|
| 624 |
+
challenge_type = self._detect_cloudflare(await page.content())
|
|
|
|
| 625 |
if not challenge_type:
|
| 626 |
log.error("No Cloudflare challenge found.")
|
| 627 |
return
|
|
|
|
| 640 |
# Waiting for the verify spinner to disappear, checking every 1s if it disappeared
|
| 641 |
await page.wait_for_timeout(500)
|
| 642 |
|
| 643 |
+
iframe = page.frame(url=__CF_PATTERN__)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
if iframe is None:
|
| 645 |
log.info("Didn't find Cloudflare iframe!")
|
| 646 |
return
|
|
|
|
| 663 |
log.info("Cloudflare captcha is solved")
|
| 664 |
return
|
| 665 |
|
| 666 |
+
async def fetch(self, url: str) -> Response:
    """Opens up the browser and do your request based on your chosen options.

    :param url: The Target url.
    :return: A `Response` object.
    :raises RuntimeError: If the session was already closed, or no response
        could be obtained for the navigation.
    """
    if self._closed:
        raise RuntimeError("Context manager has been closed")

    final_response = None
    referer = generate_convincing_referer(url) if self.google_search else None

    async def handle_response(finished_response: AsyncPlaywrightResponse):
        # Track the last top-level document response; with redirects the
        # response returned by `goto()` is not necessarily the final one.
        nonlocal final_response
        if (
            finished_response.request.resource_type == "document"
            and finished_response.request.is_navigation_request()
        ):
            final_response = finished_response

    page_info = await self._get_or_create_page()
    page_info.mark_busy(url=url)

    try:
        # Navigate to URL and wait for a specified state
        page_info.page.on("response", handle_response)
        first_response = await page_info.page.goto(url, referer=referer)
        await page_info.page.wait_for_load_state(state="domcontentloaded")

        if self.network_idle:
            await page_info.page.wait_for_load_state("networkidle")

        if not first_response:
            raise RuntimeError(f"Failed to get response for {url}")

        if self.solve_cloudflare:
            await self._solve_cloudflare(page_info.page)
            # Make sure the page is fully loaded after the captcha
            await page_info.page.wait_for_load_state(state="load")
            await page_info.page.wait_for_load_state(state="domcontentloaded")
            if self.network_idle:
                await page_info.page.wait_for_load_state("networkidle")

        if self.page_action is not None:
            try:
                page_info.page = await self.page_action(page_info.page)
            except Exception as e:
                log.error(f"Error executing page_action: {e}")

        if self.wait_selector:
            try:
                waiter: AsyncLocator = page_info.page.locator(self.wait_selector)
                await waiter.first.wait_for(state=self.wait_selector_state)
                # Wait again after waiting for the selector, helpful with protections like Cloudflare
                await page_info.page.wait_for_load_state(state="load")
                await page_info.page.wait_for_load_state(state="domcontentloaded")
                if self.network_idle:
                    await page_info.page.wait_for_load_state("networkidle")
            except Exception as e:
                log.error(f"Error waiting for selector {self.wait_selector}: {e}")

        await page_info.page.wait_for_timeout(self.wait)

        # Create response object
        response = await ResponseFactory.from_async_playwright_response(
            page_info.page, first_response, final_response, self.adaptor_arguments
        )

        # Mark the page as ready for next use
        page_info.mark_ready()

        return response

    except Exception:
        page_info.mark_error()
        # Bare `raise` preserves the original traceback cleanly.
        raise
    finally:
        # Pooled pages are reused; drop the handler so repeated fetches don't
        # accumulate duplicate "response" listeners on the same page.
        page_info.page.remove_listener("response", handle_response)
|
scrapling/engines/_browsers/_validators.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from msgspec import Struct, convert, ValidationError
|
| 2 |
from urllib.parse import urlparse
|
|
|
|
| 3 |
|
| 4 |
from scrapling.core._types import (
|
| 5 |
Optional,
|
|
@@ -78,6 +79,70 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
|
|
| 78 |
raise ValueError(f"Invalid CDP URL '{cdp_url}': {str(e)}")
|
| 79 |
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
def validate(params, model):
|
| 82 |
try:
|
| 83 |
config = convert(params, model)
|
|
|
|
| 1 |
from msgspec import Struct, convert, ValidationError
|
| 2 |
from urllib.parse import urlparse
|
| 3 |
+
from os.path import exists, isdir
|
| 4 |
|
| 5 |
from scrapling.core._types import (
|
| 6 |
Optional,
|
|
|
|
| 79 |
raise ValueError(f"Invalid CDP URL '{cdp_url}': {str(e)}")
|
| 80 |
|
| 81 |
|
| 82 |
+
class CamoufoxConfig(Struct, kw_only=True, frozen=False):
    """Configuration struct for validation"""

    max_pages: int = 1
    headless: Union[bool, Literal["virtual"]] = True  # noqa: F821
    block_images: bool = False
    disable_resources: bool = False
    block_webrtc: bool = False
    allow_webgl: bool = True
    network_idle: bool = False
    humanize: Union[bool, float] = True
    solve_cloudflare: bool = False
    wait: Union[int, float] = 0
    timeout: Union[int, float] = 30000
    page_action: Optional[Callable] = None
    wait_selector: Optional[str] = None
    addons: Optional[List[str]] = None
    wait_selector_state: SelectorWaitStates = "attached"
    cookies: Optional[List[Dict]] = None
    google_search: bool = True
    extra_headers: Optional[Dict[str, str]] = None
    # The default value for proxy in Playwright's source is `None`
    proxy: Optional[Union[str, Dict[str, str]]] = None
    os_randomize: bool = False
    disable_ads: bool = False
    geoip: bool = False
    adaptor_arguments: Optional[Dict] = None
    additional_arguments: Optional[Dict] = None

    def __post_init__(self):
        """Custom validation after msgspec validation"""
        # Range/sanity checks that msgspec's type coercion doesn't cover.
        if not (1 <= self.max_pages <= 50):
            raise ValueError("max_pages must be between 1 and 50")
        if self.timeout < 0:
            raise ValueError("timeout must be >= 0")
        if self.page_action is not None and not callable(self.page_action):
            raise TypeError(
                f"page_action must be callable, got {type(self.page_action).__name__}"
            )
        if self.proxy:
            self.proxy = construct_proxy_dict(self.proxy, as_tuple=True)

        if not self.addons:
            self.addons = []
        else:
            # Each addon must be an existing directory (an extracted addon).
            for addon_path in self.addons:
                if not exists(addon_path):
                    raise FileNotFoundError(f"Addon's path not found: {addon_path}")
                elif not isdir(addon_path):
                    raise ValueError(
                        f"Addon's path is not a folder, you need to pass a folder of the extracted addon: {addon_path}"
                    )

        # Normalize optional containers so downstream code never sees `None`.
        self.cookies = self.cookies or []
        if self.solve_cloudflare and self.timeout < 60_000:
            # Cloudflare solving needs more headroom than the default timeout.
            self.timeout = 60_000
        self.adaptor_arguments = self.adaptor_arguments or {}
        self.additional_arguments = self.additional_arguments or {}
|
| 144 |
+
|
| 145 |
+
|
| 146 |
def validate(params, model):
|
| 147 |
try:
|
| 148 |
config = convert(params, model)
|
scrapling/fetchers.py
CHANGED
|
@@ -10,10 +10,10 @@ from scrapling.core._types import (
|
|
| 10 |
)
|
| 11 |
from scrapling.engines import (
|
| 12 |
FetcherSession,
|
| 13 |
-
|
|
|
|
| 14 |
DynamicSession,
|
| 15 |
AsyncDynamicSession,
|
| 16 |
-
check_if_engine_usable,
|
| 17 |
FetcherClient as _FetcherClient,
|
| 18 |
AsyncFetcherClient as _AsyncFetcherClient,
|
| 19 |
)
|
|
@@ -57,23 +57,23 @@ class StealthyFetcher(BaseFetcher):
|
|
| 57 |
block_webrtc: bool = False,
|
| 58 |
allow_webgl: bool = True,
|
| 59 |
network_idle: bool = False,
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
wait:
|
| 63 |
-
timeout:
|
| 64 |
-
page_action: Callable = None,
|
| 65 |
wait_selector: Optional[str] = None,
|
| 66 |
-
|
| 67 |
-
solve_cloudflare: Optional[bool] = False,
|
| 68 |
wait_selector_state: SelectorWaitStates = "attached",
|
|
|
|
| 69 |
google_search: bool = True,
|
| 70 |
extra_headers: Optional[Dict[str, str]] = None,
|
| 71 |
proxy: Optional[Union[str, Dict[str, str]]] = None,
|
| 72 |
os_randomize: bool = False,
|
| 73 |
disable_ads: bool = False,
|
| 74 |
geoip: bool = False,
|
| 75 |
-
custom_config: Dict = None,
|
| 76 |
-
additional_arguments: Dict = None,
|
| 77 |
) -> Response:
|
| 78 |
"""
|
| 79 |
Opens up a browser and do your request based on your chosen options below.
|
|
@@ -106,7 +106,7 @@ class StealthyFetcher(BaseFetcher):
|
|
| 106 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 107 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 108 |
:param additional_arguments: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 109 |
-
:return: A `Response` object
|
| 110 |
"""
|
| 111 |
if not custom_config:
|
| 112 |
custom_config = {}
|
|
@@ -115,8 +115,9 @@ class StealthyFetcher(BaseFetcher):
|
|
| 115 |
f"The custom parser config must be of type dictionary, got {cls.__class__}"
|
| 116 |
)
|
| 117 |
|
| 118 |
-
|
| 119 |
wait=wait,
|
|
|
|
| 120 |
proxy=proxy,
|
| 121 |
geoip=geoip,
|
| 122 |
addons=addons,
|
|
@@ -139,8 +140,8 @@ class StealthyFetcher(BaseFetcher):
|
|
| 139 |
wait_selector_state=wait_selector_state,
|
| 140 |
adaptor_arguments={**cls._generate_parser_arguments(), **custom_config},
|
| 141 |
additional_arguments=additional_arguments or {},
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
|
| 145 |
@classmethod
|
| 146 |
async def async_fetch(
|
|
@@ -150,25 +151,25 @@ class StealthyFetcher(BaseFetcher):
|
|
| 150 |
block_images: bool = False,
|
| 151 |
disable_resources: bool = False,
|
| 152 |
block_webrtc: bool = False,
|
| 153 |
-
cookies: Optional[Iterable[Dict]] = None,
|
| 154 |
allow_webgl: bool = True,
|
| 155 |
network_idle: bool = False,
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
| 160 |
wait_selector: Optional[str] = None,
|
| 161 |
-
|
| 162 |
-
solve_cloudflare: Optional[bool] = False,
|
| 163 |
wait_selector_state: SelectorWaitStates = "attached",
|
|
|
|
| 164 |
google_search: bool = True,
|
| 165 |
extra_headers: Optional[Dict[str, str]] = None,
|
| 166 |
proxy: Optional[Union[str, Dict[str, str]]] = None,
|
| 167 |
os_randomize: bool = False,
|
| 168 |
disable_ads: bool = False,
|
| 169 |
geoip: bool = False,
|
| 170 |
-
custom_config: Dict = None,
|
| 171 |
-
additional_arguments: Dict = None,
|
| 172 |
) -> Response:
|
| 173 |
"""
|
| 174 |
Opens up a browser and do your request based on your chosen options below.
|
|
@@ -201,7 +202,7 @@ class StealthyFetcher(BaseFetcher):
|
|
| 201 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 202 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 203 |
:param additional_arguments: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 204 |
-
:return: A `Response` object
|
| 205 |
"""
|
| 206 |
if not custom_config:
|
| 207 |
custom_config = {}
|
|
@@ -210,8 +211,9 @@ class StealthyFetcher(BaseFetcher):
|
|
| 210 |
f"The custom parser config must be of type dictionary, got {cls.__class__}"
|
| 211 |
)
|
| 212 |
|
| 213 |
-
|
| 214 |
wait=wait,
|
|
|
|
| 215 |
proxy=proxy,
|
| 216 |
geoip=geoip,
|
| 217 |
addons=addons,
|
|
@@ -234,8 +236,8 @@ class StealthyFetcher(BaseFetcher):
|
|
| 234 |
wait_selector_state=wait_selector_state,
|
| 235 |
adaptor_arguments={**cls._generate_parser_arguments(), **custom_config},
|
| 236 |
additional_arguments=additional_arguments or {},
|
| 237 |
-
)
|
| 238 |
-
|
| 239 |
|
| 240 |
|
| 241 |
class DynamicFetcher(BaseFetcher):
|
|
@@ -425,12 +427,3 @@ class DynamicFetcher(BaseFetcher):
|
|
| 425 |
|
| 426 |
|
| 427 |
PlayWrightFetcher = DynamicFetcher # For backward-compatibility
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
class CustomFetcher(BaseFetcher):
|
| 431 |
-
@classmethod
|
| 432 |
-
def fetch(cls, url: str, browser_engine, **kwargs) -> Response:
|
| 433 |
-
engine = check_if_engine_usable(browser_engine)(
|
| 434 |
-
adaptor_arguments=cls._generate_parser_arguments(), **kwargs
|
| 435 |
-
)
|
| 436 |
-
return engine.fetch(url)
|
|
|
|
| 10 |
)
|
| 11 |
from scrapling.engines import (
|
| 12 |
FetcherSession,
|
| 13 |
+
StealthySession,
|
| 14 |
+
AsyncStealthySession,
|
| 15 |
DynamicSession,
|
| 16 |
AsyncDynamicSession,
|
|
|
|
| 17 |
FetcherClient as _FetcherClient,
|
| 18 |
AsyncFetcherClient as _AsyncFetcherClient,
|
| 19 |
)
|
|
|
|
| 57 |
block_webrtc: bool = False,
|
| 58 |
allow_webgl: bool = True,
|
| 59 |
network_idle: bool = False,
|
| 60 |
+
humanize: Union[bool, float] = True,
|
| 61 |
+
solve_cloudflare: bool = False,
|
| 62 |
+
wait: Union[int, float] = 0,
|
| 63 |
+
timeout: Union[int, float] = 30000,
|
| 64 |
+
page_action: Optional[Callable] = None,
|
| 65 |
wait_selector: Optional[str] = None,
|
| 66 |
+
addons: Optional[List[str]] = None,
|
|
|
|
| 67 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 68 |
+
cookies: Optional[List[Dict]] = None,
|
| 69 |
google_search: bool = True,
|
| 70 |
extra_headers: Optional[Dict[str, str]] = None,
|
| 71 |
proxy: Optional[Union[str, Dict[str, str]]] = None,
|
| 72 |
os_randomize: bool = False,
|
| 73 |
disable_ads: bool = False,
|
| 74 |
geoip: bool = False,
|
| 75 |
+
custom_config: Optional[Dict] = None,
|
| 76 |
+
additional_arguments: Optional[Dict] = None,
|
| 77 |
) -> Response:
|
| 78 |
"""
|
| 79 |
Opens up a browser and do your request based on your chosen options below.
|
|
|
|
| 106 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 107 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 108 |
:param additional_arguments: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 109 |
+
:return: A `Response` object.
|
| 110 |
"""
|
| 111 |
if not custom_config:
|
| 112 |
custom_config = {}
|
|
|
|
| 115 |
f"The custom parser config must be of type dictionary, got {cls.__class__}"
|
| 116 |
)
|
| 117 |
|
| 118 |
+
with StealthySession(
|
| 119 |
wait=wait,
|
| 120 |
+
max_pages=1,
|
| 121 |
proxy=proxy,
|
| 122 |
geoip=geoip,
|
| 123 |
addons=addons,
|
|
|
|
| 140 |
wait_selector_state=wait_selector_state,
|
| 141 |
adaptor_arguments={**cls._generate_parser_arguments(), **custom_config},
|
| 142 |
additional_arguments=additional_arguments or {},
|
| 143 |
+
) as engine:
|
| 144 |
+
return engine.fetch(url)
|
| 145 |
|
| 146 |
@classmethod
|
| 147 |
async def async_fetch(
|
|
|
|
| 151 |
block_images: bool = False,
|
| 152 |
disable_resources: bool = False,
|
| 153 |
block_webrtc: bool = False,
|
|
|
|
| 154 |
allow_webgl: bool = True,
|
| 155 |
network_idle: bool = False,
|
| 156 |
+
humanize: Union[bool, float] = True,
|
| 157 |
+
solve_cloudflare: bool = False,
|
| 158 |
+
wait: Union[int, float] = 0,
|
| 159 |
+
timeout: Union[int, float] = 30000,
|
| 160 |
+
page_action: Optional[Callable] = None,
|
| 161 |
wait_selector: Optional[str] = None,
|
| 162 |
+
addons: Optional[List[str]] = None,
|
|
|
|
| 163 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 164 |
+
cookies: Optional[List[Dict]] = None,
|
| 165 |
google_search: bool = True,
|
| 166 |
extra_headers: Optional[Dict[str, str]] = None,
|
| 167 |
proxy: Optional[Union[str, Dict[str, str]]] = None,
|
| 168 |
os_randomize: bool = False,
|
| 169 |
disable_ads: bool = False,
|
| 170 |
geoip: bool = False,
|
| 171 |
+
custom_config: Optional[Dict] = None,
|
| 172 |
+
additional_arguments: Optional[Dict] = None,
|
| 173 |
) -> Response:
|
| 174 |
"""
|
| 175 |
Opens up a browser and do your request based on your chosen options below.
|
|
|
|
| 202 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 203 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 204 |
:param additional_arguments: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 205 |
+
:return: A `Response` object.
|
| 206 |
"""
|
| 207 |
if not custom_config:
|
| 208 |
custom_config = {}
|
|
|
|
| 211 |
f"The custom parser config must be of type dictionary, got {cls.__class__}"
|
| 212 |
)
|
| 213 |
|
| 214 |
+
async with AsyncStealthySession(
|
| 215 |
wait=wait,
|
| 216 |
+
max_pages=1,
|
| 217 |
proxy=proxy,
|
| 218 |
geoip=geoip,
|
| 219 |
addons=addons,
|
|
|
|
| 236 |
wait_selector_state=wait_selector_state,
|
| 237 |
adaptor_arguments={**cls._generate_parser_arguments(), **custom_config},
|
| 238 |
additional_arguments=additional_arguments or {},
|
| 239 |
+
) as engine:
|
| 240 |
+
return await engine.fetch(url)
|
| 241 |
|
| 242 |
|
| 243 |
class DynamicFetcher(BaseFetcher):
|
|
|
|
| 427 |
|
| 428 |
|
| 429 |
PlayWrightFetcher = DynamicFetcher # For backward-compatibility
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|