Spaces:

lenson78
/

Scrapling

Paused

Karim shoair committed on Dec 26, 2025

Commit

ee2299e

1 Parent(s): 123011a

refactor(fetchers)!: Replace Camoufox with patchright and many optimizations

- DynamicFetcher became 20% faster
- StealthyFetcher became 99% faster
- Scrapling size decreased
- Code became ~400 lines shorter
- Most importantly, Scrapling is more stable and reliable now.
- Less confusing for new users.
- More...

Files changed (12) hide show

scrapling/cli.py +18 -51
scrapling/core/_types.py +14 -0
scrapling/core/ai.py +79 -92
scrapling/engines/_browsers/_base.py +108 -150
scrapling/engines/_browsers/_config_tools.py +0 -85
scrapling/engines/_browsers/_controllers.py +34 -65
scrapling/engines/_browsers/{_camoufox.py → _stealth.py} +112 -114
scrapling/engines/_browsers/_types.py +16 -25
scrapling/engines/_browsers/_validators.py +29 -74
scrapling/engines/constants.py +0 -1
scrapling/fetchers/__init__.py +4 -4
scrapling/fetchers/{firefox.py → stealth_chrome.py} +48 -39

scrapling/cli.py CHANGED Viewed

@@ -125,14 +125,9 @@ def install(force):  # pragma: no cover
                 "playwright",
                 "install-deps",
                 "chromium",
-                "firefox",
             ],
             "Playwright dependencies",
         )
-        __Execute(
-            [python_executable, "-m", "camoufox", "fetch", "--browserforge"],
-            "Camoufox browser and databases",
-        )
         # if no errors raised by the above commands, then we add the below file
         __PACKAGE_DIR__.joinpath(".scrapling_dependencies_installed").touch()
     else:
@@ -611,16 +606,10 @@ def delete(
 )
 @option("--wait-selector", help="CSS selector to wait for before proceeding")
 @option("--locale", default="en-US", help="Browser locale (default: en-US)")
-@option("--stealth/--no-stealth", default=False, help="Enable stealth mode (default: False)")
-@option(
-    "--hide-canvas/--show-canvas",
-    default=False,
-    help="Add noise to canvas operations (default: False)",
-)
 @option(
-    "--disable-webgl/--enable-webgl",
     default=False,
-    help="Disable WebGL support (default: False)",
 )
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
@@ -640,9 +629,7 @@ def fetch(
     css_selector,
     wait_selector,
     locale,
-    stealth,
-    hide_canvas,
-    disable_webgl,
     proxy,
     extra_headers,
 ):
@@ -659,9 +646,7 @@ def fetch(
     :param css_selector: CSS selector to extract specific content.
     :param wait_selector: Wait for a specific CSS selector to be in a specific state.
     :param locale: Set the locale for the browser.
-    :param stealth: Enables stealth mode.
-    :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
-    :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
     :param proxy: The proxy to be used with requests.
     :param extra_headers: Extra headers to add to the request.
     """
@@ -676,9 +661,7 @@ def fetch(
         "network_idle": network_idle,
         "timeout": timeout,
         "locale": locale,
-        "stealth": stealth,
-        "hide_canvas": hide_canvas,
-        "disable_webgl": disable_webgl,
     }
     if wait > 0:
@@ -703,11 +686,6 @@ def fetch(
     default=True,
     help="Run browser in headless mode (default: True)",
 )
-@option(
-    "--block-images/--allow-images",
-    default=False,
-    help="Block image loading (default: False)",
-)
 @option(
     "--disable-resources/--enable-resources",
     default=False,
@@ -718,11 +696,6 @@ def fetch(
     default=False,
     help="Block WebRTC entirely (default: False)",
 )
-@option(
-    "--humanize/--no-humanize",
-    default=False,
-    help="Humanize cursor movement (default: False)",
-)
 @option(
     "--solve-cloudflare/--no-solve-cloudflare",
     default=False,
@@ -735,9 +708,14 @@ def fetch(
     help="Wait for network idle (default: False)",
 )
 @option(
-    "--disable-ads/--allow-ads",
     default=False,
-    help="Install uBlock Origin addon (default: False)",
 )
 @option(
     "--timeout",
@@ -757,11 +735,6 @@ def fetch(
     help="CSS selector to extract specific content from the page. It returns all matches.",
 )
 @option("--wait-selector", help="CSS selector to wait for before proceeding")
-@option(
-    "--geoip/--no-geoip",
-    default=False,
-    help="Use IP geolocation for timezone/locale (default: False)",
-)
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
     "--extra-headers",
@@ -773,19 +746,17 @@ def stealthy_fetch(
     url,
     output_file,
     headless,
-    block_images,
     disable_resources,
     block_webrtc,
-    humanize,
     solve_cloudflare,
     allow_webgl,
     network_idle,
-    disable_ads,
     timeout,
     wait,
     css_selector,
     wait_selector,
-    geoip,
     proxy,
     extra_headers,
 ):
@@ -795,19 +766,17 @@ def stealthy_fetch(
     :param url: Target url.
     :param output_file: Output file path (.md for Markdown, .html for HTML).
     :param headless: Run the browser in headless/hidden, or headful/visible mode.
-    :param block_images: Prevent the loading of images through Firefox preferences.
     :param disable_resources: Drop requests of unnecessary resources for a speed boost.
     :param block_webrtc: Blocks WebRTC entirely.
-    :param humanize: Humanize the cursor movement.
     :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges.
     :param allow_webgl: Allow WebGL (recommended to keep enabled).
     :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
-    :param disable_ads: Install the uBlock Origin addon on the browser.
     :param timeout: The timeout in milliseconds that is used in all operations and waits through the page.
     :param wait: The time (milliseconds) the fetcher will wait after everything finishes before returning.
     :param css_selector: CSS selector to extract specific content.
     :param wait_selector: Wait for a specific CSS selector to be in a specific state.
-    :param geoip: Automatically use IP's longitude, latitude, timezone, country, locale.
     :param proxy: The proxy to be used with requests.
     :param extra_headers: Extra headers to add to the request.
     """
@@ -818,16 +787,14 @@ def stealthy_fetch(
     # Build request arguments
     kwargs = {
         "headless": headless,
-        "block_images": block_images,
         "disable_resources": disable_resources,
         "block_webrtc": block_webrtc,
-        "humanize": humanize,
         "solve_cloudflare": solve_cloudflare,
         "allow_webgl": allow_webgl,
         "network_idle": network_idle,
-        "disable_ads": disable_ads,
         "timeout": timeout,
-        "geoip": geoip,
     }
     if wait > 0:

                 "playwright",
                 "install-deps",
                 "chromium",
             ],
             "Playwright dependencies",
         )
         # if no errors raised by the above commands, then we add the below file
         __PACKAGE_DIR__.joinpath(".scrapling_dependencies_installed").touch()
     else:
 )
 @option("--wait-selector", help="CSS selector to wait for before proceeding")
 @option("--locale", default="en-US", help="Browser locale (default: en-US)")
 @option(
+    "--real-chrome/--no-real-chrome",
     default=False,
+    help="If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it. (default: False)",
 )
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
     css_selector,
     wait_selector,
     locale,
+    real_chrome,
     proxy,
     extra_headers,
 ):
     :param css_selector: CSS selector to extract specific content.
     :param wait_selector: Wait for a specific CSS selector to be in a specific state.
     :param locale: Set the locale for the browser.
+    :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
     :param proxy: The proxy to be used with requests.
     :param extra_headers: Extra headers to add to the request.
     """
         "network_idle": network_idle,
         "timeout": timeout,
         "locale": locale,
+        "real_chrome": real_chrome,
     }
     if wait > 0:
     default=True,
     help="Run browser in headless mode (default: True)",
 )
 @option(
     "--disable-resources/--enable-resources",
     default=False,
     default=False,
     help="Block WebRTC entirely (default: False)",
 )
 @option(
     "--solve-cloudflare/--no-solve-cloudflare",
     default=False,
     help="Wait for network idle (default: False)",
 )
 @option(
+    "--real-chrome/--no-real-chrome",
     default=False,
+    help="If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it. (default: False)",
+)
+@option(
+    "--hide-canvas/--show-canvas",
+    default=False,
+    help="Add noise to canvas operations (default: False)",
 )
 @option(
     "--timeout",
     help="CSS selector to extract specific content from the page. It returns all matches.",
 )
 @option("--wait-selector", help="CSS selector to wait for before proceeding")
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
     "--extra-headers",
     url,
     output_file,
     headless,
     disable_resources,
     block_webrtc,
     solve_cloudflare,
     allow_webgl,
     network_idle,
+    real_chrome,
+    hide_canvas,
     timeout,
     wait,
     css_selector,
     wait_selector,
     proxy,
     extra_headers,
 ):
     :param url: Target url.
     :param output_file: Output file path (.md for Markdown, .html for HTML).
     :param headless: Run the browser in headless/hidden, or headful/visible mode.
     :param disable_resources: Drop requests of unnecessary resources for a speed boost.
     :param block_webrtc: Blocks WebRTC entirely.
     :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges.
     :param allow_webgl: Allow WebGL (recommended to keep enabled).
     :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
+    :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+    :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
     :param timeout: The timeout in milliseconds that is used in all operations and waits through the page.
     :param wait: The time (milliseconds) the fetcher will wait after everything finishes before returning.
     :param css_selector: CSS selector to extract specific content.
     :param wait_selector: Wait for a specific CSS selector to be in a specific state.
     :param proxy: The proxy to be used with requests.
     :param extra_headers: Extra headers to add to the request.
     """
     # Build request arguments
     kwargs = {
         "headless": headless,
         "disable_resources": disable_resources,
         "block_webrtc": block_webrtc,
         "solve_cloudflare": solve_cloudflare,
         "allow_webgl": allow_webgl,
         "network_idle": network_idle,
+        "real_chrome": real_chrome,
+        "hide_canvas": hide_canvas,
         "timeout": timeout,
     }
     if wait > 0:

scrapling/core/_types.py CHANGED Viewed

@@ -57,3 +57,17 @@ except ImportError:  # pragma: no cover
         from typing_extensions import Self  # Backport
     except ImportError:
         Self = object

         from typing_extensions import Self  # Backport
     except ImportError:
         Self = object
+# Copied from `playwright._impl._api_structures.SetCookieParam`
+class SetCookieParam(TypedDict, total=False):
+    name: str
+    value: str
+    url: Optional[str]
+    domain: Optional[str]
+    path: Optional[str]
+    expires: Optional[float]
+    httpOnly: Optional[bool]
+    secure: Optional[bool]
+    sameSite: Optional[Literal["Lax", "None", "Strict"]]
+    partitionKey: Optional[str]

scrapling/core/ai.py CHANGED Viewed

@@ -213,13 +213,11 @@ class ScraplingMCPServer:
         main_content_only: bool = True,
         headless: bool = False,
         google_search: bool = True,
-        hide_canvas: bool = False,
-        disable_webgl: bool = False,
         real_chrome: bool = False,
-        stealth: bool = False,
         wait: int | float = 0,
         proxy: Optional[str | Dict[str, str]] = None,
-        locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
@@ -251,12 +249,11 @@ class ScraplingMCPServer:
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
-        :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
-        :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
-        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
-        :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
         :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
@@ -269,15 +266,13 @@ class ScraplingMCPServer:
             locale=locale,
             timeout=timeout,
             cookies=cookies,
-            stealth=stealth,
             cdp_url=cdp_url,
             headless=headless,
             useragent=useragent,
-            hide_canvas=hide_canvas,
             real_chrome=real_chrome,
             network_idle=network_idle,
             wait_selector=wait_selector,
-            disable_webgl=disable_webgl,
             extra_headers=extra_headers,
             google_search=google_search,
             disable_resources=disable_resources,
@@ -301,13 +296,11 @@ class ScraplingMCPServer:
         main_content_only: bool = True,
         headless: bool = False,
         google_search: bool = True,
-        hide_canvas: bool = False,
-        disable_webgl: bool = False,
         real_chrome: bool = False,
-        stealth: bool = False,
         wait: int | float = 0,
         proxy: Optional[str | Dict[str, str]] = None,
-        locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
@@ -339,12 +332,11 @@ class ScraplingMCPServer:
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
-        :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
-        :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
-        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
-        :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
         :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
@@ -356,17 +348,15 @@ class ScraplingMCPServer:
             locale=locale,
             timeout=timeout,
             cookies=cookies,
-            stealth=stealth,
             cdp_url=cdp_url,
             headless=headless,
             max_pages=len(urls),
             useragent=useragent,
-            hide_canvas=hide_canvas,
             real_chrome=real_chrome,
             network_idle=network_idle,
             wait_selector=wait_selector,
             google_search=google_search,
-            disable_webgl=disable_webgl,
             extra_headers=extra_headers,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
@@ -393,29 +383,29 @@ class ScraplingMCPServer:
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
         headless: bool = True,  # noqa: F821
-        block_images: bool = False,
-        disable_resources: bool = False,
-        block_webrtc: bool = False,
-        allow_webgl: bool = True,
-        network_idle: bool = False,
-        humanize: bool | float = True,
-        solve_cloudflare: bool = False,
         wait: int | float = 0,
         timeout: int | float = 30000,
         wait_selector: Optional[str] = None,
-        addons: Optional[List[str]] = None,
-        wait_selector_state: SelectorWaitStates = "attached",
         cookies: Optional[List[Dict]] = None,
-        google_search: bool = True,
-        extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[str | Dict[str, str]] = None,
-        os_randomize: bool = False,
-        disable_ads: bool = False,
-        geoip: bool = False,
         additional_args: Optional[Dict] = None,
     ) -> ResponseModel:
-        """Use Scrapling's version of the Camoufox browser to fetch a URL and return a structured output of the result.
-        Note: This is best suitable for high protection levels. It's slower than the other tools.
         Note: If the `css_selector` resolves to more than one element, all the elements will be returned.
         :param url: The URL to request.
@@ -426,54 +416,53 @@ class ScraplingMCPServer:
         :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
         :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
-        :param block_images: Prevent the loading of images through Firefox preferences.
-            This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
-        :param block_webrtc: Blocks WebRTC entirely.
         :param cookies: Set cookies for the next request.
-        :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
-        :param disable_ads: Disabled by default, this installs the `uBlock Origin` addon on the browser if enabled.
-        :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
-        :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
-            It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
-        :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
         """
         page = await StealthyFetcher.async_fetch(
             url,
             wait=wait,
             proxy=proxy,
-            geoip=geoip,
-            addons=addons,
             timeout=timeout,
             cookies=cookies,
             headless=headless,
-            humanize=humanize,
             allow_webgl=allow_webgl,
-            disable_ads=disable_ads,
             network_idle=network_idle,
-            block_images=block_images,
             block_webrtc=block_webrtc,
-            os_randomize=os_randomize,
             wait_selector=wait_selector,
             google_search=google_search,
             extra_headers=extra_headers,
             solve_cloudflare=solve_cloudflare,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
-            additional_args=additional_args,
         )
         return _ContentTranslator(
             Convertor._extract_content(
@@ -492,29 +481,29 @@ class ScraplingMCPServer:
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
         headless: bool = True,  # noqa: F821
-        block_images: bool = False,
-        disable_resources: bool = False,
-        block_webrtc: bool = False,
-        allow_webgl: bool = True,
-        network_idle: bool = False,
-        humanize: bool | float = True,
-        solve_cloudflare: bool = False,
         wait: int | float = 0,
         timeout: int | float = 30000,
         wait_selector: Optional[str] = None,
-        addons: Optional[List[str]] = None,
-        wait_selector_state: SelectorWaitStates = "attached",
         cookies: Optional[List[Dict]] = None,
-        google_search: bool = True,
-        extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[str | Dict[str, str]] = None,
-        os_randomize: bool = False,
-        disable_ads: bool = False,
-        geoip: bool = False,
         additional_args: Optional[Dict] = None,
     ) -> List[ResponseModel]:
-        """Use Scrapling's version of the Camoufox browser to fetch a group of URLs at the same time, and for each page return a structured output of the result.
-        Note: This is best suitable for high protection levels. It's slower than the other tools.
         Note: If the `css_selector` resolves to more than one element, all the elements will be returned.
         :param urls: A tuple of the URLs to request.
@@ -525,54 +514,52 @@ class ScraplingMCPServer:
         :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
         :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
-        :param block_images: Prevent the loading of images through Firefox preferences.
-            This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
-        :param block_webrtc: Blocks WebRTC entirely.
         :param cookies: Set cookies for the next request.
-        :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
-        :param disable_ads: Disabled by default, this installs the `uBlock Origin` addon on the browser if enabled.
-        :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
-        :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
-            It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
-        :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
         """
         async with AsyncStealthySession(
             wait=wait,
             proxy=proxy,
-            geoip=geoip,
-            addons=addons,
             timeout=timeout,
             cookies=cookies,
             headless=headless,
-            humanize=humanize,
-            max_pages=len(urls),
             allow_webgl=allow_webgl,
-            disable_ads=disable_ads,
-            block_images=block_images,
-            block_webrtc=block_webrtc,
             network_idle=network_idle,
-            os_randomize=os_randomize,
             wait_selector=wait_selector,
             google_search=google_search,
             extra_headers=extra_headers,
             solve_cloudflare=solve_cloudflare,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
-            additional_args=additional_args,
         ) as session:
             tasks = [session.fetch(url) for url in urls]
             responses = await gather(*tasks)

         main_content_only: bool = True,
         headless: bool = False,
         google_search: bool = True,
         real_chrome: bool = False,
         wait: int | float = 0,
         proxy: Optional[str | Dict[str, str]] = None,
+        timezone_id: str | None = None,
+        locale: str | None = None,
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
         :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
             locale=locale,
             timeout=timeout,
             cookies=cookies,
             cdp_url=cdp_url,
             headless=headless,
             useragent=useragent,
+            timezone_id=timezone_id,
             real_chrome=real_chrome,
             network_idle=network_idle,
             wait_selector=wait_selector,
             extra_headers=extra_headers,
             google_search=google_search,
             disable_resources=disable_resources,
         main_content_only: bool = True,
         headless: bool = False,
         google_search: bool = True,
         real_chrome: bool = False,
         wait: int | float = 0,
         proxy: Optional[str | Dict[str, str]] = None,
+        timezone_id: str | None = None,
+        locale: str | None = None,
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
         :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
             locale=locale,
             timeout=timeout,
             cookies=cookies,
             cdp_url=cdp_url,
             headless=headless,
             max_pages=len(urls),
             useragent=useragent,
+            timezone_id=timezone_id,
             real_chrome=real_chrome,
             network_idle=network_idle,
             wait_selector=wait_selector,
             google_search=google_search,
             extra_headers=extra_headers,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
         headless: bool = True,  # noqa: F821
+        google_search: bool = True,
+        real_chrome: bool = False,
         wait: int | float = 0,
+        proxy: Optional[str | Dict[str, str]] = None,
+        timezone_id: str | None = None,
+        locale: str | None = None,
+        extra_headers: Optional[Dict[str, str]] = None,
+        useragent: Optional[str] = None,
+        hide_canvas: bool = False,
+        cdp_url: Optional[str] = None,
         timeout: int | float = 30000,
+        disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
+        network_idle: bool = False,
+        wait_selector_state: SelectorWaitStates = "attached",
+        block_webrtc: bool = False,
+        allow_webgl: bool = True,
+        solve_cloudflare: bool = False,
         additional_args: Optional[Dict] = None,
     ) -> ResponseModel:
+        """Use the stealthy fetcher to fetch a URL and return a structured output of the result.
+        Note: This is the only suitable fetcher for high protection levels.
         Note: If the `css_selector` resolves to more than one element, all the elements will be returned.
         :param url: The URL to request.
         :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
         :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
         :param cookies: Set cookies for the next request.
         :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+        :param block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
+        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+        :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         page = await StealthyFetcher.async_fetch(
             url,
             wait=wait,
             proxy=proxy,
+            locale=locale,
+            cdp_url=cdp_url,
             timeout=timeout,
             cookies=cookies,
             headless=headless,
+            useragent=useragent,
+            timezone_id=timezone_id,
+            real_chrome=real_chrome,
+            hide_canvas=hide_canvas,
             allow_webgl=allow_webgl,
             network_idle=network_idle,
             block_webrtc=block_webrtc,
             wait_selector=wait_selector,
             google_search=google_search,
             extra_headers=extra_headers,
+            additional_args=additional_args,
             solve_cloudflare=solve_cloudflare,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
         )
         return _ContentTranslator(
             Convertor._extract_content(
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
         headless: bool = True,  # noqa: F821
+        google_search: bool = True,
+        real_chrome: bool = False,
         wait: int | float = 0,
+        proxy: Optional[str | Dict[str, str]] = None,
+        timezone_id: str | None = None,
+        locale: str | None = None,
+        extra_headers: Optional[Dict[str, str]] = None,
+        useragent: Optional[str] = None,
+        hide_canvas: bool = False,
+        cdp_url: Optional[str] = None,
         timeout: int | float = 30000,
+        disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
+        network_idle: bool = False,
+        wait_selector_state: SelectorWaitStates = "attached",
+        block_webrtc: bool = False,
+        allow_webgl: bool = True,
+        solve_cloudflare: bool = False,
         additional_args: Optional[Dict] = None,
     ) -> List[ResponseModel]:
+        """Use the stealthy fetcher to fetch a group of URLs at the same time, and for each page return a structured output of the result.
+        Note: This is the only suitable fetcher for high protection levels.
         Note: If the `css_selector` resolves to more than one element, all the elements will be returned.
         :param urls: A tuple of the URLs to request.
         :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
         :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
         :param cookies: Set cookies for the next request.
         :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+        :param block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
+        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+        :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         async with AsyncStealthySession(
             wait=wait,
             proxy=proxy,
+            locale=locale,
+            cdp_url=cdp_url,
             timeout=timeout,
             cookies=cookies,
             headless=headless,
+            useragent=useragent,
+            timezone_id=timezone_id,
+            real_chrome=real_chrome,
+            hide_canvas=hide_canvas,
             allow_webgl=allow_webgl,
             network_idle=network_idle,
+            block_webrtc=block_webrtc,
             wait_selector=wait_selector,
             google_search=google_search,
             extra_headers=extra_headers,
+            additional_args=additional_args,
             solve_cloudflare=solve_cloudflare,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
         ) as session:
             tasks = [session.fetch(url) for url in urls]
             responses = await gather(*tasks)

scrapling/engines/_browsers/_base.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from time import time
 from asyncio import sleep as asyncio_sleep, Lock
-from camoufox import DefaultAddons
 from playwright.sync_api._generated import Page
 from playwright.sync_api import (
     Frame,
@@ -17,18 +16,18 @@ from playwright.async_api import (
     BrowserContext as AsyncBrowserContext,
 )
 from playwright._impl._errors import Error as PlaywrightError
-from camoufox.pkgman import installed_verstr as camoufox_version
-from camoufox.utils import launch_options as generate_launch_options
 from ._page import PageInfo, PagePool
 from scrapling.parser import Selector
-from scrapling.core._types import Any, cast, Dict, List, Optional, Callable, TYPE_CHECKING
-from scrapling.engines.toolbelt.fingerprints import get_os_name
-from ._validators import validate, PlaywrightConfig, CamoufoxConfig
-from ._config_tools import _compiled_stealth_scripts, _launch_kwargs, _context_kwargs
 from scrapling.engines.toolbelt.navigation import intercept_route, async_intercept_route
-__ff_version_str__ = camoufox_version().split(".", 1)[0]
 class SyncSession:
@@ -84,10 +83,6 @@ class SyncSession:
         if disable_resources:
             page.route("**/*", intercept_route)
-        if getattr(self, "stealth", False):
-            for script in _compiled_stealth_scripts():
-                page.add_init_script(script=script)
         page_info = self.page_pool.add_page(page)
         page_info.mark_busy()
         return page_info
@@ -202,10 +197,6 @@ class AsyncSession:
             if disable_resources:
                 await page.route("**/*", async_intercept_route)
-            if getattr(self, "stealth", False):
-                for script in _compiled_stealth_scripts():
-                    await page.add_init_script(script=script)
             return self.page_pool.add_page(page)
     def get_pool_stats(self) -> Dict[str, int]:
@@ -251,151 +242,118 @@ class AsyncSession:
         return handle_response
-class DynamicSessionMixin:
-    def __validate__(self, **params):
         if "__max_pages" in params:
             params["max_pages"] = params.pop("__max_pages")
-        config = validate(params, model=PlaywrightConfig)
-        self._max_pages = config.max_pages
-        self._headless = config.headless
-        self._hide_canvas = config.hide_canvas
-        self._disable_webgl = config.disable_webgl
-        self._real_chrome = config.real_chrome
-        self._stealth = config.stealth
-        self._google_search = config.google_search
-        self._wait = config.wait
-        self._proxy = config.proxy
-        self._locale = config.locale
-        self._extra_headers = config.extra_headers
-        self._useragent = config.useragent
-        self._timeout = config.timeout
-        self._cookies = config.cookies
-        self._disable_resources = config.disable_resources
-        self._cdp_url = config.cdp_url
-        self._network_idle = config.network_idle
-        self._load_dom = config.load_dom
-        self._wait_selector = config.wait_selector
-        self._init_script = config.init_script
-        self._wait_selector_state = config.wait_selector_state
-        self._extra_flags = config.extra_flags
-        self._selector_config = config.selector_config
-        self._timezone_id = config.timezone_id
-        self._additional_args = config.additional_args
-        self._page_action = config.page_action
-        self._user_data_dir = config.user_data_dir
-        self._headers_keys = {header.lower() for header in self._extra_headers.keys()} if self._extra_headers else set()
-        self.__initiate_browser_options__()
-    def __initiate_browser_options__(self):
-        if TYPE_CHECKING:
-            assert isinstance(self._proxy, tuple)
-        if not self._cdp_url:
-            # `launch_options` is used with persistent context
-            self.launch_options = dict(
-                _launch_kwargs(
-                    self._headless,
-                    self._proxy,
-                    self._locale,
-                    tuple(self._extra_headers.items()) if self._extra_headers else tuple(),
-                    self._useragent,
-                    self._real_chrome,
-                    self._stealth,
-                    self._hide_canvas,
-                    self._disable_webgl,
-                    self._timezone_id,
-                    tuple(self._extra_flags) if self._extra_flags else tuple(),
-                )
             )
-            self.launch_options["extra_http_headers"] = dict(self.launch_options["extra_http_headers"])
-            self.launch_options["proxy"] = dict(self.launch_options["proxy"]) or None
-            self.launch_options["user_data_dir"] = self._user_data_dir
-            self.launch_options.update(cast(Dict, self._additional_args))
-            self.context_options = dict()
         else:
             # while `context_options` is left to be used when cdp mode is enabled
-            self.launch_options = dict()
-            self.context_options = dict(
-                _context_kwargs(
-                    self._proxy,
-                    self._locale,
-                    tuple(self._extra_headers.items()) if self._extra_headers else tuple(),
-                    self._useragent,
-                    self._stealth,
-                )
-            )
-            self.context_options["extra_http_headers"] = dict(self.context_options["extra_http_headers"])
-            self.context_options["proxy"] = dict(self.context_options["proxy"]) or None
-            self.context_options.update(cast(Dict, self._additional_args))
-class StealthySessionMixin:
     def __validate__(self, **params):
-        if "__max_pages" in params:
-            params["max_pages"] = params.pop("__max_pages")
-        config: CamoufoxConfig = validate(params, model=CamoufoxConfig)
-        self._max_pages = config.max_pages
-        self._headless = config.headless
-        self._block_images = config.block_images
-        self._disable_resources = config.disable_resources
-        self._block_webrtc = config.block_webrtc
-        self._allow_webgl = config.allow_webgl
-        self._network_idle = config.network_idle
-        self._load_dom = config.load_dom
-        self._humanize = config.humanize
-        self._solve_cloudflare = config.solve_cloudflare
-        self._wait = config.wait
-        self._timeout = config.timeout
-        self._page_action = config.page_action
-        self._wait_selector = config.wait_selector
-        self._init_script = config.init_script
-        self._addons = config.addons
-        self._wait_selector_state = config.wait_selector_state
-        self._cookies = config.cookies
-        self._google_search = config.google_search
-        self._extra_headers = config.extra_headers
-        self._proxy = config.proxy
-        self._os_randomize = config.os_randomize
-        self._disable_ads = config.disable_ads
-        self._geoip = config.geoip
-        self._selector_config = config.selector_config
-        self._additional_args = config.additional_args
-        self._user_data_dir = config.user_data_dir
-        self._headers_keys = {header.lower() for header in self._extra_headers.keys()} if self._extra_headers else set()
-        self.__initiate_browser_options__()
-    def __initiate_browser_options__(self):
-        """Initiate browser options."""
-        self.launch_options: Dict[str, Any] = generate_launch_options(
-            **{
-                "geoip": self._geoip,
-                "proxy": dict(self._proxy) if self._proxy and isinstance(self._proxy, tuple) else self._proxy,
-                "addons": self._addons,
-                "exclude_addons": [] if self._disable_ads else [DefaultAddons.UBO],
-                "headless": self._headless,
-                "humanize": True if self._solve_cloudflare else self._humanize,
-                "i_know_what_im_doing": True,  # To turn warnings off with the user configurations
-                "allow_webgl": self._allow_webgl,
-                "block_webrtc": self._block_webrtc,
-                "block_images": self._block_images,  # Careful! it makes some websites don't finish loading at all like stackoverflow even in headful mode.
-                "os": None if self._os_randomize else get_os_name(),
-                "user_data_dir": self._user_data_dir,
-                "ff_version": __ff_version_str__,
-                "firefox_user_prefs": {
-                    # This is what enabling `enable_cache` does internally, so we do it from here instead
-                    "browser.sessionhistory.max_entries": 10,
-                    "browser.sessionhistory.max_total_viewers": -1,
-                    "browser.cache.memory.enable": True,
-                    "browser.cache.disk_cache_ssl": True,
-                    "browser.cache.disk.smart_size.enabled": True,
-                },
-                **cast(Dict, self._additional_args),
             }
         )
     @staticmethod
     def _detect_cloudflare(page_content: str) -> str | None:

 from time import time
 from asyncio import sleep as asyncio_sleep, Lock
 from playwright.sync_api._generated import Page
 from playwright.sync_api import (
     Frame,
     BrowserContext as AsyncBrowserContext,
 )
 from playwright._impl._errors import Error as PlaywrightError
 from ._page import PageInfo, PagePool
 from scrapling.parser import Selector
+from ._validators import validate, PlaywrightConfig, StealthConfig
+from ._config_tools import __default_chrome_useragent__, __default_useragent__
 from scrapling.engines.toolbelt.navigation import intercept_route, async_intercept_route
+from scrapling.core._types import Any, cast, Dict, List, Optional, Callable, TYPE_CHECKING, overload, Tuple
+from scrapling.engines.constants import (
+    DEFAULT_STEALTH_FLAGS,
+    HARMFUL_DEFAULT_ARGS,
+    DEFAULT_FLAGS,
+)
 class SyncSession:
         if disable_resources:
             page.route("**/*", intercept_route)
         page_info = self.page_pool.add_page(page)
         page_info.mark_busy()
         return page_info
             if disable_resources:
                 await page.route("**/*", async_intercept_route)
             return self.page_pool.add_page(page)
     def get_pool_stats(self) -> Dict[str, int]:
         return handle_response
+class BaseSessionMixin:
+    @overload
+    def __validate_routine__(self, params: Dict, model: type[StealthConfig]) -> StealthConfig: ...
+    @overload
+    def __validate_routine__(self, params: Dict, model: type[PlaywrightConfig]) -> PlaywrightConfig: ...
+    def __validate_routine__(
+        self, params: Dict, model: type[PlaywrightConfig] | type[StealthConfig]
+    ) -> PlaywrightConfig | StealthConfig:
+        # Dark color scheme bypasses the 'prefersLightColor' check in creepjs
+        self._context_options: Dict[str, Any] = {"color_scheme": "dark", "device_scale_factor": 2}
+        self._launch_options: Dict[str, Any] = self._context_options | {
+            "args": DEFAULT_FLAGS,
+            "ignore_default_args": HARMFUL_DEFAULT_ARGS,
+        }
         if "__max_pages" in params:
             params["max_pages"] = params.pop("__max_pages")
+        config = validate(params, model=model)
+        self._headers_keys = (
+            {header.lower() for header in config.extra_headers.keys()} if config.extra_headers else set()
+        )
+        return config
+    def __generate_options__(self, extra_flags: Tuple | None = None) -> None:
+        config = cast(PlaywrightConfig, getattr(self, "_config", None))
+        self._context_options.update(
+            {
+                "proxy": config.proxy,
+                "locale": config.locale,
+                "timezone_id": config.timezone_id,
+                "extra_http_headers": config.extra_headers,
+            }
+        )
+        # The default useragent in the headful is always correct now in the current versions of Playwright
+        if config.useragent:
+            self._context_options["user_agent"] = config.useragent
+        elif not config.useragent and config.headless:
+            self._context_options["user_agent"] = (
+                __default_chrome_useragent__ if config.real_chrome else __default_useragent__
             )
+        if not config.cdp_url:
+            self._launch_options |= self._context_options
+            self._context_options = {}
+            flags = self._launch_options["args"]
+            if config.extra_flags or extra_flags:
+                flags = list(set(flags + (config.extra_flags or extra_flags)))
+            self._launch_options.update(
+                {
+                    "args": flags,
+                    "headless": config.headless,
+                    "user_data_dir": config.user_data_dir,
+                    "channel": "chrome" if config.real_chrome else "chromium",
+                }
+            )
+            if config.additional_args:
+                self._launch_options.update(config.additional_args)
         else:
             # while `context_options` is left to be used when cdp mode is enabled
+            self._launch_options = dict()
+            if config.additional_args:
+                self._context_options.update(config.additional_args)
+class DynamicSessionMixin(BaseSessionMixin):
     def __validate__(self, **params):
+        self._config = self.__validate_routine__(params, model=PlaywrightConfig)
+        self.__generate_options__()
+class StealthySessionMixin(BaseSessionMixin):
+    def __validate__(self, **params):
+        self._config: StealthConfig = self.__validate_routine__(params, model=StealthConfig)
+        self._context_options.update(
+            {
+                "is_mobile": False,
+                "has_touch": False,
+                # I'm thinking about disabling it to rest from all Service Workers' headache, but let's keep it as it is for now
+                "service_workers": "allow",
+                "ignore_https_errors": True,
+                "screen": {"width": 1920, "height": 1080},
+                "viewport": {"width": 1920, "height": 1080},
+                "permissions": ["geolocation", "notifications"],
             }
         )
+        self.__generate_stealth_options()
+    def __generate_stealth_options(self) -> None:
+        flags = tuple()
+        if not self._config.cdp_url:
+            flags = DEFAULT_FLAGS + DEFAULT_STEALTH_FLAGS
+            if self._config.block_webrtc:
+                flags += (
+                    "--webrtc-ip-handling-policy=disable_non_proxied_udp",
+                    "--force-webrtc-ip-handling-policy",  # Ensures the policy is enforced
+                )
+            if not self._config.allow_webgl:
+                flags += (
+                    "--disable-webgl",
+                    "--disable-webgl-image-chromium",
+                    "--disable-webgl2",
+                )
+            if self._config.hide_canvas:
+                flags += ("--fingerprinting-canvas-image-data-noise",)
+        super(StealthySessionMixin, self).__generate_options__(flags)
     @staticmethod
     def _detect_cloudflare(page_content: str) -> str | None:

scrapling/engines/_browsers/_config_tools.py CHANGED Viewed

@@ -58,88 +58,3 @@ def _set_flags(hide_canvas, disable_webgl):  # pragma: no cover
         )
     return flags
-@lru_cache(2, typed=True)
-def _launch_kwargs(
-    headless,
-    proxy: Tuple,
-    locale,
-    extra_headers,
-    useragent,
-    real_chrome,
-    stealth,
-    hide_canvas,
-    disable_webgl,
-    timezone_id,
-    extra_flags: Tuple,
-) -> Tuple:
-    """Creates the arguments we will use while launching playwright's browser"""
-    base_args = DEFAULT_FLAGS
-    if extra_flags:
-        base_args = base_args + extra_flags
-    launch_kwargs = {
-        "locale": locale,
-        "timezone_id": timezone_id or None,
-        "headless": headless,
-        "args": base_args,
-        "color_scheme": "dark",  # Bypasses the 'prefersLightColor' check in creepjs
-        "proxy": proxy or tuple(),
-        "device_scale_factor": 2,
-        "ignore_default_args": HARMFUL_DEFAULT_ARGS,
-        "channel": "chrome" if real_chrome else "chromium",
-        "extra_http_headers": extra_headers or tuple(),
-    }
-    # The default useragent in the headful is always correct now in the current versions of Playwright
-    if useragent:
-        launch_kwargs["user_agent"] = useragent
-    elif not useragent and headless:
-        launch_kwargs["user_agent"] = __default_chrome_useragent__ if real_chrome else __default_useragent__
-    if stealth:
-        stealth_args = base_args + _set_flags(hide_canvas, disable_webgl)
-        launch_kwargs.update(
-            {
-                "args": stealth_args,
-                "chromium_sandbox": True,
-                "is_mobile": False,
-                "has_touch": False,
-                # I'm thinking about disabling it to rest from all Service Workers' headache, but let's keep it as it is for now
-                "service_workers": "allow",
-                "ignore_https_errors": True,
-                "screen": {"width": 1920, "height": 1080},
-                "viewport": {"width": 1920, "height": 1080},
-                "permissions": ["geolocation", "notifications"],
-            }
-        )
-    return tuple(launch_kwargs.items())
-@lru_cache(2, typed=True)
-def _context_kwargs(proxy, locale, extra_headers, useragent, stealth) -> Tuple:
-    """Creates the arguments for the browser context"""
-    context_kwargs = {
-        "proxy": proxy or tuple(),
-        "locale": locale,
-        "color_scheme": "dark",  # Bypasses the 'prefersLightColor' check in creepjs
-        "device_scale_factor": 2,
-        "extra_http_headers": extra_headers or tuple(),
-        "user_agent": useragent or __default_useragent__,
-    }
-    if stealth:
-        context_kwargs.update(
-            {
-                "is_mobile": False,
-                "has_touch": False,
-                # I'm thinking about disabling it to rest from all Service Workers' headache, but let's keep it as it is for now
-                "service_workers": "allow",
-                "ignore_https_errors": True,
-                "screen": {"width": 1920, "height": 1080},
-                "viewport": {"width": 1920, "height": 1080},
-                "permissions": ["geolocation", "notifications"],
-            }
-        )
-    return tuple(context_kwargs.items())


58	)
59
60	return flags

scrapling/engines/_browsers/_controllers.py CHANGED Viewed

@@ -9,8 +9,6 @@ from playwright.async_api import (
     Playwright as AsyncPlaywright,
     BrowserContext as AsyncBrowserContext,
 )
-from patchright.sync_api import sync_playwright as sync_patchright
-from patchright.async_api import async_playwright as async_patchright
 from scrapling.core.utils import log
 from scrapling.core._types import Unpack, TYPE_CHECKING
@@ -21,44 +19,19 @@ from scrapling.engines.toolbelt.convertor import Response, ResponseFactory
 from scrapling.engines.toolbelt.fingerprints import generate_convincing_referer
-class DynamicSession(DynamicSessionMixin, SyncSession):
     """A Browser session manager with page pooling."""
     __slots__ = (
-        "_max_pages",
-        "_headless",
-        "_hide_canvas",
-        "_disable_webgl",
-        "_real_chrome",
-        "_stealth",
-        "_google_search",
-        "_proxy",
-        "_locale",
-        "_extra_headers",
-        "_useragent",
-        "_timeout",
-        "_cookies",
-        "_disable_resources",
-        "_network_idle",
-        "_load_dom",
-        "_wait_selector",
-        "_init_script",
-        "_wait_selector_state",
-        "_wait",
         "playwright",
-        "browser",
         "context",
-        "page_pool",
         "_closed",
-        "_selector_config",
-        "_page_action",
-        "launch_options",
-        "context_options",
-        "_cdp_url",
-        "_headers_keys",
-        "_extra_flags",
-        "_additional_args",
-        "_user_data_dir",
     )
     def __init__(self, **kwargs: Unpack[PlaywrightSession]):
@@ -76,8 +49,9 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
-        :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
-        :param timezone_id: Set the timezone for the browser if wanted.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
@@ -94,27 +68,24 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
         :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
-        super().__init__(max_pages=self._max_pages)
     def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
-            sync_context = sync_patchright if self._stealth else sync_playwright
-            self.playwright: Playwright = sync_context().start()  # pyright: ignore [reportAttributeAccessIssue]
-            if self._cdp_url:  # pragma: no cover
-                self.context = self.playwright.chromium.connect_over_cdp(endpoint_url=self._cdp_url).new_context(
-                    **self.context_options
-                )
             else:
-                self.context = self.playwright.chromium.launch_persistent_context(**self.launch_options)
-            if self._init_script:  # pragma: no cover
-                self.context.add_init_script(path=self._init_script)
-            if self._cookies:  # pragma: no cover
-                self.context.add_cookies(self._cookies)
         else:
             raise RuntimeError("Session has been already started")
@@ -139,7 +110,6 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
         :return: A `Response` object.
         """
         params = _validate(kwargs, self, PlaywrightConfig)
         if self._closed:  # pragma: no cover
             raise RuntimeError("Context manager has been closed")
@@ -193,7 +163,7 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
             raise e
-class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
     """An async Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory."""
     def __init__(self, **kwargs: Unpack[PlaywrightSession]):
@@ -212,8 +182,9 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
-        :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
-        :param timezone_id: Set the timezone for the browser if wanted.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
@@ -230,28 +201,26 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
         :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
-        super().__init__(max_pages=self._max_pages)
     async def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
-            async_context = async_patchright if self._stealth else async_playwright
-            self.playwright: AsyncPlaywright = await async_context().start()  # pyright: ignore [reportAttributeAccessIssue]
-            if self._cdp_url:
-                browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self._cdp_url)
-                self.context: AsyncBrowserContext = await browser.new_context(**self.context_options)
             else:
                 self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
-                    **self.launch_options
                 )
-            if self._init_script:  # pragma: no cover
-                await self.context.add_init_script(path=self._init_script)
-            if self._cookies:
-                await self.context.add_cookies(self._cookies)  # pyright: ignore
         else:
             raise RuntimeError("Session has been already started")

     Playwright as AsyncPlaywright,
     BrowserContext as AsyncBrowserContext,
 )
 from scrapling.core.utils import log
 from scrapling.core._types import Unpack, TYPE_CHECKING
 from scrapling.engines.toolbelt.fingerprints import generate_convincing_referer
+class DynamicSession(SyncSession, DynamicSessionMixin):
     """A Browser session manager with page pooling."""
     __slots__ = (
+        "_config",
+        "_context_options",
+        "_launch_options",
+        "max_pages",
+        "page_pool",
+        "_max_wait_for_page",
         "playwright",
         "context",
         "_closed",
     )
     def __init__(self, **kwargs: Unpack[PlaywrightSession]):
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
         :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
+        super().__init__()
     def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
+            self.playwright: Playwright = sync_playwright().start()  # pyright: ignore [reportAttributeAccessIssue]
+            if self._config.cdp_url:  # pragma: no cover
+                browser = self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
+                self.context = browser.new_context(**self._context_options)
             else:
+                self.context = self.playwright.chromium.launch_persistent_context(**self._launch_options)
+            if self._config.init_script:  # pragma: no cover
+                self.context.add_init_script(path=self._config.init_script)
+            if self._config.cookies:  # pragma: no cover
+                self.context.add_cookies(self._config.cookies)
         else:
             raise RuntimeError("Session has been already started")
         :return: A `Response` object.
         """
         params = _validate(kwargs, self, PlaywrightConfig)
         if self._closed:  # pragma: no cover
             raise RuntimeError("Context manager has been closed")
             raise e
+class AsyncDynamicSession(AsyncSession, DynamicSessionMixin):
     """An async Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory."""
     def __init__(self, **kwargs: Unpack[PlaywrightSession]):
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
         :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
+        super().__init__(max_pages=self._config.max_pages)
     async def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
+            self.playwright: AsyncPlaywright = await async_playwright().start()  # pyright: ignore [reportAttributeAccessIssue]
+            if self._config.cdp_url:
+                browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
+                self.context: AsyncBrowserContext = await browser.new_context(**self._context_options)
             else:
                 self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
+                    **self._launch_options
                 )
+            if self._config.init_script:  # pragma: no cover
+                await self.context.add_init_script(path=self._config.init_script)
+            if self._config.cookies:
+                await self.context.add_cookies(self._config.cookies)  # pyright: ignore
         else:
             raise RuntimeError("Session has been already started")

scrapling/engines/_browsers/{_camoufox.py → _stealth.py} RENAMED Viewed

@@ -2,117 +2,102 @@ from random import randint
 from re import compile as re_compile
 from playwright.sync_api import (
-    Page,
     Locator,
-    sync_playwright,
 )
 from playwright.async_api import (
-    async_playwright,
     Page as async_Page,
     Locator as AsyncLocator,
     Playwright as AsyncPlaywright,
     BrowserContext as AsyncBrowserContext,
 )
 from scrapling.core.utils import log
-from ._types import CamoufoxSession, CamoufoxFetchParams
-from scrapling.core._types import Any, Unpack, TYPE_CHECKING
 from ._base import SyncSession, AsyncSession, StealthySessionMixin
-from ._validators import validate_fetch as _validate, CamoufoxConfig
 from scrapling.engines.toolbelt.convertor import Response, ResponseFactory
 from scrapling.engines.toolbelt.fingerprints import generate_convincing_referer
 __CF_PATTERN__ = re_compile("challenges.cloudflare.com/cdn-cgi/challenge-platform/.*")
-class StealthySession(StealthySessionMixin, SyncSession):
-    """A Stealthy session manager with page pooling."""
     __slots__ = (
-        "_max_pages",
-        "_headless",
-        "_block_images",
-        "_disable_resources",
-        "_block_webrtc",
-        "_allow_webgl",
-        "_network_idle",
-        "_load_dom",
-        "_humanize",
-        "_solve_cloudflare",
-        "_wait",
-        "_timeout",
-        "_page_action",
-        "_wait_selector",
-        "_init_script",
-        "_addons",
-        "_wait_selector_state",
-        "_cookies",
-        "_google_search",
-        "_extra_headers",
-        "_proxy",
-        "_os_randomize",
-        "_disable_ads",
-        "_geoip",
-        "_selector_config",
-        "_additional_args",
         "playwright",
-        "browser",
         "context",
-        "page_pool",
         "_closed",
-        "launch_options",
-        "_headers_keys",
-        "_user_data_dir",
     )
-    def __init__(self, **kwargs: Unpack[CamoufoxSession]):
-        """A Browser session manager with page pooling
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
-        :param block_images: Prevent the loading of images through Firefox preferences.
-            This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
-        :param block_webrtc: Blocks WebRTC entirely.
         :param cookies: Set cookies for the next request.
-        :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
-        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
-        :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
-        :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
-        :param disable_ads: Disabled by default, this installs the `uBlock Origin` addon on the browser if enabled.
-        :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
-        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
-        :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
-            It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
         :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
-        :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
-        super().__init__(max_pages=self._max_pages)
     def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
-            self.playwright = sync_playwright().start()
-            self.context = self.playwright.firefox.launch_persistent_context(**self.launch_options)
-            if self._init_script:  # pragma: no cover
-                self.context.add_init_script(path=self._init_script)
-            if self._cookies:  # pragma: no cover
-                self.context.add_cookies(self._cookies)
         else:
             raise RuntimeError("Session has been already started")
@@ -148,22 +133,27 @@ class StealthySession(StealthySessionMixin, SyncSession):
                 outer_box = {}
                 iframe = page.frame(url=__CF_PATTERN__)
                 if iframe is not None:
-                    self._wait_for_page_stability(iframe, True, True)
                     if challenge_type != "embedded":
                         while not iframe.frame_element().is_visible():
                             # Double-checking that the iframe is loaded
                             page.wait_for_timeout(500)
                     outer_box: Any = iframe.frame_element().bounding_box()
                 if not iframe or not outer_box:
                     outer_box: Any = page.locator(box_selector).last.bounding_box()
                 # Calculate the Captcha coordinates for any viewport
                 captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
                 # Move the mouse to the center of the window, then press and hold the left mouse button
-                page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
                 self._wait_for_networkidle(page)
                 if iframe is not None:
                     # Wait for the frame to be removed from the page (with 30s timeout = 300 iterations * 100 ms)
@@ -182,7 +172,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
                 log.info("Cloudflare captcha is solved")
                 return
-    def fetch(self, url: str, **kwargs: Unpack[CamoufoxFetchParams]) -> Response:
         """Opens up the browser and do your request based on your chosen options.
         :param url: The Target url.
@@ -203,8 +193,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        params = _validate(kwargs, self, CamoufoxConfig)
         if self._closed:  # pragma: no cover
             raise RuntimeError("Context manager has been closed")
@@ -233,7 +222,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
             if params.page_action:
                 try:
                     _ = params.page_action(page_info.page)
-                except Exception as e:
                     log.error(f"Error executing page_action: {e}")
             if params.wait_selector:
@@ -242,10 +231,12 @@ class StealthySession(StealthySessionMixin, SyncSession):
                     waiter.first.wait_for(state=params.wait_selector_state)
                     # Wait again after waiting for the selector, helpful with protections like Cloudflare
                     self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
-                except Exception as e:
                     log.error(f"Error waiting for selector {params.wait_selector}: {e}")
             page_info.page.wait_for_timeout(params.wait)
             response = ResponseFactory.from_playwright_response(
                 page_info.page, first_response, final_response[0], params.selector_config
             )
@@ -256,72 +247,79 @@ class StealthySession(StealthySessionMixin, SyncSession):
             return response
-        except Exception as e:  # pragma: no cover
             page_info.mark_error()
             raise e
-class AsyncStealthySession(StealthySessionMixin, AsyncSession):
-    """A Stealthy session manager with page pooling."""
-    def __init__(self, **kwargs: Unpack[CamoufoxSession]):
-        """A Browser session manager with page pooling
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
-        :param block_images: Prevent the loading of images through Firefox preferences.
-            This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
-        :param block_webrtc: Blocks WebRTC entirely.
         :param cookies: Set cookies for the next request.
-        :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
-        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
-        :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
-        :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
-        :param disable_ads: Disabled by default, this installs the `uBlock Origin` addon on the browser if enabled.
-        :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
-        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
-        :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
-            It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
-        :param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
         :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
-        :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
-        super().__init__(max_pages=self._max_pages)
     async def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
-            self.playwright: AsyncPlaywright = await async_playwright().start()
-            self.context: AsyncBrowserContext = await self.playwright.firefox.launch_persistent_context(
-                **self.launch_options
-            )
-            if self._init_script:  # pragma: no cover
-                await self.context.add_init_script(path=self._init_script)
-            if self._cookies:
-                await self.context.add_cookies(self._cookies)  # pyright: ignore [reportArgumentType]
         else:
             raise RuntimeError("Session has been already started")
-    async def _cloudflare_solver(self, page: async_Page):  # pragma: no cover
-        """Solve the cloudflare challenge displayed on the playwright page passed. The async version
-        :param page: The async targeted page
         :return:
         """
         await self._wait_for_networkidle(page, timeout=5000)
@@ -331,7 +329,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
             return
         else:
             log.info(f'The turnstile version discovered is "{challenge_type}"')
-            if challenge_type == "non-interactive":  # pragma: no cover
                 while "<title>Just a moment...</title>" in (await ResponseFactory._get_async_page_content(page)):
                     log.info("Waiting for Cloudflare wait page to disappear.")
                     await page.wait_for_timeout(1000)
@@ -350,22 +348,27 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
                 outer_box = {}
                 iframe = page.frame(url=__CF_PATTERN__)
                 if iframe is not None:
-                    await self._wait_for_page_stability(iframe, True, True)
                     if challenge_type != "embedded":
                         while not await (await iframe.frame_element()).is_visible():
                             # Double-checking that the iframe is loaded
                             await page.wait_for_timeout(500)
-                    outer_box: Any = await (await iframe.frame_element()).bounding_box()
                 if not iframe or not outer_box:
                     outer_box: Any = await page.locator(box_selector).last.bounding_box()
                 # Calculate the Captcha coordinates for any viewport
                 captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
                 # Move the mouse to the center of the window, then press and hold the left mouse button
-                await page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
                 await self._wait_for_networkidle(page)
                 if iframe is not None:
                     # Wait for the frame to be removed from the page (with 30s timeout = 300 iterations * 100 ms)
@@ -377,14 +380,14 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
                         await page.wait_for_timeout(100)
                         attempts += 1
                 if challenge_type != "embedded":
-                    await page.locator(box_selector).wait_for(state="detached")
                     await page.locator(".zone-name-title").wait_for(state="hidden")
                 await self._wait_for_page_stability(page, True, False)
                 log.info("Cloudflare captcha is solved")
                 return
-    async def fetch(self, url: str, **kwargs: Unpack[CamoufoxFetchParams]) -> Response:
         """Opens up the browser and do your request based on your chosen options.
         :param url: The Target url.
@@ -405,7 +408,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        params = _validate(kwargs, self, CamoufoxConfig)
         if self._closed:  # pragma: no cover
             raise RuntimeError("Context manager has been closed")
@@ -418,10 +421,6 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
         final_response = [None]
         handle_response = self._create_response_handler(page_info, final_response)
-        if TYPE_CHECKING:
-            if not isinstance(page_info.page, async_Page):
-                raise TypeError
         try:
             # Navigate to URL and wait for a specified state
             page_info.page.on("response", handle_response)
@@ -461,9 +460,8 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
             # Close the page to free up resources
             await page_info.page.close()
             self.page_pool.pages.remove(page_info)
             return response
-        except Exception as e:
             page_info.mark_error()
             raise e

 from re import compile as re_compile
 from playwright.sync_api import (
     Locator,
+    Page,
+    Playwright,
 )
 from playwright.async_api import (
     Page as async_Page,
     Locator as AsyncLocator,
     Playwright as AsyncPlaywright,
     BrowserContext as AsyncBrowserContext,
 )
+from patchright.sync_api import sync_playwright
+from patchright.async_api import async_playwright
 from scrapling.core.utils import log
+from scrapling.core._types import Any, Unpack
+from ._config_tools import _compiled_stealth_scripts
+from ._types import StealthSession, StealthFetchParams
 from ._base import SyncSession, AsyncSession, StealthySessionMixin
+from ._validators import validate_fetch as _validate, StealthConfig
 from scrapling.engines.toolbelt.convertor import Response, ResponseFactory
 from scrapling.engines.toolbelt.fingerprints import generate_convincing_referer
 __CF_PATTERN__ = re_compile("challenges.cloudflare.com/cdn-cgi/challenge-platform/.*")
+class StealthySession(SyncSession, StealthySessionMixin):
+    """A Stealthy Browser session manager with page pooling."""
     __slots__ = (
+        "_config",
+        "_context_options",
+        "_launch_options",
+        "max_pages",
+        "page_pool",
+        "_max_wait_for_page",
         "playwright",
         "context",
         "_closed",
     )
+    def __init__(self, **kwargs: Unpack[StealthSession]):
+        """A Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory.
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
         :param cookies: Set cookies for the next request.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
+        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
+        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+        :param block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
+        :param allow_webgl: Enabled by default. Disabling it disables WebGL and WebGL 2.0 support entirely. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
+        :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
+        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
         :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
+        :param extra_flags: A list of additional browser flags to pass to the browser on launch.
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
+        :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
+        super().__init__()
     def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
+            self.playwright: Playwright = sync_playwright().start()  # pyright: ignore [reportAttributeAccessIssue]
+            if self._config.cdp_url:  # pragma: no cover
+                browser = self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
+                self.context = browser.new_context(**self._context_options)
+            else:
+                self.context = self.playwright.chromium.launch_persistent_context(**self._launch_options)
+            for script in _compiled_stealth_scripts():
+                self.context.add_init_script(script=script)
+            if self._config.init_script:  # pragma: no cover
+                self.context.add_init_script(path=self._config.init_script)
+            if self._config.cookies:  # pragma: no cover
+                self.context.add_cookies(self._config.cookies)
         else:
             raise RuntimeError("Session has been already started")
                 outer_box = {}
                 iframe = page.frame(url=__CF_PATTERN__)
                 if iframe is not None:
+                    self._wait_for_page_stability(iframe, True, False)
                     if challenge_type != "embedded":
                         while not iframe.frame_element().is_visible():
                             # Double-checking that the iframe is loaded
                             page.wait_for_timeout(500)
                     outer_box: Any = iframe.frame_element().bounding_box()
                 if not iframe or not outer_box:
+                    if "<title>Just a moment...</title>" not in (ResponseFactory._get_page_content(page)):
+                        log.info("Cloudflare captcha is solved")
+                        return
                     outer_box: Any = page.locator(box_selector).last.bounding_box()
                 # Calculate the Captcha coordinates for any viewport
                 captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
                 # Left-click at the computed captcha coordinates, keeping the button pressed for a random 100-200 ms
+                page.mouse.click(captcha_x, captcha_y, delay=randint(100, 200), button="left")
                 self._wait_for_networkidle(page)
                 if iframe is not None:
                     # Wait for the frame to be removed from the page (with 30s timeout = 300 iterations * 100 ms)
                 log.info("Cloudflare captcha is solved")
                 return
+    def fetch(self, url: str, **kwargs: Unpack[StealthFetchParams]) -> Response:
         """Opens up the browser and do your request based on your chosen options.
         :param url: The Target url.
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
+        params = _validate(kwargs, self, StealthConfig)
         if self._closed:  # pragma: no cover
             raise RuntimeError("Context manager has been closed")
             if params.page_action:
                 try:
                     _ = params.page_action(page_info.page)
+                except Exception as e:  # pragma: no cover
                     log.error(f"Error executing page_action: {e}")
             if params.wait_selector:
                     waiter.first.wait_for(state=params.wait_selector_state)
                     # Wait again after waiting for the selector, helpful with protections like Cloudflare
                     self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
+                except Exception as e:  # pragma: no cover
                     log.error(f"Error waiting for selector {params.wait_selector}: {e}")
             page_info.page.wait_for_timeout(params.wait)
+            # Create response object
             response = ResponseFactory.from_playwright_response(
                 page_info.page, first_response, final_response[0], params.selector_config
             )
             return response
+        except Exception as e:
             page_info.mark_error()
             raise e
+class AsyncStealthySession(AsyncSession, StealthySessionMixin):
+    """An async Stealthy Browser session manager with page pooling."""
+    def __init__(self, **kwargs: Unpack[StealthSession]):
+        """A Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory.
         :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
         :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
             Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
         :param cookies: Set cookies for the next request.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
+        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
         :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
         :param wait_selector: Wait for a specific CSS selector to be in a specific state.
         :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
+        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+            rules. Defaults to the system default locale.
+        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
+        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+        :param block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
+        :param allow_webgl: Enabled by default. Disabling it disables WebGL and WebGL 2.0 support entirely. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
+        :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
+        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
         :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
+        :param extra_flags: A list of additional browser flags to pass to the browser on launch.
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
+        :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         """
         self.__validate__(**kwargs)
+        super().__init__(max_pages=self._config.max_pages)
     async def start(self):
         """Create a browser for this instance and context."""
         if not self.playwright:
+            self.playwright: AsyncPlaywright = await async_playwright().start()  # pyright: ignore [reportAttributeAccessIssue]
+            if self._config.cdp_url:
+                browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self._config.cdp_url)
+                self.context: AsyncBrowserContext = await browser.new_context(**self._context_options)
+            else:
+                self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
+                    **self._launch_options
+                )
+            for script in _compiled_stealth_scripts():
+                await self.context.add_init_script(script=script)
+            if self._config.init_script:  # pragma: no cover
+                await self.context.add_init_script(path=self._config.init_script)
+            if self._config.cookies:
+                await self.context.add_cookies(self._config.cookies)  # pyright: ignore
         else:
             raise RuntimeError("Session has been already started")
+    async def _cloudflare_solver(self, page: async_Page) -> None:  # pragma: no cover
+        """Solve the cloudflare challenge displayed on the playwright page passed
+        :param page: The targeted page
         :return:
         """
         await self._wait_for_networkidle(page, timeout=5000)
             return
         else:
             log.info(f'The turnstile version discovered is "{challenge_type}"')
+            if challenge_type == "non-interactive":
                 while "<title>Just a moment...</title>" in (await ResponseFactory._get_async_page_content(page)):
                     log.info("Waiting for Cloudflare wait page to disappear.")
                     await page.wait_for_timeout(1000)
                 outer_box = {}
                 iframe = page.frame(url=__CF_PATTERN__)
                 if iframe is not None:
+                    await self._wait_for_page_stability(iframe, True, False)
                     if challenge_type != "embedded":
                         while not await (await iframe.frame_element()).is_visible():
                             # Double-checking that the iframe is loaded
                             await page.wait_for_timeout(500)
+                    outer_box: Any = (await iframe.frame_element()).bounding_box()
                 if not iframe or not outer_box:
+                    if "<title>Just a moment...</title>" not in (await ResponseFactory._get_async_page_content(page)):
+                        log.info("Cloudflare captcha is solved")
+                        return
                     outer_box: Any = await page.locator(box_selector).last.bounding_box()
                 # Calculate the Captcha coordinates for any viewport
                 captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
                 # Left-click at the computed captcha coordinates, keeping the button pressed for a random 100-200 ms
+                await page.mouse.click(captcha_x, captcha_y, delay=randint(100, 200), button="left")
                 await self._wait_for_networkidle(page)
                 if iframe is not None:
                     # Wait for the frame to be removed from the page (with 30s timeout = 300 iterations * 100 ms)
                         await page.wait_for_timeout(100)
                         attempts += 1
                 if challenge_type != "embedded":
+                    await page.locator(box_selector).last.wait_for(state="detached")
                     await page.locator(".zone-name-title").wait_for(state="hidden")
                 await self._wait_for_page_stability(page, True, False)
                 log.info("Cloudflare captcha is solved")
                 return
+    async def fetch(self, url: str, **kwargs: Unpack[StealthFetchParams]) -> Response:
         """Opens up the browser and do your request based on your chosen options.
         :param url: The Target url.
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
+        params = _validate(kwargs, self, StealthConfig)
         if self._closed:  # pragma: no cover
             raise RuntimeError("Context manager has been closed")
         final_response = [None]
         handle_response = self._create_response_handler(page_info, final_response)
         try:
             # Navigate to URL and wait for a specified state
             page_info.page.on("response", handle_response)
             # Close the page to free up resources
             await page_info.page.close()
             self.page_pool.pages.remove(page_info)
             return response
+        except Exception as e:  # pragma: no cover
             page_info.mark_error()
             raise e

scrapling/engines/_browsers/_types.py CHANGED Viewed

@@ -53,7 +53,7 @@ if TYPE_CHECKING:  # pragma: no cover
         json: Optional[Dict | List]
     # Types for browser session
-    class BrowserSession(TypedDict, total=False):
         max_pages: int
         headless: bool
         disable_resources: bool
@@ -64,6 +64,7 @@ if TYPE_CHECKING:  # pragma: no cover
         cookies: Optional[Iterable[Dict]]
         google_search: bool
         wait: int | float
         page_action: Optional[Callable]
         proxy: Optional[str | Dict[str, str] | Tuple]
         extra_headers: Optional[Dict[str, str]]
@@ -72,42 +73,32 @@ if TYPE_CHECKING:  # pragma: no cover
         user_data_dir: str
         selector_config: Optional[Dict]
         additional_args: Optional[Dict]
-    class PlaywrightSession(BrowserSession, total=False):
-        cdp_url: Optional[str]
-        hide_canvas: bool
-        disable_webgl: bool
         real_chrome: bool
-        stealth: bool
-        locale: str
         useragent: Optional[str]
         extra_flags: Optional[List[str]]
     class PlaywrightFetchParams(TypedDict, total=False):
         google_search: bool
         timeout: int | float
-        wait: int | float
-        page_action: Optional[Callable]
-        extra_headers: Optional[Dict[str, str]]
         disable_resources: bool
         wait_selector: Optional[str]
-        wait_selector_state: SelectorWaitStates
-        network_idle: bool
-        load_dom: bool
         selector_config: Optional[Dict]
-    class CamoufoxSession(BrowserSession, total=False):
-        block_images: bool
-        block_webrtc: bool
         allow_webgl: bool
-        humanize: bool | float
         solve_cloudflare: bool
-        addons: Optional[List[str]]
-        os_randomize: bool
-        disable_ads: bool
-        geoip: bool
-    class CamoufoxFetchParams(PlaywrightFetchParams, total=False):
         solve_cloudflare: bool
 else:  # pragma: no cover
@@ -116,5 +107,5 @@ else:  # pragma: no cover
     DataRequestParams = TypedDict
     PlaywrightSession = TypedDict
     PlaywrightFetchParams = TypedDict
-    CamoufoxSession = TypedDict
-    CamoufoxFetchParams = TypedDict

         json: Optional[Dict | List]
     # Types for browser session
+    class PlaywrightSession(TypedDict, total=False):
         max_pages: int
         headless: bool
         disable_resources: bool
         cookies: Optional[Iterable[Dict]]
         google_search: bool
         wait: int | float
+        timezone_id: str | None
         page_action: Optional[Callable]
         proxy: Optional[str | Dict[str, str] | Tuple]
         extra_headers: Optional[Dict[str, str]]
         user_data_dir: str
         selector_config: Optional[Dict]
         additional_args: Optional[Dict]
+        locale: Optional[str]
         real_chrome: bool
+        cdp_url: Optional[str]
         useragent: Optional[str]
         extra_flags: Optional[List[str]]
     class PlaywrightFetchParams(TypedDict, total=False):
+        load_dom: bool
+        wait: int | float
+        network_idle: bool
         google_search: bool
         timeout: int | float
         disable_resources: bool
         wait_selector: Optional[str]
+        page_action: Optional[Callable]
         selector_config: Optional[Dict]
+        extra_headers: Optional[Dict[str, str]]
+        wait_selector_state: SelectorWaitStates
+    class StealthSession(PlaywrightSession, total=False):
         allow_webgl: bool
+        hide_canvas: bool
+        block_webrtc: bool
         solve_cloudflare: bool
+    class StealthFetchParams(PlaywrightFetchParams, total=False):
         solve_cloudflare: bool
 else:  # pragma: no cover
     DataRequestParams = TypedDict
     PlaywrightSession = TypedDict
     PlaywrightFetchParams = TypedDict
+    StealthSession = TypedDict
+    StealthFetchParams = TypedDict

scrapling/engines/_browsers/_validators.py CHANGED Viewed

@@ -14,11 +14,13 @@ from scrapling.core._types import (
     Optional,
     Callable,
     Iterable,
-    SelectorWaitStates,
     overload,
 )
 from scrapling.engines.toolbelt.navigation import construct_proxy_dict
-from scrapling.engines._browsers._types import PlaywrightFetchParams, CamoufoxFetchParams
 # Custom validators for msgspec
@@ -68,26 +70,26 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False, weakref=True):
     cdp_url: Optional[str] = None
     headless: bool = True
     google_search: bool = True
-    hide_canvas: bool = False
-    disable_webgl: bool = False
     real_chrome: bool = False
-    stealth: bool = False
     wait: Seconds = 0
     page_action: Optional[Callable] = None
     proxy: Optional[str | Dict[str, str] | Tuple] = None  # The default value for proxy in Playwright's source is `None`
-    locale: str = "en-US"
     extra_headers: Optional[Dict[str, str]] = None
     useragent: Optional[str] = None
     timeout: Seconds = 30000
     init_script: Optional[str] = None
     disable_resources: bool = False
     wait_selector: Optional[str] = None
-    cookies: Optional[Iterable[Dict]] = None
     network_idle: bool = False
     load_dom: bool = True
     wait_selector_state: SelectorWaitStates = "attached"
     user_data_dir: str = ""
-    timezone_id: str = ""
     extra_flags: Optional[List[str]] = None
     selector_config: Optional[Dict] = {}
     additional_args: Optional[Dict] = {}
@@ -118,64 +120,18 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False, weakref=True):
                 raise ValueError(validation_msg)
-class CamoufoxConfig(Struct, kw_only=True, frozen=False, weakref=True):
-    """Configuration struct for validation"""
-    max_pages: PagesCount = 1
-    headless: bool = True  # noqa: F821
-    block_images: bool = False
-    disable_resources: bool = False
-    block_webrtc: bool = False
     allow_webgl: bool = True
-    network_idle: bool = False
-    load_dom: bool = True
-    humanize: bool | float = True
     solve_cloudflare: bool = False
-    wait: Seconds = 0
-    timeout: Seconds = 30000
-    init_script: Optional[str] = None
-    page_action: Optional[Callable] = None
-    wait_selector: Optional[str] = None
-    addons: Optional[List[str]] = None
-    wait_selector_state: SelectorWaitStates = "attached"
-    cookies: Optional[Iterable[Dict]] = None
-    google_search: bool = True
-    extra_headers: Optional[Dict[str, str]] = None
-    proxy: Optional[str | Dict[str, str] | Tuple] = None  # The default value for proxy in Playwright's source is `None`
-    os_randomize: bool = False
-    disable_ads: bool = False
-    geoip: bool = False
-    user_data_dir: str = ""
-    selector_config: Optional[Dict] = {}
-    additional_args: Optional[Dict] = {}
     def __post_init__(self):
         """Custom validation after msgspec validation"""
-        if self.page_action and not callable(self.page_action):
-            raise TypeError(f"page_action must be callable, got {type(self.page_action).__name__}")
-        if self.proxy:
-            self.proxy = construct_proxy_dict(self.proxy, as_tuple=True)
-        if self.addons:
-            for addon in self.addons:
-                _validate_addon_path(addon)
-        else:
-            self.addons = []
-        if self.init_script is not None:
-            validation_msg = _is_invalid_file_path(self.init_script)
-            if validation_msg:
-                raise ValueError(validation_msg)
-        if not self.cookies:
-            self.cookies = []
         # Cloudflare timeout adjustment
         if self.solve_cloudflare and self.timeout < 60_000:
             self.timeout = 60_000
-        if not self.selector_config:
-            self.selector_config = {}
-        if not self.additional_args:
-            self.additional_args = {}
 @dataclass
@@ -197,9 +153,9 @@ class _fetch_params:
 def validate_fetch(
-    method_kwargs: Dict | PlaywrightFetchParams | CamoufoxFetchParams,
     session: Any,
-    model: type[PlaywrightConfig] | type[CamoufoxConfig],
 ) -> _fetch_params:  # pragma: no cover
     result = {}
     overrides = {}
@@ -210,21 +166,20 @@ def validate_fetch(
     for key in fetch_param_fields:
         if key in method_kwargs:
             overrides[key] = method_kwargs[key]
-        else:
-            # Check for underscore-prefixed attribute (private)
-            attr_name = f"_{key}"
-            if hasattr(session, attr_name):
-                result[key] = getattr(session, attr_name)
     if overrides:
         validated_config = validate(overrides, model)
-        # Extract only the fields that _fetch_params needs from validated_config
         validated_dict = {
-            f.name: getattr(validated_config, f.name)
-            for f in fields(_fetch_params)
-            if hasattr(validated_config, f.name)
         }
-        validated_dict.setdefault("solve_cloudflare", False)
         # Start with session defaults, then overwrite with validated overrides
         result.update(validated_dict)
@@ -238,7 +193,7 @@ def validate_fetch(
 # Cache default values for each model to reduce validation overhead
 models_default_values = {}
-for _model in (CamoufoxConfig, PlaywrightConfig):
     _defaults = {}
     if hasattr(_model, "__struct_defaults__") and hasattr(_model, "__struct_fields__"):
         for field_name, default_value in zip(_model.__struct_fields__, _model.__struct_defaults__):  # type: ignore
@@ -256,14 +211,14 @@ def _filter_defaults(params: Dict, model: str) -> Dict:
 @overload
-def validate(params: Dict, model: type[PlaywrightConfig]) -> PlaywrightConfig: ...
 @overload
-def validate(params: Dict, model: type[CamoufoxConfig]) -> CamoufoxConfig: ...
-def validate(params: Dict, model: type[PlaywrightConfig] | type[CamoufoxConfig]) -> PlaywrightConfig | CamoufoxConfig:
     try:
         # Filter out params with the default values (no need to validate them) to speed up validation
         filtered = _filter_defaults(params, model.__name__)

     Optional,
     Callable,
     Iterable,
+    Sequence,
     overload,
+    SetCookieParam,
+    SelectorWaitStates,
 )
 from scrapling.engines.toolbelt.navigation import construct_proxy_dict
+from scrapling.engines._browsers._types import PlaywrightFetchParams, StealthFetchParams
 # Custom validators for msgspec
     cdp_url: Optional[str] = None
     headless: bool = True
     google_search: bool = True
+    # hide_canvas: bool = False
+    # disable_webgl: bool = False
     real_chrome: bool = False
+    # stealth: bool = False
     wait: Seconds = 0
     page_action: Optional[Callable] = None
     proxy: Optional[str | Dict[str, str] | Tuple] = None  # The default value for proxy in Playwright's source is `None`
+    locale: str | None = None
     extra_headers: Optional[Dict[str, str]] = None
     useragent: Optional[str] = None
     timeout: Seconds = 30000
     init_script: Optional[str] = None
     disable_resources: bool = False
     wait_selector: Optional[str] = None
+    cookies: Sequence[SetCookieParam] | None = []
     network_idle: bool = False
     load_dom: bool = True
     wait_selector_state: SelectorWaitStates = "attached"
     user_data_dir: str = ""
+    timezone_id: str | None = ""
     extra_flags: Optional[List[str]] = None
     selector_config: Optional[Dict] = {}
     additional_args: Optional[Dict] = {}
                 raise ValueError(validation_msg)
+class StealthConfig(PlaywrightConfig, kw_only=True, frozen=False, weakref=True):
     allow_webgl: bool = True
+    hide_canvas: bool = False
+    block_webrtc: bool = False
     solve_cloudflare: bool = False
     def __post_init__(self):
         """Custom validation after msgspec validation"""
+        super(StealthConfig, self).__post_init__()
         # Cloudflare timeout adjustment
         if self.solve_cloudflare and self.timeout < 60_000:
             self.timeout = 60_000
 @dataclass
 def validate_fetch(
+    method_kwargs: Dict | PlaywrightFetchParams | StealthFetchParams,
     session: Any,
+    model: type[PlaywrightConfig] | type[StealthConfig],
 ) -> _fetch_params:  # pragma: no cover
     result = {}
     overrides = {}
     for key in fetch_param_fields:
         if key in method_kwargs:
             overrides[key] = method_kwargs[key]
+        elif hasattr(session, "_config") and hasattr(session._config, key):
+            result[key] = getattr(session._config, key)
     if overrides:
         validated_config = validate(overrides, model)
+        # Extract ONLY the fields that were actually overridden (not all fields)
+        # This prevents validated defaults from overwriting session config values
         validated_dict = {
+            field: getattr(validated_config, field) for field in overrides.keys() if hasattr(validated_config, field)
         }
+        # Preserve solve_cloudflare if the user explicitly provided it, even if the model doesn't have it
+        if "solve_cloudflare" in overrides:
+            validated_dict["solve_cloudflare"] = overrides["solve_cloudflare"]
         # Start with session defaults, then overwrite with validated overrides
         result.update(validated_dict)
 # Cache default values for each model to reduce validation overhead
 models_default_values = {}
+for _model in (StealthConfig, PlaywrightConfig):
     _defaults = {}
     if hasattr(_model, "__struct_defaults__") and hasattr(_model, "__struct_fields__"):
         for field_name, default_value in zip(_model.__struct_fields__, _model.__struct_defaults__):  # type: ignore
 @overload
+def validate(params: Dict, model: type[StealthConfig]) -> StealthConfig: ...
 @overload
+def validate(params: Dict, model: type[PlaywrightConfig]) -> PlaywrightConfig: ...
+def validate(params: Dict, model: type[PlaywrightConfig] | type[StealthConfig]) -> PlaywrightConfig | StealthConfig:
     try:
         # Filter out params with the default values (no need to validate them) to speed up validation
         filtered = _filter_defaults(params, model.__name__)

scrapling/engines/constants.py CHANGED Viewed

@@ -74,7 +74,6 @@ DEFAULT_STEALTH_FLAGS = (
     "--disable-domain-reliability",
     "--disable-threaded-animation",
     "--disable-threaded-scrolling",
-    # '--disable-reading-from-canvas', # For Firefox
     "--enable-simple-cache-backend",
     "--disable-background-networking",
     "--enable-surface-synchronization",

     "--disable-domain-reliability",
     "--disable-threaded-animation",
     "--disable-threaded-scrolling",
     "--enable-simple-cache-backend",
     "--disable-background-networking",
     "--enable-surface-synchronization",

scrapling/fetchers/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from scrapling.fetchers.requests import Fetcher, AsyncFetcher, FetcherSession
     from scrapling.fetchers.chrome import DynamicFetcher, DynamicSession, AsyncDynamicSession
-    from scrapling.fetchers.firefox import StealthyFetcher, StealthySession, AsyncStealthySession
 # Lazy import mapping
@@ -14,9 +14,9 @@ _LAZY_IMPORTS = {
     "DynamicFetcher": ("scrapling.fetchers.chrome", "DynamicFetcher"),
     "DynamicSession": ("scrapling.fetchers.chrome", "DynamicSession"),
     "AsyncDynamicSession": ("scrapling.fetchers.chrome", "AsyncDynamicSession"),
-    "StealthyFetcher": ("scrapling.fetchers.firefox", "StealthyFetcher"),
-    "StealthySession": ("scrapling.fetchers.firefox", "StealthySession"),
-    "AsyncStealthySession": ("scrapling.fetchers.firefox", "AsyncStealthySession"),
 }
 __all__ = [

 if TYPE_CHECKING:
     from scrapling.fetchers.requests import Fetcher, AsyncFetcher, FetcherSession
     from scrapling.fetchers.chrome import DynamicFetcher, DynamicSession, AsyncDynamicSession
+    from scrapling.fetchers.stealth_chrome import StealthyFetcher, StealthySession, AsyncStealthySession
 # Lazy import mapping
     "DynamicFetcher": ("scrapling.fetchers.chrome", "DynamicFetcher"),
     "DynamicSession": ("scrapling.fetchers.chrome", "DynamicSession"),
     "AsyncDynamicSession": ("scrapling.fetchers.chrome", "AsyncDynamicSession"),
+    "StealthyFetcher": ("scrapling.fetchers.stealth_chrome", "StealthyFetcher"),
+    "StealthySession": ("scrapling.fetchers.stealth_chrome", "StealthySession"),
+    "AsyncStealthySession": ("scrapling.fetchers.stealth_chrome", "AsyncStealthySession"),
 }
 __all__ = [

scrapling/fetchers/{firefox.py → stealth_chrome.py} RENAMED Viewed

@@ -1,48 +1,52 @@
 from scrapling.core._types import Unpack
-from scrapling.engines._browsers._types import CamoufoxSession
 from scrapling.engines.toolbelt.custom import BaseFetcher, Response
-from scrapling.engines._browsers._camoufox import StealthySession, AsyncStealthySession
 class StealthyFetcher(BaseFetcher):
-    """A `Fetcher` class type that is a completely stealthy fetcher that uses a modified version of Firefox.
-    It works as real browsers passing almost all online tests/protections based on Camoufox.
-    Other added flavors include setting the faked OS fingerprints to match the user's OS, and the referer of every request is set as if this request came from Google's search of this URL's domain.
     """
     @classmethod
-    def fetch(cls, url: str, **kwargs: Unpack[CamoufoxSession]) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
         :param url: Target url.
         :param kwargs: Browser session configuration options including:
             - headless: Run the browser in headless/hidden (default), or headful/visible mode.
-            - block_images: Prevent the loading of images through Firefox preferences.
-            - disable_resources: Drop requests of unnecessary resources for a speed boost.
-            - block_webrtc: Blocks WebRTC entirely.
-            - allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
             - network_idle: Wait for the page until there are no network connections for at least 500 ms.
-            - load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
-            - humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement.
-            - solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
-            - wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the Response object.
             - timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
             - page_action: Added for automation. A function that takes the `page` object and does the automation you need.
             - wait_selector: Wait for a specific CSS selector to be in a specific state.
-            - init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
-            - addons: List of Firefox addons to use. Must be paths to extracted addons.
             - wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
-            - cookies: Set cookies for the next request.
             - google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
-            - extra_headers: A dictionary of extra headers to add to the request.
             - proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
-            - os_randomize: If enabled, Scrapling will randomize the OS fingerprints used.
-            - disable_ads: Disabled by default, this installs the `uBlock Origin` addon on the browser if enabled.
-            - geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
-            - additional_args: Additional arguments to be passed to Camoufox as additional settings.
         :return: A `Response` object.
         """
         selector_config = kwargs.get("selector_config", {}) or kwargs.get(
@@ -57,37 +61,42 @@ class StealthyFetcher(BaseFetcher):
             return engine.fetch(url)
     @classmethod
-    async def async_fetch(cls, url: str, **kwargs: Unpack[CamoufoxSession]) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
         :param url: Target url.
         :param kwargs: Browser session configuration options including:
             - headless: Run the browser in headless/hidden (default), or headful/visible mode.
-            - block_images: Prevent the loading of images through Firefox preferences.
-            - disable_resources: Drop requests of unnecessary resources for a speed boost.
-            - block_webrtc: Blocks WebRTC entirely.
-            - allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
             - network_idle: Wait for the page until there are no network connections for at least 500 ms.
-            - load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
-            - humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement.
-            - solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
-            - wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the Response object.
             - timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
             - page_action: Added for automation. A function that takes the `page` object and does the automation you need.
             - wait_selector: Wait for a specific CSS selector to be in a specific state.
-            - init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
-            - addons: List of Firefox addons to use. Must be paths to extracted addons.
             - wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
-            - cookies: Set cookies for the next request.
             - google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
-            - extra_headers: A dictionary of extra headers to add to the request.
             - proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
-            - os_randomize: If enabled, Scrapling will randomize the OS fingerprints used.
-            - disable_ads: Disabled by default, this installs the `uBlock Origin` addon on the browser if enabled.
-            - geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
-            - additional_args: Additional arguments to be passed to Camoufox as additional settings.
         :return: A `Response` object.
         """
         selector_config = kwargs.get("selector_config", {}) or kwargs.get(

 from scrapling.core._types import Unpack
+from scrapling.engines._browsers._types import StealthSession
 from scrapling.engines.toolbelt.custom import BaseFetcher, Response
+from scrapling.engines._browsers._stealth import StealthySession, AsyncStealthySession
 class StealthyFetcher(BaseFetcher):
+    """A `Fetcher` class type that is a completely stealthy fetcher built on top of Chromium.
+    It works like a real browser, passing almost all online tests/protections, and offers many customization options.
     """
     @classmethod
+    def fetch(cls, url: str, **kwargs: Unpack[StealthSession]) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
         :param url: Target url.
         :param kwargs: Browser session configuration options including:
             - headless: Run the browser in headless/hidden (default), or headful/visible mode.
+            - disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
+                Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
+                This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+            - useragent: Pass a user-agent string to be used. Otherwise, the fetcher will generate a real user agent for the same browser and use it.
+            - cookies: Set cookies for the next request.
             - network_idle: Wait for the page until there are no network connections for at least 500 ms.
             - timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
+            - wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the `Response` object.
             - page_action: Added for automation. A function that takes the `page` object and does the automation you need.
             - wait_selector: Wait for a specific CSS selector to be in a specific state.
+            - init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
+            - locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+                rules. Defaults to the system default locale.
+            - timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
             - wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+            - solve_cloudflare: Solves all types of Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
+            - real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+            - hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+            - block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
+            - allow_webgl: Enabled by default. Disabling it disables WebGL and WebGL 2.0 support entirely. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
+            - load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
+            - cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
             - google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
+            - extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
             - proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+            - user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
+            - extra_flags: A list of additional browser flags to pass to the browser on launch.
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
+            - additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         :return: A `Response` object.
         """
         selector_config = kwargs.get("selector_config", {}) or kwargs.get(
             return engine.fetch(url)
     @classmethod
+    async def async_fetch(cls, url: str, **kwargs: Unpack[StealthSession]) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
         :param url: Target url.
         :param kwargs: Browser session configuration options including:
             - headless: Run the browser in headless/hidden (default), or headful/visible mode.
+            - disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
+                Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
+                This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+            - useragent: Pass a user-agent string to be used. Otherwise, the fetcher will generate a real user agent for the same browser and use it.
+            - cookies: Set cookies for the next request.
             - network_idle: Wait for the page until there are no network connections for at least 500 ms.
             - timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
+            - wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the `Response` object.
             - page_action: Added for automation. A function that takes the `page` object and does the automation you need.
             - wait_selector: Wait for a specific CSS selector to be in a specific state.
+            - init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
+            - locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
+                rules. Defaults to the system default locale.
+            - timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
             - wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+            - solve_cloudflare: Solves all types of Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
+            - real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+            - hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+            - block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
+            - allow_webgl: Enabled by default. Disabling it disables WebGL and WebGL 2.0 support entirely. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
+            - load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
+            - cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
             - google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
+            - extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
             - proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+            - user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
+            - extra_flags: A list of additional browser flags to pass to the browser on launch.
             - selector_config: The arguments that will be passed in the end while creating the final Selector's class.
+            - additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         :return: A `Response` object.
         """
         selector_config = kwargs.get("selector_config", {}) or kwargs.get(