Karim shoair committed on
Commit ·
8e67a4c
1
Parent(s): d4fe1d6
refactor: huge change, many features/class got a better naming
Browse files- `Adaptor` became `Selector`
- `Adaptors` became `Selectors`
- `auto_match` argument/feature became `adaptive`
- `adaptor_arguments` argument became `selector_config`
- `automatch_domain` argument became `adaptive_domain`
- `additional_arguments` argument became `additional_args`
- `storage_adaptors` file became just `storage`
- README.md +5 -5
- benchmarks.py +4 -4
- scrapling/__init__.py +5 -5
- scrapling/core/ai.py +6 -6
- scrapling/core/shell.py +17 -17
- scrapling/core/{storage_adaptors.py → storage.py} +0 -0
- scrapling/engines/_browsers/_camoufox.py +20 -20
- scrapling/engines/_browsers/_controllers.py +10 -10
- scrapling/engines/_browsers/_validators.py +9 -9
- scrapling/engines/static.py +12 -18
- scrapling/engines/toolbelt/convertor.py +1 -1
- scrapling/engines/toolbelt/custom.py +20 -22
- scrapling/fetchers.py +10 -10
- scrapling/parser.py +105 -107
- tests/fetchers/async/test_camoufox.py +1 -1
- tests/fetchers/async/test_dynamic.py +1 -1
- tests/fetchers/async/test_requests.py +1 -1
- tests/fetchers/sync/test_camoufox.py +1 -1
- tests/fetchers/sync/test_dynamic.py +1 -1
- tests/fetchers/sync/test_requests.py +1 -1
- tests/parser/{test_automatch.py → test_adaptive.py} +8 -8
- tests/parser/test_general.py +12 -12
README.md
CHANGED
|
@@ -52,14 +52,14 @@ Scrapling is a high-performance, intelligent web scraping library for Python tha
|
|
| 52 |
|
| 53 |
```python
|
| 54 |
>> from scrapling.fetchers import Fetcher, AsyncFetcher, StealthyFetcher, DynamicFetcher
|
| 55 |
-
>> StealthyFetcher.
|
| 56 |
# Fetch websites' source under the radar!
|
| 57 |
>> page = StealthyFetcher.fetch('https://example.com', headless=True, network_idle=True)
|
| 58 |
>> print(page.status)
|
| 59 |
200
|
| 60 |
>> products = page.css('.product', auto_save=True) # Scrape data that survives website design changes!
|
| 61 |
-
>> # Later, if the website structure changes, pass `
|
| 62 |
-
>> products = page.css('.product',
|
| 63 |
```
|
| 64 |
|
| 65 |
# Sponsors
|
|
@@ -150,7 +150,7 @@ Tired of your PC slowing you down? Can’t keep your machine on 24/7 for scrapin
|
|
| 150 |
```python
|
| 151 |
from scrapling.fetchers import Fetcher
|
| 152 |
|
| 153 |
-
# Do HTTP GET request to a web page and create an
|
| 154 |
page = Fetcher.get('https://quotes.toscrape.com/', stealthy_headers=True)
|
| 155 |
# Get all text content from all HTML tags in the page except the `script` and `style` tags
|
| 156 |
page.get_all_text(ignore_tags=('script', 'style'))
|
|
@@ -219,7 +219,7 @@ Here are the results:
|
|
| 219 |
| Scrapling | 2.51 | 1.0x |
|
| 220 |
| AutoScraper | 11.41 | 4.546x |
|
| 221 |
|
| 222 |
-
Scrapling can find elements with more methods and returns the entire element's `
|
| 223 |
|
| 224 |
As you see, Scrapling is still 4.5 times faster at the same task.
|
| 225 |
|
|
|
|
| 52 |
|
| 53 |
```python
|
| 54 |
>> from scrapling.fetchers import Fetcher, AsyncFetcher, StealthyFetcher, DynamicFetcher
|
| 55 |
+
>> StealthyFetcher.adaptive = True
|
| 56 |
# Fetch websites' source under the radar!
|
| 57 |
>> page = StealthyFetcher.fetch('https://example.com', headless=True, network_idle=True)
|
| 58 |
>> print(page.status)
|
| 59 |
200
|
| 60 |
>> products = page.css('.product', auto_save=True) # Scrape data that survives website design changes!
|
| 61 |
+
>> # Later, if the website structure changes, pass `adaptive=True`
|
| 62 |
+
>> products = page.css('.product', adaptive=True) # and Scrapling still finds them!
|
| 63 |
```
|
| 64 |
|
| 65 |
# Sponsors
|
|
|
|
| 150 |
```python
|
| 151 |
from scrapling.fetchers import Fetcher
|
| 152 |
|
| 153 |
+
# Do an HTTP GET request to a web page and create a Selector instance
|
| 154 |
page = Fetcher.get('https://quotes.toscrape.com/', stealthy_headers=True)
|
| 155 |
# Get all text content from all HTML tags in the page except the `script` and `style` tags
|
| 156 |
page.get_all_text(ignore_tags=('script', 'style'))
|
|
|
|
| 219 |
| Scrapling | 2.51 | 1.0x |
|
| 220 |
| AutoScraper | 11.41 | 4.546x |
|
| 221 |
|
| 222 |
+
Scrapling can find elements with more methods and returns the entire element's `Selector` object, not only text like AutoScraper. So, to make this test fair, both libraries will extract an element with text, find similar elements, and then extract the text content for all of them.
|
| 223 |
|
| 224 |
As you see, Scrapling is still 4.5 times faster at the same task.
|
| 225 |
|
benchmarks.py
CHANGED
|
@@ -12,7 +12,7 @@ from parsel import Selector
|
|
| 12 |
from pyquery import PyQuery as pq
|
| 13 |
from selectolax.parser import HTMLParser
|
| 14 |
|
| 15 |
-
from scrapling import
|
| 16 |
|
| 17 |
large_html = (
|
| 18 |
"<html><body>" + '<div class="item">' * 5000 + "</div>" * 5000 + "</body></html>"
|
|
@@ -73,9 +73,9 @@ def test_pyquery():
|
|
| 73 |
@benchmark
|
| 74 |
def test_scrapling():
|
| 75 |
# No need to do `.extract()` like parsel to extract text
|
| 76 |
-
# Also, this is faster than `[t.text for t in
|
| 77 |
# for obvious reasons, of course.
|
| 78 |
-
return
|
| 79 |
|
| 80 |
|
| 81 |
@benchmark
|
|
@@ -112,7 +112,7 @@ def test_scrapling_text(request_html):
|
|
| 112 |
# Will loop over resulted elements to get text too to make comparison even more fair otherwise Scrapling will be even faster
|
| 113 |
return [
|
| 114 |
element.text
|
| 115 |
-
for element in
|
| 116 |
.find_by_text("Tipping the Velvet", first_match=True)
|
| 117 |
.find_similar(ignore_attributes=["title"])
|
| 118 |
]
|
|
|
|
| 12 |
from pyquery import PyQuery as pq
|
| 13 |
from selectolax.parser import HTMLParser
|
| 14 |
|
| 15 |
+
from scrapling import Selector as ScraplingSelector
|
| 16 |
|
| 17 |
large_html = (
|
| 18 |
"<html><body>" + '<div class="item">' * 5000 + "</div>" * 5000 + "</body></html>"
|
|
|
|
| 73 |
@benchmark
|
| 74 |
def test_scrapling():
|
| 75 |
# No need to do `.extract()` like parsel to extract text
|
| 76 |
+
# Also, this is faster than `[t.text for t in Selector(large_html, adaptive=False).css('.item')]`
|
| 77 |
# for obvious reasons, of course.
|
| 78 |
+
return ScraplingSelector(large_html, adaptive=False).css(".item::text")
|
| 79 |
|
| 80 |
|
| 81 |
@benchmark
|
|
|
|
| 112 |
# Will loop over resulted elements to get text too to make comparison even more fair otherwise Scrapling will be even faster
|
| 113 |
return [
|
| 114 |
element.text
|
| 115 |
+
for element in ScraplingSelector(request_html, adaptive=False)
|
| 116 |
.find_by_text("Tipping the Velvet", first_match=True)
|
| 117 |
.find_similar(ignore_attributes=["title"])
|
| 118 |
]
|
scrapling/__init__.py
CHANGED
|
@@ -10,12 +10,12 @@ def __getattr__(name):
|
|
| 10 |
from scrapling.fetchers import Fetcher as cls
|
| 11 |
|
| 12 |
return cls
|
| 13 |
-
elif name == "
|
| 14 |
-
from scrapling.parser import
|
| 15 |
|
| 16 |
return cls
|
| 17 |
-
elif name == "
|
| 18 |
-
from scrapling.parser import
|
| 19 |
|
| 20 |
return cls
|
| 21 |
elif name == "AttributesHandler":
|
|
@@ -46,4 +46,4 @@ def __getattr__(name):
|
|
| 46 |
raise AttributeError(f"module 'scrapling' has no attribute '{name}'")
|
| 47 |
|
| 48 |
|
| 49 |
-
__all__ = ["
|
|
|
|
| 10 |
from scrapling.fetchers import Fetcher as cls
|
| 11 |
|
| 12 |
return cls
|
| 13 |
+
elif name == "Selector":
|
| 14 |
+
from scrapling.parser import Selector as cls
|
| 15 |
|
| 16 |
return cls
|
| 17 |
+
elif name == "Selectors":
|
| 18 |
+
from scrapling.parser import Selectors as cls
|
| 19 |
|
| 20 |
return cls
|
| 21 |
elif name == "AttributesHandler":
|
|
|
|
| 46 |
raise AttributeError(f"module 'scrapling' has no attribute '{name}'")
|
| 47 |
|
| 48 |
|
| 49 |
+
__all__ = ["Selector", "Fetcher", "AsyncFetcher", "StealthyFetcher", "DynamicFetcher"]
|
scrapling/core/ai.py
CHANGED
|
@@ -430,7 +430,7 @@ class ScraplingMCPServer:
|
|
| 430 |
os_randomize: bool = False,
|
| 431 |
disable_ads: bool = False,
|
| 432 |
geoip: bool = False,
|
| 433 |
-
|
| 434 |
) -> ResponseModel:
|
| 435 |
"""Use Scrapling's version of the Camoufox browser to fetch a URL and return a structured output of the result.
|
| 436 |
Note: This is best suitable for high protection levels. It's slower than the other tools.
|
|
@@ -467,7 +467,7 @@ class ScraplingMCPServer:
|
|
| 467 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
|
| 468 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 469 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 470 |
-
:param
|
| 471 |
"""
|
| 472 |
page = await StealthyFetcher.async_fetch(
|
| 473 |
url,
|
|
@@ -491,7 +491,7 @@ class ScraplingMCPServer:
|
|
| 491 |
solve_cloudflare=solve_cloudflare,
|
| 492 |
disable_resources=disable_resources,
|
| 493 |
wait_selector_state=wait_selector_state,
|
| 494 |
-
|
| 495 |
)
|
| 496 |
return _ContentTranslator(
|
| 497 |
Convertor._extract_content(
|
|
@@ -530,7 +530,7 @@ class ScraplingMCPServer:
|
|
| 530 |
os_randomize: bool = False,
|
| 531 |
disable_ads: bool = False,
|
| 532 |
geoip: bool = False,
|
| 533 |
-
|
| 534 |
) -> List[ResponseModel]:
|
| 535 |
"""Use Scrapling's version of the Camoufox browser to fetch a group of URLs at the same time, and for each page return a structured output of the result.
|
| 536 |
Note: This is best suitable for high protection levels. It's slower than the other tools.
|
|
@@ -567,7 +567,7 @@ class ScraplingMCPServer:
|
|
| 567 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
|
| 568 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 569 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 570 |
-
:param
|
| 571 |
"""
|
| 572 |
async with AsyncStealthySession(
|
| 573 |
wait=wait,
|
|
@@ -591,7 +591,7 @@ class ScraplingMCPServer:
|
|
| 591 |
solve_cloudflare=solve_cloudflare,
|
| 592 |
disable_resources=disable_resources,
|
| 593 |
wait_selector_state=wait_selector_state,
|
| 594 |
-
|
| 595 |
) as session:
|
| 596 |
tasks = [session.fetch(url) for url in urls]
|
| 597 |
responses = await gather(*tasks)
|
|
|
|
| 430 |
os_randomize: bool = False,
|
| 431 |
disable_ads: bool = False,
|
| 432 |
geoip: bool = False,
|
| 433 |
+
additional_args: Optional[Dict] = None,
|
| 434 |
) -> ResponseModel:
|
| 435 |
"""Use Scrapling's version of the Camoufox browser to fetch a URL and return a structured output of the result.
|
| 436 |
Note: This is best suitable for high protection levels. It's slower than the other tools.
|
|
|
|
| 467 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
|
| 468 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 469 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 470 |
+
:param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 471 |
"""
|
| 472 |
page = await StealthyFetcher.async_fetch(
|
| 473 |
url,
|
|
|
|
| 491 |
solve_cloudflare=solve_cloudflare,
|
| 492 |
disable_resources=disable_resources,
|
| 493 |
wait_selector_state=wait_selector_state,
|
| 494 |
+
additional_args=additional_args,
|
| 495 |
)
|
| 496 |
return _ContentTranslator(
|
| 497 |
Convertor._extract_content(
|
|
|
|
| 530 |
os_randomize: bool = False,
|
| 531 |
disable_ads: bool = False,
|
| 532 |
geoip: bool = False,
|
| 533 |
+
additional_args: Optional[Dict] = None,
|
| 534 |
) -> List[ResponseModel]:
|
| 535 |
"""Use Scrapling's version of the Camoufox browser to fetch a group of URLs at the same time, and for each page return a structured output of the result.
|
| 536 |
Note: This is best suitable for high protection levels. It's slower than the other tools.
|
|
|
|
| 567 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
|
| 568 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 569 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 570 |
+
:param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 571 |
"""
|
| 572 |
async with AsyncStealthySession(
|
| 573 |
wait=wait,
|
|
|
|
| 591 |
solve_cloudflare=solve_cloudflare,
|
| 592 |
disable_resources=disable_resources,
|
| 593 |
wait_selector_state=wait_selector_state,
|
| 594 |
+
additional_args=additional_args,
|
| 595 |
) as session:
|
| 596 |
tasks = [session.fetch(url) for url in urls]
|
| 597 |
responses = await gather(*tasks)
|
scrapling/core/shell.py
CHANGED
|
@@ -27,7 +27,7 @@ from orjson import loads as json_loads, JSONDecodeError
|
|
| 27 |
from scrapling import __version__
|
| 28 |
from scrapling.core.custom_types import TextHandler
|
| 29 |
from scrapling.core.utils import log
|
| 30 |
-
from scrapling.parser import
|
| 31 |
from scrapling.core._types import (
|
| 32 |
List,
|
| 33 |
Optional,
|
|
@@ -399,9 +399,9 @@ class CurlParser:
|
|
| 399 |
return None
|
| 400 |
|
| 401 |
|
| 402 |
-
def show_page_in_browser(page:
|
| 403 |
-
if not page or not isinstance(page,
|
| 404 |
-
log.error("Input must be of type `
|
| 405 |
return
|
| 406 |
|
| 407 |
try:
|
|
@@ -421,7 +421,7 @@ class CustomShell:
|
|
| 421 |
def __init__(self, code, log_level="debug"):
|
| 422 |
self.code = code
|
| 423 |
self.page = None
|
| 424 |
-
self.pages =
|
| 425 |
self._curl_parser = CurlParser()
|
| 426 |
log_level = log_level.strip().lower()
|
| 427 |
|
|
@@ -457,7 +457,7 @@ class CustomShell:
|
|
| 457 |
- Fetcher/AsyncFetcher
|
| 458 |
- DynamicFetcher
|
| 459 |
- StealthyFetcher
|
| 460 |
-
-
|
| 461 |
|
| 462 |
-> Useful shortcuts:
|
| 463 |
- {"get":<30} Shortcut for `Fetcher.get`
|
|
@@ -469,7 +469,7 @@ class CustomShell:
|
|
| 469 |
|
| 470 |
-> Useful commands
|
| 471 |
- {"page / response":<30} The response object of the last page you fetched
|
| 472 |
-
- {"pages":<30}
|
| 473 |
- {"uncurl('curl_command')":<30} Convert curl command to a Request object. (Optimized to handle curl commands copied from DevTools network tab.)
|
| 474 |
- {"curl2fetcher('curl_command')":<30} Convert curl command and make the request with Fetcher. (Optimized to handle curl commands copied from DevTools network tab.)
|
| 475 |
- {"view(page)":<30} View page in a browser
|
|
@@ -481,7 +481,7 @@ Type 'exit' or press Ctrl+D to exit.
|
|
| 481 |
def update_page(self, result):
|
| 482 |
"""Update the current page and add to pages history"""
|
| 483 |
self.page = result
|
| 484 |
-
if isinstance(result, (Response,
|
| 485 |
self.pages.append(result)
|
| 486 |
if len(self.pages) > 5:
|
| 487 |
self.pages.pop(0) # Remove oldest item
|
|
@@ -528,7 +528,7 @@ Type 'exit' or press Ctrl+D to exit.
|
|
| 528 |
"DynamicFetcher": DynamicFetcher,
|
| 529 |
"stealthy_fetch": stealthy_fetch,
|
| 530 |
"StealthyFetcher": StealthyFetcher,
|
| 531 |
-
"
|
| 532 |
"page": self.page,
|
| 533 |
"response": self.page,
|
| 534 |
"pages": self.pages,
|
|
@@ -586,14 +586,14 @@ class Convertor:
|
|
| 586 |
@classmethod
|
| 587 |
def _extract_content(
|
| 588 |
cls,
|
| 589 |
-
page:
|
| 590 |
extraction_type: extraction_types = "markdown",
|
| 591 |
css_selector: Optional[str] = None,
|
| 592 |
main_content_only: bool = False,
|
| 593 |
) -> Generator[str, None, None]:
|
| 594 |
-
"""Extract the content of an
|
| 595 |
-
if not page or not isinstance(page,
|
| 596 |
-
raise TypeError("Input must be of type `
|
| 597 |
elif not extraction_type or extraction_type not in cls._extension_map.values():
|
| 598 |
raise ValueError(f"Unknown extraction type: {extraction_type}")
|
| 599 |
else:
|
|
@@ -622,11 +622,11 @@ class Convertor:
|
|
| 622 |
|
| 623 |
@classmethod
|
| 624 |
def write_content_to_file(
|
| 625 |
-
cls, page:
|
| 626 |
) -> None:
|
| 627 |
-
"""Write an
|
| 628 |
-
if not page or not isinstance(page,
|
| 629 |
-
raise TypeError("Input must be of type `
|
| 630 |
elif not filename or not isinstance(filename, str) or not filename.strip():
|
| 631 |
raise ValueError("Filename must be provided")
|
| 632 |
elif not filename.endswith((".md", ".html", ".txt")):
|
|
|
|
| 27 |
from scrapling import __version__
|
| 28 |
from scrapling.core.custom_types import TextHandler
|
| 29 |
from scrapling.core.utils import log
|
| 30 |
+
from scrapling.parser import Selector, Selectors
|
| 31 |
from scrapling.core._types import (
|
| 32 |
List,
|
| 33 |
Optional,
|
|
|
|
| 399 |
return None
|
| 400 |
|
| 401 |
|
| 402 |
+
def show_page_in_browser(page: Selector):
|
| 403 |
+
if not page or not isinstance(page, Selector):
|
| 404 |
+
log.error("Input must be of type `Selector`")
|
| 405 |
return
|
| 406 |
|
| 407 |
try:
|
|
|
|
| 421 |
def __init__(self, code, log_level="debug"):
|
| 422 |
self.code = code
|
| 423 |
self.page = None
|
| 424 |
+
self.pages = Selectors([])
|
| 425 |
self._curl_parser = CurlParser()
|
| 426 |
log_level = log_level.strip().lower()
|
| 427 |
|
|
|
|
| 457 |
- Fetcher/AsyncFetcher
|
| 458 |
- DynamicFetcher
|
| 459 |
- StealthyFetcher
|
| 460 |
+
- Selector
|
| 461 |
|
| 462 |
-> Useful shortcuts:
|
| 463 |
- {"get":<30} Shortcut for `Fetcher.get`
|
|
|
|
| 469 |
|
| 470 |
-> Useful commands
|
| 471 |
- {"page / response":<30} The response object of the last page you fetched
|
| 472 |
+
- {"pages":<30} Selectors object of the last 5 response objects you fetched
|
| 473 |
- {"uncurl('curl_command')":<30} Convert curl command to a Request object. (Optimized to handle curl commands copied from DevTools network tab.)
|
| 474 |
- {"curl2fetcher('curl_command')":<30} Convert curl command and make the request with Fetcher. (Optimized to handle curl commands copied from DevTools network tab.)
|
| 475 |
- {"view(page)":<30} View page in a browser
|
|
|
|
| 481 |
def update_page(self, result):
|
| 482 |
"""Update the current page and add to pages history"""
|
| 483 |
self.page = result
|
| 484 |
+
if isinstance(result, (Response, Selector)):
|
| 485 |
self.pages.append(result)
|
| 486 |
if len(self.pages) > 5:
|
| 487 |
self.pages.pop(0) # Remove oldest item
|
|
|
|
| 528 |
"DynamicFetcher": DynamicFetcher,
|
| 529 |
"stealthy_fetch": stealthy_fetch,
|
| 530 |
"StealthyFetcher": StealthyFetcher,
|
| 531 |
+
"Selector": Selector,
|
| 532 |
"page": self.page,
|
| 533 |
"response": self.page,
|
| 534 |
"pages": self.pages,
|
|
|
|
| 586 |
@classmethod
|
| 587 |
def _extract_content(
|
| 588 |
cls,
|
| 589 |
+
page: Selector,
|
| 590 |
extraction_type: extraction_types = "markdown",
|
| 591 |
css_selector: Optional[str] = None,
|
| 592 |
main_content_only: bool = False,
|
| 593 |
) -> Generator[str, None, None]:
|
| 594 |
+
"""Extract the content of a Selector"""
|
| 595 |
+
if not page or not isinstance(page, Selector):
|
| 596 |
+
raise TypeError("Input must be of type `Selector`")
|
| 597 |
elif not extraction_type or extraction_type not in cls._extension_map.values():
|
| 598 |
raise ValueError(f"Unknown extraction type: {extraction_type}")
|
| 599 |
else:
|
|
|
|
| 622 |
|
| 623 |
@classmethod
|
| 624 |
def write_content_to_file(
|
| 625 |
+
cls, page: Selector, filename: str, css_selector: Optional[str] = None
|
| 626 |
) -> None:
|
| 627 |
+
"""Write an Selector's content to a file"""
|
| 628 |
+
if not page or not isinstance(page, Selector):
|
| 629 |
+
raise TypeError("Input must be of type `Selector`")
|
| 630 |
elif not filename or not isinstance(filename, str) or not filename.strip():
|
| 631 |
raise ValueError("Filename must be provided")
|
| 632 |
elif not filename.endswith((".md", ".html", ".txt")):
|
scrapling/core/{storage_adaptors.py → storage.py}
RENAMED
|
File without changes
|
scrapling/engines/_browsers/_camoufox.py
CHANGED
|
@@ -70,8 +70,8 @@ class StealthySession:
|
|
| 70 |
"os_randomize",
|
| 71 |
"disable_ads",
|
| 72 |
"geoip",
|
| 73 |
-
"
|
| 74 |
-
"
|
| 75 |
"playwright",
|
| 76 |
"browser",
|
| 77 |
"context",
|
|
@@ -105,8 +105,8 @@ class StealthySession:
|
|
| 105 |
os_randomize: bool = False,
|
| 106 |
disable_ads: bool = False,
|
| 107 |
geoip: bool = False,
|
| 108 |
-
|
| 109 |
-
|
| 110 |
):
|
| 111 |
"""A Browser session manager with page pooling
|
| 112 |
|
|
@@ -136,8 +136,8 @@ class StealthySession:
|
|
| 136 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 137 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 138 |
:param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
|
| 139 |
-
:param
|
| 140 |
-
:param
|
| 141 |
"""
|
| 142 |
|
| 143 |
params = {
|
|
@@ -163,8 +163,8 @@ class StealthySession:
|
|
| 163 |
"os_randomize": os_randomize,
|
| 164 |
"disable_ads": disable_ads,
|
| 165 |
"geoip": geoip,
|
| 166 |
-
"
|
| 167 |
-
"
|
| 168 |
}
|
| 169 |
config = validate(params, CamoufoxConfig)
|
| 170 |
|
|
@@ -190,14 +190,14 @@ class StealthySession:
|
|
| 190 |
self.os_randomize = config.os_randomize
|
| 191 |
self.disable_ads = config.disable_ads
|
| 192 |
self.geoip = config.geoip
|
| 193 |
-
self.
|
| 194 |
-
self.
|
| 195 |
|
| 196 |
self.playwright: Optional[Playwright] = None
|
| 197 |
self.context: Optional[BrowserContext] = None
|
| 198 |
self.page_pool = PagePool(self.max_pages)
|
| 199 |
self._closed = False
|
| 200 |
-
self.
|
| 201 |
self.page_action = config.page_action
|
| 202 |
self._headers_keys = (
|
| 203 |
set(map(str.lower, self.extra_headers.keys()))
|
|
@@ -223,7 +223,7 @@ class StealthySession:
|
|
| 223 |
"block_images": self.block_images, # Careful! it makes some websites don't finish loading at all like stackoverflow even in headful mode.
|
| 224 |
"os": None if self.os_randomize else get_os_name(),
|
| 225 |
"user_data_dir": "",
|
| 226 |
-
**self.
|
| 227 |
}
|
| 228 |
)
|
| 229 |
|
|
@@ -433,7 +433,7 @@ class StealthySession:
|
|
| 433 |
|
| 434 |
page_info.page.wait_for_timeout(self.wait)
|
| 435 |
response = ResponseFactory.from_playwright_response(
|
| 436 |
-
page_info.page, first_response, final_response, self.
|
| 437 |
)
|
| 438 |
|
| 439 |
# Mark the page as ready for next use
|
|
@@ -482,8 +482,8 @@ class AsyncStealthySession(StealthySession):
|
|
| 482 |
os_randomize: bool = False,
|
| 483 |
disable_ads: bool = False,
|
| 484 |
geoip: bool = False,
|
| 485 |
-
|
| 486 |
-
|
| 487 |
):
|
| 488 |
"""A Browser session manager with page pooling
|
| 489 |
|
|
@@ -513,8 +513,8 @@ class AsyncStealthySession(StealthySession):
|
|
| 513 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 514 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 515 |
:param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
|
| 516 |
-
:param
|
| 517 |
-
:param
|
| 518 |
"""
|
| 519 |
super().__init__(
|
| 520 |
max_pages,
|
|
@@ -539,8 +539,8 @@ class AsyncStealthySession(StealthySession):
|
|
| 539 |
os_randomize,
|
| 540 |
disable_ads,
|
| 541 |
geoip,
|
| 542 |
-
|
| 543 |
-
|
| 544 |
)
|
| 545 |
self.playwright: Optional[AsyncPlaywright] = None
|
| 546 |
self.context: Optional[AsyncBrowserContext] = None
|
|
@@ -731,7 +731,7 @@ class AsyncStealthySession(StealthySession):
|
|
| 731 |
|
| 732 |
# Create response object
|
| 733 |
response = await ResponseFactory.from_async_playwright_response(
|
| 734 |
-
page_info.page, first_response, final_response, self.
|
| 735 |
)
|
| 736 |
|
| 737 |
# Mark the page as ready for next use
|
|
|
|
| 70 |
"os_randomize",
|
| 71 |
"disable_ads",
|
| 72 |
"geoip",
|
| 73 |
+
"selector_config",
|
| 74 |
+
"additional_args",
|
| 75 |
"playwright",
|
| 76 |
"browser",
|
| 77 |
"context",
|
|
|
|
| 105 |
os_randomize: bool = False,
|
| 106 |
disable_ads: bool = False,
|
| 107 |
geoip: bool = False,
|
| 108 |
+
selector_config: Optional[Dict] = None,
|
| 109 |
+
additional_args: Optional[Dict] = None,
|
| 110 |
):
|
| 111 |
"""A Browser session manager with page pooling
|
| 112 |
|
|
|
|
| 136 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 137 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 138 |
:param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
|
| 139 |
+
:param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
|
| 140 |
+
:param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 141 |
"""
|
| 142 |
|
| 143 |
params = {
|
|
|
|
| 163 |
"os_randomize": os_randomize,
|
| 164 |
"disable_ads": disable_ads,
|
| 165 |
"geoip": geoip,
|
| 166 |
+
"selector_config": selector_config,
|
| 167 |
+
"additional_args": additional_args,
|
| 168 |
}
|
| 169 |
config = validate(params, CamoufoxConfig)
|
| 170 |
|
|
|
|
| 190 |
self.os_randomize = config.os_randomize
|
| 191 |
self.disable_ads = config.disable_ads
|
| 192 |
self.geoip = config.geoip
|
| 193 |
+
self.selector_config = config.selector_config
|
| 194 |
+
self.additional_args = config.additional_args
|
| 195 |
|
| 196 |
self.playwright: Optional[Playwright] = None
|
| 197 |
self.context: Optional[BrowserContext] = None
|
| 198 |
self.page_pool = PagePool(self.max_pages)
|
| 199 |
self._closed = False
|
| 200 |
+
self.selector_config = config.selector_config
|
| 201 |
self.page_action = config.page_action
|
| 202 |
self._headers_keys = (
|
| 203 |
set(map(str.lower, self.extra_headers.keys()))
|
|
|
|
| 223 |
"block_images": self.block_images, # Careful! it makes some websites don't finish loading at all like stackoverflow even in headful mode.
|
| 224 |
"os": None if self.os_randomize else get_os_name(),
|
| 225 |
"user_data_dir": "",
|
| 226 |
+
**self.additional_args,
|
| 227 |
}
|
| 228 |
)
|
| 229 |
|
|
|
|
| 433 |
|
| 434 |
page_info.page.wait_for_timeout(self.wait)
|
| 435 |
response = ResponseFactory.from_playwright_response(
|
| 436 |
+
page_info.page, first_response, final_response, self.selector_config
|
| 437 |
)
|
| 438 |
|
| 439 |
# Mark the page as ready for next use
|
|
|
|
| 482 |
os_randomize: bool = False,
|
| 483 |
disable_ads: bool = False,
|
| 484 |
geoip: bool = False,
|
| 485 |
+
selector_config: Optional[Dict] = None,
|
| 486 |
+
additional_args: Optional[Dict] = None,
|
| 487 |
):
|
| 488 |
"""A Browser session manager with page pooling
|
| 489 |
|
|
|
|
| 513 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 514 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 515 |
:param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
|
| 516 |
+
:param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
|
| 517 |
+
:param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 518 |
"""
|
| 519 |
super().__init__(
|
| 520 |
max_pages,
|
|
|
|
| 539 |
os_randomize,
|
| 540 |
disable_ads,
|
| 541 |
geoip,
|
| 542 |
+
selector_config,
|
| 543 |
+
additional_args,
|
| 544 |
)
|
| 545 |
self.playwright: Optional[AsyncPlaywright] = None
|
| 546 |
self.context: Optional[AsyncBrowserContext] = None
|
|
|
|
| 731 |
|
| 732 |
# Create response object
|
| 733 |
response = await ResponseFactory.from_async_playwright_response(
|
| 734 |
+
page_info.page, first_response, final_response, self.selector_config
|
| 735 |
)
|
| 736 |
|
| 737 |
# Mark the page as ready for next use
|
scrapling/engines/_browsers/_controllers.py
CHANGED
|
@@ -70,7 +70,7 @@ class DynamicSession:
|
|
| 70 |
"context",
|
| 71 |
"page_pool",
|
| 72 |
"_closed",
|
| 73 |
-
"
|
| 74 |
"page_action",
|
| 75 |
"launch_options",
|
| 76 |
"context_options",
|
|
@@ -100,7 +100,7 @@ class DynamicSession:
|
|
| 100 |
cookies: Optional[List[Dict]] = None,
|
| 101 |
network_idle: bool = False,
|
| 102 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 103 |
-
|
| 104 |
):
|
| 105 |
"""A Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory.
|
| 106 |
|
|
@@ -125,7 +125,7 @@ class DynamicSession:
|
|
| 125 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
|
| 126 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 127 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 128 |
-
:param
|
| 129 |
"""
|
| 130 |
|
| 131 |
params = {
|
|
@@ -143,7 +143,7 @@ class DynamicSession:
|
|
| 143 |
"extra_headers": extra_headers,
|
| 144 |
"useragent": useragent,
|
| 145 |
"timeout": timeout,
|
| 146 |
-
"
|
| 147 |
"disable_resources": disable_resources,
|
| 148 |
"wait_selector": wait_selector,
|
| 149 |
"cookies": cookies,
|
|
@@ -177,7 +177,7 @@ class DynamicSession:
|
|
| 177 |
self.context: Optional[BrowserContext] = None
|
| 178 |
self.page_pool = PagePool(self.max_pages)
|
| 179 |
self._closed = False
|
| 180 |
-
self.
|
| 181 |
self.page_action = config.page_action
|
| 182 |
self._headers_keys = (
|
| 183 |
set(map(str.lower, self.extra_headers.keys()))
|
|
@@ -370,7 +370,7 @@ class DynamicSession:
|
|
| 370 |
|
| 371 |
# Create response object
|
| 372 |
response = ResponseFactory.from_playwright_response(
|
| 373 |
-
page_info.page, first_response, final_response, self.
|
| 374 |
)
|
| 375 |
|
| 376 |
# Mark the page as ready for next use
|
|
@@ -417,7 +417,7 @@ class AsyncDynamicSession(DynamicSession):
|
|
| 417 |
cookies: Optional[List[Dict]] = None,
|
| 418 |
network_idle: bool = False,
|
| 419 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 420 |
-
|
| 421 |
):
|
| 422 |
"""A Browser session manager with page pooling
|
| 423 |
|
|
@@ -443,7 +443,7 @@ class AsyncDynamicSession(DynamicSession):
|
|
| 443 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 444 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 445 |
:param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
|
| 446 |
-
:param
|
| 447 |
"""
|
| 448 |
|
| 449 |
super().__init__(
|
|
@@ -467,7 +467,7 @@ class AsyncDynamicSession(DynamicSession):
|
|
| 467 |
cookies,
|
| 468 |
network_idle,
|
| 469 |
wait_selector_state,
|
| 470 |
-
|
| 471 |
)
|
| 472 |
|
| 473 |
self.playwright: Optional[AsyncPlaywright] = None
|
|
@@ -623,7 +623,7 @@ class AsyncDynamicSession(DynamicSession):
|
|
| 623 |
|
| 624 |
# Create response object
|
| 625 |
response = await ResponseFactory.from_async_playwright_response(
|
| 626 |
-
page_info.page, first_response, final_response, self.
|
| 627 |
)
|
| 628 |
|
| 629 |
# Mark the page as ready for next use
|
|
|
|
| 70 |
"context",
|
| 71 |
"page_pool",
|
| 72 |
"_closed",
|
| 73 |
+
"selector_config",
|
| 74 |
"page_action",
|
| 75 |
"launch_options",
|
| 76 |
"context_options",
|
|
|
|
| 100 |
cookies: Optional[List[Dict]] = None,
|
| 101 |
network_idle: bool = False,
|
| 102 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 103 |
+
selector_config: Optional[Dict] = None,
|
| 104 |
):
|
| 105 |
"""A Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory.
|
| 106 |
|
|
|
|
| 125 |
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
|
| 126 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 127 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 128 |
+
:param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
|
| 129 |
"""
|
| 130 |
|
| 131 |
params = {
|
|
|
|
| 143 |
"extra_headers": extra_headers,
|
| 144 |
"useragent": useragent,
|
| 145 |
"timeout": timeout,
|
| 146 |
+
"selector_config": selector_config,
|
| 147 |
"disable_resources": disable_resources,
|
| 148 |
"wait_selector": wait_selector,
|
| 149 |
"cookies": cookies,
|
|
|
|
| 177 |
self.context: Optional[BrowserContext] = None
|
| 178 |
self.page_pool = PagePool(self.max_pages)
|
| 179 |
self._closed = False
|
| 180 |
+
self.selector_config = config.selector_config
|
| 181 |
self.page_action = config.page_action
|
| 182 |
self._headers_keys = (
|
| 183 |
set(map(str.lower, self.extra_headers.keys()))
|
|
|
|
| 370 |
|
| 371 |
# Create response object
|
| 372 |
response = ResponseFactory.from_playwright_response(
|
| 373 |
+
page_info.page, first_response, final_response, self.selector_config
|
| 374 |
)
|
| 375 |
|
| 376 |
# Mark the page as ready for next use
|
|
|
|
| 417 |
cookies: Optional[List[Dict]] = None,
|
| 418 |
network_idle: bool = False,
|
| 419 |
wait_selector_state: SelectorWaitStates = "attached",
|
| 420 |
+
selector_config: Optional[Dict] = None,
|
| 421 |
):
|
| 422 |
"""A Browser session manager with page pooling
|
| 423 |
|
|
|
|
| 443 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 444 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 445 |
:param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
|
| 446 |
+
:param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
|
| 447 |
"""
|
| 448 |
|
| 449 |
super().__init__(
|
|
|
|
| 467 |
cookies,
|
| 468 |
network_idle,
|
| 469 |
wait_selector_state,
|
| 470 |
+
selector_config,
|
| 471 |
)
|
| 472 |
|
| 473 |
self.playwright: Optional[AsyncPlaywright] = None
|
|
|
|
| 623 |
|
| 624 |
# Create response object
|
| 625 |
response = await ResponseFactory.from_async_playwright_response(
|
| 626 |
+
page_info.page, first_response, final_response, self.selector_config
|
| 627 |
)
|
| 628 |
|
| 629 |
# Mark the page as ready for next use
|
scrapling/engines/_browsers/_validators.py
CHANGED
|
@@ -39,7 +39,7 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
|
|
| 39 |
cookies: Optional[List[Dict]] = None
|
| 40 |
network_idle: bool = False
|
| 41 |
wait_selector_state: SelectorWaitStates = "attached"
|
| 42 |
-
|
| 43 |
|
| 44 |
def __post_init__(self):
|
| 45 |
"""Custom validation after msgspec validation"""
|
|
@@ -57,8 +57,8 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
|
|
| 57 |
self.__validate_cdp(self.cdp_url)
|
| 58 |
if not self.cookies:
|
| 59 |
self.cookies = []
|
| 60 |
-
if not self.
|
| 61 |
-
self.
|
| 62 |
|
| 63 |
@staticmethod
|
| 64 |
def __validate_cdp(cdp_url):
|
|
@@ -105,8 +105,8 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
|
|
| 105 |
os_randomize: bool = False
|
| 106 |
disable_ads: bool = False
|
| 107 |
geoip: bool = False
|
| 108 |
-
|
| 109 |
-
|
| 110 |
|
| 111 |
def __post_init__(self):
|
| 112 |
"""Custom validation after msgspec validation"""
|
|
@@ -136,10 +136,10 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
|
|
| 136 |
self.cookies = []
|
| 137 |
if self.solve_cloudflare and self.timeout < 60_000:
|
| 138 |
self.timeout = 60_000
|
| 139 |
-
if not self.
|
| 140 |
-
self.
|
| 141 |
-
if not self.
|
| 142 |
-
self.
|
| 143 |
|
| 144 |
|
| 145 |
def validate(params, model):
|
|
|
|
| 39 |
cookies: Optional[List[Dict]] = None
|
| 40 |
network_idle: bool = False
|
| 41 |
wait_selector_state: SelectorWaitStates = "attached"
|
| 42 |
+
selector_config: Optional[Dict] = None
|
| 43 |
|
| 44 |
def __post_init__(self):
|
| 45 |
"""Custom validation after msgspec validation"""
|
|
|
|
| 57 |
self.__validate_cdp(self.cdp_url)
|
| 58 |
if not self.cookies:
|
| 59 |
self.cookies = []
|
| 60 |
+
if not self.selector_config:
|
| 61 |
+
self.selector_config = {}
|
| 62 |
|
| 63 |
@staticmethod
|
| 64 |
def __validate_cdp(cdp_url):
|
|
|
|
| 105 |
os_randomize: bool = False
|
| 106 |
disable_ads: bool = False
|
| 107 |
geoip: bool = False
|
| 108 |
+
selector_config: Optional[Dict] = None
|
| 109 |
+
additional_args: Optional[Dict] = None
|
| 110 |
|
| 111 |
def __post_init__(self):
|
| 112 |
"""Custom validation after msgspec validation"""
|
|
|
|
| 136 |
self.cookies = []
|
| 137 |
if self.solve_cloudflare and self.timeout < 60_000:
|
| 138 |
self.timeout = 60_000
|
| 139 |
+
if not self.selector_config:
|
| 140 |
+
self.selector_config = {}
|
| 141 |
+
if not self.additional_args:
|
| 142 |
+
self.additional_args = {}
|
| 143 |
|
| 144 |
|
| 145 |
def validate(params, model):
|
scrapling/engines/static.py
CHANGED
|
@@ -63,7 +63,7 @@ class FetcherSession:
|
|
| 63 |
max_redirects: int = 30,
|
| 64 |
verify: bool = True,
|
| 65 |
cert: Optional[Union[str, Tuple[str, str]]] = None,
|
| 66 |
-
|
| 67 |
):
|
| 68 |
"""
|
| 69 |
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
|
@@ -81,7 +81,7 @@ class FetcherSession:
|
|
| 81 |
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
| 82 |
:param verify: Whether to verify HTTPS certificates. Defaults to True.
|
| 83 |
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
| 84 |
-
:param
|
| 85 |
"""
|
| 86 |
self.default_impersonate = impersonate
|
| 87 |
self.stealth = stealthy_headers
|
|
@@ -97,7 +97,7 @@ class FetcherSession:
|
|
| 97 |
self.default_verify = verify
|
| 98 |
self.default_cert = cert
|
| 99 |
self.default_http3 = http3
|
| 100 |
-
self.
|
| 101 |
|
| 102 |
self._curl_session: Optional[CurlSession] = None
|
| 103 |
self._async_curl_session: Optional[AsyncCurlSession] = None
|
|
@@ -260,7 +260,7 @@ class FetcherSession:
|
|
| 260 |
request_args: Dict[str, Any],
|
| 261 |
max_retries: int,
|
| 262 |
retry_delay: int,
|
| 263 |
-
|
| 264 |
) -> Response:
|
| 265 |
"""
|
| 266 |
Perform an HTTP request using the configured session.
|
|
@@ -270,7 +270,7 @@ class FetcherSession:
|
|
| 270 |
:param request_args: Arguments to be passed to the session's `request()` method.
|
| 271 |
:param max_retries: Maximum number of retries for the request.
|
| 272 |
:param retry_delay: Number of seconds to wait between retries.
|
| 273 |
-
:param
|
| 274 |
:return: A `Response` object for synchronous requests or an awaitable for asynchronous.
|
| 275 |
"""
|
| 276 |
session = self._curl_session
|
|
@@ -286,9 +286,7 @@ class FetcherSession:
|
|
| 286 |
try:
|
| 287 |
response = session.request(method, **request_args)
|
| 288 |
# response.raise_for_status() # Retry responses with a status code between 200-400
|
| 289 |
-
return ResponseFactory.from_http_request(
|
| 290 |
-
response, adaptor_arguments
|
| 291 |
-
)
|
| 292 |
except CurlError as e:
|
| 293 |
if attempt < max_retries - 1:
|
| 294 |
log.error(
|
|
@@ -307,7 +305,7 @@ class FetcherSession:
|
|
| 307 |
request_args: Dict[str, Any],
|
| 308 |
max_retries: int,
|
| 309 |
retry_delay: int,
|
| 310 |
-
|
| 311 |
) -> Response:
|
| 312 |
"""
|
| 313 |
Perform an HTTP request using the configured session.
|
|
@@ -317,7 +315,7 @@ class FetcherSession:
|
|
| 317 |
:param request_args: Arguments to be passed to the session's `request()` method.
|
| 318 |
:param max_retries: Maximum number of retries for the request.
|
| 319 |
:param retry_delay: Number of seconds to wait between retries.
|
| 320 |
-
:param
|
| 321 |
:return: A `Response` object for synchronous requests or an awaitable for asynchronous.
|
| 322 |
"""
|
| 323 |
session = self._async_curl_session
|
|
@@ -335,9 +333,7 @@ class FetcherSession:
|
|
| 335 |
try:
|
| 336 |
response = await session.request(method, **request_args)
|
| 337 |
# response.raise_for_status() # Retry responses with a status code between 200-400
|
| 338 |
-
return ResponseFactory.from_http_request(
|
| 339 |
-
response, adaptor_arguments
|
| 340 |
-
)
|
| 341 |
except CurlError as e:
|
| 342 |
if attempt < max_retries - 1:
|
| 343 |
log.error(
|
|
@@ -373,9 +369,7 @@ class FetcherSession:
|
|
| 373 |
"""
|
| 374 |
stealth = self.stealth if stealth is None else stealth
|
| 375 |
|
| 376 |
-
|
| 377 |
-
kwargs.pop("adaptor_arguments", {}) or self.adaptor_arguments
|
| 378 |
-
)
|
| 379 |
max_retries = self.get_with_precedence(kwargs, "retries", self.default_retries)
|
| 380 |
retry_delay = self.get_with_precedence(
|
| 381 |
kwargs, "retry_delay", self.default_retry_delay
|
|
@@ -383,12 +377,12 @@ class FetcherSession:
|
|
| 383 |
request_args = self._merge_request_args(stealth=stealth, **kwargs)
|
| 384 |
if self._curl_session:
|
| 385 |
return self.__make_request(
|
| 386 |
-
method, request_args, max_retries, retry_delay,
|
| 387 |
)
|
| 388 |
elif self._async_curl_session:
|
| 389 |
# The returned value is a Coroutine
|
| 390 |
return self.__make_async_request(
|
| 391 |
-
method, request_args, max_retries, retry_delay,
|
| 392 |
)
|
| 393 |
|
| 394 |
raise RuntimeError("No active session available.")
|
|
|
|
| 63 |
max_redirects: int = 30,
|
| 64 |
verify: bool = True,
|
| 65 |
cert: Optional[Union[str, Tuple[str, str]]] = None,
|
| 66 |
+
selector_config: Optional[Dict] = None,
|
| 67 |
):
|
| 68 |
"""
|
| 69 |
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
|
|
|
| 81 |
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
| 82 |
:param verify: Whether to verify HTTPS certificates. Defaults to True.
|
| 83 |
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
| 84 |
+
:param selector_config: Arguments passed when creating the final Selector class.
|
| 85 |
"""
|
| 86 |
self.default_impersonate = impersonate
|
| 87 |
self.stealth = stealthy_headers
|
|
|
|
| 97 |
self.default_verify = verify
|
| 98 |
self.default_cert = cert
|
| 99 |
self.default_http3 = http3
|
| 100 |
+
self.selector_config = selector_config or {}
|
| 101 |
|
| 102 |
self._curl_session: Optional[CurlSession] = None
|
| 103 |
self._async_curl_session: Optional[AsyncCurlSession] = None
|
|
|
|
| 260 |
request_args: Dict[str, Any],
|
| 261 |
max_retries: int,
|
| 262 |
retry_delay: int,
|
| 263 |
+
selector_config: Optional[Dict] = None,
|
| 264 |
) -> Response:
|
| 265 |
"""
|
| 266 |
Perform an HTTP request using the configured session.
|
|
|
|
| 270 |
:param request_args: Arguments to be passed to the session's `request()` method.
|
| 271 |
:param max_retries: Maximum number of retries for the request.
|
| 272 |
:param retry_delay: Number of seconds to wait between retries.
|
| 273 |
+
:param selector_config: Arguments passed when creating the final Selector class.
|
| 274 |
:return: A `Response` object for synchronous requests or an awaitable for asynchronous.
|
| 275 |
"""
|
| 276 |
session = self._curl_session
|
|
|
|
| 286 |
try:
|
| 287 |
response = session.request(method, **request_args)
|
| 288 |
# response.raise_for_status() # Retry responses with a status code between 200-400
|
| 289 |
+
return ResponseFactory.from_http_request(response, selector_config)
|
|
|
|
|
|
|
| 290 |
except CurlError as e:
|
| 291 |
if attempt < max_retries - 1:
|
| 292 |
log.error(
|
|
|
|
| 305 |
request_args: Dict[str, Any],
|
| 306 |
max_retries: int,
|
| 307 |
retry_delay: int,
|
| 308 |
+
selector_config: Optional[Dict] = None,
|
| 309 |
) -> Response:
|
| 310 |
"""
|
| 311 |
Perform an HTTP request using the configured session.
|
|
|
|
| 315 |
:param request_args: Arguments to be passed to the session's `request()` method.
|
| 316 |
:param max_retries: Maximum number of retries for the request.
|
| 317 |
:param retry_delay: Number of seconds to wait between retries.
|
| 318 |
+
:param selector_config: Arguments passed when creating the final Selector class.
|
| 319 |
:return: A `Response` object for synchronous requests or an awaitable for asynchronous.
|
| 320 |
"""
|
| 321 |
session = self._async_curl_session
|
|
|
|
| 333 |
try:
|
| 334 |
response = await session.request(method, **request_args)
|
| 335 |
# response.raise_for_status() # Retry responses with a status code between 200-400
|
| 336 |
+
return ResponseFactory.from_http_request(response, selector_config)
|
|
|
|
|
|
|
| 337 |
except CurlError as e:
|
| 338 |
if attempt < max_retries - 1:
|
| 339 |
log.error(
|
|
|
|
| 369 |
"""
|
| 370 |
stealth = self.stealth if stealth is None else stealth
|
| 371 |
|
| 372 |
+
selector_config = kwargs.pop("selector_config", {}) or self.selector_config
|
|
|
|
|
|
|
| 373 |
max_retries = self.get_with_precedence(kwargs, "retries", self.default_retries)
|
| 374 |
retry_delay = self.get_with_precedence(
|
| 375 |
kwargs, "retry_delay", self.default_retry_delay
|
|
|
|
| 377 |
request_args = self._merge_request_args(stealth=stealth, **kwargs)
|
| 378 |
if self._curl_session:
|
| 379 |
return self.__make_request(
|
| 380 |
+
method, request_args, max_retries, retry_delay, selector_config
|
| 381 |
)
|
| 382 |
elif self._async_curl_session:
|
| 383 |
# The returned value is a Coroutine
|
| 384 |
return self.__make_async_request(
|
| 385 |
+
method, request_args, max_retries, retry_delay, selector_config
|
| 386 |
)
|
| 387 |
|
| 388 |
raise RuntimeError("No active session available.")
|
scrapling/engines/toolbelt/convertor.py
CHANGED
|
@@ -239,7 +239,7 @@ class ResponseFactory:
|
|
| 239 |
|
| 240 |
:param response: `curl_cffi` response object
|
| 241 |
:param parser_arguments: Additional arguments to be passed to the `Response` object constructor.
|
| 242 |
-
:return: A `Response` object that is the same as `
|
| 243 |
"""
|
| 244 |
return Response(
|
| 245 |
url=response.url,
|
|
|
|
| 239 |
|
| 240 |
:param response: `curl_cffi` response object
|
| 241 |
:param parser_arguments: Additional arguments to be passed to the `Response` object constructor.
|
| 242 |
+
:return: A `Response` object that is the same as `Selector` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
|
| 243 |
"""
|
| 244 |
return Response(
|
| 245 |
url=response.url,
|
scrapling/engines/toolbelt/custom.py
CHANGED
|
@@ -15,7 +15,7 @@ from scrapling.core._types import (
|
|
| 15 |
)
|
| 16 |
from scrapling.core.custom_types import MappingProxyType
|
| 17 |
from scrapling.core.utils import log, lru_cache
|
| 18 |
-
from scrapling.parser import
|
| 19 |
|
| 20 |
|
| 21 |
class ResponseEncoding:
|
|
@@ -97,7 +97,7 @@ class ResponseEncoding:
|
|
| 97 |
return cls.__DEFAULT_ENCODING
|
| 98 |
|
| 99 |
|
| 100 |
-
class Response(
|
| 101 |
"""This class is returned by all engines as a way to unify response type between different libraries."""
|
| 102 |
|
| 103 |
def __init__(
|
|
@@ -113,9 +113,9 @@ class Response(Adaptor):
|
|
| 113 |
encoding: str = "utf-8",
|
| 114 |
method: str = "GET",
|
| 115 |
history: List = None,
|
| 116 |
-
**
|
| 117 |
):
|
| 118 |
-
|
| 119 |
self.status = status
|
| 120 |
self.reason = reason
|
| 121 |
self.cookies = cookies
|
|
@@ -126,12 +126,10 @@ class Response(Adaptor):
|
|
| 126 |
super().__init__(
|
| 127 |
text=text,
|
| 128 |
body=body,
|
| 129 |
-
url=
|
| 130 |
encoding=encoding,
|
| 131 |
-
**
|
| 132 |
)
|
| 133 |
-
# For backward compatibility
|
| 134 |
-
self.adaptor = self
|
| 135 |
# For easier debugging while working from a Python shell
|
| 136 |
log.info(
|
| 137 |
f"Fetched ({status}) <{method} {url}> (referer: {request_headers.get('referer')})"
|
|
@@ -144,20 +142,20 @@ class Response(Adaptor):
|
|
| 144 |
class BaseFetcher:
|
| 145 |
__slots__ = ()
|
| 146 |
huge_tree: bool = True
|
| 147 |
-
|
| 148 |
storage: Any = SQLiteStorageSystem
|
| 149 |
keep_cdata: Optional[bool] = False
|
| 150 |
storage_args: Optional[Dict] = None
|
| 151 |
keep_comments: Optional[bool] = False
|
| 152 |
-
|
| 153 |
parser_keywords: Tuple = (
|
| 154 |
"huge_tree",
|
| 155 |
-
"
|
| 156 |
"storage",
|
| 157 |
"keep_cdata",
|
| 158 |
"storage_args",
|
| 159 |
"keep_comments",
|
| 160 |
-
"
|
| 161 |
) # Left open for the user
|
| 162 |
|
| 163 |
def __init__(self, *args, **kwargs):
|
|
@@ -178,17 +176,17 @@ class BaseFetcher:
|
|
| 178 |
huge_tree=cls.huge_tree,
|
| 179 |
keep_comments=cls.keep_comments,
|
| 180 |
keep_cdata=cls.keep_cdata,
|
| 181 |
-
|
| 182 |
storage=cls.storage,
|
| 183 |
storage_args=cls.storage_args,
|
| 184 |
-
|
| 185 |
)
|
| 186 |
|
| 187 |
@classmethod
|
| 188 |
def configure(cls, **kwargs):
|
| 189 |
"""Set multiple arguments for the parser at once globally
|
| 190 |
|
| 191 |
-
:param kwargs: The keywords can be any arguments of the following: huge_tree, keep_comments, keep_cdata,
|
| 192 |
"""
|
| 193 |
for key, value in kwargs.items():
|
| 194 |
key = key.strip().lower()
|
|
@@ -212,23 +210,23 @@ class BaseFetcher:
|
|
| 212 |
|
| 213 |
@classmethod
|
| 214 |
def _generate_parser_arguments(cls) -> Dict:
|
| 215 |
-
#
|
| 216 |
-
# I won't validate
|
| 217 |
parser_arguments = dict(
|
| 218 |
huge_tree=cls.huge_tree,
|
| 219 |
keep_comments=cls.keep_comments,
|
| 220 |
keep_cdata=cls.keep_cdata,
|
| 221 |
-
|
| 222 |
storage=cls.storage,
|
| 223 |
storage_args=cls.storage_args,
|
| 224 |
)
|
| 225 |
-
if cls.
|
| 226 |
-
if type(cls.
|
| 227 |
log.warning(
|
| 228 |
-
'[Ignored] The argument "
|
| 229 |
)
|
| 230 |
else:
|
| 231 |
-
parser_arguments.update({"
|
| 232 |
|
| 233 |
return parser_arguments
|
| 234 |
|
|
|
|
| 15 |
)
|
| 16 |
from scrapling.core.custom_types import MappingProxyType
|
| 17 |
from scrapling.core.utils import log, lru_cache
|
| 18 |
+
from scrapling.parser import Selector, SQLiteStorageSystem
|
| 19 |
|
| 20 |
|
| 21 |
class ResponseEncoding:
|
|
|
|
| 97 |
return cls.__DEFAULT_ENCODING
|
| 98 |
|
| 99 |
|
| 100 |
+
class Response(Selector):
|
| 101 |
"""This class is returned by all engines as a way to unify response type between different libraries."""
|
| 102 |
|
| 103 |
def __init__(
|
|
|
|
| 113 |
encoding: str = "utf-8",
|
| 114 |
method: str = "GET",
|
| 115 |
history: List = None,
|
| 116 |
+
**selector_config: Dict,
|
| 117 |
):
|
| 118 |
+
adaptive_domain = selector_config.pop("adaptive_domain", None)
|
| 119 |
self.status = status
|
| 120 |
self.reason = reason
|
| 121 |
self.cookies = cookies
|
|
|
|
| 126 |
super().__init__(
|
| 127 |
text=text,
|
| 128 |
body=body,
|
| 129 |
+
url=adaptive_domain or url,
|
| 130 |
encoding=encoding,
|
| 131 |
+
**selector_config,
|
| 132 |
)
|
|
|
|
|
|
|
| 133 |
# For easier debugging while working from a Python shell
|
| 134 |
log.info(
|
| 135 |
f"Fetched ({status}) <{method} {url}> (referer: {request_headers.get('referer')})"
|
|
|
|
| 142 |
class BaseFetcher:
|
| 143 |
__slots__ = ()
|
| 144 |
huge_tree: bool = True
|
| 145 |
+
adaptive: Optional[bool] = False
|
| 146 |
storage: Any = SQLiteStorageSystem
|
| 147 |
keep_cdata: Optional[bool] = False
|
| 148 |
storage_args: Optional[Dict] = None
|
| 149 |
keep_comments: Optional[bool] = False
|
| 150 |
+
adaptive_domain: Optional[str] = None
|
| 151 |
parser_keywords: Tuple = (
|
| 152 |
"huge_tree",
|
| 153 |
+
"adaptive",
|
| 154 |
"storage",
|
| 155 |
"keep_cdata",
|
| 156 |
"storage_args",
|
| 157 |
"keep_comments",
|
| 158 |
+
"adaptive_domain",
|
| 159 |
) # Left open for the user
|
| 160 |
|
| 161 |
def __init__(self, *args, **kwargs):
|
|
|
|
| 176 |
huge_tree=cls.huge_tree,
|
| 177 |
keep_comments=cls.keep_comments,
|
| 178 |
keep_cdata=cls.keep_cdata,
|
| 179 |
+
adaptive=cls.adaptive,
|
| 180 |
storage=cls.storage,
|
| 181 |
storage_args=cls.storage_args,
|
| 182 |
+
adaptive_domain=cls.adaptive_domain,
|
| 183 |
)
|
| 184 |
|
| 185 |
@classmethod
|
| 186 |
def configure(cls, **kwargs):
|
| 187 |
"""Set multiple arguments for the parser at once globally
|
| 188 |
|
| 189 |
+
:param kwargs: The keywords can be any arguments of the following: huge_tree, keep_comments, keep_cdata, adaptive, storage, storage_args, adaptive_domain
|
| 190 |
"""
|
| 191 |
for key, value in kwargs.items():
|
| 192 |
key = key.strip().lower()
|
|
|
|
| 210 |
|
| 211 |
@classmethod
|
| 212 |
def _generate_parser_arguments(cls) -> Dict:
|
| 213 |
+
# Selector class parameters
|
| 214 |
+
# I won't validate Selector's class parameters here again, I will leave it to be validated later
|
| 215 |
parser_arguments = dict(
|
| 216 |
huge_tree=cls.huge_tree,
|
| 217 |
keep_comments=cls.keep_comments,
|
| 218 |
keep_cdata=cls.keep_cdata,
|
| 219 |
+
adaptive=cls.adaptive,
|
| 220 |
storage=cls.storage,
|
| 221 |
storage_args=cls.storage_args,
|
| 222 |
)
|
| 223 |
+
if cls.adaptive_domain:
|
| 224 |
+
if type(cls.adaptive_domain) is not str:
|
| 225 |
log.warning(
|
| 226 |
+
'[Ignored] The argument "adaptive_domain" must be of string type'
|
| 227 |
)
|
| 228 |
else:
|
| 229 |
+
parser_arguments.update({"adaptive_domain": cls.adaptive_domain})
|
| 230 |
|
| 231 |
return parser_arguments
|
| 232 |
|
scrapling/fetchers.py
CHANGED
|
@@ -74,7 +74,7 @@ class StealthyFetcher(BaseFetcher):
|
|
| 74 |
disable_ads: bool = False,
|
| 75 |
geoip: bool = False,
|
| 76 |
custom_config: Optional[Dict] = None,
|
| 77 |
-
|
| 78 |
) -> Response:
|
| 79 |
"""
|
| 80 |
Opens up a browser and do your request based on your chosen options below.
|
|
@@ -106,7 +106,7 @@ class StealthyFetcher(BaseFetcher):
|
|
| 106 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 107 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 108 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 109 |
-
:param
|
| 110 |
:return: A `Response` object.
|
| 111 |
"""
|
| 112 |
if not custom_config:
|
|
@@ -139,8 +139,8 @@ class StealthyFetcher(BaseFetcher):
|
|
| 139 |
solve_cloudflare=solve_cloudflare,
|
| 140 |
disable_resources=disable_resources,
|
| 141 |
wait_selector_state=wait_selector_state,
|
| 142 |
-
|
| 143 |
-
|
| 144 |
) as engine:
|
| 145 |
return engine.fetch(url)
|
| 146 |
|
|
@@ -170,7 +170,7 @@ class StealthyFetcher(BaseFetcher):
|
|
| 170 |
disable_ads: bool = False,
|
| 171 |
geoip: bool = False,
|
| 172 |
custom_config: Optional[Dict] = None,
|
| 173 |
-
|
| 174 |
) -> Response:
|
| 175 |
"""
|
| 176 |
Opens up a browser and do your request based on your chosen options below.
|
|
@@ -202,7 +202,7 @@ class StealthyFetcher(BaseFetcher):
|
|
| 202 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 203 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 204 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 205 |
-
:param
|
| 206 |
:return: A `Response` object.
|
| 207 |
"""
|
| 208 |
if not custom_config:
|
|
@@ -235,8 +235,8 @@ class StealthyFetcher(BaseFetcher):
|
|
| 235 |
solve_cloudflare=solve_cloudflare,
|
| 236 |
disable_resources=disable_resources,
|
| 237 |
wait_selector_state=wait_selector_state,
|
| 238 |
-
|
| 239 |
-
|
| 240 |
) as engine:
|
| 241 |
return await engine.fetch(url)
|
| 242 |
|
|
@@ -337,7 +337,7 @@ class DynamicFetcher(BaseFetcher):
|
|
| 337 |
disable_webgl=disable_webgl,
|
| 338 |
disable_resources=disable_resources,
|
| 339 |
wait_selector_state=wait_selector_state,
|
| 340 |
-
|
| 341 |
) as session:
|
| 342 |
return session.fetch(url)
|
| 343 |
|
|
@@ -421,7 +421,7 @@ class DynamicFetcher(BaseFetcher):
|
|
| 421 |
disable_webgl=disable_webgl,
|
| 422 |
disable_resources=disable_resources,
|
| 423 |
wait_selector_state=wait_selector_state,
|
| 424 |
-
|
| 425 |
) as session:
|
| 426 |
return await session.fetch(url)
|
| 427 |
|
|
|
|
| 74 |
disable_ads: bool = False,
|
| 75 |
geoip: bool = False,
|
| 76 |
custom_config: Optional[Dict] = None,
|
| 77 |
+
additional_args: Optional[Dict] = None,
|
| 78 |
) -> Response:
|
| 79 |
"""
|
| 80 |
Opens up a browser and do your request based on your chosen options below.
|
|
|
|
| 106 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 107 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 108 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 109 |
+
:param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 110 |
:return: A `Response` object.
|
| 111 |
"""
|
| 112 |
if not custom_config:
|
|
|
|
| 139 |
solve_cloudflare=solve_cloudflare,
|
| 140 |
disable_resources=disable_resources,
|
| 141 |
wait_selector_state=wait_selector_state,
|
| 142 |
+
selector_config={**cls._generate_parser_arguments(), **custom_config},
|
| 143 |
+
additional_args=additional_args or {},
|
| 144 |
) as engine:
|
| 145 |
return engine.fetch(url)
|
| 146 |
|
|
|
|
| 170 |
disable_ads: bool = False,
|
| 171 |
geoip: bool = False,
|
| 172 |
custom_config: Optional[Dict] = None,
|
| 173 |
+
additional_args: Optional[Dict] = None,
|
| 174 |
) -> Response:
|
| 175 |
"""
|
| 176 |
Opens up a browser and do your request based on your chosen options below.
|
|
|
|
| 202 |
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
|
| 203 |
:param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
|
| 204 |
:param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
|
| 205 |
+
:param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
|
| 206 |
:return: A `Response` object.
|
| 207 |
"""
|
| 208 |
if not custom_config:
|
|
|
|
| 235 |
solve_cloudflare=solve_cloudflare,
|
| 236 |
disable_resources=disable_resources,
|
| 237 |
wait_selector_state=wait_selector_state,
|
| 238 |
+
selector_config={**cls._generate_parser_arguments(), **custom_config},
|
| 239 |
+
additional_args=additional_args or {},
|
| 240 |
) as engine:
|
| 241 |
return await engine.fetch(url)
|
| 242 |
|
|
|
|
| 337 |
disable_webgl=disable_webgl,
|
| 338 |
disable_resources=disable_resources,
|
| 339 |
wait_selector_state=wait_selector_state,
|
| 340 |
+
selector_config={**cls._generate_parser_arguments(), **custom_config},
|
| 341 |
) as session:
|
| 342 |
return session.fetch(url)
|
| 343 |
|
|
|
|
| 421 |
disable_webgl=disable_webgl,
|
| 422 |
disable_resources=disable_resources,
|
| 423 |
wait_selector_state=wait_selector_state,
|
| 424 |
+
selector_config={**cls._generate_parser_arguments(), **custom_config},
|
| 425 |
) as session:
|
| 426 |
return await session.fetch(url)
|
| 427 |
|
scrapling/parser.py
CHANGED
|
@@ -24,7 +24,7 @@ from scrapling.core._types import (
|
|
| 24 |
)
|
| 25 |
from scrapling.core.custom_types import AttributesHandler, TextHandler, TextHandlers
|
| 26 |
from scrapling.core.mixins import SelectorsGeneration
|
| 27 |
-
from scrapling.core.
|
| 28 |
SQLiteStorageSystem,
|
| 29 |
StorageSystemMixin,
|
| 30 |
_StorageTools,
|
|
@@ -33,11 +33,11 @@ from scrapling.core.translator import translator_instance
|
|
| 33 |
from scrapling.core.utils import clean_spaces, flatten, html_forbidden, is_jsonable, log
|
| 34 |
|
| 35 |
|
| 36 |
-
class
|
| 37 |
__slots__ = (
|
| 38 |
"url",
|
| 39 |
"encoding",
|
| 40 |
-
"
|
| 41 |
"_root",
|
| 42 |
"_storage",
|
| 43 |
"__keep_comments",
|
|
@@ -58,7 +58,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 58 |
root: Optional[html.HtmlElement] = None,
|
| 59 |
keep_comments: Optional[bool] = False,
|
| 60 |
keep_cdata: Optional[bool] = False,
|
| 61 |
-
|
| 62 |
_storage: object = None,
|
| 63 |
storage: Any = SQLiteStorageSystem,
|
| 64 |
storage_args: Optional[Dict] = None,
|
|
@@ -82,7 +82,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 82 |
Don't use it unless you know what you are doing!
|
| 83 |
:param keep_comments: While parsing the HTML body, drop comments or not. Disabled by default for obvious reasons
|
| 84 |
:param keep_cdata: While parsing the HTML body, drop cdata or not. Disabled by default for cleaner HTML.
|
| 85 |
-
:param
|
| 86 |
priority over all auto-match related arguments/functions in the class.
|
| 87 |
:param storage: The storage class to be passed for auto-matching functionalities, see ``Docs`` for more info.
|
| 88 |
:param storage_args: A dictionary of ``argument->value`` pairs to be passed for the storage class.
|
|
@@ -90,7 +90,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 90 |
"""
|
| 91 |
if root is None and not body and text is None:
|
| 92 |
raise ValueError(
|
| 93 |
-
"
|
| 94 |
)
|
| 95 |
|
| 96 |
self.__text = ""
|
|
@@ -134,9 +134,9 @@ class Adaptor(SelectorsGeneration):
|
|
| 134 |
|
| 135 |
self._root = root
|
| 136 |
|
| 137 |
-
self.
|
| 138 |
|
| 139 |
-
if self.
|
| 140 |
if _storage is not None:
|
| 141 |
self._storage = _storage
|
| 142 |
else:
|
|
@@ -214,17 +214,17 @@ class Adaptor(SelectorsGeneration):
|
|
| 214 |
"""
|
| 215 |
return TextHandler(str(element))
|
| 216 |
|
| 217 |
-
def __element_convertor(self, element: html.HtmlElement) -> "
|
| 218 |
-
"""Used internally to convert a single HtmlElement to
|
| 219 |
db_instance = (
|
| 220 |
self._storage if (hasattr(self, "_storage") and self._storage) else None
|
| 221 |
)
|
| 222 |
-
return
|
| 223 |
root=element,
|
| 224 |
url=self.url,
|
| 225 |
encoding=self.encoding,
|
| 226 |
-
|
| 227 |
-
_storage=db_instance, # Reuse existing storage if it exists otherwise it won't be checked if `
|
| 228 |
keep_comments=self.__keep_comments,
|
| 229 |
keep_cdata=self.__keep_cdata,
|
| 230 |
huge_tree=self.__huge_tree_enabled,
|
|
@@ -233,8 +233,8 @@ class Adaptor(SelectorsGeneration):
|
|
| 233 |
|
| 234 |
def __handle_element(
|
| 235 |
self, element: Union[html.HtmlElement, etree._ElementUnicodeResult]
|
| 236 |
-
) -> Union[TextHandler, "
|
| 237 |
-
"""Used internally in all functions to convert a single element to type (
|
| 238 |
if element is None:
|
| 239 |
return None
|
| 240 |
elif self._is_text_node(element):
|
|
@@ -245,23 +245,23 @@ class Adaptor(SelectorsGeneration):
|
|
| 245 |
|
| 246 |
def __handle_elements(
|
| 247 |
self, result: List[Union[html.HtmlElement, etree._ElementUnicodeResult]]
|
| 248 |
-
) -> Union["
|
| 249 |
-
"""Used internally in all functions to convert results to type (
|
| 250 |
if not len(
|
| 251 |
result
|
| 252 |
): # Lxml will give a warning if I used something like `not result`
|
| 253 |
-
return
|
| 254 |
|
| 255 |
# From within the code, this method will always get a list of the same type,
|
| 256 |
# so we will continue without checks for a slight performance boost
|
| 257 |
if self._is_text_node(result[0]):
|
| 258 |
return TextHandlers(list(map(self.__content_convertor, result)))
|
| 259 |
|
| 260 |
-
return
|
| 261 |
|
| 262 |
def __getstate__(self) -> Any:
|
| 263 |
# lxml don't like it :)
|
| 264 |
-
raise TypeError("Can't pickle
|
| 265 |
|
| 266 |
# The following four properties I made them into functions instead of variables directly
|
| 267 |
# So they don't slow down the process of initializing many instances of the class and gets executed only
|
|
@@ -322,7 +322,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 322 |
return TextHandler(separator).join(_all_strings)
|
| 323 |
|
| 324 |
def urljoin(self, relative_url: str) -> str:
|
| 325 |
-
"""Join this
|
| 326 |
return urljoin(self.url, relative_url)
|
| 327 |
|
| 328 |
@property
|
|
@@ -363,20 +363,20 @@ class Adaptor(SelectorsGeneration):
|
|
| 363 |
return class_name in self._root.classes
|
| 364 |
|
| 365 |
@property
|
| 366 |
-
def parent(self) -> Union["
|
| 367 |
"""Return the direct parent of the element or ``None`` otherwise"""
|
| 368 |
return self.__handle_element(self._root.getparent())
|
| 369 |
|
| 370 |
@property
|
| 371 |
-
def below_elements(self) -> "
|
| 372 |
"""Return all elements under the current element in the DOM tree"""
|
| 373 |
below = self._root.xpath(".//*")
|
| 374 |
return self.__handle_elements(below)
|
| 375 |
|
| 376 |
@property
|
| 377 |
-
def children(self) -> "
|
| 378 |
"""Return the children elements of the current element or empty list otherwise"""
|
| 379 |
-
return
|
| 380 |
[
|
| 381 |
self.__element_convertor(child)
|
| 382 |
for child in self._root.iterchildren()
|
|
@@ -385,22 +385,22 @@ class Adaptor(SelectorsGeneration):
|
|
| 385 |
)
|
| 386 |
|
| 387 |
@property
|
| 388 |
-
def siblings(self) -> "
|
| 389 |
"""Return other children of the current element's parent or empty list otherwise"""
|
| 390 |
if self.parent:
|
| 391 |
-
return
|
| 392 |
[child for child in self.parent.children if child._root != self._root]
|
| 393 |
)
|
| 394 |
-
return
|
| 395 |
|
| 396 |
-
def iterancestors(self) -> Generator["
|
| 397 |
"""Return a generator that loops over all ancestors of the element, starting with the element's parent."""
|
| 398 |
for ancestor in self._root.iterancestors():
|
| 399 |
yield self.__element_convertor(ancestor)
|
| 400 |
|
| 401 |
def find_ancestor(
|
| 402 |
-
self, func: Callable[["
|
| 403 |
-
) -> Union["
|
| 404 |
"""Loop over all ancestors of the element till one match the passed function
|
| 405 |
:param func: A function that takes each ancestor as an argument and returns True/False
|
| 406 |
:return: The first ancestor that match the function or ``None`` otherwise.
|
|
@@ -411,13 +411,13 @@ class Adaptor(SelectorsGeneration):
|
|
| 411 |
return None
|
| 412 |
|
| 413 |
@property
|
| 414 |
-
def path(self) -> "
|
| 415 |
-
"""Returns a list of type `
|
| 416 |
lst = list(self.iterancestors())
|
| 417 |
-
return
|
| 418 |
|
| 419 |
@property
|
| 420 |
-
def next(self) -> Union["
|
| 421 |
"""Returns the next element of the current element in the children of the parent or ``None`` otherwise."""
|
| 422 |
next_element = self._root.getnext()
|
| 423 |
if next_element is not None:
|
|
@@ -428,7 +428,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 428 |
return self.__handle_element(next_element)
|
| 429 |
|
| 430 |
@property
|
| 431 |
-
def previous(self) -> Union["
|
| 432 |
"""Returns the previous element of the current element in the children of the parent or ``None`` otherwise."""
|
| 433 |
prev_element = self._root.getprevious()
|
| 434 |
if prev_element is not None:
|
|
@@ -471,18 +471,18 @@ class Adaptor(SelectorsGeneration):
|
|
| 471 |
# From here we start with the selecting functions
|
| 472 |
def relocate(
|
| 473 |
self,
|
| 474 |
-
element: Union[Dict, html.HtmlElement, "
|
| 475 |
percentage: int = 0,
|
| 476 |
-
|
| 477 |
-
) -> Union[List[Union[html.HtmlElement, None]], "
|
| 478 |
"""This function will search again for the element in the page tree, used automatically on page structure change
|
| 479 |
|
| 480 |
:param element: The element we want to relocate in the tree
|
| 481 |
:param percentage: The minimum percentage to accept and not going lower than that. Be aware that the percentage
|
| 482 |
calculation depends solely on the page structure, so don't play with this number unless you must know
|
| 483 |
what you are doing!
|
| 484 |
-
:param
|
| 485 |
-
:return: List of pure HTML elements that got the highest matching score or '
|
| 486 |
"""
|
| 487 |
score_table = {}
|
| 488 |
# Note: `element` will most likely always be a dictionary at this point.
|
|
@@ -511,7 +511,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 511 |
f"{percent} -> {self.__handle_elements(score_table[percent])}"
|
| 512 |
)
|
| 513 |
|
| 514 |
-
if not
|
| 515 |
return score_table[highest_probability]
|
| 516 |
return self.__handle_elements(score_table[highest_probability])
|
| 517 |
return []
|
|
@@ -520,10 +520,10 @@ class Adaptor(SelectorsGeneration):
|
|
| 520 |
self,
|
| 521 |
selector: str,
|
| 522 |
identifier: str = "",
|
| 523 |
-
|
| 524 |
auto_save: bool = False,
|
| 525 |
percentage: int = 0,
|
| 526 |
-
) -> Union["
|
| 527 |
"""Search the current tree with CSS3 selectors and return the first result if possible, otherwise return `None`
|
| 528 |
|
| 529 |
**Important:
|
|
@@ -531,17 +531,15 @@ class Adaptor(SelectorsGeneration):
|
|
| 531 |
and want to relocate the same element(s)**
|
| 532 |
|
| 533 |
:param selector: The CSS3 selector to be used.
|
| 534 |
-
:param
|
| 535 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 536 |
otherwise the selector will be used.
|
| 537 |
-
:param auto_save: Automatically save new elements for `
|
| 538 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 539 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 540 |
number unless you must know what you are doing!
|
| 541 |
"""
|
| 542 |
-
for element in self.css(
|
| 543 |
-
selector, identifier, auto_match, auto_save, percentage
|
| 544 |
-
):
|
| 545 |
return element
|
| 546 |
return None
|
| 547 |
|
|
@@ -549,11 +547,11 @@ class Adaptor(SelectorsGeneration):
|
|
| 549 |
self,
|
| 550 |
selector: str,
|
| 551 |
identifier: str = "",
|
| 552 |
-
|
| 553 |
auto_save: bool = False,
|
| 554 |
percentage: int = 0,
|
| 555 |
**kwargs: Any,
|
| 556 |
-
) -> Union["
|
| 557 |
"""Search the current tree with XPath selectors and return the first result if possible, otherwise return `None`
|
| 558 |
|
| 559 |
**Important:
|
|
@@ -563,16 +561,16 @@ class Adaptor(SelectorsGeneration):
|
|
| 563 |
Note: **Additional keyword arguments will be passed as XPath variables in the XPath expression!**
|
| 564 |
|
| 565 |
:param selector: The XPath selector to be used.
|
| 566 |
-
:param
|
| 567 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 568 |
otherwise the selector will be used.
|
| 569 |
-
:param auto_save: Automatically save new elements for `
|
| 570 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 571 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 572 |
number unless you must know what you are doing!
|
| 573 |
"""
|
| 574 |
for element in self.xpath(
|
| 575 |
-
selector, identifier,
|
| 576 |
):
|
| 577 |
return element
|
| 578 |
return None
|
|
@@ -581,10 +579,10 @@ class Adaptor(SelectorsGeneration):
|
|
| 581 |
self,
|
| 582 |
selector: str,
|
| 583 |
identifier: str = "",
|
| 584 |
-
|
| 585 |
auto_save: bool = False,
|
| 586 |
percentage: int = 0,
|
| 587 |
-
) -> Union["
|
| 588 |
"""Search the current tree with CSS3 selectors
|
| 589 |
|
| 590 |
**Important:
|
|
@@ -592,24 +590,24 @@ class Adaptor(SelectorsGeneration):
|
|
| 592 |
and want to relocate the same element(s)**
|
| 593 |
|
| 594 |
:param selector: The CSS3 selector to be used.
|
| 595 |
-
:param
|
| 596 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 597 |
otherwise the selector will be used.
|
| 598 |
-
:param auto_save: Automatically save new elements for `
|
| 599 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 600 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 601 |
number unless you must know what you are doing!
|
| 602 |
|
| 603 |
-
:return: `
|
| 604 |
"""
|
| 605 |
try:
|
| 606 |
-
if not self.
|
| 607 |
# No need to split selectors in this case, let's save some CPU cycles :)
|
| 608 |
xpath_selector = translator_instance.css_to_xpath(selector)
|
| 609 |
return self.xpath(
|
| 610 |
xpath_selector,
|
| 611 |
identifier or selector,
|
| 612 |
-
|
| 613 |
auto_save,
|
| 614 |
percentage,
|
| 615 |
)
|
|
@@ -625,7 +623,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 625 |
results += self.xpath(
|
| 626 |
xpath_selector,
|
| 627 |
identifier or single_selector.canonical(),
|
| 628 |
-
|
| 629 |
auto_save,
|
| 630 |
percentage,
|
| 631 |
)
|
|
@@ -643,11 +641,11 @@ class Adaptor(SelectorsGeneration):
|
|
| 643 |
self,
|
| 644 |
selector: str,
|
| 645 |
identifier: str = "",
|
| 646 |
-
|
| 647 |
auto_save: bool = False,
|
| 648 |
percentage: int = 0,
|
| 649 |
**kwargs: Any,
|
| 650 |
-
) -> Union["
|
| 651 |
"""Search the current tree with XPath selectors
|
| 652 |
|
| 653 |
**Important:
|
|
@@ -657,31 +655,31 @@ class Adaptor(SelectorsGeneration):
|
|
| 657 |
Note: **Additional keyword arguments will be passed as XPath variables in the XPath expression!**
|
| 658 |
|
| 659 |
:param selector: The XPath selector to be used.
|
| 660 |
-
:param
|
| 661 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 662 |
otherwise the selector will be used.
|
| 663 |
-
:param auto_save: Automatically save new elements for `
|
| 664 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 665 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 666 |
number unless you must know what you are doing!
|
| 667 |
|
| 668 |
-
:return: `
|
| 669 |
"""
|
| 670 |
try:
|
| 671 |
elements = self._root.xpath(selector, **kwargs)
|
| 672 |
|
| 673 |
if elements:
|
| 674 |
if auto_save:
|
| 675 |
-
if not self.
|
| 676 |
log.warning(
|
| 677 |
-
"Argument `auto_save` will be ignored because `
|
| 678 |
)
|
| 679 |
else:
|
| 680 |
self.save(elements[0], identifier or selector)
|
| 681 |
|
| 682 |
return self.__handle_elements(elements)
|
| 683 |
-
elif self.
|
| 684 |
-
if
|
| 685 |
element_data = self.retrieve(identifier or selector)
|
| 686 |
if element_data:
|
| 687 |
elements = self.relocate(element_data, percentage)
|
|
@@ -690,13 +688,13 @@ class Adaptor(SelectorsGeneration):
|
|
| 690 |
|
| 691 |
return self.__handle_elements(elements)
|
| 692 |
else:
|
| 693 |
-
if
|
| 694 |
log.warning(
|
| 695 |
-
"Argument `
|
| 696 |
)
|
| 697 |
elif auto_save:
|
| 698 |
log.warning(
|
| 699 |
-
"Argument `auto_save` will be ignored because `
|
| 700 |
)
|
| 701 |
|
| 702 |
return self.__handle_elements(elements)
|
|
@@ -713,12 +711,12 @@ class Adaptor(SelectorsGeneration):
|
|
| 713 |
self,
|
| 714 |
*args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
|
| 715 |
**kwargs: str,
|
| 716 |
-
) -> "
|
| 717 |
"""Find elements by filters of your creations for ease.
|
| 718 |
|
| 719 |
:param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
|
| 720 |
:param kwargs: The attributes you want to filter elements based on it.
|
| 721 |
-
:return: The `
|
| 722 |
"""
|
| 723 |
# Attributes that are Python reserved words and can't be used directly
|
| 724 |
# Ex: find_all('a', class="blah") -> find_all('a', class_="blah")
|
|
@@ -735,7 +733,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 735 |
|
| 736 |
attributes = dict()
|
| 737 |
tags, patterns = set(), set()
|
| 738 |
-
results, functions, selectors =
|
| 739 |
|
| 740 |
# Brace yourself for a wonderful journey!
|
| 741 |
for arg in args:
|
|
@@ -766,7 +764,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 766 |
functions.append(arg)
|
| 767 |
else:
|
| 768 |
raise TypeError(
|
| 769 |
-
"Callable filter function must have at least one argument to take `
|
| 770 |
)
|
| 771 |
|
| 772 |
else:
|
|
@@ -820,12 +818,12 @@ class Adaptor(SelectorsGeneration):
|
|
| 820 |
self,
|
| 821 |
*args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
|
| 822 |
**kwargs: str,
|
| 823 |
-
) -> Union["
|
| 824 |
"""Find elements by filters of your creations for ease, then return the first result. Otherwise return `None`.
|
| 825 |
|
| 826 |
:param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
|
| 827 |
:param kwargs: The attributes you want to filter elements based on it.
|
| 828 |
-
:return: The `
|
| 829 |
"""
|
| 830 |
for element in self.find_all(*args, **kwargs):
|
| 831 |
return element
|
|
@@ -928,15 +926,15 @@ class Adaptor(SelectorsGeneration):
|
|
| 928 |
return score
|
| 929 |
|
| 930 |
def save(
|
| 931 |
-
self, element: Union["
|
| 932 |
) -> None:
|
| 933 |
"""Saves the element's unique properties to the storage for retrieval and relocation later
|
| 934 |
|
| 935 |
-
:param element: The element itself that we want to save to storage, it can be an `
|
| 936 |
:param identifier: This is the identifier that will be used to retrieve the element later from the storage. See
|
| 937 |
the docs for more info.
|
| 938 |
"""
|
| 939 |
-
if self.
|
| 940 |
if isinstance(element, self.__class__):
|
| 941 |
element = element._root
|
| 942 |
|
|
@@ -956,7 +954,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 956 |
the docs for more info.
|
| 957 |
:return: A dictionary of the unique properties
|
| 958 |
"""
|
| 959 |
-
if self.
|
| 960 |
return self._storage.retrieve(identifier)
|
| 961 |
|
| 962 |
log.critical(
|
|
@@ -1065,7 +1063,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 1065 |
"src",
|
| 1066 |
),
|
| 1067 |
match_text: bool = False,
|
| 1068 |
-
) -> Union["
|
| 1069 |
"""Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
|
| 1070 |
then return the ones that match the current element attributes with a percentage higher than the input threshold.
|
| 1071 |
|
|
@@ -1084,7 +1082,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 1084 |
:param match_text: If True, element text content will be taken into calculation while matching.
|
| 1085 |
Not recommended to use in normal cases, but it depends.
|
| 1086 |
|
| 1087 |
-
:return: A ``
|
| 1088 |
"""
|
| 1089 |
# We will use the elements' root from now on to get the speed boost of using Lxml directly
|
| 1090 |
root = self._root
|
|
@@ -1128,7 +1126,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 1128 |
partial: bool = False,
|
| 1129 |
case_sensitive: bool = False,
|
| 1130 |
clean_match: bool = True,
|
| 1131 |
-
) -> Union["
|
| 1132 |
"""Find elements that its text content fully/partially matches input.
|
| 1133 |
:param text: Text query to match
|
| 1134 |
:param first_match: Returns the first element that matches conditions, enabled by default
|
|
@@ -1137,7 +1135,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 1137 |
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
| 1138 |
"""
|
| 1139 |
|
| 1140 |
-
results =
|
| 1141 |
if not case_sensitive:
|
| 1142 |
text = text.lower()
|
| 1143 |
|
|
@@ -1174,14 +1172,14 @@ class Adaptor(SelectorsGeneration):
|
|
| 1174 |
first_match: bool = True,
|
| 1175 |
case_sensitive: bool = False,
|
| 1176 |
clean_match: bool = True,
|
| 1177 |
-
) -> Union["
|
| 1178 |
"""Find elements that its text content matches the input regex pattern.
|
| 1179 |
:param query: Regex query/pattern to match
|
| 1180 |
:param first_match: Return the first element that matches conditions; enabled by default.
|
| 1181 |
:param case_sensitive: If enabled, the letters case will be taken into consideration in the regex.
|
| 1182 |
:param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching.
|
| 1183 |
"""
|
| 1184 |
-
results =
|
| 1185 |
|
| 1186 |
# This selector gets all elements with text content
|
| 1187 |
for node in self.__handle_elements(
|
|
@@ -1206,24 +1204,24 @@ class Adaptor(SelectorsGeneration):
|
|
| 1206 |
return results
|
| 1207 |
|
| 1208 |
|
| 1209 |
-
class
|
| 1210 |
"""
|
| 1211 |
-
The `
|
| 1212 |
"""
|
| 1213 |
|
| 1214 |
__slots__ = ()
|
| 1215 |
|
| 1216 |
@typing.overload
|
| 1217 |
-
def __getitem__(self, pos: SupportsIndex) ->
|
| 1218 |
pass
|
| 1219 |
|
| 1220 |
@typing.overload
|
| 1221 |
-
def __getitem__(self, pos: slice) -> "
|
| 1222 |
pass
|
| 1223 |
|
| 1224 |
def __getitem__(
|
| 1225 |
self, pos: Union[SupportsIndex, slice]
|
| 1226 |
-
) -> Union[
|
| 1227 |
lst = super().__getitem__(pos)
|
| 1228 |
if isinstance(pos, slice):
|
| 1229 |
return self.__class__(lst)
|
|
@@ -1237,10 +1235,10 @@ class Adaptors(List[Adaptor]):
|
|
| 1237 |
auto_save: bool = False,
|
| 1238 |
percentage: int = 0,
|
| 1239 |
**kwargs: Any,
|
| 1240 |
-
) -> "
|
| 1241 |
"""
|
| 1242 |
Call the ``.xpath()`` method for each element in this list and return
|
| 1243 |
-
their results as another `
|
| 1244 |
|
| 1245 |
**Important:
|
| 1246 |
It's recommended to use the identifier argument if you plan to use a different selector later
|
|
@@ -1251,12 +1249,12 @@ class Adaptors(List[Adaptor]):
|
|
| 1251 |
:param selector: The XPath selector to be used.
|
| 1252 |
:param identifier: A string that will be used to retrieve element's data in auto-matching,
|
| 1253 |
otherwise the selector will be used.
|
| 1254 |
-
:param auto_save: Automatically save new elements for `
|
| 1255 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 1256 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1257 |
number unless you must know what you are doing!
|
| 1258 |
|
| 1259 |
-
:return: `
|
| 1260 |
"""
|
| 1261 |
results = [
|
| 1262 |
n.xpath(
|
|
@@ -1272,10 +1270,10 @@ class Adaptors(List[Adaptor]):
|
|
| 1272 |
identifier: str = "",
|
| 1273 |
auto_save: bool = False,
|
| 1274 |
percentage: int = 0,
|
| 1275 |
-
) -> "
|
| 1276 |
"""
|
| 1277 |
Call the ``.css()`` method for each element in this list and return
|
| 1278 |
-
their results flattened as another `
|
| 1279 |
|
| 1280 |
**Important:
|
| 1281 |
It's recommended to use the identifier argument if you plan to use a different selector later
|
|
@@ -1284,12 +1282,12 @@ class Adaptors(List[Adaptor]):
|
|
| 1284 |
:param selector: The CSS3 selector to be used.
|
| 1285 |
:param identifier: A string that will be used to retrieve element's data in auto-matching,
|
| 1286 |
otherwise the selector will be used.
|
| 1287 |
-
:param auto_save: Automatically save new elements for `
|
| 1288 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 1289 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1290 |
number unless you must know what you are doing!
|
| 1291 |
|
| 1292 |
-
:return: `
|
| 1293 |
"""
|
| 1294 |
results = [
|
| 1295 |
n.css(selector, identifier or selector, False, auto_save, percentage)
|
|
@@ -1340,7 +1338,7 @@ class Adaptors(List[Adaptor]):
|
|
| 1340 |
return result
|
| 1341 |
return default
|
| 1342 |
|
| 1343 |
-
def search(self, func: Callable[["
|
| 1344 |
"""Loop over all current elements and return the first element that matches the passed function
|
| 1345 |
:param func: A function that takes each element as an argument and returns True/False
|
| 1346 |
:return: The first element that match the function or ``None`` otherwise.
|
|
@@ -1350,10 +1348,10 @@ class Adaptors(List[Adaptor]):
|
|
| 1350 |
return element
|
| 1351 |
return None
|
| 1352 |
|
| 1353 |
-
def filter(self, func: Callable[["
|
| 1354 |
"""Filter current elements based on the passed function
|
| 1355 |
:param func: A function that takes each element as an argument and returns True/False
|
| 1356 |
-
:return: The new `
|
| 1357 |
"""
|
| 1358 |
return self.__class__([element for element in self if func(element)])
|
| 1359 |
|
|
@@ -1382,4 +1380,4 @@ class Adaptors(List[Adaptor]):
|
|
| 1382 |
|
| 1383 |
def __getstate__(self) -> Any:
|
| 1384 |
# lxml don't like it :)
|
| 1385 |
-
raise TypeError("Can't pickle
|
|
|
|
| 24 |
)
|
| 25 |
from scrapling.core.custom_types import AttributesHandler, TextHandler, TextHandlers
|
| 26 |
from scrapling.core.mixins import SelectorsGeneration
|
| 27 |
+
from scrapling.core.storage import (
|
| 28 |
SQLiteStorageSystem,
|
| 29 |
StorageSystemMixin,
|
| 30 |
_StorageTools,
|
|
|
|
| 33 |
from scrapling.core.utils import clean_spaces, flatten, html_forbidden, is_jsonable, log
|
| 34 |
|
| 35 |
|
| 36 |
+
class Selector(SelectorsGeneration):
|
| 37 |
__slots__ = (
|
| 38 |
"url",
|
| 39 |
"encoding",
|
| 40 |
+
"__adaptive_enabled",
|
| 41 |
"_root",
|
| 42 |
"_storage",
|
| 43 |
"__keep_comments",
|
|
|
|
| 58 |
root: Optional[html.HtmlElement] = None,
|
| 59 |
keep_comments: Optional[bool] = False,
|
| 60 |
keep_cdata: Optional[bool] = False,
|
| 61 |
+
adaptive: Optional[bool] = False,
|
| 62 |
_storage: object = None,
|
| 63 |
storage: Any = SQLiteStorageSystem,
|
| 64 |
storage_args: Optional[Dict] = None,
|
|
|
|
| 82 |
Don't use it unless you know what you are doing!
|
| 83 |
:param keep_comments: While parsing the HTML body, drop comments or not. Disabled by default for obvious reasons
|
| 84 |
:param keep_cdata: While parsing the HTML body, drop cdata or not. Disabled by default for cleaner HTML.
|
| 85 |
+
:param adaptive: Globally turn off the auto-match feature in all functions, this argument takes higher
|
| 86 |
priority over all auto-match related arguments/functions in the class.
|
| 87 |
:param storage: The storage class to be passed for auto-matching functionalities, see ``Docs`` for more info.
|
| 88 |
:param storage_args: A dictionary of ``argument->value`` pairs to be passed for the storage class.
|
|
|
|
| 90 |
"""
|
| 91 |
if root is None and not body and text is None:
|
| 92 |
raise ValueError(
|
| 93 |
+
"Selector class needs text, body, or root arguments to work"
|
| 94 |
)
|
| 95 |
|
| 96 |
self.__text = ""
|
|
|
|
| 134 |
|
| 135 |
self._root = root
|
| 136 |
|
| 137 |
+
self.__adaptive_enabled = adaptive
|
| 138 |
|
| 139 |
+
if self.__adaptive_enabled:
|
| 140 |
if _storage is not None:
|
| 141 |
self._storage = _storage
|
| 142 |
else:
|
|
|
|
| 214 |
"""
|
| 215 |
return TextHandler(str(element))
|
| 216 |
|
| 217 |
+
def __element_convertor(self, element: html.HtmlElement) -> "Selector":
|
| 218 |
+
"""Used internally to convert a single HtmlElement to Selector directly without checks"""
|
| 219 |
db_instance = (
|
| 220 |
self._storage if (hasattr(self, "_storage") and self._storage) else None
|
| 221 |
)
|
| 222 |
+
return Selector(
|
| 223 |
root=element,
|
| 224 |
url=self.url,
|
| 225 |
encoding=self.encoding,
|
| 226 |
+
adaptive=self.__adaptive_enabled,
|
| 227 |
+
_storage=db_instance, # Reuse existing storage if it exists otherwise it won't be checked if `adaptive` is turned off
|
| 228 |
keep_comments=self.__keep_comments,
|
| 229 |
keep_cdata=self.__keep_cdata,
|
| 230 |
huge_tree=self.__huge_tree_enabled,
|
|
|
|
| 233 |
|
| 234 |
def __handle_element(
|
| 235 |
self, element: Union[html.HtmlElement, etree._ElementUnicodeResult]
|
| 236 |
+
) -> Union[TextHandler, "Selector", None]:
|
| 237 |
+
"""Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
|
| 238 |
if element is None:
|
| 239 |
return None
|
| 240 |
elif self._is_text_node(element):
|
|
|
|
| 245 |
|
| 246 |
def __handle_elements(
|
| 247 |
self, result: List[Union[html.HtmlElement, etree._ElementUnicodeResult]]
|
| 248 |
+
) -> Union["Selectors", "TextHandlers", List]:
|
| 249 |
+
"""Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
|
| 250 |
if not len(
|
| 251 |
result
|
| 252 |
): # Lxml will give a warning if I used something like `not result`
|
| 253 |
+
return Selectors([])
|
| 254 |
|
| 255 |
# From within the code, this method will always get a list of the same type,
|
| 256 |
# so we will continue without checks for a slight performance boost
|
| 257 |
if self._is_text_node(result[0]):
|
| 258 |
return TextHandlers(list(map(self.__content_convertor, result)))
|
| 259 |
|
| 260 |
+
return Selectors(list(map(self.__element_convertor, result)))
|
| 261 |
|
| 262 |
def __getstate__(self) -> Any:
|
| 263 |
# lxml don't like it :)
|
| 264 |
+
raise TypeError("Can't pickle Selector objects")
|
| 265 |
|
| 266 |
# The following four properties I made them into functions instead of variables directly
|
| 267 |
# So they don't slow down the process of initializing many instances of the class and gets executed only
|
|
|
|
| 322 |
return TextHandler(separator).join(_all_strings)
|
| 323 |
|
| 324 |
def urljoin(self, relative_url: str) -> str:
|
| 325 |
+
"""Join this Selector's url with a relative url to form an absolute full URL."""
|
| 326 |
return urljoin(self.url, relative_url)
|
| 327 |
|
| 328 |
@property
|
|
|
|
| 363 |
return class_name in self._root.classes
|
| 364 |
|
| 365 |
@property
|
| 366 |
+
def parent(self) -> Union["Selector", None]:
|
| 367 |
"""Return the direct parent of the element or ``None`` otherwise"""
|
| 368 |
return self.__handle_element(self._root.getparent())
|
| 369 |
|
| 370 |
@property
|
| 371 |
+
def below_elements(self) -> "Selectors[Selector]":
|
| 372 |
"""Return all elements under the current element in the DOM tree"""
|
| 373 |
below = self._root.xpath(".//*")
|
| 374 |
return self.__handle_elements(below)
|
| 375 |
|
| 376 |
@property
|
| 377 |
+
def children(self) -> "Selectors[Selector]":
|
| 378 |
"""Return the children elements of the current element or empty list otherwise"""
|
| 379 |
+
return Selectors(
|
| 380 |
[
|
| 381 |
self.__element_convertor(child)
|
| 382 |
for child in self._root.iterchildren()
|
|
|
|
| 385 |
)
|
| 386 |
|
| 387 |
@property
|
| 388 |
+
def siblings(self) -> "Selectors[Selector]":
|
| 389 |
"""Return other children of the current element's parent or empty list otherwise"""
|
| 390 |
if self.parent:
|
| 391 |
+
return Selectors(
|
| 392 |
[child for child in self.parent.children if child._root != self._root]
|
| 393 |
)
|
| 394 |
+
return Selectors([])
|
| 395 |
|
| 396 |
+
def iterancestors(self) -> Generator["Selector", None, None]:
|
| 397 |
"""Return a generator that loops over all ancestors of the element, starting with the element's parent."""
|
| 398 |
for ancestor in self._root.iterancestors():
|
| 399 |
yield self.__element_convertor(ancestor)
|
| 400 |
|
| 401 |
def find_ancestor(
|
| 402 |
+
self, func: Callable[["Selector"], bool]
|
| 403 |
+
) -> Union["Selector", None]:
|
| 404 |
"""Loop over all ancestors of the element till one match the passed function
|
| 405 |
:param func: A function that takes each ancestor as an argument and returns True/False
|
| 406 |
:return: The first ancestor that match the function or ``None`` otherwise.
|
|
|
|
| 411 |
return None
|
| 412 |
|
| 413 |
@property
|
| 414 |
+
def path(self) -> "Selectors[Selector]":
|
| 415 |
+
"""Returns a list of type `Selectors` that contains the path leading to the current element from the root."""
|
| 416 |
lst = list(self.iterancestors())
|
| 417 |
+
return Selectors(lst)
|
| 418 |
|
| 419 |
@property
|
| 420 |
+
def next(self) -> Union["Selector", None]:
|
| 421 |
"""Returns the next element of the current element in the children of the parent or ``None`` otherwise."""
|
| 422 |
next_element = self._root.getnext()
|
| 423 |
if next_element is not None:
|
|
|
|
| 428 |
return self.__handle_element(next_element)
|
| 429 |
|
| 430 |
@property
|
| 431 |
+
def previous(self) -> Union["Selector", None]:
|
| 432 |
"""Returns the previous element of the current element in the children of the parent or ``None`` otherwise."""
|
| 433 |
prev_element = self._root.getprevious()
|
| 434 |
if prev_element is not None:
|
|
|
|
| 471 |
# From here we start with the selecting functions
|
| 472 |
def relocate(
|
| 473 |
self,
|
| 474 |
+
element: Union[Dict, html.HtmlElement, "Selector"],
|
| 475 |
percentage: int = 0,
|
| 476 |
+
selector_type: bool = False,
|
| 477 |
+
) -> Union[List[Union[html.HtmlElement, None]], "Selectors"]:
|
| 478 |
"""This function will search again for the element in the page tree, used automatically on page structure change
|
| 479 |
|
| 480 |
:param element: The element we want to relocate in the tree
|
| 481 |
:param percentage: The minimum percentage to accept and not going lower than that. Be aware that the percentage
|
| 482 |
calculation depends solely on the page structure, so don't play with this number unless you must know
|
| 483 |
what you are doing!
|
| 484 |
+
:param selector_type: If True, the return result will be converted to `Selectors` object
|
| 485 |
+
:return: List of pure HTML elements that got the highest matching score or 'Selectors' object
|
| 486 |
"""
|
| 487 |
score_table = {}
|
| 488 |
# Note: `element` will most likely always be a dictionary at this point.
|
|
|
|
| 511 |
f"{percent} -> {self.__handle_elements(score_table[percent])}"
|
| 512 |
)
|
| 513 |
|
| 514 |
+
if not selector_type:
|
| 515 |
return score_table[highest_probability]
|
| 516 |
return self.__handle_elements(score_table[highest_probability])
|
| 517 |
return []
|
|
|
|
| 520 |
self,
|
| 521 |
selector: str,
|
| 522 |
identifier: str = "",
|
| 523 |
+
adaptive: bool = False,
|
| 524 |
auto_save: bool = False,
|
| 525 |
percentage: int = 0,
|
| 526 |
+
) -> Union["Selector", "TextHandler", None]:
|
| 527 |
"""Search the current tree with CSS3 selectors and return the first result if possible, otherwise return `None`
|
| 528 |
|
| 529 |
**Important:
|
|
|
|
| 531 |
and want to relocate the same element(s)**
|
| 532 |
|
| 533 |
:param selector: The CSS3 selector to be used.
|
| 534 |
+
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 535 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 536 |
otherwise the selector will be used.
|
| 537 |
+
:param auto_save: Automatically save new elements for `adaptive` later
|
| 538 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 539 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 540 |
number unless you must know what you are doing!
|
| 541 |
"""
|
| 542 |
+
for element in self.css(selector, identifier, adaptive, auto_save, percentage):
|
|
|
|
|
|
|
| 543 |
return element
|
| 544 |
return None
|
| 545 |
|
|
|
|
| 547 |
self,
|
| 548 |
selector: str,
|
| 549 |
identifier: str = "",
|
| 550 |
+
adaptive: bool = False,
|
| 551 |
auto_save: bool = False,
|
| 552 |
percentage: int = 0,
|
| 553 |
**kwargs: Any,
|
| 554 |
+
) -> Union["Selector", "TextHandler", None]:
|
| 555 |
"""Search the current tree with XPath selectors and return the first result if possible, otherwise return `None`
|
| 556 |
|
| 557 |
**Important:
|
|
|
|
| 561 |
Note: **Additional keyword arguments will be passed as XPath variables in the XPath expression!**
|
| 562 |
|
| 563 |
:param selector: The XPath selector to be used.
|
| 564 |
+
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 565 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 566 |
otherwise the selector will be used.
|
| 567 |
+
:param auto_save: Automatically save new elements for `adaptive` later
|
| 568 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 569 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 570 |
number unless you must know what you are doing!
|
| 571 |
"""
|
| 572 |
for element in self.xpath(
|
| 573 |
+
selector, identifier, adaptive, auto_save, percentage, **kwargs
|
| 574 |
):
|
| 575 |
return element
|
| 576 |
return None
|
|
|
|
| 579 |
self,
|
| 580 |
selector: str,
|
| 581 |
identifier: str = "",
|
| 582 |
+
adaptive: bool = False,
|
| 583 |
auto_save: bool = False,
|
| 584 |
percentage: int = 0,
|
| 585 |
+
) -> Union["Selectors[Selector]", List, "TextHandlers[TextHandler]"]:
|
| 586 |
"""Search the current tree with CSS3 selectors
|
| 587 |
|
| 588 |
**Important:
|
|
|
|
| 590 |
and want to relocate the same element(s)**
|
| 591 |
|
| 592 |
:param selector: The CSS3 selector to be used.
|
| 593 |
+
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 594 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 595 |
otherwise the selector will be used.
|
| 596 |
+
:param auto_save: Automatically save new elements for `adaptive` later
|
| 597 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 598 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 599 |
number unless you must know what you are doing!
|
| 600 |
|
| 601 |
+
:return: `Selectors` class.
|
| 602 |
"""
|
| 603 |
try:
|
| 604 |
+
if not self.__adaptive_enabled or "," not in selector:
|
| 605 |
# No need to split selectors in this case, let's save some CPU cycles :)
|
| 606 |
xpath_selector = translator_instance.css_to_xpath(selector)
|
| 607 |
return self.xpath(
|
| 608 |
xpath_selector,
|
| 609 |
identifier or selector,
|
| 610 |
+
adaptive,
|
| 611 |
auto_save,
|
| 612 |
percentage,
|
| 613 |
)
|
|
|
|
| 623 |
results += self.xpath(
|
| 624 |
xpath_selector,
|
| 625 |
identifier or single_selector.canonical(),
|
| 626 |
+
adaptive,
|
| 627 |
auto_save,
|
| 628 |
percentage,
|
| 629 |
)
|
|
|
|
| 641 |
self,
|
| 642 |
selector: str,
|
| 643 |
identifier: str = "",
|
| 644 |
+
adaptive: bool = False,
|
| 645 |
auto_save: bool = False,
|
| 646 |
percentage: int = 0,
|
| 647 |
**kwargs: Any,
|
| 648 |
+
) -> Union["Selectors[Selector]", List, "TextHandlers[TextHandler]"]:
|
| 649 |
"""Search the current tree with XPath selectors
|
| 650 |
|
| 651 |
**Important:
|
|
|
|
| 655 |
Note: **Additional keyword arguments will be passed as XPath variables in the XPath expression!**
|
| 656 |
|
| 657 |
:param selector: The XPath selector to be used.
|
| 658 |
+
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 659 |
:param identifier: A string that will be used to save/retrieve element's data in auto-matching,
|
| 660 |
otherwise the selector will be used.
|
| 661 |
+
:param auto_save: Automatically save new elements for `adaptive` later
|
| 662 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 663 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 664 |
number unless you must know what you are doing!
|
| 665 |
|
| 666 |
+
:return: `Selectors` class.
|
| 667 |
"""
|
| 668 |
try:
|
| 669 |
elements = self._root.xpath(selector, **kwargs)
|
| 670 |
|
| 671 |
if elements:
|
| 672 |
if auto_save:
|
| 673 |
+
if not self.__adaptive_enabled:
|
| 674 |
log.warning(
|
| 675 |
+
"Argument `auto_save` will be ignored because `adaptive` wasn't enabled on initialization. Check docs for more info."
|
| 676 |
)
|
| 677 |
else:
|
| 678 |
self.save(elements[0], identifier or selector)
|
| 679 |
|
| 680 |
return self.__handle_elements(elements)
|
| 681 |
+
elif self.__adaptive_enabled:
|
| 682 |
+
if adaptive:
|
| 683 |
element_data = self.retrieve(identifier or selector)
|
| 684 |
if element_data:
|
| 685 |
elements = self.relocate(element_data, percentage)
|
|
|
|
| 688 |
|
| 689 |
return self.__handle_elements(elements)
|
| 690 |
else:
|
| 691 |
+
if adaptive:
|
| 692 |
log.warning(
|
| 693 |
+
"Argument `adaptive` will be ignored because `adaptive` wasn't enabled on initialization. Check docs for more info."
|
| 694 |
)
|
| 695 |
elif auto_save:
|
| 696 |
log.warning(
|
| 697 |
+
"Argument `auto_save` will be ignored because `adaptive` wasn't enabled on initialization. Check docs for more info."
|
| 698 |
)
|
| 699 |
|
| 700 |
return self.__handle_elements(elements)
|
|
|
|
| 711 |
self,
|
| 712 |
*args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
|
| 713 |
**kwargs: str,
|
| 714 |
+
) -> "Selectors":
|
| 715 |
"""Find elements by filters of your creations for ease.
|
| 716 |
|
| 717 |
:param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
|
| 718 |
:param kwargs: The attributes you want to filter elements based on it.
|
| 719 |
+
:return: The `Selectors` object of the elements or empty list
|
| 720 |
"""
|
| 721 |
# Attributes that are Python reserved words and can't be used directly
|
| 722 |
# Ex: find_all('a', class="blah") -> find_all('a', class_="blah")
|
|
|
|
| 733 |
|
| 734 |
attributes = dict()
|
| 735 |
tags, patterns = set(), set()
|
| 736 |
+
results, functions, selectors = Selectors([]), [], []
|
| 737 |
|
| 738 |
# Brace yourself for a wonderful journey!
|
| 739 |
for arg in args:
|
|
|
|
| 764 |
functions.append(arg)
|
| 765 |
else:
|
| 766 |
raise TypeError(
|
| 767 |
+
"Callable filter function must have at least one argument to take `Selector` objects."
|
| 768 |
)
|
| 769 |
|
| 770 |
else:
|
|
|
|
| 818 |
self,
|
| 819 |
*args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
|
| 820 |
**kwargs: str,
|
| 821 |
+
) -> Union["Selector", None]:
|
| 822 |
"""Find elements by filters of your creations for ease, then return the first result. Otherwise return `None`.
|
| 823 |
|
| 824 |
:param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
|
| 825 |
:param kwargs: The attributes you want to filter elements based on it.
|
| 826 |
+
:return: The `Selector` object of the element or `None` if the result didn't match
|
| 827 |
"""
|
| 828 |
for element in self.find_all(*args, **kwargs):
|
| 829 |
return element
|
|
|
|
| 926 |
return score
|
| 927 |
|
| 928 |
def save(
|
| 929 |
+
self, element: Union["Selector", html.HtmlElement], identifier: str
|
| 930 |
) -> None:
|
| 931 |
"""Saves the element's unique properties to the storage for retrieval and relocation later
|
| 932 |
|
| 933 |
+
:param element: The element itself that we want to save to storage, it can be an ` Selector ` or pure ` HtmlElement `
|
| 934 |
:param identifier: This is the identifier that will be used to retrieve the element later from the storage. See
|
| 935 |
the docs for more info.
|
| 936 |
"""
|
| 937 |
+
if self.__adaptive_enabled:
|
| 938 |
if isinstance(element, self.__class__):
|
| 939 |
element = element._root
|
| 940 |
|
|
|
|
| 954 |
the docs for more info.
|
| 955 |
:return: A dictionary of the unique properties
|
| 956 |
"""
|
| 957 |
+
if self.__adaptive_enabled:
|
| 958 |
return self._storage.retrieve(identifier)
|
| 959 |
|
| 960 |
log.critical(
|
|
|
|
| 1063 |
"src",
|
| 1064 |
),
|
| 1065 |
match_text: bool = False,
|
| 1066 |
+
) -> Union["Selectors[Selector]", List]:
|
| 1067 |
"""Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
|
| 1068 |
then return the ones that match the current element attributes with a percentage higher than the input threshold.
|
| 1069 |
|
|
|
|
| 1082 |
:param match_text: If True, element text content will be taken into calculation while matching.
|
| 1083 |
Not recommended to use in normal cases, but it depends.
|
| 1084 |
|
| 1085 |
+
:return: A ``Selectors`` container of ``Selector`` objects or empty list
|
| 1086 |
"""
|
| 1087 |
# We will use the elements' root from now on to get the speed boost of using Lxml directly
|
| 1088 |
root = self._root
|
|
|
|
| 1126 |
partial: bool = False,
|
| 1127 |
case_sensitive: bool = False,
|
| 1128 |
clean_match: bool = True,
|
| 1129 |
+
) -> Union["Selectors[Selector]", "Selector"]:
|
| 1130 |
"""Find elements that its text content fully/partially matches input.
|
| 1131 |
:param text: Text query to match
|
| 1132 |
:param first_match: Returns the first element that matches conditions, enabled by default
|
|
|
|
| 1135 |
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
| 1136 |
"""
|
| 1137 |
|
| 1138 |
+
results = Selectors([])
|
| 1139 |
if not case_sensitive:
|
| 1140 |
text = text.lower()
|
| 1141 |
|
|
|
|
| 1172 |
first_match: bool = True,
|
| 1173 |
case_sensitive: bool = False,
|
| 1174 |
clean_match: bool = True,
|
| 1175 |
+
) -> Union["Selectors[Selector]", "Selector"]:
|
| 1176 |
"""Find elements that its text content matches the input regex pattern.
|
| 1177 |
:param query: Regex query/pattern to match
|
| 1178 |
:param first_match: Return the first element that matches conditions; enabled by default.
|
| 1179 |
:param case_sensitive: If enabled, the letters case will be taken into consideration in the regex.
|
| 1180 |
:param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching.
|
| 1181 |
"""
|
| 1182 |
+
results = Selectors([])
|
| 1183 |
|
| 1184 |
# This selector gets all elements with text content
|
| 1185 |
for node in self.__handle_elements(
|
|
|
|
| 1204 |
return results
|
| 1205 |
|
| 1206 |
|
| 1207 |
+
class Selectors(List[Selector]):
|
| 1208 |
"""
|
| 1209 |
+
The `Selectors` class is a subclass of the builtin ``List`` class, which provides a few additional methods.
|
| 1210 |
"""
|
| 1211 |
|
| 1212 |
__slots__ = ()
|
| 1213 |
|
| 1214 |
@typing.overload
|
| 1215 |
+
def __getitem__(self, pos: SupportsIndex) -> Selector:
|
| 1216 |
pass
|
| 1217 |
|
| 1218 |
@typing.overload
|
| 1219 |
+
def __getitem__(self, pos: slice) -> "Selectors":
|
| 1220 |
pass
|
| 1221 |
|
| 1222 |
def __getitem__(
|
| 1223 |
self, pos: Union[SupportsIndex, slice]
|
| 1224 |
+
) -> Union[Selector, "Selectors"]:
|
| 1225 |
lst = super().__getitem__(pos)
|
| 1226 |
if isinstance(pos, slice):
|
| 1227 |
return self.__class__(lst)
|
|
|
|
| 1235 |
auto_save: bool = False,
|
| 1236 |
percentage: int = 0,
|
| 1237 |
**kwargs: Any,
|
| 1238 |
+
) -> "Selectors[Selector]":
|
| 1239 |
"""
|
| 1240 |
Call the ``.xpath()`` method for each element in this list and return
|
| 1241 |
+
their results as another `Selectors` class.
|
| 1242 |
|
| 1243 |
**Important:
|
| 1244 |
It's recommended to use the identifier argument if you plan to use a different selector later
|
|
|
|
| 1249 |
:param selector: The XPath selector to be used.
|
| 1250 |
:param identifier: A string that will be used to retrieve element's data in auto-matching,
|
| 1251 |
otherwise the selector will be used.
|
| 1252 |
+
:param auto_save: Automatically save new elements for `adaptive` later
|
| 1253 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 1254 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1255 |
number unless you must know what you are doing!
|
| 1256 |
|
| 1257 |
+
:return: `Selectors` class.
|
| 1258 |
"""
|
| 1259 |
results = [
|
| 1260 |
n.xpath(
|
|
|
|
| 1270 |
identifier: str = "",
|
| 1271 |
auto_save: bool = False,
|
| 1272 |
percentage: int = 0,
|
| 1273 |
+
) -> "Selectors[Selector]":
|
| 1274 |
"""
|
| 1275 |
Call the ``.css()`` method for each element in this list and return
|
| 1276 |
+
their results flattened as another `Selectors` class.
|
| 1277 |
|
| 1278 |
**Important:
|
| 1279 |
It's recommended to use the identifier argument if you plan to use a different selector later
|
|
|
|
| 1282 |
:param selector: The CSS3 selector to be used.
|
| 1283 |
:param identifier: A string that will be used to retrieve element's data in auto-matching,
|
| 1284 |
otherwise the selector will be used.
|
| 1285 |
+
:param auto_save: Automatically save new elements for `adaptive` later
|
| 1286 |
:param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
|
| 1287 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1288 |
number unless you must know what you are doing!
|
| 1289 |
|
| 1290 |
+
:return: `Selectors` class.
|
| 1291 |
"""
|
| 1292 |
results = [
|
| 1293 |
n.css(selector, identifier or selector, False, auto_save, percentage)
|
|
|
|
| 1338 |
return result
|
| 1339 |
return default
|
| 1340 |
|
| 1341 |
+
def search(self, func: Callable[["Selector"], bool]) -> Union["Selector", None]:
|
| 1342 |
"""Loop over all current elements and return the first element that matches the passed function
|
| 1343 |
:param func: A function that takes each element as an argument and returns True/False
|
| 1344 |
:return: The first element that match the function or ``None`` otherwise.
|
|
|
|
| 1348 |
return element
|
| 1349 |
return None
|
| 1350 |
|
| 1351 |
+
def filter(self, func: Callable[["Selector"], bool]) -> "Selectors[Selector]":
|
| 1352 |
"""Filter current elements based on the passed function
|
| 1353 |
:param func: A function that takes each element as an argument and returns True/False
|
| 1354 |
+
:return: The new `Selectors` object or empty list otherwise.
|
| 1355 |
"""
|
| 1356 |
return self.__class__([element for element in self if func(element)])
|
| 1357 |
|
|
|
|
| 1380 |
|
| 1381 |
def __getstate__(self) -> Any:
|
| 1382 |
# lxml don't like it :)
|
| 1383 |
+
raise TypeError("Can't pickle Selectors object")
|
tests/fetchers/async/test_camoufox.py
CHANGED
|
@@ -3,7 +3,7 @@ import pytest_httpbin
|
|
| 3 |
|
| 4 |
from scrapling import StealthyFetcher
|
| 5 |
|
| 6 |
-
StealthyFetcher.
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
|
|
|
| 3 |
|
| 4 |
from scrapling import StealthyFetcher
|
| 5 |
|
| 6 |
+
StealthyFetcher.adaptive = True
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
tests/fetchers/async/test_dynamic.py
CHANGED
|
@@ -3,7 +3,7 @@ import pytest_httpbin
|
|
| 3 |
|
| 4 |
from scrapling import DynamicFetcher
|
| 5 |
|
| 6 |
-
DynamicFetcher.
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
|
|
|
| 3 |
|
| 4 |
from scrapling import DynamicFetcher
|
| 5 |
|
| 6 |
+
DynamicFetcher.adaptive = True
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
tests/fetchers/async/test_requests.py
CHANGED
|
@@ -3,7 +3,7 @@ import pytest_httpbin
|
|
| 3 |
|
| 4 |
from scrapling.fetchers import AsyncFetcher
|
| 5 |
|
| 6 |
-
AsyncFetcher.
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
|
|
|
| 3 |
|
| 4 |
from scrapling.fetchers import AsyncFetcher
|
| 5 |
|
| 6 |
+
AsyncFetcher.adaptive = True
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
tests/fetchers/sync/test_camoufox.py
CHANGED
|
@@ -3,7 +3,7 @@ import pytest_httpbin
|
|
| 3 |
|
| 4 |
from scrapling import StealthyFetcher
|
| 5 |
|
| 6 |
-
StealthyFetcher.
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
|
|
|
| 3 |
|
| 4 |
from scrapling import StealthyFetcher
|
| 5 |
|
| 6 |
+
StealthyFetcher.adaptive = True
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
tests/fetchers/sync/test_dynamic.py
CHANGED
|
@@ -5,7 +5,7 @@ import pytest_httpbin
|
|
| 5 |
|
| 6 |
from scrapling import DynamicFetcher
|
| 7 |
|
| 8 |
-
DynamicFetcher.
|
| 9 |
|
| 10 |
|
| 11 |
@pytest_httpbin.use_class_based_httpbin
|
|
|
|
| 5 |
|
| 6 |
from scrapling import DynamicFetcher
|
| 7 |
|
| 8 |
+
DynamicFetcher.adaptive = True
|
| 9 |
|
| 10 |
|
| 11 |
@pytest_httpbin.use_class_based_httpbin
|
tests/fetchers/sync/test_requests.py
CHANGED
|
@@ -3,7 +3,7 @@ import pytest_httpbin
|
|
| 3 |
|
| 4 |
from scrapling import Fetcher
|
| 5 |
|
| 6 |
-
Fetcher.
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
|
|
|
| 3 |
|
| 4 |
from scrapling import Fetcher
|
| 5 |
|
| 6 |
+
Fetcher.adaptive = True
|
| 7 |
|
| 8 |
|
| 9 |
@pytest_httpbin.use_class_based_httpbin
|
tests/parser/{test_automatch.py → test_adaptive.py}
RENAMED
|
@@ -2,10 +2,10 @@ import asyncio
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
| 5 |
-
from scrapling import
|
| 6 |
|
| 7 |
|
| 8 |
-
class
|
| 9 |
def test_element_relocation(self):
|
| 10 |
"""Test relocating element after structure change"""
|
| 11 |
original_html = """
|
|
@@ -43,13 +43,13 @@ class TestParserAutoMatch:
|
|
| 43 |
</div>
|
| 44 |
"""
|
| 45 |
|
| 46 |
-
old_page =
|
| 47 |
-
new_page =
|
| 48 |
|
| 49 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 50 |
# Also at the same time testing auto-match vs combined selectors
|
| 51 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 52 |
-
relocated = new_page.css("#p1",
|
| 53 |
|
| 54 |
assert relocated is not None
|
| 55 |
assert relocated[0].attrib["data-id"] == "p1"
|
|
@@ -97,13 +97,13 @@ class TestParserAutoMatch:
|
|
| 97 |
# Simulate async operation
|
| 98 |
await asyncio.sleep(0.1) # Minimal async operation
|
| 99 |
|
| 100 |
-
old_page =
|
| 101 |
-
new_page =
|
| 102 |
|
| 103 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 104 |
# Also at the same time testing auto-match vs combined selectors
|
| 105 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 106 |
-
relocated = new_page.css("#p1",
|
| 107 |
|
| 108 |
assert relocated is not None
|
| 109 |
assert relocated[0].attrib["data-id"] == "p1"
|
|
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
| 5 |
+
from scrapling import Selector
|
| 6 |
|
| 7 |
|
| 8 |
+
class TestParserAdaptive:
|
| 9 |
def test_element_relocation(self):
|
| 10 |
"""Test relocating element after structure change"""
|
| 11 |
original_html = """
|
|
|
|
| 43 |
</div>
|
| 44 |
"""
|
| 45 |
|
| 46 |
+
old_page = Selector(original_html, url="example.com", adaptive=True)
|
| 47 |
+
new_page = Selector(changed_html, url="example.com", adaptive=True)
|
| 48 |
|
| 49 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 50 |
# Also at the same time testing auto-match vs combined selectors
|
| 51 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 52 |
+
relocated = new_page.css("#p1", adaptive=True)
|
| 53 |
|
| 54 |
assert relocated is not None
|
| 55 |
assert relocated[0].attrib["data-id"] == "p1"
|
|
|
|
| 97 |
# Simulate async operation
|
| 98 |
await asyncio.sleep(0.1) # Minimal async operation
|
| 99 |
|
| 100 |
+
old_page = Selector(original_html, url="example.com", adaptive=True)
|
| 101 |
+
new_page = Selector(changed_html, url="example.com", adaptive=True)
|
| 102 |
|
| 103 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 104 |
# Also at the same time testing auto-match vs combined selectors
|
| 105 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 106 |
+
relocated = new_page.css("#p1", adaptive=True)
|
| 107 |
|
| 108 |
assert relocated is not None
|
| 109 |
assert relocated[0].attrib["data-id"] == "p1"
|
tests/parser/test_general.py
CHANGED
|
@@ -4,7 +4,7 @@ import time
|
|
| 4 |
import pytest
|
| 5 |
from cssselect import SelectorError, SelectorSyntaxError
|
| 6 |
|
| 7 |
-
from scrapling import
|
| 8 |
|
| 9 |
|
| 10 |
@pytest.fixture
|
|
@@ -78,7 +78,7 @@ def html_content():
|
|
| 78 |
|
| 79 |
@pytest.fixture
|
| 80 |
def page(html_content):
|
| 81 |
-
return
|
| 82 |
|
| 83 |
|
| 84 |
# CSS Selector Tests
|
|
@@ -162,26 +162,26 @@ class TestSimilarElements:
|
|
| 162 |
|
| 163 |
# Error Handling Tests
|
| 164 |
class TestErrorHandling:
|
| 165 |
-
def
|
| 166 |
-
"""Test various invalid
|
| 167 |
# No arguments
|
| 168 |
with pytest.raises(ValueError):
|
| 169 |
-
_ =
|
| 170 |
|
| 171 |
# Invalid argument types
|
| 172 |
with pytest.raises(TypeError):
|
| 173 |
-
_ =
|
| 174 |
|
| 175 |
with pytest.raises(TypeError):
|
| 176 |
-
_ =
|
| 177 |
|
| 178 |
with pytest.raises(TypeError):
|
| 179 |
-
_ =
|
| 180 |
|
| 181 |
def test_invalid_storage(self, page, html_content):
|
| 182 |
"""Test invalid storage parameter"""
|
| 183 |
with pytest.raises(ValueError):
|
| 184 |
-
_ =
|
| 185 |
|
| 186 |
def test_bad_selectors(self, page):
|
| 187 |
"""Test handling of invalid selectors"""
|
|
@@ -195,7 +195,7 @@ class TestErrorHandling:
|
|
| 195 |
# Pickling and Object Representation Tests
|
| 196 |
class TestPicklingAndRepresentation:
|
| 197 |
def test_unpickleable_objects(self, page):
|
| 198 |
-
"""Test that
|
| 199 |
table = page.css(".product-list")[0]
|
| 200 |
with pytest.raises(TypeError):
|
| 201 |
pickle.dumps(table)
|
|
@@ -299,7 +299,7 @@ def test_large_html_parsing_performance():
|
|
| 299 |
)
|
| 300 |
|
| 301 |
start_time = time.time()
|
| 302 |
-
parsed =
|
| 303 |
elements = parsed.css(".item")
|
| 304 |
end_time = time.time()
|
| 305 |
|
|
@@ -315,7 +315,7 @@ def test_large_html_parsing_performance():
|
|
| 315 |
def test_selectors_generation(page):
|
| 316 |
"""Try to create selectors for all elements in the page"""
|
| 317 |
|
| 318 |
-
def _traverse(element:
|
| 319 |
assert isinstance(element.generate_css_selector, str)
|
| 320 |
assert isinstance(element.generate_xpath_selector, str)
|
| 321 |
for branch in element.children:
|
|
|
|
| 4 |
import pytest
|
| 5 |
from cssselect import SelectorError, SelectorSyntaxError
|
| 6 |
|
| 7 |
+
from scrapling import Selector
|
| 8 |
|
| 9 |
|
| 10 |
@pytest.fixture
|
|
|
|
| 78 |
|
| 79 |
@pytest.fixture
|
| 80 |
def page(html_content):
|
| 81 |
+
return Selector(html_content, adaptive=False)
|
| 82 |
|
| 83 |
|
| 84 |
# CSS Selector Tests
|
|
|
|
| 162 |
|
| 163 |
# Error Handling Tests
|
| 164 |
class TestErrorHandling:
|
| 165 |
+
def test_invalid_selector_initialization(self):
|
| 166 |
+
"""Test various invalid Selector initializations"""
|
| 167 |
# No arguments
|
| 168 |
with pytest.raises(ValueError):
|
| 169 |
+
_ = Selector(adaptive=False)
|
| 170 |
|
| 171 |
# Invalid argument types
|
| 172 |
with pytest.raises(TypeError):
|
| 173 |
+
_ = Selector(root="ayo", adaptive=False)
|
| 174 |
|
| 175 |
with pytest.raises(TypeError):
|
| 176 |
+
_ = Selector(text=1, adaptive=False)
|
| 177 |
|
| 178 |
with pytest.raises(TypeError):
|
| 179 |
+
_ = Selector(body=1, adaptive=False)
|
| 180 |
|
| 181 |
def test_invalid_storage(self, page, html_content):
|
| 182 |
"""Test invalid storage parameter"""
|
| 183 |
with pytest.raises(ValueError):
|
| 184 |
+
_ = Selector(html_content, storage=object, adaptive=True)
|
| 185 |
|
| 186 |
def test_bad_selectors(self, page):
|
| 187 |
"""Test handling of invalid selectors"""
|
|
|
|
| 195 |
# Pickling and Object Representation Tests
|
| 196 |
class TestPicklingAndRepresentation:
|
| 197 |
def test_unpickleable_objects(self, page):
|
| 198 |
+
"""Test that Selector objects cannot be pickled"""
|
| 199 |
table = page.css(".product-list")[0]
|
| 200 |
with pytest.raises(TypeError):
|
| 201 |
pickle.dumps(table)
|
|
|
|
| 299 |
)
|
| 300 |
|
| 301 |
start_time = time.time()
|
| 302 |
+
parsed = Selector(large_html, adaptive=False)
|
| 303 |
elements = parsed.css(".item")
|
| 304 |
end_time = time.time()
|
| 305 |
|
|
|
|
| 315 |
def test_selectors_generation(page):
|
| 316 |
"""Try to create selectors for all elements in the page"""
|
| 317 |
|
| 318 |
+
def _traverse(element: Selector):
|
| 319 |
assert isinstance(element.generate_css_selector, str)
|
| 320 |
assert isinstance(element.generate_xpath_selector, str)
|
| 321 |
for branch in element.children:
|