|
|
"""Integration code for CSS selectors using `Soup Sieve <https://facelessuser.github.io/soupsieve/>`_ (pypi: ``soupsieve``). |
|
|
|
|
|
Acquire a `CSS` object through the `element.Tag.css` attribute of |
|
|
the starting point of your CSS selector, or (if you want to run a |
|
|
selector against the entire document) of the `BeautifulSoup` object |
|
|
itself. |
|
|
|
|
|
The main advantage of doing this instead of using ``soupsieve`` |
|
|
functions is that you don't need to keep passing the `element.Tag` to be |
|
|
selected against, since the `CSS` object is permanently scoped to that |
|
|
`element.Tag`. |
|
|
|
|
|
""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
from types import ModuleType |
|
|
from typing import ( |
|
|
Any, |
|
|
cast, |
|
|
Iterable, |
|
|
Iterator, |
|
|
Optional, |
|
|
TYPE_CHECKING, |
|
|
) |
|
|
import warnings |
|
|
from bs4._typing import _NamespaceMapping |
|
|
|
|
|
if TYPE_CHECKING: |
|
|
from soupsieve import SoupSieve |
|
|
from bs4 import element |
|
|
from bs4.element import ResultSet, Tag |
|
|
|
|
|
# Soup Sieve is an optional dependency. The module-level name ``soupsieve``
# is declared as Optional so that it can hold either the imported module or
# ``None`` when the package is unavailable.
soupsieve: Optional[ModuleType]
try:
    import soupsieve
except ImportError:
    # Record that the package is missing, then tell the user once, at import
    # time, that CSS selector support is unavailable.
    soupsieve = None
    warnings.warn(
        "The soupsieve package is not installed. CSS selectors cannot be used."
    )
|
|
|
|
|
|
|
|
class CSS:
    """A proxy object against the ``soupsieve`` library, to simplify its
    CSS selector API.

    You don't need to instantiate this class yourself; instead, use
    `element.Tag.css`.

    :param tag: All CSS selectors run by this object will use this as
        their starting point.

    :param api: An optional drop-in replacement for the ``soupsieve`` module,
        intended for use in unit tests.

    :raises NotImplementedError: If no ``api`` is given and the
        ``soupsieve`` package is not installed.
    """

    def __init__(self, tag: element.Tag, api: Optional[ModuleType] = None):
        if api is None:
            # Fall back to the module-level import, which is None when
            # the soupsieve package could not be imported.
            api = soupsieve
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident: str) -> str:
        """Escape a CSS identifier.

        This is a simple wrapper around `soupsieve.escape() <https://facelessuser.github.io/soupsieve/api/#soupsieveescape>`_. See the
        documentation for that function for more information.

        :param ident: The identifier to escape.

        :return: The escaped identifier.

        :raises NotImplementedError: If this object has no selector API
            to delegate to.
        """
        # Check the API this object was configured with, not the
        # module-level ``soupsieve`` name: a replacement API passed to the
        # constructor must keep working even when soupsieve itself is
        # not installed.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return cast(str, self.api.escape(ident))

    def _ns(
        self, ns: Optional[_NamespaceMapping], select: str
    ) -> Optional[_NamespaceMapping]:
        """Normalize a dictionary of namespaces.

        :param ns: The namespace mapping supplied by the caller, if any.

        :param select: The selector about to be run. This may also be an
            instance of the API's ``SoupSieve`` class (a precompiled
            selector).

        :return: ``ns`` if it was provided or if ``select`` is a
            precompiled selector; otherwise the namespaces recorded on
            the tag this object is scoped to.
        """
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            # Only string selectors get the tag's own namespaces as a
            # default. A precompiled selector is left alone: Soup Sieve
            # is documented to reject a namespace mapping passed alongside
            # an already-compiled pattern.
            ns = self.tag._namespaces
        return ns

    def _rs(self, results: Iterable[Tag]) -> ResultSet[Tag]:
        """Normalize a list of results to a :py:class:`ResultSet`.

        A :py:class:`ResultSet` is more consistent with the rest of
        Beautiful Soup's API, and :py:meth:`ResultSet.__getattr__` has
        a helpful error message if you try to treat a list of results
        as a single result (a common mistake).

        :param results: The tags found by a selector.

        :return: A `ResultSet` containing ``results``.
        """
        # Imported at call time rather than at module level (presumably to
        # avoid a circular import between this module and ``bs4``).
        from bs4 import ResultSet

        return ResultSet(None, results)

    def compile(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> SoupSieve:
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.

        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(select, self._ns(namespaces, select), flags, **kwargs)

    def select_one(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> element.Tag | None:
        """Perform a CSS selection operation on the current Tag and return the
        first result, if any.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :return: The first matching tag, or ``None`` if nothing matched.
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        limit: int = 0,
        flags: int = 0,
        **kwargs: Any,
    ) -> ResultSet[element.Tag]:
        """Perform a CSS selection operation on the current `element.Tag`.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            ``None`` is treated the same as the default, 0.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :return: A `ResultSet` of the matching tags.
        """
        if limit is None:
            limit = 0

        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
            )
        )

    def iselect(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        limit: int = 0,
        flags: int = 0,
        **kwargs: Any,
    ) -> Iterator[element.Tag]:
        """Perform a CSS selection operation on the current `element.Tag`.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.iselect()
        <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            ``None`` is treated the same as the default, 0.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.

        :return: An iterator over the matching tags.
        """
        # Mirror select(): tolerate an explicit ``limit=None``.
        if limit is None:
            limit = 0

        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> Optional[element.Tag]:
        """Find the `element.Tag` closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.closest()
        <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.

        :return: The closest matching tag, or ``None`` if there is none.
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> bool:
        """Check whether or not this `element.Tag` matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.match()
        <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.match()
            <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
            method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            `soupsieve.match()
            <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
            method.

        :return: Whatever the underlying ``match()`` call reports,
            typed as a boolean.
        """
        return cast(
            bool,
            self.api.match(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            ),
        )

    def filter(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> ResultSet[element.Tag]:
        """Filter this `element.Tag`'s direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing a `element.Tag` into that library's `soupsieve.filter()
        <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
        method. For more information, see the documentation for
        `soupsieve.filter()
        <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.filter()
            <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
            method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            `soupsieve.filter()
            <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
            method.

        :return: A `ResultSet` of the tags that passed the filter.
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )
|
|
|