Spaces:
Paused
Paused
| """Integration code for CSS selectors using Soup Sieve (pypi: soupsieve).""" | |
| import warnings | |
# soupsieve is an optional dependency. If it is missing, bind the name
# to None (the CSS class below uses that as its "not installed" marker)
# and warn once at import time instead of failing the import.
try:
    import soupsieve
except ImportError:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
            point.
        :param api: A plug-in replacement for the soupsieve module,
            designed mainly for use in tests.
        :raise NotImplementedError: If `api` is None, which is what the
            default value is when soupsieve could not be imported.
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around soupsieve.escape(). See the
        documentation for that function for more information.

        :param ident: The identifier to escape.
        :return: Whatever the configured API's escape() returns.
        :raise NotImplementedError: If this object has no API to
            delegate to.
        """
        # Check the API this instance actually delegates to, not the
        # module-level import: a replacement API passed to the
        # constructor must keep working even when the real soupsieve
        # package is absent.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespaces supplied by the caller, or None.
        :param select: The selector the namespaces will be used with.
        :return: `ns` unchanged if it was provided or if `select` is a
            precompiled pattern; otherwise the namespaces recorded on
            this object's tag.
        """
        # A precompiled pattern already has a namespace context
        # compiled in, which cannot be replaced, so the tag's
        # namespaces are only substituted for a selector that is not
        # precompiled and for which the caller supplied nothing.
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: The results to wrap.
        :return: A ResultSet built from `results`.
        """
        # Import here to avoid circular import
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will use the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.compile() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.compile() method.
        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will use the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select_one() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select_one() method.
        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        if limit is None:
            limit = 0
        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.iselect() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.iselect() method.
        :return: A generator
        :rtype: types.GeneratorType
        """
        # Mirror select(): callers may pass limit=None, and the
        # underlying API is handed an integer either way.
        if limit is None:
            limit = 0
        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.closest() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.closest() method.
        :return: A Tag, or None if there is no match.
        :rtype: bs4.Tag
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.match() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.match() method.
        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.filter() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.filter() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )