| | """Tests to ensure that the lxml tree builder generates good trees.""" |
| |
|
| | import pickle |
| | import pytest |
| | import re |
| | import warnings |
| | from . import LXML_PRESENT, LXML_VERSION |
| |
|
| | if LXML_PRESENT: |
| | from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML |
| |
|
| | from bs4 import ( |
| | BeautifulSoup, |
| | BeautifulStoneSoup, |
| | ) |
| | from bs4.element import Comment, Doctype, SoupStrainer |
| | from . import ( |
| | HTMLTreeBuilderSmokeTest, |
| | XMLTreeBuilderSmokeTest, |
| | SOUP_SIEVE_PRESENT, |
| | SoupTest, |
| | ) |
| |
|
@pytest.mark.skipif(
    not LXML_PRESENT,
    reason="lxml seems not to be present, not testing its tree builder."
)
class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
    """Run the HTML smoke tests, plus lxml-specific checks, with lxml's HTML builder.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited tests.
    """

    @property
    def default_builder(self):
        """Return the tree builder class these tests exercise."""
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        """Out-of-range numeric character references vanish from the output."""
        # Three distinct spellings of a reference above U+10FFFF: a huge
        # decimal value, a huge hexadecimal value, and a smaller decimal
        # value that is still out of range. Each must be written as an
        # escaped reference -- a literal replacement character in the
        # markup would not exercise the entity-handling path at all.
        self.assert_soup(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assert_soup(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assert_soup(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    def test_entities_in_foreign_document_encoding(self):
        """Deliberately do nothing.

        NOTE(review): presumably this overrides a smoke test inherited from
        ``HTMLTreeBuilderSmokeTest`` that the lxml builder cannot satisfy --
        confirm against the base class before removing or changing it.
        """
        pass

    # The skip reason below records why old lxml versions are excluded:
    # running this test against them risks a segfault.
    @pytest.mark.skipif(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        reason="Skipping doctype test for old version of lxml to avoid segfault."
    )
    def test_empty_doctype(self):
        """An empty ``<!DOCTYPE>`` yields a first node whose text strips to ''."""
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        assert doctype.strip() == ""

    def test_beautifulstonesoup_is_xml_parser(self):
        """``BeautifulStoneSoup`` emits XML-style output and a deprecation warning."""
        with warnings.catch_warnings(record=True) as caught:
            soup = BeautifulStoneSoup("<b />")
        # The empty element is serialized in XML self-closing form.
        assert str(soup.b) == "<b/>"
        # Exactly one warning is expected; unpacking fails otherwise.
        [warning] = caught
        # The warning must be attributed to this file (the caller).
        assert warning.filename == __file__
        assert "BeautifulStoneSoup class is deprecated" in str(warning.message)

    def test_tracking_line_numbers(self):
        """``.sourceline`` / ``.sourcepos`` resolve to child tags of those names.

        Even though ``store_line_numbers=True`` is requested, attribute
        access on ``soup.p`` finds the ``<sourceline>`` and ``<sourcepos>``
        tags from the markup rather than numeric positions.
        NOTE(review): presumably because the lxml builder does not record
        line numbers -- confirm against the builder implementation.
        """
        soup = self.soup(
            "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        assert soup.p.sourceline.name == "sourceline"
        assert soup.p.sourcepos.name == "sourcepos"

| | |
@pytest.mark.skipif(
    not LXML_PRESENT,
    reason="lxml seems not to be present, not testing its XML tree builder."
)
class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
    """Exercise lxml's XML tree builder; see ``XMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        """Return the tree builder class these tests exercise."""
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        """Check the prefix-to-URI maps stored on the soup and on single tags."""
        xml_ns = 'http://www.w3.org/XML/1998/namespace'
        markup = (
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag2 xmlns:prefix="http://prefixed-namespace.com">content</prefix:tag2>'
            '<prefix2:tag3 xmlns:prefix2="http://another-namespace.com">'
            '<subtag xmlns="http://another-unprefixed-namespace.com">'
            '<subsubtag xmlns="http://yet-another-unprefixed-namespace.com">'
            '</prefix2:tag3>'
            '</root>'
        )
        tree = self.soup(markup)

        # Document level: every *prefixed* declaration in the markup is
        # present, alongside the built-in ``xml`` prefix. None of the
        # un-prefixed ``xmlns="..."`` declarations appear.
        assert tree._namespaces == {
            'xml': xml_ns,
            'prefix': 'http://prefixed-namespace.com',
            'prefix2': 'http://another-namespace.com',
        }

        # Tag level: <tag> only declares an un-prefixed namespace, so its
        # map holds nothing beyond ``xml``.
        assert tree.tag._namespaces == {
            'xml': xml_ns,
        }

        # <prefix:tag2> carries the prefix it declares on itself.
        assert tree.tag2._namespaces == {
            'xml': xml_ns,
            'prefix': 'http://prefixed-namespace.com',
        }

        # <subtag> and <subsubtag> sit inside <prefix2:tag3>, and both see
        # ``prefix2``; their own un-prefixed declarations add no entries.
        assert tree.subtag._namespaces == {
            'xml': xml_ns,
            'prefix2': 'http://another-namespace.com',
        }

        assert tree.subsubtag._namespaces == {
            'xml': xml_ns,
            'prefix2': 'http://another-namespace.com',
        }

    @pytest.mark.skipif(
        not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed"
    )
    def test_namespace_interaction_with_select_and_find(self):
        """Show how namespace prefixes behave under ``select_one`` and ``find``."""
        markup = (
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag2 xmlns:prefix="http://prefixed-namespace.com">content</tag>'
            '<subtag xmlns:prefix="http://another-namespace-same-prefix.com">'
            '<prefix:tag3>'
            '</subtag>'
            '</root>'
        )
        tree = self.soup(markup)

        # Selecting from the document root works for the un-prefixed tag
        # and for the first declaration of ``prefix``.
        assert tree.select_one('tag').name == 'tag'
        assert tree.select_one('prefix|tag2').name == 'tag2'

        # ``prefix`` is declared a second time (with a different URI) on
        # <subtag>; from the document root that second binding is not
        # used, so <prefix:tag3> is not matched.
        assert tree.select_one('prefix|tag3') is None

        # Passing <subtag>'s own namespace map explicitly does match it.
        subtag = tree.subtag
        explicit_match = tree.select_one(
            'prefix|tag3', namespaces=subtag._namespaces
        )
        assert explicit_match.name == 'tag3'

        # Selecting from <subtag> itself matches without an explicit map.
        scoped_match = subtag.select_one('prefix|tag3')
        assert scoped_match.name=='tag3'

        # find() is given ``prefix:name`` strings and locates all of these
        # tags, including <prefix:tag3> from the document root.
        assert tree.find('tag').name == 'tag'
        assert tree.find('prefix:tag2').name == 'tag2'
        assert tree.find('prefix:tag3').name == 'tag3'
        assert subtag.find('prefix:tag3').name == 'tag3'

    def test_pickle_restores_builder(self):
        """A pickle round trip yields a soup with a new builder of the same class."""
        original = self.soup("<a>some markup</a>")
        assert isinstance(original.builder, self.default_builder)

        restored = pickle.loads(pickle.dumps(original))

        # Content survives the round trip...
        assert "some markup" == restored.a.string
        # ...while the builder compares unequal to the original's, yet is
        # still an instance of the expected builder class.
        assert restored.builder != original.builder
        assert isinstance(restored.builder, self.default_builder)

| |
|