Karim shoair committed on
Commit ·
c86e18f
1
Parent(s): 001dbf4
fix(parser): Make `html_content` and `prettify` return strings not bytes (depends on the encoding)
Browse files- scrapling/parser.py +13 -9
scrapling/parser.py
CHANGED
|
@@ -339,7 +339,10 @@ class Selector(SelectorsGeneration):
|
|
| 339 |
@property
|
| 340 |
def html_content(self) -> TextHandler:
|
| 341 |
"""Return the inner HTML code of the element"""
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
@property
|
| 345 |
def body(self):
|
|
@@ -348,15 +351,16 @@ class Selector(SelectorsGeneration):
|
|
| 348 |
|
| 349 |
def prettify(self) -> TextHandler:
|
| 350 |
"""Return a prettified version of the element's inner html-code"""
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
with_tail=False,
|
| 358 |
-
)
|
| 359 |
)
|
|
|
|
|
|
|
|
|
|
| 360 |
|
| 361 |
def has_class(self, class_name: str) -> bool:
|
| 362 |
"""Check if the element has a specific class
|
|
|
|
| 339 |
@property
|
| 340 |
def html_content(self) -> TextHandler:
|
| 341 |
"""Return the inner HTML code of the element"""
|
| 342 |
+
content = tostring(self._root, encoding=self.encoding, method="html", with_tail=False)
|
| 343 |
+
if isinstance(content, bytes):
|
| 344 |
+
content = content.decode("utf-8")
|
| 345 |
+
return TextHandler(content)
|
| 346 |
|
| 347 |
@property
|
| 348 |
def body(self):
|
|
|
|
| 351 |
|
| 352 |
def prettify(self) -> TextHandler:
|
| 353 |
"""Return a prettified version of the element's inner html-code"""
|
| 354 |
+
content = tostring(
|
| 355 |
+
self._root,
|
| 356 |
+
encoding=self.encoding,
|
| 357 |
+
pretty_print=True,
|
| 358 |
+
method="html",
|
| 359 |
+
with_tail=False,
|
|
|
|
|
|
|
| 360 |
)
|
| 361 |
+
if isinstance(content, bytes):
|
| 362 |
+
content = content.decode("utf-8")
|
| 363 |
+
return TextHandler(content)
|
| 364 |
|
| 365 |
def has_class(self, class_name: str) -> bool:
|
| 366 |
"""Check if the element has a specific class
|