Karim shoair committed on
Commit ·
76ae95e
1
Parent(s): 450d5ca
feat: Make `.body` return the passed content as it is without any processing
Browse files
This makes it possible to download files and deal with non-HTML requests (e.g., #81).
- scrapling/parser.py +6 -4
scrapling/parser.py
CHANGED
|
@@ -132,8 +132,7 @@ class Selector(SelectorsGeneration):
|
|
| 132 |
strip_cdata=(not keep_cdata),
|
| 133 |
)
|
| 134 |
self._root = fromstring(body, parser=parser, base_url=url)
|
| 135 |
-
|
| 136 |
-
self._raw_body = body.decode()
|
| 137 |
|
| 138 |
else:
|
| 139 |
# All HTML types inherit from HtmlMixin so this to check for all at once
|
|
@@ -342,7 +341,10 @@ class Selector(SelectorsGeneration):
|
|
| 342 |
"""Return the inner HTML code of the element"""
|
| 343 |
return TextHandler(tostring(self._root, encoding=self.encoding, method="html", with_tail=False))
|
| 344 |
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
def prettify(self) -> TextHandler:
|
| 348 |
"""Return a prettified version of the element's inner html-code"""
|
|
@@ -934,7 +936,7 @@ class Selector(SelectorsGeneration):
|
|
| 934 |
# Operations on text functions
|
| 935 |
def json(self) -> Dict:
|
| 936 |
"""Return JSON response if the response is jsonable otherwise throws error"""
|
| 937 |
-
if self._raw_body:
|
| 938 |
return TextHandler(self._raw_body).json()
|
| 939 |
elif self.text:
|
| 940 |
return self.text.json()
|
|
|
|
| 132 |
strip_cdata=(not keep_cdata),
|
| 133 |
)
|
| 134 |
self._root = fromstring(body, parser=parser, base_url=url)
|
| 135 |
+
self._raw_body = content
|
|
|
|
| 136 |
|
| 137 |
else:
|
| 138 |
# All HTML types inherit from HtmlMixin so this to check for all at once
|
|
|
|
| 341 |
"""Return the inner HTML code of the element"""
|
| 342 |
return TextHandler(tostring(self._root, encoding=self.encoding, method="html", with_tail=False))
|
| 343 |
|
| 344 |
+
@property
|
| 345 |
+
def body(self):
|
| 346 |
+
"""Return the raw body of the current `Selector` without any processing. Useful for binary and non-HTML requests."""
|
| 347 |
+
return self._raw_body
|
| 348 |
|
| 349 |
def prettify(self) -> TextHandler:
|
| 350 |
"""Return a prettified version of the element's inner html-code"""
|
|
|
|
| 936 |
# Operations on text functions
|
| 937 |
def json(self) -> Dict:
|
| 938 |
"""Return JSON response if the response is jsonable otherwise throws error"""
|
| 939 |
+
if self._raw_body and isinstance(self._raw_body, str):
|
| 940 |
return TextHandler(self._raw_body).json()
|
| 941 |
elif self.text:
|
| 942 |
return self.text.json()
|