Karim shoair committed on
Commit ·
bc33260
1
Parent(s): 27a318e
Adding empty methods (get/get_all/extract/extract_first)
Browse files
For easy copy-paste from Scrapy/parsel code when needed :)
- scrapling/core/custom_types.py +23 -0
- scrapling/parser.py +17 -0
scrapling/core/custom_types.py
CHANGED
|
@@ -89,6 +89,16 @@ class TextHandler(str):
|
|
| 89 |
data = re.sub(' +', ' ', data)
|
| 90 |
return self.__class__(data.strip())
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
def json(self) -> Dict:
|
| 93 |
"""Return json response if the response is jsonable otherwise throw error"""
|
| 94 |
# Using str function as a workaround for orjson issue with subclasses of str
|
|
@@ -186,6 +196,19 @@ class TextHandlers(List[TextHandler]):
|
|
| 186 |
return result
|
| 187 |
return default
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
class AttributesHandler(Mapping):
|
| 191 |
"""A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
|
|
|
|
| 89 |
data = re.sub(' +', ' ', data)
|
| 90 |
return self.__class__(data.strip())
|
| 91 |
|
| 92 |
+
# For easy copy-paste from Scrapy/parsel code when needed :)
|
| 93 |
+
def get(self, default=None):
|
| 94 |
+
return self
|
| 95 |
+
|
| 96 |
+
def get_all(self):
|
| 97 |
+
return self
|
| 98 |
+
|
| 99 |
+
extract = get_all
|
| 100 |
+
extract_first = get
|
| 101 |
+
|
| 102 |
def json(self) -> Dict:
|
| 103 |
"""Return json response if the response is jsonable otherwise throw error"""
|
| 104 |
# Using str function as a workaround for orjson issue with subclasses of str
|
|
|
|
| 196 |
return result
|
| 197 |
return default
|
| 198 |
|
| 199 |
+
# For easy copy-paste from Scrapy/parsel code when needed :)
|
| 200 |
+
def get(self, default=None):
|
| 201 |
+
"""Returns the first item of the current list
|
| 202 |
+
:param default: the default value to return if the current list is empty
|
| 203 |
+
"""
|
| 204 |
+
return self[0] if len(self) > 0 else default
|
| 205 |
+
|
| 206 |
+
def extract(self):
|
| 207 |
+
return self
|
| 208 |
+
|
| 209 |
+
extract_first = get
|
| 210 |
+
get_all = extract
|
| 211 |
+
|
| 212 |
|
| 213 |
class AttributesHandler(Mapping):
|
| 214 |
"""A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
|
scrapling/parser.py
CHANGED
|
@@ -330,6 +330,16 @@ class Adaptor(SelectorsGeneration):
|
|
| 330 |
|
| 331 |
return self.__convert_results(prev_element)
|
| 332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
def __str__(self) -> str:
|
| 334 |
return self.html_content
|
| 335 |
|
|
@@ -1073,12 +1083,19 @@ class Adaptors(List[Adaptor]):
|
|
| 1073 |
]
|
| 1074 |
return self.__class__(results) if results else results
|
| 1075 |
|
|
|
|
| 1076 |
def get(self, default=None):
|
| 1077 |
"""Returns the first item of the current list
|
| 1078 |
:param default: the default value to return if the current list is empty
|
| 1079 |
"""
|
| 1080 |
return self[0] if len(self) > 0 else default
|
| 1081 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1082 |
@property
|
| 1083 |
def first(self):
|
| 1084 |
"""Returns the first item of the current list or `None` if the list is empty"""
|
|
|
|
| 330 |
|
| 331 |
return self.__convert_results(prev_element)
|
| 332 |
|
| 333 |
+
# For easy copy-paste from Scrapy/parsel code when needed :)
|
| 334 |
+
def get(self, default=None):
|
| 335 |
+
return self
|
| 336 |
+
|
| 337 |
+
def get_all(self):
|
| 338 |
+
return self
|
| 339 |
+
|
| 340 |
+
extract = get_all
|
| 341 |
+
extract_first = get
|
| 342 |
+
|
| 343 |
def __str__(self) -> str:
|
| 344 |
return self.html_content
|
| 345 |
|
|
|
|
| 1083 |
]
|
| 1084 |
return self.__class__(results) if results else results
|
| 1085 |
|
| 1086 |
+
# For easy copy-paste from Scrapy/parsel code when needed :)
|
| 1087 |
def get(self, default=None):
|
| 1088 |
"""Returns the first item of the current list
|
| 1089 |
:param default: the default value to return if the current list is empty
|
| 1090 |
"""
|
| 1091 |
return self[0] if len(self) > 0 else default
|
| 1092 |
|
| 1093 |
+
def extract(self):
|
| 1094 |
+
return self
|
| 1095 |
+
|
| 1096 |
+
extract_first = get
|
| 1097 |
+
get_all = extract
|
| 1098 |
+
|
| 1099 |
@property
|
| 1100 |
def first(self):
|
| 1101 |
"""Returns the first item of the current list or `None` if the list is empty"""
|