Karim shoair committed on
Commit
f88c43a
·
1 Parent(s): ae6e6c8

feat(TextHandler): Add argument to `clean` method to remove html entities

Browse files
Files changed (1) hide show
  1. scrapling/core/custom_types.py +3 -1
scrapling/core/custom_types.py CHANGED
@@ -103,9 +103,11 @@ class TextHandler(str):
103
  """Return a sorted version of the string"""
104
  return self.__class__("".join(sorted(self, reverse=reverse)))
105
 
106
- def clean(self) -> Union[str, "TextHandler"]:
107
  """Return a new version of the string after removing all white spaces and consecutive spaces"""
108
  data = self.translate(__CLEANING_TABLE__)
 
 
109
  return self.__class__(__CONSECUTIVE_SPACES_REGEX__.sub(" ", data).strip())
110
 
111
  # For easy copy-paste from Scrapy/parsel code when needed :)
 
103
  """Return a sorted version of the string"""
104
  return self.__class__("".join(sorted(self, reverse=reverse)))
105
 
106
+ def clean(self, remove_entities=False) -> Union[str, "TextHandler"]:
107
  """Return a new version of the string after removing all white spaces and consecutive spaces"""
108
  data = self.translate(__CLEANING_TABLE__)
109
+ if remove_entities:
110
+ data = _replace_entities(data)
111
  return self.__class__(__CONSECUTIVE_SPACES_REGEX__.sub(" ", data).strip())
112
 
113
  # For easy copy-paste from Scrapy/parsel code when needed :)