Spaces:
Sleeping
Sleeping
File size: 456 Bytes
4475241 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from bs4 import BeautifulSoup
class Cleaner:
    """Turn an HTML fragment into plain text, one line per paragraph."""

    def put_line_breaks(self, text: str) -> str:
        """Return *text* with a newline inserted after every ``</p>`` tag.

        Only the exact lowercase closing tag ``</p>`` is matched.
        """
        # The closing tag uses a forward slash. The previous pattern used a
        # backslash ("<\p>"), which never occurs in real markup, so no line
        # breaks were ever inserted.
        return text.replace("</p>", "</p>\n")

    def remove_html_tags(self, text: str) -> str:
        """Return the text content of *text* with all markup removed.

        Parsing is delegated to BeautifulSoup using the ``lxml`` parser.
        """
        return BeautifulSoup(text, "lxml").text

    def clean(self, text: str) -> str:
        """Return *text* as plain text with surrounding whitespace stripped.

        Line breaks are inserted after paragraph ends *before* the tags are
        removed, so that paragraph boundaries are still marked by newlines
        once the markup is gone.
        """
        text = self.put_line_breaks(text)
        text = self.remove_html_tags(text)
        return text.strip()