kankur0007's picture
Add application file
4475241
from bs4 import BeautifulSoup
class Cleaner:
    """Turn an HTML fragment into plain text, one paragraph per line."""

    def __init__(self) -> None:
        """Create a cleaner; it holds no state or configuration."""

    def put_line_breaks(self, text: str) -> str:
        """Return *text* with a newline inserted after every ``</p>`` tag.

        The tags themselves are kept. The newline makes paragraph
        boundaries survive the later tag-stripping step, which would
        otherwise glue adjacent paragraphs together.
        """
        # The previous pattern was "<\p>" (a backslash, which is also an
        # invalid escape sequence), so it never matched a real closing
        # paragraph tag.
        return text.replace("</p>", "</p>\n")

    def remove_html_tags(self, text: str) -> str:
        """Return the text content of *text* with all markup removed.

        Parsing is delegated to BeautifulSoup using the ``lxml`` parser,
        so the ``lxml`` package must be installed alongside ``bs4``.
        """
        clean_text = BeautifulSoup(text, "lxml").text
        return clean_text

    def clean(self, text: str) -> str:
        """Return *text* as plain text.

        Steps, in order: add a line break after each closing paragraph
        tag, strip all HTML markup, then trim leading and trailing
        whitespace.
        """
        text = self.put_line_breaks(text)
        text = self.remove_html_tags(text)
        return text.strip()