Spaces:
Sleeping
Sleeping
File size: 434 Bytes
4112bd3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Module-level Porter stemmer instance; clean_text calls its stem() method on
# every token that survives stop-word filtering.
port_stem = PorterStemmer()
# English stop words from NLTK, materialized as a set because clean_text
# performs a membership test against it for every token.
# NOTE(review): presumably requires the NLTK "stopwords" corpus to be
# available locally at import time — confirm for the deployment environment.
stop_words = set(stopwords.words('english'))
def clean_text(text: str) -> str:
    """Reduce raw text to a space-separated string of stemmed tokens.

    The input is coerced with ``str()``, every character that is not an
    ASCII letter (``a-z`` / ``A-Z``) is replaced by a space, and the result
    is lower-cased and split on whitespace. Tokens found in the module-level
    ``stop_words`` set are discarded; the remaining tokens are stemmed with
    the module-level ``port_stem`` stemmer.

    Args:
        text: The text to normalize. Non-string values are converted with
            ``str()`` before processing.

    Returns:
        The stemmed tokens joined by single spaces, or an empty string when
        no tokens remain.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', str(text))

    stemmed_tokens = []
    for token in letters_only.lower().split():
        # The stop-word check runs on the lower-cased, unstemmed token.
        if token in stop_words:
            continue
        stemmed_tokens.append(port_stem.stem(token))

    return ' '.join(stemmed_tokens)
|