import re

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Built once at import time so every clean_text() call reuses the same
# stemmer instance and the same stop-word set (set membership is O(1)).
port_stem = PorterStemmer()
stop_words = set(stopwords.words('english'))


def clean_text(text: str) -> str:
    """Normalize *text* into a space-separated string of stemmed tokens.

    Processing steps, in order:

    1. Coerce the input with ``str()`` and replace every character that is
       not an ASCII letter (``a-z`` / ``A-Z``) with a space.
    2. Lower-case the result and split it on whitespace.
    3. Discard tokens found in the module-level ``stop_words`` set.
    4. Stem each remaining token with the module-level ``port_stem``.

    Args:
        text: The raw text to clean. Non-string values are accepted and are
            converted with ``str()`` before processing.

    Returns:
        The surviving stems joined by single spaces; an empty string when
        no token survives the filtering.
    """
    # Digits, punctuation and non-ASCII characters all become separators.
    letters_only = re.sub('[^a-zA-Z]', ' ', str(text))

    stems = []
    for token in letters_only.lower().split():
        # Stop words are matched against the lower-cased, unstemmed token.
        if token in stop_words:
            continue
        stems.append(port_stem.stem(token))

    return ' '.join(stems)