# Spaces: Moncey10/sentiment_analysis (Sleeping)
# File size: 434 Bytes
# 4112bd3

import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Shared Porter stemmer instance; clean_text calls its stem() on every word it keeps.
port_stem = PorterStemmer()
# English stop-word list from NLTK, held as a set for the membership test in clean_text.
# NOTE(review): presumably requires the NLTK "stopwords" corpus to be available
# when this module is imported — confirm against the deployment setup.
stop_words = set(stopwords.words('english'))

def clean_text(text: str) -> str:
    """Normalize *text* into a space-separated string of stemmed words.

    The input is coerced with ``str()``, every character that is not an
    ASCII letter is replaced by a space, and the result is lower-cased and
    split on whitespace. Tokens found in the module-level ``stop_words``
    set are dropped; each remaining token is stemmed with ``port_stem``.

    Args:
        text: The raw text to clean. Non-string values are converted with
            ``str()`` before processing.

    Returns:
        The surviving stems joined by single spaces (an empty string when
        no token survives).
    """
    # Anything outside a-z / A-Z (digits, punctuation, non-ASCII) becomes a
    # space so it acts as a token separator.
    letters_only = re.sub('[^a-zA-Z]', ' ', str(text))

    stems = []
    for token in letters_only.lower().split():
        # Stop words are filtered before stemming, so the comparison is
        # made against the unstemmed, lower-cased token.
        if token in stop_words:
            continue
        stems.append(port_stem.stem(token))

    return ' '.join(stems)