File size: 246 Bytes
6f953dc | 1 2 3 4 5 6 7 8 9 10 11 12 | from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import re
# Shared Porter stemmer, built once at import time and reused by stem_text().
ps = PorterStemmer()
def stem_text(text):
    """Return *text* with every token reduced to its Porter stem.

    The input is split into tokens with ``word_tokenize``; each token is
    passed through the module-level ``ps`` stemmer and the resulting stems
    are joined with single spaces.

    Args:
        text: The string to stem. Any falsy value (e.g. ``None`` or ``""``)
            is accepted and short-circuits to an empty result.

    Returns:
        The space-joined stems, or ``""`` when *text* is falsy.
    """
    # Guard first so falsy input never reaches the tokenizer.
    if not text:
        return ""
    stems = (ps.stem(token) for token in word_tokenize(text))
    return " ".join(stems)
|