File size: 246 Bytes
6f953dc
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import re

# Shared Porter stemmer instance, created once at import time and reused by
# stem_text() for every token instead of constructing a new stemmer per call.
ps = PorterStemmer()

def stem_text(text):
    """Return *text* with each token replaced by its Porter stem.

    The input is split with ``word_tokenize`` and every resulting token is
    passed through the module-level ``ps`` stemmer; the stems are then
    joined back together with single spaces.

    Args:
        text: The string to stem. Any falsy value (``""``, ``None``, ...)
            short-circuits and yields an empty string.

    Returns:
        A space-separated string of stemmed tokens, or ``""`` when *text*
        is falsy.
    """
    if text:
        stems = [ps.stem(token) for token in word_tokenize(text)]
        return " ".join(stems)
    # Nothing to tokenize: keep the return type a string for callers.
    return ""