skopush-test / tools.py
kantundpeterpan's picture
push push push
05a7f14 verified
raw
history blame contribute delete
735 Bytes
import string

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources this module uses (stopword list, punkt_tab
# tokenizer data, WordNet). These calls run at import time.
nltk.download("stopwords")
nltk.download('punkt_tab')
nltk.download('wordnet')

# Tokens to discard: English stopwords plus every ASCII punctuation character.
stop = set(stopwords.words('english')) | set(string.punctuation)
def tokenize_quote(r):
    """Lower-case a quote, tokenize it, and drop stopwords and punctuation.

    Args:
        r: The quote text. It is lower-cased before tokenization, so it
            must provide ``.lower()``.

    Returns:
        A list of the tokens produced by ``nltk.word_tokenize`` that are
        not members of the module-level ``stop`` set.
    """
    tokens = nltk.word_tokenize(r.lower())
    return [word for word in tokens if word not in stop]
def lemmatize_tokens(tokens: list):
    """Lemmatize every token with NLTK's ``WordNetLemmatizer``.

    Args:
        tokens: Iterable of token strings.

    Returns:
        A new list holding ``WordNetLemmatizer().lemmatize(token)`` for each
        token, in the original order. The lemmatizer is called with its
        default arguments.
    """
    # Look the bound method up once rather than on every iteration.
    lemmatize = WordNetLemmatizer().lemmatize
    return [lemmatize(token) for token in tokens]
def lemmatize_X(X):
    """Turn each entry of ``X.quote`` into a space-joined string of lemmas.

    Each quote is passed through ``tokenize_quote`` and then
    ``lemmatize_tokens``, and the resulting tokens are joined with single
    spaces.

    Args:
        X: Object exposing a ``quote`` attribute that supports ``.apply``
            (presumably a pandas DataFrame with a ``quote`` column; confirm
            against callers).

    Returns:
        The result of the chained ``.apply`` calls: one joined string per
        quote.
    """
    tokenized = X.quote.apply(tokenize_quote)
    lemmatized = tokenized.apply(lemmatize_tokens)
    return lemmatized.apply(" ".join)
def test_scorer(ytrue, ypred, blubb = 1):
return F"this works! even with arguments {blubb}"