"""Text preprocessing helpers: tokenize, strip stopwords/punctuation, lemmatize."""

import string

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Corpora required by word_tokenize ("punkt_tab"), the stopword list, and
# the WordNet lemmatizer.
# NOTE(review): downloading at import time is a module side effect; consider
# moving these behind an explicit setup function if imported in production.
nltk.download("stopwords")
nltk.download('punkt_tab')
nltk.download('wordnet')

# Removal set: English stopwords plus ASCII punctuation characters.
stop = set(stopwords.words('english') + list(string.punctuation))

# Single shared lemmatizer. WordNetLemmatizer holds no per-call state, so
# constructing it once here avoids re-instantiating it on every call
# (previously built inside lemmatize_tokens on each invocation).
_LEMMATIZER = WordNetLemmatizer()


def tokenize_quote(r: str) -> list:
    """Lowercase *r*, tokenize it, and drop stopwords/punctuation.

    Args:
        r: Raw quote text.

    Returns:
        List of lowercase tokens with members of ``stop`` removed.
    """
    tokens = nltk.word_tokenize(r.lower())
    return [word for word in tokens if word not in stop]


def lemmatize_tokens(tokens: list) -> list:
    """Return the WordNet lemma of each token in *tokens* (default POS: noun)."""
    return [_LEMMATIZER.lemmatize(t) for t in tokens]


def lemmatize_X(X):
    """Run the full pipeline over the ``quote`` column of *X*.

    Each quote is tokenized, cleaned of stopwords/punctuation, lemmatized,
    and re-joined into a single space-separated string.

    Args:
        X: Object with a ``quote`` column supporting ``.apply`` —
           presumably a pandas DataFrame (TODO confirm with caller).

    Returns:
        A Series of cleaned, space-joined strings.
    """
    return (
        X.quote.apply(tokenize_quote)
        .apply(lemmatize_tokens)
        .apply(" ".join)
    )


def test_scorer(ytrue, ypred, blubb=1):
    """Smoke-test scorer returning a fixed message.

    NOTE(review): *ytrue* and *ypred* are accepted but never used — this
    looks like a placeholder for a real scoring function; verify intent.

    Args:
        ytrue: Ignored.
        ypred: Ignored.
        blubb: Value interpolated into the returned message (default 1).

    Returns:
        A confirmation string embedding *blubb*.
    """
    return f"this works! even with arguments {blubb}"