Spaces:
Sleeping
Sleeping
Commit ·
5cbd4b4
1
Parent(s): 2c50978
Update functions.py
Browse files- functions.py +1 -1
functions.py
CHANGED
|
@@ -30,7 +30,7 @@ lemmatizer = WordNetLemmatizer()
|
|
| 30 |
def preprocess_text(text):
|
| 31 |
text = text.lower() # lowercase text
|
| 32 |
tokens = word_tokenize(text) # tokenize
|
| 33 |
-
filtered_words = [word for word in tokens if word.lower() not in stopword_list
|
| 34 |
lemmatized_words = [lemmatizer.lemmatize(w, get_wordnet_pos(w)) for w in filtered_words]
|
| 35 |
lemmatized_clean = [word.translate(str.maketrans('', '', string.punctuation)) for word in lemmatized_words]
|
| 36 |
return ' '.join(lemmatized_clean)
|
|
|
|
| 30 |
def preprocess_text(text):
|
| 31 |
text = text.lower() # lowercase text
|
| 32 |
tokens = word_tokenize(text) # tokenize
|
| 33 |
+
filtered_words = [word for word in tokens if word.lower() not in stopword_list]
|
| 34 |
lemmatized_words = [lemmatizer.lemmatize(w, get_wordnet_pos(w)) for w in filtered_words]
|
| 35 |
lemmatized_clean = [word.translate(str.maketrans('', '', string.punctuation)) for word in lemmatized_words]
|
| 36 |
return ' '.join(lemmatized_clean)
|