Spaces:
Runtime error
Runtime error
| import re | |
| from nltk.corpus.reader import pickle | |
| import pandas as pd | |
| import numpy as np | |
| from nltk.corpus import stopwords | |
| from nltk.stem import SnowballStemmer | |
def clean_text(text: str) -> str:
    """Normalize raw text into a space-joined string of stemmed tokens.

    Every character that is not a regex word character is treated as a
    separator, tokens found in NLTK's English stop-word list are dropped, and
    each remaining token is passed through the English Snowball stemmer.

    Args:
        text: The raw input string.

    Returns:
        The stemmed tokens joined by single spaces, or an empty string when
        no token survives.
    """
    stop_words = set(stopwords.words("english"))
    english_stemmer = SnowballStemmer("english")

    # NOTE(review): this function used to start with ``text.replace('', '')``,
    # which replaces nothing. The literal it was meant to strip looks lost
    # (possibly an HTML tag) -- confirm against the training-time pipeline
    # before reinstating it.

    # Non-word characters become spaces; ``split()`` with no arguments then
    # discards runs of whitespace as well as leading/trailing whitespace, so
    # no separate collapse/trim passes are needed.
    words = re.sub(r'[^\w]', ' ', text).split()

    # NOTE(review): the stop-word test is case-sensitive, so a capitalised
    # stop word such as "The" is kept (assuming the NLTK list is lowercase).
    # Left as-is because the pickled vectorizer was presumably fitted on
    # text cleaned exactly this way -- confirm before changing.
    return " ".join(
        english_stemmer.stem(word) for word in words if word not in stop_words
    )
def preprocess_pipeline(text):
    """Run the preprocessing steps on *text* and return the result.

    The pipeline currently consists of a single step, ``clean_text``.
    """
    cleaned = clean_text(text)
    return cleaned
def vectorizer(text):
    """Transform *text* with the pickled count vectorizer stored on disk.

    The vectorizer is unpickled from ``vectorizers/count_vectorizer.pkl``
    (resolved against the current working directory) on every call.

    Args:
        text: Passed unchanged to the loaded object's ``transform`` method;
            presumably an iterable of already-cleaned documents -- confirm
            against the caller.

    Returns:
        Whatever the loaded object's ``transform`` returns.

    Raises:
        FileNotFoundError: If the pickle file does not exist.
    """
    # NOTE(review): ``pickle`` reaches this module via
    # ``from nltk.corpus.reader import pickle``; a plain ``import pickle``
    # would not depend on what NLTK happens to re-export.
    # Unpickling executes arbitrary code: only load a file you produced.
    # The ``with`` block closes the file handle even if unpickling fails.
    with open("vectorizers/count_vectorizer.pkl", "rb") as model_file:
        count_vectorizer = pickle.load(model_file)
    return count_vectorizer.transform(text)