Spaces:
Runtime error
Runtime error
| #preprocessing | |
| from sklearn.preprocessing import OrdinalEncoder | |
| from nltk.corpus import stopwords | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.preprocessing.text import one_hot | |
| from nltk.stem.porter import PorterStemmer | |
| import re | |
| #stem words | |
def stemm(data):
    """Clean a single raw text string and return it stemmed, wrapped in a list.

    Processing steps:
      1. every character that is not an ASCII letter is replaced by a space;
      2. the text is lower-cased and split on whitespace;
      3. English stop words and the token 'br' are dropped;
      4. each remaining token is reduced with the Porter stemmer;
      5. the stems are re-joined with single spaces.

    Args:
        data: the raw text to clean (a single string).

    Returns:
        A one-element list containing the cleaned, stemmed string. The list
        wrapper is kept so callers can iterate over it as a corpus.
    """
    ps = PorterStemmer()

    # Build the exclusion set once per call: evaluating
    # stopwords.words('english') for every token and scanning it as a list
    # is needlessly slow on long inputs.
    excluded = set(stopwords.words('english'))
    # After step 1 an HTML "<br>" tag is left behind as the bare token "br",
    # so dropping that token removes the tag remnants.
    excluded.add('br')

    tokens = re.sub('[^a-zA-Z]', ' ', data).lower().split()
    review = ' '.join(ps.stem(word) for word in tokens if word not in excluded)
    return [review]
| #one hot encoding and padding | |
def preprocess(data, *, vocab_size=10000, sent_length=2470):
    """Turn one raw text string into a padded sequence of hashed word indices.

    The text is cleaned and stemmed with ``stemm``, each resulting document
    is encoded with Keras ``one_hot`` (word -> integer index via hashing),
    and the index sequences are padded at the front to a fixed length.

    Args:
        data: the raw text to encode (a single string).
        vocab_size: size of the hashing space passed to ``one_hot``.
            Defaults to 10000, the value previously hard-coded here.
        sent_length: ``maxlen`` passed to ``pad_sequences``.
            Defaults to 2470, the value previously hard-coded here.

    Returns:
        The result of ``pad_sequences`` for the one-document corpus
        (one row of ``sent_length`` entries).
    """
    # NOTE(review): both defaults presumably have to match the values used
    # when the downstream model was trained -- confirm before overriding.
    corpus = stemm(data)

    # NOTE(review): the Keras docs describe one_hot's default hash function
    # as not stable across runs; verify that indices produced here agree
    # with the ones the model saw during training.
    onehot_corpus = [one_hot(words, vocab_size) for words in corpus]

    return pad_sequences(onehot_corpus, padding='pre', maxlen=sent_length)