# Neural_Network / simplelstm.py
# ajithavijayan's picture
# Upload 21 files
# abc900f
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
from sklearn.model_selection import train_test_split
import pickle
# Train a single-layer LSTM sentiment classifier on the IMDB review CSV, report
# held-out accuracy, then persist both the model and the fitted tokeniser.

# Input/output locations, named once so they are easy to find and change.
DATASET_PATH = r"C:\Users\Ajitha V\OneDrive\Desktop\Neural_network\IMDB Dataset.csv"
MODEL_PATH = "lstm_model.h5"
TOKENISER_PATH = "lstm_tokeniser.pkl"

dataset = pd.read_csv(DATASET_PATH)

# NOTE: labels are encoded negative=1 / positive=0, so the sigmoid output of
# the model below is the probability that a review is NEGATIVE. This encoding
# is kept as-is because anything loading the saved model relies on it.
dataset['sentiment'] = dataset['sentiment'].map({'negative': 1, 'positive': 0})

# `.map` turns any value outside the dictionary into NaN; stop here instead of
# silently training on NaN targets.
if dataset['sentiment'].isna().any():
    raise ValueError(
        "Unexpected value in 'sentiment' column; expected only "
        "'negative' or 'positive'."
    )

X = dataset['review'].values
y = dataset['sentiment'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit the vocabulary on the training split only, so no information from the
# test reviews leaks into the word index.
tokeniser = tf.keras.preprocessing.text.Tokenizer()
tokeniser.fit_on_texts(X_train)
X_train = tokeniser.texts_to_sequences(X_train)
X_test = tokeniser.texts_to_sequences(X_test)
print(X_train[0:2])

# +1 because the tokeniser's word indices start at 1; index 0 is left for padding.
vocab_size = len(tokeniser.word_index) + 1

# Pad/truncate every review to a fixed number of tokens so batches are rectangular.
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

# Model: embedding -> LSTM(100) -> single sigmoid unit for binary classification.
embedding_vector_length = 32
model = Sequential()
model.add(
    Embedding(vocab_size, embedding_vector_length, input_length=max_review_length)
)
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=3, batch_size=64)

# `evaluate` returns [loss, accuracy] given the metrics passed to `compile`.
scores = model.evaluate(X_test, y_test, verbose=0)
print(f"Accuracy: {scores[1] * 100:.2f}%")

# Persist the trained model together with the tokeniser: inference must use the
# exact same word index that was used for training.
model.save(MODEL_PATH)
with open(TOKENISER_PATH, 'wb') as file:
    # Pickle is only safe to load from trusted sources; consumers of this file
    # should not unpickle a tokeniser obtained from an untrusted location.
    pickle.dump(tokeniser, file)