SandraPK commited on
Commit
bf12787
·
1 Parent(s): a4a5dbc

Delete DNN_IMDB.py

Browse files
Files changed (1) hide show
  1. DNN_IMDB.py +0 -68
DNN_IMDB.py DELETED
@@ -1,68 +0,0 @@
1
"""Train a simple feed-forward (DNN) sentiment classifier on the IMDB dataset.

Reads 'IMDB Dataset.csv', tokenizes and pads the reviews, trains an
Embedding -> Flatten -> Dense network, persists the tokenizer and model,
and runs one example prediction.
"""
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split  # NOTE(review): unused, kept from original
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.optimizers import Adam  # NOTE(review): unused, kept from original
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


# Load the IMDB dataset from a CSV file.
# Assumes columns 'review' (text) and 'sentiment' ('positive'/'negative').
path_to_csv = 'IMDB Dataset.csv'
df = pd.read_csv(path_to_csv)

reviews = df['review'].values
labels = df['sentiment'].values

# Convert string labels to numerical values: positive -> 1, negative -> 0.
label_encoder = {'positive': 1, 'negative': 0}
y = np.array([label_encoder[label.lower()] for label in labels])

# Tokenize the text data, keeping the 10k most frequent words;
# everything rarer maps to the '<OOV>' token.
max_words = 10000
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)

# Pad/truncate every review to one fixed length.
# (The original defined a second, redundant `maxlen = 200` constant;
# a single constant is used everywhere now.)
max_review_length = 200
x = pad_sequences(sequences, maxlen=max_review_length)

# Model: Embedding -> Flatten -> Dense(relu) -> Dense(sigmoid).
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=64,
                    input_length=max_review_length))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

# Training. The evaluate() call below runs on the full training set, so
# the reported figure is a TRAINING accuracy, not a held-out test accuracy
# (the original printed it as "Test Accuracy", which was misleading).
print("Training started...")
history = model.fit(x, y, epochs=3, batch_size=16, validation_split=0.2)
loss, acc = model.evaluate(x, y)
print("Training finished.")
print(f'Training Accuracy: {round(acc*100)}%')


# Persist the tokenizer so inference can reproduce the same word indices.
with open('tokenizer_dnn.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)


# Save the model.
model.save('dnn_model_imdb.h5')
print("Model saved as 'dnn_model_imdb.h5'")


# Example: make a prediction on a movie review.
sample_review = "I really enjoyed the movie. The plot was engaging, and the acting was superb."
sample_sequence = tokenizer.texts_to_sequences([sample_review])
padded_sample = pad_sequences(sample_sequence, maxlen=max_review_length)
prediction = model.predict(padded_sample)
# 0.5 is the natural decision boundary for a sigmoid output (the original
# used > 0.3, which biased predictions toward "Positive").
sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
print(f'Predicted Sentiment: {sentiment} (Probability: {prediction[0][0]:.2f})')