saishshinde15 committed on
Commit
cf0cf5a
·
verified ·
1 Parent(s): 1816c50

Upload SentimentTensor1.ipynb

Browse files
Files changed (1) hide show
  1. SentimentTensor1.ipynb +81 -0
SentimentTensor1.ipynb ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "b8101bc5",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "from sklearn.model_selection import train_test_split\n",
12
+ "from sklearn.metrics import accuracy_score\n",
13
+ "from tensorflow.keras.preprocessing.text import Tokenizer\n",
14
+ "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
15
+ "from tensorflow.keras.models import Sequential\n",
16
+ "from tensorflow.keras.layers import Embedding, LSTM, Dense\n",
17
+ "from sklearn.preprocessing import LabelEncoder\n",
18
+ "\n",
19
+ "# Config: one place to repoint the notebook at another data/model folder\n",
20
+ "DATA_DIR = \"/Users/saish/Downloads\"\n",
21
+ "train_data = pd.read_csv(f\"{DATA_DIR}/preprocessed_train_data.csv\")\n",
22
+ "test_data = pd.read_csv(f\"{DATA_DIR}/preprocessed_test_data.csv\")\n",
23
+ "# Empty text cells come back from read_csv as NaN (float); the Tokenizer needs str\n",
24
+ "train_texts = train_data['text'].fillna('').astype(str)\n",
25
+ "test_texts = test_data['text'].fillna('').astype(str)\n",
26
+ "\n",
27
+ "# Tokenize the text data (the vocabulary is fitted on the training texts only)\n",
28
+ "tokenizer = Tokenizer()\n",
29
+ "tokenizer.fit_on_texts(train_texts)\n",
30
+ "train_sequences = tokenizer.texts_to_sequences(train_texts)\n",
31
+ "test_sequences = tokenizer.texts_to_sequences(test_texts)\n",
32
+ "\n",
33
+ "# Pad sequences to the length of the longest training sequence\n",
34
+ "max_length = max(len(seq) for seq in train_sequences)\n",
35
+ "train_sequences = pad_sequences(train_sequences, maxlen=max_length)\n",
36
+ "test_sequences = pad_sequences(test_sequences, maxlen=max_length)\n",
37
+ "\n",
38
+ "# Encode sentiment labels as integer class ids (encoder fitted on train labels)\n",
39
+ "label_encoder = LabelEncoder()\n",
40
+ "train_labels = label_encoder.fit_transform(train_data['sentiment'])\n",
41
+ "test_labels = label_encoder.transform(test_data['sentiment'])\n",
42
+ "\n",
43
+ "# Define and compile the model (Embedding's deprecated input_length is omitted)\n",
44
+ "model = Sequential()\n",
45
+ "model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100))\n",
46
+ "model.add(LSTM(units=128))\n",
47
+ "model.add(Dense(units=len(label_encoder.classes_), activation='softmax'))\n",
48
+ "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
49
+ "\n",
50
+ "# Train the model, holding out 20% of the training rows for validation\n",
51
+ "model.fit(train_sequences, train_labels, epochs=3, batch_size=16, validation_split=0.2)\n",
52
+ "\n",
53
+ "# Evaluate on the test set, then save the trained model\n",
54
+ "test_loss, test_accuracy = model.evaluate(test_sequences, test_labels)\n",
55
+ "print(f'Test Accuracy: {test_accuracy}')\n",
56
+ "model.save(f\"{DATA_DIR}/sentitensor1.keras\")\n"
57
+ ]
58
+ }
59
+ ],
60
+ "metadata": {
61
+ "kernelspec": {
62
+ "display_name": "Python 3 (ipykernel)",
63
+ "language": "python",
64
+ "name": "python3"
65
+ },
66
+ "language_info": {
67
+ "codemirror_mode": {
68
+ "name": "ipython",
69
+ "version": 3
70
+ },
71
+ "file_extension": ".py",
72
+ "mimetype": "text/x-python",
73
+ "name": "python",
74
+ "nbconvert_exporter": "python",
75
+ "pygments_lexer": "ipython3",
76
+ "version": "3.9.12"
77
+ }
78
+ },
79
+ "nbformat": 4,
80
+ "nbformat_minor": 5
81
+ }