File size: 2,919 Bytes
cf0cf5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "b8101bc5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score\n",
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Embedding, LSTM, Dense\n",
"from tensorflow.keras.utils import set_random_seed\n",
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Purpose: train an Embedding -> LSTM -> softmax sentiment classifier on the\n",
"# preprocessed CSVs, report accuracy on the test CSV, and save the model.\n",
"\n",
"# --- Configuration: everything a reader might want to tune ---\n",
"# Folder holding the preprocessed CSVs; the trained model is written here too.\n",
"# Defaults to the original location; set SENTI_DATA_DIR to run on another machine.\n",
"DATA_DIR = Path(os.environ.get(\"SENTI_DATA_DIR\", \"/Users/saish/Downloads\"))\n",
"SEED = 42\n",
"VALIDATION_FRACTION = 0.2\n",
"EMBEDDING_DIM = 100\n",
"LSTM_UNITS = 128\n",
"EPOCHS = 3\n",
"BATCH_SIZE = 16\n",
"\n",
"# Seed the Python, NumPy and TensorFlow RNGs so reruns are repeatable.\n",
"set_random_seed(SEED)\n",
"\n",
"# Load the preprocessed data\n",
"train_data = pd.read_csv(DATA_DIR / \"preprocessed_train_data.csv\")\n",
"test_data = pd.read_csv(DATA_DIR / \"preprocessed_test_data.csv\")\n",
"\n",
"# read_csv loads empty text cells as NaN (a float), and the Tokenizer lowercases\n",
"# every entry, which raises AttributeError on a float. Coerce to str up front.\n",
"train_texts = train_data['text'].fillna('').astype(str)\n",
"test_texts = test_data['text'].fillna('').astype(str)\n",
"\n",
"# Tokenize the text data (the vocabulary is learned from the training texts only)\n",
"tokenizer = Tokenizer()\n",
"tokenizer.fit_on_texts(train_texts)\n",
"\n",
"train_sequences = tokenizer.texts_to_sequences(train_texts)\n",
"test_sequences = tokenizer.texts_to_sequences(test_texts)\n",
"\n",
"# Pad sequences to ensure uniform length (the longest training sequence sets it)\n",
"max_length = max((len(seq) for seq in train_sequences), default=0)\n",
"if max_length == 0:\n",
"    raise ValueError(\"No tokens found in the training 'text' column; cannot build the model.\")\n",
"train_sequences = pad_sequences(train_sequences, maxlen=max_length)\n",
"test_sequences = pad_sequences(test_sequences, maxlen=max_length)\n",
"\n",
"# Encode sentiment labels as integer class ids (classes come from the training labels)\n",
"label_encoder = LabelEncoder()\n",
"train_labels = label_encoder.fit_transform(train_data['sentiment'])\n",
"test_labels = label_encoder.transform(test_data['sentiment'])\n",
"\n",
"# Hold out a shuffled validation set. fit(validation_split=...) takes the LAST rows\n",
"# before shuffling, which gives a skewed validation set when the CSV is ordered.\n",
"fit_sequences, val_sequences, fit_labels, val_labels = train_test_split(\n",
"    train_sequences, train_labels, test_size=VALIDATION_FRACTION, random_state=SEED\n",
")\n",
"\n",
"# Define and compile the model\n",
"model = Sequential()\n",
"# +1 because Tokenizer word indices start at 1; index 0 is the padding value\n",
"model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=EMBEDDING_DIM, input_length=max_length))\n",
"model.add(LSTM(units=LSTM_UNITS))\n",
"model.add(Dense(units=len(label_encoder.classes_), activation='softmax'))\n",
"\n",
"# Labels are integer ids, hence the sparse variant of categorical cross-entropy\n",
"model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
"\n",
"# Train the model\n",
"model.fit(\n",
"    fit_sequences,\n",
"    fit_labels,\n",
"    epochs=EPOCHS,\n",
"    batch_size=BATCH_SIZE,\n",
"    validation_data=(val_sequences, val_labels),\n",
")\n",
"\n",
"# Evaluate the model\n",
"test_loss, test_accuracy = model.evaluate(test_sequences, test_labels)\n",
"print(f'Test Accuracy: {test_accuracy}')\n",
"\n",
"# Save the trained model\n",
"model.save(str(DATA_DIR / \"sentitensor1.keras\"))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|