kanneboinakumar committed on
Commit
88d997a
·
verified ·
1 Parent(s): 274b0f9

Upload 6 files

Browse files
SPAM text message 20170820 - Data.csv ADDED
The diff for this file is too large to render. See raw diff
 
Spam analysis.ipynb ADDED
@@ -0,0 +1,1691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "9XjlAPnDBIfp"
7
+ },
8
+ "source": [
9
+ "## Data card\n",
10
+ "- https://www.kaggle.com/datasets/team-ai/spam-text-message-classification"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "7ePpzsWZYuaQ"
17
+ },
18
+ "source": [
19
+ "# 1.Packages"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 138,
25
+ "metadata": {
26
+ "colab": {
27
+ "base_uri": "https://localhost:8080/"
28
+ },
29
+ "id": "oT593Dj5RhBx",
30
+ "outputId": "aa673e7f-946b-438c-9c93-2767deef6544"
31
+ },
32
+ "outputs": [
33
+ {
34
+ "name": "stderr",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
38
+ "[nltk_data] Package stopwords is already up-to-date!\n",
39
+ "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
40
+ "[nltk_data] Package wordnet is already up-to-date!\n",
41
+ "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
42
+ "[nltk_data] Package punkt is already up-to-date!\n"
43
+ ]
44
+ }
45
+ ],
46
+ "source": [
47
+ "import pandas as pd\n",
48
+ "import numpy as np\n",
49
+ "import matplotlib.pyplot as plt\n",
50
+ "import seaborn as sns\n",
51
+ "import re\n",
52
+ "import nltk\n",
53
+ "from nltk.stem import WordNetLemmatizer\n",
54
+ "from nltk.tokenize import word_tokenize\n",
55
+ "from nltk.corpus import stopwords\n",
56
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
57
+ "from sklearn.model_selection import train_test_split\n",
58
+ "from sklearn.metrics import accuracy_score,f1_score, classification_report, confusion_matrix\n",
59
+ "import torch\n",
60
+ "import torch.nn as nn\n",
61
+ "import torch.optim as optim\n",
62
+ "from torch.utils.data import DataLoader, TensorDataset, random_split\n",
63
+ "nltk.download('stopwords')\n",
64
+ "nltk.download('wordnet')\n",
65
+ "nltk.download('punkt')\n",
66
+ "from joblib import dump, load"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "markdown",
71
+ "metadata": {
72
+ "id": "1Zek_JuxYoiM"
73
+ },
74
+ "source": [
75
+ "# 2.Data Loading"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 139,
81
+ "metadata": {
82
+ "colab": {
83
+ "base_uri": "https://localhost:8080/",
84
+ "height": 225
85
+ },
86
+ "id": "DiV4mPuqS1_k",
87
+ "outputId": "df9adf14-12d3-42de-fded-33b4603b03a1"
88
+ },
89
+ "outputs": [
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "shape : (5572, 2)\n"
95
+ ]
96
+ },
97
+ {
98
+ "data": {
99
+ "application/vnd.google.colaboratory.intrinsic+json": {
100
+ "summary": "{\n \"name\": \"Message_df\",\n \"rows\": 5572,\n \"fields\": [\n {\n \"column\": \"Category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Message\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5157,\n \"samples\": [\n \"Also sir, i sent you an email about how to log into the usc payment portal. I.ll send you another message that should explain how things are back home. Have a great weekend.\",\n \"Are you free now?can i call now?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
101
+ "type": "dataframe",
102
+ "variable_name": "Message_df"
103
+ },
104
+ "text/html": [
105
+ "\n",
106
+ " <div id=\"df-31cee80c-615e-4aee-9e82-ba2d9957460b\" class=\"colab-df-container\">\n",
107
+ " <div>\n",
108
+ "<style scoped>\n",
109
+ " .dataframe tbody tr th:only-of-type {\n",
110
+ " vertical-align: middle;\n",
111
+ " }\n",
112
+ "\n",
113
+ " .dataframe tbody tr th {\n",
114
+ " vertical-align: top;\n",
115
+ " }\n",
116
+ "\n",
117
+ " .dataframe thead th {\n",
118
+ " text-align: right;\n",
119
+ " }\n",
120
+ "</style>\n",
121
+ "<table border=\"1\" class=\"dataframe\">\n",
122
+ " <thead>\n",
123
+ " <tr style=\"text-align: right;\">\n",
124
+ " <th></th>\n",
125
+ " <th>Category</th>\n",
126
+ " <th>Message</th>\n",
127
+ " </tr>\n",
128
+ " </thead>\n",
129
+ " <tbody>\n",
130
+ " <tr>\n",
131
+ " <th>0</th>\n",
132
+ " <td>ham</td>\n",
133
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
134
+ " </tr>\n",
135
+ " <tr>\n",
136
+ " <th>1</th>\n",
137
+ " <td>ham</td>\n",
138
+ " <td>Ok lar... Joking wif u oni...</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>2</th>\n",
142
+ " <td>spam</td>\n",
143
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
144
+ " </tr>\n",
145
+ " <tr>\n",
146
+ " <th>3</th>\n",
147
+ " <td>ham</td>\n",
148
+ " <td>U dun say so early hor... U c already then say...</td>\n",
149
+ " </tr>\n",
150
+ " <tr>\n",
151
+ " <th>4</th>\n",
152
+ " <td>ham</td>\n",
153
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
154
+ " </tr>\n",
155
+ " </tbody>\n",
156
+ "</table>\n",
157
+ "</div>\n",
158
+ " <div class=\"colab-df-buttons\">\n",
159
+ "\n",
160
+ " <div class=\"colab-df-container\">\n",
161
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-31cee80c-615e-4aee-9e82-ba2d9957460b')\"\n",
162
+ " title=\"Convert this dataframe to an interactive table.\"\n",
163
+ " style=\"display:none;\">\n",
164
+ "\n",
165
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
166
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
167
+ " </svg>\n",
168
+ " </button>\n",
169
+ "\n",
170
+ " <style>\n",
171
+ " .colab-df-container {\n",
172
+ " display:flex;\n",
173
+ " gap: 12px;\n",
174
+ " }\n",
175
+ "\n",
176
+ " .colab-df-convert {\n",
177
+ " background-color: #E8F0FE;\n",
178
+ " border: none;\n",
179
+ " border-radius: 50%;\n",
180
+ " cursor: pointer;\n",
181
+ " display: none;\n",
182
+ " fill: #1967D2;\n",
183
+ " height: 32px;\n",
184
+ " padding: 0 0 0 0;\n",
185
+ " width: 32px;\n",
186
+ " }\n",
187
+ "\n",
188
+ " .colab-df-convert:hover {\n",
189
+ " background-color: #E2EBFA;\n",
190
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
191
+ " fill: #174EA6;\n",
192
+ " }\n",
193
+ "\n",
194
+ " .colab-df-buttons div {\n",
195
+ " margin-bottom: 4px;\n",
196
+ " }\n",
197
+ "\n",
198
+ " [theme=dark] .colab-df-convert {\n",
199
+ " background-color: #3B4455;\n",
200
+ " fill: #D2E3FC;\n",
201
+ " }\n",
202
+ "\n",
203
+ " [theme=dark] .colab-df-convert:hover {\n",
204
+ " background-color: #434B5C;\n",
205
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
206
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
207
+ " fill: #FFFFFF;\n",
208
+ " }\n",
209
+ " </style>\n",
210
+ "\n",
211
+ " <script>\n",
212
+ " const buttonEl =\n",
213
+ " document.querySelector('#df-31cee80c-615e-4aee-9e82-ba2d9957460b button.colab-df-convert');\n",
214
+ " buttonEl.style.display =\n",
215
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
216
+ "\n",
217
+ " async function convertToInteractive(key) {\n",
218
+ " const element = document.querySelector('#df-31cee80c-615e-4aee-9e82-ba2d9957460b');\n",
219
+ " const dataTable =\n",
220
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
221
+ " [key], {});\n",
222
+ " if (!dataTable) return;\n",
223
+ "\n",
224
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
225
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
226
+ " + ' to learn more about interactive tables.';\n",
227
+ " element.innerHTML = '';\n",
228
+ " dataTable['output_type'] = 'display_data';\n",
229
+ " await google.colab.output.renderOutput(dataTable, element);\n",
230
+ " const docLink = document.createElement('div');\n",
231
+ " docLink.innerHTML = docLinkHtml;\n",
232
+ " element.appendChild(docLink);\n",
233
+ " }\n",
234
+ " </script>\n",
235
+ " </div>\n",
236
+ "\n",
237
+ "\n",
238
+ "<div id=\"df-dfc3cdf4-c2be-4859-b4df-f906be898778\">\n",
239
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-dfc3cdf4-c2be-4859-b4df-f906be898778')\"\n",
240
+ " title=\"Suggest charts\"\n",
241
+ " style=\"display:none;\">\n",
242
+ "\n",
243
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
244
+ " width=\"24px\">\n",
245
+ " <g>\n",
246
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
247
+ " </g>\n",
248
+ "</svg>\n",
249
+ " </button>\n",
250
+ "\n",
251
+ "<style>\n",
252
+ " .colab-df-quickchart {\n",
253
+ " --bg-color: #E8F0FE;\n",
254
+ " --fill-color: #1967D2;\n",
255
+ " --hover-bg-color: #E2EBFA;\n",
256
+ " --hover-fill-color: #174EA6;\n",
257
+ " --disabled-fill-color: #AAA;\n",
258
+ " --disabled-bg-color: #DDD;\n",
259
+ " }\n",
260
+ "\n",
261
+ " [theme=dark] .colab-df-quickchart {\n",
262
+ " --bg-color: #3B4455;\n",
263
+ " --fill-color: #D2E3FC;\n",
264
+ " --hover-bg-color: #434B5C;\n",
265
+ " --hover-fill-color: #FFFFFF;\n",
266
+ " --disabled-bg-color: #3B4455;\n",
267
+ " --disabled-fill-color: #666;\n",
268
+ " }\n",
269
+ "\n",
270
+ " .colab-df-quickchart {\n",
271
+ " background-color: var(--bg-color);\n",
272
+ " border: none;\n",
273
+ " border-radius: 50%;\n",
274
+ " cursor: pointer;\n",
275
+ " display: none;\n",
276
+ " fill: var(--fill-color);\n",
277
+ " height: 32px;\n",
278
+ " padding: 0;\n",
279
+ " width: 32px;\n",
280
+ " }\n",
281
+ "\n",
282
+ " .colab-df-quickchart:hover {\n",
283
+ " background-color: var(--hover-bg-color);\n",
284
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
285
+ " fill: var(--button-hover-fill-color);\n",
286
+ " }\n",
287
+ "\n",
288
+ " .colab-df-quickchart-complete:disabled,\n",
289
+ " .colab-df-quickchart-complete:disabled:hover {\n",
290
+ " background-color: var(--disabled-bg-color);\n",
291
+ " fill: var(--disabled-fill-color);\n",
292
+ " box-shadow: none;\n",
293
+ " }\n",
294
+ "\n",
295
+ " .colab-df-spinner {\n",
296
+ " border: 2px solid var(--fill-color);\n",
297
+ " border-color: transparent;\n",
298
+ " border-bottom-color: var(--fill-color);\n",
299
+ " animation:\n",
300
+ " spin 1s steps(1) infinite;\n",
301
+ " }\n",
302
+ "\n",
303
+ " @keyframes spin {\n",
304
+ " 0% {\n",
305
+ " border-color: transparent;\n",
306
+ " border-bottom-color: var(--fill-color);\n",
307
+ " border-left-color: var(--fill-color);\n",
308
+ " }\n",
309
+ " 20% {\n",
310
+ " border-color: transparent;\n",
311
+ " border-left-color: var(--fill-color);\n",
312
+ " border-top-color: var(--fill-color);\n",
313
+ " }\n",
314
+ " 30% {\n",
315
+ " border-color: transparent;\n",
316
+ " border-left-color: var(--fill-color);\n",
317
+ " border-top-color: var(--fill-color);\n",
318
+ " border-right-color: var(--fill-color);\n",
319
+ " }\n",
320
+ " 40% {\n",
321
+ " border-color: transparent;\n",
322
+ " border-right-color: var(--fill-color);\n",
323
+ " border-top-color: var(--fill-color);\n",
324
+ " }\n",
325
+ " 60% {\n",
326
+ " border-color: transparent;\n",
327
+ " border-right-color: var(--fill-color);\n",
328
+ " }\n",
329
+ " 80% {\n",
330
+ " border-color: transparent;\n",
331
+ " border-right-color: var(--fill-color);\n",
332
+ " border-bottom-color: var(--fill-color);\n",
333
+ " }\n",
334
+ " 90% {\n",
335
+ " border-color: transparent;\n",
336
+ " border-bottom-color: var(--fill-color);\n",
337
+ " }\n",
338
+ " }\n",
339
+ "</style>\n",
340
+ "\n",
341
+ " <script>\n",
342
+ " async function quickchart(key) {\n",
343
+ " const quickchartButtonEl =\n",
344
+ " document.querySelector('#' + key + ' button');\n",
345
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
346
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
347
+ " try {\n",
348
+ " const charts = await google.colab.kernel.invokeFunction(\n",
349
+ " 'suggestCharts', [key], {});\n",
350
+ " } catch (error) {\n",
351
+ " console.error('Error during call to suggestCharts:', error);\n",
352
+ " }\n",
353
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
354
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
355
+ " }\n",
356
+ " (() => {\n",
357
+ " let quickchartButtonEl =\n",
358
+ " document.querySelector('#df-dfc3cdf4-c2be-4859-b4df-f906be898778 button');\n",
359
+ " quickchartButtonEl.style.display =\n",
360
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
361
+ " })();\n",
362
+ " </script>\n",
363
+ "</div>\n",
364
+ "\n",
365
+ " </div>\n",
366
+ " </div>\n"
367
+ ],
368
+ "text/plain": [
369
+ " Category Message\n",
370
+ "0 ham Go until jurong point, crazy.. Available only ...\n",
371
+ "1 ham Ok lar... Joking wif u oni...\n",
372
+ "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
373
+ "3 ham U dun say so early hor... U c already then say...\n",
374
+ "4 ham Nah I don't think he goes to usf, he lives aro..."
375
+ ]
376
+ },
377
+ "execution_count": 139,
378
+ "metadata": {},
379
+ "output_type": "execute_result"
380
+ }
381
+ ],
382
+ "source": [
383
+ "Message_df = pd.read_csv(\"/content/drive/MyDrive/SPAM text message 20170820 - Data.csv\")\n",
384
+ "print(f\"shape : {Message_df.shape}\")\n",
385
+ "Message_df.head()"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "markdown",
390
+ "metadata": {
391
+ "id": "OGQFplFhUfmZ"
392
+ },
393
+ "source": [
394
+ "# 3.EDA"
395
+ ]
396
+ },
397
+ {
398
+ "cell_type": "markdown",
399
+ "metadata": {
400
+ "id": "SFV5pePaYgic"
401
+ },
402
+ "source": [
403
+ "## 3.1.Remove Duplicates"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": 140,
409
+ "metadata": {
410
+ "colab": {
411
+ "base_uri": "https://localhost:8080/"
412
+ },
413
+ "id": "l0cRyTaVTHKO",
414
+ "outputId": "66ff8107-06f6-4a07-faf7-04ca228bb569"
415
+ },
416
+ "outputs": [
417
+ {
418
+ "name": "stdout",
419
+ "output_type": "stream",
420
+ "text": [
421
+ "Data with duplicates shape : (5572, 2)\n",
422
+ "Data without duplicates shape : (5157, 2)\n"
423
+ ]
424
+ }
425
+ ],
426
+ "source": [
427
+ "print(f\"Data with duplicates shape : {Message_df.shape}\")\n",
428
+ "Message_df = Message_df.drop_duplicates()\n",
429
+ "print(f\"Data without duplicates shape : {Message_df.shape}\")"
430
+ ]
431
+ },
432
+ {
433
+ "cell_type": "code",
434
+ "execution_count": 141,
435
+ "metadata": {
436
+ "colab": {
437
+ "base_uri": "https://localhost:8080/"
438
+ },
439
+ "id": "coF1hqOITfKW",
440
+ "outputId": "47c7e972-f315-498a-ca22-adf65b6088fd"
441
+ },
442
+ "outputs": [
443
+ {
444
+ "name": "stdout",
445
+ "output_type": "stream",
446
+ "text": [
447
+ "<class 'pandas.core.frame.DataFrame'>\n",
448
+ "Index: 5157 entries, 0 to 5571\n",
449
+ "Data columns (total 2 columns):\n",
450
+ " # Column Non-Null Count Dtype \n",
451
+ "--- ------ -------------- ----- \n",
452
+ " 0 Category 5157 non-null object\n",
453
+ " 1 Message 5157 non-null object\n",
454
+ "dtypes: object(2)\n",
455
+ "memory usage: 120.9+ KB\n"
456
+ ]
457
+ }
458
+ ],
459
+ "source": [
460
+ "Message_df.info()"
461
+ ]
462
+ },
463
+ {
464
+ "cell_type": "code",
465
+ "execution_count": 142,
466
+ "metadata": {
467
+ "colab": {
468
+ "base_uri": "https://localhost:8080/",
469
+ "height": 178
470
+ },
471
+ "id": "E-vHlQXoUOsy",
472
+ "outputId": "27a8e3ce-cb16-4d26-ace7-c2c913802a09"
473
+ },
474
+ "outputs": [
475
+ {
476
+ "data": {
477
+ "text/html": [
478
+ "<div>\n",
479
+ "<style scoped>\n",
480
+ " .dataframe tbody tr th:only-of-type {\n",
481
+ " vertical-align: middle;\n",
482
+ " }\n",
483
+ "\n",
484
+ " .dataframe tbody tr th {\n",
485
+ " vertical-align: top;\n",
486
+ " }\n",
487
+ "\n",
488
+ " .dataframe thead th {\n",
489
+ " text-align: right;\n",
490
+ " }\n",
491
+ "</style>\n",
492
+ "<table border=\"1\" class=\"dataframe\">\n",
493
+ " <thead>\n",
494
+ " <tr style=\"text-align: right;\">\n",
495
+ " <th></th>\n",
496
+ " <th>count</th>\n",
497
+ " </tr>\n",
498
+ " <tr>\n",
499
+ " <th>Category</th>\n",
500
+ " <th></th>\n",
501
+ " </tr>\n",
502
+ " </thead>\n",
503
+ " <tbody>\n",
504
+ " <tr>\n",
505
+ " <th>ham</th>\n",
506
+ " <td>4516</td>\n",
507
+ " </tr>\n",
508
+ " <tr>\n",
509
+ " <th>spam</th>\n",
510
+ " <td>641</td>\n",
511
+ " </tr>\n",
512
+ " </tbody>\n",
513
+ "</table>\n",
514
+ "</div><br><label><b>dtype:</b> int64</label>"
515
+ ],
516
+ "text/plain": [
517
+ "Category\n",
518
+ "ham 4516\n",
519
+ "spam 641\n",
520
+ "Name: count, dtype: int64"
521
+ ]
522
+ },
523
+ "execution_count": 142,
524
+ "metadata": {},
525
+ "output_type": "execute_result"
526
+ }
527
+ ],
528
+ "source": [
529
+ "Message_df.Category.value_counts()"
530
+ ]
531
+ },
532
+ {
533
+ "cell_type": "code",
534
+ "execution_count": 143,
535
+ "metadata": {
536
+ "colab": {
537
+ "base_uri": "https://localhost:8080/"
538
+ },
539
+ "id": "Agpy8AO1UcIO",
540
+ "outputId": "ac2d3811-0cf1-45e8-8618-ded5592cf6c3"
541
+ },
542
+ "outputs": [
543
+ {
544
+ "data": {
545
+ "text/plain": [
546
+ "((4516, 2), (641, 2))"
547
+ ]
548
+ },
549
+ "execution_count": 143,
550
+ "metadata": {},
551
+ "output_type": "execute_result"
552
+ }
553
+ ],
554
+ "source": [
555
+ "ham_df = Message_df[Message_df.Category == \"ham\"]\n",
556
+ "spam_df = Message_df[Message_df.Category == \"spam\"]\n",
557
+ "ham_df.shape, spam_df.shape"
558
+ ]
559
+ },
560
+ {
561
+ "cell_type": "markdown",
562
+ "metadata": {
563
+ "id": "J2wSWyFVYVMw"
564
+ },
565
+ "source": [
566
+ "### Balanced Data"
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "code",
571
+ "execution_count": 144,
572
+ "metadata": {
573
+ "colab": {
574
+ "base_uri": "https://localhost:8080/"
575
+ },
576
+ "id": "uqrWKLJaUppz",
577
+ "outputId": "52ccaf5d-ffc6-4e6a-8a30-0e89cd5b8397"
578
+ },
579
+ "outputs": [
580
+ {
581
+ "data": {
582
+ "text/plain": [
583
+ "((641, 2), (641, 2))"
584
+ ]
585
+ },
586
+ "execution_count": 144,
587
+ "metadata": {},
588
+ "output_type": "execute_result"
589
+ }
590
+ ],
591
+ "source": [
592
+ "ham_df = ham_df.sample(spam_df.shape[0],random_state=0)\n",
593
+ "ham_df.shape, spam_df.shape"
594
+ ]
595
+ },
596
+ {
597
+ "cell_type": "code",
598
+ "execution_count": 145,
599
+ "metadata": {
600
+ "colab": {
601
+ "base_uri": "https://localhost:8080/",
602
+ "height": 206
603
+ },
604
+ "id": "7sT79-iOVJm0",
605
+ "outputId": "17f61a6d-9b01-4e8b-88b1-e0527a9244c0"
606
+ },
607
+ "outputs": [
608
+ {
609
+ "data": {
610
+ "application/vnd.google.colaboratory.intrinsic+json": {
611
+ "summary": "{\n \"name\": \"Message_df\",\n \"rows\": 1282,\n \"fields\": [\n {\n \"column\": \"Category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Message\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1282,\n \"samples\": [\n \"December only! Had your mobile 11mths+? You are entitled to update to the latest colour camera mobile for Free! Call The Mobile Update VCo FREE on 08002986906\",\n \"Dear Voucher Holder, To claim this weeks offer, at you PC please go to http://www.e-tlp.co.uk/expressoffer Ts&Cs apply. To stop texts, txt STOP to 80062\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
612
+ "type": "dataframe",
613
+ "variable_name": "Message_df"
614
+ },
615
+ "text/html": [
616
+ "\n",
617
+ " <div id=\"df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6\" class=\"colab-df-container\">\n",
618
+ " <div>\n",
619
+ "<style scoped>\n",
620
+ " .dataframe tbody tr th:only-of-type {\n",
621
+ " vertical-align: middle;\n",
622
+ " }\n",
623
+ "\n",
624
+ " .dataframe tbody tr th {\n",
625
+ " vertical-align: top;\n",
626
+ " }\n",
627
+ "\n",
628
+ " .dataframe thead th {\n",
629
+ " text-align: right;\n",
630
+ " }\n",
631
+ "</style>\n",
632
+ "<table border=\"1\" class=\"dataframe\">\n",
633
+ " <thead>\n",
634
+ " <tr style=\"text-align: right;\">\n",
635
+ " <th></th>\n",
636
+ " <th>Category</th>\n",
637
+ " <th>Message</th>\n",
638
+ " </tr>\n",
639
+ " </thead>\n",
640
+ " <tbody>\n",
641
+ " <tr>\n",
642
+ " <th>3570</th>\n",
643
+ " <td>ham</td>\n",
644
+ " <td>She's fine. Sends her greetings</td>\n",
645
+ " </tr>\n",
646
+ " <tr>\n",
647
+ " <th>3985</th>\n",
648
+ " <td>ham</td>\n",
649
+ " <td>Hey, I missed you tm of last night as my phone...</td>\n",
650
+ " </tr>\n",
651
+ " <tr>\n",
652
+ " <th>2105</th>\n",
653
+ " <td>ham</td>\n",
654
+ " <td>Anyway seriously hit me up when you're back be...</td>\n",
655
+ " </tr>\n",
656
+ " <tr>\n",
657
+ " <th>4729</th>\n",
658
+ " <td>ham</td>\n",
659
+ " <td>I (Career Tel) have added u as a contact on IN...</td>\n",
660
+ " </tr>\n",
661
+ " <tr>\n",
662
+ " <th>3405</th>\n",
663
+ " <td>ham</td>\n",
664
+ " <td>Then ü ask dad to pick ü up lar... Ü wan 2 sta...</td>\n",
665
+ " </tr>\n",
666
+ " </tbody>\n",
667
+ "</table>\n",
668
+ "</div>\n",
669
+ " <div class=\"colab-df-buttons\">\n",
670
+ "\n",
671
+ " <div class=\"colab-df-container\">\n",
672
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6')\"\n",
673
+ " title=\"Convert this dataframe to an interactive table.\"\n",
674
+ " style=\"display:none;\">\n",
675
+ "\n",
676
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
677
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
678
+ " </svg>\n",
679
+ " </button>\n",
680
+ "\n",
681
+ " <style>\n",
682
+ " .colab-df-container {\n",
683
+ " display:flex;\n",
684
+ " gap: 12px;\n",
685
+ " }\n",
686
+ "\n",
687
+ " .colab-df-convert {\n",
688
+ " background-color: #E8F0FE;\n",
689
+ " border: none;\n",
690
+ " border-radius: 50%;\n",
691
+ " cursor: pointer;\n",
692
+ " display: none;\n",
693
+ " fill: #1967D2;\n",
694
+ " height: 32px;\n",
695
+ " padding: 0 0 0 0;\n",
696
+ " width: 32px;\n",
697
+ " }\n",
698
+ "\n",
699
+ " .colab-df-convert:hover {\n",
700
+ " background-color: #E2EBFA;\n",
701
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
702
+ " fill: #174EA6;\n",
703
+ " }\n",
704
+ "\n",
705
+ " .colab-df-buttons div {\n",
706
+ " margin-bottom: 4px;\n",
707
+ " }\n",
708
+ "\n",
709
+ " [theme=dark] .colab-df-convert {\n",
710
+ " background-color: #3B4455;\n",
711
+ " fill: #D2E3FC;\n",
712
+ " }\n",
713
+ "\n",
714
+ " [theme=dark] .colab-df-convert:hover {\n",
715
+ " background-color: #434B5C;\n",
716
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
717
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
718
+ " fill: #FFFFFF;\n",
719
+ " }\n",
720
+ " </style>\n",
721
+ "\n",
722
+ " <script>\n",
723
+ " const buttonEl =\n",
724
+ " document.querySelector('#df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6 button.colab-df-convert');\n",
725
+ " buttonEl.style.display =\n",
726
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
727
+ "\n",
728
+ " async function convertToInteractive(key) {\n",
729
+ " const element = document.querySelector('#df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6');\n",
730
+ " const dataTable =\n",
731
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
732
+ " [key], {});\n",
733
+ " if (!dataTable) return;\n",
734
+ "\n",
735
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
736
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
737
+ " + ' to learn more about interactive tables.';\n",
738
+ " element.innerHTML = '';\n",
739
+ " dataTable['output_type'] = 'display_data';\n",
740
+ " await google.colab.output.renderOutput(dataTable, element);\n",
741
+ " const docLink = document.createElement('div');\n",
742
+ " docLink.innerHTML = docLinkHtml;\n",
743
+ " element.appendChild(docLink);\n",
744
+ " }\n",
745
+ " </script>\n",
746
+ " </div>\n",
747
+ "\n",
748
+ "\n",
749
+ "<div id=\"df-d5411cc4-3fa2-4f84-8a1c-958062268fc2\">\n",
750
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d5411cc4-3fa2-4f84-8a1c-958062268fc2')\"\n",
751
+ " title=\"Suggest charts\"\n",
752
+ " style=\"display:none;\">\n",
753
+ "\n",
754
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
755
+ " width=\"24px\">\n",
756
+ " <g>\n",
757
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
758
+ " </g>\n",
759
+ "</svg>\n",
760
+ " </button>\n",
761
+ "\n",
762
+ "<style>\n",
763
+ " .colab-df-quickchart {\n",
764
+ " --bg-color: #E8F0FE;\n",
765
+ " --fill-color: #1967D2;\n",
766
+ " --hover-bg-color: #E2EBFA;\n",
767
+ " --hover-fill-color: #174EA6;\n",
768
+ " --disabled-fill-color: #AAA;\n",
769
+ " --disabled-bg-color: #DDD;\n",
770
+ " }\n",
771
+ "\n",
772
+ " [theme=dark] .colab-df-quickchart {\n",
773
+ " --bg-color: #3B4455;\n",
774
+ " --fill-color: #D2E3FC;\n",
775
+ " --hover-bg-color: #434B5C;\n",
776
+ " --hover-fill-color: #FFFFFF;\n",
777
+ " --disabled-bg-color: #3B4455;\n",
778
+ " --disabled-fill-color: #666;\n",
779
+ " }\n",
780
+ "\n",
781
+ " .colab-df-quickchart {\n",
782
+ " background-color: var(--bg-color);\n",
783
+ " border: none;\n",
784
+ " border-radius: 50%;\n",
785
+ " cursor: pointer;\n",
786
+ " display: none;\n",
787
+ " fill: var(--fill-color);\n",
788
+ " height: 32px;\n",
789
+ " padding: 0;\n",
790
+ " width: 32px;\n",
791
+ " }\n",
792
+ "\n",
793
+ " .colab-df-quickchart:hover {\n",
794
+ " background-color: var(--hover-bg-color);\n",
795
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
796
+ " fill: var(--button-hover-fill-color);\n",
797
+ " }\n",
798
+ "\n",
799
+ " .colab-df-quickchart-complete:disabled,\n",
800
+ " .colab-df-quickchart-complete:disabled:hover {\n",
801
+ " background-color: var(--disabled-bg-color);\n",
802
+ " fill: var(--disabled-fill-color);\n",
803
+ " box-shadow: none;\n",
804
+ " }\n",
805
+ "\n",
806
+ " .colab-df-spinner {\n",
807
+ " border: 2px solid var(--fill-color);\n",
808
+ " border-color: transparent;\n",
809
+ " border-bottom-color: var(--fill-color);\n",
810
+ " animation:\n",
811
+ " spin 1s steps(1) infinite;\n",
812
+ " }\n",
813
+ "\n",
814
+ " @keyframes spin {\n",
815
+ " 0% {\n",
816
+ " border-color: transparent;\n",
817
+ " border-bottom-color: var(--fill-color);\n",
818
+ " border-left-color: var(--fill-color);\n",
819
+ " }\n",
820
+ " 20% {\n",
821
+ " border-color: transparent;\n",
822
+ " border-left-color: var(--fill-color);\n",
823
+ " border-top-color: var(--fill-color);\n",
824
+ " }\n",
825
+ " 30% {\n",
826
+ " border-color: transparent;\n",
827
+ " border-left-color: var(--fill-color);\n",
828
+ " border-top-color: var(--fill-color);\n",
829
+ " border-right-color: var(--fill-color);\n",
830
+ " }\n",
831
+ " 40% {\n",
832
+ " border-color: transparent;\n",
833
+ " border-right-color: var(--fill-color);\n",
834
+ " border-top-color: var(--fill-color);\n",
835
+ " }\n",
836
+ " 60% {\n",
837
+ " border-color: transparent;\n",
838
+ " border-right-color: var(--fill-color);\n",
839
+ " }\n",
840
+ " 80% {\n",
841
+ " border-color: transparent;\n",
842
+ " border-right-color: var(--fill-color);\n",
843
+ " border-bottom-color: var(--fill-color);\n",
844
+ " }\n",
845
+ " 90% {\n",
846
+ " border-color: transparent;\n",
847
+ " border-bottom-color: var(--fill-color);\n",
848
+ " }\n",
849
+ " }\n",
850
+ "</style>\n",
851
+ "\n",
852
+ " <script>\n",
853
+ " async function quickchart(key) {\n",
854
+ " const quickchartButtonEl =\n",
855
+ " document.querySelector('#' + key + ' button');\n",
856
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
857
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
858
+ " try {\n",
859
+ " const charts = await google.colab.kernel.invokeFunction(\n",
860
+ " 'suggestCharts', [key], {});\n",
861
+ " } catch (error) {\n",
862
+ " console.error('Error during call to suggestCharts:', error);\n",
863
+ " }\n",
864
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
865
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
866
+ " }\n",
867
+ " (() => {\n",
868
+ " let quickchartButtonEl =\n",
869
+ " document.querySelector('#df-d5411cc4-3fa2-4f84-8a1c-958062268fc2 button');\n",
870
+ " quickchartButtonEl.style.display =\n",
871
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
872
+ " })();\n",
873
+ " </script>\n",
874
+ "</div>\n",
875
+ "\n",
876
+ " </div>\n",
877
+ " </div>\n"
878
+ ],
879
+ "text/plain": [
880
+ " Category Message\n",
881
+ "3570 ham She's fine. Sends her greetings\n",
882
+ "3985 ham Hey, I missed you tm of last night as my phone...\n",
883
+ "2105 ham Anyway seriously hit me up when you're back be...\n",
884
+ "4729 ham I (Career Tel) have added u as a contact on IN...\n",
885
+ "3405 ham Then ü ask dad to pick ü up lar... Ü wan 2 sta..."
886
+ ]
887
+ },
888
+ "execution_count": 145,
889
+ "metadata": {},
890
+ "output_type": "execute_result"
891
+ }
892
+ ],
893
+ "source": [
894
+ "# Concatenate the ham and spam subsets\n",
895
+ "Message_df = pd.concat([ham_df, spam_df], axis=0) # Balanced data\n",
896
+ "Message_df.head()"
897
+ ]
898
+ },
899
+ {
900
+ "cell_type": "markdown",
901
+ "metadata": {
902
+ "id": "OeXzuc7vYBxV"
903
+ },
904
+ "source": [
905
+ "## 3.2.Text Preprocessing"
906
+ ]
907
+ },
908
+ {
909
+ "cell_type": "code",
910
+ "execution_count": 146,
911
+ "metadata": {
912
+ "id": "7V_nMtigZEdY"
913
+ },
914
+ "outputs": [],
915
+ "source": [
916
+ "def preprocess_text(text):\n",
917
+ " # Convert text to lowercase\n",
918
+ " text = text.casefold()\n",
919
+ "\n",
920
+ " text = re.sub(r'[^a-zA-Z]', ' ', text)\n",
921
+ "\n",
922
+ " # Tokenize text\n",
923
+ " tokens = word_tokenize(text)\n",
924
+ "\n",
925
+ " # Remove stopwords\n",
926
+ " stop_words = set(stopwords.words('english')) # Define stop_words here\n",
927
+ " filtered_tokens = [word for word in tokens if word not in stop_words and word != \"not\"]\n",
928
+ "\n",
929
+ " # Lemmatization\n",
930
+ " lemmatizer = WordNetLemmatizer()\n",
931
+ " lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
932
+ "\n",
933
+ " # Join tokens back into a string\n",
934
+ " preprocessed_text = ' '.join(lemmatized_tokens)\n",
935
+ "\n",
936
+ " return preprocessed_text\n"
937
+ ]
938
+ },
939
+ {
940
+ "cell_type": "code",
941
+ "execution_count": 147,
942
+ "metadata": {
943
+ "id": "yD6P08hs67do"
944
+ },
945
+ "outputs": [],
946
+ "source": [
947
+ "corpus=[preprocess_text(Message) for Message in Message_df.Message]"
948
+ ]
949
+ },
950
+ {
951
+ "cell_type": "markdown",
952
+ "metadata": {
953
+ "id": "C-eqca1fX02T"
954
+ },
955
+ "source": [
956
+ "## 3.3.Encoding"
957
+ ]
958
+ },
959
+ {
960
+ "cell_type": "code",
961
+ "execution_count": 148,
962
+ "metadata": {
963
+ "colab": {
964
+ "base_uri": "https://localhost:8080/",
965
+ "height": 143
966
+ },
967
+ "id": "qwRxIJXWcCuS",
968
+ "outputId": "a8d86092-aada-4b52-846d-8d10fb60a6cd"
969
+ },
970
+ "outputs": [
971
+ {
972
+ "data": {
973
+ "application/vnd.google.colaboratory.intrinsic+json": {
974
+ "summary": "{\n \"name\": \"Message_df\",\n \"rows\": 1282,\n \"fields\": [\n {\n \"column\": \"Category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Message\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1282,\n \"samples\": [\n \"December only! Had your mobile 11mths+? You are entitled to update to the latest colour camera mobile for Free! Call The Mobile Update VCo FREE on 08002986906\",\n \"Dear Voucher Holder, To claim this weeks offer, at you PC please go to http://www.e-tlp.co.uk/expressoffer Ts&Cs apply. To stop texts, txt STOP to 80062\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Category_lable\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
975
+ "type": "dataframe",
976
+ "variable_name": "Message_df"
977
+ },
978
+ "text/html": [
979
+ "\n",
980
+ " <div id=\"df-4980b3be-663a-49a9-a994-41220ec15e3a\" class=\"colab-df-container\">\n",
981
+ " <div>\n",
982
+ "<style scoped>\n",
983
+ " .dataframe tbody tr th:only-of-type {\n",
984
+ " vertical-align: middle;\n",
985
+ " }\n",
986
+ "\n",
987
+ " .dataframe tbody tr th {\n",
988
+ " vertical-align: top;\n",
989
+ " }\n",
990
+ "\n",
991
+ " .dataframe thead th {\n",
992
+ " text-align: right;\n",
993
+ " }\n",
994
+ "</style>\n",
995
+ "<table border=\"1\" class=\"dataframe\">\n",
996
+ " <thead>\n",
997
+ " <tr style=\"text-align: right;\">\n",
998
+ " <th></th>\n",
999
+ " <th>Category</th>\n",
1000
+ " <th>Message</th>\n",
1001
+ " <th>Category_lable</th>\n",
1002
+ " </tr>\n",
1003
+ " </thead>\n",
1004
+ " <tbody>\n",
1005
+ " <tr>\n",
1006
+ " <th>3570</th>\n",
1007
+ " <td>ham</td>\n",
1008
+ " <td>She's fine. Sends her greetings</td>\n",
1009
+ " <td>1</td>\n",
1010
+ " </tr>\n",
1011
+ " <tr>\n",
1012
+ " <th>3985</th>\n",
1013
+ " <td>ham</td>\n",
1014
+ " <td>Hey, I missed you tm of last night as my phone...</td>\n",
1015
+ " <td>1</td>\n",
1016
+ " </tr>\n",
1017
+ " <tr>\n",
1018
+ " <th>2105</th>\n",
1019
+ " <td>ham</td>\n",
1020
+ " <td>Anyway seriously hit me up when you're back be...</td>\n",
1021
+ " <td>1</td>\n",
1022
+ " </tr>\n",
1023
+ " </tbody>\n",
1024
+ "</table>\n",
1025
+ "</div>\n",
1026
+ " <div class=\"colab-df-buttons\">\n",
1027
+ "\n",
1028
+ " <div class=\"colab-df-container\">\n",
1029
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4980b3be-663a-49a9-a994-41220ec15e3a')\"\n",
1030
+ " title=\"Convert this dataframe to an interactive table.\"\n",
1031
+ " style=\"display:none;\">\n",
1032
+ "\n",
1033
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
1034
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
1035
+ " </svg>\n",
1036
+ " </button>\n",
1037
+ "\n",
1038
+ " <style>\n",
1039
+ " .colab-df-container {\n",
1040
+ " display:flex;\n",
1041
+ " gap: 12px;\n",
1042
+ " }\n",
1043
+ "\n",
1044
+ " .colab-df-convert {\n",
1045
+ " background-color: #E8F0FE;\n",
1046
+ " border: none;\n",
1047
+ " border-radius: 50%;\n",
1048
+ " cursor: pointer;\n",
1049
+ " display: none;\n",
1050
+ " fill: #1967D2;\n",
1051
+ " height: 32px;\n",
1052
+ " padding: 0 0 0 0;\n",
1053
+ " width: 32px;\n",
1054
+ " }\n",
1055
+ "\n",
1056
+ " .colab-df-convert:hover {\n",
1057
+ " background-color: #E2EBFA;\n",
1058
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1059
+ " fill: #174EA6;\n",
1060
+ " }\n",
1061
+ "\n",
1062
+ " .colab-df-buttons div {\n",
1063
+ " margin-bottom: 4px;\n",
1064
+ " }\n",
1065
+ "\n",
1066
+ " [theme=dark] .colab-df-convert {\n",
1067
+ " background-color: #3B4455;\n",
1068
+ " fill: #D2E3FC;\n",
1069
+ " }\n",
1070
+ "\n",
1071
+ " [theme=dark] .colab-df-convert:hover {\n",
1072
+ " background-color: #434B5C;\n",
1073
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1074
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1075
+ " fill: #FFFFFF;\n",
1076
+ " }\n",
1077
+ " </style>\n",
1078
+ "\n",
1079
+ " <script>\n",
1080
+ " const buttonEl =\n",
1081
+ " document.querySelector('#df-4980b3be-663a-49a9-a994-41220ec15e3a button.colab-df-convert');\n",
1082
+ " buttonEl.style.display =\n",
1083
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1084
+ "\n",
1085
+ " async function convertToInteractive(key) {\n",
1086
+ " const element = document.querySelector('#df-4980b3be-663a-49a9-a994-41220ec15e3a');\n",
1087
+ " const dataTable =\n",
1088
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
1089
+ " [key], {});\n",
1090
+ " if (!dataTable) return;\n",
1091
+ "\n",
1092
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
1093
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
1094
+ " + ' to learn more about interactive tables.';\n",
1095
+ " element.innerHTML = '';\n",
1096
+ " dataTable['output_type'] = 'display_data';\n",
1097
+ " await google.colab.output.renderOutput(dataTable, element);\n",
1098
+ " const docLink = document.createElement('div');\n",
1099
+ " docLink.innerHTML = docLinkHtml;\n",
1100
+ " element.appendChild(docLink);\n",
1101
+ " }\n",
1102
+ " </script>\n",
1103
+ " </div>\n",
1104
+ "\n",
1105
+ "\n",
1106
+ "<div id=\"df-09b65e71-0d55-4461-a02a-fd0e783326a0\">\n",
1107
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-09b65e71-0d55-4461-a02a-fd0e783326a0')\"\n",
1108
+ " title=\"Suggest charts\"\n",
1109
+ " style=\"display:none;\">\n",
1110
+ "\n",
1111
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
1112
+ " width=\"24px\">\n",
1113
+ " <g>\n",
1114
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
1115
+ " </g>\n",
1116
+ "</svg>\n",
1117
+ " </button>\n",
1118
+ "\n",
1119
+ "<style>\n",
1120
+ " .colab-df-quickchart {\n",
1121
+ " --bg-color: #E8F0FE;\n",
1122
+ " --fill-color: #1967D2;\n",
1123
+ " --hover-bg-color: #E2EBFA;\n",
1124
+ " --hover-fill-color: #174EA6;\n",
1125
+ " --disabled-fill-color: #AAA;\n",
1126
+ " --disabled-bg-color: #DDD;\n",
1127
+ " }\n",
1128
+ "\n",
1129
+ " [theme=dark] .colab-df-quickchart {\n",
1130
+ " --bg-color: #3B4455;\n",
1131
+ " --fill-color: #D2E3FC;\n",
1132
+ " --hover-bg-color: #434B5C;\n",
1133
+ " --hover-fill-color: #FFFFFF;\n",
1134
+ " --disabled-bg-color: #3B4455;\n",
1135
+ " --disabled-fill-color: #666;\n",
1136
+ " }\n",
1137
+ "\n",
1138
+ " .colab-df-quickchart {\n",
1139
+ " background-color: var(--bg-color);\n",
1140
+ " border: none;\n",
1141
+ " border-radius: 50%;\n",
1142
+ " cursor: pointer;\n",
1143
+ " display: none;\n",
1144
+ " fill: var(--fill-color);\n",
1145
+ " height: 32px;\n",
1146
+ " padding: 0;\n",
1147
+ " width: 32px;\n",
1148
+ " }\n",
1149
+ "\n",
1150
+ " .colab-df-quickchart:hover {\n",
1151
+ " background-color: var(--hover-bg-color);\n",
1152
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1153
+ " fill: var(--button-hover-fill-color);\n",
1154
+ " }\n",
1155
+ "\n",
1156
+ " .colab-df-quickchart-complete:disabled,\n",
1157
+ " .colab-df-quickchart-complete:disabled:hover {\n",
1158
+ " background-color: var(--disabled-bg-color);\n",
1159
+ " fill: var(--disabled-fill-color);\n",
1160
+ " box-shadow: none;\n",
1161
+ " }\n",
1162
+ "\n",
1163
+ " .colab-df-spinner {\n",
1164
+ " border: 2px solid var(--fill-color);\n",
1165
+ " border-color: transparent;\n",
1166
+ " border-bottom-color: var(--fill-color);\n",
1167
+ " animation:\n",
1168
+ " spin 1s steps(1) infinite;\n",
1169
+ " }\n",
1170
+ "\n",
1171
+ " @keyframes spin {\n",
1172
+ " 0% {\n",
1173
+ " border-color: transparent;\n",
1174
+ " border-bottom-color: var(--fill-color);\n",
1175
+ " border-left-color: var(--fill-color);\n",
1176
+ " }\n",
1177
+ " 20% {\n",
1178
+ " border-color: transparent;\n",
1179
+ " border-left-color: var(--fill-color);\n",
1180
+ " border-top-color: var(--fill-color);\n",
1181
+ " }\n",
1182
+ " 30% {\n",
1183
+ " border-color: transparent;\n",
1184
+ " border-left-color: var(--fill-color);\n",
1185
+ " border-top-color: var(--fill-color);\n",
1186
+ " border-right-color: var(--fill-color);\n",
1187
+ " }\n",
1188
+ " 40% {\n",
1189
+ " border-color: transparent;\n",
1190
+ " border-right-color: var(--fill-color);\n",
1191
+ " border-top-color: var(--fill-color);\n",
1192
+ " }\n",
1193
+ " 60% {\n",
1194
+ " border-color: transparent;\n",
1195
+ " border-right-color: var(--fill-color);\n",
1196
+ " }\n",
1197
+ " 80% {\n",
1198
+ " border-color: transparent;\n",
1199
+ " border-right-color: var(--fill-color);\n",
1200
+ " border-bottom-color: var(--fill-color);\n",
1201
+ " }\n",
1202
+ " 90% {\n",
1203
+ " border-color: transparent;\n",
1204
+ " border-bottom-color: var(--fill-color);\n",
1205
+ " }\n",
1206
+ " }\n",
1207
+ "</style>\n",
1208
+ "\n",
1209
+ " <script>\n",
1210
+ " async function quickchart(key) {\n",
1211
+ " const quickchartButtonEl =\n",
1212
+ " document.querySelector('#' + key + ' button');\n",
1213
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
1214
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
1215
+ " try {\n",
1216
+ " const charts = await google.colab.kernel.invokeFunction(\n",
1217
+ " 'suggestCharts', [key], {});\n",
1218
+ " } catch (error) {\n",
1219
+ " console.error('Error during call to suggestCharts:', error);\n",
1220
+ " }\n",
1221
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
1222
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
1223
+ " }\n",
1224
+ " (() => {\n",
1225
+ " let quickchartButtonEl =\n",
1226
+ " document.querySelector('#df-09b65e71-0d55-4461-a02a-fd0e783326a0 button');\n",
1227
+ " quickchartButtonEl.style.display =\n",
1228
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1229
+ " })();\n",
1230
+ " </script>\n",
1231
+ "</div>\n",
1232
+ "\n",
1233
+ " </div>\n",
1234
+ " </div>\n"
1235
+ ],
1236
+ "text/plain": [
1237
+ " Category Message \\\n",
1238
+ "3570 ham She's fine. Sends her greetings \n",
1239
+ "3985 ham Hey, I missed you tm of last night as my phone... \n",
1240
+ "2105 ham Anyway seriously hit me up when you're back be... \n",
1241
+ "\n",
1242
+ " Category_lable \n",
1243
+ "3570 1 \n",
1244
+ "3985 1 \n",
1245
+ "2105 1 "
1246
+ ]
1247
+ },
1248
+ "execution_count": 148,
1249
+ "metadata": {},
1250
+ "output_type": "execute_result"
1251
+ }
1252
+ ],
1253
+ "source": [
1254
+ "Message_df['Category_lable'] = Message_df['Category'].map({\"spam\":0,\"ham\":1})\n",
1255
+ "Message_df.head(3)"
1256
+ ]
1257
+ },
1258
+ {
1259
+ "cell_type": "markdown",
1260
+ "metadata": {
1261
+ "id": "UiheZTwnXeef"
1262
+ },
1263
+ "source": [
1264
+ "## 3.4.Convert Data to TF-IDF Vectors"
1265
+ ]
1266
+ },
1267
+ {
1268
+ "cell_type": "code",
1269
+ "execution_count": 149,
1270
+ "metadata": {
1271
+ "colab": {
1272
+ "base_uri": "https://localhost:8080/"
1273
+ },
1274
+ "id": "nAHJnlluiZkd",
1275
+ "outputId": "0b3d55d1-e274-432b-ea42-ccd602a5ba1c"
1276
+ },
1277
+ "outputs": [
1278
+ {
1279
+ "data": {
1280
+ "text/plain": [
1281
+ "((1282, 10000), (1282,))"
1282
+ ]
1283
+ },
1284
+ "execution_count": 149,
1285
+ "metadata": {},
1286
+ "output_type": "execute_result"
1287
+ }
1288
+ ],
1289
+ "source": [
1290
+ "Tfidf_Vectorizer=TfidfVectorizer(max_features=10000,ngram_range=(1,2))\n",
1291
+ "X=Tfidf_Vectorizer.fit_transform(corpus).toarray()\n",
1292
+ "y = Message_df.Category_lable.values\n",
1293
+ "X.shape,y.shape"
1294
+ ]
1295
+ },
1296
+ {
1297
+ "cell_type": "markdown",
1298
+ "metadata": {
1299
+ "id": "soo4Bxh4ASQq"
1300
+ },
1301
+ "source": [
1302
+ "### Save vectorizer"
1303
+ ]
1304
+ },
1305
+ {
1306
+ "cell_type": "code",
1307
+ "execution_count": 150,
1308
+ "metadata": {
1309
+ "colab": {
1310
+ "base_uri": "https://localhost:8080/"
1311
+ },
1312
+ "id": "dHMsrJSG_m_i",
1313
+ "outputId": "1bc097cd-d109-4a13-82a0-ec5d4e509f0b"
1314
+ },
1315
+ "outputs": [
1316
+ {
1317
+ "data": {
1318
+ "text/plain": [
1319
+ "['tfidf_vectorizer.pkl']"
1320
+ ]
1321
+ },
1322
+ "execution_count": 150,
1323
+ "metadata": {},
1324
+ "output_type": "execute_result"
1325
+ }
1326
+ ],
1327
+ "source": [
1328
+ "dump(Tfidf_Vectorizer,'tfidf_vectorizer.pkl')\n"
1329
+ ]
1330
+ },
1331
+ {
1332
+ "cell_type": "markdown",
1333
+ "metadata": {
1334
+ "id": "afxDu-1ej53c"
1335
+ },
1336
+ "source": [
1337
+ "## 3.5.Train Test split & Convert to PyTorch tensors"
1338
+ ]
1339
+ },
1340
+ {
1341
+ "cell_type": "code",
1342
+ "execution_count": 151,
1343
+ "metadata": {
1344
+ "id": "SR1OFUufi1GC"
1345
+ },
1346
+ "outputs": [],
1347
+ "source": [
1348
+ "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)\n",
1349
+ "# Convert to PyTorch tensors\n",
1350
+ "X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
1351
+ "y_train_tensor = torch.tensor(y_train, dtype=torch.long)\n",
1352
+ "X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
1353
+ "y_test_tensor = torch.tensor(y_test, dtype=torch.long)"
1354
+ ]
1355
+ },
1356
+ {
1357
+ "cell_type": "markdown",
1358
+ "metadata": {
1359
+ "id": "s2-gHMz2XAfN"
1360
+ },
1361
+ "source": [
1362
+ "## 3.6.Create DataLoaders"
1363
+ ]
1364
+ },
1365
+ {
1366
+ "cell_type": "code",
1367
+ "execution_count": 152,
1368
+ "metadata": {
1369
+ "id": "K7M4pBdMlQ8t"
1370
+ },
1371
+ "outputs": [],
1372
+ "source": [
1373
+ "batch_size = 32\n",
1374
+ "train_dataset = TensorDataset(X_train_tensor, y_train_tensor)\n",
1375
+ "test_dataset = TensorDataset(X_test_tensor, y_test_tensor)\n",
1376
+ "train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)\n",
1377
+ "test_loader = DataLoader(test_dataset, batch_size=batch_size)"
1378
+ ]
1379
+ },
1380
+ {
1381
+ "cell_type": "markdown",
1382
+ "metadata": {
1383
+ "id": "0VBIFpPlXDav"
1384
+ },
1385
+ "source": [
1386
+ "# 4.Model building"
1387
+ ]
1388
+ },
1389
+ {
1390
+ "cell_type": "markdown",
1391
+ "metadata": {
1392
+ "id": "TNlANx7oW42X"
1393
+ },
1394
+ "source": [
1395
+ "## 4.1.Define LSTM Model"
1396
+ ]
1397
+ },
1398
+ {
1399
+ "cell_type": "code",
1400
+ "execution_count": 153,
1401
+ "metadata": {
1402
+ "id": "8dH_7s2tma9M"
1403
+ },
1404
+ "outputs": [],
1405
+ "source": [
1406
+ "class SentimentLSTM(nn.Module):\n",
1407
+ " def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):\n",
1408
+ " super(SentimentLSTM, self).__init__()\n",
1409
+ " self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)\n",
1410
+ " self.fc = nn.Linear(hidden_dim, output_dim)\n",
1411
+ " self.dropout = nn.Dropout(0.5)\n",
1412
+ "\n",
1413
+ " def forward(self, x):\n",
1414
+ " x = x.unsqueeze(1)\n",
1415
+ " lstm_out, _ = self.lstm(x)\n",
1416
+ " final_hidden = lstm_out[:, -1, :]\n",
1417
+ " return self.fc(final_hidden)"
1418
+ ]
1419
+ },
1420
+ {
1421
+ "cell_type": "markdown",
1422
+ "metadata": {
1423
+ "id": "UH_MDi9mW1FA"
1424
+ },
1425
+ "source": [
1426
+ "## 4.2.Model parameters"
1427
+ ]
1428
+ },
1429
+ {
1430
+ "cell_type": "code",
1431
+ "execution_count": 154,
1432
+ "metadata": {
1433
+ "id": "hqEvLZRbmbxr"
1434
+ },
1435
+ "outputs": [],
1436
+ "source": [
1437
+ "input_dim = X_train.shape[1] # TF-IDF feature size\n",
1438
+ "hidden_dim = 64\n",
1439
+ "output_dim = 2 # Binary classification (spam = 0 / ham = 1)"
1440
+ ]
1441
+ },
1442
+ {
1443
+ "cell_type": "markdown",
1444
+ "metadata": {
1445
+ "id": "9Xo28W3zWya6"
1446
+ },
1447
+ "source": [
1448
+ "## 4.3.Initialize model, loss, optimizer"
1449
+ ]
1450
+ },
1451
+ {
1452
+ "cell_type": "code",
1453
+ "execution_count": 155,
1454
+ "metadata": {
1455
+ "id": "7pQYNNFQmkiW"
1456
+ },
1457
+ "outputs": [],
1458
+ "source": [
1459
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
1460
+ "model = SentimentLSTM(input_dim, hidden_dim, output_dim).to(device)\n",
1461
+ "criterion = nn.CrossEntropyLoss()\n",
1462
+ "optimizer = optim.Adam(model.parameters(), lr=0.001)"
1463
+ ]
1464
+ },
1465
+ {
1466
+ "cell_type": "markdown",
1467
+ "metadata": {
1468
+ "id": "t8Z1D83ZWuy5"
1469
+ },
1470
+ "source": [
1471
+ "## 4.4.Training loop"
1472
+ ]
1473
+ },
1474
+ {
1475
+ "cell_type": "code",
1476
+ "execution_count": 156,
1477
+ "metadata": {
1478
+ "colab": {
1479
+ "base_uri": "https://localhost:8080/"
1480
+ },
1481
+ "id": "YVHI9YSUmnMR",
1482
+ "outputId": "5df777bb-7af0-4253-9ed2-8c517baad460"
1483
+ },
1484
+ "outputs": [
1485
+ {
1486
+ "name": "stdout",
1487
+ "output_type": "stream",
1488
+ "text": [
1489
+ "Epoch 1, Loss: 0.6868\n",
1490
+ "Epoch 2, Loss: 0.6342\n",
1491
+ "Epoch 3, Loss: 0.5190\n",
1492
+ "Epoch 4, Loss: 0.3569\n",
1493
+ "Epoch 5, Loss: 0.2106\n",
1494
+ "Epoch 6, Loss: 0.1242\n",
1495
+ "Epoch 7, Loss: 0.0766\n",
1496
+ "Epoch 8, Loss: 0.0511\n",
1497
+ "Epoch 9, Loss: 0.0373\n",
1498
+ "Epoch 10, Loss: 0.0295\n",
1499
+ "Epoch 11, Loss: 0.0232\n",
1500
+ "Epoch 12, Loss: 0.0183\n",
1501
+ "Epoch 13, Loss: 0.0146\n",
1502
+ "Epoch 14, Loss: 0.0123\n",
1503
+ "Epoch 15, Loss: 0.0110\n",
1504
+ "Epoch 16, Loss: 0.0089\n",
1505
+ "Epoch 17, Loss: 0.0077\n",
1506
+ "Epoch 18, Loss: 0.0070\n",
1507
+ "Epoch 19, Loss: 0.0060\n",
1508
+ "Epoch 20, Loss: 0.0055\n"
1509
+ ]
1510
+ }
1511
+ ],
1512
+ "source": [
1513
+ "num_epochs = 20\n",
1514
+ "all_loss = []\n",
1515
+ "for epoch in range(num_epochs):\n",
1516
+ " model.train()\n",
1517
+ " total_loss = 0\n",
1518
+ " for text, label in train_loader:\n",
1519
+ " text, label = text.to(device), label.to(device)\n",
1520
+ " optimizer.zero_grad()\n",
1521
+ " output = model(text)\n",
1522
+ " loss = criterion(output, label)\n",
1523
+ " loss.backward()\n",
1524
+ " optimizer.step()\n",
1525
+ " total_loss += loss.item()\n",
1526
+ " all_loss.append(round(total_loss,2))\n",
1527
+ " print(f\"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}\")"
1528
+ ]
1529
+ },
1530
+ {
1531
+ "cell_type": "markdown",
1532
+ "metadata": {
1533
+ "id": "jvnGZpZiWi__"
1534
+ },
1535
+ "source": [
1536
+ "## 4.5.Loss Graph"
1537
+ ]
1538
+ },
1539
+ {
1540
+ "cell_type": "code",
1541
+ "execution_count": 157,
1542
+ "metadata": {
1543
+ "colab": {
1544
+ "base_uri": "https://localhost:8080/",
1545
+ "height": 472
1546
+ },
1547
+ "id": "eeNrXHXEowja",
1548
+ "outputId": "bf4155c0-8e30-4ba2-b18a-c68f74d9d282"
1549
+ },
1550
+ "outputs": [
1551
+ {
1552
+ "data": {
1553
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAARRxJREFUeJzt3Xd8VFX+//H3zCSZ9AnpCYQOoUcWpYiuCEgRRVx2Fdcuyq6Luwq2dYtt97vs2pZVsexXiujXvpaforB0FWlSpAghYOikQEjvM/f3R5KBSAhJSHJnJq/n43Efmbn3zOVzuY55c++551gMwzAEAADghaxmFwAAANBUBBkAAOC1CDIAAMBrEWQAAIDXIsgAAACvRZABAABeiyADAAC8FkEGAAB4LYIMAADwWgQZAGdYsGCBLBaL9u/fb3YpXuW2225TaGio2WUAbQpBBoBXcblcWrhwoa644gpFR0fL399fsbGxGjNmjP7973+rrKzM7BIBtCI/swsAgIYqKSnRtddeqyVLlujiiy/WAw88oLi4OOXk5Gj16tX6zW9+o/Xr12vu3LlmlwqglRBkAHiNGTNmaMmSJZo9e7buvffeWtvuv/9+paWlaenSpfXuo7KyUi6XSwEBAS1ZKoBWwq0lAA320ksvqW/fvrLb7UpMTNT06dOVm5tbq01aWpomT56s+Ph4BQYGqkOHDpoyZYry8vLcbZYuXapLLrlEERERCg0NVXJysv7whz/U+2cfOnRIr732msaNG3dGiKnRo0cP/eY3v3G/379/vywWi5555hnNnj1b3bp1k91u1/fff6/y8nI9+uijGjRokBwOh0JCQnTppZdq5cqVtfZ5+j7++c9/qlOnTgoKCtJll12mHTt21FnHkSNHNGnSJIWGhiomJkYPPPCAnE5nvccHoGm4IgOgQR5//HE98cQTGj16tO6++26lpqbq5Zdf1saNG7VmzRr5+/urvLxcY8eOVVlZmX77298qPj5eR44c0Weffabc3Fw5HA7t3LlTV111lQYMGKAnn3xSdrtde/fu1Zo1a+r987/44gs5nU7ddNNNja59/vz5Ki0t1bRp02S32xUZGan8/Hy99tpruuGGG3TXXXepoKBAc+fO1dixY7VhwwZdcMEFtfaxcOFCFRQUaPr06SotLdW//vUvjRw5Utu3b1dcXJy7ndPp1NixYzVkyBA988wzWrZsmZ599ll169ZNd999d6NrB3AOBgD8yPz58w1JRnp6umEYhpGVlWUEBAQYY8aMMZxOp7vdiy++aEgy5s2bZxiGYWzZssWQZLz//vtn3fc///lPQ5KRnZ3dqJpmzJhhSDK2bt1aa31ZWZmRnZ3tXo4fP+7elp6ebkgywsPDjaysrFqfq6ysNMrKymqtO3nypBEXF2fccccdZ+wjKCjIOHz4sHv9+vXrDUnGjBkz3OtuvfVWQ5Lx5JNP1trvwIEDjUGDBjXqeAE0DLeWAJzTsmXLVF5ervvuu09W66n/bdx1110KDw/XokWLJEkOh0OStGTJEhUXF9e5r4iICEnSJ598IpfL1eAa8vPzJemMx5s///xzxcTEuJdOnTqd8dnJkycrJiam1jqbzebuJ+NyuZSTk6PKykpdeOGF2rx58xn7mDRpktq3b+9+P3jwYA0ZMkSff/75GW1//etf13p/6aWX6ocffmjgkQJoDIIMgHM6cOCAJCk5ObnW+oCAAHXt2tW9vUuXLpo5c6Zee+01RUdHa+zYsZozZ06t/jHXX3+9hg8frjvvvFNxcXGaMmWK3nvvvXOGmrCwMElSYWFhrfXDhw/X0qVLtXTpUo0ZM6bOz3bp0qXO9a+//roGDBigwMBARUVFKSYmRosWLapVb40ePXqcsa5nz55njLUTGBh4Rmhq166dTp48edZjA9B0BBkAzerZZ5/Vtm3b9Ic//EElJSX63e9+p759++rw4cOSpKCgIH355ZdatmyZbr75Zm3btk3XX3+9
rrjiino7xPbq1UuSzuhgGxMTo9GjR2v06NFKSEio87NBQUFnrHvzzTd12223qVu3bpo7d64WL16spUuXauTIkY26UvRjNputyZ8F0HgEGQDnVHO7JjU1tdb68vJypaenn3E7p3///vrTn/6kL7/8Ul999ZWOHDmiV155xb3darVq1KhReu655/T999/rf/7nf7RixYoznhg63fjx42Wz2fR///d/zXJMH3zwgbp27aoPP/xQN998s8aOHavRo0ertLS0zvZpaWlnrNuzZ486d+7cLPUAaBqCDIBzGj16tAICAvT888/LMAz3+rlz5yovL08TJkyQVNWPpbKystZn+/fvL6vV6h5xNycn54z91zwhVN+ovB07dtQdd9yhL774Qi+++GKdbU6v7Vxqrpyc/pn169dr7dq1dbb/+OOPdeTIEff7DRs2aP369Ro/fnyD/0wAzY/HrwGcU0xMjB555BE98cQTGjdunCZOnKjU1FS99NJLuuiii9yPRK9YsUL33HOPfvGLX6hnz56qrKzUG2+8IZvNpsmTJ0uSnnzySX355ZeaMGGCOnXqpKysLL300kvq0KGDLrnkknrrmD17ttLT0/Xb3/5W77zzjq6++mrFxsbq+PHjWrNmjT799NMz+vGczVVXXaUPP/xQ1157rSZMmKD09HS98sor6tOnzxn9cCSpe/fuuuSSS3T33XerrKxMs2fPVlRUlB566KFG/m0CaE4EGQAN8vjjjysmJkYvvviiZsyYocjISE2bNk1/+9vf5O/vL0lKSUnR2LFj9emnn+rIkSMKDg5WSkqKvvjiCw0dOlSSNHHiRO3fv1/z5s3T8ePHFR0drcsuu0xPPPGE+6mnswkODtbixYv1xhtv6I033tBTTz2l/Px8RUREKCUlRS+99JJuvfXWBh3PbbfdpoyMDL366qtasmSJ+vTpozfffFPvv/++Vq1adUb7W265RVarVbNnz1ZWVpYGDx6sF1988az9cgC0DovRmGuxANDG7N+/X126dNHTTz+tBx54wOxyAPwIfWQAAIDXIsgAAACvRZABAABeiz4yAADAa3FFBgAAeC2CDAAA8Fo+P46My+XS0aNHFRYWJovFYnY5AACgAQzDUEFBgRITE2W1nv26i88HmaNHjyopKcnsMgAAQBMcOnRIHTp0OOt2nw8yYWFhkqr+IsLDw02uBgAANER+fr6SkpLcv8fPxueDTM3tpPDwcIIMAABe5lzdQujsCwAAvBZBBgAAeC2CDAAA8FoEGQAA4LUIMgAAwGsRZAAAgNciyAAAAK9FkAEAAF6LIAMAALwWQQYAAHgtggwAAPBaBBkAAOC1CDJNZBiGln6fKcMwzC4FAIA2iyDTBIZh6KEPtumuhd/qpVX7zC4HAIA2iyDTBBaLRb0SwiVJTy9J1VvrD5pcEQAAbRNBpommXtJFvxnRTZL0p4+364vtx0yuCACAtocgcx4eHJusGwYnyWVI976zVd/sPW52SQAAtCkEmfNgsVj010n9Na5vvMqdLt218FttO5xrdlkAALQZBJnzZLNaNHvKBRrWNUpF5U7dNn+j9mUXml0WAABtAkGmGQT62/TvWwapX/tw5RSV65a5G3Qsr8TssgAA8HkEmWYSFuivBbcPVtfoEB3JLdEtczfoZFG52WUBAODTCDLNKDrUroVTBysu3K60rELd8fpGFZdXml0WAAA+iyDTzDq0C9YbU4fIEeSvLQdz9es3N6u80mV2WQAA+CSCTAvoGRemebddpCB/m77ck60H3v9OLhdTGQAA0NwIMi1kUKd2evmmn8jPatH/++6onvh0J/MyAQDQzAgyLWhEcqyevS5FkvT62gN6YcVekysCAMC3EGRa2DUXtNfjV/eRJD23dI/eWHfA5IoAAPAdBJlWcNvwLvrdyO6SpEc/2aHPth01uSIAAHwDQaaVzLiip24c0lGGIc14d6u+Sss2uyQAALweQaaVWCwWPXlNP00YkKAKp6FfvbFJWw/l
ml0WAABejSDTimxWi567LkWXdI9WcblTt8/foL1ZBWaXBQCA1yLItDK7n02v3DxIKR0cOllcoZvnbtDRXOZlAgCgKQgyJgi1+2n+7YPVNSZEx/JKdfPc9cphXiYAABqNIGOSyJAAvTF1iBIcgdqXXaTbF2xUURnzMgEA0BgEGRO1jwjSG1MHKyLYX98dytWv39ykskqn2WUBAOA1CDIm6x4bpvm3XaTgAJu+Sjuume99JyfzMgEA0CAEGQ8wsGM7vXLTIPnbLFq07Zge+387mJcJAIAGIMh4iJ/2jNFz110gi0V6c91B/XNZmtklAQDg8QgyHuTqlEQ9ObGvJOn55WlasCbd5IoAAPBsBBkPc/OwzrpvdA9J0hOffa/vj+abXBEAAJ6LIOOB7h3VQ2P7xskwpNe/2W92OQAAeCyCjAeyWCy689KukqRPvjuivOIKkysCAMAzEWQ81IWd2qlXfJhKK1x6f9Mhs8sBAMAjEWQ8lMVi0S3DOkuS3lh3QC7GlgEA4AwEGQ82aWCiwgL9dOBEsb5Myza7HAAAPA5BxoMFB/jp54M6SJLeWHvA5GoAAPA8BBkPd/PQTpKkFalZOpRTbHI1AAB4FoKMh+saE6pLe0TLMKQ313NVBgCA0xFkvEDNVZn3Nh5SaQWzYwMAUMPUIDNr1ixddNFFCgsLU2xsrCZNmqTU1NRabUpLSzV9+nRFRUUpNDRUkydPVmZmpkkVm2NU7zi1jwjSyeIKfbbtmNnlAADgMUwNMqtXr9b06dO1bt06LV26VBUVFRozZoyKiorcbWbMmKFPP/1U77//vlavXq2jR4/qZz/7mYlVtz6b1aJfDukoSXpj7X5ziwEAwINYDMPwmAFKsrOzFRsbq9WrV+unP/2p8vLyFBMTo7feeks///nPJUm7d+9W7969tXbtWg0dOvSc+8zPz5fD4VBeXp7Cw8Nb+hBazInCMg2btULlTpc+mT5cKUkRZpcEAECLaejvb4/qI5OXlydJioyMlCRt2rRJFRUVGj16tLtNr1691LFjR61du9aUGs0SFWrXhAEJkqSFPIoNAIAkDwoyLpdL9913n4YPH65+/fpJkjIyMhQQEKCIiIhabePi4pSRkVHnfsrKypSfn19r8RU3D6vq9PvptqPKKSo3uRoAAMznMUFm+vTp2rFjh955553z2s+sWbPkcDjcS1JSUjNVaL6BSRHq1z5c5ZUuvfct8y8BAOARQeaee+7RZ599ppUrV6pDhw7u9fHx8SovL1dubm6t9pmZmYqPj69zX4888ojy8vLcy6FDvvML32Kx6JahnSVJb647ICfzLwEA2jhTg4xhGLrnnnv00UcfacWKFerSpUut7YMGDZK/v7+WL1/uXpeamqqDBw9q2LBhde7TbrcrPDy81uJLrk5JlCPIX4dPlmhVapbZ5QAAYCpTg8z06dP15ptv6q233lJYWJgyMjKUkZGhkpISSZLD4dDUqVM1c+ZMrVy5Ups2bdLtt9+uYcOGNeiJJV8UFGDTL6rnX6LTLwCgrTM1yLz88svKy8vTiBEjlJCQ4F7effddd5t//vOfuuqqqzR58mT99Kc/VXx8vD788EMTqzbfTdUj/a7ek639x4vO0RoAAN/lUePItARfGUfmx26dt0Gr92Trrku76I8T+phdDgAAzcorx5FBw91S/Sj2e98eVkk58y8BANomgoyXGpEcqw7tgpRXUqFPvztqdjkAAJiCIOOlbFaLu6/MwnX75eN3CAEAqBNBxotdd2GSAvys2nEkX1sO5ZpdDgAArY4g48UiQwJ09YBESdIbPIoNAGiDCDJerqbT76Jtx3S8sMzkagAAaF0EGS+XkhShlA4OlTtdenej70zHAABAQxBkfMDNwzpLkt5af5D5lwAAbQpBxgdcNSBB7YL9dSS3RMt3ZZpdDgAArYYg4wMC/W267qIkSdIb6+j0CwBoOwgyPuKmIZ1ksUhfpR3XD9mFZpcDAECrIMj4iKTIYI1MjpXE
VRkAQNtBkPEhN1c/iv3BpsMqLq80uRoAAFoeQcaH/LRHjDpHBaugtFIfb2H+JQCA7yPI+BDr6fMvrWX+JQCA7yPI+JhfDEpSoL9VuzMK9O2Bk2aXAwBAiyLI+BhHsL+uSWkvSVrI/EsAAB9HkPFBNZ1+F+84pqyCUpOrAQCg5RBkfFC/9g79pGOEKpyG3tnA/EsAAN9FkPFRt5w2/1Kl02VuMQAAtBCCjI8a3z9eUSEBysgv1dLvmX8JAOCbCDI+yu5n05TBVfMv0ekXAOCrCDI+7JdDOslqkdb+cEJpmQVmlwMAQLMjyPiw9hFBGt07ThLzLwEAfBNBxsfVdPr9cPMRFZYx/xIAwLcQZHzc8O5R6hoTosKySn20+bDZ5QAA0KwIMj7OYrHoZvf8SweYfwkA4FMIMm3A5EEdFBxgU1pWodb9kGN2OQAANBuCTBsQHuivSQOr5l96Y91+c4sBAKAZEWTaiFuq519asjNTGXnMvwQA8A0EmTaiV3y4BneOlNNl6K0NB80uBwCAZkGQaUNqZsV+e8NBlVcy/xIAwPsRZNqQsX3jFRNmV3ZBmZbszDC7HAAAzhtBpg0J8LPqhsEdJUlvMP8SAMAHEGTamF8O7iib1aIN+3O0OyPf7HIAADgvBJk2Jt4RqDF9quZfYlZsAIC3I8i0QTWdfj/ZckRllU6TqwEAoOkIMm3Q0C5Rig2zq6jcqQ3pjPQLAPBeBJk2yGq1aGSvWEnS8l1ZJlcDAEDTEWTaKHeQ2Z3JRJIAAK9FkGmjhnePVoCfVYdySrQvu9DscgAAaBKCTBsVYvfTsK5Rkri9BADwXgSZNmxU75rbSwQZAIB3Isi0YZcnVwWZTQdOKre43ORqAABoPIJMG5YUGazkuDA5XYZW78k2uxwAABqNINPGjay+vbSC20sAAC9EkGnjRlU/hr0qNVuVTpfJ1QAA0DgEmTZuYMd2igj2V15JhTYfzDW7HAAAGoUg08bZrBZ3p9/luzNNrgYAgMYhyMA9yu8KxpMBAHgZggz0054xslktSssq1METxWaXAwBAgxFkIEeQvy7q3E6StILbSwAAL0KQgSRpVK84SYzyCwDwLgQZSDo1nsz6H3JUWFZpcjUAADQMQQaSpK7RIeocFaxyp0tfpx03uxwAABqEIANJksVi0cjq20v0kwEAeAuCDNxGuacryJbLZZhcDQAA50aQgdtFnSMVavfT8cIybT+SZ3Y5AACcE0EGbgF+Vl3aI1oSTy8BALwDQQa1uEf5pZ8MAMALEGRQy4jkWFks0o4j+crIKzW7HAAA6kWQQS0xYXaldIiQJK1M5fYSAMCzEWRwhlHVt5eWM4kkAMDDEWRwhppRftfsPa7SCqfJ1QAAcHamBpkvv/xSV199tRITE2WxWPTxxx/X2n7bbbfJYrHUWsaNG2dOsW1In4RwxYcHqqTCqbU/nDC7HAAAzsrUIFNUVKSUlBTNmTPnrG3GjRunY8eOuZe33367FStsmywWi/uqzApuLwEAPJifmX/4+PHjNX78+Hrb2O12xcfHt1JFqDGqV6zeWn9QK3Zn6UnDkMViMbskAADO4PF9ZFatWqXY2FglJyfr7rvv1okT9d/qKCsrU35+fq0FjXdxt2jZ/aw6klui1MwCs8sBAKBOHh1kxo0bp4ULF2r58uX6xz/+odWrV2v8+PFyOs/eAXXWrFlyOBzuJSkpqRUr9h1BATYN7149yi+3lwAAHsqjg8yUKVM0ceJE9e/fX5MmTdJnn32mjRs3atWqVWf9zCOPPKK8vDz3cujQodYr2MecGuWXIAMA8EweHWR+rGvXroqOjtbevXvP2sZutys8PLzWgqapCTKbD55UTlG5ydUAAHAmrwoyhw8f1okTJ5SQkGB2KW1CYkSQeieEyzCkVYzyCwDwQKYGmcLCQm3dulVbt26VJKWnp2vr1q06ePCgCgsL9eCDD2rdunXav3+/li9frmuuuUbdu3fX2LFjzSy7TXGP8svtJQCA
BzI1yHz77bcaOHCgBg4cKEmaOXOmBg4cqEcffVQ2m03btm3TxIkT1bNnT02dOlWDBg3SV199JbvdbmbZbUrNeDJfpmarwukyuRoAAGozdRyZESNGyDCMs25fsmRJK1aDuqR0iFBUSIBOFJVr4/4cXdwt2uySAABw86o+Mmh9NqtFI5IZ5RcA4JkIMjinUb15DBsA4JkIMjinS3tEy89q0Q/Hi/RDdqHZ5QAA4EaQwTmFBfprSNdISVyVAQB4FoIMGmRkrzhJBBkAgGchyKBBasaT2ZCeo/zSCpOrAQCgCkEGDdI5OkRdY0JU6TL0ddpxs8sBAEASQQaN4B7ll8ewAQAegiCDBqvpJ7MqNUtO19kHMgQAoLUQZNBgF3Zup7BAP50oKtd3h3PNLgcAAIIMGs7fZtVlPWMkMcovAMAzEGTQKDWj/DIbNgDAExBk0CiX9YyV1SLtOpavo7klZpcDAGjjCDJolMiQAP2kYztJDI4HADAfQQaNNpJJJAEAHoIgg0YbVf0Y9pq9x1VS7jS5GgBAW0aQQaP1jAtV+4gglVW69M0+RvkFAJiHIINGs1gsGtmLp5cAAOYjyKBJ3P1kdmXJMBjlFwBgDoIMmmRY1ygF+duUkV+q74/lm10OAKCNIsigSQL9bRrePVoSo/wCAMxDkEGTMcovAMBsBBk02eXJVUHmu8O5yi4oM7kaAEBbRJBBk8U7AtWvfbgMQ1qVylUZAEDrI8jgvIysHhyPUX4BAGYgyOC8jKoeT+bLPdkqr3SZXA0AoK0hyOC89G/vUHSoXUXlTm1IzzG7HABAG0OQwXmxWi0a2StGkrR8d6bJ1QAA2hqCDM5bTT+Z5YzyCwBoZQQZnLdLekQrwGbVwZxi7csuMrscAEAbQpDBeQu1+2lI10hJ0gpuLwEAWhFBBs2i5uml5UxXAABoRQQZNIuafjLfHjipvOIKk6sBALQVBBk0i45RweoRGyqny9DqtGyzywEAtBEEGTSbkdWTSK7YRT8ZAEDrIMig2Yyqvr20ak+2Kp2M8gsAaHkEGTSbn3SMkCPIX7nFFdpyKNfscgAAbUCTgszrr7+uRYsWud8/9NBDioiI0MUXX6wDBw40W3HwLn42q0YkV4/yy9NLAIBW0KQg87e//U1BQUGSpLVr12rOnDl66qmnFB0drRkzZjRrgfAuI6sfw2Y8GQBAa/BryocOHTqk7t27S5I+/vhjTZ48WdOmTdPw4cM1YsSI5qwPXuaynjGyWS3ak1moQznFSooMNrskAIAPa9IVmdDQUJ04cUKS9N///ldXXHGFJCkwMFAlJSXNVx28TkRwgAZ1aidJWrGb20sAgJbVpCBzxRVX6M4779Sdd96pPXv26Morr5Qk7dy5U507d27O+uCFakb5Xfo9t5cAAC2rSUFmzpw5GjZsmLKzs/Wf//xHUVFRkqRNmzbphhtuaNYC4X3G9I2XJK394YROFpWbXA0AwJdZDMMwzC6iJeXn58vhcCgvL0/h4eFml9NmjP/XV9p1LF//mNxf11/U0exyAABepqG/v5t0RWbx4sX6+uuv3e/nzJmjCy64QL/85S918uTJpuwSPubKflVXZT7fnmFyJQAAX9akIPPggw8qPz9fkrR9+3bdf//9uvLKK5Wenq6ZM2c2a4HwTuP7J0iS1uw9ziSSAIAW06Qgk56erj59+kiS/vOf/+iqq67S3/72N82ZM0dffPFFsxYI79Q9NlTJcWGqdBlaytxLAIAW0qQgExAQoOLiYknSsmXLNGbMGElSZGSk+0oNML5/1e2lL7YfM7kSAICvalKQueSSSzRz5kz95S9/0YYNGzRhwgRJ0p49e9ShQ4dmLRDe68rq20tfpR1Xfim3lwAAza9JQebFF1+Un5+fPvjgA7388stq3769JOmLL77QuHHjmrVAeK8esaHqFhOicqdLK5h7CQDQAnj8Gi3q2f+m6oUVe3VFnzj97y0Xml0OAMBLNPT3d5PmWpIkp9Opjz/+WLt2
7ZIk9e3bVxMnTpTNZmvqLuGDxvdL0Asr9mr1nmwVllUq1N7k/+QAADhDk36r7N27V1deeaWOHDmi5ORkSdKsWbOUlJSkRYsWqVu3bs1aJLxX74QwdYkOUfrxIq3YnaWJKYlmlwQA8CFN6iPzu9/9Tt26ddOhQ4e0efNmbd68WQcPHlSXLl30u9/9rrlrhBezWCwa34+nlwAALaNJQWb16tV66qmnFBkZ6V4XFRWlv//971q9enWzFQffUPP00srULBWXV5pcDQDAlzQpyNjtdhUUFJyxvrCwUAEBAeddFHxL38RwJUUGqbTCpVWp2WaXAwDwIU0KMldddZWmTZum9evXyzAMGYahdevW6de//rUmTpzY3DXCy1ksFl3Zr+qqzCJuLwEAmlGTgszzzz+vbt26adiwYQoMDFRgYKAuvvhide/eXbNnz27mEuELauZeWrk7SyXlTpOrAQD4iiY9tRQREaFPPvlEe/fudT9+3bt3b3Xv3r1Zi4PvSOngUPuIIB3JLdHqPdkaV90BGACA89HgIHOuWa1Xrlzpfv3cc881vSL4pJqnl177Ol1f7DhGkAEANIsGB5ktW7Y0qJ3FYmlyMfBt4/sn6LWv07V8V5ZKK5wK9GfwRADA+WlwkDn9igvQFAOTIhQfHqiM/FJ9nXZco/vEmV0SAMDLNamzL9AUVqvFfUvp8x08vQQAOH8EGbSqmsHxln6fqbJKnl4CAJwfU4PMl19+qauvvlqJiYmyWCz6+OOPa203DEOPPvqoEhISFBQUpNGjRystLc2cYtEsLuzUTrFhdhWUVuqbvSfMLgcA4OVMDTJFRUVKSUnRnDlz6tz+1FNP6fnnn9crr7yi9evXKyQkRGPHjlVpaWkrV4rmUuv2EoPjAQDOk6lBZvz48frrX/+qa6+99oxthmFo9uzZ+tOf/qRrrrlGAwYM0MKFC3X06NEzrtzAu4yvHuX3v99nqsLpMrkaAIA389g+Munp6crIyNDo0aPd6xwOh4YMGaK1a9ee9XNlZWXKz8+vtcCzDO4SqaiQAOWVVGjtPm4vAQCazmODTEZGhiQpLq72I7pxcXHubXWZNWuWHA6He0lKSmrROtF4NqtFY6tvL33B00sAgPPgsUGmqR555BHl5eW5l0OHDpldEupQM4nkkp2ZquT2EgCgiTw2yMTHV/2LPTMzs9b6zMxM97a62O12hYeH11rgeYZ2jVS7YH/lFJVrfXqO2eUAALyUxwaZLl26KD4+XsuXL3evy8/P1/r16zVs2DATK0Nz8LNZNbYvTy8BAM6PqUGmsLBQW7du1datWyVVdfDdunWrDh48KIvFovvuu09//etf9f/+3//T9u3bdcsttygxMVGTJk0ys2w0k/H9a24vZcjpMkyuBgDgjRo811JL+Pbbb3X55Ze739fMsH3rrbdqwYIFeuihh1RUVKRp06YpNzdXl1xyiRYvXqzAwECzSkYzurhblBxB/jpeWK6N+3M0tGuU2SUBALyMxTAMn/6ncH5+vhwOh/Ly8ugv44EeeP87fbDpsG4d1klPXNPP7HIAAB6iob+/PbaPDNqGK/vXPIadIRe3lwAAjUSQgamGd49WmN1PWQVl2nTwpNnlAAC8DEEGprL72XRFn6pBD3l6CQDQWAQZmK7m6aXF3F4CADQSQQamu7RHtEICbDqWV6qth3PNLgcA4EUIMjBdoL9No3pX3V76gttLAIBGIMjAI9Q8vfT59gz5+IgAAIBmRJCBR7isZ6yC/G06kluibYfzzC4HAOAlCDLwCEEBNo3sHStJ+nwHt5cAAA1DkIHHuLJf1dNLX3B7CQDQQAQZeIwRyTEK9LfqYE6xdh7NN7scAIAXIMjAY4TY/TSiZ9XtpS+4vQQAaACCDDzKeJ5eAgA0AkEGHmVU7zgF+FmVfrxIuzMKzC4HAODhCDLwKKF2P13WM0YSg+MBAM6NIAOP4x4cb0eGyZUAADwdQQYeZ1TvOPnbLNqbVai0TG4vAQDOjiAD
jxMe6K9Le1TdXvp8O1dlAABnR5CBRxrfr+r2Eo9hAwDqQ5CBR7qiT5z8rBbtzijQvuxCs8sBAHgoggw8UkRwgIZ3j5bE00sAgLMjyMBjXXna4HgAANSFIAOPdUWfeNmsFn1/LF/7jxeZXQ4AwAMRZOCxIkMCNKxrlCTpC8aUAQDUgSADj1Yz9xJPLwEA6kKQgUcb0ydeVou07XCeDuUUm10OAMDDEGTg0WLC7BrcJVISV2UAAGciyMDjTeifIImnlwAAZyLIwOON7Rsvi0XaeihXR3JLzC4HAOBBCDLweLHhgbqoU9XtpcU8vQQAOA1BBl7B/fQSo/wCAE5DkIFXGFc9ieS3B04qI6/U5GoAAJ6CIAOvkOAI0k86RkiSluzk9hIAoApBBl7jyuqnlxZxewkAUI0gA68xvjrIbNyfo6wCbi8BAAgy8CLtI4KUkhQhw5CW7Mw0uxwAgAcgyMCrXNmPp5cAAKcQZOBVxverur207ocTOlFYZnI1AACzEWTgVTpGBatf+3C5DOk/mw+bXQ4AwGQEGXidm4Z0kiTNWblPucXlJlcDADATQQZe5xcXJik5Lkx5JRV6YcVes8sBAJiIIAOvY7Na9MiVvSRJC9fu14ETRSZXBAAwC0EGXmlEcqwu7RGtCqehfyzebXY5AACTEGTgtf44obesFunz7Rn6dn+O2eUAAExAkIHX6hUfrl8MSpIk/XXRLhmGYXJFAIDWRpCBV7t/TE8F+du09VCuPtvGIHkA0NYQZODVYsMD9avLukqS/rF4t8oqnSZXBABoTQQZeL1pP+2quHC7Dp8s0evf7De7HABAKyLIwOsFB/jp/jHJkqQXVuxVThGD5AFAW0GQgU+Y/JMO6hUfpoLSSj2/PM3scgAArYQgA59gs1r0pwl9JElvrjugH7ILTa4IANAaCDLwGZf0iNblyTGqdDFIHgC0FQQZ+JRHrqwaJG/Jzkyt/+GE2eUAAFoYQQY+pWdcmKYM7ihJ+p/Pd8nlYpA8APBlBBn4nBmjeyokwKZth/P06bajZpcDAGhBBBn4nJgwu+4e0U2S9NTiVJVWMEgeAPgqggx80tRLuirBEagjuSWav2a/2eUAAFoIQQY+KSjApgeqB8l7aeVenSgsM7kiAEBLIMjAZ107sL36tQ9XQVmlZi9jkDwA8EUEGfgsq9WiP1zZW5L01oaD2pvFIHkA4GsIMvBpF3eL1ujesXK6DP39i11mlwMAaGYEGfi834/vLZvVomW7svTNvuNmlwMAaEYEGfi87rGhunFI1SB5f2OQPADwKQQZtAn3juqhMLufdhzJ10dbjphdDgCgmXh0kHn88cdlsVhqLb169TK7LHihqFC7fnN5d0nSM/9NVUk5g+QBgC/w6CAjSX379tWxY8fcy9dff212SfBStw/vrPYRQTqWV6q5X/9gdjkAgGbg8UHGz89P8fHx7iU6OtrskuClAv1temhc1SB5L6/ap+wCBskDAG/n8UEmLS1NiYmJ6tq1q2688UYdPHiw3vZlZWXKz8+vtQA1rh6QqJQODhWVO/XPZXvMLgcAcJ48OsgMGTJECxYs0OLFi/Xyyy8rPT1dl156qQoKCs76mVmzZsnhcLiXpKSkVqwYns5qteiPE/pIkt7ZcFB7Ms/+3xIAwPNZDMPwmmdRc3Nz1alTJz333HOaOnVqnW3KyspUVnbqlkF+fr6SkpKUl5en8PDw1ioVHu5Xb3yrJTszdXlyjObfPtjscgAAP5Kfny+Hw3HO398efUXmxyIiItSzZ0/t3bv3rG3sdrvCw8NrLcCP/X58b/lZLVqZmq2v0xgkDwC8lVcFmcLCQu3bt08JCQlmlwIv1yU6RDcN7SRJ+uui7+VkkDwA8EoeHWQeeOABrV69Wvv379c333yja6+9VjabTTfccIPZpcEH3Duqh8ID/bQ7o0D/2XzY7HIAAE3g0UHm8OHDuuGGG5ScnKzrrrtOUVFRWrdunWJiYswuDT6gXUiAfjuyhyTp
mSWpKi6vNLkiAEBj+ZldQH3eeecds0uAj7vl4k5auG6/DuWU6H+/TNe9o3uYXRIAoBE8+ooM0NLsfjY9PK5q2otXv9ynrPxSkysCADQGQQZt3oT+CRrYMULF5U49+18GyQMAb0KQQZtnsVj0pwm9JUnvbTqkXccYDRoAvAVBBpA0qFOkJvRPkGFIf/t8l9nlAAAaiCADVHtoXLL8bRZ9lXZcq/dkm10OAKABCDJAtU5RIbp1WGdJ0v8s+l6lFU5zCwIAnBNBBjjNb0f2UESwv/ZkFuquhd8SZgDAwxFkgNM4gv316k2DFBxg01dpxzX19Y0qKSfMAICnIsgAPzKka5Rev2OwQgJsWrP3hO5YsJFRfwHAQxFkgDpc1DlSC6cOVqjdT2t/OKHb529UURlhBgA8DUEGOItBnarCTJjdT+vTc3T7/I0qJMwAgEchyAD1+EnHdnrjziEKC/TThv05um3eBhWUVphdFgCgGkEGOIcLkiL0f3cOUXign749cFK3ztugfMIMAHgEggzQAAM6ROitu4bKEeSvzQdzdctcwgwAeAKCDNBA/do79H93DlFEsL+2HsrVza+tV14JYQYAzESQARqhX3uH3rpzqNoF++u7w3m66bX1yi0uN7ssAGizCDJAI/VJDNdbdw1VZEiAth/J042vrdfJIsIMAJiBIAM0Qe+EcL1911BFhQRo59F8/fK19cohzABAqyPIAE2UHB+md6YNVXSoXbuO5euX/7tOJwrLzC4LANoUggxwHnrEVYWZmDC7dmcU6Jf/u17HCTMA0GoIMsB56h4bqnemDVVsmF2pmQW64d/rlF1AmAGA1kCQAZpBt5hQvfurYYoPD1RaVqFu+N91yiooNbssAPB5BBmgmXSJDtE704YqwRGovVmFmvLvdcrMJ8wAQEsiyADNqHN0iN6dNkztI4L0Q3aRpvx7nTLyCDMA0FIIMkAz6xgVrHemDVX7iCClHy/SlH+v1bG8ErPLAgCfRJABWkBSZFWY6dAuSPtPFOv6V9fpSC5hBgCaG0EGaCFJkcF691fD1DEyWAdzijXl32t1+GSx2WUBgE8hyAAtqH1EkN6ZNlSdooJ1KKdE17+6TodyCDMA0FwIMkALS4wI0rvThqlLdIiO5JZoyr/X6cCJIrPLAgCfQJABWkG8I1DvTBuqrjFVYebKf32l55buUUFphdmlAYBXI8gArSQuPFDv3DVUFyRFqKjcqeeXp+mnT63U/375g0ornGaXBwBeyWIYhmF2ES0pPz9fDodDeXl5Cg8PN7scQIZhaPGODD3931T9kF11iynBEah7R/XQzwd1kJ+Nf18AQEN/fxNkAJNUOl36cPMR/XPZHh2rHjSva0yIHhiTrPH94mWxWEyuEADMQ5CpRpCBpyutcOrNdQf00qp9yikqlyT1b+/Qg2OTdWmPaAINgDaJIFONIANvUVBaode+StdrX/2govKqPjPDukbpoXHJGtixncnVAUDrIshUI8jA25woLNOclfv05roDKne6JElj+sTpgbHJ6hkXZnJ1ANA6CDLVCDLwVodPFutfy9L0n82H5TIki0W6dmB7zRjdU0mRwWaXBwAtiiBTjSADb7c3q0DPLNmjxTszJEn+NotuHNJJ0y/vrpgwu8nVAUDLIMhUI8jAV2w9lKunl+zWmr0nJEnBATZNvaSL7vppV4UH+ptcHQA0L4JMNYIMfM2avcf11OLd+u5wniQpIthfvxnRTbcM66xAf5vJ1QFA8yDIVCPIwBcZhqElOzP0zH/3aG9WoSQpPjxQ947uoV8wqB4AH0CQqUaQgS+rdLr04ZYj+teyNB3JLZEkdY0O0S8uTNLlvWKUHBfGODQAvBJBphpBBm1BWaVT/7fuoF5cudc9qJ4kJToCNaJXrC5PjtXF3aIUYvczsUoAaDiCTDWCDNqSwrJKfbTliFbuztI3+46rtMLl3hZgs2pI10iNSI7VyF6x6hIdYmKlAFA/gkw1ggzaqtIKp9b+cEKr
dmdpRWqWDuWU1NreOSpYI5JjdXmvWA3pEklHYQAehSBTjSADVHUO/uF4kVbuztKq1GytTz+hCuepr36Qv00Xd4uqvg0Vow7tGHAPgLkIMtUIMsCZCssqtWbvca1KzdLK3dnKyC+ttb1nXKguT47ViORYXdi5nfx5CgpAKyPIVCPIAPUzDEO7jhVoZWqWVqVmadOBk3Kd9n+FMLufLu0ZrRHJsRrRM0ax4YHmFQugzSDIVCPIAI2TW1yur9KOa2VqllanZuvEaU9BSVKv+DD1SQhX97hQ9YgNU4/YUCVFBstm5TFvAM2HIFONIAM0nctlaNuRvOq+NVnu0YR/LMDPqq7RIeoRVxVsesSGqkdcqDpFhXBbCkCTEGSqEWSA5pNdUKZNB05qb1aB0rIKtbd6Kat01dnez2pRl+gQ9YgLVffqqzfdY0PVJTqEp6QA1IsgU40gA7Qsp8vQkZMlSqsON2mZhe6gU1zurPMzVovUKSpE3U+7etMjNkzdYkIVFEDAAUCQcSPIAOYwDENH80qVllngvnKTllWoPZkFKiitrPMzFouUEB6oDpHB6tAuSB3aBSup+meHdkFKcAQyjxTQRhBkqhFkAM9iGIayC8qqr95UX8WpDjo5P+pY/GM2q0UJjsDTQk5N4AlSh8hgxYcH0ukY8BEN/f3NxCsAWpXFYlFseKBiwwM1vHt0rW0nCst0IKdYh3KKdfhkSfVS9frIyRKVO13u9VLOGfv2s1qUGBHkDjdJ7YLVIfJU6IkNs8tK0AF8CkEGgMeICrUrKtSun3Rsd8Y2l8tQdmGZDp8s1qGcUwGnJuwcyS1RhdPQwZxiHcwprnP/ATar4hx2xYcHKt4RpPhwu+LCA5XgCFK8w654R5Biw+w8aQV4EYIMAK9gtVoUFx6ouPBADep05nany1BWQak72Pw47BzNrbqicyinpHreqZN1/jkWixQVYleCo+rPinfYleAIqg48NesCFcpM4oBHoI8MgDah0ulSZkGZMvJKdCyvVBk1S36pMvNLdSyv6ufpc1DVJ8zupzjHaeEmPFBxjkBFhQQoIthf7YIDFFn92u7Hk1hAY9FHBgBO42ezqn1EkNpHBJ21jctlKKe4vFbIOSPs5JWqoKyyaqnupHwuIQE2RQQHqF1IVcCpWvzVLqT6dUj1+9NeB/nbZLHQnwc4F4IMAFSzWi2KDrUrOtSufu0dZ21XWFZZxxWdEmXml+lkUblOFpfrZHGFcovL5TKkonKnispLdCS3pMG1BPhZFRlc++qOI9hfoXY/BQfYFGr3U0j1Emq3KSSg5vWpn4H+VsIQfB5BBgAaKdTup+7VoxTXx+UyVFBaqZziqnCTW1yunKKqgFMTdmqCT25xhXKKqn6WO10qr3RVXRH60czkjWG1yB1wQuw/Dj9V60LsfgqtbhPob1OAn1X+Novsflb526zV76t+Bpz23t9mOWOdn9VCcEKrI8gAQAuxWi1yBPvLEeyvLgpp0GcMw1BxudMdak7WhJ6icuWVVKqovFKFZZUqql6qXjtPe12pouoRlV2G3LfBWoPFIvnbrLLbrPL3OxV2/G1VgScowKbgAJuC/KtC1OmvgwJsCva3KTjAT8H2OtoF+CkkoKpdgI0rTTiFIAMAHsRisbivmiRFNm0fLpehkgpnraBzKuScCjyF1QGopl1ZZdWVoArnqZ9lNe+dLlVUGu5t5dXrTn9cxDBUta3SJZU1z99HXWxWS1XosVcFnyD/qrBT99Ujyxnrz7zSVDtw+fudHsZqPmOR1WqRn9Uiq8Uim7VqsVqq11W/t1ksslolP6tVVosIXK3AK4LMnDlz9PTTTysjI0MpKSl64YUXNHjwYLPLAgCPZLWeCkOxLfjnGIYhp8twh5wyp1MVTkMVNUGnsiYAVf0sKXeq2L1Uul+XnPa6+EevS8qdKq5wqrjMqXJn1eSkTpdx2pWmFkxMzcBqUT2B51Qw8rNVvfa3WWWzWuRns8rfWrW+5rad32k/a7bV
vLZZq8JW1X5qXle3t1pks1lls1hks0o2q1U2q04FstODWfX702utqd1WK8RV76c6uLULDlCISUMSeHyQeffddzVz5ky98sorGjJkiGbPnq2xY8cqNTVVsbEt+RUFANTHYjn1y1QBkuTfon9epdPlDjU1gafmylNphbP66pFR66pS+WlXl06tM8648lT+o9cVlUatMFbpdMnpMuQypEqXSy6X5KwOcvVxGZLLaUgyPDxynZ//ubafbhxSxwBPrcDjg8xzzz2nu+66S7fffrsk6ZVXXtGiRYs0b948/f73vze5OgBAa/GzWRVusyo8sGUDU2O5XIYqXYZc1cHGaRin1lW/d7oMuVzVIcgw5HSpOhhVtXO6qkJYpdNQpcvl/llx+k/nae1+3N5lqNJ5alul06j1+vS6auqpWWpqOLVNcrpOBbfT29bah/PUvvyt5o2G7dFBpry8XJs2bdIjjzziXme1WjV69GitXbvWxMoAAKhitVoUwBxepvHoIHP8+HE5nU7FxcXVWh8XF6fdu3fX+ZmysjKVlZ26gJefn9+iNQIAAPP43Mxos2bNksPhcC9JSUlmlwQAAFqIRweZ6Oho2Ww2ZWZm1lqfmZmp+Pj4Oj/zyCOPKC8vz70cOnSoNUoFAAAm8OggExAQoEGDBmn58uXudS6XS8uXL9ewYcPq/Izdbld4eHitBQAA+CaP7iMjSTNnztStt96qCy+8UIMHD9bs2bNVVFTkfooJAAC0XR4fZK6//nplZ2fr0UcfVUZGhi644AItXrz4jA7AAACg7bEYhlH/aD5eLj8/Xw6HQ3l5edxmAgDASzT097dH95EBAACoD0EGAAB4LYIMAADwWgQZAADgtQgyAADAaxFkAACA1yLIAAAAr+XxA+Kdr5phcpgFGwAA71Hze/tcw935fJApKCiQJGbBBgDACxUUFMjhcJx1u8+P7OtyuXT06FGFhYXJYrE0237z8/OVlJSkQ4cOtYkRg9vS8XKsvqstHS/H6rvayvEahqGCggIlJibKaj17TxifvyJjtVrVoUOHFtt/W5thuy0dL8fqu9rS8XKsvqstHG99V2Jq0NkXAAB4LYIMAADwWgSZJrLb7Xrsscdkt9vNLqVVtKXj5Vh9V1s6Xo7Vd7W14z0Xn+/sCwAAfBdXZAAAgNciyAAAAK9FkAEAAF6LIAMAALwWQaYec+bMUefOnRUYGKghQ4Zow4YN9bZ///331atXLwUGBqp///76/PPPW6nS8zNr1ixddNFFCgsLU2xsrCZNmqTU1NR6P7NgwQJZLJZaS2BgYCtV3HSPP/74GXX36tWr3s9463mVpM6dO59xvBaLRdOnT6+zvTed1y+//FJXX321EhMTZbFY9PHHH9fabhiGHn30USUkJCgoKEijR49WWlraOffb2O99a6jvWCsqKvTwww+rf//+CgkJUWJiom655RYdPXq03n025bvQWs51bm+77bYzah83btw59+tt51ZSnd9fi8Wip59++qz79ORz2xIIMmfx7rvvaubMmXrssce0efNmpaSkaOzYscrKyqqz/TfffKMbbrhBU6dO1ZYtWzRp0iRNmjRJO3bsaOXKG2/16tWaPn261q1bp6VLl6qiokJjxoxRUVFRvZ8LDw/XsWPH3MuBAwdaqeLz07dv31p1f/3112dt683nVZI2btxY61iXLl0qSfrFL35x1s94y3ktKipSSkqK5syZU+f2p556Ss8//7xeeeUVrV+/XiEhIRo7dqxKS0vPus/Gfu9bS33HWlxcrM2bN+vPf/6zNm/erA8//FCpqamaOHHiOffbmO9CazrXuZWkcePG1ar97bffrnef3nhuJdU6xmPHjmnevHmyWCyaPHlyvfv11HPbIgzUafDgwcb06dPd751Op5GYmGjMmjWrzvbXXXedMWHChFrrhgwZYvzqV79q0TpbQlZWliHJWL169VnbzJ8/33A4HK1XVDN57LHHjJSUlAa396XzahiGce+99xrdunUz
XC5Xndu99bxKMj766CP3e5fLZcTHxxtPP/20e11ubq5ht9uNt99++6z7aez33gw/Pta6bNiwwZBkHDhw4KxtGvtdMEtdx3vrrbca11xzTaP24yvn9pprrjFGjhxZbxtvObfNhSsydSgvL9emTZs0evRo9zqr1arRo0dr7dq1dX5m7dq1tdpL0tixY8/a3pPl5eVJkiIjI+ttV1hYqE6dOikpKUnXXHONdu7c2Rrlnbe0tDQlJiaqa9euuvHGG3Xw4MGztvWl81peXq4333xTd9xxR70TqHrreT1denq6MjIyap07h8OhIUOGnPXcNeV776ny8vJksVgUERFRb7vGfBc8zapVqxQbG6vk5GTdfffdOnHixFnb+sq5zczM1KJFizR16tRztvXmc9tYBJk6HD9+XE6nU3FxcbXWx8XFKSMjo87PZGRkNKq9p3K5XLrvvvs0fPhw9evX76ztkpOTNW/ePH3yySd688035XK5dPHFF+vw4cOtWG3jDRkyRAsWLNDixYv18ssvKz09XZdeeqkKCgrqbO8r51WSPv74Y+Xm5uq22247axtvPa8/VnN+GnPumvK990SlpaV6+OGHdcMNN9Q7oWBjvwueZNy4cVq4cKGWL1+uf/zjH1q9erXGjx8vp9NZZ3tfObevv/66wsLC9LOf/azedt58bpvC52e/RuNMnz5dO3bsOOf91GHDhmnYsGHu9xdffLF69+6tV199VX/5y19auswmGz9+vPv1gAEDNGTIEHXq1Envvfdeg/6V483mzp2r8ePHKzEx8axtvPW8okpFRYWuu+46GYahl19+ud623vxdmDJlivt1//79NWDAAHXr1k2rVq3SqFGjTKysZc2bN0833njjOTvge/O5bQquyNQhOjpaNptNmZmZtdZnZmYqPj6+zs/Ex8c3qr0nuueee/TZZ59p5cqV6tChQ6M+6+/vr4EDB2rv3r0tVF3LiIiIUM+ePc9aty+cV0k6cOCAli1bpjvvvLNRn/PW81pzfhpz7pryvfckNSHmwIEDWrp0ab1XY+pyru+CJ+vatauio6PPWru3n1tJ+uqrr5Samtro77Dk3ee2IQgydQgICNCgQYO0fPly9zqXy6Xly5fX+tfq6YYNG1arvSQtXbr0rO09iWEYuueee/TRRx9pxYoV6tKlS6P34XQ6tX37diUkJLRAhS2nsLBQ+/btO2vd3nxeTzd//nzFxsZqwoQJjfqct57XLl26KD4+vta5y8/P1/r168967pryvfcUNSEmLS1Ny5YtU1RUVKP3ca7vgic7fPiwTpw4cdbavfnc1pg7d64GDRqklJSURn/Wm89tg5jd29hTvfPOO4bdbjcWLFhgfP/998a0adOMiIgIIyMjwzAMw7j55puN3//+9+72a9asMfz8/IxnnnnG2LVrl/HYY48Z/v7+xvbt2806hAa7++67DYfDYaxatco4duyYeykuLna3+fHxPvHEE8aSJUuMffv2GZs2bTKmTJliBAYGGjt37jTjEBrs/vvvN1atWmWkp6cba9asMUaPHm1ER0cbWVlZhmH41nmt4XQ6jY4dOxoPP/zwGdu8+bwWFBQYW7ZsMbZs2WJIMp577jljy5Yt7id1/v73vxsRERHGJ598Ymzbts245pprjC5duhglJSXufYwcOdJ44YUX3O/P9b03S33HWl5ebkycONHo0KGDsXXr1lrf4bKyMvc+fnys5/oumKm+4y0oKDAeeOABY+3atUZ6erqxbNky4yc/+YnRo0cPo7S01L0PXzi3NfLy8ozg4GDj5ZdfrnMf3nRuWwJBph4vvPCC0bFjRyMgIMAYPHiwsW7dOve2yy67zLj11ltrtX/vvfeMnj17GgEBAUbfvn2NRYsWtXLFTSOpzmX+/PnuNj8+3vvuu8/9dxMXF2dceeWVxubNm1u/+Ea6/vrrjYSEBCMgIMBo3769cf311xt79+51b/el81pjyZIlhiQjNTX1jG3efF5XrlxZ53+3Ncfj
crmMP//5z0ZcXJxht9uNUaNGnfF30KlTJ+Oxxx6rta6+771Z6jvW9PT0s36HV65c6d7Hj4/1XN8FM9V3vMXFxcaYMWOMmJgYw9/f3+jUqZNx1113nRFIfOHc1nj11VeNoKAgIzc3t859eNO5bQkWwzCMFr3kAwAA0ELoIwMAALwWQQYAAHgtggwAAPBaBBkAAOC1CDIAAMBrEWQAAIDXIsgAAACvRZAB4PNWrVoli8Wi3Nxcs0sB0MwIMgAAwGsRZAAAgNciyABocS6XS7NmzVKXLl0UFBSklJQUffDBB5JO3fZZtGiRBgwYoMDAQA0dOlQ7duyotY///Oc/6tu3r+x2uzp37qxnn3221vaysjI9/PDDSkpKkt1uV/fu3TV37txabTZt2qQLL7xQwcHBuvjii5Wamure9t133+nyyy9XWFiYwsPDNWjQIH377bct9DcCoLkQZAC0uFmzZmnhwoV65ZVXtHPnTs2YMUM33XSTVq9e7W7z4IMP6tlnn9XGjRsVExOjq6++WhUVFZKqAsh1112nKVOmaPv27Xr88cf15z//WQsWLHB//pZbbtHbb7+t559/Xrt27dKrr76q0NDQWnX88Y9/1LPPPqtvv/1Wfn5+uuOOO9zbbrzxRnXo0EEbN27Upk2b9Pvf/17+/v4t+xcD4PyZPWslAN9WWlpqBAcHG998802t9VOnTjVuuOEG9+y/77zzjnvbiRMnjKCgIOPdd981DMMwfvnLXxpXXHFFrc8/+OCDRp8+fQzDMIzU1FRDkrF06dI6a6j5M5YtW+Zet2jRIkOSUVJSYhiGYYSFhRkLFiw4/wMG0Kq4IgOgRe3du1fFxcW64oorFBoa6l4WLlyoffv2udsNGzbM/ToyMlLJycnatWuXJGnXrl0aPnx4rf0OHz5caWlpcjqd2rp1q2w2my677LJ6axkwYID7dUJCgiQpKytLkjRz5kzdeeedGj16tP7+97/Xqg2A5yLIAGhRhYWFkqRFixZp69at7uX7779395M5X0FBQQ1qd/qtIovFIqmq/44kPf7449q5c6cmTJigFStWqE+fPvroo4+apT4ALYcgA6BF9enTR3a7XQcPHlT37t1rLUlJSe5269atc78+efKk9uzZo969e0uSevfurTVr1tTa75o1a9SzZ0/ZbDb1799fLperVp+bpujZs6dmzJih//73v/rZz36m+fPnn9f+ALQ8P7MLAODbwsLC9MADD2jGjBlyuVy65JJLlJeXpzVr1ig8PFydOnWSJD355JOKiopSXFyc/vjHPyo6OlqTJk2SJN1///266KKL9Je//EXXX3+91q5dqxdffFEvvfSSJKlz58669dZbdccdd+j5559XSkqKDhw4oKysLF133XXnrLGkpEQPPvigfv7zn6tLly46fPiwNm7cqMmTJ7fY3wuAZmJ2Jx0Avs/lchmzZ882kpOTDX9/fyMmJsYYO3assXr1andH3E8//dTo27evERAQYAwePNj47rvvau3jgw8+MPr06WP4+/sbHTt2NJ5++ula20tKSowZM2YYCQkJRkBAgNG9e3dj3rx5hmGc6ux78uRJd/stW7YYkoz09HSjrKzMmDJlipGUlGQEBAQYiYmJxj333OPuCAzAc1kMwzBMzlIA2rBVq1bp8ssv18mTJxUREWF2OQC8DH1kAACA1yLIAAAAr8WtJQAA4LW4IgMAALwWQQYAAHgtggwAAPBaBBkAAOC1CDIAAMBrEWQAAIDXIsgAAACvRZABAABeiyADAAC81v8HGLVlUjjYoogAAAAASUVORK5CYII=",
1554
+ "text/plain": [
1555
+ "<Figure size 640x480 with 1 Axes>"
1556
+ ]
1557
+ },
1558
+ "metadata": {},
1559
+ "output_type": "display_data"
1560
+ }
1561
+ ],
1562
+ "source": [
1563
+ "plt.plot(all_loss)\n",
1564
+ "plt.title(\"loss Graph\")\n",
1565
+ "plt.xlabel(\"epochs\")\n",
1566
+ "plt.ylabel(\"loss\")\n",
1567
+ "plt.show()"
1568
+ ]
1569
+ },
1570
+ {
1571
+ "cell_type": "markdown",
1572
+ "metadata": {
1573
+ "id": "g46t9mRLWXdH"
1574
+ },
1575
+ "source": [
1576
+ "# 4.6.Evaluate Model"
1577
+ ]
1578
+ },
1579
+ {
1580
+ "cell_type": "code",
1581
+ "execution_count": 158,
1582
+ "metadata": {
1583
+ "colab": {
1584
+ "base_uri": "https://localhost:8080/"
1585
+ },
1586
+ "id": "ZleBUg-Gmudt",
1587
+ "outputId": "74dc7500-02bb-477c-d055-a3a3f3835581"
1588
+ },
1589
+ "outputs": [
1590
+ {
1591
+ "name": "stdout",
1592
+ "output_type": "stream",
1593
+ "text": [
1594
+ "Test Accuracy: 92.61%\n"
1595
+ ]
1596
+ }
1597
+ ],
1598
+ "source": [
1599
+ "# Evaluate Model\n",
1600
+ "model.eval()\n",
1601
+ "correct, total = 0, 0\n",
1602
+ "with torch.no_grad():\n",
1603
+ " for text, label in test_loader:\n",
1604
+ " text, label = text.to(device), label.to(device)\n",
1605
+ " output = model(text)\n",
1606
+ " _, predicted = torch.max(output, 1)\n",
1607
+ " total += label.size(0)\n",
1608
+ " correct += (predicted == label).sum().item()\n",
1609
+ "accuracy = 100 * correct / total\n",
1610
+ "print(f\"Test Accuracy: {accuracy:.2f}%\")"
1611
+ ]
1612
+ },
1613
+ {
1614
+ "cell_type": "markdown",
1615
+ "metadata": {
1616
+ "id": "yel48pLbU6Wo"
1617
+ },
1618
+ "source": [
1619
+ "# 4.7.Save Model"
1620
+ ]
1621
+ },
1622
+ {
1623
+ "cell_type": "code",
1624
+ "execution_count": 159,
1625
+ "metadata": {
1626
+ "id": "qT87bZnsUrog"
1627
+ },
1628
+ "outputs": [],
1629
+ "source": [
1630
+ "\n",
1631
+ "torch.save(model.state_dict(), 'spam_classifier_model_lstm.pth')\n"
1632
+ ]
1633
+ },
1634
+ {
1635
+ "cell_type": "markdown",
1636
+ "metadata": {
1637
+ "id": "ykX7OC7RWSuW"
1638
+ },
1639
+ "source": [
1640
+ "# 5.Predict Spam or Ham for New Text"
1641
+ ]
1642
+ },
1643
+ {
1644
+ "cell_type": "code",
1645
+ "execution_count": 160,
1646
+ "metadata": {
1647
+ "colab": {
1648
+ "base_uri": "https://localhost:8080/"
1649
+ },
1650
+ "id": "q8qsN1kunFu3",
1651
+ "outputId": "d956e7eb-ee73-4d12-b31a-e44c09d049a5"
1652
+ },
1653
+ "outputs": [
1654
+ {
1655
+ "name": "stdout",
1656
+ "output_type": "stream",
1657
+ "text": [
1658
+ "Prediction: Spam\n"
1659
+ ]
1660
+ }
1661
+ ],
1662
+ "source": [
1663
+ "def predict_sentiment(text):\n",
1664
+ " model.eval()\n",
1665
+ " text_vectorized = Tfidf_Vectorizer.transform([text]).toarray()\n",
1666
+ " text_tensor = torch.tensor(text_vectorized, dtype=torch.float32).to(device)\n",
1667
+ " with torch.no_grad():\n",
1668
+ " output = model(text_tensor)\n",
1669
+ " pred_label = torch.argmax(output, dim=1).item()\n",
1670
+ " return \"ham\" if pred_label == 1 else \"Spam\"\n",
1671
+ "print(\"Prediction:\", predict_sentiment(\"For ur chance to win £250 cash every wk TXT: PLAY to 83370. T's&C's www.music-trivia.net custcare 08715705022, 1x150p/wk.\"))"
1672
+ ]
1673
+ }
1674
+ ],
1675
+ "metadata": {
1676
+ "accelerator": "GPU",
1677
+ "colab": {
1678
+ "gpuType": "T4",
1679
+ "provenance": []
1680
+ },
1681
+ "kernelspec": {
1682
+ "display_name": "Python 3",
1683
+ "name": "python3"
1684
+ },
1685
+ "language_info": {
1686
+ "name": "python"
1687
+ }
1688
+ },
1689
+ "nbformat": 4,
1690
+ "nbformat_minor": 0
1691
+ }
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from joblib import load
3
+ import torch
4
+ import torch.nn as nn
5
+ import re
6
+ import nltk
7
+ from nltk.tokenize import word_tokenize
8
+ from nltk.stem import WordNetLemmatizer
9
+ from nltk.corpus import stopwords
10
# Fetch the NLTK resources this script uses: the English stop-word list,
# WordNet (for WordNetLemmatizer) and the Punkt models (for word_tokenize).
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')


# Load the fitted TF-IDF vectorizer that turns a message into the model's
# input feature vector.
TFIDF_vectorizer = load("tfidf_vectorizer.pkl")

# Page title: this app labels a text message as spam or ham.
st.title("Spam Message Classification")
20
+
21
# Define the LSTM classifier. Layer names (lstm, fc) must stay as they are so
# that the saved state dict keys keep matching.
class SentimentLSTM(nn.Module):
    """LSTM followed by a linear layer, applied to one feature vector per sample.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of scores produced per sample.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        # Declared but never applied in forward(). It holds no parameters, so
        # it does not affect state-dict loading.
        # NOTE(review): confirm whether the training code meant to use it.
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return the raw (un-normalised) scores for each row of ``x``.

        ``x`` is given a sequence axis of length one at position 1, passed
        through the LSTM, and the output at the last time step is fed to the
        linear layer.
        """
        x = x.unsqueeze(1)
        lstm_out, _ = self.lstm(x)
        final_hidden = lstm_out[:, -1, :]
        return self.fc(final_hidden)
34
# Load Model Weights
# input_dim has to equal the length of the vectors produced by the TF-IDF
# vectorizer, because those vectors are fed straight into the LSTM.
# NOTE(review): 10000 is presumably the vectorizer's feature count — confirm.
model = SentimentLSTM(input_dim = 10000, hidden_dim = 64, output_dim=2).to("cpu")
model.load_state_dict(
    torch.load(
        "spam_classifier_model_lstm.pth",
        map_location=torch.device('cpu'),
        # The checkpoint is a plain state dict, so restrict unpickling to
        # tensors/containers instead of allowing arbitrary pickled objects.
        weights_only=True,
    )
)
model.eval()  # inference mode

# Text input for the message to classify
Message = st.text_input("Enter Message")
41
+
42
def preprocess_text(text):
    """Normalise a raw message into a space-separated string of lemmas.

    The text is case-folded, every character that is not an ASCII letter is
    replaced by a space, the result is tokenized, English stop words are
    dropped, and each remaining token is lemmatized.

    Args:
        text: The raw message.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text.casefold())
    words = word_tokenize(letters_only)

    english_stops = set(stopwords.words('english'))
    wordnet = WordNetLemmatizer()

    kept = []
    for token in words:
        # NOTE(review): "not" appears to already be in NLTK's English
        # stop-word list, which would make the second test redundant. If the
        # intent was to KEEP negations the condition is inverted — confirm
        # against the preprocessing used at training time.
        if token in english_stops or token == "not":
            continue
        kept.append(wordnet.lemmatize(token))

    return ' '.join(kept)
59
+
60
def predict_sentiment(text):
    """Classify one message and return its label string.

    The message is vectorized with the module-level TF-IDF vectorizer, run
    through the module-level model without gradient tracking, and the index
    of the highest score is mapped to a label.

    Args:
        text: The message to classify (the caller passes preprocessed text).

    Returns:
        "ham" when the predicted index is 1, otherwise "Spam".
    """
    model.eval()
    features = TFIDF_vectorizer.transform([text]).toarray()
    batch = torch.tensor(features, dtype=torch.float32).to("cpu")
    with torch.no_grad():
        scores = model(batch)
    class_index = torch.argmax(scores, dim=1).item()
    # NOTE(review): assumes class index 1 was "ham" during training — confirm
    # against the label encoding used in the notebook.
    if class_index == 1:
        return "ham"
    return "Spam"
68
+
69
# Classify the entered message when the user presses Submit.
if st.button("Submit"):
    if not Message.strip():
        # A blank message would be vectorized to all zeros and still yield a
        # label, so ask for input instead of showing a meaningless result.
        st.warning("Please enter a message to classify.")
    else:
        cleaned_message = preprocess_text(Message)
        result = predict_sentiment(cleaned_message)
        st.success(result)
73
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ joblib==1.4.2
2
+ numpy==2.2.1
3
+ pandas==2.2.3
4
+ scikit-learn==1.6.1
5
+ streamlit==1.41.1
6
+ torch == 2.5.1
7
+ nltk
spam_classifier_model_lstm.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9118e44066ad24a1a863171f4ac7bb78482d65d11515a2aa4b5d0a510b0fb684
3
+ size 10310982
tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e9be53e196c118608513762a5bcc7939c17cd600f15e45cf6ec31842ff795e1
3
+ size 394924