alzx1 committed on
Commit
f2fceb6
·
verified ·
1 Parent(s): ebb0496

Upload 4 files

Browse files
Files changed (4) hide show
  1. ML_with_NLP.ipynb +1898 -0
  2. app.py +23 -0
  3. requirements.txt +7 -0
  4. spam.csv +0 -0
ML_with_NLP.ipynb ADDED
@@ -0,0 +1,1898 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "id": "68dfebbc",
7
+ "metadata": {
8
+ "colab": {
9
+ "base_uri": "https://localhost:8080/",
10
+ "height": 206
11
+ },
12
+ "id": "68dfebbc",
13
+ "outputId": "80579e75-dfb3-46ea-dfe9-16713cbf0ab8"
14
+ },
15
+ "outputs": [
16
+ {
17
+ "output_type": "execute_result",
18
+ "data": {
19
+ "text/plain": [
20
+ " v1 v2 Unnamed: 2 \\\n",
21
+ "0 ham Go until jurong point, crazy.. Available only ... NaN \n",
22
+ "1 ham Ok lar... Joking wif u oni... NaN \n",
23
+ "2 spam Free entry in 2 a wkly comp to win FA Cup fina... NaN \n",
24
+ "3 ham U dun say so early hor... U c already then say... NaN \n",
25
+ "4 ham Nah I don't think he goes to usf, he lives aro... NaN \n",
26
+ "\n",
27
+ " Unnamed: 3 Unnamed: 4 \n",
28
+ "0 NaN NaN \n",
29
+ "1 NaN NaN \n",
30
+ "2 NaN NaN \n",
31
+ "3 NaN NaN \n",
32
+ "4 NaN NaN "
33
+ ],
34
+ "text/html": [
35
+ "\n",
36
+ " <div id=\"df-6ce403b8-499e-4a5a-9f0a-e51a9f1df287\" class=\"colab-df-container\">\n",
37
+ " <div>\n",
38
+ "<style scoped>\n",
39
+ " .dataframe tbody tr th:only-of-type {\n",
40
+ " vertical-align: middle;\n",
41
+ " }\n",
42
+ "\n",
43
+ " .dataframe tbody tr th {\n",
44
+ " vertical-align: top;\n",
45
+ " }\n",
46
+ "\n",
47
+ " .dataframe thead th {\n",
48
+ " text-align: right;\n",
49
+ " }\n",
50
+ "</style>\n",
51
+ "<table border=\"1\" class=\"dataframe\">\n",
52
+ " <thead>\n",
53
+ " <tr style=\"text-align: right;\">\n",
54
+ " <th></th>\n",
55
+ " <th>v1</th>\n",
56
+ " <th>v2</th>\n",
57
+ " <th>Unnamed: 2</th>\n",
58
+ " <th>Unnamed: 3</th>\n",
59
+ " <th>Unnamed: 4</th>\n",
60
+ " </tr>\n",
61
+ " </thead>\n",
62
+ " <tbody>\n",
63
+ " <tr>\n",
64
+ " <th>0</th>\n",
65
+ " <td>ham</td>\n",
66
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
67
+ " <td>NaN</td>\n",
68
+ " <td>NaN</td>\n",
69
+ " <td>NaN</td>\n",
70
+ " </tr>\n",
71
+ " <tr>\n",
72
+ " <th>1</th>\n",
73
+ " <td>ham</td>\n",
74
+ " <td>Ok lar... Joking wif u oni...</td>\n",
75
+ " <td>NaN</td>\n",
76
+ " <td>NaN</td>\n",
77
+ " <td>NaN</td>\n",
78
+ " </tr>\n",
79
+ " <tr>\n",
80
+ " <th>2</th>\n",
81
+ " <td>spam</td>\n",
82
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
83
+ " <td>NaN</td>\n",
84
+ " <td>NaN</td>\n",
85
+ " <td>NaN</td>\n",
86
+ " </tr>\n",
87
+ " <tr>\n",
88
+ " <th>3</th>\n",
89
+ " <td>ham</td>\n",
90
+ " <td>U dun say so early hor... U c already then say...</td>\n",
91
+ " <td>NaN</td>\n",
92
+ " <td>NaN</td>\n",
93
+ " <td>NaN</td>\n",
94
+ " </tr>\n",
95
+ " <tr>\n",
96
+ " <th>4</th>\n",
97
+ " <td>ham</td>\n",
98
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
99
+ " <td>NaN</td>\n",
100
+ " <td>NaN</td>\n",
101
+ " <td>NaN</td>\n",
102
+ " </tr>\n",
103
+ " </tbody>\n",
104
+ "</table>\n",
105
+ "</div>\n",
106
+ " <div class=\"colab-df-buttons\">\n",
107
+ "\n",
108
+ " <div class=\"colab-df-container\">\n",
109
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6ce403b8-499e-4a5a-9f0a-e51a9f1df287')\"\n",
110
+ " title=\"Convert this dataframe to an interactive table.\"\n",
111
+ " style=\"display:none;\">\n",
112
+ "\n",
113
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
114
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
115
+ " </svg>\n",
116
+ " </button>\n",
117
+ "\n",
118
+ " <style>\n",
119
+ " .colab-df-container {\n",
120
+ " display:flex;\n",
121
+ " gap: 12px;\n",
122
+ " }\n",
123
+ "\n",
124
+ " .colab-df-convert {\n",
125
+ " background-color: #E8F0FE;\n",
126
+ " border: none;\n",
127
+ " border-radius: 50%;\n",
128
+ " cursor: pointer;\n",
129
+ " display: none;\n",
130
+ " fill: #1967D2;\n",
131
+ " height: 32px;\n",
132
+ " padding: 0 0 0 0;\n",
133
+ " width: 32px;\n",
134
+ " }\n",
135
+ "\n",
136
+ " .colab-df-convert:hover {\n",
137
+ " background-color: #E2EBFA;\n",
138
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
139
+ " fill: #174EA6;\n",
140
+ " }\n",
141
+ "\n",
142
+ " .colab-df-buttons div {\n",
143
+ " margin-bottom: 4px;\n",
144
+ " }\n",
145
+ "\n",
146
+ " [theme=dark] .colab-df-convert {\n",
147
+ " background-color: #3B4455;\n",
148
+ " fill: #D2E3FC;\n",
149
+ " }\n",
150
+ "\n",
151
+ " [theme=dark] .colab-df-convert:hover {\n",
152
+ " background-color: #434B5C;\n",
153
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
154
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
155
+ " fill: #FFFFFF;\n",
156
+ " }\n",
157
+ " </style>\n",
158
+ "\n",
159
+ " <script>\n",
160
+ " const buttonEl =\n",
161
+ " document.querySelector('#df-6ce403b8-499e-4a5a-9f0a-e51a9f1df287 button.colab-df-convert');\n",
162
+ " buttonEl.style.display =\n",
163
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
164
+ "\n",
165
+ " async function convertToInteractive(key) {\n",
166
+ " const element = document.querySelector('#df-6ce403b8-499e-4a5a-9f0a-e51a9f1df287');\n",
167
+ " const dataTable =\n",
168
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
169
+ " [key], {});\n",
170
+ " if (!dataTable) return;\n",
171
+ "\n",
172
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
173
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
174
+ " + ' to learn more about interactive tables.';\n",
175
+ " element.innerHTML = '';\n",
176
+ " dataTable['output_type'] = 'display_data';\n",
177
+ " await google.colab.output.renderOutput(dataTable, element);\n",
178
+ " const docLink = document.createElement('div');\n",
179
+ " docLink.innerHTML = docLinkHtml;\n",
180
+ " element.appendChild(docLink);\n",
181
+ " }\n",
182
+ " </script>\n",
183
+ " </div>\n",
184
+ "\n",
185
+ "\n",
186
+ " <div id=\"df-16f51769-ef56-4f10-baa6-4d88c99c8037\">\n",
187
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-16f51769-ef56-4f10-baa6-4d88c99c8037')\"\n",
188
+ " title=\"Suggest charts\"\n",
189
+ " style=\"display:none;\">\n",
190
+ "\n",
191
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
192
+ " width=\"24px\">\n",
193
+ " <g>\n",
194
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
195
+ " </g>\n",
196
+ "</svg>\n",
197
+ " </button>\n",
198
+ "\n",
199
+ "<style>\n",
200
+ " .colab-df-quickchart {\n",
201
+ " --bg-color: #E8F0FE;\n",
202
+ " --fill-color: #1967D2;\n",
203
+ " --hover-bg-color: #E2EBFA;\n",
204
+ " --hover-fill-color: #174EA6;\n",
205
+ " --disabled-fill-color: #AAA;\n",
206
+ " --disabled-bg-color: #DDD;\n",
207
+ " }\n",
208
+ "\n",
209
+ " [theme=dark] .colab-df-quickchart {\n",
210
+ " --bg-color: #3B4455;\n",
211
+ " --fill-color: #D2E3FC;\n",
212
+ " --hover-bg-color: #434B5C;\n",
213
+ " --hover-fill-color: #FFFFFF;\n",
214
+ " --disabled-bg-color: #3B4455;\n",
215
+ " --disabled-fill-color: #666;\n",
216
+ " }\n",
217
+ "\n",
218
+ " .colab-df-quickchart {\n",
219
+ " background-color: var(--bg-color);\n",
220
+ " border: none;\n",
221
+ " border-radius: 50%;\n",
222
+ " cursor: pointer;\n",
223
+ " display: none;\n",
224
+ " fill: var(--fill-color);\n",
225
+ " height: 32px;\n",
226
+ " padding: 0;\n",
227
+ " width: 32px;\n",
228
+ " }\n",
229
+ "\n",
230
+ " .colab-df-quickchart:hover {\n",
231
+ " background-color: var(--hover-bg-color);\n",
232
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
233
+ " fill: var(--button-hover-fill-color);\n",
234
+ " }\n",
235
+ "\n",
236
+ " .colab-df-quickchart-complete:disabled,\n",
237
+ " .colab-df-quickchart-complete:disabled:hover {\n",
238
+ " background-color: var(--disabled-bg-color);\n",
239
+ " fill: var(--disabled-fill-color);\n",
240
+ " box-shadow: none;\n",
241
+ " }\n",
242
+ "\n",
243
+ " .colab-df-spinner {\n",
244
+ " border: 2px solid var(--fill-color);\n",
245
+ " border-color: transparent;\n",
246
+ " border-bottom-color: var(--fill-color);\n",
247
+ " animation:\n",
248
+ " spin 1s steps(1) infinite;\n",
249
+ " }\n",
250
+ "\n",
251
+ " @keyframes spin {\n",
252
+ " 0% {\n",
253
+ " border-color: transparent;\n",
254
+ " border-bottom-color: var(--fill-color);\n",
255
+ " border-left-color: var(--fill-color);\n",
256
+ " }\n",
257
+ " 20% {\n",
258
+ " border-color: transparent;\n",
259
+ " border-left-color: var(--fill-color);\n",
260
+ " border-top-color: var(--fill-color);\n",
261
+ " }\n",
262
+ " 30% {\n",
263
+ " border-color: transparent;\n",
264
+ " border-left-color: var(--fill-color);\n",
265
+ " border-top-color: var(--fill-color);\n",
266
+ " border-right-color: var(--fill-color);\n",
267
+ " }\n",
268
+ " 40% {\n",
269
+ " border-color: transparent;\n",
270
+ " border-right-color: var(--fill-color);\n",
271
+ " border-top-color: var(--fill-color);\n",
272
+ " }\n",
273
+ " 60% {\n",
274
+ " border-color: transparent;\n",
275
+ " border-right-color: var(--fill-color);\n",
276
+ " }\n",
277
+ " 80% {\n",
278
+ " border-color: transparent;\n",
279
+ " border-right-color: var(--fill-color);\n",
280
+ " border-bottom-color: var(--fill-color);\n",
281
+ " }\n",
282
+ " 90% {\n",
283
+ " border-color: transparent;\n",
284
+ " border-bottom-color: var(--fill-color);\n",
285
+ " }\n",
286
+ " }\n",
287
+ "</style>\n",
288
+ "\n",
289
+ " <script>\n",
290
+ " async function quickchart(key) {\n",
291
+ " const quickchartButtonEl =\n",
292
+ " document.querySelector('#' + key + ' button');\n",
293
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
294
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
295
+ " try {\n",
296
+ " const charts = await google.colab.kernel.invokeFunction(\n",
297
+ " 'suggestCharts', [key], {});\n",
298
+ " } catch (error) {\n",
299
+ " console.error('Error during call to suggestCharts:', error);\n",
300
+ " }\n",
301
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
302
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
303
+ " }\n",
304
+ " (() => {\n",
305
+ " let quickchartButtonEl =\n",
306
+ " document.querySelector('#df-16f51769-ef56-4f10-baa6-4d88c99c8037 button');\n",
307
+ " quickchartButtonEl.style.display =\n",
308
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
309
+ " })();\n",
310
+ " </script>\n",
311
+ " </div>\n",
312
+ "\n",
313
+ " </div>\n",
314
+ " </div>\n"
315
+ ],
316
+ "application/vnd.google.colaboratory.intrinsic+json": {
317
+ "type": "dataframe",
318
+ "variable_name": "df",
319
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 5572,\n \"fields\": [\n {\n \"column\": \"v1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"v2\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5169,\n \"samples\": [\n \"Did u download the fring app?\",\n \"Pass dis to all ur contacts n see wat u get! Red;i'm in luv wid u. Blue;u put a smile on my face. Purple;u r realy hot. Pink;u r so swt. Orange;i thnk i lyk u. Green;i realy wana go out wid u. Yelow;i wnt u bck. Black;i'm jealous of u. Brown;i miss you Nw plz giv me one color\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Unnamed: 2\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 43,\n \"samples\": [\n \" GOD said\",\n \" SHE SHUDVETOLD U. DID URGRAN KNOW?NEWAY\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Unnamed: 3\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \" \\\\\\\"OH No! COMPETITION\\\\\\\". Who knew\",\n \" why to miss them\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Unnamed: 4\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"GNT:-)\\\"\",\n \" one day these two will become FREINDS FOREVER!\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
320
+ }
321
+ },
322
+ "metadata": {},
323
+ "execution_count": 4
324
+ }
325
+ ],
326
+ "source": [
327
+ "import pandas as pd\n",
328
+ "\n",
329
+ "df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/DS25/spam.csv',encoding='latin1')\n",
330
+ "\n",
331
+ "df.head()"
332
+ ]
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "source": [
337
+ "from google.colab import drive\n",
338
+ "drive.mount('/content/drive')"
339
+ ],
340
+ "metadata": {
341
+ "colab": {
342
+ "base_uri": "https://localhost:8080/"
343
+ },
344
+ "id": "XM9z3jeNj9tz",
345
+ "outputId": "d13928c6-3292-481f-9567-d0b9da377d10"
346
+ },
347
+ "id": "XM9z3jeNj9tz",
348
+ "execution_count": 5,
349
+ "outputs": [
350
+ {
351
+ "output_type": "stream",
352
+ "name": "stdout",
353
+ "text": [
354
+ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
355
+ ]
356
+ }
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": 6,
362
+ "id": "a40ec53a",
363
+ "metadata": {
364
+ "colab": {
365
+ "base_uri": "https://localhost:8080/",
366
+ "height": 206
367
+ },
368
+ "id": "a40ec53a",
369
+ "outputId": "f7f07c22-1764-43a4-9e5e-3a113ae64c08"
370
+ },
371
+ "outputs": [
372
+ {
373
+ "output_type": "execute_result",
374
+ "data": {
375
+ "text/plain": [
376
+ " v2 v1\n",
377
+ "0 Go until jurong point, crazy.. Available only ... ham\n",
378
+ "1 Ok lar... Joking wif u oni... ham\n",
379
+ "2 Free entry in 2 a wkly comp to win FA Cup fina... spam\n",
380
+ "3 U dun say so early hor... U c already then say... ham\n",
381
+ "4 Nah I don't think he goes to usf, he lives aro... ham"
382
+ ],
383
+ "text/html": [
384
+ "\n",
385
+ " <div id=\"df-816b900e-a637-49d2-9fc5-4c428d6cf49f\" class=\"colab-df-container\">\n",
386
+ " <div>\n",
387
+ "<style scoped>\n",
388
+ " .dataframe tbody tr th:only-of-type {\n",
389
+ " vertical-align: middle;\n",
390
+ " }\n",
391
+ "\n",
392
+ " .dataframe tbody tr th {\n",
393
+ " vertical-align: top;\n",
394
+ " }\n",
395
+ "\n",
396
+ " .dataframe thead th {\n",
397
+ " text-align: right;\n",
398
+ " }\n",
399
+ "</style>\n",
400
+ "<table border=\"1\" class=\"dataframe\">\n",
401
+ " <thead>\n",
402
+ " <tr style=\"text-align: right;\">\n",
403
+ " <th></th>\n",
404
+ " <th>v2</th>\n",
405
+ " <th>v1</th>\n",
406
+ " </tr>\n",
407
+ " </thead>\n",
408
+ " <tbody>\n",
409
+ " <tr>\n",
410
+ " <th>0</th>\n",
411
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
412
+ " <td>ham</td>\n",
413
+ " </tr>\n",
414
+ " <tr>\n",
415
+ " <th>1</th>\n",
416
+ " <td>Ok lar... Joking wif u oni...</td>\n",
417
+ " <td>ham</td>\n",
418
+ " </tr>\n",
419
+ " <tr>\n",
420
+ " <th>2</th>\n",
421
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
422
+ " <td>spam</td>\n",
423
+ " </tr>\n",
424
+ " <tr>\n",
425
+ " <th>3</th>\n",
426
+ " <td>U dun say so early hor... U c already then say...</td>\n",
427
+ " <td>ham</td>\n",
428
+ " </tr>\n",
429
+ " <tr>\n",
430
+ " <th>4</th>\n",
431
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
432
+ " <td>ham</td>\n",
433
+ " </tr>\n",
434
+ " </tbody>\n",
435
+ "</table>\n",
436
+ "</div>\n",
437
+ " <div class=\"colab-df-buttons\">\n",
438
+ "\n",
439
+ " <div class=\"colab-df-container\">\n",
440
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-816b900e-a637-49d2-9fc5-4c428d6cf49f')\"\n",
441
+ " title=\"Convert this dataframe to an interactive table.\"\n",
442
+ " style=\"display:none;\">\n",
443
+ "\n",
444
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
445
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
446
+ " </svg>\n",
447
+ " </button>\n",
448
+ "\n",
449
+ " <style>\n",
450
+ " .colab-df-container {\n",
451
+ " display:flex;\n",
452
+ " gap: 12px;\n",
453
+ " }\n",
454
+ "\n",
455
+ " .colab-df-convert {\n",
456
+ " background-color: #E8F0FE;\n",
457
+ " border: none;\n",
458
+ " border-radius: 50%;\n",
459
+ " cursor: pointer;\n",
460
+ " display: none;\n",
461
+ " fill: #1967D2;\n",
462
+ " height: 32px;\n",
463
+ " padding: 0 0 0 0;\n",
464
+ " width: 32px;\n",
465
+ " }\n",
466
+ "\n",
467
+ " .colab-df-convert:hover {\n",
468
+ " background-color: #E2EBFA;\n",
469
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
470
+ " fill: #174EA6;\n",
471
+ " }\n",
472
+ "\n",
473
+ " .colab-df-buttons div {\n",
474
+ " margin-bottom: 4px;\n",
475
+ " }\n",
476
+ "\n",
477
+ " [theme=dark] .colab-df-convert {\n",
478
+ " background-color: #3B4455;\n",
479
+ " fill: #D2E3FC;\n",
480
+ " }\n",
481
+ "\n",
482
+ " [theme=dark] .colab-df-convert:hover {\n",
483
+ " background-color: #434B5C;\n",
484
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
485
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
486
+ " fill: #FFFFFF;\n",
487
+ " }\n",
488
+ " </style>\n",
489
+ "\n",
490
+ " <script>\n",
491
+ " const buttonEl =\n",
492
+ " document.querySelector('#df-816b900e-a637-49d2-9fc5-4c428d6cf49f button.colab-df-convert');\n",
493
+ " buttonEl.style.display =\n",
494
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
495
+ "\n",
496
+ " async function convertToInteractive(key) {\n",
497
+ " const element = document.querySelector('#df-816b900e-a637-49d2-9fc5-4c428d6cf49f');\n",
498
+ " const dataTable =\n",
499
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
500
+ " [key], {});\n",
501
+ " if (!dataTable) return;\n",
502
+ "\n",
503
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
504
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
505
+ " + ' to learn more about interactive tables.';\n",
506
+ " element.innerHTML = '';\n",
507
+ " dataTable['output_type'] = 'display_data';\n",
508
+ " await google.colab.output.renderOutput(dataTable, element);\n",
509
+ " const docLink = document.createElement('div');\n",
510
+ " docLink.innerHTML = docLinkHtml;\n",
511
+ " element.appendChild(docLink);\n",
512
+ " }\n",
513
+ " </script>\n",
514
+ " </div>\n",
515
+ "\n",
516
+ "\n",
517
+ " <div id=\"df-2b440129-3804-41fb-9fd2-b28993e75f97\">\n",
518
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2b440129-3804-41fb-9fd2-b28993e75f97')\"\n",
519
+ " title=\"Suggest charts\"\n",
520
+ " style=\"display:none;\">\n",
521
+ "\n",
522
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
523
+ " width=\"24px\">\n",
524
+ " <g>\n",
525
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
526
+ " </g>\n",
527
+ "</svg>\n",
528
+ " </button>\n",
529
+ "\n",
530
+ "<style>\n",
531
+ " .colab-df-quickchart {\n",
532
+ " --bg-color: #E8F0FE;\n",
533
+ " --fill-color: #1967D2;\n",
534
+ " --hover-bg-color: #E2EBFA;\n",
535
+ " --hover-fill-color: #174EA6;\n",
536
+ " --disabled-fill-color: #AAA;\n",
537
+ " --disabled-bg-color: #DDD;\n",
538
+ " }\n",
539
+ "\n",
540
+ " [theme=dark] .colab-df-quickchart {\n",
541
+ " --bg-color: #3B4455;\n",
542
+ " --fill-color: #D2E3FC;\n",
543
+ " --hover-bg-color: #434B5C;\n",
544
+ " --hover-fill-color: #FFFFFF;\n",
545
+ " --disabled-bg-color: #3B4455;\n",
546
+ " --disabled-fill-color: #666;\n",
547
+ " }\n",
548
+ "\n",
549
+ " .colab-df-quickchart {\n",
550
+ " background-color: var(--bg-color);\n",
551
+ " border: none;\n",
552
+ " border-radius: 50%;\n",
553
+ " cursor: pointer;\n",
554
+ " display: none;\n",
555
+ " fill: var(--fill-color);\n",
556
+ " height: 32px;\n",
557
+ " padding: 0;\n",
558
+ " width: 32px;\n",
559
+ " }\n",
560
+ "\n",
561
+ " .colab-df-quickchart:hover {\n",
562
+ " background-color: var(--hover-bg-color);\n",
563
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
564
+ " fill: var(--button-hover-fill-color);\n",
565
+ " }\n",
566
+ "\n",
567
+ " .colab-df-quickchart-complete:disabled,\n",
568
+ " .colab-df-quickchart-complete:disabled:hover {\n",
569
+ " background-color: var(--disabled-bg-color);\n",
570
+ " fill: var(--disabled-fill-color);\n",
571
+ " box-shadow: none;\n",
572
+ " }\n",
573
+ "\n",
574
+ " .colab-df-spinner {\n",
575
+ " border: 2px solid var(--fill-color);\n",
576
+ " border-color: transparent;\n",
577
+ " border-bottom-color: var(--fill-color);\n",
578
+ " animation:\n",
579
+ " spin 1s steps(1) infinite;\n",
580
+ " }\n",
581
+ "\n",
582
+ " @keyframes spin {\n",
583
+ " 0% {\n",
584
+ " border-color: transparent;\n",
585
+ " border-bottom-color: var(--fill-color);\n",
586
+ " border-left-color: var(--fill-color);\n",
587
+ " }\n",
588
+ " 20% {\n",
589
+ " border-color: transparent;\n",
590
+ " border-left-color: var(--fill-color);\n",
591
+ " border-top-color: var(--fill-color);\n",
592
+ " }\n",
593
+ " 30% {\n",
594
+ " border-color: transparent;\n",
595
+ " border-left-color: var(--fill-color);\n",
596
+ " border-top-color: var(--fill-color);\n",
597
+ " border-right-color: var(--fill-color);\n",
598
+ " }\n",
599
+ " 40% {\n",
600
+ " border-color: transparent;\n",
601
+ " border-right-color: var(--fill-color);\n",
602
+ " border-top-color: var(--fill-color);\n",
603
+ " }\n",
604
+ " 60% {\n",
605
+ " border-color: transparent;\n",
606
+ " border-right-color: var(--fill-color);\n",
607
+ " }\n",
608
+ " 80% {\n",
609
+ " border-color: transparent;\n",
610
+ " border-right-color: var(--fill-color);\n",
611
+ " border-bottom-color: var(--fill-color);\n",
612
+ " }\n",
613
+ " 90% {\n",
614
+ " border-color: transparent;\n",
615
+ " border-bottom-color: var(--fill-color);\n",
616
+ " }\n",
617
+ " }\n",
618
+ "</style>\n",
619
+ "\n",
620
+ " <script>\n",
621
+ " async function quickchart(key) {\n",
622
+ " const quickchartButtonEl =\n",
623
+ " document.querySelector('#' + key + ' button');\n",
624
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
625
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
626
+ " try {\n",
627
+ " const charts = await google.colab.kernel.invokeFunction(\n",
628
+ " 'suggestCharts', [key], {});\n",
629
+ " } catch (error) {\n",
630
+ " console.error('Error during call to suggestCharts:', error);\n",
631
+ " }\n",
632
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
633
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
634
+ " }\n",
635
+ " (() => {\n",
636
+ " let quickchartButtonEl =\n",
637
+ " document.querySelector('#df-2b440129-3804-41fb-9fd2-b28993e75f97 button');\n",
638
+ " quickchartButtonEl.style.display =\n",
639
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
640
+ " })();\n",
641
+ " </script>\n",
642
+ " </div>\n",
643
+ "\n",
644
+ " </div>\n",
645
+ " </div>\n"
646
+ ],
647
+ "application/vnd.google.colaboratory.intrinsic+json": {
648
+ "type": "dataframe",
649
+ "variable_name": "df",
650
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 5572,\n \"fields\": [\n {\n \"column\": \"v2\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5169,\n \"samples\": [\n \"Did u download the fring app?\",\n \"Pass dis to all ur contacts n see wat u get! Red;i'm in luv wid u. Blue;u put a smile on my face. Purple;u r realy hot. Pink;u r so swt. Orange;i thnk i lyk u. Green;i realy wana go out wid u. Yelow;i wnt u bck. Black;i'm jealous of u. Brown;i miss you Nw plz giv me one color\",\n \"Ok...\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"v1\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
651
+ }
652
+ },
653
+ "metadata": {},
654
+ "execution_count": 6
655
+ }
656
+ ],
657
+ "source": [
658
+ "df = df[['v2','v1']]\n",
659
+ "\n",
660
+ "df.head()"
661
+ ]
662
+ },
663
+ {
664
+ "cell_type": "code",
665
+ "execution_count": 7,
666
+ "id": "12ef474a",
667
+ "metadata": {
668
+ "colab": {
669
+ "base_uri": "https://localhost:8080/",
670
+ "height": 147
671
+ },
672
+ "id": "12ef474a",
673
+ "outputId": "d9b84ab1-f142-4d31-d334-0a20677f4c72"
674
+ },
675
+ "outputs": [
676
+ {
677
+ "output_type": "execute_result",
678
+ "data": {
679
+ "text/plain": [
680
+ "v2 0\n",
681
+ "v1 0\n",
682
+ "dtype: int64"
683
+ ],
684
+ "text/html": [
685
+ "<div>\n",
686
+ "<style scoped>\n",
687
+ " .dataframe tbody tr th:only-of-type {\n",
688
+ " vertical-align: middle;\n",
689
+ " }\n",
690
+ "\n",
691
+ " .dataframe tbody tr th {\n",
692
+ " vertical-align: top;\n",
693
+ " }\n",
694
+ "\n",
695
+ " .dataframe thead th {\n",
696
+ " text-align: right;\n",
697
+ " }\n",
698
+ "</style>\n",
699
+ "<table border=\"1\" class=\"dataframe\">\n",
700
+ " <thead>\n",
701
+ " <tr style=\"text-align: right;\">\n",
702
+ " <th></th>\n",
703
+ " <th>0</th>\n",
704
+ " </tr>\n",
705
+ " </thead>\n",
706
+ " <tbody>\n",
707
+ " <tr>\n",
708
+ " <th>v2</th>\n",
709
+ " <td>0</td>\n",
710
+ " </tr>\n",
711
+ " <tr>\n",
712
+ " <th>v1</th>\n",
713
+ " <td>0</td>\n",
714
+ " </tr>\n",
715
+ " </tbody>\n",
716
+ "</table>\n",
717
+ "</div><br><label><b>dtype:</b> int64</label>"
718
+ ]
719
+ },
720
+ "metadata": {},
721
+ "execution_count": 7
722
+ }
723
+ ],
724
+ "source": [
725
+ "df.isnull().sum()"
726
+ ]
727
+ },
728
+ {
729
+ "cell_type": "code",
730
+ "execution_count": 8,
731
+ "id": "776032f0",
732
+ "metadata": {
733
+ "colab": {
734
+ "base_uri": "https://localhost:8080/",
735
+ "height": 206
736
+ },
737
+ "id": "776032f0",
738
+ "outputId": "ba137f5e-66e2-4d11-9888-5fe20073492b"
739
+ },
740
+ "outputs": [
741
+ {
742
+ "output_type": "execute_result",
743
+ "data": {
744
+ "text/plain": [
745
+ " v2 v1\n",
746
+ "0 Go until jurong point, crazy.. Available only ... 0\n",
747
+ "1 Ok lar... Joking wif u oni... 0\n",
748
+ "2 Free entry in 2 a wkly comp to win FA Cup fina... 1\n",
749
+ "3 U dun say so early hor... U c already then say... 0\n",
750
+ "4 Nah I don't think he goes to usf, he lives aro... 0"
751
+ ],
752
+ "text/html": [
753
+ "\n",
754
+ " <div id=\"df-b752ef8b-02ae-40c0-b595-1e96d5b72641\" class=\"colab-df-container\">\n",
755
+ " <div>\n",
756
+ "<style scoped>\n",
757
+ " .dataframe tbody tr th:only-of-type {\n",
758
+ " vertical-align: middle;\n",
759
+ " }\n",
760
+ "\n",
761
+ " .dataframe tbody tr th {\n",
762
+ " vertical-align: top;\n",
763
+ " }\n",
764
+ "\n",
765
+ " .dataframe thead th {\n",
766
+ " text-align: right;\n",
767
+ " }\n",
768
+ "</style>\n",
769
+ "<table border=\"1\" class=\"dataframe\">\n",
770
+ " <thead>\n",
771
+ " <tr style=\"text-align: right;\">\n",
772
+ " <th></th>\n",
773
+ " <th>v2</th>\n",
774
+ " <th>v1</th>\n",
775
+ " </tr>\n",
776
+ " </thead>\n",
777
+ " <tbody>\n",
778
+ " <tr>\n",
779
+ " <th>0</th>\n",
780
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
781
+ " <td>0</td>\n",
782
+ " </tr>\n",
783
+ " <tr>\n",
784
+ " <th>1</th>\n",
785
+ " <td>Ok lar... Joking wif u oni...</td>\n",
786
+ " <td>0</td>\n",
787
+ " </tr>\n",
788
+ " <tr>\n",
789
+ " <th>2</th>\n",
790
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
791
+ " <td>1</td>\n",
792
+ " </tr>\n",
793
+ " <tr>\n",
794
+ " <th>3</th>\n",
795
+ " <td>U dun say so early hor... U c already then say...</td>\n",
796
+ " <td>0</td>\n",
797
+ " </tr>\n",
798
+ " <tr>\n",
799
+ " <th>4</th>\n",
800
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
801
+ " <td>0</td>\n",
802
+ " </tr>\n",
803
+ " </tbody>\n",
804
+ "</table>\n",
805
+ "</div>\n",
806
+ " <div class=\"colab-df-buttons\">\n",
807
+ "\n",
808
+ " <div class=\"colab-df-container\">\n",
809
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b752ef8b-02ae-40c0-b595-1e96d5b72641')\"\n",
810
+ " title=\"Convert this dataframe to an interactive table.\"\n",
811
+ " style=\"display:none;\">\n",
812
+ "\n",
813
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
814
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
815
+ " </svg>\n",
816
+ " </button>\n",
817
+ "\n",
818
+ " <style>\n",
819
+ " .colab-df-container {\n",
820
+ " display:flex;\n",
821
+ " gap: 12px;\n",
822
+ " }\n",
823
+ "\n",
824
+ " .colab-df-convert {\n",
825
+ " background-color: #E8F0FE;\n",
826
+ " border: none;\n",
827
+ " border-radius: 50%;\n",
828
+ " cursor: pointer;\n",
829
+ " display: none;\n",
830
+ " fill: #1967D2;\n",
831
+ " height: 32px;\n",
832
+ " padding: 0 0 0 0;\n",
833
+ " width: 32px;\n",
834
+ " }\n",
835
+ "\n",
836
+ " .colab-df-convert:hover {\n",
837
+ " background-color: #E2EBFA;\n",
838
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
839
+ " fill: #174EA6;\n",
840
+ " }\n",
841
+ "\n",
842
+ " .colab-df-buttons div {\n",
843
+ " margin-bottom: 4px;\n",
844
+ " }\n",
845
+ "\n",
846
+ " [theme=dark] .colab-df-convert {\n",
847
+ " background-color: #3B4455;\n",
848
+ " fill: #D2E3FC;\n",
849
+ " }\n",
850
+ "\n",
851
+ " [theme=dark] .colab-df-convert:hover {\n",
852
+ " background-color: #434B5C;\n",
853
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
854
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
855
+ " fill: #FFFFFF;\n",
856
+ " }\n",
857
+ " </style>\n",
858
+ "\n",
859
+ " <script>\n",
860
+ " const buttonEl =\n",
861
+ " document.querySelector('#df-b752ef8b-02ae-40c0-b595-1e96d5b72641 button.colab-df-convert');\n",
862
+ " buttonEl.style.display =\n",
863
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
864
+ "\n",
865
+ " async function convertToInteractive(key) {\n",
866
+ " const element = document.querySelector('#df-b752ef8b-02ae-40c0-b595-1e96d5b72641');\n",
867
+ " const dataTable =\n",
868
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
869
+ " [key], {});\n",
870
+ " if (!dataTable) return;\n",
871
+ "\n",
872
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
873
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
874
+ " + ' to learn more about interactive tables.';\n",
875
+ " element.innerHTML = '';\n",
876
+ " dataTable['output_type'] = 'display_data';\n",
877
+ " await google.colab.output.renderOutput(dataTable, element);\n",
878
+ " const docLink = document.createElement('div');\n",
879
+ " docLink.innerHTML = docLinkHtml;\n",
880
+ " element.appendChild(docLink);\n",
881
+ " }\n",
882
+ " </script>\n",
883
+ " </div>\n",
884
+ "\n",
885
+ "\n",
886
+ " <div id=\"df-7c5d170c-b02d-4338-aa14-8b61340f318e\">\n",
887
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-7c5d170c-b02d-4338-aa14-8b61340f318e')\"\n",
888
+ " title=\"Suggest charts\"\n",
889
+ " style=\"display:none;\">\n",
890
+ "\n",
891
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
892
+ " width=\"24px\">\n",
893
+ " <g>\n",
894
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
895
+ " </g>\n",
896
+ "</svg>\n",
897
+ " </button>\n",
898
+ "\n",
899
+ "<style>\n",
900
+ " .colab-df-quickchart {\n",
901
+ " --bg-color: #E8F0FE;\n",
902
+ " --fill-color: #1967D2;\n",
903
+ " --hover-bg-color: #E2EBFA;\n",
904
+ " --hover-fill-color: #174EA6;\n",
905
+ " --disabled-fill-color: #AAA;\n",
906
+ " --disabled-bg-color: #DDD;\n",
907
+ " }\n",
908
+ "\n",
909
+ " [theme=dark] .colab-df-quickchart {\n",
910
+ " --bg-color: #3B4455;\n",
911
+ " --fill-color: #D2E3FC;\n",
912
+ " --hover-bg-color: #434B5C;\n",
913
+ " --hover-fill-color: #FFFFFF;\n",
914
+ " --disabled-bg-color: #3B4455;\n",
915
+ " --disabled-fill-color: #666;\n",
916
+ " }\n",
917
+ "\n",
918
+ " .colab-df-quickchart {\n",
919
+ " background-color: var(--bg-color);\n",
920
+ " border: none;\n",
921
+ " border-radius: 50%;\n",
922
+ " cursor: pointer;\n",
923
+ " display: none;\n",
924
+ " fill: var(--fill-color);\n",
925
+ " height: 32px;\n",
926
+ " padding: 0;\n",
927
+ " width: 32px;\n",
928
+ " }\n",
929
+ "\n",
930
+ " .colab-df-quickchart:hover {\n",
931
+ " background-color: var(--hover-bg-color);\n",
932
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
933
+ " fill: var(--button-hover-fill-color);\n",
934
+ " }\n",
935
+ "\n",
936
+ " .colab-df-quickchart-complete:disabled,\n",
937
+ " .colab-df-quickchart-complete:disabled:hover {\n",
938
+ " background-color: var(--disabled-bg-color);\n",
939
+ " fill: var(--disabled-fill-color);\n",
940
+ " box-shadow: none;\n",
941
+ " }\n",
942
+ "\n",
943
+ " .colab-df-spinner {\n",
944
+ " border: 2px solid var(--fill-color);\n",
945
+ " border-color: transparent;\n",
946
+ " border-bottom-color: var(--fill-color);\n",
947
+ " animation:\n",
948
+ " spin 1s steps(1) infinite;\n",
949
+ " }\n",
950
+ "\n",
951
+ " @keyframes spin {\n",
952
+ " 0% {\n",
953
+ " border-color: transparent;\n",
954
+ " border-bottom-color: var(--fill-color);\n",
955
+ " border-left-color: var(--fill-color);\n",
956
+ " }\n",
957
+ " 20% {\n",
958
+ " border-color: transparent;\n",
959
+ " border-left-color: var(--fill-color);\n",
960
+ " border-top-color: var(--fill-color);\n",
961
+ " }\n",
962
+ " 30% {\n",
963
+ " border-color: transparent;\n",
964
+ " border-left-color: var(--fill-color);\n",
965
+ " border-top-color: var(--fill-color);\n",
966
+ " border-right-color: var(--fill-color);\n",
967
+ " }\n",
968
+ " 40% {\n",
969
+ " border-color: transparent;\n",
970
+ " border-right-color: var(--fill-color);\n",
971
+ " border-top-color: var(--fill-color);\n",
972
+ " }\n",
973
+ " 60% {\n",
974
+ " border-color: transparent;\n",
975
+ " border-right-color: var(--fill-color);\n",
976
+ " }\n",
977
+ " 80% {\n",
978
+ " border-color: transparent;\n",
979
+ " border-right-color: var(--fill-color);\n",
980
+ " border-bottom-color: var(--fill-color);\n",
981
+ " }\n",
982
+ " 90% {\n",
983
+ " border-color: transparent;\n",
984
+ " border-bottom-color: var(--fill-color);\n",
985
+ " }\n",
986
+ " }\n",
987
+ "</style>\n",
988
+ "\n",
989
+ " <script>\n",
990
+ " async function quickchart(key) {\n",
991
+ " const quickchartButtonEl =\n",
992
+ " document.querySelector('#' + key + ' button');\n",
993
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
994
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
995
+ " try {\n",
996
+ " const charts = await google.colab.kernel.invokeFunction(\n",
997
+ " 'suggestCharts', [key], {});\n",
998
+ " } catch (error) {\n",
999
+ " console.error('Error during call to suggestCharts:', error);\n",
1000
+ " }\n",
1001
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
1002
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
1003
+ " }\n",
1004
+ " (() => {\n",
1005
+ " let quickchartButtonEl =\n",
1006
+ " document.querySelector('#df-7c5d170c-b02d-4338-aa14-8b61340f318e button');\n",
1007
+ " quickchartButtonEl.style.display =\n",
1008
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1009
+ " })();\n",
1010
+ " </script>\n",
1011
+ " </div>\n",
1012
+ "\n",
1013
+ " </div>\n",
1014
+ " </div>\n"
1015
+ ],
1016
+ "application/vnd.google.colaboratory.intrinsic+json": {
1017
+ "type": "dataframe",
1018
+ "variable_name": "df",
1019
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 5572,\n \"fields\": [\n {\n \"column\": \"v2\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5169,\n \"samples\": [\n \"Did u download the fring app?\",\n \"Pass dis to all ur contacts n see wat u get! Red;i'm in luv wid u. Blue;u put a smile on my face. Purple;u r realy hot. Pink;u r so swt. Orange;i thnk i lyk u. Green;i realy wana go out wid u. Yelow;i wnt u bck. Black;i'm jealous of u. Brown;i miss you Nw plz giv me one color\",\n \"Ok...\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"v1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
1020
+ }
1021
+ },
1022
+ "metadata": {},
1023
+ "execution_count": 8
1024
+ }
1025
+ ],
1026
+ "source": [
1027
+ "df['v1'] = df['v1'].map({'ham':0,'spam':1})\n",
1028
+ "\n",
1029
+ "df.head()"
1030
+ ]
1031
+ },
1032
+ {
1033
+ "cell_type": "code",
1034
+ "execution_count": 9,
1035
+ "id": "44c5083f",
1036
+ "metadata": {
1037
+ "id": "44c5083f"
1038
+ },
1039
+ "outputs": [],
1040
+ "source": [
1041
+ "x = df['v2']\n",
1042
+ "\n",
1043
+ "y = df['v1']"
1044
+ ]
1045
+ },
1046
+ {
1047
+ "cell_type": "code",
1048
+ "execution_count": 10,
1049
+ "id": "f9654d1f",
1050
+ "metadata": {
1051
+ "id": "f9654d1f"
1052
+ },
1053
+ "outputs": [],
1054
+ "source": [
1055
+ "from sklearn.model_selection import train_test_split"
1056
+ ]
1057
+ },
1058
+ {
1059
+ "cell_type": "code",
1060
+ "execution_count": 11,
1061
+ "id": "1e230240",
1062
+ "metadata": {
1063
+ "id": "1e230240"
1064
+ },
1065
+ "outputs": [],
1066
+ "source": [
1067
+ "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)"
1068
+ ]
1069
+ },
1070
+ {
1071
+ "cell_type": "code",
1072
+ "execution_count": 12,
1073
+ "id": "b3f2dc41",
1074
+ "metadata": {
1075
+ "colab": {
1076
+ "base_uri": "https://localhost:8080/"
1077
+ },
1078
+ "id": "b3f2dc41",
1079
+ "outputId": "772e0aac-39b2-4d68-ae17-ad6f5e0f650e"
1080
+ },
1081
+ "outputs": [
1082
+ {
1083
+ "output_type": "execute_result",
1084
+ "data": {
1085
+ "text/plain": [
1086
+ "4457"
1087
+ ]
1088
+ },
1089
+ "metadata": {},
1090
+ "execution_count": 12
1091
+ }
1092
+ ],
1093
+ "source": [
1094
+ "len(x_train)"
1095
+ ]
1096
+ },
1097
+ {
1098
+ "cell_type": "code",
1099
+ "execution_count": 13,
1100
+ "id": "21888d5b",
1101
+ "metadata": {
1102
+ "colab": {
1103
+ "base_uri": "https://localhost:8080/"
1104
+ },
1105
+ "id": "21888d5b",
1106
+ "outputId": "43be9fb4-d76d-4484-b2c7-e29193d1dd33"
1107
+ },
1108
+ "outputs": [
1109
+ {
1110
+ "output_type": "execute_result",
1111
+ "data": {
1112
+ "text/plain": [
1113
+ "1115"
1114
+ ]
1115
+ },
1116
+ "metadata": {},
1117
+ "execution_count": 13
1118
+ }
1119
+ ],
1120
+ "source": [
1121
+ "len(x_test)"
1122
+ ]
1123
+ },
1124
+ {
1125
+ "cell_type": "markdown",
1126
+ "id": "4e30ca04",
1127
+ "metadata": {
1128
+ "id": "4e30ca04"
1129
+ },
1130
+ "source": [
1131
+ "Let's preprocess the text column now (convert the messages to TF-IDF features)."
1132
+ ]
1133
+ },
1134
+ {
1135
+ "cell_type": "code",
1136
+ "execution_count": 14,
1137
+ "id": "384b9817",
1138
+ "metadata": {
1139
+ "id": "384b9817"
1140
+ },
1141
+ "outputs": [],
1142
+ "source": [
1143
+ "from sklearn.feature_extraction.text import TfidfVectorizer"
1144
+ ]
1145
+ },
1146
+ {
1147
+ "cell_type": "code",
1148
+ "execution_count": 15,
1149
+ "id": "90f45ff7",
1150
+ "metadata": {
1151
+ "id": "90f45ff7"
1152
+ },
1153
+ "outputs": [],
1154
+ "source": [
1155
+ "tfd = TfidfVectorizer(stop_words='english')\n",
1156
+ "\n",
1157
+ "\n",
1158
+ "x_train_final = tfd.fit_transform(x_train)\n",
1159
+ "\n",
1160
+ "x_test_final = tfd.transform(x_test)"
1161
+ ]
1162
+ },
1163
+ {
1164
+ "cell_type": "markdown",
1165
+ "id": "473b0720",
1166
+ "metadata": {
1167
+ "id": "473b0720"
1168
+ },
1169
+ "source": [
1170
+ "## model training"
1171
+ ]
1172
+ },
1173
+ {
1174
+ "cell_type": "code",
1175
+ "execution_count": 16,
1176
+ "id": "94f28d1b",
1177
+ "metadata": {
1178
+ "id": "94f28d1b"
1179
+ },
1180
+ "outputs": [],
1181
+ "source": [
1182
+ "from sklearn.naive_bayes import MultinomialNB"
1183
+ ]
1184
+ },
1185
+ {
1186
+ "cell_type": "code",
1187
+ "execution_count": 17,
1188
+ "id": "d406c405",
1189
+ "metadata": {
1190
+ "colab": {
1191
+ "base_uri": "https://localhost:8080/",
1192
+ "height": 80
1193
+ },
1194
+ "id": "d406c405",
1195
+ "outputId": "d7e0082d-9c38-42ff-9e60-9c446d509c45"
1196
+ },
1197
+ "outputs": [
1198
+ {
1199
+ "output_type": "execute_result",
1200
+ "data": {
1201
+ "text/plain": [
1202
+ "MultinomialNB()"
1203
+ ],
1204
+ "text/html": [
1205
+ "<style>#sk-container-id-1 {\n",
1206
+ " /* Definition of color scheme common for light and dark mode */\n",
1207
+ " --sklearn-color-text: #000;\n",
1208
+ " --sklearn-color-text-muted: #666;\n",
1209
+ " --sklearn-color-line: gray;\n",
1210
+ " /* Definition of color scheme for unfitted estimators */\n",
1211
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
1212
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
1213
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
1214
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
1215
+ " /* Definition of color scheme for fitted estimators */\n",
1216
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
1217
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
1218
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
1219
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
1220
+ "\n",
1221
+ " /* Specific color for light theme */\n",
1222
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
1223
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
1224
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
1225
+ " --sklearn-color-icon: #696969;\n",
1226
+ "\n",
1227
+ " @media (prefers-color-scheme: dark) {\n",
1228
+ " /* Redefinition of color scheme for dark theme */\n",
1229
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
1230
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
1231
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
1232
+ " --sklearn-color-icon: #878787;\n",
1233
+ " }\n",
1234
+ "}\n",
1235
+ "\n",
1236
+ "#sk-container-id-1 {\n",
1237
+ " color: var(--sklearn-color-text);\n",
1238
+ "}\n",
1239
+ "\n",
1240
+ "#sk-container-id-1 pre {\n",
1241
+ " padding: 0;\n",
1242
+ "}\n",
1243
+ "\n",
1244
+ "#sk-container-id-1 input.sk-hidden--visually {\n",
1245
+ " border: 0;\n",
1246
+ " clip: rect(1px 1px 1px 1px);\n",
1247
+ " clip: rect(1px, 1px, 1px, 1px);\n",
1248
+ " height: 1px;\n",
1249
+ " margin: -1px;\n",
1250
+ " overflow: hidden;\n",
1251
+ " padding: 0;\n",
1252
+ " position: absolute;\n",
1253
+ " width: 1px;\n",
1254
+ "}\n",
1255
+ "\n",
1256
+ "#sk-container-id-1 div.sk-dashed-wrapped {\n",
1257
+ " border: 1px dashed var(--sklearn-color-line);\n",
1258
+ " margin: 0 0.4em 0.5em 0.4em;\n",
1259
+ " box-sizing: border-box;\n",
1260
+ " padding-bottom: 0.4em;\n",
1261
+ " background-color: var(--sklearn-color-background);\n",
1262
+ "}\n",
1263
+ "\n",
1264
+ "#sk-container-id-1 div.sk-container {\n",
1265
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
1266
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
1267
+ " so we also need the `!important` here to be able to override the\n",
1268
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
1269
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
1270
+ " display: inline-block !important;\n",
1271
+ " position: relative;\n",
1272
+ "}\n",
1273
+ "\n",
1274
+ "#sk-container-id-1 div.sk-text-repr-fallback {\n",
1275
+ " display: none;\n",
1276
+ "}\n",
1277
+ "\n",
1278
+ "div.sk-parallel-item,\n",
1279
+ "div.sk-serial,\n",
1280
+ "div.sk-item {\n",
1281
+ " /* draw centered vertical line to link estimators */\n",
1282
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
1283
+ " background-size: 2px 100%;\n",
1284
+ " background-repeat: no-repeat;\n",
1285
+ " background-position: center center;\n",
1286
+ "}\n",
1287
+ "\n",
1288
+ "/* Parallel-specific style estimator block */\n",
1289
+ "\n",
1290
+ "#sk-container-id-1 div.sk-parallel-item::after {\n",
1291
+ " content: \"\";\n",
1292
+ " width: 100%;\n",
1293
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
1294
+ " flex-grow: 1;\n",
1295
+ "}\n",
1296
+ "\n",
1297
+ "#sk-container-id-1 div.sk-parallel {\n",
1298
+ " display: flex;\n",
1299
+ " align-items: stretch;\n",
1300
+ " justify-content: center;\n",
1301
+ " background-color: var(--sklearn-color-background);\n",
1302
+ " position: relative;\n",
1303
+ "}\n",
1304
+ "\n",
1305
+ "#sk-container-id-1 div.sk-parallel-item {\n",
1306
+ " display: flex;\n",
1307
+ " flex-direction: column;\n",
1308
+ "}\n",
1309
+ "\n",
1310
+ "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
1311
+ " align-self: flex-end;\n",
1312
+ " width: 50%;\n",
1313
+ "}\n",
1314
+ "\n",
1315
+ "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
1316
+ " align-self: flex-start;\n",
1317
+ " width: 50%;\n",
1318
+ "}\n",
1319
+ "\n",
1320
+ "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
1321
+ " width: 0;\n",
1322
+ "}\n",
1323
+ "\n",
1324
+ "/* Serial-specific style estimator block */\n",
1325
+ "\n",
1326
+ "#sk-container-id-1 div.sk-serial {\n",
1327
+ " display: flex;\n",
1328
+ " flex-direction: column;\n",
1329
+ " align-items: center;\n",
1330
+ " background-color: var(--sklearn-color-background);\n",
1331
+ " padding-right: 1em;\n",
1332
+ " padding-left: 1em;\n",
1333
+ "}\n",
1334
+ "\n",
1335
+ "\n",
1336
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
1337
+ "clickable and can be expanded/collapsed.\n",
1338
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
1339
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
1340
+ "*/\n",
1341
+ "\n",
1342
+ "/* Pipeline and ColumnTransformer style (default) */\n",
1343
+ "\n",
1344
+ "#sk-container-id-1 div.sk-toggleable {\n",
1345
+ " /* Default theme specific background. It is overwritten whether we have a\n",
1346
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
1347
+ " background-color: var(--sklearn-color-background);\n",
1348
+ "}\n",
1349
+ "\n",
1350
+ "/* Toggleable label */\n",
1351
+ "#sk-container-id-1 label.sk-toggleable__label {\n",
1352
+ " cursor: pointer;\n",
1353
+ " display: flex;\n",
1354
+ " width: 100%;\n",
1355
+ " margin-bottom: 0;\n",
1356
+ " padding: 0.5em;\n",
1357
+ " box-sizing: border-box;\n",
1358
+ " text-align: center;\n",
1359
+ " align-items: start;\n",
1360
+ " justify-content: space-between;\n",
1361
+ " gap: 0.5em;\n",
1362
+ "}\n",
1363
+ "\n",
1364
+ "#sk-container-id-1 label.sk-toggleable__label .caption {\n",
1365
+ " font-size: 0.6rem;\n",
1366
+ " font-weight: lighter;\n",
1367
+ " color: var(--sklearn-color-text-muted);\n",
1368
+ "}\n",
1369
+ "\n",
1370
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
1371
+ " /* Arrow on the left of the label */\n",
1372
+ " content: \"▸\";\n",
1373
+ " float: left;\n",
1374
+ " margin-right: 0.25em;\n",
1375
+ " color: var(--sklearn-color-icon);\n",
1376
+ "}\n",
1377
+ "\n",
1378
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
1379
+ " color: var(--sklearn-color-text);\n",
1380
+ "}\n",
1381
+ "\n",
1382
+ "/* Toggleable content - dropdown */\n",
1383
+ "\n",
1384
+ "#sk-container-id-1 div.sk-toggleable__content {\n",
1385
+ " max-height: 0;\n",
1386
+ " max-width: 0;\n",
1387
+ " overflow: hidden;\n",
1388
+ " text-align: left;\n",
1389
+ " /* unfitted */\n",
1390
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
1391
+ "}\n",
1392
+ "\n",
1393
+ "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
1394
+ " /* fitted */\n",
1395
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
1396
+ "}\n",
1397
+ "\n",
1398
+ "#sk-container-id-1 div.sk-toggleable__content pre {\n",
1399
+ " margin: 0.2em;\n",
1400
+ " border-radius: 0.25em;\n",
1401
+ " color: var(--sklearn-color-text);\n",
1402
+ " /* unfitted */\n",
1403
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
1404
+ "}\n",
1405
+ "\n",
1406
+ "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
1407
+ " /* unfitted */\n",
1408
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
1409
+ "}\n",
1410
+ "\n",
1411
+ "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
1412
+ " /* Expand drop-down */\n",
1413
+ " max-height: 200px;\n",
1414
+ " max-width: 100%;\n",
1415
+ " overflow: auto;\n",
1416
+ "}\n",
1417
+ "\n",
1418
+ "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
1419
+ " content: \"▾\";\n",
1420
+ "}\n",
1421
+ "\n",
1422
+ "/* Pipeline/ColumnTransformer-specific style */\n",
1423
+ "\n",
1424
+ "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1425
+ " color: var(--sklearn-color-text);\n",
1426
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
1427
+ "}\n",
1428
+ "\n",
1429
+ "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1430
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
1431
+ "}\n",
1432
+ "\n",
1433
+ "/* Estimator-specific style */\n",
1434
+ "\n",
1435
+ "/* Colorize estimator box */\n",
1436
+ "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1437
+ " /* unfitted */\n",
1438
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
1439
+ "}\n",
1440
+ "\n",
1441
+ "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1442
+ " /* fitted */\n",
1443
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
1444
+ "}\n",
1445
+ "\n",
1446
+ "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
1447
+ "#sk-container-id-1 div.sk-label label {\n",
1448
+ " /* The background is the default theme color */\n",
1449
+ " color: var(--sklearn-color-text-on-default-background);\n",
1450
+ "}\n",
1451
+ "\n",
1452
+ "/* On hover, darken the color of the background */\n",
1453
+ "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
1454
+ " color: var(--sklearn-color-text);\n",
1455
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
1456
+ "}\n",
1457
+ "\n",
1458
+ "/* Label box, darken color on hover, fitted */\n",
1459
+ "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
1460
+ " color: var(--sklearn-color-text);\n",
1461
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
1462
+ "}\n",
1463
+ "\n",
1464
+ "/* Estimator label */\n",
1465
+ "\n",
1466
+ "#sk-container-id-1 div.sk-label label {\n",
1467
+ " font-family: monospace;\n",
1468
+ " font-weight: bold;\n",
1469
+ " display: inline-block;\n",
1470
+ " line-height: 1.2em;\n",
1471
+ "}\n",
1472
+ "\n",
1473
+ "#sk-container-id-1 div.sk-label-container {\n",
1474
+ " text-align: center;\n",
1475
+ "}\n",
1476
+ "\n",
1477
+ "/* Estimator-specific */\n",
1478
+ "#sk-container-id-1 div.sk-estimator {\n",
1479
+ " font-family: monospace;\n",
1480
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
1481
+ " border-radius: 0.25em;\n",
1482
+ " box-sizing: border-box;\n",
1483
+ " margin-bottom: 0.5em;\n",
1484
+ " /* unfitted */\n",
1485
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
1486
+ "}\n",
1487
+ "\n",
1488
+ "#sk-container-id-1 div.sk-estimator.fitted {\n",
1489
+ " /* fitted */\n",
1490
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
1491
+ "}\n",
1492
+ "\n",
1493
+ "/* on hover */\n",
1494
+ "#sk-container-id-1 div.sk-estimator:hover {\n",
1495
+ " /* unfitted */\n",
1496
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
1497
+ "}\n",
1498
+ "\n",
1499
+ "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
1500
+ " /* fitted */\n",
1501
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
1502
+ "}\n",
1503
+ "\n",
1504
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
1505
+ "\n",
1506
+ "/* Common style for \"i\" and \"?\" */\n",
1507
+ "\n",
1508
+ ".sk-estimator-doc-link,\n",
1509
+ "a:link.sk-estimator-doc-link,\n",
1510
+ "a:visited.sk-estimator-doc-link {\n",
1511
+ " float: right;\n",
1512
+ " font-size: smaller;\n",
1513
+ " line-height: 1em;\n",
1514
+ " font-family: monospace;\n",
1515
+ " background-color: var(--sklearn-color-background);\n",
1516
+ " border-radius: 1em;\n",
1517
+ " height: 1em;\n",
1518
+ " width: 1em;\n",
1519
+ " text-decoration: none !important;\n",
1520
+ " margin-left: 0.5em;\n",
1521
+ " text-align: center;\n",
1522
+ " /* unfitted */\n",
1523
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
1524
+ " color: var(--sklearn-color-unfitted-level-1);\n",
1525
+ "}\n",
1526
+ "\n",
1527
+ ".sk-estimator-doc-link.fitted,\n",
1528
+ "a:link.sk-estimator-doc-link.fitted,\n",
1529
+ "a:visited.sk-estimator-doc-link.fitted {\n",
1530
+ " /* fitted */\n",
1531
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
1532
+ " color: var(--sklearn-color-fitted-level-1);\n",
1533
+ "}\n",
1534
+ "\n",
1535
+ "/* On hover */\n",
1536
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
1537
+ ".sk-estimator-doc-link:hover,\n",
1538
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
1539
+ ".sk-estimator-doc-link:hover {\n",
1540
+ " /* unfitted */\n",
1541
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
1542
+ " color: var(--sklearn-color-background);\n",
1543
+ " text-decoration: none;\n",
1544
+ "}\n",
1545
+ "\n",
1546
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
1547
+ ".sk-estimator-doc-link.fitted:hover,\n",
1548
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
1549
+ ".sk-estimator-doc-link.fitted:hover {\n",
1550
+ " /* fitted */\n",
1551
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
1552
+ " color: var(--sklearn-color-background);\n",
1553
+ " text-decoration: none;\n",
1554
+ "}\n",
1555
+ "\n",
1556
+ "/* Span, style for the box shown on hovering the info icon */\n",
1557
+ ".sk-estimator-doc-link span {\n",
1558
+ " display: none;\n",
1559
+ " z-index: 9999;\n",
1560
+ " position: relative;\n",
1561
+ " font-weight: normal;\n",
1562
+ " right: .2ex;\n",
1563
+ " padding: .5ex;\n",
1564
+ " margin: .5ex;\n",
1565
+ " width: min-content;\n",
1566
+ " min-width: 20ex;\n",
1567
+ " max-width: 50ex;\n",
1568
+ " color: var(--sklearn-color-text);\n",
1569
+ " box-shadow: 2pt 2pt 4pt #999;\n",
1570
+ " /* unfitted */\n",
1571
+ " background: var(--sklearn-color-unfitted-level-0);\n",
1572
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
1573
+ "}\n",
1574
+ "\n",
1575
+ ".sk-estimator-doc-link.fitted span {\n",
1576
+ " /* fitted */\n",
1577
+ " background: var(--sklearn-color-fitted-level-0);\n",
1578
+ " border: var(--sklearn-color-fitted-level-3);\n",
1579
+ "}\n",
1580
+ "\n",
1581
+ ".sk-estimator-doc-link:hover span {\n",
1582
+ " display: block;\n",
1583
+ "}\n",
1584
+ "\n",
1585
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
1586
+ "\n",
1587
+ "#sk-container-id-1 a.estimator_doc_link {\n",
1588
+ " float: right;\n",
1589
+ " font-size: 1rem;\n",
1590
+ " line-height: 1em;\n",
1591
+ " font-family: monospace;\n",
1592
+ " background-color: var(--sklearn-color-background);\n",
1593
+ " border-radius: 1rem;\n",
1594
+ " height: 1rem;\n",
1595
+ " width: 1rem;\n",
1596
+ " text-decoration: none;\n",
1597
+ " /* unfitted */\n",
1598
+ " color: var(--sklearn-color-unfitted-level-1);\n",
1599
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
1600
+ "}\n",
1601
+ "\n",
1602
+ "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
1603
+ " /* fitted */\n",
1604
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
1605
+ " color: var(--sklearn-color-fitted-level-1);\n",
1606
+ "}\n",
1607
+ "\n",
1608
+ "/* On hover */\n",
1609
+ "#sk-container-id-1 a.estimator_doc_link:hover {\n",
1610
+ " /* unfitted */\n",
1611
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
1612
+ " color: var(--sklearn-color-background);\n",
1613
+ " text-decoration: none;\n",
1614
+ "}\n",
1615
+ "\n",
1616
+ "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
1617
+ " /* fitted */\n",
1618
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
1619
+ "}\n",
1620
+ "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultinomialNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>MultinomialNB</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.naive_bayes.MultinomialNB.html\">?<span>Documentation for MultinomialNB</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>MultinomialNB()</pre></div> </div></div></div></div>"
1621
+ ]
1622
+ },
1623
+ "metadata": {},
1624
+ "execution_count": 17
1625
+ }
1626
+ ],
1627
+ "source": [
1628
+ "model = MultinomialNB()\n",
1629
+ "\n",
1630
+ "model.fit(x_train_final,y_train)"
1631
+ ]
1632
+ },
1633
+ {
1634
+ "cell_type": "markdown",
1635
+ "id": "91fe1bf2",
1636
+ "metadata": {
1637
+ "id": "91fe1bf2"
1638
+ },
1639
+ "source": [
1640
+ "## model testing"
1641
+ ]
1642
+ },
1643
+ {
1644
+ "cell_type": "code",
1645
+ "execution_count": 18,
1646
+ "id": "7c44540b",
1647
+ "metadata": {
1648
+ "id": "7c44540b"
1649
+ },
1650
+ "outputs": [],
1651
+ "source": [
1652
+ "from sklearn.metrics import classification_report"
1653
+ ]
1654
+ },
1655
+ {
1656
+ "cell_type": "code",
1657
+ "execution_count": 19,
1658
+ "id": "ec824818",
1659
+ "metadata": {
1660
+ "id": "ec824818"
1661
+ },
1662
+ "outputs": [],
1663
+ "source": [
1664
+ "y_pred = model.predict(x_test_final)"
1665
+ ]
1666
+ },
1667
+ {
1668
+ "cell_type": "code",
1669
+ "execution_count": 20,
1670
+ "id": "0f103e37",
1671
+ "metadata": {
1672
+ "colab": {
1673
+ "base_uri": "https://localhost:8080/"
1674
+ },
1675
+ "id": "0f103e37",
1676
+ "outputId": "24ad9f3b-7276-4f99-eeda-ff5a33e8e64b"
1677
+ },
1678
+ "outputs": [
1679
+ {
1680
+ "output_type": "stream",
1681
+ "name": "stdout",
1682
+ "text": [
1683
+ " precision recall f1-score support\n",
1684
+ "\n",
1685
+ " 0 0.96 1.00 0.98 965\n",
1686
+ " 1 1.00 0.75 0.86 150\n",
1687
+ "\n",
1688
+ " accuracy 0.97 1115\n",
1689
+ " macro avg 0.98 0.88 0.92 1115\n",
1690
+ "weighted avg 0.97 0.97 0.96 1115\n",
1691
+ "\n"
1692
+ ]
1693
+ }
1694
+ ],
1695
+ "source": [
1696
+ "cr = classification_report(y_test,y_pred)\n",
1697
+ "\n",
1698
+ "print(cr)"
1699
+ ]
1700
+ },
1701
+ {
1702
+ "cell_type": "markdown",
1703
+ "id": "c1da6516",
1704
+ "metadata": {
1705
+ "id": "c1da6516"
1706
+ },
1707
+ "source": [
1708
+ "**Check individual email**"
1709
+ ]
1710
+ },
1711
+ {
1712
+ "cell_type": "code",
1713
+ "execution_count": 21,
1714
+ "id": "45d1e3c1",
1715
+ "metadata": {
1716
+ "colab": {
1717
+ "base_uri": "https://localhost:8080/"
1718
+ },
1719
+ "id": "45d1e3c1",
1720
+ "outputId": "8e846bfc-0c28-4ee9-94fa-62af679f2a34"
1721
+ },
1722
+ "outputs": [
1723
+ {
1724
+ "output_type": "execute_result",
1725
+ "data": {
1726
+ "text/plain": [
1727
+ "array([1])"
1728
+ ]
1729
+ },
1730
+ "metadata": {},
1731
+ "execution_count": 21
1732
+ }
1733
+ ],
1734
+ "source": [
1735
+ "inp = ['''Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\n",
1736
+ "''']\n",
1737
+ "\n",
1738
+ "\n",
1739
+ "inp_final = tfd.transform(inp)\n",
1740
+ "\n",
1741
+ "model.predict(inp_final)"
1742
+ ]
1743
+ },
1744
+ {
1745
+ "cell_type": "markdown",
1746
+ "id": "45a632b0",
1747
+ "metadata": {
1748
+ "id": "45a632b0"
1749
+ },
1750
+ "source": [
1751
+ "**Practice**"
1752
+ ]
1753
+ },
1754
+ {
1755
+ "cell_type": "code",
1756
+ "execution_count": 22,
1757
+ "id": "9eacaf9e",
1758
+ "metadata": {
1759
+ "colab": {
1760
+ "base_uri": "https://localhost:8080/",
1761
+ "height": 356
1762
+ },
1763
+ "id": "9eacaf9e",
1764
+ "outputId": "6697ee8d-9787-4d53-94b5-93ba310e9c52"
1765
+ },
1766
+ "outputs": [
1767
+ {
1768
+ "output_type": "error",
1769
+ "ename": "KeyboardInterrupt",
1770
+ "evalue": "Interrupted by user",
1771
+ "traceback": [
1772
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1773
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
1774
+ "\u001b[0;32m/tmp/ipython-input-273501035.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0memail\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Please provide the email:\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0memail\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtfd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0memail\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0memail\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1775
+ "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36mraw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m 1175\u001b[0m \u001b[0;34m\"raw_input was called, but this frontend does not support input requests.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1176\u001b[0m )\n\u001b[0;32m-> 1177\u001b[0;31m return self._input_request(\n\u001b[0m\u001b[1;32m 1178\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1179\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_ident\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"shell\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1776
+ "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m 1217\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1218\u001b[0m \u001b[0;31m# re-raise KeyboardInterrupt, to truncate traceback\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1219\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Interrupted by user\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1220\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1221\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwarning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Invalid Message:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1777
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: Interrupted by user"
1778
+ ]
1779
+ }
1780
+ ],
1781
+ "source": [
1782
+ "email = input(\"Please provide the email:\")\n",
1783
+ "\n",
1784
+ "email = tfd.transform([email])\n",
1785
+ "\n",
1786
+ "out = model.predict(email)[0]\n",
1787
+ "\n",
1788
+ "if out==0:\n",
1789
+ "\n",
1790
+ " print(\"\\nThe entered email is not a spam\")\n",
1791
+ "\n",
1792
+ "else:\n",
1793
+ "\n",
1794
+ " print(\"\\nThe entered email is a Spam\")"
1795
+ ]
1796
+ },
1797
+ {
1798
+ "cell_type": "code",
1799
+ "execution_count": 23,
1800
+ "id": "40065daf",
1801
+ "metadata": {
1802
+ "id": "40065daf"
1803
+ },
1804
+ "outputs": [],
1805
+ "source": [
1806
+ "from joblib import dump"
1807
+ ]
1808
+ },
1809
+ {
1810
+ "cell_type": "code",
1811
+ "execution_count": 24,
1812
+ "id": "64908369",
1813
+ "metadata": {
1814
+ "colab": {
1815
+ "base_uri": "https://localhost:8080/"
1816
+ },
1817
+ "id": "64908369",
1818
+ "outputId": "e84fac0a-df4f-4375-c611-ec8451926a49"
1819
+ },
1820
+ "outputs": [
1821
+ {
1822
+ "output_type": "execute_result",
1823
+ "data": {
1824
+ "text/plain": [
1825
+ "['model.joblib']"
1826
+ ]
1827
+ },
1828
+ "metadata": {},
1829
+ "execution_count": 24
1830
+ }
1831
+ ],
1832
+ "source": [
1833
+ "dump(model,\"model.joblib\")"
1834
+ ]
1835
+ },
1836
+ {
1837
+ "cell_type": "code",
1838
+ "execution_count": 25,
1839
+ "id": "d7d47cf2",
1840
+ "metadata": {
1841
+ "colab": {
1842
+ "base_uri": "https://localhost:8080/"
1843
+ },
1844
+ "id": "d7d47cf2",
1845
+ "outputId": "00dfb556-c1c3-427f-9f35-e630afdd0c47"
1846
+ },
1847
+ "outputs": [
1848
+ {
1849
+ "output_type": "execute_result",
1850
+ "data": {
1851
+ "text/plain": [
1852
+ "['tfd.joblib']"
1853
+ ]
1854
+ },
1855
+ "metadata": {},
1856
+ "execution_count": 25
1857
+ }
1858
+ ],
1859
+ "source": [
1860
+ "dump(tfd,\"tfd.joblib\")"
1861
+ ]
1862
+ },
1863
+ {
1864
+ "cell_type": "code",
1865
+ "source": [],
1866
+ "metadata": {
1867
+ "id": "bsjaW-Rjkj_R"
1868
+ },
1869
+ "id": "bsjaW-Rjkj_R",
1870
+ "execution_count": null,
1871
+ "outputs": []
1872
+ }
1873
+ ],
1874
+ "metadata": {
1875
+ "kernelspec": {
1876
+ "display_name": "Python 3 (ipykernel)",
1877
+ "language": "python",
1878
+ "name": "python3"
1879
+ },
1880
+ "language_info": {
1881
+ "codemirror_mode": {
1882
+ "name": "ipython",
1883
+ "version": 3
1884
+ },
1885
+ "file_extension": ".py",
1886
+ "mimetype": "text/x-python",
1887
+ "name": "python",
1888
+ "nbconvert_exporter": "python",
1889
+ "pygments_lexer": "ipython3",
1890
+ "version": "3.11.5"
1891
+ },
1892
+ "colab": {
1893
+ "provenance": []
1894
+ }
1895
+ },
1896
+ "nbformat": 4,
1897
+ "nbformat_minor": 5
1898
+ }
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from joblib import load
3
+
4
+ model = load("model.joblib")
5
+ tfd = load("tfd.joblib")
6
+
7
+ def prediction(email):
8
+
9
+ inp = [email]
10
+ inp_final = tfd.transform(inp)
11
+ res = model.predict_proba(inp_final)[0]
12
+ return "Not Spam" if res==1 else "Spam"
13
+ iface = gr.Interface(
14
+ fn = prediction,
15
+ inputs = [gr.Text(label="Enter Email Text")],
16
+ outputs = "text",
17
+ title = "Spam Identifier",
18
+ description = "This is used an app which can identify the email.")
19
+
20
+ iface.launch()
21
+
22
+
23
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ scikit-learn~=1.6.0  # notebook output links to the 1.6 docs; keep the pickles loadable
3
+ joblib
4
+ # Dropped "preprocessing" and "TfiddVectorizer": app.py imports neither, and
5
+ # TfidfVectorizer / sklearn.preprocessing already ship with scikit-learn.
6
+ pandas
7
+ numpy
spam.csv ADDED
The diff for this file is too large to render. See raw diff