Riyan00b committed on
Commit
c26da27
·
verified ·
1 Parent(s): 4dd8f0b

Upload 7 files

Browse files
Files changed (2) hide show
  1. ML with NLP.ipynb +639 -0
  2. spam.csv +0 -0
ML with NLP.ipynb ADDED
@@ -0,0 +1,639 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "68dfebbc",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/html": [
12
+ "<div>\n",
13
+ "<style scoped>\n",
14
+ " .dataframe tbody tr th:only-of-type {\n",
15
+ " vertical-align: middle;\n",
16
+ " }\n",
17
+ "\n",
18
+ " .dataframe tbody tr th {\n",
19
+ " vertical-align: top;\n",
20
+ " }\n",
21
+ "\n",
22
+ " .dataframe thead th {\n",
23
+ " text-align: right;\n",
24
+ " }\n",
25
+ "</style>\n",
26
+ "<table border=\"1\" class=\"dataframe\">\n",
27
+ " <thead>\n",
28
+ " <tr style=\"text-align: right;\">\n",
29
+ " <th></th>\n",
30
+ " <th>v1</th>\n",
31
+ " <th>v2</th>\n",
32
+ " <th>Unnamed: 2</th>\n",
33
+ " <th>Unnamed: 3</th>\n",
34
+ " <th>Unnamed: 4</th>\n",
35
+ " </tr>\n",
36
+ " </thead>\n",
37
+ " <tbody>\n",
38
+ " <tr>\n",
39
+ " <th>0</th>\n",
40
+ " <td>ham</td>\n",
41
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
42
+ " <td>NaN</td>\n",
43
+ " <td>NaN</td>\n",
44
+ " <td>NaN</td>\n",
45
+ " </tr>\n",
46
+ " <tr>\n",
47
+ " <th>1</th>\n",
48
+ " <td>ham</td>\n",
49
+ " <td>Ok lar... Joking wif u oni...</td>\n",
50
+ " <td>NaN</td>\n",
51
+ " <td>NaN</td>\n",
52
+ " <td>NaN</td>\n",
53
+ " </tr>\n",
54
+ " <tr>\n",
55
+ " <th>2</th>\n",
56
+ " <td>spam</td>\n",
57
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
58
+ " <td>NaN</td>\n",
59
+ " <td>NaN</td>\n",
60
+ " <td>NaN</td>\n",
61
+ " </tr>\n",
62
+ " <tr>\n",
63
+ " <th>3</th>\n",
64
+ " <td>ham</td>\n",
65
+ " <td>U dun say so early hor... U c already then say...</td>\n",
66
+ " <td>NaN</td>\n",
67
+ " <td>NaN</td>\n",
68
+ " <td>NaN</td>\n",
69
+ " </tr>\n",
70
+ " <tr>\n",
71
+ " <th>4</th>\n",
72
+ " <td>ham</td>\n",
73
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
74
+ " <td>NaN</td>\n",
75
+ " <td>NaN</td>\n",
76
+ " <td>NaN</td>\n",
77
+ " </tr>\n",
78
+ " </tbody>\n",
79
+ "</table>\n",
80
+ "</div>"
81
+ ],
82
+ "text/plain": [
83
+ " v1 v2 Unnamed: 2 \\\n",
84
+ "0 ham Go until jurong point, crazy.. Available only ... NaN \n",
85
+ "1 ham Ok lar... Joking wif u oni... NaN \n",
86
+ "2 spam Free entry in 2 a wkly comp to win FA Cup fina... NaN \n",
87
+ "3 ham U dun say so early hor... U c already then say... NaN \n",
88
+ "4 ham Nah I don't think he goes to usf, he lives aro... NaN \n",
89
+ "\n",
90
+ " Unnamed: 3 Unnamed: 4 \n",
91
+ "0 NaN NaN \n",
92
+ "1 NaN NaN \n",
93
+ "2 NaN NaN \n",
94
+ "3 NaN NaN \n",
95
+ "4 NaN NaN "
96
+ ]
97
+ },
98
+ "execution_count": 1,
99
+ "metadata": {},
100
+ "output_type": "execute_result"
101
+ }
102
+ ],
103
+ "source": [
104
+ "import pandas as pd\n",
105
+ "\n",
106
+ "df = pd.read_csv('spam.csv',encoding='latin1')\n",
107
+ "\n",
108
+ "df.head()"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 2,
114
+ "id": "a40ec53a",
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "data": {
119
+ "text/html": [
120
+ "<div>\n",
121
+ "<style scoped>\n",
122
+ " .dataframe tbody tr th:only-of-type {\n",
123
+ " vertical-align: middle;\n",
124
+ " }\n",
125
+ "\n",
126
+ " .dataframe tbody tr th {\n",
127
+ " vertical-align: top;\n",
128
+ " }\n",
129
+ "\n",
130
+ " .dataframe thead th {\n",
131
+ " text-align: right;\n",
132
+ " }\n",
133
+ "</style>\n",
134
+ "<table border=\"1\" class=\"dataframe\">\n",
135
+ " <thead>\n",
136
+ " <tr style=\"text-align: right;\">\n",
137
+ " <th></th>\n",
138
+ " <th>v2</th>\n",
139
+ " <th>v1</th>\n",
140
+ " </tr>\n",
141
+ " </thead>\n",
142
+ " <tbody>\n",
143
+ " <tr>\n",
144
+ " <th>0</th>\n",
145
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
146
+ " <td>ham</td>\n",
147
+ " </tr>\n",
148
+ " <tr>\n",
149
+ " <th>1</th>\n",
150
+ " <td>Ok lar... Joking wif u oni...</td>\n",
151
+ " <td>ham</td>\n",
152
+ " </tr>\n",
153
+ " <tr>\n",
154
+ " <th>2</th>\n",
155
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
156
+ " <td>spam</td>\n",
157
+ " </tr>\n",
158
+ " <tr>\n",
159
+ " <th>3</th>\n",
160
+ " <td>U dun say so early hor... U c already then say...</td>\n",
161
+ " <td>ham</td>\n",
162
+ " </tr>\n",
163
+ " <tr>\n",
164
+ " <th>4</th>\n",
165
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
166
+ " <td>ham</td>\n",
167
+ " </tr>\n",
168
+ " </tbody>\n",
169
+ "</table>\n",
170
+ "</div>"
171
+ ],
172
+ "text/plain": [
173
+ " v2 v1\n",
174
+ "0 Go until jurong point, crazy.. Available only ... ham\n",
175
+ "1 Ok lar... Joking wif u oni... ham\n",
176
+ "2 Free entry in 2 a wkly comp to win FA Cup fina... spam\n",
177
+ "3 U dun say so early hor... U c already then say... ham\n",
178
+ "4 Nah I don't think he goes to usf, he lives aro... ham"
179
+ ]
180
+ },
181
+ "execution_count": 2,
182
+ "metadata": {},
183
+ "output_type": "execute_result"
184
+ }
185
+ ],
186
+ "source": [
187
+ "df = df[['v2','v1']]\n",
188
+ "\n",
189
+ "df.head()"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 3,
195
+ "id": "12ef474a",
196
+ "metadata": {},
197
+ "outputs": [
198
+ {
199
+ "data": {
200
+ "text/plain": [
201
+ "v2 0\n",
202
+ "v1 0\n",
203
+ "dtype: int64"
204
+ ]
205
+ },
206
+ "execution_count": 3,
207
+ "metadata": {},
208
+ "output_type": "execute_result"
209
+ }
210
+ ],
211
+ "source": [
212
+ "df.isnull().sum()"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 4,
218
+ "id": "776032f0",
219
+ "metadata": {},
220
+ "outputs": [
221
+ {
222
+ "data": {
223
+ "text/html": [
224
+ "<div>\n",
225
+ "<style scoped>\n",
226
+ " .dataframe tbody tr th:only-of-type {\n",
227
+ " vertical-align: middle;\n",
228
+ " }\n",
229
+ "\n",
230
+ " .dataframe tbody tr th {\n",
231
+ " vertical-align: top;\n",
232
+ " }\n",
233
+ "\n",
234
+ " .dataframe thead th {\n",
235
+ " text-align: right;\n",
236
+ " }\n",
237
+ "</style>\n",
238
+ "<table border=\"1\" class=\"dataframe\">\n",
239
+ " <thead>\n",
240
+ " <tr style=\"text-align: right;\">\n",
241
+ " <th></th>\n",
242
+ " <th>v2</th>\n",
243
+ " <th>v1</th>\n",
244
+ " </tr>\n",
245
+ " </thead>\n",
246
+ " <tbody>\n",
247
+ " <tr>\n",
248
+ " <th>0</th>\n",
249
+ " <td>Go until jurong point, crazy.. Available only ...</td>\n",
250
+ " <td>0</td>\n",
251
+ " </tr>\n",
252
+ " <tr>\n",
253
+ " <th>1</th>\n",
254
+ " <td>Ok lar... Joking wif u oni...</td>\n",
255
+ " <td>0</td>\n",
256
+ " </tr>\n",
257
+ " <tr>\n",
258
+ " <th>2</th>\n",
259
+ " <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
260
+ " <td>1</td>\n",
261
+ " </tr>\n",
262
+ " <tr>\n",
263
+ " <th>3</th>\n",
264
+ " <td>U dun say so early hor... U c already then say...</td>\n",
265
+ " <td>0</td>\n",
266
+ " </tr>\n",
267
+ " <tr>\n",
268
+ " <th>4</th>\n",
269
+ " <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
270
+ " <td>0</td>\n",
271
+ " </tr>\n",
272
+ " </tbody>\n",
273
+ "</table>\n",
274
+ "</div>"
275
+ ],
276
+ "text/plain": [
277
+ " v2 v1\n",
278
+ "0 Go until jurong point, crazy.. Available only ... 0\n",
279
+ "1 Ok lar... Joking wif u oni... 0\n",
280
+ "2 Free entry in 2 a wkly comp to win FA Cup fina... 1\n",
281
+ "3 U dun say so early hor... U c already then say... 0\n",
282
+ "4 Nah I don't think he goes to usf, he lives aro... 0"
283
+ ]
284
+ },
285
+ "execution_count": 4,
286
+ "metadata": {},
287
+ "output_type": "execute_result"
288
+ }
289
+ ],
290
+ "source": [
291
+ "df['v1'] = df['v1'].map({'ham':0,'spam':1})\n",
292
+ "\n",
293
+ "df.head()"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": 5,
299
+ "id": "44c5083f",
300
+ "metadata": {},
301
+ "outputs": [],
302
+ "source": [
303
+ "x = df['v2']\n",
304
+ "\n",
305
+ "y = df['v1']"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 6,
311
+ "id": "f9654d1f",
312
+ "metadata": {},
313
+ "outputs": [],
314
+ "source": [
315
+ "from sklearn.model_selection import train_test_split"
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": 7,
321
+ "id": "1e230240",
322
+ "metadata": {},
323
+ "outputs": [],
324
+ "source": [
325
+ "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)"
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": 8,
331
+ "id": "b3f2dc41",
332
+ "metadata": {},
333
+ "outputs": [
334
+ {
335
+ "data": {
336
+ "text/plain": [
337
+ "4457"
338
+ ]
339
+ },
340
+ "execution_count": 8,
341
+ "metadata": {},
342
+ "output_type": "execute_result"
343
+ }
344
+ ],
345
+ "source": [
346
+ "len(x_train)"
347
+ ]
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 9,
352
+ "id": "21888d5b",
353
+ "metadata": {},
354
+ "outputs": [
355
+ {
356
+ "data": {
357
+ "text/plain": [
358
+ "1115"
359
+ ]
360
+ },
361
+ "execution_count": 9,
362
+ "metadata": {},
363
+ "output_type": "execute_result"
364
+ }
365
+ ],
366
+ "source": [
367
+ "len(x_test)"
368
+ ]
369
+ },
370
+ {
371
+ "cell_type": "markdown",
372
+ "id": "4e30ca04",
373
+ "metadata": {},
374
+ "source": [
375
+ " let's preprocess text column now"
376
+ ]
377
+ },
378
+ {
379
+ "cell_type": "code",
380
+ "execution_count": 10,
381
+ "id": "384b9817",
382
+ "metadata": {},
383
+ "outputs": [],
384
+ "source": [
385
+ "from sklearn.feature_extraction.text import TfidfVectorizer"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "execution_count": 11,
391
+ "id": "90f45ff7",
392
+ "metadata": {},
393
+ "outputs": [],
394
+ "source": [
395
+ "tfd = TfidfVectorizer(stop_words='english')\n",
396
+ "\n",
397
+ "\n",
398
+ "x_train_final = tfd.fit_transform(x_train)\n",
399
+ "\n",
400
+ "x_test_final = tfd.transform(x_test)"
401
+ ]
402
+ },
403
+ {
404
+ "cell_type": "markdown",
405
+ "id": "473b0720",
406
+ "metadata": {},
407
+ "source": [
408
+ "## model training"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": 12,
414
+ "id": "94f28d1b",
415
+ "metadata": {},
416
+ "outputs": [],
417
+ "source": [
418
+ "from sklearn.naive_bayes import MultinomialNB"
419
+ ]
420
+ },
421
+ {
422
+ "cell_type": "code",
423
+ "execution_count": 13,
424
+ "id": "d406c405",
425
+ "metadata": {},
426
+ "outputs": [
427
+ {
428
+ "data": {
429
+ "text/html": [
430
+ "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultinomialNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MultinomialNB</label><div class=\"sk-toggleable__content\"><pre>MultinomialNB()</pre></div></div></div></div></div>"
431
+ ],
432
+ "text/plain": [
433
+ "MultinomialNB()"
434
+ ]
435
+ },
436
+ "execution_count": 13,
437
+ "metadata": {},
438
+ "output_type": "execute_result"
439
+ }
440
+ ],
441
+ "source": [
442
+ "model = MultinomialNB()\n",
443
+ "\n",
444
+ "model.fit(x_train_final,y_train)"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "markdown",
449
+ "id": "91fe1bf2",
450
+ "metadata": {},
451
+ "source": [
452
+ "## model testing"
453
+ ]
454
+ },
455
+ {
456
+ "cell_type": "code",
457
+ "execution_count": 14,
458
+ "id": "7c44540b",
459
+ "metadata": {},
460
+ "outputs": [],
461
+ "source": [
462
+ "from sklearn.metrics import classification_report"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": 15,
468
+ "id": "ec824818",
469
+ "metadata": {},
470
+ "outputs": [],
471
+ "source": [
472
+ "y_pred = model.predict(x_test_final)"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "code",
477
+ "execution_count": 16,
478
+ "id": "0f103e37",
479
+ "metadata": {},
480
+ "outputs": [
481
+ {
482
+ "name": "stdout",
483
+ "output_type": "stream",
484
+ "text": [
485
+ " precision recall f1-score support\n",
486
+ "\n",
487
+ " 0 0.96 1.00 0.98 965\n",
488
+ " 1 1.00 0.75 0.86 150\n",
489
+ "\n",
490
+ " accuracy 0.97 1115\n",
491
+ " macro avg 0.98 0.88 0.92 1115\n",
492
+ "weighted avg 0.97 0.97 0.96 1115\n",
493
+ "\n"
494
+ ]
495
+ }
496
+ ],
497
+ "source": [
498
+ "cr = classification_report(y_test,y_pred)\n",
499
+ "\n",
500
+ "print(cr)"
501
+ ]
502
+ },
503
+ {
504
+ "cell_type": "markdown",
505
+ "id": "c1da6516",
506
+ "metadata": {},
507
+ "source": [
508
+ "**Check individual email**"
509
+ ]
510
+ },
511
+ {
512
+ "cell_type": "code",
513
+ "execution_count": 17,
514
+ "id": "45d1e3c1",
515
+ "metadata": {},
516
+ "outputs": [
517
+ {
518
+ "data": {
519
+ "text/plain": [
520
+ "array([1], dtype=int64)"
521
+ ]
522
+ },
523
+ "execution_count": 17,
524
+ "metadata": {},
525
+ "output_type": "execute_result"
526
+ }
527
+ ],
528
+ "source": [
529
+ "inp = ['''Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\n",
530
+ "''']\n",
531
+ "\n",
532
+ "\n",
533
+ "inp_final = tfd.transform(inp)\n",
534
+ "\n",
535
+ "model.predict(inp_final)"
536
+ ]
537
+ },
538
+ {
539
+ "cell_type": "markdown",
540
+ "id": "45a632b0",
541
+ "metadata": {},
542
+ "source": [
543
+ "**Practice**"
544
+ ]
545
+ },
546
+ {
547
+ "cell_type": "code",
548
+ "execution_count": 18,
549
+ "id": "9eacaf9e",
550
+ "metadata": {},
551
+ "outputs": [
552
+ {
553
+ "name": "stdout",
554
+ "output_type": "stream",
555
+ "text": [
556
+ "Please provide the email:this is free\n",
557
+ "\n",
558
+ "The entered email is not a spam\n"
559
+ ]
560
+ }
561
+ ],
562
+ "source": [
563
+ "email = input(\"Please provide the email:\")\n",
564
+ "\n",
565
+ "email = tfd.transform([email])\n",
566
+ "\n",
567
+ "out = model.predict(email)[0]\n",
568
+ "\n",
569
+ "if out==0:\n",
570
+ " \n",
571
+ " print(\"\\nThe entered email is not a spam\")\n",
572
+ " \n",
573
+ "else:\n",
574
+ " \n",
575
+ " print(\"\\nThe entered email is a Spam\")"
576
+ ]
577
+ },
578
+ {
579
+ "cell_type": "code",
580
+ "execution_count": 19,
581
+ "id": "40065daf",
582
+ "metadata": {},
583
+ "outputs": [],
584
+ "source": [
585
+ "from joblib import dump"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 20,
591
+ "id": "64908369",
592
+ "metadata": {},
593
+ "outputs": [
594
+ {
595
+ "data": {
596
+ "text/plain": [
597
+ "['model.joblib']"
598
+ ]
599
+ },
600
+ "execution_count": 20,
601
+ "metadata": {},
602
+ "output_type": "execute_result"
603
+ }
604
+ ],
605
+ "source": [
606
+ "dump(model,\"model.joblib\")"
607
+ ]
608
+ },
609
+ {
610
+ "cell_type": "code",
611
+ "execution_count": null,
612
+ "id": "d7d47cf2",
613
+ "metadata": {},
614
+ "outputs": [],
615
+ "source": []
616
+ }
617
+ ],
618
+ "metadata": {
619
+ "kernelspec": {
620
+ "display_name": "Python 3 (ipykernel)",
621
+ "language": "python",
622
+ "name": "python3"
623
+ },
624
+ "language_info": {
625
+ "codemirror_mode": {
626
+ "name": "ipython",
627
+ "version": 3
628
+ },
629
+ "file_extension": ".py",
630
+ "mimetype": "text/x-python",
631
+ "name": "python",
632
+ "nbconvert_exporter": "python",
633
+ "pygments_lexer": "ipython3",
634
+ "version": "3.11.5"
635
+ }
636
+ },
637
+ "nbformat": 4,
638
+ "nbformat_minor": 5
639
+ }
spam.csv ADDED
The diff for this file is too large to render. See raw diff