mkManishKumar committed on
Commit
a8d6f7f
·
verified ·
1 Parent(s): 6e76905

Upload 5 files

Browse files
Files changed (5) hide show
  1. Email_Spam_Model.ipynb +367 -0
  2. app.py +11 -0
  3. requirements.txt +0 -0
  4. spam2.pkl +3 -0
  5. vectorizer.pkl +3 -0
Email_Spam_Model.ipynb ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "939855ce",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "fba51ea4",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "df=pd.read_csv(\"spam2.csv\",encoding=\"latin-1\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "07c9eb3d",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "df.head(6)"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "id": "f8c8f1d7",
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "df.columns"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "id": "fe0d6616",
47
+ "metadata": {},
48
+ "outputs": [],
49
+ "source": [
50
+ "df.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "id": "b5865574",
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "df.head()"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": null,
66
+ "id": "5f96657e",
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "df = df.rename(columns={'v1': 'class', 'v2': 'message'})"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "4d3cdb2f",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "df.head()"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "id": "0e196722",
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "df['class']=df['class'].map({'ham':0, 'spam':1})"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": null,
96
+ "id": "7279b415",
97
+ "metadata": {},
98
+ "outputs": [],
99
+ "source": [
100
+ "df.head()\n"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "id": "06d05ab6",
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": [
110
+ "from sklearn.feature_extraction.text import CountVectorizer"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": null,
116
+ "id": "dcba8c4a",
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "cv=CountVectorizer()"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": null,
126
+ "id": "cb354066",
127
+ "metadata": {},
128
+ "outputs": [],
129
+ "source": [
130
+ "x=df['message']"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "id": "a64ff973",
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": [
140
+ "y=df['class']"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "code",
145
+ "execution_count": null,
146
+ "id": "ea3ad8e3",
147
+ "metadata": {},
148
+ "outputs": [],
149
+ "source": [
150
+ "x.shape"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": null,
156
+ "id": "df685f7b",
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": [
160
+ "y.shape"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": null,
166
+ "id": "ed8a1948",
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "x=cv.fit_transform(x)"
171
+ ]
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": null,
176
+ "id": "dca72cdb",
177
+ "metadata": {},
178
+ "outputs": [],
179
+ "source": [
180
+ "x"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "markdown",
185
+ "id": "4f96137d",
186
+ "metadata": {},
187
+ "source": [
188
+ "1. The Cat\n",
189
+ "2. The Dog\n",
190
+ "3. The rat\n",
191
+ "\n",
192
+ " The Cat Dog rat\n",
193
+ "1. 1 1 0 0\n",
194
+ "2. 1 0 1 0\n",
195
+ "3. 1 0 0 1\n",
196
+ " "
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "id": "877394e3",
203
+ "metadata": {},
204
+ "outputs": [],
205
+ "source": [
206
+ "from sklearn.model_selection import train_test_split\n"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": null,
212
+ "id": "563bd95e",
213
+ "metadata": {},
214
+ "outputs": [],
215
+ "source": [
216
+ "x_train, x_test, y_train, y_test=train_test_split(x,y, test_size=0.2)\n"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ "id": "81b47b81",
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "x_train.shape"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "id": "1b33ace6",
233
+ "metadata": {
234
+ "scrolled": true
235
+ },
236
+ "outputs": [],
237
+ "source": [
238
+ "from sklearn.naive_bayes import MultinomialNB"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": null,
244
+ "id": "829d8553",
245
+ "metadata": {},
246
+ "outputs": [],
247
+ "source": [
248
+ "model=MultinomialNB()"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": null,
254
+ "id": "e4e5310d",
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": [
258
+ "model.fit(x_train, y_train)"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": null,
264
+ "id": "295a5529",
265
+ "metadata": {},
266
+ "outputs": [],
267
+ "source": [
268
+ "result=model.score(x_test, y_test)  # accuracy on the held-out 20% split; scoring x_train only measures fit to data the model has already seen\n"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "code",
273
+ "execution_count": null,
274
+ "id": "98b97f5a",
275
+ "metadata": {},
276
+ "outputs": [],
277
+ "source": [
278
+ "result*100\n"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": null,
284
+ "id": "9fa0997c",
285
+ "metadata": {},
286
+ "outputs": [],
287
+ "source": [
288
+ "import pickle"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": null,
294
+ "id": "920727b0",
295
+ "metadata": {},
296
+ "outputs": [],
297
+ "source": [
298
+ "pickle.dump(model,open(\"spam2.pkl\",\"wb\"))"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": null,
304
+ "id": "5e7e327a",
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "pickle.dump(cv,open(\"vectorizer.pkl\",\"wb\"))"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": null,
314
+ "id": "512372fc",
315
+ "metadata": {},
316
+ "outputs": [],
317
+ "source": [
318
+ "clf=pickle.load(open(\"spam2.pkl\",\"rb\"))"
319
+ ]
320
+ },
321
+ {
322
+ "cell_type": "code",
323
+ "execution_count": null,
324
+ "id": "4fae6f43",
325
+ "metadata": {},
326
+ "outputs": [],
327
+ "source": [
328
+ "clf"
329
+ ]
330
+ },
331
+ {
332
+ "cell_type": "code",
333
+ "execution_count": null,
334
+ "id": "d8a6869c",
335
+ "metadata": {},
336
+ "outputs": [],
337
+ "source": [
338
+ "msg =\"hi Sandeep Yu are a hero\"\n",
339
+ "samples=[msg]  # separate name so the DataFrame bound to df is not overwritten\n",
340
+ "vect=cv.transform(samples)  # keep the sparse matrix; the model was fitted on sparse input\n",
341
+ "result= clf.predict(vect)  # predict with the model reloaded from spam2.pkl to verify the saved artifact\n",
342
+ "print(result)"
343
+ ]
344
+ }
345
+ ],
346
+ "metadata": {
347
+ "kernelspec": {
348
+ "display_name": "Python 3 (ipykernel)",
349
+ "language": "python",
350
+ "name": "python3"
351
+ },
352
+ "language_info": {
353
+ "codemirror_mode": {
354
+ "name": "ipython",
355
+ "version": 3
356
+ },
357
+ "file_extension": ".py",
358
+ "mimetype": "text/x-python",
359
+ "name": "python",
360
+ "nbconvert_exporter": "python",
361
+ "pygments_lexer": "ipython3",
362
+ "version": "3.11.5"
363
+ }
364
+ },
365
+ "nbformat": 4,
366
+ "nbformat_minor": 5
367
+ }
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit entry point for the email spam detector.

Loads the pickled classifier (``spam2.pkl``) and the pickled vectorizer
(``vectorizer.pkl``) from the working directory and renders the page.
"""
import pickle

import streamlit as st


def _load_pickle(path):
    """Return the object unpickled from *path*.

    The file is opened in a ``with`` block so the handle is closed even if
    unpickling raises.

    NOTE(security): unpickling can execute arbitrary code. Only use this on
    artifacts shipped with the app, never on user-supplied files.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Loaded at module level so `model` and `cv` remain available as globals,
# exactly as before.
model = _load_pickle("spam2.pkl")
cv = _load_pickle("vectorizer.pkl")


def main():
    """Render the app page; at present this only sets the page title."""
    st.title("Email Spam Detector Apps")


# `streamlit run app.py` executes the script as `__main__`, so the page still
# renders, while a plain import of this module no longer draws UI.
if __name__ == "__main__":
    main()
requirements.txt ADDED
Binary file (5.54 kB). View file
 
spam2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd76bf3689fef8561ef859095d99e15ebbf89e9e31d73ed1f701280df896f939
3
+ size 278126
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f813b5ed0db70adceb40a68f34082892a7ac408ba19f35328e5128805397b8
3
+ size 105805