CristopherWVSU commited on
Commit
78be9d8
·
1 Parent(s): d0b66d5

Initial Commit

Browse files
Files changed (6) hide show
  1. app.py +43 -0
  2. main.ipynb +836 -0
  3. requirements.txt +5 -0
  4. spam.csv +0 -0
  5. spam_classifier.pkl +3 -0
  6. tfidf_vectorizer.pkl +3 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import re
4
+ import string
5
+ import nltk
6
+ from nltk.corpus import stopwords
7
+
8
+
9
# LOAD THE MODEL AND VECTORIZERS
@st.cache_resource
def _load_artifacts():
    """Load the persisted classifier and TF-IDF vectorizer from disk.

    Streamlit re-executes this script on every widget interaction, so the
    loader is cached: the two pickles are read once and the same objects
    are reused on later reruns instead of being deserialized again.

    Returns:
        A ``(model, vectorizer)`` tuple loaded from ``spam_classifier.pkl``
        and ``tfidf_vectorizer.pkl`` in the current working directory.
    """
    return joblib.load("spam_classifier.pkl"), joblib.load("tfidf_vectorizer.pkl")


# Keep the original module-level names so the rest of the script is unchanged.
model, vectorizer = _load_artifacts()

# Make sure the stopword corpus used by preprocess_text is available locally.
nltk.download("stopwords")
15
+
16
+
17
+ # REDUCE THE INPUT TO ITS MOST BASIC FORM
18
def preprocess_text(text):
    """Normalize a raw message into the cleaned form passed to the vectorizer.

    The message is lower-cased, runs of digits and all ASCII punctuation
    characters are removed, the result is split on whitespace, and English
    stopwords are dropped.

    Args:
        text: The raw message string.

    Returns:
        The remaining words joined by single spaces; an empty string when
        no words are left.
    """
    # Evaluate the stopword list once per call and keep it in a set, so each
    # membership test is a hash lookup rather than a fresh list fetch plus a
    # linear scan for every word of the message.
    stop_words = set(stopwords.words("english"))

    text = text.lower()
    text = re.sub(r"\d+", "", text)
    # Delete every character listed in string.punctuation in a single pass.
    text = text.translate(str.maketrans("", "", string.punctuation))
    return " ".join(word for word in text.split() if word not in stop_words)
25
+
26
+
27
# STREAMLIT APP
# Page header and short usage hint.
st.title("📩 Spam Detector App")
st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")

# Free-text box holding the message to classify.
user_input = st.text_area("Enter your message:")

# Classify only when the button is pressed during this run of the script.
if st.button("Check Spam"):
    if not user_input.strip():
        # Nothing but whitespace was entered.
        st.warning("Please enter a message to check.")
    else:
        # Clean the text, vectorize it as a one-item batch, and predict.
        features = vectorizer.transform([preprocess_text(user_input)])
        predicted_label = model.predict(features)[0]

        # Label 1 is reported as spam; any other label as not spam.
        if predicted_label == 1:
            result = "Spam"
        else:
            result = "Not Spam"
        st.success(f"Prediction: {result}")
main.ipynb ADDED
@@ -0,0 +1,836 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "<h1>Spam Detection Model</h1>\n",
8
+ "<h5>Created by: Cristopher Ian Artacho</h5>\n",
9
+ "<h5>BSCS 3A</h5>\n",
10
+ "\n",
11
+ "<h5>Using a dataset from kaggle, the aim of this project is to train a model that could identify a message to be \"spam\" or \"not spam\".</h5>"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "name": "stderr",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "[nltk_data] Downloading package stopwords to C:\\Users\\Cristopher\n",
24
+ "[nltk_data] Artacho\\AppData\\Roaming\\nltk_data...\n",
25
+ "[nltk_data] Package stopwords is already up-to-date!\n"
26
+ ]
27
+ }
28
+ ],
29
+ "source": [
30
+ "import pandas as pd\n",
31
+ "import matplotlib.pyplot as plt\n",
32
+ "import seaborn as sns\n",
33
+ "import nltk\n",
34
+ "import string\n",
35
+ "import re\n",
36
+ "\n",
37
+ "\n",
38
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
39
+ "from sklearn.model_selection import train_test_split\n",
40
+ "from sklearn.naive_bayes import MultinomialNB\n",
41
+ "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score\n",
42
+ "\n",
43
+ "nltk.download(\"stopwords\")\n",
44
+ "from nltk.corpus import stopwords\n",
45
+ "\n",
46
+ "import joblib"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "markdown",
51
+ "metadata": {},
52
+ "source": [
53
+ "<h1>Data Exploration and Preprocessing</h1>\n",
54
+ "<h5>In this process, we will get to understand our data and the dataset. If there are missing values, noise, and/or errors in the data, we will need to clean it to reduce its complexity, allowing the model to better understand the dataset.</h5>"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": 38,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "df = pd.read_csv('spam.csv')"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": null,
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "# CHANGING THE \"CATEGORY\" COLUMN TO \"LABEL\"\n",
73
+ "df.columns = [\"Label\", \"Message\"]"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 40,
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "name": "stdout",
83
+ "output_type": "stream",
84
+ "text": [
85
+ "<class 'pandas.core.frame.DataFrame'>\n",
86
+ "RangeIndex: 5572 entries, 0 to 5571\n",
87
+ "Data columns (total 2 columns):\n",
88
+ " # Column Non-Null Count Dtype \n",
89
+ "--- ------ -------------- ----- \n",
90
+ " 0 Label 5572 non-null object\n",
91
+ " 1 Message 5572 non-null object\n",
92
+ "dtypes: object(2)\n",
93
+ "memory usage: 87.2+ KB\n"
94
+ ]
95
+ }
96
+ ],
97
+ "source": [
98
+ "df.info()"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": null,
104
+ "metadata": {},
105
+ "outputs": [
106
+ {
107
+ "name": "stdout",
108
+ "output_type": "stream",
109
+ "text": [
110
+ "2\n",
111
+ "5157\n"
112
+ ]
113
+ }
114
+ ],
115
+ "source": [
116
+ "for col in df:\n",
117
+ " print(df[col].nunique())"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 42,
123
+ "metadata": {},
124
+ "outputs": [
125
+ {
126
+ "name": "stdout",
127
+ "output_type": "stream",
128
+ "text": [
129
+ "Label\n",
130
+ "ham 4825\n",
131
+ "spam 747\n",
132
+ "Name: count, dtype: int64\n"
133
+ ]
134
+ }
135
+ ],
136
+ "source": [
137
+ "print(df[\"Label\"].value_counts())"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "markdown",
142
+ "metadata": {},
143
+ "source": [
144
+ "In this part, we notice that the \"ham\" (not spam) messages far outnumber the \"spam\" messages. This might bias the model towards \"ham\". Therefore, we will need to reduce the number of ham messages to match the number of spam messages."
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": null,
150
+ "metadata": {},
151
+ "outputs": [],
152
+ "source": [
153
+ "# MAPPING THE VALUES OF LABEL FROM CATEGORICAL TO NUMERICAL\n",
154
+ "df[\"Label\"] = df[\"Label\"].map({\"spam\": 1, \"ham\": 0})"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": null,
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "ham_df = df[df[\"Label\"] == 0]\n",
164
+ "spam_df = df[df[\"Label\"] == 1]"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "code",
169
+ "execution_count": null,
170
+ "metadata": {},
171
+ "outputs": [],
172
+ "source": [
173
+ "# RANDOM SELECTION FOR \"ham\" MESSAGES\n",
174
+ "ham_sample = ham_df.sample(n=len(spam_df), random_state=42)"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": null,
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "df= pd.concat([ham_sample, spam_df])\n",
184
+ "df = df.sample(frac=1, random_state=42).reset_index(drop=True)"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": null,
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "name": "stdout",
194
+ "output_type": "stream",
195
+ "text": [
196
+ "Label\n",
197
+ "1 747\n",
198
+ "0 747\n",
199
+ "Name: count, dtype: int64\n"
200
+ ]
201
+ }
202
+ ],
203
+ "source": [
204
+ "print(df[\"Label\"].value_counts())"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "markdown",
209
+ "metadata": {},
210
+ "source": [
211
+ "<h1>Text Preprocessing and Feature Engineering</h1>\n",
212
+ "<h5>In this process, we will transform the text to its most basic format, without numbers, stopwords and punctuation that would be unnecessary and unrelated to the data.</h5>"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": null,
218
+ "metadata": {},
219
+ "outputs": [],
220
+ "source": [
221
+ "\n",
222
+ "def preprocess_text(text):\n",
223
+ " text = text.lower() \n",
224
+ " text = re.sub(r\"\\d+\", \"\", text) \n",
225
+ " text = text.translate(str.maketrans(\"\", \"\", string.punctuation))\n",
226
+ " words = text.split()\n",
227
+ " words = [word for word in words if word not in stopwords.words(\"english\")]\n",
228
+ " return \" \".join(words)"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": 49,
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": [
237
+ "df[\"Processed_Message\"] = df[\"Message\"].apply(preprocess_text)"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "markdown",
242
+ "metadata": {},
243
+ "source": [
244
+ "We will create a new column that will contain the preprocessed text and separate it from the original"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": null,
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": [
253
+ "vectorizer = TfidfVectorizer()\n",
254
+ "X = vectorizer.fit_transform(df[\"Processed_Message\"])\n",
255
+ "y = df[\"Label\"]\n"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "markdown",
260
+ "metadata": {},
261
+ "source": [
262
+ "TF-IDF (Term Frequency-Inverse Document Frequency) measures how important a word is to a message: the score rises with how often the word appears in that message and falls with how many messages in the dataset contain it. We will use it to detect the words that are characteristic of the \"spam\" and \"ham\" classes."
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "markdown",
267
+ "metadata": {},
268
+ "source": [
269
+ "<h1>Training the Model</h1>\n",
270
+ "<h5>After cleaning the data and processing the text, it is time to train the model to classify which messages are \"spam\" and which are \"ham\". For this, we will use Multinomial Naive Bayes, which classifies a message based on the frequencies of the words it contains.</h5>"
271
+ ]
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "execution_count": null,
276
+ "metadata": {},
277
+ "outputs": [],
278
+ "source": [
279
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
280
+ " X, y, test_size=0.2, stratify=y, random_state=42\n",
281
+ ")"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 52,
287
+ "metadata": {},
288
+ "outputs": [
289
+ {
290
+ "data": {
291
+ "text/html": [
292
+ "<style>#sk-container-id-2 {\n",
293
+ " /* Definition of color scheme common for light and dark mode */\n",
294
+ " --sklearn-color-text: black;\n",
295
+ " --sklearn-color-line: gray;\n",
296
+ " /* Definition of color scheme for unfitted estimators */\n",
297
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
298
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
299
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
300
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
301
+ " /* Definition of color scheme for fitted estimators */\n",
302
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
303
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
304
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
305
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
306
+ "\n",
307
+ " /* Specific color for light theme */\n",
308
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
309
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
310
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
311
+ " --sklearn-color-icon: #696969;\n",
312
+ "\n",
313
+ " @media (prefers-color-scheme: dark) {\n",
314
+ " /* Redefinition of color scheme for dark theme */\n",
315
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
316
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
317
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
318
+ " --sklearn-color-icon: #878787;\n",
319
+ " }\n",
320
+ "}\n",
321
+ "\n",
322
+ "#sk-container-id-2 {\n",
323
+ " color: var(--sklearn-color-text);\n",
324
+ "}\n",
325
+ "\n",
326
+ "#sk-container-id-2 pre {\n",
327
+ " padding: 0;\n",
328
+ "}\n",
329
+ "\n",
330
+ "#sk-container-id-2 input.sk-hidden--visually {\n",
331
+ " border: 0;\n",
332
+ " clip: rect(1px 1px 1px 1px);\n",
333
+ " clip: rect(1px, 1px, 1px, 1px);\n",
334
+ " height: 1px;\n",
335
+ " margin: -1px;\n",
336
+ " overflow: hidden;\n",
337
+ " padding: 0;\n",
338
+ " position: absolute;\n",
339
+ " width: 1px;\n",
340
+ "}\n",
341
+ "\n",
342
+ "#sk-container-id-2 div.sk-dashed-wrapped {\n",
343
+ " border: 1px dashed var(--sklearn-color-line);\n",
344
+ " margin: 0 0.4em 0.5em 0.4em;\n",
345
+ " box-sizing: border-box;\n",
346
+ " padding-bottom: 0.4em;\n",
347
+ " background-color: var(--sklearn-color-background);\n",
348
+ "}\n",
349
+ "\n",
350
+ "#sk-container-id-2 div.sk-container {\n",
351
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
352
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
353
+ " so we also need the `!important` here to be able to override the\n",
354
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
355
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
356
+ " display: inline-block !important;\n",
357
+ " position: relative;\n",
358
+ "}\n",
359
+ "\n",
360
+ "#sk-container-id-2 div.sk-text-repr-fallback {\n",
361
+ " display: none;\n",
362
+ "}\n",
363
+ "\n",
364
+ "div.sk-parallel-item,\n",
365
+ "div.sk-serial,\n",
366
+ "div.sk-item {\n",
367
+ " /* draw centered vertical line to link estimators */\n",
368
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
369
+ " background-size: 2px 100%;\n",
370
+ " background-repeat: no-repeat;\n",
371
+ " background-position: center center;\n",
372
+ "}\n",
373
+ "\n",
374
+ "/* Parallel-specific style estimator block */\n",
375
+ "\n",
376
+ "#sk-container-id-2 div.sk-parallel-item::after {\n",
377
+ " content: \"\";\n",
378
+ " width: 100%;\n",
379
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
380
+ " flex-grow: 1;\n",
381
+ "}\n",
382
+ "\n",
383
+ "#sk-container-id-2 div.sk-parallel {\n",
384
+ " display: flex;\n",
385
+ " align-items: stretch;\n",
386
+ " justify-content: center;\n",
387
+ " background-color: var(--sklearn-color-background);\n",
388
+ " position: relative;\n",
389
+ "}\n",
390
+ "\n",
391
+ "#sk-container-id-2 div.sk-parallel-item {\n",
392
+ " display: flex;\n",
393
+ " flex-direction: column;\n",
394
+ "}\n",
395
+ "\n",
396
+ "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
397
+ " align-self: flex-end;\n",
398
+ " width: 50%;\n",
399
+ "}\n",
400
+ "\n",
401
+ "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
402
+ " align-self: flex-start;\n",
403
+ " width: 50%;\n",
404
+ "}\n",
405
+ "\n",
406
+ "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
407
+ " width: 0;\n",
408
+ "}\n",
409
+ "\n",
410
+ "/* Serial-specific style estimator block */\n",
411
+ "\n",
412
+ "#sk-container-id-2 div.sk-serial {\n",
413
+ " display: flex;\n",
414
+ " flex-direction: column;\n",
415
+ " align-items: center;\n",
416
+ " background-color: var(--sklearn-color-background);\n",
417
+ " padding-right: 1em;\n",
418
+ " padding-left: 1em;\n",
419
+ "}\n",
420
+ "\n",
421
+ "\n",
422
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
423
+ "clickable and can be expanded/collapsed.\n",
424
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
425
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
426
+ "*/\n",
427
+ "\n",
428
+ "/* Pipeline and ColumnTransformer style (default) */\n",
429
+ "\n",
430
+ "#sk-container-id-2 div.sk-toggleable {\n",
431
+ " /* Default theme specific background. It is overwritten whether we have a\n",
432
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
433
+ " background-color: var(--sklearn-color-background);\n",
434
+ "}\n",
435
+ "\n",
436
+ "/* Toggleable label */\n",
437
+ "#sk-container-id-2 label.sk-toggleable__label {\n",
438
+ " cursor: pointer;\n",
439
+ " display: block;\n",
440
+ " width: 100%;\n",
441
+ " margin-bottom: 0;\n",
442
+ " padding: 0.5em;\n",
443
+ " box-sizing: border-box;\n",
444
+ " text-align: center;\n",
445
+ "}\n",
446
+ "\n",
447
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
448
+ " /* Arrow on the left of the label */\n",
449
+ " content: \"▸\";\n",
450
+ " float: left;\n",
451
+ " margin-right: 0.25em;\n",
452
+ " color: var(--sklearn-color-icon);\n",
453
+ "}\n",
454
+ "\n",
455
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
456
+ " color: var(--sklearn-color-text);\n",
457
+ "}\n",
458
+ "\n",
459
+ "/* Toggleable content - dropdown */\n",
460
+ "\n",
461
+ "#sk-container-id-2 div.sk-toggleable__content {\n",
462
+ " max-height: 0;\n",
463
+ " max-width: 0;\n",
464
+ " overflow: hidden;\n",
465
+ " text-align: left;\n",
466
+ " /* unfitted */\n",
467
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
468
+ "}\n",
469
+ "\n",
470
+ "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
471
+ " /* fitted */\n",
472
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
473
+ "}\n",
474
+ "\n",
475
+ "#sk-container-id-2 div.sk-toggleable__content pre {\n",
476
+ " margin: 0.2em;\n",
477
+ " border-radius: 0.25em;\n",
478
+ " color: var(--sklearn-color-text);\n",
479
+ " /* unfitted */\n",
480
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
481
+ "}\n",
482
+ "\n",
483
+ "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
484
+ " /* unfitted */\n",
485
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
486
+ "}\n",
487
+ "\n",
488
+ "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
489
+ " /* Expand drop-down */\n",
490
+ " max-height: 200px;\n",
491
+ " max-width: 100%;\n",
492
+ " overflow: auto;\n",
493
+ "}\n",
494
+ "\n",
495
+ "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
496
+ " content: \"▾\";\n",
497
+ "}\n",
498
+ "\n",
499
+ "/* Pipeline/ColumnTransformer-specific style */\n",
500
+ "\n",
501
+ "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
502
+ " color: var(--sklearn-color-text);\n",
503
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
504
+ "}\n",
505
+ "\n",
506
+ "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
507
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
508
+ "}\n",
509
+ "\n",
510
+ "/* Estimator-specific style */\n",
511
+ "\n",
512
+ "/* Colorize estimator box */\n",
513
+ "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
514
+ " /* unfitted */\n",
515
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
516
+ "}\n",
517
+ "\n",
518
+ "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
519
+ " /* fitted */\n",
520
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
521
+ "}\n",
522
+ "\n",
523
+ "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
524
+ "#sk-container-id-2 div.sk-label label {\n",
525
+ " /* The background is the default theme color */\n",
526
+ " color: var(--sklearn-color-text-on-default-background);\n",
527
+ "}\n",
528
+ "\n",
529
+ "/* On hover, darken the color of the background */\n",
530
+ "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
531
+ " color: var(--sklearn-color-text);\n",
532
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
533
+ "}\n",
534
+ "\n",
535
+ "/* Label box, darken color on hover, fitted */\n",
536
+ "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
537
+ " color: var(--sklearn-color-text);\n",
538
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
539
+ "}\n",
540
+ "\n",
541
+ "/* Estimator label */\n",
542
+ "\n",
543
+ "#sk-container-id-2 div.sk-label label {\n",
544
+ " font-family: monospace;\n",
545
+ " font-weight: bold;\n",
546
+ " display: inline-block;\n",
547
+ " line-height: 1.2em;\n",
548
+ "}\n",
549
+ "\n",
550
+ "#sk-container-id-2 div.sk-label-container {\n",
551
+ " text-align: center;\n",
552
+ "}\n",
553
+ "\n",
554
+ "/* Estimator-specific */\n",
555
+ "#sk-container-id-2 div.sk-estimator {\n",
556
+ " font-family: monospace;\n",
557
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
558
+ " border-radius: 0.25em;\n",
559
+ " box-sizing: border-box;\n",
560
+ " margin-bottom: 0.5em;\n",
561
+ " /* unfitted */\n",
562
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
563
+ "}\n",
564
+ "\n",
565
+ "#sk-container-id-2 div.sk-estimator.fitted {\n",
566
+ " /* fitted */\n",
567
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
568
+ "}\n",
569
+ "\n",
570
+ "/* on hover */\n",
571
+ "#sk-container-id-2 div.sk-estimator:hover {\n",
572
+ " /* unfitted */\n",
573
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
574
+ "}\n",
575
+ "\n",
576
+ "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
577
+ " /* fitted */\n",
578
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
579
+ "}\n",
580
+ "\n",
581
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
582
+ "\n",
583
+ "/* Common style for \"i\" and \"?\" */\n",
584
+ "\n",
585
+ ".sk-estimator-doc-link,\n",
586
+ "a:link.sk-estimator-doc-link,\n",
587
+ "a:visited.sk-estimator-doc-link {\n",
588
+ " float: right;\n",
589
+ " font-size: smaller;\n",
590
+ " line-height: 1em;\n",
591
+ " font-family: monospace;\n",
592
+ " background-color: var(--sklearn-color-background);\n",
593
+ " border-radius: 1em;\n",
594
+ " height: 1em;\n",
595
+ " width: 1em;\n",
596
+ " text-decoration: none !important;\n",
597
+ " margin-left: 1ex;\n",
598
+ " /* unfitted */\n",
599
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
600
+ " color: var(--sklearn-color-unfitted-level-1);\n",
601
+ "}\n",
602
+ "\n",
603
+ ".sk-estimator-doc-link.fitted,\n",
604
+ "a:link.sk-estimator-doc-link.fitted,\n",
605
+ "a:visited.sk-estimator-doc-link.fitted {\n",
606
+ " /* fitted */\n",
607
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
608
+ " color: var(--sklearn-color-fitted-level-1);\n",
609
+ "}\n",
610
+ "\n",
611
+ "/* On hover */\n",
612
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
613
+ ".sk-estimator-doc-link:hover,\n",
614
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
615
+ ".sk-estimator-doc-link:hover {\n",
616
+ " /* unfitted */\n",
617
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
618
+ " color: var(--sklearn-color-background);\n",
619
+ " text-decoration: none;\n",
620
+ "}\n",
621
+ "\n",
622
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
623
+ ".sk-estimator-doc-link.fitted:hover,\n",
624
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
625
+ ".sk-estimator-doc-link.fitted:hover {\n",
626
+ " /* fitted */\n",
627
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
628
+ " color: var(--sklearn-color-background);\n",
629
+ " text-decoration: none;\n",
630
+ "}\n",
631
+ "\n",
632
+ "/* Span, style for the box shown on hovering the info icon */\n",
633
+ ".sk-estimator-doc-link span {\n",
634
+ " display: none;\n",
635
+ " z-index: 9999;\n",
636
+ " position: relative;\n",
637
+ " font-weight: normal;\n",
638
+ " right: .2ex;\n",
639
+ " padding: .5ex;\n",
640
+ " margin: .5ex;\n",
641
+ " width: min-content;\n",
642
+ " min-width: 20ex;\n",
643
+ " max-width: 50ex;\n",
644
+ " color: var(--sklearn-color-text);\n",
645
+ " box-shadow: 2pt 2pt 4pt #999;\n",
646
+ " /* unfitted */\n",
647
+ " background: var(--sklearn-color-unfitted-level-0);\n",
648
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
649
+ "}\n",
650
+ "\n",
651
+ ".sk-estimator-doc-link.fitted span {\n",
652
+ " /* fitted */\n",
653
+ " background: var(--sklearn-color-fitted-level-0);\n",
654
+ " border: var(--sklearn-color-fitted-level-3);\n",
655
+ "}\n",
656
+ "\n",
657
+ ".sk-estimator-doc-link:hover span {\n",
658
+ " display: block;\n",
659
+ "}\n",
660
+ "\n",
661
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
662
+ "\n",
663
+ "#sk-container-id-2 a.estimator_doc_link {\n",
664
+ " float: right;\n",
665
+ " font-size: 1rem;\n",
666
+ " line-height: 1em;\n",
667
+ " font-family: monospace;\n",
668
+ " background-color: var(--sklearn-color-background);\n",
669
+ " border-radius: 1rem;\n",
670
+ " height: 1rem;\n",
671
+ " width: 1rem;\n",
672
+ " text-decoration: none;\n",
673
+ " /* unfitted */\n",
674
+ " color: var(--sklearn-color-unfitted-level-1);\n",
675
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
676
+ "}\n",
677
+ "\n",
678
+ "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
679
+ " /* fitted */\n",
680
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
681
+ " color: var(--sklearn-color-fitted-level-1);\n",
682
+ "}\n",
683
+ "\n",
684
+ "/* On hover */\n",
685
+ "#sk-container-id-2 a.estimator_doc_link:hover {\n",
686
+ " /* unfitted */\n",
687
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
688
+ " color: var(--sklearn-color-background);\n",
689
+ " text-decoration: none;\n",
690
+ "}\n",
691
+ "\n",
692
+ "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
693
+ " /* fitted */\n",
694
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
695
+ "}\n",
696
+ "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultinomialNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;MultinomialNB<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.MultinomialNB.html\">?<span>Documentation for MultinomialNB</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>MultinomialNB()</pre></div> </div></div></div></div>"
697
+ ],
698
+ "text/plain": [
699
+ "MultinomialNB()"
700
+ ]
701
+ },
702
+ "execution_count": 52,
703
+ "metadata": {},
704
+ "output_type": "execute_result"
705
+ }
706
+ ],
707
+ "source": [
708
+ "# Train the Naive Bayes Model\n",
709
+ "model = MultinomialNB()\n",
710
+ "model.fit(X_train, y_train)\n"
711
+ ]
712
+ },
713
+ {
714
+ "cell_type": "code",
715
+ "execution_count": 53,
716
+ "metadata": {},
717
+ "outputs": [],
718
+ "source": [
719
+ "# Predictions\n",
720
+ "y_pred = model.predict(X_test)\n"
721
+ ]
722
+ },
723
+ {
724
+ "cell_type": "markdown",
725
+ "metadata": {},
726
+ "source": [
727
+ "<h1>Model Evaluation</h1>\n",
728
+ "<h5>After training the model, we will use Evaluation metrics in order to judge if the model's predictions are correct.</h5>\n",
729
+ "<h5></h5>"
730
+ ]
731
+ },
732
+ {
733
+ "cell_type": "code",
734
+ "execution_count": null,
735
+ "metadata": {},
736
+ "outputs": [
737
+ {
738
+ "name": "stdout",
739
+ "output_type": "stream",
740
+ "text": [
741
+ "Accuracy: 0.9398\n",
742
+ "F1 Score: 0.9375\n",
743
+ "\n",
744
+ "Classification Report:\n",
745
+ " precision recall f1-score support\n",
746
+ "\n",
747
+ " 0 0.91 0.97 0.94 150\n",
748
+ " 1 0.97 0.91 0.94 149\n",
749
+ "\n",
750
+ " accuracy 0.94 299\n",
751
+ " macro avg 0.94 0.94 0.94 299\n",
752
+ "weighted avg 0.94 0.94 0.94 299\n",
753
+ "\n"
754
+ ]
755
+ }
756
+ ],
757
+ "source": [
758
+ "accuracy = accuracy_score(y_test, y_pred)\n",
759
+ "f1 = f1_score(y_test, y_pred)\n",
760
+ "print(f\"Accuracy: {accuracy:.4f}\")\n",
761
+ "print(f\"F1 Score: {f1:.4f}\")\n",
762
+ "print(\"\\nClassification Report:\\n\", classification_report(y_test, y_pred))\n"
763
+ ]
764
+ },
765
+ {
766
+ "cell_type": "code",
767
+ "execution_count": null,
768
+ "metadata": {},
769
+ "outputs": [
770
+ {
771
+ "data": {
772
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbwAAAGJCAYAAADxB4bBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+W0lEQVR4nO3deVgVZf8/8PdhOyA7JpvJopKC4m6EmMIjiYiK4RLmAmbuuIALUZpoKUnuWlpmiuaWG5mWSqKiiYoL7rmnlSxugIIcEOb3hz/PtxNQLIdzhPv96prr8txzz8xneHh8e8/cM0cmSZIEIiKiWk5H2wUQERFpAgOPiIiEwMAjIiIhMPCIiEgIDDwiIhICA4+IiITAwCMiIiEw8IiISAgMPCIiEgIDj2qUa9euoWvXrjA3N4dMJkN8fLxa9//7779DJpNhzZo1at1vTebt7Q1vb29tl0FUZQw8qrAbN25g5MiRaNiwIQwNDWFmZgYvLy8sXrwYT58+rdZjh4SE4Pz585g9ezbWrVuHdu3aVevxNCk0NBQymQxmZmal/hyvXbsGmUwGmUyGefPmVXj/d+/eRXR0NFJTU9VQLVHNo6ftAqhm2b17N/r16we5XI4hQ4agefPmKCgowJEjRzBlyhRcvHgRX3/9dbUc++nTp0hOTsZHH32EsLCwajmGo6Mjnj59Cn19/WrZ/3/R09NDXl4efvzxR/Tv319l3fr162FoaIj8/PxK7fvu3buYOXMmnJyc0KpVq3Jvt2/fvkodj+hlw8Cjcrt16xaCg4Ph6OiIxMRE2NnZKdeNHTsW169fx+7du6vt+Pfu3QMAWFhYVNsxZDIZDA0Nq23//0Uul8PLywsbN24sEXgbNmxAQEAAtm3bppFa8vLyUKdOHRgYGGjkeETVjZc0qdxiY2Px5MkTrFq1SiXsXmjcuDEmTJig/Pzs2TN88sknaNSoEeRyOZycnPDhhx9CoVCobOfk5IQePXrgyJEjeP3112FoaIiGDRti7dq1yj7R0dFwdHQEAEyZMgUymQxOTk4Anl8KfPHnv4uOjoZMJlNpS0hIQMeOHWFhYQETExM0adIEH374oXJ9WffwEhMT8eabb8LY2BgWFhYIDAzE5cuXSz3e9evXERoaCgsLC5ibm2Po0KHIy8sr+wf7D++++y5+/vlnZGVlKdtSUlJw7do1vPvuuyX6P3z4EJMnT4a7uztMTExgZmYGf39/nD17Vtnn4MGDaN++PQBg6NChykujL87T29sbzZs3x6lTp9CpUyfUqVNH+XP55z28kJAQGBoaljh/Pz8/WFpa4u7du+U+VyJNYuBRuf34449o2LAhOnToUK7+77//Pj7++GO0adMGCxcuROfOnRETE4Pg4OASfa9fv46+ffvirbfewvz582FpaYnQ0FBcvHgRABAUFISFCxcCAAYMGIB169Zh0aJFFar/4sWL6NGjBxQKBWbNmoX58+ejV69e+PXXX/91u19++QV+fn7IzMxEdHQ0IiIicPToUXh5eeH3338v0b9///54/PgxYmJi0L9/f6xZswYzZ84sd51BQUGQyWTYvn27sm3Dhg1o2rQp2rRpU6L/zZs3ER8fjx49emDBggWYMmUKzp8/j86dOyvDx9XVFbNmzQIAjBgxAuvWrcO6devQqVMn5X4ePHgAf39/tGrVCosWLYKPj0+p9S1evBj16tVDSEgIioqKAABfffUV9u3bh6VLl8Le3r7c50qkURJROWRnZ0sApMDAwHL1T01NlQBI77//vkr75MmTJQBSYmKiss3R0VECICUlJSnbMjMzJblcLk2aNEnZduvWLQmA9Pnnn6vsMyQkRHJ0dCxRw4wZM6S//4ovXLhQAiDdu3evzLpfHGP16tXKtlatWknW1tbSgwcPlG1nz56VdHR0pCFDhpQ43nvvvaeyz7fffluqW7dumcf8+3kYGxtLkiRJffv2lbp06SJJkiQVFRVJtra20syZM0v9
GeTn50tFRUUlzkMul0uzZs1StqWkpJQ4txc6d+4sAZBWrFhR6rrOnTurtO3du1cCIH366afSzZs3JRMTE6l3797/eY5E2sQRHpVLTk4OAMDU1LRc/X/66ScAQEREhEr7pEmTAKDEvT43Nze8+eabys/16tVDkyZNcPPmzUrX/E8v7v398MMPKC4uLtc2aWlpSE1NRWhoKKysrJTtLVq0wFtvvaU8z78bNWqUyuc333wTDx48UP4My+Pdd9/FwYMHkZ6ejsTERKSnp5d6ORN4ft9PR+f5/5WLiorw4MED5eXa06dPl/uYcrkcQ4cOLVffrl27YuTIkZg1axaCgoJgaGiIr776qtzHItIGBh6Vi5mZGQDg8ePH5ep/+/Zt6OjooHHjxirttra2sLCwwO3bt1XaHRwcSuzD0tISjx49qmTFJb3zzjvw8vLC+++/DxsbGwQHB+P777//1/B7UWeTJk1KrHN1dcX9+/eRm5ur0v7Pc7G0tASACp1L9+7dYWpqis2bN2P9+vVo3759iZ/lC8XFxVi4cCFcXFwgl8vxyiuvoF69ejh37hyys7PLfcz69etXaILKvHnzYGVlhdTUVCxZsgTW1tbl3pZIGxh4VC5mZmawt7fHhQsXKrTdPyeNlEVXV7fUdkmSKn2MF/eXXjAyMkJSUhJ++eUXDB48GOfOncM777yDt956q0TfqqjKubwgl8sRFBSEuLg47Nixo8zRHQDMmTMHERER6NSpE7777jvs3bsXCQkJaNasWblHssDzn09FnDlzBpmZmQCA8+fPV2hbIm1g4FG59ejRAzdu3EBycvJ/9nV0dERxcTGuXbum0p6RkYGsrCzljEt1sLS0VJnR+MI/R5EAoKOjgy5dumDBggW4dOkSZs+ejcTERBw4cKDUfb+o88qVKyXW/fbbb3jllVdgbGxctRMow7vvvoszZ87g8ePHpU70eWHr1q3w8fHBqlWrEBwcjK5du8LX17fEz6S8//goj9zcXAwdOhRubm4YMWIEYmNjkZKSorb9E1UHBh6V29SpU2FsbIz3338fGRkZJdbfuHEDixcvBvD8khyAEjMpFyxYAAAICAhQW12NGjVCdnY2zp07p2xLS0vDjh07VPo9fPiwxLYvHsD+56MSL9jZ2aFVq1aIi4tTCZALFy5g3759yvOsDj4+Pvjkk0+wbNky2NraltlPV1e3xOhxy5Yt+Ouvv1TaXgRzaf84qKjIyEjcuXMHcXFxWLBgAZycnBASElLmz5HoZcAHz6ncGjVqhA0bNuCdd96Bq6uryptWjh49ii1btiA0NBQA0LJlS4SEhODrr79GVlYWOnfujBMnTiAuLg69e/cuc8p7ZQQHByMyMhJvv/02xo8fj7y8PCxfvhyvvfaayqSNWbNmISkpCQEBAXB0dERmZia+/PJLvPrqq+jYsWOZ+//888/h7+8PT09PDBs2DE+fPsXSpUthbm6O6OhotZ3HP+no6GDatGn/2a9Hjx6YNWsWhg4dig4dOuD8+fNYv349GjZsqNKvUaNGsLCwwIoVK2BqagpjY2N4eHjA2dm5QnUlJibiyy+/xIwZM5SPSaxevRre3t6YPn06YmNjK7Q/Io3R8ixRqoGuXr0qDR8+XHJycpIMDAwkU1NTycvLS1q6dKmUn5+v7FdYWCjNnDlTcnZ2lvT19aUGDRpIUVFRKn0k6fljCQEBASWO88/p8GU9liBJkrRv3z6pefPmkoGBgdSkSRPpu+++K/FYwv79+6XAwEDJ3t5eMjAwkOzt7aUBAwZIV69eLXGMf07d/+WXXyQvLy/JyMhIMjMzk3r27CldunRJpc+L4/3zsYfVq1dLAKRbt26V+TOVJNXHEspS1mMJkyZNkuzs7CQjIyPJy8tLSk5OLvVxgh9++EFyc3OT9PT0VM6zc+fOUrNmzUo95t/3k5OTIzk6Okpt2rSRCgsLVfqFh4dLOjo6UnJy8r+eA5G2yCSpAnfSiYiIaijewyMiIiEw8IiI
SAgMPCIiEgIDj4iIhMDAIyIiITDwiIhICAw8IiISQq1804pR6zBtl0CCeJSyTNslkCAM1fy3dVX+nnx6pmb+3tfKwCMiov8gE+8CHwOPiEhEavz2jJqCgUdEJCIBR3jinTEREQmJIzwiIhHxkiYREQlBwEuaDDwiIhFxhEdEREIQcIQn3hkTEdHzEV5llwpISkpCz549YW9vD5lMhvj4+DL7jho1CjKZDIsWLVJpf/jwIQYOHAgzMzNYWFhg2LBhePLkSYVPmYFHRETVJjc3Fy1btsQXX3zxr/127NiBY8eOwd7evsS6gQMH4uLFi0hISMCuXbuQlJSEESNGVLgWXtIkIhKRhi5p+vv7w9/f/1/7/PXXXxg3bhz27t2LgIAAlXWXL1/Gnj17kJKSgnbt2gEAli5diu7du2PevHmlBmRZOMIjIhJRFS5pKhQK5OTkqCwKhaJSZRQXF2Pw4MGYMmUKmjVrVmJ9cnIyLCwslGEHAL6+vtDR0cHx48crdCwGHhGRiGQ6lV5iYmJgbm6ussTExFSqjLlz50JPTw/jx48vdX16ejqsra1V2vT09GBlZYX09PQKHYuXNImIRFSFxxKioqIQERGh0iaXyyu8n1OnTmHx4sU4ffo0ZBp4TIIjPCIiEVVhhCeXy2FmZqayVCbwDh8+jMzMTDg4OEBPTw96enq4ffs2Jk2aBCcnJwCAra0tMjMzVbZ79uwZHj58CFtb2wodjyM8IiLSisGDB8PX11elzc/PD4MHD8bQoUMBAJ6ensjKysKpU6fQtm1bAEBiYiKKi4vh4eFRoeMx8IiIRKShWZpPnjzB9evXlZ9v3bqF1NRUWFlZwcHBAXXr1lXpr6+vD1tbWzRp0gQA4Orqim7dumH48OFYsWIFCgsLERYWhuDg4ArN0AQYeEREYtLRzKvFTp48CR8fH+XnF/f+QkJCsGbNmnLtY/369QgLC0OXLl2go6ODPn36YMmSJRWuhYFHRCQiDY3wvL29IUlSufv//vvvJdqsrKywYcOGKtfCwCMiEhFfHk1ERELgy6OJiIhqJ47wiIhExEuaREQkBAEvaTLwiIhExBEeEREJgSM8IiISgoAjPPEinoiIhMQRHhGRiHhJk4iIhCDgJU0GHhGRiDjCIyIiITDwiIhICAJe0hQv4omISEgc4RERiYiXNImISAgCXtJk4BERiYgjPCIiEgJHeEREJAKZgIEn3piWiIiExBEeEZGARBzhMfCIiEQkXt4x8IiIRMQRHhERCYGBR0REQhAx8DhLk4iIhMARHhGRgEQc4THwiIhEJF7eMfCIiETEER4REQmBgUdEREIQMfA4S5OIiKpNUlISevbsCXt7e8hkMsTHxyvXFRYWIjIyEu7u7jA2Noa9vT2GDBmCu3fvquzj4cOHGDhwIMzMzGBhYYFhw4bhyZMnFa6FgUdEJCCZTFbppSJyc3PRsmVLfPHFFyXW5eXl4fTp05g+fTpOnz6N7du348qVK+jVq5dKv4EDB+LixYtISEjArl27kJSUhBEjRlT8nCVJkiq81UvOqHWYtksgQTxKWabtEkgQhmq+AVU3ZGOlt30QN6BS28lkMuzYsQO9e/cus09KSgpef/113L59Gw4ODrh8+TLc3NyQkpKCdu3aAQD27NmD7t27488//4S9vX25j88RHhGRgKoywlMoFMjJyVFZFAqFWurKzs6GTCaDhYUFACA5ORkWFhbKsAMAX19f6Ojo4Pjx4xXaNwOPiEhAVQm8mJgYmJubqywxMTFVrik/Px+RkZEYMGAAzMzMAADp6emwtrZW6aenpwcrKyukp6dXaP+cpUlEJKCqzNKMiopCRESESptcLq9SPYWFhejfvz8kScLy5curtK+yMPCIiKhC5HJ5lQPu716E3e3bt5GYmKgc3QGAra0tMjMzVfo/e/YMDx8+hK2tbYWOw0uaREQiklVhUaMXYXft2jX88ssvqFu3rsp6T09PZGVl4dSpU8q2xMREFBcXw8PD
o0LH4giPiEhAmnrw/MmTJ7h+/bry861bt5CamgorKyvY2dmhb9++OH36NHbt2oWioiLlfTkrKysYGBjA1dUV3bp1w/Dhw7FixQoUFhYiLCwMwcHBFZqhCTDwiIiEpKnAO3nyJHx8fJSfX9z7CwkJQXR0NHbu3AkAaNWqlcp2Bw4cgLe3NwBg/fr1CAsLQ5cuXaCjo4M+ffpgyZIlFa6FgUdEJCBNBZ63tzf+7XHv8jwKbmVlhQ0bNlS5FgYeEZGA+C5NIiKiWuqlGeHl5+fj3LlzyMzMRHFxscq6f75XjYiIqki8Ad7LEXh79uzBkCFDcP/+/RLrZDIZioqKtFAVEVHtxUuaWjJu3Dj069cPaWlpKC4uVlkYdkRE6qepb0t4mbwUI7yMjAxERETAxsZG26UQEQmhJgdXZb0UI7y+ffvi4MGD2i6DiIhqsZdihLds2TL069cPhw8fhru7O/T19VXWjx8/XkuVERHVUuIN8F6OwNu4cSP27dsHQ0NDHDx4UGWoLZPJGHiV5NWmEcKH+KKNmwPs6pmjf/jX+PHguVL7LvkoGMP7dsSUz7di2YaDKuu6dWyGD0f4o7mLPfILnuHIqWvoH7FSA2dAtdWqlV9jyaL5GDhoCKZGfaTtcoQk4iXNlyLwPvroI8ycORMffPABdHReiqustYKxkRznr/6FtT8kY/OCEWX26+XTAq+7O+FuZlaJdb27tMIX0wdgxrIfcfDEVejp6aBZI7tqrJpquwvnz2Hrlk147bUm2i5FaAw8LSkoKMA777zDsFOzfb9ewr5fL/1rH/t65lgQ2Q89x3yBHUtHq6zT1dXBvCl98OGieMTFJyvbf7tZsS9dJHohLzcXUZFTMGPmp1j5VfV85xmVj4iB91IkTEhICDZv3qztMoQjk8mw6tMhWBi3H5dLCbHWTRugvo0lioslJG+MxM19sxG/bDTcOMKjSprz6Sx06tQZb3h20HYpwuNjCVpSVFSE2NhY7N27Fy1atCgxaWXBggVaqqx2mzT0LTwrKsYXGw+Wut751VcAANNGdUfk/O24ffcBJgzugr0rJ6BF71l4lJOnwWqppvv5p924fPkSNmzequ1SSFAvReCdP38erVu3BgBcuHBBZd1//WtCoVBAoVCotEnFRZDp6Kq3yFqmtWsDjB3gjQ7vzi2zj87//9nP/WYv4venAgBGzPgO1/d+gqC3WmPVtl81USrVAulpaYj9bDa+WvmtWr8pm6qg5g7UKu2lCLwDBw5UetuYmBjMnDlTpU3Xpj307V6valm1mlfrRrC2MsHVn2Yp2/T0dPFZRBDCBvqgacAMpN3PBgD8djNN2aeg8Bl+//MBGthaabxmqrkuXbqIhw8eILhfkLKtqKgIp06mYNPG9Ug5cx66uvxHqibV5EuTlfVSBF5VREVFKb9Q8AXrNyO1VE3NsWF3ChKPX1Fp+/HLsdiw+wTW/nAMAHDm8h/IVxTCxckGR1NvAgD09HTgYG+FO2kPNV4z1Vweb7yBrfE/qrTN+CgKTg0bYuiw4Qw7LWDgadHJkyfx/fff486dOygoKFBZt3379jK3k8vlJS6R8HLmc8ZGBmjUoJ7ys1P9umjxWn08ysnDH+mP8DA7V6V/4bMiZNzPwbXbmQCAx7n5+GbrEUwf1R1/pj/CnbSHCA/xBQBsTzituROhGs/Y2AQuLq+ptBnVqQMLc4sS7aQZAubdyxF4mzZtwpAhQ+Dn54d9+/aha9euuHr1KjIyMvD2229ru7waq42bI/Z9M0H5OXZyHwDAup3HMGLGd+XaR9SiHXhWVIxVnw6BkVwfKRduw3/EEmQ9flotNRORZog4wpNJ5fl+9WrWokULjBw5EmPHjoWpqSnOnj0LZ2dnjBw5EnZ2diXu0f0Xo9Zh1VQpkapHKcu0XQIJwlDNwxOXKXsqve21z7upsRLNeSmew7tx4wYCAgIAAAYGBsjNzYVMJkN4eDi+/vprLVdHRFT7yGSVX2qqlyLwLC0t8fjx
YwBA/fr1lY8mZGVlIS+Pz3oREakbHzzXkk6dOiEhIQHu7u7o168fJkyYgMTERCQkJKBLly7aLo+IqNapwblVaS9F4C1btgz5+fkAnr9IWl9fH0ePHkWfPn0wbdo0LVdHRFT76OiIl3haDbycnJznRejpwcTERPl5zJgxGDNmjDZLIyKq1TjC0zALC4tyXQ8uKirSQDVERFSbaTXw/v5KMUmS0L17d3zzzTeoX7++FqsiIqr9avLkk8rSauB17txZ5bOuri7eeOMNNGzYUEsVERGJQcC8ezkmrRARkWZxhEdEREJg4L0ERPwfgYhI00T8q1argRcUFKTyOT8/H6NGjYKxsbFK+799WwIREVF5aDXwzM3NVT4PGjRIS5UQEYlFxKtpWg281atXa/PwRETCEjDvXo6XRxMRkWZp6uXRSUlJ6NmzJ+zt7SGTyRAfH6+yXpIkfPzxx7Czs4ORkRF8fX1x7do1lT4PHz7EwIEDYWZmBgsLCwwbNgxPnjyp8Dkz8IiIBKSprwfKzc1Fy5Yt8cUXX5S6PjY2FkuWLMGKFStw/PhxGBsbw8/PT/l+ZQAYOHAgLl68iISEBOzatQtJSUkYMWJEhc/5pZulSURE1a8q9/AUCgUUCoVKm1wuh1wuL9HX398f/v7+pe5HkiQsWrQI06ZNQ2BgIABg7dq1sLGxQXx8PIKDg3H58mXs2bMHKSkpaNeuHQBg6dKl6N69O+bNmwd7e/ty180RHhERVUhMTAzMzc1VlpiYmArv59atW0hPT4evr6+yzdzcHB4eHkhOTgYAJCcnw8LCQhl2AODr6wsdHR0cP368QsfjCI+ISEBVmbQSFRWFiIgIlbbSRnf/JT09HQBgY2Oj0m5jY6Ncl56eDmtra5X1enp6sLKyUvYpLwYeEZGAqnJJs6zLly87XtIkIhKQpiat/BtbW1sAQEZGhkp7RkaGcp2trS0yMzNV1j979gwPHz5U9ikvBh4RkYA09VjCv3F2doatrS3279+vbMvJycHx48fh6ekJAPD09ERWVhZOnTql7JOYmIji4mJ4eHhU6Hi8pElEJCBNPXj+5MkTXL9+Xfn51q1bSE1NhZWVFRwcHDBx4kR8+umncHFxgbOzM6ZPnw57e3v07t0bAODq6opu3bph+PDhWLFiBQoLCxEWFobg4OAKzdAEGHhERFSNTp48CR8fH+XnF5NdQkJCsGbNGkydOhW5ubkYMWIEsrKy0LFjR+zZsweGhobKbdavX4+wsDB06dIFOjo66NOnD5YsWVLhWmSSJElVP6WXi1HrMG2XQIJ4lLJM2yWQIAzVPDzx+vxwpbf9dcqbaqxEczjCIyISkIjv0mTgEREJiN+WQEREQmDgERGREATMOz6HR0REYuAIj4hIQLykSUREQhAw7xh4REQi4giPiIiEIGDeMfCIiESkI2DicZYmEREJgSM8IiIBCTjAY+AREYmIk1aIiEgIOuLlHQOPiEhEHOEREZEQBMw7ztIkIiIxcIRHRCQgGcQb4jHwiIgExEkrREQkBE5aISIiIQiYdww8IiIR8V2aREREtRRHeEREAhJwgMfAIyISESetEBGREATMOwYeEZGIRJy0wsAjIhKQeHFXzsDbuXNnuXfYq1evShdDRERUXcoVeL179y7XzmQyGYqKiqpSDxERaQAnrZShuLi4uusgIiIN4rs0iYhICBzhlVNubi4OHTqEO3fuoKCgQGXd+PHj1VIYERFVH03lXVFREaKjo/Hdd98hPT0d9vb2CA0NxbRp05ShK0kSZsyYgZUrVyIrKwteXl5Yvnw5XFxc1FpLhQPvzJkz6N69O/Ly8pCbmwsrKyvcv38fderUgbW1NQOPiKgG0NQIb+7cuVi+fDni4uLQrFkznDx5EkOHDoW5ubkyL2JjY7FkyRLExcXB2dkZ06dPh5+fHy5dugRDQ0O11VLhd2mGh4ejZ8+eePToEYyMjHDs2DHcvn0bbdu2xbx589RWGBER1XxHjx5FYGAg
AgIC4OTkhL59+6Jr1644ceIEgOeju0WLFmHatGkIDAxEixYtsHbtWty9exfx8fFqraXCgZeamopJkyZBR0cHurq6UCgUaNCgAWJjY/Hhhx+qtTgiIqoeOrLKLwqFAjk5OSqLQqEo9TgdOnTA/v37cfXqVQDA2bNnceTIEfj7+wMAbt26hfT0dPj6+iq3MTc3h4eHB5KTk9V7zhXdQF9fHzo6zzeztrbGnTt3lAX+8ccfai2OiIiqh0wmq/QSExMDc3NzlSUmJqbU43zwwQcIDg5G06ZNoa+vj9atW2PixIkYOHAgACA9PR0AYGNjo7KdjY2Ncp26VPgeXuvWrZGSkgIXFxd07twZH3/8Me7fv49169ahefPmai2OiIiqR1Xu4EVFRSEiIkKlTS6Xl9r3+++/x/r167FhwwY0a9YMqampmDhxIuzt7RESElKFKiquwoE3Z84cPH78GAAwe/ZsDBkyBKNHj4aLiwu+/fZbtRdIRETqV5V3acrl8jID7p+mTJmiHOUBgLu7O27fvo2YmBiEhITA1tYWAJCRkQE7OzvldhkZGWjVqlWlayxNhQOvXbt2yj9bW1tjz549ai2IiIhqj7y8POVtsBd0dXWVLzRxdnaGra0t9u/frwy4nJwcHD9+HKNHj1ZrLXzwnIhIQJp6Dq9nz56YPXs2HBwc0KxZM5w5cwYLFizAe++99//rkGHixIn49NNP4eLionwswd7evtyvtSyvCgees7Pzvz6/cfPmzSoVRERE1U9Tz+EtXboU06dPx5gxY5CZmQl7e3uMHDkSH3/8sbLP1KlTkZubixEjRiArKwsdO3bEnj171PoMHgDIJEmSKrLB4sWLVT4XFhbizJkz2LNnj/JarbYZtQ7TdgkkiEcpy7RdAgnCUM3X40ZuvVjpbb/q20yNlWhOhX+EEyZMKLX9iy++wMmTJ6tcEBERVT8RvwC2ws/hlcXf3x/btm1T1+6IiKgayWSVX2oqtQXe1q1bYWVlpa7dERERqVWlHjz/+81OSZKQnp6Oe/fu4csvv1RrcUREVD349UDlEBgYqPKD0tHRQb169eDt7Y2mTZuqtbjKup20UNslkCAs/edquwQSxNOESLXuT22X92qQCgdedHR0NZRBRESaJOIIr8Ihr6uri8zMzBLtDx48gK6urlqKIiKi6lWVb0uoqSo8wivrsT2FQgEDA4MqF0RERNWvJgdXZZU78JYsWQLg+TD4m2++gYmJiXJdUVERkpKSXpp7eERERP9U7sBbuPD5RBBJkrBixQqVy5cGBgZwcnLCihUr1F8hERGpnYj38ModeLdu3QIA+Pj4YPv27bC0tKy2ooiIqHrxkmY5HDhwoDrqICIiDRJwgFfxWZp9+vTB3Lklnz2KjY1Fv3791FIUERFVLx2ZrNJLTVXhwEtKSkL37t1LtPv7+yMpKUktRRERUfXSqcJSU1W49idPnpT6+IG+vj5ycnLUUhQREZG6VTjw3N3dsXnz5hLtmzZtgpubm1qKIiKi6iXityVUeNLK9OnTERQUhBs3buB///sfAGD//v3YsGEDtm7dqvYCiYhI/WryvbjKqnDg9ezZE/Hx8ZgzZw62bt0KIyMjtGzZEomJifx6ICKiGkLAvKt44AFAQEAAAgICAAA5OTnYuHEjJk+ejFOnTqGoqEitBRIRkfqJ+BxepSfcJCUlISQkBPb29pg/fz7+97//4dixY+qsjYiIqomIjyVUaISXnp6ONWvWYNWqVcjJyUH//v2hUCgQHx/PCStERPRSK/cIr2fPnmjSpAnOnTuHRYsW4e7du1i6dGl11kZERNWEszT/xc8//4zx48dj9OjRcHFxqc6aiIiomvEe3r84cuQIHj9+jLZt28LDwwPLli3D/fv3q7M2IiKqJrIq/FdTlTvw3njjDaxcuRJpaWkYOXIkNm3aBHt7exQXFyMhIQGPHz+uzjqJiEiNRPzG8wrP0jQ2NsZ7772HI0eO4Pz585g0aRI+++wzWFtb
o1evXtVRIxERqRkDr4KaNGmC2NhY/Pnnn9i4caO6aiIiIlK7Sj14/k+6urro3bs3evfurY7dERFRNeM3nhMRkRBq8qXJymLgEREJSMABHgOPiEhENfkVYZXFwCMiEpCIlzRr8re1ExFRDfDXX39h0KBBqFu3LoyMjODu7o6TJ08q10uShI8//hh2dnYwMjKCr68vrl27pvY6GHhERALS1Ls0Hz16BC8vL+jr6+Pnn3/GpUuXMH/+fFhaWir7xMbGYsmSJVixYgWOHz8OY2Nj+Pn5IT8/X63nzEuaREQC0tHQK8Lmzp2LBg0aYPXq1co2Z2dn5Z8lScKiRYswbdo0BAYGAgDWrl0LGxsbxMfHIzg4WG21cIRHRCSgqozwFAoFcnJyVBaFQlHqcXbu3Il27dqhX79+sLa2RuvWrbFy5Url+lu3biE9PR2+vr7KNnNzc3h4eCA5OVmt58zAIyISUFVeLRYTEwNzc3OVJSYmptTj3Lx5E8uXL4eLiwv27t2L0aNHY/z48YiLiwPw/HtWAcDGxkZlOxsbG+U6deElTSIiAVXlsYSoqChERESotMnl8lL7FhcXo127dpgzZw4AoHXr1rhw4QJWrFiBkJCQStdQGRzhERFRhcjlcpiZmaksZQWenZ0d3NzcVNpcXV1x584dAICtrS0AICMjQ6VPRkaGcp26MPCIiASkqVmaXl5euHLlikrb1atX4ejoCOD5BBZbW1vs379fuT4nJwfHjx+Hp6dnlc/z73hJk4hIQJp600p4eDg6dOiAOXPmoH///jhx4gS+/vprfP311wCev8R64sSJ+PTTT+Hi4gJnZ2dMnz4d9vb2av9CAgYeEZGANPVmsfbt22PHjh2IiorCrFmz4OzsjEWLFmHgwIHKPlOnTkVubi5GjBiBrKwsdOzYEXv27IGhoaFaa5FJkiSpdY8vgczHhdougQThGLRA2yWQIJ4mRKp1f2tS7lR629D2DmqsRHM4wiMiEpCI34fHSStERCQEjvCIiAQk3viOgUdEJCR+Hx4REQlBvLhj4BERCUnAAR4Dj4hIRJylSUREVEtxhEdEJCARRzsMPCIiAYl4SZOBR0QkIPHijoFHRCQkjvCIiEgIIt7DE/GciYhIQBzhEREJiJc0iYhICOLFHQOPiEhIAg7wGHhERCLSEXCMx8AjIhKQiCM8ztIkIiIhcIRHRCQgGS9pakdKSgoOHDiAzMxMFBcXq6xbsGCBlqoiIqq9RLykqfXAmzNnDqZNm4YmTZrAxsZG5dkQEZ8TISLSBE5a0YLFixfj22+/RWhoqLZLISIShojjCa0Hno6ODry8vLRdBhGRUEQMPK3P0gwPD8cXX3yh7TKIiKiW0/oIb/LkyQgICECjRo3g5uYGfX19lfXbt2/XUmVERLUXZ2lqwfjx43HgwAH4+Pigbt26nKhCRKQBOgL+Vav1wIuLi8O2bdsQEBCg7VKIiITBEZ4WWFlZoVGjRtoug4hIKCJeTNP6pJXo6GjMmDEDeXl52i6FiIhqMa2P8JYsWYIbN27AxsYGTk5OJSatnD59WkuVERHVXrykqQW9e/fWdglCST19EhvXrcaVy5fw4P49zJ63GJ28u5Tad96cmfhh+xaMi4hE/3cHa7hSqkm83F9FeD8PtHnNBnZ1TdF/xnb8ePSacv1Hg73Qz9sVr9YzRcGzYpy5lo7o1UlI+S1N2ee3daPgaGuust/p3xzEvM3HNXYeIuGkFS2YMWOGtksQSv7Tp2js0gQBvd7GR1Mmltkv6cAvuHjhHF6pZ6254qjGMjY0wPmbmVi79xw2RweVWH/9z4cIX5aAW2lZMJLrY1yfdvjxs3fQPOQr3M9+quw3c81hrP7prPLz46cFGqlfRNoY4X322WeIiorChAkTsGjRIgBAfn4+Jk2ahE2bNkGhUMDPzw9ffvklbGxs1H58rQceadYbXm/iDa83/7XPvcwMLPo8BvOXfoWpE8doqDKqyfal3MS+lJtlrt984LLK
58gViRjq3xLNG1rj4JnbyvYneQXIeJRbbXXS/9H0pJWUlBR89dVXaNGihUp7eHg4du/ejS1btsDc3BxhYWEICgrCr7/+qvYatD5ppaioCPPmzcPrr78OW1tbWFlZqSykWcXFxfj04ygMGBwK50aNtV0O1UL6ejoY1r0Vsp7k4/yNTJV1k4I98Oe28UheHorwfq9DV8Trbhoiq8JSUU+ePMHAgQOxcuVKWFpaKtuzs7OxatUqLFiwAP/73//Qtm1brF69GkePHsWxY8eqcnql0nrgzZw5EwsWLMA777yD7OxsREREICgoCDo6OoiOjtZ2ecJZH7cKurq66Bs8SNulUC3j79EI93aGI2v3ZIzr0w49IjfjQc7/Xc78Mv4UhszeiW6TN2LV7lRMGeCJOcN9tFgxlUWhUCAnJ0dlUSgUZfYfO3YsAgIC4Ovrq9J+6tQpFBYWqrQ3bdoUDg4OSE5OVnvdWg+89evXY+XKlZg0aRL09PQwYMAAfPPNN/j444/LlfAV/cFT2a5cvoitm77Dh9Gz+cYbUrtDZ+/AY9Rq+Ez8DvtSbuG7aYGoZ1FHuX7JthQcPvcHLty6h292peKDrxIxuncbGOjrarHq2ktHJqv0EhMTA3Nzc5UlJiam1ONs2rQJp0+fLnV9eno6DAwMYGFhodJuY2OD9PR09Z+z2vdYQenp6XB3dwcAmJiYIDs7GwDQo0cP7N69+z+3L+0Hv2T+3GqtubY6e+Y0Hj18iL493oK3R0t4e7REetpdfLHoc/Tr2VXb5VENl5dfiJt3s3Di8l2MXvAznhUXI6RbizL7p/yWBn09XTjamJfZhyqvKpc0o6KikJ2drbJERUWVOMYff/yBCRMmYP369TA0NNTEaf0rrU9aefXVV5GWlgYHBwc0atQI+/btQ5s2bZCSkgK5XP6f20dFRSEiIkKlLbtA6zleI/l174l2r7+h0jZp3Ej4de+J7j17a6coqrV0ZDLI/2X01rKRNYqKinEvi5NYqkUVLuLI5fJy/f186tQpZGZmok2bNsq2oqIiJCUlYdmyZdi7dy8KCgqQlZWlMsrLyMiAra1t5Qssg9YD7+2338b+/fvh4eGBcePGYdCgQVi1ahXu3LmD8PDw/9y+tB98/uPC6iq3xsvLy8Nff9xRfk776y9cu/IbzMzNYWNrB/N/XFrQ09ODVd1X4ODkrOFKqSYxNtRHo/r/NxnBydYcLRpZ41HOUzx4nI/Idz2xO/k60h88QV1zI4zs1Qb2r5hie9IVAICHqz3aN7XHobO38TivAG+41cfcUf/Dxv0XkfWEtyiqgyYeS+jSpQvOnz+v0jZ06FA0bdoUkZGRaNCgAfT19bF//3706dMHAHDlyhXcuXMHnp6eaq9H64H32WefKf/8zjvvKG9Wuri4oGfPnlqsrHa6cukCxo96T/l52cJYAEC3HoH4KHq2tsqiGq7Na7bYN/9d5efY0c9fZrBu33mMW7QXTRpYYdBbvVHXzAgPHz/FySvp8A1fj8u37wMAFIVF6Ofjio+GeEGur4vf07OxdPtJLNmWopXzEYEmbtObmpqiefPmKm3GxsaoW7eusn3YsGGIiIiAlZUVzMzMMG7cOHh6euKNN94obZdVIpMkSVL7XrUskyM80hDHoAXaLoEE8TQhUq37O3Ezu9Lbvt6w8vdVvb290apVqxIPnm/cuFHlwfPquKT5UgTelStXsHTpUly+/PzhVFdXV4wbNw5NmjSp1P4YeKQpDDzSFHUHXkoVAq99FQJPm7Q+u2Pbtm1o3rw5Tp06hZYtW6Jly5Y4ffo0mjdvjm3btmm7PCKi2kmTT56/JLR+D2/q1KmIiorCrFmzVNpnzJiBqVOnKm9kEhGR+oj4bQlaH+GlpaVhyJAhJdoHDRqEtLS0UrYgIqKqkskqv9RUWg88b29vHD58uET7kSNH8Oab//6SYyIiqhwBr2hq/5Jmr169EBkZiVOnTimnoR47dgxbtmzBzJkzsXPnTpW+RERE
laH1WZo6OuUbZMpkMhQVFZWrL2dpkqZwliZpirpnaZ6+nVPpbds4mqmxEs3R+givuLhY2yUQEQmHk1Y0KDk5Gbt27VJpW7t2LZydnWFtbY0RI0bwWw+IiKoJJ61o0KxZs3Dx4kXl5/Pnz2PYsGHw9fXFBx98gB9//LHMr5sgIqKqEXHSitYCLzU1FV26dFF+3rRpEzw8PLBy5UpERERgyZIl+P7777VVHhFR7SZg4mkt8B49egQbGxvl50OHDsHf31/5uX379vjjjz+0URoREdVCWgs8Gxsb3Lp1CwBQUFCA06dPq7wd+/Hjx9DX19dWeUREtZqsCv/VVFoLvO7du+ODDz7A4cOHERUVhTp16qg8aH7u3Dk0atRIW+UREdVqIk5a0dpjCZ988gmCgoLQuXNnmJiYIC4uDgYGBsr13377Lbp27aqt8oiIarUanFuVprXAe+WVV5CUlITs7GyYmJhAV1dXZf2WLVtgYmKipeqIiGo5ARNP6w+em5uX/r1KVlZWGq6EiEgcNfleXGVp/eXRREREmqD1ER4REWleTZ58UlkMPCIiAQmYdww8IiIhCZh4DDwiIgGJOGmFgUdEJCAR7+FxliYREQmBIzwiIgEJOMBj4BERCUnAxGPgEREJiJNWiIhICCJOWmHgEREJSMC84yxNIiISA0d4REQiEnCIx8AjIhIQJ60QEZEQRJy0wnt4REQCklVhqYiYmBi0b98epqamsLa2Ru/evXHlyhWVPvn5+Rg7dizq1q0LExMT9OnTBxkZGVU5vVIx8IiIRKShxDt06BDGjh2LY8eOISEhAYWFhejatStyc3OVfcLDw/Hjjz9iy5YtOHToEO7evYugoKAqn+I/ySRJktS+Vy3LfFyo7RJIEI5BC7RdAgniaUKkWvf3+4P8Sm/rVNew0tveu3cP1tbWOHToEDp16oTs7GzUq1cPGzZsQN++fQEAv/32G1xdXZGcnIw33nij0sf6J47wiIgEJKvCfwqFAjk5OSqLQqEo13Gzs7MBAFZWVgCAU6dOobCwEL6+vso+TZs2hYODA5KTk9V6zgw8IiIByWSVX2JiYmBubq6yxMTE/Ocxi4uLMXHiRHh5eaF58+YAgPT0dBgYGMDCwkKlr42NDdLT09V6zpylSUQkoKpM0oyKikJERIRKm1wu/8/txo4diwsXLuDIkSNVOHrlMfCIiARUlccS5HJ5uQLu78LCwrBr1y4kJSXh1VdfVbbb2tqioKAAWVlZKqO8jIwM2NraVr7IUvCSJhGRkDQzTVOSJISFhWHHjh1ITEyEs7Ozyvq2bdtCX18f+/fvV7ZduXIFd+7cgaenZyXPrXQc4RERUbUZO3YsNmzYgB9++AGmpqbK+3Lm5uYwMjKCubk5hg0bhoiICFhZWcHMzAzjxo2Dp6enWmdoAgw8IiIhaepNK8uXLwcAeHt7q7SvXr0aoaGhAICFCxdCR0cHffr0gUKhgJ+fH7788ku118Ln8IiqgM/hkaao+zm8u1kFld7W3sJAjZVoDkd4REQCEvFdmgw8IiIB8dsSiIhIDOLlHR9LICIiMXCER0QkIAEHeAw8IiIRcdIKEREJgZNWiIhIDOLlHQOPiEhEAuYdZ2kSEZEYOMIjIhIQJ60QEZEQOGmFiIiEIOIIj/fwiIhICBzhEREJiCM8IiKiWoojPCIiAXHSChERCUHES5oMPCIiAQmYdww8IiIhCZh4nLRCRERC4AiPiEhAnLRCRERC4KQVIiISgoB5x8AjIhKSgInHwCMiEpCI9/A4S5OIiITAER4RkYBEnLQikyRJ0nYRpH0KhQIxMTGIioqCXC7XdjlUi/F3jbSFgUcAgJycHJibmyM7OxtmZmbaLodqMf6ukbbwHh4REQmBgUdEREJg4BERkRAYeAQAkMvlmDFjBicRULXj7xppCyetEBGREDjCIyIiITDwiIhICAw8IiISAgOPiIiEwMCrZUJDQ9G7d+8S7QcPHoRMJkNWVpbGa6La4969
exg9ejQcHBwgl8tha2sLPz8//Prrr9oujeg/8eXRRFRuffr0QUFBAeLi4tCwYUNkZGRg//79ePDggbZLI/pPHOEJ6MGDBxgwYADq16+POnXqwN3dHRs3blTp4+3tjXHjxmHixImwtLSEjY0NVq5cidzcXAwdOhSmpqZo3Lgxfv75Zy2dBWlaVlYWDh8+jLlz58LHxweOjo54/fXXERUVhV69egEAZDIZli9fDn9/fxgZGaFhw4bYunWryn4iIyPx2muvoU6dOmjYsCGmT5+OwsJC5fro6Gi0atUK3377LRwcHGBiYoIxY8agqKgIsbGxsLW1hbW1NWbPnq3R86eaj4EnoPz8fLRt2xa7d+/GhQsXMGLECAwePBgnTpxQ6RcXF4dXXnkFJ06cwLhx4zB69Gj069cPHTp0wOnTp9G1a1cMHjwYeXl5WjoT0iQTExOYmJggPj4eCoWizH7Tp09Hnz59cPbsWQwcOBDBwcG4fPmycr2pqSnWrFmDS5cuYfHixVi5ciUWLlyoso8bN27g559/xp49e7Bx40asWrUKAQEB+PPPP3Ho0CHMnTsX06ZNw/Hjx6vtfKkWkqhWCQkJkXR1dSVjY2OVxdDQUAIgPXr0qNTtAgICpEmTJik/d+7cWerYsaPy87NnzyRjY2Np8ODByra0tDQJgJScnFxt50Mvl61bt0qWlpaSoaGh1KFDBykqKko6e/ascj0AadSoUSrbeHh4SKNHjy5zn59//rnUtm1b5ecZM2ZIderUkXJycpRtfn5+kpOTk1RUVKRsa9KkiRQTE6OO0yJBcIRXC/n4+CA1NVVl+eabb5Tri4qK8Mknn8Dd3R1WVlYwMTHB3r17cefOHZX9tGjRQvlnXV1d1K1bF+7u7so2GxsbAEBmZmY1nxG9LPr06YO7d+9i586d6NatGw4ePIg2bdpgzZo1yj6enp4q23h6eqqM8DZv3gwvLy/Y2trCxMQE06ZNK/G75+TkBFNTU+VnGxsbuLm5QUdHR6WNv3tUEQy8WsjY2BiNGzdWWerXr69c//nnn2Px4sWIjIzEgQMHkJqaCj8/PxQUFKjsR19fX+WzTCZTaZP9/69MLi4ursazoZeNoaEh3nrrLUyfPh1Hjx5FaGgoZsyYUa5tk5OTMXDgQHTv3h27du3CmTNn8NFHH1X4d+9FG3/3qCIYeAL69ddfERgYiEGDBqFly5Zo2LAhrl69qu2yqIZyc3NDbm6u8vOxY8dU1h87dgyurq4AgKNHj8LR0REfffQR2rVrBxcXF9y+fVuj9ZK4+FiCgFxcXLB161YcPXoUlpaWWLBgATIyMuDm5qbt0ugl9uDBA/Tr1w/vvfceWrRoAVNTU5w8eRKxsbEIDAxU9tuyZQvatWuHjh07Yv369Thx4gRWrVoF4Pnv3p07d7Bp0ya0b98eu3fvxo4dO7R1SiQYBp6Apk2bhps3b8LPzw916tTBiBEj0Lt3b2RnZ2u7NHqJmZiYwMPDAwsXLsSNGzdQWFiIBg0aYPjw4fjwww+V/WbOnIlNmzZhzJgxsLOzw8aNG5X/mOrVqxfCw8MRFhYGhUKBgIAATJ8+HdHR0Vo6KxIJvx6IiNRGJpNhx44dpb7th0jbeA+PiIiEwMAjIiIh8B4eEakN75DQy4wjPCIiEgIDj4iIhMDAIyIiITDwiIhICAw8IiISAgOPqJxCQ0NVHqj29vbGxIkTNV7HwYMHIZPJkJWVpfFjE9VkDDyq8UJDQyGTySCTyWBgYIDGjRtj1qxZePbsWbUed/v27fjkk0/K1ZchRaR9fA6PaoVu3bph9erVUCgU+OmnnzB27Fjo6+sjKipKpV9BQQEMDAzUckwrKyu17IeINIMjPKoV5HI5bG1t4ejoiNGjR8PX1xc7d+5UXoacPXs27O3t0aRJEwDAH3/8gf79+8PCwgJWVlYIDAzE77//rtxfUVERIiIiYGFhgbp162Lq1KklHqr+5yVNhUKByMhINGjQAHK5HI0bN8aq
Vavw+++/w8fHBwBgaWkJmUyG0NBQAM+/SzAmJgbOzs4wMjJCy5YtsXXrVpXj/PTTT3jttddgZGQEHx8flTqJqPwYeFQrGRkZKb9UdP/+/bhy5QoSEhKwa9cuFBYWws/PD6ampjh8+DB+/fVXmJiYoFu3bspt5s+fjzVr1uDbb7/FkSNH8PDhw//8GpshQ4Zg48aNWLJkCS5fvoyvvvoKJiYmaNCgAbZt2wYAuHLlCtLS0rB48WIAQExMDNauXYsVK1bg4sWLCA8Px6BBg3Do0CEAz4M5KCgIPXv2RGpqKt5//3188MEH1fVjI6rdJKIaLiQkRAoMDJQkSZKKi4ulhIQESS6XS5MnT5ZCQkIkGxsbSaFQKPuvW7dOatKkiVRcXKxsUygUkpGRkbR3715JkiTJzs5Oio2NVa4vLCyUXn31VeVxJEmSOnfuLE2YMEGSJEm6cuWKBEBKSEgotcYDBw5IAKRHjx4p2/Lz86U6depIR48eVek7bNgwacCAAZIkSVJUVJTk5uamsj4yMrLEvojov/EeHtUKu3btgomJCQoLC1FcXIx3330X0dHRGDt2LNzd3VXu2509exbXr1+Hqampyj7y8/Nx48YNZGdnIy0tDR4eHsp1enp6aNeuXZnvikxNTYWuri46d+5c7pqvX7+OvLw8vPXWWyrtBQUFaN26NQDg8uXLKnUAgKenZ7mPQUT/h4FHtYKPjw+WL18OAwMD2NvbQ0/v/361jY2NVfo+efIEbdu2xfr160vsp169epU6vpGRUYW3efLkCQBg9+7dqF+/vso6uVxeqTqIqGwMPKoVjI2N0bhx43L1bdOmDTZv3gxra2uYmZmV2sfOzg7Hjx9Hp06dAADPnj3DqVOn0KZNm1L7u7u7o7i4GIcOHYKvr2+J9S9GmEVFRco2Nzc3yOVy3Llzp8yRoaurK3bu3KnSduzYsf8+SSIqgZNWSDgDBw7EK6+8gsDAQBw+fBi3bt3CwYMHMX78ePz5558AgAkTJuCzzz5DfHw8fvvtN4wZM+Zfn6FzcnJCSEgI3nvvPcTHxyv3+f333wMAHB0dIZPJsGvXLty7dw9PnjyBqakpJk+ejPDwcMTFxeHGjRs4ffo0li5diri4OADAqFGjcO3aNUyZMgVXrlzBhg0bsGbNmur+ERHVSgw8Ek6dOnWQlJQEBwcHBAUFwdXVFcOGDUN+fr5yxDdp0iQMHjwYISEh8PT0hKmpKd5+++1/3e/y5cvRt29fjBkzBk2bNsXw4cORm5sLAKhfvz5mzpyJDz74ADY2NggLCwMAfPLJJ5g+fTpiYmLg6uqKbt26Yffu3XB2dgYAODg4YNu2bYiPj0fLli2xYsUKzJkzpxp/OkS1l0wq6y48ERFRLcIRHhERCYGBR0REQmDgERGREBh4REQkBAYeEREJgYFHRERCYOAREZEQGHhERCQEBh4REQmBgUdEREJg4BERkRD+HxmrPxTbNBRgAAAAAElFTkSuQmCC",
773
+ "text/plain": [
774
+ "<Figure size 500x400 with 2 Axes>"
775
+ ]
776
+ },
777
+ "metadata": {},
778
+ "output_type": "display_data"
779
+ }
780
+ ],
781
+ "source": [
782
+ "conf_matrix = confusion_matrix(y_test, y_pred)\n",
783
+ "plt.figure(figsize=(5, 4))\n",
784
+ "sns.heatmap(conf_matrix, annot=True, fmt=\"d\", cmap=\"Blues\", xticklabels=[\"Ham\", \"Spam\"], yticklabels=[\"Ham\", \"Spam\"])\n",
785
+ "plt.xlabel(\"Predicted\")\n",
786
+ "plt.ylabel(\"Actual\")\n",
787
+ "plt.title(\"Confusion Matrix\")\n",
788
+ "plt.show()\n"
789
+ ]
790
+ },
791
+ {
792
+ "cell_type": "code",
793
+ "execution_count": null,
794
+ "metadata": {},
795
+ "outputs": [
796
+ {
797
+ "name": "stdout",
798
+ "output_type": "stream",
799
+ "text": [
800
+ "Model and vectorizer saved successfully!\n"
801
+ ]
802
+ }
803
+ ],
804
+ "source": [
805
+ "#SAVING THE MODEL AS A .pkl FILE\n",
806
+ "joblib.dump(model, \"spam_classifier.pkl\")\n",
807
+ "\n",
808
+ "#SAVING THE VECTORIZER AS A .pkl FILE\n",
809
+ "joblib.dump(vectorizer, \"tfidf_vectorizer.pkl\")\n",
810
+ "\n",
811
+ "print(\"Model and vectorizer saved successfully!\")"
812
+ ]
813
+ }
814
+ ],
815
+ "metadata": {
816
+ "kernelspec": {
817
+ "display_name": "Python 3",
818
+ "language": "python",
819
+ "name": "python3"
820
+ },
821
+ "language_info": {
822
+ "codemirror_mode": {
823
+ "name": "ipython",
824
+ "version": 3
825
+ },
826
+ "file_extension": ".py",
827
+ "mimetype": "text/x-python",
828
+ "name": "python",
829
+ "nbconvert_exporter": "python",
830
+ "pygments_lexer": "ipython3",
831
+ "version": "3.12.3"
832
+ }
833
+ },
834
+ "nbformat": 4,
835
+ "nbformat_minor": 2
836
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ nltk
2
+ string
3
+ re
4
+ streamlit
5
+ joblib
spam.csv ADDED
The diff for this file is too large to render. See raw diff
 
spam_classifier.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b1b52706d4ecf7192ed492f105adcc5832ebba0d81760d549738b3b8d67f92
3
+ size 123991
tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd80c11cdbdfe22bbc870a72bab572748458ddb19a6aec665fcb10db9c2ee94
3
+ size 78711