Spaces:
Build error
Build error
Upload 6 files
Browse files- SPAM text message 20170820 - Data.csv +0 -0
- Spam analysis.ipynb +1691 -0
- app.py +73 -0
- requirements.txt +7 -0
- spam_classifier_model_lstm.pth +3 -0
- tfidf_vectorizer.pkl +3 -0
SPAM text message 20170820 - Data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Spam analysis.ipynb
ADDED
|
@@ -0,0 +1,1691 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {
|
| 6 |
+
"id": "9XjlAPnDBIfp"
|
| 7 |
+
},
|
| 8 |
+
"source": [
|
| 9 |
+
"## Data card\n",
|
| 10 |
+
"- https://www.kaggle.com/datasets/team-ai/spam-text-message-classification"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "markdown",
|
| 15 |
+
"metadata": {
|
| 16 |
+
"id": "7ePpzsWZYuaQ"
|
| 17 |
+
},
|
| 18 |
+
"source": [
|
| 19 |
+
"# 1.Packages"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "code",
|
| 24 |
+
"execution_count": 138,
|
| 25 |
+
"metadata": {
|
| 26 |
+
"colab": {
|
| 27 |
+
"base_uri": "https://localhost:8080/"
|
| 28 |
+
},
|
| 29 |
+
"id": "oT593Dj5RhBx",
|
| 30 |
+
"outputId": "aa673e7f-946b-438c-9c93-2767deef6544"
|
| 31 |
+
},
|
| 32 |
+
"outputs": [
|
| 33 |
+
{
|
| 34 |
+
"name": "stderr",
|
| 35 |
+
"output_type": "stream",
|
| 36 |
+
"text": [
|
| 37 |
+
"[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
|
| 38 |
+
"[nltk_data] Package stopwords is already up-to-date!\n",
|
| 39 |
+
"[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
|
| 40 |
+
"[nltk_data] Package wordnet is already up-to-date!\n",
|
| 41 |
+
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
|
| 42 |
+
"[nltk_data] Package punkt is already up-to-date!\n"
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
],
|
| 46 |
+
"source": [
|
| 47 |
+
"import pandas as pd\n",
|
| 48 |
+
"import numpy as np\n",
|
| 49 |
+
"import matplotlib.pyplot as plt\n",
|
| 50 |
+
"import seaborn as sns\n",
|
| 51 |
+
"import re\n",
|
| 52 |
+
"import nltk\n",
|
| 53 |
+
"from nltk.stem import WordNetLemmatizer\n",
|
| 54 |
+
"from nltk.tokenize import word_tokenize\n",
|
| 55 |
+
"from nltk.corpus import stopwords\n",
|
| 56 |
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
| 57 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 58 |
+
"from sklearn.metrics import accuracy_score,f1_score, classification_report, confusion_matrix\n",
|
| 59 |
+
"import torch\n",
|
| 60 |
+
"import torch.nn as nn\n",
|
| 61 |
+
"import torch.optim as optim\n",
|
| 62 |
+
"from torch.utils.data import DataLoader, TensorDataset, random_split\n",
|
| 63 |
+
"nltk.download('stopwords')\n",
|
| 64 |
+
"nltk.download('wordnet')\n",
|
| 65 |
+
"nltk.download('punkt')\n",
|
| 66 |
+
"from joblib import dump, load"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "markdown",
|
| 71 |
+
"metadata": {
|
| 72 |
+
"id": "1Zek_JuxYoiM"
|
| 73 |
+
},
|
| 74 |
+
"source": [
|
| 75 |
+
"# 2.Data Loading"
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"cell_type": "code",
|
| 80 |
+
"execution_count": 139,
|
| 81 |
+
"metadata": {
|
| 82 |
+
"colab": {
|
| 83 |
+
"base_uri": "https://localhost:8080/",
|
| 84 |
+
"height": 225
|
| 85 |
+
},
|
| 86 |
+
"id": "DiV4mPuqS1_k",
|
| 87 |
+
"outputId": "df9adf14-12d3-42de-fded-33b4603b03a1"
|
| 88 |
+
},
|
| 89 |
+
"outputs": [
|
| 90 |
+
{
|
| 91 |
+
"name": "stdout",
|
| 92 |
+
"output_type": "stream",
|
| 93 |
+
"text": [
|
| 94 |
+
"shape : (5572, 2)\n"
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"data": {
|
| 99 |
+
"application/vnd.google.colaboratory.intrinsic+json": {
|
| 100 |
+
"summary": "{\n \"name\": \"Message_df\",\n \"rows\": 5572,\n \"fields\": [\n {\n \"column\": \"Category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Message\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5157,\n \"samples\": [\n \"Also sir, i sent you an email about how to log into the usc payment portal. I.ll send you another message that should explain how things are back home. Have a great weekend.\",\n \"Are you free now?can i call now?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
|
| 101 |
+
"type": "dataframe",
|
| 102 |
+
"variable_name": "Message_df"
|
| 103 |
+
},
|
| 104 |
+
"text/html": [
|
| 105 |
+
"\n",
|
| 106 |
+
" <div id=\"df-31cee80c-615e-4aee-9e82-ba2d9957460b\" class=\"colab-df-container\">\n",
|
| 107 |
+
" <div>\n",
|
| 108 |
+
"<style scoped>\n",
|
| 109 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 110 |
+
" vertical-align: middle;\n",
|
| 111 |
+
" }\n",
|
| 112 |
+
"\n",
|
| 113 |
+
" .dataframe tbody tr th {\n",
|
| 114 |
+
" vertical-align: top;\n",
|
| 115 |
+
" }\n",
|
| 116 |
+
"\n",
|
| 117 |
+
" .dataframe thead th {\n",
|
| 118 |
+
" text-align: right;\n",
|
| 119 |
+
" }\n",
|
| 120 |
+
"</style>\n",
|
| 121 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 122 |
+
" <thead>\n",
|
| 123 |
+
" <tr style=\"text-align: right;\">\n",
|
| 124 |
+
" <th></th>\n",
|
| 125 |
+
" <th>Category</th>\n",
|
| 126 |
+
" <th>Message</th>\n",
|
| 127 |
+
" </tr>\n",
|
| 128 |
+
" </thead>\n",
|
| 129 |
+
" <tbody>\n",
|
| 130 |
+
" <tr>\n",
|
| 131 |
+
" <th>0</th>\n",
|
| 132 |
+
" <td>ham</td>\n",
|
| 133 |
+
" <td>Go until jurong point, crazy.. Available only ...</td>\n",
|
| 134 |
+
" </tr>\n",
|
| 135 |
+
" <tr>\n",
|
| 136 |
+
" <th>1</th>\n",
|
| 137 |
+
" <td>ham</td>\n",
|
| 138 |
+
" <td>Ok lar... Joking wif u oni...</td>\n",
|
| 139 |
+
" </tr>\n",
|
| 140 |
+
" <tr>\n",
|
| 141 |
+
" <th>2</th>\n",
|
| 142 |
+
" <td>spam</td>\n",
|
| 143 |
+
" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
|
| 144 |
+
" </tr>\n",
|
| 145 |
+
" <tr>\n",
|
| 146 |
+
" <th>3</th>\n",
|
| 147 |
+
" <td>ham</td>\n",
|
| 148 |
+
" <td>U dun say so early hor... U c already then say...</td>\n",
|
| 149 |
+
" </tr>\n",
|
| 150 |
+
" <tr>\n",
|
| 151 |
+
" <th>4</th>\n",
|
| 152 |
+
" <td>ham</td>\n",
|
| 153 |
+
" <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
|
| 154 |
+
" </tr>\n",
|
| 155 |
+
" </tbody>\n",
|
| 156 |
+
"</table>\n",
|
| 157 |
+
"</div>\n",
|
| 158 |
+
" <div class=\"colab-df-buttons\">\n",
|
| 159 |
+
"\n",
|
| 160 |
+
" <div class=\"colab-df-container\">\n",
|
| 161 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-31cee80c-615e-4aee-9e82-ba2d9957460b')\"\n",
|
| 162 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
| 163 |
+
" style=\"display:none;\">\n",
|
| 164 |
+
"\n",
|
| 165 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
| 166 |
+
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
| 167 |
+
" </svg>\n",
|
| 168 |
+
" </button>\n",
|
| 169 |
+
"\n",
|
| 170 |
+
" <style>\n",
|
| 171 |
+
" .colab-df-container {\n",
|
| 172 |
+
" display:flex;\n",
|
| 173 |
+
" gap: 12px;\n",
|
| 174 |
+
" }\n",
|
| 175 |
+
"\n",
|
| 176 |
+
" .colab-df-convert {\n",
|
| 177 |
+
" background-color: #E8F0FE;\n",
|
| 178 |
+
" border: none;\n",
|
| 179 |
+
" border-radius: 50%;\n",
|
| 180 |
+
" cursor: pointer;\n",
|
| 181 |
+
" display: none;\n",
|
| 182 |
+
" fill: #1967D2;\n",
|
| 183 |
+
" height: 32px;\n",
|
| 184 |
+
" padding: 0 0 0 0;\n",
|
| 185 |
+
" width: 32px;\n",
|
| 186 |
+
" }\n",
|
| 187 |
+
"\n",
|
| 188 |
+
" .colab-df-convert:hover {\n",
|
| 189 |
+
" background-color: #E2EBFA;\n",
|
| 190 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 191 |
+
" fill: #174EA6;\n",
|
| 192 |
+
" }\n",
|
| 193 |
+
"\n",
|
| 194 |
+
" .colab-df-buttons div {\n",
|
| 195 |
+
" margin-bottom: 4px;\n",
|
| 196 |
+
" }\n",
|
| 197 |
+
"\n",
|
| 198 |
+
" [theme=dark] .colab-df-convert {\n",
|
| 199 |
+
" background-color: #3B4455;\n",
|
| 200 |
+
" fill: #D2E3FC;\n",
|
| 201 |
+
" }\n",
|
| 202 |
+
"\n",
|
| 203 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
| 204 |
+
" background-color: #434B5C;\n",
|
| 205 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
| 206 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
| 207 |
+
" fill: #FFFFFF;\n",
|
| 208 |
+
" }\n",
|
| 209 |
+
" </style>\n",
|
| 210 |
+
"\n",
|
| 211 |
+
" <script>\n",
|
| 212 |
+
" const buttonEl =\n",
|
| 213 |
+
" document.querySelector('#df-31cee80c-615e-4aee-9e82-ba2d9957460b button.colab-df-convert');\n",
|
| 214 |
+
" buttonEl.style.display =\n",
|
| 215 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 216 |
+
"\n",
|
| 217 |
+
" async function convertToInteractive(key) {\n",
|
| 218 |
+
" const element = document.querySelector('#df-31cee80c-615e-4aee-9e82-ba2d9957460b');\n",
|
| 219 |
+
" const dataTable =\n",
|
| 220 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
| 221 |
+
" [key], {});\n",
|
| 222 |
+
" if (!dataTable) return;\n",
|
| 223 |
+
"\n",
|
| 224 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
| 225 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
| 226 |
+
" + ' to learn more about interactive tables.';\n",
|
| 227 |
+
" element.innerHTML = '';\n",
|
| 228 |
+
" dataTable['output_type'] = 'display_data';\n",
|
| 229 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
| 230 |
+
" const docLink = document.createElement('div');\n",
|
| 231 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
| 232 |
+
" element.appendChild(docLink);\n",
|
| 233 |
+
" }\n",
|
| 234 |
+
" </script>\n",
|
| 235 |
+
" </div>\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"<div id=\"df-dfc3cdf4-c2be-4859-b4df-f906be898778\">\n",
|
| 239 |
+
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-dfc3cdf4-c2be-4859-b4df-f906be898778')\"\n",
|
| 240 |
+
" title=\"Suggest charts\"\n",
|
| 241 |
+
" style=\"display:none;\">\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
| 244 |
+
" width=\"24px\">\n",
|
| 245 |
+
" <g>\n",
|
| 246 |
+
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
| 247 |
+
" </g>\n",
|
| 248 |
+
"</svg>\n",
|
| 249 |
+
" </button>\n",
|
| 250 |
+
"\n",
|
| 251 |
+
"<style>\n",
|
| 252 |
+
" .colab-df-quickchart {\n",
|
| 253 |
+
" --bg-color: #E8F0FE;\n",
|
| 254 |
+
" --fill-color: #1967D2;\n",
|
| 255 |
+
" --hover-bg-color: #E2EBFA;\n",
|
| 256 |
+
" --hover-fill-color: #174EA6;\n",
|
| 257 |
+
" --disabled-fill-color: #AAA;\n",
|
| 258 |
+
" --disabled-bg-color: #DDD;\n",
|
| 259 |
+
" }\n",
|
| 260 |
+
"\n",
|
| 261 |
+
" [theme=dark] .colab-df-quickchart {\n",
|
| 262 |
+
" --bg-color: #3B4455;\n",
|
| 263 |
+
" --fill-color: #D2E3FC;\n",
|
| 264 |
+
" --hover-bg-color: #434B5C;\n",
|
| 265 |
+
" --hover-fill-color: #FFFFFF;\n",
|
| 266 |
+
" --disabled-bg-color: #3B4455;\n",
|
| 267 |
+
" --disabled-fill-color: #666;\n",
|
| 268 |
+
" }\n",
|
| 269 |
+
"\n",
|
| 270 |
+
" .colab-df-quickchart {\n",
|
| 271 |
+
" background-color: var(--bg-color);\n",
|
| 272 |
+
" border: none;\n",
|
| 273 |
+
" border-radius: 50%;\n",
|
| 274 |
+
" cursor: pointer;\n",
|
| 275 |
+
" display: none;\n",
|
| 276 |
+
" fill: var(--fill-color);\n",
|
| 277 |
+
" height: 32px;\n",
|
| 278 |
+
" padding: 0;\n",
|
| 279 |
+
" width: 32px;\n",
|
| 280 |
+
" }\n",
|
| 281 |
+
"\n",
|
| 282 |
+
" .colab-df-quickchart:hover {\n",
|
| 283 |
+
" background-color: var(--hover-bg-color);\n",
|
| 284 |
+
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 285 |
+
" fill: var(--button-hover-fill-color);\n",
|
| 286 |
+
" }\n",
|
| 287 |
+
"\n",
|
| 288 |
+
" .colab-df-quickchart-complete:disabled,\n",
|
| 289 |
+
" .colab-df-quickchart-complete:disabled:hover {\n",
|
| 290 |
+
" background-color: var(--disabled-bg-color);\n",
|
| 291 |
+
" fill: var(--disabled-fill-color);\n",
|
| 292 |
+
" box-shadow: none;\n",
|
| 293 |
+
" }\n",
|
| 294 |
+
"\n",
|
| 295 |
+
" .colab-df-spinner {\n",
|
| 296 |
+
" border: 2px solid var(--fill-color);\n",
|
| 297 |
+
" border-color: transparent;\n",
|
| 298 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 299 |
+
" animation:\n",
|
| 300 |
+
" spin 1s steps(1) infinite;\n",
|
| 301 |
+
" }\n",
|
| 302 |
+
"\n",
|
| 303 |
+
" @keyframes spin {\n",
|
| 304 |
+
" 0% {\n",
|
| 305 |
+
" border-color: transparent;\n",
|
| 306 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 307 |
+
" border-left-color: var(--fill-color);\n",
|
| 308 |
+
" }\n",
|
| 309 |
+
" 20% {\n",
|
| 310 |
+
" border-color: transparent;\n",
|
| 311 |
+
" border-left-color: var(--fill-color);\n",
|
| 312 |
+
" border-top-color: var(--fill-color);\n",
|
| 313 |
+
" }\n",
|
| 314 |
+
" 30% {\n",
|
| 315 |
+
" border-color: transparent;\n",
|
| 316 |
+
" border-left-color: var(--fill-color);\n",
|
| 317 |
+
" border-top-color: var(--fill-color);\n",
|
| 318 |
+
" border-right-color: var(--fill-color);\n",
|
| 319 |
+
" }\n",
|
| 320 |
+
" 40% {\n",
|
| 321 |
+
" border-color: transparent;\n",
|
| 322 |
+
" border-right-color: var(--fill-color);\n",
|
| 323 |
+
" border-top-color: var(--fill-color);\n",
|
| 324 |
+
" }\n",
|
| 325 |
+
" 60% {\n",
|
| 326 |
+
" border-color: transparent;\n",
|
| 327 |
+
" border-right-color: var(--fill-color);\n",
|
| 328 |
+
" }\n",
|
| 329 |
+
" 80% {\n",
|
| 330 |
+
" border-color: transparent;\n",
|
| 331 |
+
" border-right-color: var(--fill-color);\n",
|
| 332 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 333 |
+
" }\n",
|
| 334 |
+
" 90% {\n",
|
| 335 |
+
" border-color: transparent;\n",
|
| 336 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 337 |
+
" }\n",
|
| 338 |
+
" }\n",
|
| 339 |
+
"</style>\n",
|
| 340 |
+
"\n",
|
| 341 |
+
" <script>\n",
|
| 342 |
+
" async function quickchart(key) {\n",
|
| 343 |
+
" const quickchartButtonEl =\n",
|
| 344 |
+
" document.querySelector('#' + key + ' button');\n",
|
| 345 |
+
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
| 346 |
+
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
| 347 |
+
" try {\n",
|
| 348 |
+
" const charts = await google.colab.kernel.invokeFunction(\n",
|
| 349 |
+
" 'suggestCharts', [key], {});\n",
|
| 350 |
+
" } catch (error) {\n",
|
| 351 |
+
" console.error('Error during call to suggestCharts:', error);\n",
|
| 352 |
+
" }\n",
|
| 353 |
+
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
| 354 |
+
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
| 355 |
+
" }\n",
|
| 356 |
+
" (() => {\n",
|
| 357 |
+
" let quickchartButtonEl =\n",
|
| 358 |
+
" document.querySelector('#df-dfc3cdf4-c2be-4859-b4df-f906be898778 button');\n",
|
| 359 |
+
" quickchartButtonEl.style.display =\n",
|
| 360 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 361 |
+
" })();\n",
|
| 362 |
+
" </script>\n",
|
| 363 |
+
"</div>\n",
|
| 364 |
+
"\n",
|
| 365 |
+
" </div>\n",
|
| 366 |
+
" </div>\n"
|
| 367 |
+
],
|
| 368 |
+
"text/plain": [
|
| 369 |
+
" Category Message\n",
|
| 370 |
+
"0 ham Go until jurong point, crazy.. Available only ...\n",
|
| 371 |
+
"1 ham Ok lar... Joking wif u oni...\n",
|
| 372 |
+
"2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
|
| 373 |
+
"3 ham U dun say so early hor... U c already then say...\n",
|
| 374 |
+
"4 ham Nah I don't think he goes to usf, he lives aro..."
|
| 375 |
+
]
|
| 376 |
+
},
|
| 377 |
+
"execution_count": 139,
|
| 378 |
+
"metadata": {},
|
| 379 |
+
"output_type": "execute_result"
|
| 380 |
+
}
|
| 381 |
+
],
|
| 382 |
+
"source": [
|
| 383 |
+
"Message_df = pd.read_csv(\"/content/drive/MyDrive/SPAM text message 20170820 - Data.csv\")\n",
|
| 384 |
+
"print(f\"shape : {Message_df.shape}\")\n",
|
| 385 |
+
"Message_df.head()"
|
| 386 |
+
]
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"cell_type": "markdown",
|
| 390 |
+
"metadata": {
|
| 391 |
+
"id": "OGQFplFhUfmZ"
|
| 392 |
+
},
|
| 393 |
+
"source": [
|
| 394 |
+
"# 3.EDA"
|
| 395 |
+
]
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"cell_type": "markdown",
|
| 399 |
+
"metadata": {
|
| 400 |
+
"id": "SFV5pePaYgic"
|
| 401 |
+
},
|
| 402 |
+
"source": [
|
| 403 |
+
"## 3.1.Remove Duplicates"
|
| 404 |
+
]
|
| 405 |
+
},
|
| 406 |
+
{
|
| 407 |
+
"cell_type": "code",
|
| 408 |
+
"execution_count": 140,
|
| 409 |
+
"metadata": {
|
| 410 |
+
"colab": {
|
| 411 |
+
"base_uri": "https://localhost:8080/"
|
| 412 |
+
},
|
| 413 |
+
"id": "l0cRyTaVTHKO",
|
| 414 |
+
"outputId": "66ff8107-06f6-4a07-faf7-04ca228bb569"
|
| 415 |
+
},
|
| 416 |
+
"outputs": [
|
| 417 |
+
{
|
| 418 |
+
"name": "stdout",
|
| 419 |
+
"output_type": "stream",
|
| 420 |
+
"text": [
|
| 421 |
+
"Data with duplicates shape : (5572, 2)\n",
|
| 422 |
+
"Data without duplicates shape : (5157, 2)\n"
|
| 423 |
+
]
|
| 424 |
+
}
|
| 425 |
+
],
|
| 426 |
+
"source": [
|
| 427 |
+
"print(f\"Data with duplicates shape : {Message_df.shape}\")\n",
|
| 428 |
+
"Message_df = Message_df.drop_duplicates()\n",
|
| 429 |
+
"print(f\"Data without duplicates shape : {Message_df.shape}\")"
|
| 430 |
+
]
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"cell_type": "code",
|
| 434 |
+
"execution_count": 141,
|
| 435 |
+
"metadata": {
|
| 436 |
+
"colab": {
|
| 437 |
+
"base_uri": "https://localhost:8080/"
|
| 438 |
+
},
|
| 439 |
+
"id": "coF1hqOITfKW",
|
| 440 |
+
"outputId": "47c7e972-f315-498a-ca22-adf65b6088fd"
|
| 441 |
+
},
|
| 442 |
+
"outputs": [
|
| 443 |
+
{
|
| 444 |
+
"name": "stdout",
|
| 445 |
+
"output_type": "stream",
|
| 446 |
+
"text": [
|
| 447 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
| 448 |
+
"Index: 5157 entries, 0 to 5571\n",
|
| 449 |
+
"Data columns (total 2 columns):\n",
|
| 450 |
+
" # Column Non-Null Count Dtype \n",
|
| 451 |
+
"--- ------ -------------- ----- \n",
|
| 452 |
+
" 0 Category 5157 non-null object\n",
|
| 453 |
+
" 1 Message 5157 non-null object\n",
|
| 454 |
+
"dtypes: object(2)\n",
|
| 455 |
+
"memory usage: 120.9+ KB\n"
|
| 456 |
+
]
|
| 457 |
+
}
|
| 458 |
+
],
|
| 459 |
+
"source": [
|
| 460 |
+
"Message_df.info()"
|
| 461 |
+
]
|
| 462 |
+
},
|
| 463 |
+
{
|
| 464 |
+
"cell_type": "code",
|
| 465 |
+
"execution_count": 142,
|
| 466 |
+
"metadata": {
|
| 467 |
+
"colab": {
|
| 468 |
+
"base_uri": "https://localhost:8080/",
|
| 469 |
+
"height": 178
|
| 470 |
+
},
|
| 471 |
+
"id": "E-vHlQXoUOsy",
|
| 472 |
+
"outputId": "27a8e3ce-cb16-4d26-ace7-c2c913802a09"
|
| 473 |
+
},
|
| 474 |
+
"outputs": [
|
| 475 |
+
{
|
| 476 |
+
"data": {
|
| 477 |
+
"text/html": [
|
| 478 |
+
"<div>\n",
|
| 479 |
+
"<style scoped>\n",
|
| 480 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 481 |
+
" vertical-align: middle;\n",
|
| 482 |
+
" }\n",
|
| 483 |
+
"\n",
|
| 484 |
+
" .dataframe tbody tr th {\n",
|
| 485 |
+
" vertical-align: top;\n",
|
| 486 |
+
" }\n",
|
| 487 |
+
"\n",
|
| 488 |
+
" .dataframe thead th {\n",
|
| 489 |
+
" text-align: right;\n",
|
| 490 |
+
" }\n",
|
| 491 |
+
"</style>\n",
|
| 492 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 493 |
+
" <thead>\n",
|
| 494 |
+
" <tr style=\"text-align: right;\">\n",
|
| 495 |
+
" <th></th>\n",
|
| 496 |
+
" <th>count</th>\n",
|
| 497 |
+
" </tr>\n",
|
| 498 |
+
" <tr>\n",
|
| 499 |
+
" <th>Category</th>\n",
|
| 500 |
+
" <th></th>\n",
|
| 501 |
+
" </tr>\n",
|
| 502 |
+
" </thead>\n",
|
| 503 |
+
" <tbody>\n",
|
| 504 |
+
" <tr>\n",
|
| 505 |
+
" <th>ham</th>\n",
|
| 506 |
+
" <td>4516</td>\n",
|
| 507 |
+
" </tr>\n",
|
| 508 |
+
" <tr>\n",
|
| 509 |
+
" <th>spam</th>\n",
|
| 510 |
+
" <td>641</td>\n",
|
| 511 |
+
" </tr>\n",
|
| 512 |
+
" </tbody>\n",
|
| 513 |
+
"</table>\n",
|
| 514 |
+
"</div><br><label><b>dtype:</b> int64</label>"
|
| 515 |
+
],
|
| 516 |
+
"text/plain": [
|
| 517 |
+
"Category\n",
|
| 518 |
+
"ham 4516\n",
|
| 519 |
+
"spam 641\n",
|
| 520 |
+
"Name: count, dtype: int64"
|
| 521 |
+
]
|
| 522 |
+
},
|
| 523 |
+
"execution_count": 142,
|
| 524 |
+
"metadata": {},
|
| 525 |
+
"output_type": "execute_result"
|
| 526 |
+
}
|
| 527 |
+
],
|
| 528 |
+
"source": [
|
| 529 |
+
"Message_df.Category.value_counts()"
|
| 530 |
+
]
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"cell_type": "code",
|
| 534 |
+
"execution_count": 143,
|
| 535 |
+
"metadata": {
|
| 536 |
+
"colab": {
|
| 537 |
+
"base_uri": "https://localhost:8080/"
|
| 538 |
+
},
|
| 539 |
+
"id": "Agpy8AO1UcIO",
|
| 540 |
+
"outputId": "ac2d3811-0cf1-45e8-8618-ded5592cf6c3"
|
| 541 |
+
},
|
| 542 |
+
"outputs": [
|
| 543 |
+
{
|
| 544 |
+
"data": {
|
| 545 |
+
"text/plain": [
|
| 546 |
+
"((4516, 2), (641, 2))"
|
| 547 |
+
]
|
| 548 |
+
},
|
| 549 |
+
"execution_count": 143,
|
| 550 |
+
"metadata": {},
|
| 551 |
+
"output_type": "execute_result"
|
| 552 |
+
}
|
| 553 |
+
],
|
| 554 |
+
"source": [
|
| 555 |
+
"ham_df = Message_df[Message_df.Category == \"ham\"]\n",
|
| 556 |
+
"spam_df = Message_df[Message_df.Category == \"spam\"]\n",
|
| 557 |
+
"ham_df.shape, spam_df.shape"
|
| 558 |
+
]
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"cell_type": "markdown",
|
| 562 |
+
"metadata": {
|
| 563 |
+
"id": "J2wSWyFVYVMw"
|
| 564 |
+
},
|
| 565 |
+
"source": [
|
| 566 |
+
"### Blanced Data"
|
| 567 |
+
]
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"cell_type": "code",
|
| 571 |
+
"execution_count": 144,
|
| 572 |
+
"metadata": {
|
| 573 |
+
"colab": {
|
| 574 |
+
"base_uri": "https://localhost:8080/"
|
| 575 |
+
},
|
| 576 |
+
"id": "uqrWKLJaUppz",
|
| 577 |
+
"outputId": "52ccaf5d-ffc6-4e6a-8a30-0e89cd5b8397"
|
| 578 |
+
},
|
| 579 |
+
"outputs": [
|
| 580 |
+
{
|
| 581 |
+
"data": {
|
| 582 |
+
"text/plain": [
|
| 583 |
+
"((641, 2), (641, 2))"
|
| 584 |
+
]
|
| 585 |
+
},
|
| 586 |
+
"execution_count": 144,
|
| 587 |
+
"metadata": {},
|
| 588 |
+
"output_type": "execute_result"
|
| 589 |
+
}
|
| 590 |
+
],
|
| 591 |
+
"source": [
|
| 592 |
+
"ham_df = ham_df.sample(spam_df.shape[0],random_state=0)\n",
|
| 593 |
+
"ham_df.shape, spam_df.shape"
|
| 594 |
+
]
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"cell_type": "code",
|
| 598 |
+
"execution_count": 145,
|
| 599 |
+
"metadata": {
|
| 600 |
+
"colab": {
|
| 601 |
+
"base_uri": "https://localhost:8080/",
|
| 602 |
+
"height": 206
|
| 603 |
+
},
|
| 604 |
+
"id": "7sT79-iOVJm0",
|
| 605 |
+
"outputId": "17f61a6d-9b01-4e8b-88b1-e0527a9244c0"
|
| 606 |
+
},
|
| 607 |
+
"outputs": [
|
| 608 |
+
{
|
| 609 |
+
"data": {
|
| 610 |
+
"application/vnd.google.colaboratory.intrinsic+json": {
|
| 611 |
+
"summary": "{\n \"name\": \"Message_df\",\n \"rows\": 1282,\n \"fields\": [\n {\n \"column\": \"Category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Message\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1282,\n \"samples\": [\n \"December only! Had your mobile 11mths+? You are entitled to update to the latest colour camera mobile for Free! Call The Mobile Update VCo FREE on 08002986906\",\n \"Dear Voucher Holder, To claim this weeks offer, at you PC please go to http://www.e-tlp.co.uk/expressoffer Ts&Cs apply. To stop texts, txt STOP to 80062\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
|
| 612 |
+
"type": "dataframe",
|
| 613 |
+
"variable_name": "Message_df"
|
| 614 |
+
},
|
| 615 |
+
"text/html": [
|
| 616 |
+
"\n",
|
| 617 |
+
" <div id=\"df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6\" class=\"colab-df-container\">\n",
|
| 618 |
+
" <div>\n",
|
| 619 |
+
"<style scoped>\n",
|
| 620 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 621 |
+
" vertical-align: middle;\n",
|
| 622 |
+
" }\n",
|
| 623 |
+
"\n",
|
| 624 |
+
" .dataframe tbody tr th {\n",
|
| 625 |
+
" vertical-align: top;\n",
|
| 626 |
+
" }\n",
|
| 627 |
+
"\n",
|
| 628 |
+
" .dataframe thead th {\n",
|
| 629 |
+
" text-align: right;\n",
|
| 630 |
+
" }\n",
|
| 631 |
+
"</style>\n",
|
| 632 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 633 |
+
" <thead>\n",
|
| 634 |
+
" <tr style=\"text-align: right;\">\n",
|
| 635 |
+
" <th></th>\n",
|
| 636 |
+
" <th>Category</th>\n",
|
| 637 |
+
" <th>Message</th>\n",
|
| 638 |
+
" </tr>\n",
|
| 639 |
+
" </thead>\n",
|
| 640 |
+
" <tbody>\n",
|
| 641 |
+
" <tr>\n",
|
| 642 |
+
" <th>3570</th>\n",
|
| 643 |
+
" <td>ham</td>\n",
|
| 644 |
+
" <td>She's fine. Sends her greetings</td>\n",
|
| 645 |
+
" </tr>\n",
|
| 646 |
+
" <tr>\n",
|
| 647 |
+
" <th>3985</th>\n",
|
| 648 |
+
" <td>ham</td>\n",
|
| 649 |
+
" <td>Hey, I missed you tm of last night as my phone...</td>\n",
|
| 650 |
+
" </tr>\n",
|
| 651 |
+
" <tr>\n",
|
| 652 |
+
" <th>2105</th>\n",
|
| 653 |
+
" <td>ham</td>\n",
|
| 654 |
+
" <td>Anyway seriously hit me up when you're back be...</td>\n",
|
| 655 |
+
" </tr>\n",
|
| 656 |
+
" <tr>\n",
|
| 657 |
+
" <th>4729</th>\n",
|
| 658 |
+
" <td>ham</td>\n",
|
| 659 |
+
" <td>I (Career Tel) have added u as a contact on IN...</td>\n",
|
| 660 |
+
" </tr>\n",
|
| 661 |
+
" <tr>\n",
|
| 662 |
+
" <th>3405</th>\n",
|
| 663 |
+
" <td>ham</td>\n",
|
| 664 |
+
" <td>Then ü ask dad to pick ü up lar... Ü wan 2 sta...</td>\n",
|
| 665 |
+
" </tr>\n",
|
| 666 |
+
" </tbody>\n",
|
| 667 |
+
"</table>\n",
|
| 668 |
+
"</div>\n",
|
| 669 |
+
" <div class=\"colab-df-buttons\">\n",
|
| 670 |
+
"\n",
|
| 671 |
+
" <div class=\"colab-df-container\">\n",
|
| 672 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6')\"\n",
|
| 673 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
| 674 |
+
" style=\"display:none;\">\n",
|
| 675 |
+
"\n",
|
| 676 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
| 677 |
+
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
| 678 |
+
" </svg>\n",
|
| 679 |
+
" </button>\n",
|
| 680 |
+
"\n",
|
| 681 |
+
" <style>\n",
|
| 682 |
+
" .colab-df-container {\n",
|
| 683 |
+
" display:flex;\n",
|
| 684 |
+
" gap: 12px;\n",
|
| 685 |
+
" }\n",
|
| 686 |
+
"\n",
|
| 687 |
+
" .colab-df-convert {\n",
|
| 688 |
+
" background-color: #E8F0FE;\n",
|
| 689 |
+
" border: none;\n",
|
| 690 |
+
" border-radius: 50%;\n",
|
| 691 |
+
" cursor: pointer;\n",
|
| 692 |
+
" display: none;\n",
|
| 693 |
+
" fill: #1967D2;\n",
|
| 694 |
+
" height: 32px;\n",
|
| 695 |
+
" padding: 0 0 0 0;\n",
|
| 696 |
+
" width: 32px;\n",
|
| 697 |
+
" }\n",
|
| 698 |
+
"\n",
|
| 699 |
+
" .colab-df-convert:hover {\n",
|
| 700 |
+
" background-color: #E2EBFA;\n",
|
| 701 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 702 |
+
" fill: #174EA6;\n",
|
| 703 |
+
" }\n",
|
| 704 |
+
"\n",
|
| 705 |
+
" .colab-df-buttons div {\n",
|
| 706 |
+
" margin-bottom: 4px;\n",
|
| 707 |
+
" }\n",
|
| 708 |
+
"\n",
|
| 709 |
+
" [theme=dark] .colab-df-convert {\n",
|
| 710 |
+
" background-color: #3B4455;\n",
|
| 711 |
+
" fill: #D2E3FC;\n",
|
| 712 |
+
" }\n",
|
| 713 |
+
"\n",
|
| 714 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
| 715 |
+
" background-color: #434B5C;\n",
|
| 716 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
| 717 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
| 718 |
+
" fill: #FFFFFF;\n",
|
| 719 |
+
" }\n",
|
| 720 |
+
" </style>\n",
|
| 721 |
+
"\n",
|
| 722 |
+
" <script>\n",
|
| 723 |
+
" const buttonEl =\n",
|
| 724 |
+
" document.querySelector('#df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6 button.colab-df-convert');\n",
|
| 725 |
+
" buttonEl.style.display =\n",
|
| 726 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 727 |
+
"\n",
|
| 728 |
+
" async function convertToInteractive(key) {\n",
|
| 729 |
+
" const element = document.querySelector('#df-a3c0eef7-fbb6-4d81-a8ad-b47acffeb6b6');\n",
|
| 730 |
+
" const dataTable =\n",
|
| 731 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
| 732 |
+
" [key], {});\n",
|
| 733 |
+
" if (!dataTable) return;\n",
|
| 734 |
+
"\n",
|
| 735 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
| 736 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
| 737 |
+
" + ' to learn more about interactive tables.';\n",
|
| 738 |
+
" element.innerHTML = '';\n",
|
| 739 |
+
" dataTable['output_type'] = 'display_data';\n",
|
| 740 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
| 741 |
+
" const docLink = document.createElement('div');\n",
|
| 742 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
| 743 |
+
" element.appendChild(docLink);\n",
|
| 744 |
+
" }\n",
|
| 745 |
+
" </script>\n",
|
| 746 |
+
" </div>\n",
|
| 747 |
+
"\n",
|
| 748 |
+
"\n",
|
| 749 |
+
"<div id=\"df-d5411cc4-3fa2-4f84-8a1c-958062268fc2\">\n",
|
| 750 |
+
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d5411cc4-3fa2-4f84-8a1c-958062268fc2')\"\n",
|
| 751 |
+
" title=\"Suggest charts\"\n",
|
| 752 |
+
" style=\"display:none;\">\n",
|
| 753 |
+
"\n",
|
| 754 |
+
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
| 755 |
+
" width=\"24px\">\n",
|
| 756 |
+
" <g>\n",
|
| 757 |
+
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
| 758 |
+
" </g>\n",
|
| 759 |
+
"</svg>\n",
|
| 760 |
+
" </button>\n",
|
| 761 |
+
"\n",
|
| 762 |
+
"<style>\n",
|
| 763 |
+
" .colab-df-quickchart {\n",
|
| 764 |
+
" --bg-color: #E8F0FE;\n",
|
| 765 |
+
" --fill-color: #1967D2;\n",
|
| 766 |
+
" --hover-bg-color: #E2EBFA;\n",
|
| 767 |
+
" --hover-fill-color: #174EA6;\n",
|
| 768 |
+
" --disabled-fill-color: #AAA;\n",
|
| 769 |
+
" --disabled-bg-color: #DDD;\n",
|
| 770 |
+
" }\n",
|
| 771 |
+
"\n",
|
| 772 |
+
" [theme=dark] .colab-df-quickchart {\n",
|
| 773 |
+
" --bg-color: #3B4455;\n",
|
| 774 |
+
" --fill-color: #D2E3FC;\n",
|
| 775 |
+
" --hover-bg-color: #434B5C;\n",
|
| 776 |
+
" --hover-fill-color: #FFFFFF;\n",
|
| 777 |
+
" --disabled-bg-color: #3B4455;\n",
|
| 778 |
+
" --disabled-fill-color: #666;\n",
|
| 779 |
+
" }\n",
|
| 780 |
+
"\n",
|
| 781 |
+
" .colab-df-quickchart {\n",
|
| 782 |
+
" background-color: var(--bg-color);\n",
|
| 783 |
+
" border: none;\n",
|
| 784 |
+
" border-radius: 50%;\n",
|
| 785 |
+
" cursor: pointer;\n",
|
| 786 |
+
" display: none;\n",
|
| 787 |
+
" fill: var(--fill-color);\n",
|
| 788 |
+
" height: 32px;\n",
|
| 789 |
+
" padding: 0;\n",
|
| 790 |
+
" width: 32px;\n",
|
| 791 |
+
" }\n",
|
| 792 |
+
"\n",
|
| 793 |
+
" .colab-df-quickchart:hover {\n",
|
| 794 |
+
" background-color: var(--hover-bg-color);\n",
|
| 795 |
+
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 796 |
+
" fill: var(--button-hover-fill-color);\n",
|
| 797 |
+
" }\n",
|
| 798 |
+
"\n",
|
| 799 |
+
" .colab-df-quickchart-complete:disabled,\n",
|
| 800 |
+
" .colab-df-quickchart-complete:disabled:hover {\n",
|
| 801 |
+
" background-color: var(--disabled-bg-color);\n",
|
| 802 |
+
" fill: var(--disabled-fill-color);\n",
|
| 803 |
+
" box-shadow: none;\n",
|
| 804 |
+
" }\n",
|
| 805 |
+
"\n",
|
| 806 |
+
" .colab-df-spinner {\n",
|
| 807 |
+
" border: 2px solid var(--fill-color);\n",
|
| 808 |
+
" border-color: transparent;\n",
|
| 809 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 810 |
+
" animation:\n",
|
| 811 |
+
" spin 1s steps(1) infinite;\n",
|
| 812 |
+
" }\n",
|
| 813 |
+
"\n",
|
| 814 |
+
" @keyframes spin {\n",
|
| 815 |
+
" 0% {\n",
|
| 816 |
+
" border-color: transparent;\n",
|
| 817 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 818 |
+
" border-left-color: var(--fill-color);\n",
|
| 819 |
+
" }\n",
|
| 820 |
+
" 20% {\n",
|
| 821 |
+
" border-color: transparent;\n",
|
| 822 |
+
" border-left-color: var(--fill-color);\n",
|
| 823 |
+
" border-top-color: var(--fill-color);\n",
|
| 824 |
+
" }\n",
|
| 825 |
+
" 30% {\n",
|
| 826 |
+
" border-color: transparent;\n",
|
| 827 |
+
" border-left-color: var(--fill-color);\n",
|
| 828 |
+
" border-top-color: var(--fill-color);\n",
|
| 829 |
+
" border-right-color: var(--fill-color);\n",
|
| 830 |
+
" }\n",
|
| 831 |
+
" 40% {\n",
|
| 832 |
+
" border-color: transparent;\n",
|
| 833 |
+
" border-right-color: var(--fill-color);\n",
|
| 834 |
+
" border-top-color: var(--fill-color);\n",
|
| 835 |
+
" }\n",
|
| 836 |
+
" 60% {\n",
|
| 837 |
+
" border-color: transparent;\n",
|
| 838 |
+
" border-right-color: var(--fill-color);\n",
|
| 839 |
+
" }\n",
|
| 840 |
+
" 80% {\n",
|
| 841 |
+
" border-color: transparent;\n",
|
| 842 |
+
" border-right-color: var(--fill-color);\n",
|
| 843 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 844 |
+
" }\n",
|
| 845 |
+
" 90% {\n",
|
| 846 |
+
" border-color: transparent;\n",
|
| 847 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 848 |
+
" }\n",
|
| 849 |
+
" }\n",
|
| 850 |
+
"</style>\n",
|
| 851 |
+
"\n",
|
| 852 |
+
" <script>\n",
|
| 853 |
+
" async function quickchart(key) {\n",
|
| 854 |
+
" const quickchartButtonEl =\n",
|
| 855 |
+
" document.querySelector('#' + key + ' button');\n",
|
| 856 |
+
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
| 857 |
+
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
| 858 |
+
" try {\n",
|
| 859 |
+
" const charts = await google.colab.kernel.invokeFunction(\n",
|
| 860 |
+
" 'suggestCharts', [key], {});\n",
|
| 861 |
+
" } catch (error) {\n",
|
| 862 |
+
" console.error('Error during call to suggestCharts:', error);\n",
|
| 863 |
+
" }\n",
|
| 864 |
+
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
| 865 |
+
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
| 866 |
+
" }\n",
|
| 867 |
+
" (() => {\n",
|
| 868 |
+
" let quickchartButtonEl =\n",
|
| 869 |
+
" document.querySelector('#df-d5411cc4-3fa2-4f84-8a1c-958062268fc2 button');\n",
|
| 870 |
+
" quickchartButtonEl.style.display =\n",
|
| 871 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 872 |
+
" })();\n",
|
| 873 |
+
" </script>\n",
|
| 874 |
+
"</div>\n",
|
| 875 |
+
"\n",
|
| 876 |
+
" </div>\n",
|
| 877 |
+
" </div>\n"
|
| 878 |
+
],
|
| 879 |
+
"text/plain": [
|
| 880 |
+
" Category Message\n",
|
| 881 |
+
"3570 ham She's fine. Sends her greetings\n",
|
| 882 |
+
"3985 ham Hey, I missed you tm of last night as my phone...\n",
|
| 883 |
+
"2105 ham Anyway seriously hit me up when you're back be...\n",
|
| 884 |
+
"4729 ham I (Career Tel) have added u as a contact on IN...\n",
|
| 885 |
+
"3405 ham Then ü ask dad to pick ü up lar... Ü wan 2 sta..."
|
| 886 |
+
]
|
| 887 |
+
},
|
| 888 |
+
"execution_count": 145,
|
| 889 |
+
"metadata": {},
|
| 890 |
+
"output_type": "execute_result"
|
| 891 |
+
}
|
| 892 |
+
],
|
| 893 |
+
"source": [
|
| 894 |
+
"# concatenation\n",
|
| 895 |
+
"Message_df = pd.concat([ham_df, spam_df], axis=0) # Blanced Data\n",
|
| 896 |
+
"Message_df.head()"
|
| 897 |
+
]
|
| 898 |
+
},
|
| 899 |
+
{
|
| 900 |
+
"cell_type": "markdown",
|
| 901 |
+
"metadata": {
|
| 902 |
+
"id": "OeXzuc7vYBxV"
|
| 903 |
+
},
|
| 904 |
+
"source": [
|
| 905 |
+
"## 3.2.Text Preprocessing"
|
| 906 |
+
]
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"cell_type": "code",
|
| 910 |
+
"execution_count": 146,
|
| 911 |
+
"metadata": {
|
| 912 |
+
"id": "7V_nMtigZEdY"
|
| 913 |
+
},
|
| 914 |
+
"outputs": [],
|
| 915 |
+
"source": [
|
| 916 |
+
"def preprocess_text(text):\n",
|
| 917 |
+
" # Convert text to lowercase\n",
|
| 918 |
+
" text = text.casefold()\n",
|
| 919 |
+
"\n",
|
| 920 |
+
" text = re.sub(r'[^a-zA-Z]', ' ', text)\n",
|
| 921 |
+
"\n",
|
| 922 |
+
" # Tokenize text\n",
|
| 923 |
+
" tokens = word_tokenize(text)\n",
|
| 924 |
+
"\n",
|
| 925 |
+
" # Remove stopwords\n",
|
| 926 |
+
" stop_words = set(stopwords.words('english')) # Define stop_words here\n",
|
| 927 |
+
" filtered_tokens = [word for word in tokens if word not in stop_words and word != \"not\"]\n",
|
| 928 |
+
"\n",
|
| 929 |
+
" # Lemmatization\n",
|
| 930 |
+
" lemmatizer = WordNetLemmatizer()\n",
|
| 931 |
+
" lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
|
| 932 |
+
"\n",
|
| 933 |
+
" # Join tokens back into a string\n",
|
| 934 |
+
" preprocessed_text = ' '.join(lemmatized_tokens)\n",
|
| 935 |
+
"\n",
|
| 936 |
+
" return preprocessed_text\n"
|
| 937 |
+
]
|
| 938 |
+
},
|
| 939 |
+
{
|
| 940 |
+
"cell_type": "code",
|
| 941 |
+
"execution_count": 147,
|
| 942 |
+
"metadata": {
|
| 943 |
+
"id": "yD6P08hs67do"
|
| 944 |
+
},
|
| 945 |
+
"outputs": [],
|
| 946 |
+
"source": [
|
| 947 |
+
"corpus=[preprocess_text(Message) for Message in Message_df.Message]"
|
| 948 |
+
]
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"cell_type": "markdown",
|
| 952 |
+
"metadata": {
|
| 953 |
+
"id": "C-eqca1fX02T"
|
| 954 |
+
},
|
| 955 |
+
"source": [
|
| 956 |
+
"## 3.3.Encoding"
|
| 957 |
+
]
|
| 958 |
+
},
|
| 959 |
+
{
|
| 960 |
+
"cell_type": "code",
|
| 961 |
+
"execution_count": 148,
|
| 962 |
+
"metadata": {
|
| 963 |
+
"colab": {
|
| 964 |
+
"base_uri": "https://localhost:8080/",
|
| 965 |
+
"height": 143
|
| 966 |
+
},
|
| 967 |
+
"id": "qwRxIJXWcCuS",
|
| 968 |
+
"outputId": "a8d86092-aada-4b52-846d-8d10fb60a6cd"
|
| 969 |
+
},
|
| 970 |
+
"outputs": [
|
| 971 |
+
{
|
| 972 |
+
"data": {
|
| 973 |
+
"application/vnd.google.colaboratory.intrinsic+json": {
|
| 974 |
+
"summary": "{\n \"name\": \"Message_df\",\n \"rows\": 1282,\n \"fields\": [\n {\n \"column\": \"Category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"spam\",\n \"ham\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Message\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1282,\n \"samples\": [\n \"December only! Had your mobile 11mths+? You are entitled to update to the latest colour camera mobile for Free! Call The Mobile Update VCo FREE on 08002986906\",\n \"Dear Voucher Holder, To claim this weeks offer, at you PC please go to http://www.e-tlp.co.uk/expressoffer Ts&Cs apply. To stop texts, txt STOP to 80062\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Category_lable\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
|
| 975 |
+
"type": "dataframe",
|
| 976 |
+
"variable_name": "Message_df"
|
| 977 |
+
},
|
| 978 |
+
"text/html": [
|
| 979 |
+
"\n",
|
| 980 |
+
" <div id=\"df-4980b3be-663a-49a9-a994-41220ec15e3a\" class=\"colab-df-container\">\n",
|
| 981 |
+
" <div>\n",
|
| 982 |
+
"<style scoped>\n",
|
| 983 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 984 |
+
" vertical-align: middle;\n",
|
| 985 |
+
" }\n",
|
| 986 |
+
"\n",
|
| 987 |
+
" .dataframe tbody tr th {\n",
|
| 988 |
+
" vertical-align: top;\n",
|
| 989 |
+
" }\n",
|
| 990 |
+
"\n",
|
| 991 |
+
" .dataframe thead th {\n",
|
| 992 |
+
" text-align: right;\n",
|
| 993 |
+
" }\n",
|
| 994 |
+
"</style>\n",
|
| 995 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 996 |
+
" <thead>\n",
|
| 997 |
+
" <tr style=\"text-align: right;\">\n",
|
| 998 |
+
" <th></th>\n",
|
| 999 |
+
" <th>Category</th>\n",
|
| 1000 |
+
" <th>Message</th>\n",
|
| 1001 |
+
" <th>Category_lable</th>\n",
|
| 1002 |
+
" </tr>\n",
|
| 1003 |
+
" </thead>\n",
|
| 1004 |
+
" <tbody>\n",
|
| 1005 |
+
" <tr>\n",
|
| 1006 |
+
" <th>3570</th>\n",
|
| 1007 |
+
" <td>ham</td>\n",
|
| 1008 |
+
" <td>She's fine. Sends her greetings</td>\n",
|
| 1009 |
+
" <td>1</td>\n",
|
| 1010 |
+
" </tr>\n",
|
| 1011 |
+
" <tr>\n",
|
| 1012 |
+
" <th>3985</th>\n",
|
| 1013 |
+
" <td>ham</td>\n",
|
| 1014 |
+
" <td>Hey, I missed you tm of last night as my phone...</td>\n",
|
| 1015 |
+
" <td>1</td>\n",
|
| 1016 |
+
" </tr>\n",
|
| 1017 |
+
" <tr>\n",
|
| 1018 |
+
" <th>2105</th>\n",
|
| 1019 |
+
" <td>ham</td>\n",
|
| 1020 |
+
" <td>Anyway seriously hit me up when you're back be...</td>\n",
|
| 1021 |
+
" <td>1</td>\n",
|
| 1022 |
+
" </tr>\n",
|
| 1023 |
+
" </tbody>\n",
|
| 1024 |
+
"</table>\n",
|
| 1025 |
+
"</div>\n",
|
| 1026 |
+
" <div class=\"colab-df-buttons\">\n",
|
| 1027 |
+
"\n",
|
| 1028 |
+
" <div class=\"colab-df-container\">\n",
|
| 1029 |
+
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4980b3be-663a-49a9-a994-41220ec15e3a')\"\n",
|
| 1030 |
+
" title=\"Convert this dataframe to an interactive table.\"\n",
|
| 1031 |
+
" style=\"display:none;\">\n",
|
| 1032 |
+
"\n",
|
| 1033 |
+
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
| 1034 |
+
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
| 1035 |
+
" </svg>\n",
|
| 1036 |
+
" </button>\n",
|
| 1037 |
+
"\n",
|
| 1038 |
+
" <style>\n",
|
| 1039 |
+
" .colab-df-container {\n",
|
| 1040 |
+
" display:flex;\n",
|
| 1041 |
+
" gap: 12px;\n",
|
| 1042 |
+
" }\n",
|
| 1043 |
+
"\n",
|
| 1044 |
+
" .colab-df-convert {\n",
|
| 1045 |
+
" background-color: #E8F0FE;\n",
|
| 1046 |
+
" border: none;\n",
|
| 1047 |
+
" border-radius: 50%;\n",
|
| 1048 |
+
" cursor: pointer;\n",
|
| 1049 |
+
" display: none;\n",
|
| 1050 |
+
" fill: #1967D2;\n",
|
| 1051 |
+
" height: 32px;\n",
|
| 1052 |
+
" padding: 0 0 0 0;\n",
|
| 1053 |
+
" width: 32px;\n",
|
| 1054 |
+
" }\n",
|
| 1055 |
+
"\n",
|
| 1056 |
+
" .colab-df-convert:hover {\n",
|
| 1057 |
+
" background-color: #E2EBFA;\n",
|
| 1058 |
+
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 1059 |
+
" fill: #174EA6;\n",
|
| 1060 |
+
" }\n",
|
| 1061 |
+
"\n",
|
| 1062 |
+
" .colab-df-buttons div {\n",
|
| 1063 |
+
" margin-bottom: 4px;\n",
|
| 1064 |
+
" }\n",
|
| 1065 |
+
"\n",
|
| 1066 |
+
" [theme=dark] .colab-df-convert {\n",
|
| 1067 |
+
" background-color: #3B4455;\n",
|
| 1068 |
+
" fill: #D2E3FC;\n",
|
| 1069 |
+
" }\n",
|
| 1070 |
+
"\n",
|
| 1071 |
+
" [theme=dark] .colab-df-convert:hover {\n",
|
| 1072 |
+
" background-color: #434B5C;\n",
|
| 1073 |
+
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
| 1074 |
+
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
| 1075 |
+
" fill: #FFFFFF;\n",
|
| 1076 |
+
" }\n",
|
| 1077 |
+
" </style>\n",
|
| 1078 |
+
"\n",
|
| 1079 |
+
" <script>\n",
|
| 1080 |
+
" const buttonEl =\n",
|
| 1081 |
+
" document.querySelector('#df-4980b3be-663a-49a9-a994-41220ec15e3a button.colab-df-convert');\n",
|
| 1082 |
+
" buttonEl.style.display =\n",
|
| 1083 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 1084 |
+
"\n",
|
| 1085 |
+
" async function convertToInteractive(key) {\n",
|
| 1086 |
+
" const element = document.querySelector('#df-4980b3be-663a-49a9-a994-41220ec15e3a');\n",
|
| 1087 |
+
" const dataTable =\n",
|
| 1088 |
+
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
| 1089 |
+
" [key], {});\n",
|
| 1090 |
+
" if (!dataTable) return;\n",
|
| 1091 |
+
"\n",
|
| 1092 |
+
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
| 1093 |
+
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
| 1094 |
+
" + ' to learn more about interactive tables.';\n",
|
| 1095 |
+
" element.innerHTML = '';\n",
|
| 1096 |
+
" dataTable['output_type'] = 'display_data';\n",
|
| 1097 |
+
" await google.colab.output.renderOutput(dataTable, element);\n",
|
| 1098 |
+
" const docLink = document.createElement('div');\n",
|
| 1099 |
+
" docLink.innerHTML = docLinkHtml;\n",
|
| 1100 |
+
" element.appendChild(docLink);\n",
|
| 1101 |
+
" }\n",
|
| 1102 |
+
" </script>\n",
|
| 1103 |
+
" </div>\n",
|
| 1104 |
+
"\n",
|
| 1105 |
+
"\n",
|
| 1106 |
+
"<div id=\"df-09b65e71-0d55-4461-a02a-fd0e783326a0\">\n",
|
| 1107 |
+
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-09b65e71-0d55-4461-a02a-fd0e783326a0')\"\n",
|
| 1108 |
+
" title=\"Suggest charts\"\n",
|
| 1109 |
+
" style=\"display:none;\">\n",
|
| 1110 |
+
"\n",
|
| 1111 |
+
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
| 1112 |
+
" width=\"24px\">\n",
|
| 1113 |
+
" <g>\n",
|
| 1114 |
+
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
| 1115 |
+
" </g>\n",
|
| 1116 |
+
"</svg>\n",
|
| 1117 |
+
" </button>\n",
|
| 1118 |
+
"\n",
|
| 1119 |
+
"<style>\n",
|
| 1120 |
+
" .colab-df-quickchart {\n",
|
| 1121 |
+
" --bg-color: #E8F0FE;\n",
|
| 1122 |
+
" --fill-color: #1967D2;\n",
|
| 1123 |
+
" --hover-bg-color: #E2EBFA;\n",
|
| 1124 |
+
" --hover-fill-color: #174EA6;\n",
|
| 1125 |
+
" --disabled-fill-color: #AAA;\n",
|
| 1126 |
+
" --disabled-bg-color: #DDD;\n",
|
| 1127 |
+
" }\n",
|
| 1128 |
+
"\n",
|
| 1129 |
+
" [theme=dark] .colab-df-quickchart {\n",
|
| 1130 |
+
" --bg-color: #3B4455;\n",
|
| 1131 |
+
" --fill-color: #D2E3FC;\n",
|
| 1132 |
+
" --hover-bg-color: #434B5C;\n",
|
| 1133 |
+
" --hover-fill-color: #FFFFFF;\n",
|
| 1134 |
+
" --disabled-bg-color: #3B4455;\n",
|
| 1135 |
+
" --disabled-fill-color: #666;\n",
|
| 1136 |
+
" }\n",
|
| 1137 |
+
"\n",
|
| 1138 |
+
" .colab-df-quickchart {\n",
|
| 1139 |
+
" background-color: var(--bg-color);\n",
|
| 1140 |
+
" border: none;\n",
|
| 1141 |
+
" border-radius: 50%;\n",
|
| 1142 |
+
" cursor: pointer;\n",
|
| 1143 |
+
" display: none;\n",
|
| 1144 |
+
" fill: var(--fill-color);\n",
|
| 1145 |
+
" height: 32px;\n",
|
| 1146 |
+
" padding: 0;\n",
|
| 1147 |
+
" width: 32px;\n",
|
| 1148 |
+
" }\n",
|
| 1149 |
+
"\n",
|
| 1150 |
+
" .colab-df-quickchart:hover {\n",
|
| 1151 |
+
" background-color: var(--hover-bg-color);\n",
|
| 1152 |
+
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 1153 |
+
" fill: var(--button-hover-fill-color);\n",
|
| 1154 |
+
" }\n",
|
| 1155 |
+
"\n",
|
| 1156 |
+
" .colab-df-quickchart-complete:disabled,\n",
|
| 1157 |
+
" .colab-df-quickchart-complete:disabled:hover {\n",
|
| 1158 |
+
" background-color: var(--disabled-bg-color);\n",
|
| 1159 |
+
" fill: var(--disabled-fill-color);\n",
|
| 1160 |
+
" box-shadow: none;\n",
|
| 1161 |
+
" }\n",
|
| 1162 |
+
"\n",
|
| 1163 |
+
" .colab-df-spinner {\n",
|
| 1164 |
+
" border: 2px solid var(--fill-color);\n",
|
| 1165 |
+
" border-color: transparent;\n",
|
| 1166 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 1167 |
+
" animation:\n",
|
| 1168 |
+
" spin 1s steps(1) infinite;\n",
|
| 1169 |
+
" }\n",
|
| 1170 |
+
"\n",
|
| 1171 |
+
" @keyframes spin {\n",
|
| 1172 |
+
" 0% {\n",
|
| 1173 |
+
" border-color: transparent;\n",
|
| 1174 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 1175 |
+
" border-left-color: var(--fill-color);\n",
|
| 1176 |
+
" }\n",
|
| 1177 |
+
" 20% {\n",
|
| 1178 |
+
" border-color: transparent;\n",
|
| 1179 |
+
" border-left-color: var(--fill-color);\n",
|
| 1180 |
+
" border-top-color: var(--fill-color);\n",
|
| 1181 |
+
" }\n",
|
| 1182 |
+
" 30% {\n",
|
| 1183 |
+
" border-color: transparent;\n",
|
| 1184 |
+
" border-left-color: var(--fill-color);\n",
|
| 1185 |
+
" border-top-color: var(--fill-color);\n",
|
| 1186 |
+
" border-right-color: var(--fill-color);\n",
|
| 1187 |
+
" }\n",
|
| 1188 |
+
" 40% {\n",
|
| 1189 |
+
" border-color: transparent;\n",
|
| 1190 |
+
" border-right-color: var(--fill-color);\n",
|
| 1191 |
+
" border-top-color: var(--fill-color);\n",
|
| 1192 |
+
" }\n",
|
| 1193 |
+
" 60% {\n",
|
| 1194 |
+
" border-color: transparent;\n",
|
| 1195 |
+
" border-right-color: var(--fill-color);\n",
|
| 1196 |
+
" }\n",
|
| 1197 |
+
" 80% {\n",
|
| 1198 |
+
" border-color: transparent;\n",
|
| 1199 |
+
" border-right-color: var(--fill-color);\n",
|
| 1200 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 1201 |
+
" }\n",
|
| 1202 |
+
" 90% {\n",
|
| 1203 |
+
" border-color: transparent;\n",
|
| 1204 |
+
" border-bottom-color: var(--fill-color);\n",
|
| 1205 |
+
" }\n",
|
| 1206 |
+
" }\n",
|
| 1207 |
+
"</style>\n",
|
| 1208 |
+
"\n",
|
| 1209 |
+
" <script>\n",
|
| 1210 |
+
" async function quickchart(key) {\n",
|
| 1211 |
+
" const quickchartButtonEl =\n",
|
| 1212 |
+
" document.querySelector('#' + key + ' button');\n",
|
| 1213 |
+
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
| 1214 |
+
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
| 1215 |
+
" try {\n",
|
| 1216 |
+
" const charts = await google.colab.kernel.invokeFunction(\n",
|
| 1217 |
+
" 'suggestCharts', [key], {});\n",
|
| 1218 |
+
" } catch (error) {\n",
|
| 1219 |
+
" console.error('Error during call to suggestCharts:', error);\n",
|
| 1220 |
+
" }\n",
|
| 1221 |
+
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
| 1222 |
+
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
| 1223 |
+
" }\n",
|
| 1224 |
+
" (() => {\n",
|
| 1225 |
+
" let quickchartButtonEl =\n",
|
| 1226 |
+
" document.querySelector('#df-09b65e71-0d55-4461-a02a-fd0e783326a0 button');\n",
|
| 1227 |
+
" quickchartButtonEl.style.display =\n",
|
| 1228 |
+
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 1229 |
+
" })();\n",
|
| 1230 |
+
" </script>\n",
|
| 1231 |
+
"</div>\n",
|
| 1232 |
+
"\n",
|
| 1233 |
+
" </div>\n",
|
| 1234 |
+
" </div>\n"
|
| 1235 |
+
],
|
| 1236 |
+
"text/plain": [
|
| 1237 |
+
" Category Message \\\n",
|
| 1238 |
+
"3570 ham She's fine. Sends her greetings \n",
|
| 1239 |
+
"3985 ham Hey, I missed you tm of last night as my phone... \n",
|
| 1240 |
+
"2105 ham Anyway seriously hit me up when you're back be... \n",
|
| 1241 |
+
"\n",
|
| 1242 |
+
" Category_lable \n",
|
| 1243 |
+
"3570 1 \n",
|
| 1244 |
+
"3985 1 \n",
|
| 1245 |
+
"2105 1 "
|
| 1246 |
+
]
|
| 1247 |
+
},
|
| 1248 |
+
"execution_count": 148,
|
| 1249 |
+
"metadata": {},
|
| 1250 |
+
"output_type": "execute_result"
|
| 1251 |
+
}
|
| 1252 |
+
],
|
| 1253 |
+
"source": [
|
| 1254 |
+
"Message_df['Category_lable'] = Message_df['Category'].map({\"spam\":0,\"ham\":1})\n",
|
| 1255 |
+
"Message_df.head(3)"
|
| 1256 |
+
]
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"cell_type": "markdown",
|
| 1260 |
+
"metadata": {
|
| 1261 |
+
"id": "UiheZTwnXeef"
|
| 1262 |
+
},
|
| 1263 |
+
"source": [
|
| 1264 |
+
"## 3.4.Convert data to TF-IDF vectors"
|
| 1265 |
+
]
|
| 1266 |
+
},
|
| 1267 |
+
{
|
| 1268 |
+
"cell_type": "code",
|
| 1269 |
+
"execution_count": 149,
|
| 1270 |
+
"metadata": {
|
| 1271 |
+
"colab": {
|
| 1272 |
+
"base_uri": "https://localhost:8080/"
|
| 1273 |
+
},
|
| 1274 |
+
"id": "nAHJnlluiZkd",
|
| 1275 |
+
"outputId": "0b3d55d1-e274-432b-ea42-ccd602a5ba1c"
|
| 1276 |
+
},
|
| 1277 |
+
"outputs": [
|
| 1278 |
+
{
|
| 1279 |
+
"data": {
|
| 1280 |
+
"text/plain": [
|
| 1281 |
+
"((1282, 10000), (1282,))"
|
| 1282 |
+
]
|
| 1283 |
+
},
|
| 1284 |
+
"execution_count": 149,
|
| 1285 |
+
"metadata": {},
|
| 1286 |
+
"output_type": "execute_result"
|
| 1287 |
+
}
|
| 1288 |
+
],
|
| 1289 |
+
"source": [
|
| 1290 |
+
"Tfidf_Vectorizer=TfidfVectorizer(max_features=10000,ngram_range=(1,2))\n",
|
| 1291 |
+
"X=Tfidf_Vectorizer.fit_transform(corpus).toarray()\n",
|
| 1292 |
+
"y = Message_df.Category_lable.values\n",
|
| 1293 |
+
"X.shape,y.shape"
|
| 1294 |
+
]
|
| 1295 |
+
},
|
| 1296 |
+
{
|
| 1297 |
+
"cell_type": "markdown",
|
| 1298 |
+
"metadata": {
|
| 1299 |
+
"id": "soo4Bxh4ASQq"
|
| 1300 |
+
},
|
| 1301 |
+
"source": [
|
| 1302 |
+
"### Save vectorizer"
|
| 1303 |
+
]
|
| 1304 |
+
},
|
| 1305 |
+
{
|
| 1306 |
+
"cell_type": "code",
|
| 1307 |
+
"execution_count": 150,
|
| 1308 |
+
"metadata": {
|
| 1309 |
+
"colab": {
|
| 1310 |
+
"base_uri": "https://localhost:8080/"
|
| 1311 |
+
},
|
| 1312 |
+
"id": "dHMsrJSG_m_i",
|
| 1313 |
+
"outputId": "1bc097cd-d109-4a13-82a0-ec5d4e509f0b"
|
| 1314 |
+
},
|
| 1315 |
+
"outputs": [
|
| 1316 |
+
{
|
| 1317 |
+
"data": {
|
| 1318 |
+
"text/plain": [
|
| 1319 |
+
"['tfidf_vectorizer.pkl']"
|
| 1320 |
+
]
|
| 1321 |
+
},
|
| 1322 |
+
"execution_count": 150,
|
| 1323 |
+
"metadata": {},
|
| 1324 |
+
"output_type": "execute_result"
|
| 1325 |
+
}
|
| 1326 |
+
],
|
| 1327 |
+
"source": [
|
| 1328 |
+
"dump(Tfidf_Vectorizer,'tfidf_vectorizer.pkl')\n"
|
| 1329 |
+
]
|
| 1330 |
+
},
|
| 1331 |
+
{
|
| 1332 |
+
"cell_type": "markdown",
|
| 1333 |
+
"metadata": {
|
| 1334 |
+
"id": "afxDu-1ej53c"
|
| 1335 |
+
},
|
| 1336 |
+
"source": [
|
| 1337 |
+
"## 3.5.Train Test split & Convert to PyTorch tensors"
|
| 1338 |
+
]
|
| 1339 |
+
},
|
| 1340 |
+
{
|
| 1341 |
+
"cell_type": "code",
|
| 1342 |
+
"execution_count": 151,
|
| 1343 |
+
"metadata": {
|
| 1344 |
+
"id": "SR1OFUufi1GC"
|
| 1345 |
+
},
|
| 1346 |
+
"outputs": [],
|
| 1347 |
+
"source": [
|
| 1348 |
+
"X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)\n",
|
| 1349 |
+
"# Convert to PyTorch tensors\n",
|
| 1350 |
+
"X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
|
| 1351 |
+
"y_train_tensor = torch.tensor(y_train, dtype=torch.long)\n",
|
| 1352 |
+
"X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
|
| 1353 |
+
"y_test_tensor = torch.tensor(y_test, dtype=torch.long)"
|
| 1354 |
+
]
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"cell_type": "markdown",
|
| 1358 |
+
"metadata": {
|
| 1359 |
+
"id": "s2-gHMz2XAfN"
|
| 1360 |
+
},
|
| 1361 |
+
"source": [
|
| 1362 |
+
"## 3.6.Create DataLoaders"
|
| 1363 |
+
]
|
| 1364 |
+
},
|
| 1365 |
+
{
|
| 1366 |
+
"cell_type": "code",
|
| 1367 |
+
"execution_count": 152,
|
| 1368 |
+
"metadata": {
|
| 1369 |
+
"id": "K7M4pBdMlQ8t"
|
| 1370 |
+
},
|
| 1371 |
+
"outputs": [],
|
| 1372 |
+
"source": [
|
| 1373 |
+
"batch_size = 32\n",
|
| 1374 |
+
"train_dataset = TensorDataset(X_train_tensor, y_train_tensor)\n",
|
| 1375 |
+
"test_dataset = TensorDataset(X_test_tensor, y_test_tensor)\n",
|
| 1376 |
+
"train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)\n",
|
| 1377 |
+
"test_loader = DataLoader(test_dataset, batch_size=batch_size)"
|
| 1378 |
+
]
|
| 1379 |
+
},
|
| 1380 |
+
{
|
| 1381 |
+
"cell_type": "markdown",
|
| 1382 |
+
"metadata": {
|
| 1383 |
+
"id": "0VBIFpPlXDav"
|
| 1384 |
+
},
|
| 1385 |
+
"source": [
|
| 1386 |
+
"# 4.Model building"
|
| 1387 |
+
]
|
| 1388 |
+
},
|
| 1389 |
+
{
|
| 1390 |
+
"cell_type": "markdown",
|
| 1391 |
+
"metadata": {
|
| 1392 |
+
"id": "TNlANx7oW42X"
|
| 1393 |
+
},
|
| 1394 |
+
"source": [
|
| 1395 |
+
"## 4.1.Define LSTM Model"
|
| 1396 |
+
]
|
| 1397 |
+
},
|
| 1398 |
+
{
|
| 1399 |
+
"cell_type": "code",
|
| 1400 |
+
"execution_count": 153,
|
| 1401 |
+
"metadata": {
|
| 1402 |
+
"id": "8dH_7s2tma9M"
|
| 1403 |
+
},
|
| 1404 |
+
"outputs": [],
|
| 1405 |
+
"source": [
|
| 1406 |
+
"class SentimentLSTM(nn.Module):\n",
|
| 1407 |
+
" def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):\n",
|
| 1408 |
+
" super(SentimentLSTM, self).__init__()\n",
|
| 1409 |
+
" self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)\n",
|
| 1410 |
+
" self.fc = nn.Linear(hidden_dim, output_dim)\n",
|
| 1411 |
+
" self.dropout = nn.Dropout(0.5)\n",
|
| 1412 |
+
"\n",
|
| 1413 |
+
" def forward(self, x):\n",
|
| 1414 |
+
" x = x.unsqueeze(1)\n",
|
| 1415 |
+
" lstm_out, _ = self.lstm(x)\n",
|
| 1416 |
+
" final_hidden = lstm_out[:, -1, :]\n",
|
| 1417 |
+
" return self.fc(final_hidden)"
|
| 1418 |
+
]
|
| 1419 |
+
},
|
| 1420 |
+
{
|
| 1421 |
+
"cell_type": "markdown",
|
| 1422 |
+
"metadata": {
|
| 1423 |
+
"id": "UH_MDi9mW1FA"
|
| 1424 |
+
},
|
| 1425 |
+
"source": [
|
| 1426 |
+
"## 4.2.Model parameters"
|
| 1427 |
+
]
|
| 1428 |
+
},
|
| 1429 |
+
{
|
| 1430 |
+
"cell_type": "code",
|
| 1431 |
+
"execution_count": 154,
|
| 1432 |
+
"metadata": {
|
| 1433 |
+
"id": "hqEvLZRbmbxr"
|
| 1434 |
+
},
|
| 1435 |
+
"outputs": [],
|
| 1436 |
+
"source": [
|
| 1437 |
+
"input_dim = X_train.shape[1] # TF-IDF feature size\n",
|
| 1438 |
+
"hidden_dim = 64\n",
|
| 1439 |
+
"output_dim = 2 # Binary classification (spam/ham)"
|
| 1440 |
+
]
|
| 1441 |
+
},
|
| 1442 |
+
{
|
| 1443 |
+
"cell_type": "markdown",
|
| 1444 |
+
"metadata": {
|
| 1445 |
+
"id": "9Xo28W3zWya6"
|
| 1446 |
+
},
|
| 1447 |
+
"source": [
|
| 1448 |
+
"## 4.3.Initialize model, loss, optimizer"
|
| 1449 |
+
]
|
| 1450 |
+
},
|
| 1451 |
+
{
|
| 1452 |
+
"cell_type": "code",
|
| 1453 |
+
"execution_count": 155,
|
| 1454 |
+
"metadata": {
|
| 1455 |
+
"id": "7pQYNNFQmkiW"
|
| 1456 |
+
},
|
| 1457 |
+
"outputs": [],
|
| 1458 |
+
"source": [
|
| 1459 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 1460 |
+
"model = SentimentLSTM(input_dim, hidden_dim, output_dim).to(device)\n",
|
| 1461 |
+
"criterion = nn.CrossEntropyLoss()\n",
|
| 1462 |
+
"optimizer = optim.Adam(model.parameters(), lr=0.001)"
|
| 1463 |
+
]
|
| 1464 |
+
},
|
| 1465 |
+
{
|
| 1466 |
+
"cell_type": "markdown",
|
| 1467 |
+
"metadata": {
|
| 1468 |
+
"id": "t8Z1D83ZWuy5"
|
| 1469 |
+
},
|
| 1470 |
+
"source": [
|
| 1471 |
+
"## 4.4.Training loop"
|
| 1472 |
+
]
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"cell_type": "code",
|
| 1476 |
+
"execution_count": 156,
|
| 1477 |
+
"metadata": {
|
| 1478 |
+
"colab": {
|
| 1479 |
+
"base_uri": "https://localhost:8080/"
|
| 1480 |
+
},
|
| 1481 |
+
"id": "YVHI9YSUmnMR",
|
| 1482 |
+
"outputId": "5df777bb-7af0-4253-9ed2-8c517baad460"
|
| 1483 |
+
},
|
| 1484 |
+
"outputs": [
|
| 1485 |
+
{
|
| 1486 |
+
"name": "stdout",
|
| 1487 |
+
"output_type": "stream",
|
| 1488 |
+
"text": [
|
| 1489 |
+
"Epoch 1, Loss: 0.6868\n",
|
| 1490 |
+
"Epoch 2, Loss: 0.6342\n",
|
| 1491 |
+
"Epoch 3, Loss: 0.5190\n",
|
| 1492 |
+
"Epoch 4, Loss: 0.3569\n",
|
| 1493 |
+
"Epoch 5, Loss: 0.2106\n",
|
| 1494 |
+
"Epoch 6, Loss: 0.1242\n",
|
| 1495 |
+
"Epoch 7, Loss: 0.0766\n",
|
| 1496 |
+
"Epoch 8, Loss: 0.0511\n",
|
| 1497 |
+
"Epoch 9, Loss: 0.0373\n",
|
| 1498 |
+
"Epoch 10, Loss: 0.0295\n",
|
| 1499 |
+
"Epoch 11, Loss: 0.0232\n",
|
| 1500 |
+
"Epoch 12, Loss: 0.0183\n",
|
| 1501 |
+
"Epoch 13, Loss: 0.0146\n",
|
| 1502 |
+
"Epoch 14, Loss: 0.0123\n",
|
| 1503 |
+
"Epoch 15, Loss: 0.0110\n",
|
| 1504 |
+
"Epoch 16, Loss: 0.0089\n",
|
| 1505 |
+
"Epoch 17, Loss: 0.0077\n",
|
| 1506 |
+
"Epoch 18, Loss: 0.0070\n",
|
| 1507 |
+
"Epoch 19, Loss: 0.0060\n",
|
| 1508 |
+
"Epoch 20, Loss: 0.0055\n"
|
| 1509 |
+
]
|
| 1510 |
+
}
|
| 1511 |
+
],
|
| 1512 |
+
"source": [
|
| 1513 |
+
"num_epochs = 20\n",
|
| 1514 |
+
"all_loss = []\n",
|
| 1515 |
+
"for epoch in range(num_epochs):\n",
|
| 1516 |
+
" model.train()\n",
|
| 1517 |
+
" total_loss = 0\n",
|
| 1518 |
+
" for text, label in train_loader:\n",
|
| 1519 |
+
" text, label = text.to(device), label.to(device)\n",
|
| 1520 |
+
" optimizer.zero_grad()\n",
|
| 1521 |
+
" output = model(text)\n",
|
| 1522 |
+
" loss = criterion(output, label)\n",
|
| 1523 |
+
" loss.backward()\n",
|
| 1524 |
+
" optimizer.step()\n",
|
| 1525 |
+
" total_loss += loss.item()\n",
|
| 1526 |
+
" all_loss.append(round(total_loss,2))\n",
|
| 1527 |
+
" print(f\"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}\")"
|
| 1528 |
+
]
|
| 1529 |
+
},
|
| 1530 |
+
{
|
| 1531 |
+
"cell_type": "markdown",
|
| 1532 |
+
"metadata": {
|
| 1533 |
+
"id": "jvnGZpZiWi__"
|
| 1534 |
+
},
|
| 1535 |
+
"source": [
|
| 1536 |
+
"## 4.5.Loss Graph"
|
| 1537 |
+
]
|
| 1538 |
+
},
|
| 1539 |
+
{
|
| 1540 |
+
"cell_type": "code",
|
| 1541 |
+
"execution_count": 157,
|
| 1542 |
+
"metadata": {
|
| 1543 |
+
"colab": {
|
| 1544 |
+
"base_uri": "https://localhost:8080/",
|
| 1545 |
+
"height": 472
|
| 1546 |
+
},
|
| 1547 |
+
"id": "eeNrXHXEowja",
|
| 1548 |
+
"outputId": "bf4155c0-8e30-4ba2-b18a-c68f74d9d282"
|
| 1549 |
+
},
|
| 1550 |
+
"outputs": [
|
| 1551 |
+
{
|
| 1552 |
+
"data": {
|
| 1553 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAARRxJREFUeJzt3Xd8VFX+//H3zCSZ9AnpCYQOoUcWpYiuCEgRRVx2Fdcuyq6Luwq2dYtt97vs2pZVsexXiujXvpaforB0FWlSpAghYOikQEjvM/f3R5KBSAhJSHJnJq/n43Efmbn3zOVzuY55c++551gMwzAEAADghaxmFwAAANBUBBkAAOC1CDIAAMBrEWQAAIDXIsgAAACvRZABAABeiyADAAC8FkEGAAB4LYIMAADwWgQZAGdYsGCBLBaL9u/fb3YpXuW2225TaGio2WUAbQpBBoBXcblcWrhwoa644gpFR0fL399fsbGxGjNmjP7973+rrKzM7BIBtCI/swsAgIYqKSnRtddeqyVLlujiiy/WAw88oLi4OOXk5Gj16tX6zW9+o/Xr12vu3LlmlwqglRBkAHiNGTNmaMmSJZo9e7buvffeWtvuv/9+paWlaenSpfXuo7KyUi6XSwEBAS1ZKoBWwq0lAA320ksvqW/fvrLb7UpMTNT06dOVm5tbq01aWpomT56s+Ph4BQYGqkOHDpoyZYry8vLcbZYuXapLLrlEERERCg0NVXJysv7whz/U+2cfOnRIr732msaNG3dGiKnRo0cP/eY3v3G/379/vywWi5555hnNnj1b3bp1k91u1/fff6/y8nI9+uijGjRokBwOh0JCQnTppZdq5cqVtfZ5+j7++c9/qlOnTgoKCtJll12mHTt21FnHkSNHNGnSJIWGhiomJkYPPPCAnE5nvccHoGm4IgOgQR5//HE98cQTGj16tO6++26lpqbq5Zdf1saNG7VmzRr5+/urvLxcY8eOVVlZmX77298qPj5eR44c0Weffabc3Fw5HA7t3LlTV111lQYMGKAnn3xSdrtde/fu1Zo1a+r987/44gs5nU7ddNNNja59/vz5Ki0t1bRp02S32xUZGan8/Hy99tpruuGGG3TXXXepoKBAc+fO1dixY7VhwwZdcMEFtfaxcOFCFRQUaPr06SotLdW//vUvjRw5Utu3b1dcXJy7ndPp1NixYzVkyBA988wzWrZsmZ599ll169ZNd999d6NrB3AOBgD8yPz58w1JRnp6umEYhpGVlWUEBAQYY8aMMZxOp7vdiy++aEgy5s2bZxiGYWzZssWQZLz//vtn3fc///lPQ5KRnZ3dqJpmzJhhSDK2bt1aa31ZWZmRnZ3tXo4fP+7elp6ebkgywsPDjaysrFqfq6ysNMrKymqtO3nypBEXF2fccccdZ+wjKCjIOHz4sHv9+vXrDUnGjBkz3OtuvfVWQ5Lx5JNP1trvwIEDjUGDBjXqeAE0DLeWAJzTsmXLVF5ervvuu09W66n/bdx1110KDw/XokWLJEkOh0OStGTJEhUXF9e5r4iICEnSJ598IpfL1eAa8vPzJemMx5s///xzxcTEuJdOnTqd8dnJkycrJiam1jqbzebuJ+NyuZSTk6PKykpdeOGF2rx58xn7mDRpktq3b+9+P3jwYA0ZMkSff/75GW1//etf13p/6aWX6ocffmjgkQJoDIIMgHM6cOCAJCk5ObnW+oCAAHXt2tW9vUuXLpo5c6Zee+01RUdHa+zYsZozZ06t/jHXX3+9hg8frjvvvFNxcXGaMmWK3nvvvXOGmrCwMElSYWFhrfXDhw/X0qVLtXTpUo0ZM6bOz3bp0qXO9a+//roGDBigwMBARUVFKSYmRosWLapVb40ePXqcsa5nz55njLUTGBh4Rmhq166dTp48edZjA9B0BBkAzerZZ5/Vtm3b9Ic//EElJSX63e9+p759++rw4cOSpKCgIH355ZdatmyZbr75Zm3btk3XX3+9rr
jiino7xPbq1UuSzuhgGxMTo9GjR2v06NFKSEio87NBQUFnrHvzzTd12223qVu3bpo7d64WL16spUuXauTIkY26UvRjNputyZ8F0HgEGQDnVHO7JjU1tdb68vJypaenn3E7p3///vrTn/6kL7/8Ul999ZWOHDmiV155xb3darVq1KhReu655/T999/rf/7nf7RixYoznhg63fjx42Wz2fR///d/zXJMH3zwgbp27aoPP/xQN998s8aOHavRo0ertLS0zvZpaWlnrNuzZ486d+7cLPUAaBqCDIBzGj16tAICAvT888/LMAz3+rlz5yovL08TJkyQVNWPpbKystZn+/fvL6vV6h5xNycn54z91zwhVN+ovB07dtQdd9yhL774Qi+++GKdbU6v7Vxqrpyc/pn169dr7dq1dbb/+OOPdeTIEff7DRs2aP369Ro/fnyD/0wAzY/HrwGcU0xMjB555BE98cQTGjdunCZOnKjU1FS99NJLuuiii9yPRK9YsUL33HOPfvGLX6hnz56qrKzUG2+8IZvNpsmTJ0uSnnzySX355ZeaMGGCOnXqpKysLL300kvq0KGDLrnkknrrmD17ttLT0/Xb3/5W77zzjq6++mrFxsbq+PHjWrNmjT799NMz+vGczVVXXaUPP/xQ1157rSZMmKD09HS98sor6tOnzxn9cCSpe/fuuuSSS3T33XerrKxMs2fPVlRUlB566KFG/m0CaE4EGQAN8vjjjysmJkYvvviiZsyYocjISE2bNk1/+9vf5O/vL0lKSUnR2LFj9emnn+rIkSMKDg5WSkqKvvjiCw0dOlSSNHHiRO3fv1/z5s3T8ePHFR0drcsuu0xPPPGE+6mnswkODtbixYv1xhtv6I033tBTTz2l/Px8RUREKCUlRS+99JJuvfXWBh3PbbfdpoyMDL366qtasmSJ+vTpozfffFPvv/++Vq1adUb7W265RVarVbNnz1ZWVpYGDx6sF1988az9cgC0DovRmGuxANDG7N+/X126dNHTTz+tBx54wOxyAPwIfWQAAIDXIsgAAACvRZABAABeiz4yAADAa3FFBgAAeC2CDAAA8Fo+P46My+XS0aNHFRYWJovFYnY5AACgAQzDUEFBgRITE2W1nv26i88HmaNHjyopKcnsMgAAQBMcOnRIHTp0OOt2nw8yYWFhkqr+IsLDw02uBgAANER+fr6SkpLcv8fPxueDTM3tpPDwcIIMAABe5lzdQujsCwAAvBZBBgAAeC2CDAAA8FoEGQAA4LUIMgAAwGsRZAAAgNciyAAAAK9FkAEAAF6LIAMAALwWQQYAAHgtggwAAPBaBBkAAOC1CDJNZBiGln6fKcMwzC4FAIA2iyDTBIZh6KEPtumuhd/qpVX7zC4HAIA2iyDTBBaLRb0SwiVJTy9J1VvrD5pcEQAAbRNBpommXtJFvxnRTZL0p4+364vtx0yuCACAtocgcx4eHJusGwYnyWVI976zVd/sPW52SQAAtCkEmfNgsVj010n9Na5vvMqdLt218FttO5xrdlkAALQZBJnzZLNaNHvKBRrWNUpF5U7dNn+j9mUXml0WAABtAkGmGQT62/TvWwapX/tw5RSV65a5G3Qsr8TssgAA8HkEmWYSFuivBbcPVtfoEB3JLdEtczfoZFG52WUBAODTCDLNKDrUroVTBysu3K60rELd8fpGFZdXml0WAAA+iyDTzDq0C9YbU4fIEeSvLQdz9es3N6u80mV2WQAA+CSCTAvoGRemebddpCB/m77ck60H3v9OLhdTGQAA0NwIMi1kUKd2evmmn8jPatH/++6onvh0J/MyAQDQzAgyLWhEcqyevS5FkvT62gN6YcVekysCAMC3EGRa2DUXtNfjV/eRJD23dI/eWHfA5IoAAPAdBJlWcNvwLvrdyO6SpEc/2aHPth01uSIAAHwDQaaVzLiip24c0lGGIc14d6u+Sss2uyQAALweQaaVWCwWPXlNP00YkKAKp6FfvbFJWw/lml
0WAABejSDTimxWi567LkWXdI9WcblTt8/foL1ZBWaXBQCA1yLItDK7n02v3DxIKR0cOllcoZvnbtDRXOZlAgCgKQgyJgi1+2n+7YPVNSZEx/JKdfPc9cphXiYAABqNIGOSyJAAvTF1iBIcgdqXXaTbF2xUURnzMgEA0BgEGRO1jwjSG1MHKyLYX98dytWv39ykskqn2WUBAOA1CDIm6x4bpvm3XaTgAJu+Sjuume99JyfzMgEA0CAEGQ8wsGM7vXLTIPnbLFq07Zge+387mJcJAIAGIMh4iJ/2jNFz110gi0V6c91B/XNZmtklAQDg8QgyHuTqlEQ9ObGvJOn55WlasCbd5IoAAPBsBBkPc/OwzrpvdA9J0hOffa/vj+abXBEAAJ6LIOOB7h3VQ2P7xskwpNe/2W92OQAAeCyCjAeyWCy689KukqRPvjuivOIKkysCAMAzEWQ81IWd2qlXfJhKK1x6f9Mhs8sBAMAjEWQ8lMVi0S3DOkuS3lh3QC7GlgEA4AwEGQ82aWCiwgL9dOBEsb5Myza7HAAAPA5BxoMFB/jp54M6SJLeWHvA5GoAAPA8BBkPd/PQTpKkFalZOpRTbHI1AAB4FoKMh+saE6pLe0TLMKQ313NVBgCA0xFkvEDNVZn3Nh5SaQWzYwMAUMPUIDNr1ixddNFFCgsLU2xsrCZNmqTU1NRabUpLSzV9+nRFRUUpNDRUkydPVmZmpkkVm2NU7zi1jwjSyeIKfbbtmNnlAADgMUwNMqtXr9b06dO1bt06LV26VBUVFRozZoyKiorcbWbMmKFPP/1U77//vlavXq2jR4/qZz/7mYlVtz6b1aJfDukoSXpj7X5ziwEAwINYDMPwmAFKsrOzFRsbq9WrV+unP/2p8vLyFBMTo7feeks///nPJUm7d+9W7969tXbtWg0dOvSc+8zPz5fD4VBeXp7Cw8Nb+hBazInCMg2btULlTpc+mT5cKUkRZpcEAECLaejvb4/qI5OXlydJioyMlCRt2rRJFRUVGj16tLtNr1691LFjR61du9aUGs0SFWrXhAEJkqSFPIoNAIAkDwoyLpdL9913n4YPH65+/fpJkjIyMhQQEKCIiIhabePi4pSRkVHnfsrKypSfn19r8RU3D6vq9PvptqPKKSo3uRoAAMznMUFm+vTp2rFjh955553z2s+sWbPkcDjcS1JSUjNVaL6BSRHq1z5c5ZUuvfct8y8BAOARQeaee+7RZ599ppUrV6pDhw7u9fHx8SovL1dubm6t9pmZmYqPj69zX4888ojy8vLcy6FDvvML32Kx6JahnSVJb647ICfzLwEA2jhTg4xhGLrnnnv00UcfacWKFerSpUut7YMGDZK/v7+WL1/uXpeamqqDBw9q2LBhde7TbrcrPDy81uJLrk5JlCPIX4dPlmhVapbZ5QAAYCpTg8z06dP15ptv6q233lJYWJgyMjKUkZGhkpISSZLD4dDUqVM1c+ZMrVy5Ups2bdLtt9+uYcOGNeiJJV8UFGDTL6rnX6LTLwCgrTM1yLz88svKy8vTiBEjlJCQ4F7effddd5t//vOfuuqqqzR58mT99Kc/VXx8vD788EMTqzbfTdUj/a7ek639x4vO0RoAAN/lUePItARfGUfmx26dt0Gr92Trrku76I8T+phdDgAAzcorx5FBw91S/Sj2e98eVkk58y8BANomgoyXGpEcqw7tgpRXUqFPvztqdjkAAJiCIOOlbFaLu6/MwnX75eN3CAEAqBNBxotdd2GSAvys2nEkX1sO5ZpdDgAArY4g48UiQwJ09YBESdIbPIoNAGiDCDJerqbT76Jtx3S8sMzkagAAaF0EGS+XkhShlA4OlTtdenej70zHAABAQxBkfMDNwzpLkt5af5D5lwAAbQpBxgdcNSBB7YL9dSS3RMt3ZZpdDgAArYYg4wMC/W267qIkSdIb6+j0CwBoOwgyPuKmIZ1ksUhfpR3XD9mFZpcDAECrIMj4iKTIYI1MjpXEVR
kAQNtBkPEhN1c/iv3BpsMqLq80uRoAAFoeQcaH/LRHjDpHBaugtFIfb2H+JQCA7yPI+BDr6fMvrWX+JQCA7yPI+JhfDEpSoL9VuzMK9O2Bk2aXAwBAiyLI+BhHsL+uSWkvSVrI/EsAAB9HkPFBNZ1+F+84pqyCUpOrAQCg5RBkfFC/9g79pGOEKpyG3tnA/EsAAN9FkPFRt5w2/1Kl02VuMQAAtBCCjI8a3z9eUSEBysgv1dLvmX8JAOCbCDI+yu5n05TBVfMv0ekXAOCrCDI+7JdDOslqkdb+cEJpmQVmlwMAQLMjyPiw9hFBGt07ThLzLwEAfBNBxsfVdPr9cPMRFZYx/xIAwLcQZHzc8O5R6hoTosKySn20+bDZ5QAA0KwIMj7OYrHoZvf8SweYfwkA4FMIMm3A5EEdFBxgU1pWodb9kGN2OQAANBuCTBsQHuivSQOr5l96Y91+c4sBAKAZEWTaiFuq519asjNTGXnMvwQA8A0EmTaiV3y4BneOlNNl6K0NB80uBwCAZkGQaUNqZsV+e8NBlVcy/xIAwPsRZNqQsX3jFRNmV3ZBmZbszDC7HAAAzhtBpg0J8LPqhsEdJUlvMP8SAMAHEGTamF8O7iib1aIN+3O0OyPf7HIAADgvBJk2Jt4RqDF9quZfYlZsAIC3I8i0QTWdfj/ZckRllU6TqwEAoOkIMm3Q0C5Rig2zq6jcqQ3pjPQLAPBeBJk2yGq1aGSvWEnS8l1ZJlcDAEDTEWTaKHeQ2Z3JRJIAAK9FkGmjhnePVoCfVYdySrQvu9DscgAAaBKCTBsVYvfTsK5Rkri9BADwXgSZNmxU75rbSwQZAIB3Isi0YZcnVwWZTQdOKre43ORqAABoPIJMG5YUGazkuDA5XYZW78k2uxwAABqNINPGjay+vbSC20sAAC9EkGnjRlU/hr0qNVuVTpfJ1QAA0DgEmTZuYMd2igj2V15JhTYfzDW7HAAAGoUg08bZrBZ3p9/luzNNrgYAgMYhyMA9yu8KxpMBAHgZggz0054xslktSssq1METxWaXAwBAgxFkIEeQvy7q3E6StILbSwAAL0KQgSRpVK84SYzyCwDwLgQZSDo1nsz6H3JUWFZpcjUAADQMQQaSpK7RIeocFaxyp0tfpx03uxwAABqEIANJksVi0cjq20v0kwEAeAuCDNxGuacryJbLZZhcDQAA50aQgdtFnSMVavfT8cIybT+SZ3Y5AACcE0EGbgF+Vl3aI1oSTy8BALwDQQa1uEf5pZ8MAMALEGRQy4jkWFks0o4j+crIKzW7HAAA6kWQQS0xYXaldIiQJK1M5fYSAMCzEWRwhlHVt5eWM4kkAMDDEWRwhppRftfsPa7SCqfJ1QAAcHamBpkvv/xSV199tRITE2WxWPTxxx/X2n7bbbfJYrHUWsaNG2dOsW1In4RwxYcHqqTCqbU/nDC7HAAAzsrUIFNUVKSUlBTNmTPnrG3GjRunY8eOuZe33367FStsmywWi/uqzApuLwEAPJifmX/4+PHjNX78+Hrb2O12xcfHt1JFqDGqV6zeWn9QK3Zn6UnDkMViMbskAADO4PF9ZFatWqXY2FglJyfr7rvv1okT9d/qKCsrU35+fq0FjXdxt2jZ/aw6klui1MwCs8sBAKBOHh1kxo0bp4ULF2r58uX6xz/+odWrV2v8+PFyOs/eAXXWrFlyOBzuJSkpqRUr9h1BATYN7149yi+3lwAAHsqjg8yUKVM0ceJE9e/fX5MmTdJnn32mjRs3atWqVWf9zCOPPKK8vDz3cujQodYr2MecGuWXIAMA8EweHWR+rGvXroqOjtbevXvP2sZutys8PLzWgqapCTKbD55UTlG5ydUAAHAmrwoyhw8f1okTJ5SQkGB2KW1CYkSQeieEyzCkVYzyCwDwQKYGmcLCQm3dulVbt26VJKWnp2vr1q06ePCgCgsL9eCDD2rdunXav3+/li9frmuuuUbdu3fX2LFjzSy7TXGP8svtJQCABz
I1yHz77bcaOHCgBg4cKEmaOXOmBg4cqEcffVQ2m03btm3TxIkT1bNnT02dOlWDBg3SV199JbvdbmbZbUrNeDJfpmarwukyuRoAAGozdRyZESNGyDCMs25fsmRJK1aDuqR0iFBUSIBOFJVr4/4cXdwt2uySAABw86o+Mmh9NqtFI5IZ5RcA4JkIMjinUb15DBsA4JkIMjinS3tEy89q0Q/Hi/RDdqHZ5QAA4EaQwTmFBfprSNdISVyVAQB4FoIMGmRkrzhJBBkAgGchyKBBasaT2ZCeo/zSCpOrAQCgCkEGDdI5OkRdY0JU6TL0ddpxs8sBAEASQQaN4B7ll8ewAQAegiCDBqvpJ7MqNUtO19kHMgQAoLUQZNBgF3Zup7BAP50oKtd3h3PNLgcAAIIMGs7fZtVlPWMkMcovAMAzEGTQKDWj/DIbNgDAExBk0CiX9YyV1SLtOpavo7klZpcDAGjjCDJolMiQAP2kYztJDI4HADAfQQaNNpJJJAEAHoIgg0YbVf0Y9pq9x1VS7jS5GgBAW0aQQaP1jAtV+4gglVW69M0+RvkFAJiHIINGs1gsGtmLp5cAAOYjyKBJ3P1kdmXJMBjlFwBgDoIMmmRY1ygF+duUkV+q74/lm10OAKCNIsigSQL9bRrePVoSo/wCAMxDkEGTMcovAMBsBBk02eXJVUHmu8O5yi4oM7kaAEBbRJBBk8U7AtWvfbgMQ1qVylUZAEDrI8jgvIysHhyPUX4BAGYgyOC8jKoeT+bLPdkqr3SZXA0AoK0hyOC89G/vUHSoXUXlTm1IzzG7HABAG0OQwXmxWi0a2StGkrR8d6bJ1QAA2hqCDM5bTT+Z5YzyCwBoZQQZnLdLekQrwGbVwZxi7csuMrscAEAbQpDBeQu1+2lI10hJ0gpuLwEAWhFBBs2i5uml5UxXAABoRQQZNIuafjLfHjipvOIKk6sBALQVBBk0i45RweoRGyqny9DqtGyzywEAtBEEGTSbkdWTSK7YRT8ZAEDrIMig2Yyqvr20ak+2Kp2M8gsAaHkEGTSbn3SMkCPIX7nFFdpyKNfscgAAbUCTgszrr7+uRYsWud8/9NBDioiI0MUXX6wDBw40W3HwLn42q0YkV4/yy9NLAIBW0KQg87e//U1BQUGSpLVr12rOnDl66qmnFB0drRkzZjRrgfAuI6sfw2Y8GQBAa/BryocOHTqk7t27S5I+/vhjTZ48WdOmTdPw4cM1YsSI5qwPXuaynjGyWS3ak1moQznFSooMNrskAIAPa9IVmdDQUJ04cUKS9N///ldXXHGFJCkwMFAlJSXNVx28TkRwgAZ1aidJWrGb20sAgJbVpCBzxRVX6M4779Sdd96pPXv26Morr5Qk7dy5U507d27O+uCFakb5Xfo9t5cAAC2rSUFmzpw5GjZsmLKzs/Wf//xHUVFRkqRNmzbphhtuaNYC4X3G9I2XJK394YROFpWbXA0AwJdZDMMwzC6iJeXn58vhcCgvL0/h4eFml9NmjP/XV9p1LF//mNxf11/U0exyAABepqG/v5t0RWbx4sX6+uuv3e/nzJmjCy64QL/85S918uTJpuwSPubKflVXZT7fnmFyJQAAX9akIPPggw8qPz9fkrR9+3bdf//9uvLKK5Wenq6ZM2c2a4HwTuP7J0iS1uw9ziSSAIAW06Qgk56erj59+kiS/vOf/+iqq67S3/72N82ZM0dffPFFsxYI79Q9NlTJcWGqdBlaytxLAIAW0qQgExAQoOLiYknSsmXLNGbMGElSZGSk+0oNML5/1e2lL7YfM7kSAICvalKQueSSSzRz5kz95S9/0YYNGzRhwgRJ0p49e9ShQ4dmLRDe68rq20tfpR1Xfim3lwAAza9JQebFF1+Un5+fPvjgA7388stq3769JOmLL77QuHHjmrVAeK8esaHqFhOicqdLK5h7CQDQAnj8Gi3q2f+m6oUVe3VFnzj97y0Xml0OAMBLNPT3d5PmWpIkp9Opjz/+WLt27Z
Ik9e3bVxMnTpTNZmvqLuGDxvdL0Asr9mr1nmwVllUq1N7k/+QAADhDk36r7N27V1deeaWOHDmi5ORkSdKsWbOUlJSkRYsWqVu3bs1aJLxX74QwdYkOUfrxIq3YnaWJKYlmlwQA8CFN6iPzu9/9Tt26ddOhQ4e0efNmbd68WQcPHlSXLl30u9/9rrlrhBezWCwa34+nlwAALaNJQWb16tV66qmnFBkZ6V4XFRWlv//971q9enWzFQffUPP00srULBWXV5pcDQDAlzQpyNjtdhUUFJyxvrCwUAEBAeddFHxL38RwJUUGqbTCpVWp2WaXAwDwIU0KMldddZWmTZum9evXyzAMGYahdevW6de//rUmTpzY3DXCy1ksFl3Zr+qqzCJuLwEAmlGTgszzzz+vbt26adiwYQoMDFRgYKAuvvhide/eXbNnz27mEuELauZeWrk7SyXlTpOrAQD4iiY9tRQREaFPPvlEe/fudT9+3bt3b3Xv3r1Zi4PvSOngUPuIIB3JLdHqPdkaV90BGACA89HgIHOuWa1Xrlzpfv3cc881vSL4pJqnl177Ol1f7DhGkAEANIsGB5ktW7Y0qJ3FYmlyMfBt4/sn6LWv07V8V5ZKK5wK9GfwRADA+WlwkDn9igvQFAOTIhQfHqiM/FJ9nXZco/vEmV0SAMDLNamzL9AUVqvFfUvp8x08vQQAOH8EGbSqmsHxln6fqbJKnl4CAJwfU4PMl19+qauvvlqJiYmyWCz6+OOPa203DEOPPvqoEhISFBQUpNGjRystLc2cYtEsLuzUTrFhdhWUVuqbvSfMLgcA4OVMDTJFRUVKSUnRnDlz6tz+1FNP6fnnn9crr7yi9evXKyQkRGPHjlVpaWkrV4rmUuv2EoPjAQDOk6lBZvz48frrX/+qa6+99oxthmFo9uzZ+tOf/qRrrrlGAwYM0MKFC3X06NEzrtzAu4yvHuX3v99nqsLpMrkaAIA389g+Munp6crIyNDo0aPd6xwOh4YMGaK1a9ee9XNlZWXKz8+vtcCzDO4SqaiQAOWVVGjtPm4vAQCazmODTEZGhiQpLq72I7pxcXHubXWZNWuWHA6He0lKSmrROtF4NqtFY6tvL33B00sAgPPgsUGmqR555BHl5eW5l0OHDpldEupQM4nkkp2ZquT2EgCgiTw2yMTHV/2LPTMzs9b6zMxM97a62O12hYeH11rgeYZ2jVS7YH/lFJVrfXqO2eUAALyUxwaZLl26KD4+XsuXL3evy8/P1/r16zVs2DATK0Nz8LNZNbYvTy8BAM6PqUGmsLBQW7du1datWyVVdfDdunWrDh48KIvFovvuu09//etf9f/+3//T9u3bdcsttygxMVGTJk0ys2w0k/H9a24vZcjpMkyuBgDgjRo811JL+Pbbb3X55Ze739fMsH3rrbdqwYIFeuihh1RUVKRp06YpNzdXl1xyiRYvXqzAwECzSkYzurhblBxB/jpeWK6N+3M0tGuU2SUBALyMxTAMn/6ncH5+vhwOh/Ly8ugv44EeeP87fbDpsG4d1klPXNPP7HIAAB6iob+/PbaPDNqGK/vXPIadIRe3lwAAjUSQgamGd49WmN1PWQVl2nTwpNnlAAC8DEEGprL72XRFn6pBD3l6CQDQWAQZmK7m6aXF3F4CADQSQQamu7RHtEICbDqWV6qth3PNLgcA4EUIMjBdoL9No3pX3V76gttLAIBGIMjAI9Q8vfT59gz5+IgAAIBmRJCBR7isZ6yC/G06kluibYfzzC4HAOAlCDLwCEEBNo3sHStJ+nwHt5cAAA1DkIHHuLJf1dNLX3B7CQDQQAQZeIwRyTEK9LfqYE6xdh7NN7scAIAXIMjAY4TY/TSiZ9XtpS+4vQQAaACCDDzKeJ5eAgA0AkEGHmVU7zgF+FmVfrxIuzMKzC4HAODhCDLwKKF2P13WM0YSg+MBAM6NIAOP4x4cb0eGyZUAADwdQQYeZ1TvOPnbLNqbVai0TG4vAQDOjiADjx
Me6K9Le1TdXvp8O1dlAABnR5CBRxrfr+r2Eo9hAwDqQ5CBR7qiT5z8rBbtzijQvuxCs8sBAHgoggw8UkRwgIZ3j5bE00sAgLMjyMBjXXna4HgAANSFIAOPdUWfeNmsFn1/LF/7jxeZXQ4AwAMRZOCxIkMCNKxrlCTpC8aUAQDUgSADj1Yz9xJPLwEA6kKQgUcb0ydeVou07XCeDuUUm10OAMDDEGTg0WLC7BrcJVISV2UAAGciyMDjTeifIImnlwAAZyLIwOON7Rsvi0XaeihXR3JLzC4HAOBBCDLweLHhgbqoU9XtpcU8vQQAOA1BBl7B/fQSo/wCAE5DkIFXGFc9ieS3B04qI6/U5GoAAJ6CIAOvkOAI0k86RkiSluzk9hIAoApBBl7jyuqnlxZxewkAUI0gA68xvjrIbNyfo6wCbi8BAAgy8CLtI4KUkhQhw5CW7Mw0uxwAgAcgyMCrXNmPp5cAAKcQZOBVxverur207ocTOlFYZnI1AACzEWTgVTpGBatf+3C5DOk/mw+bXQ4AwGQEGXidm4Z0kiTNWblPucXlJlcDADATQQZe5xcXJik5Lkx5JRV6YcVes8sBAJiIIAOvY7Na9MiVvSRJC9fu14ETRSZXBAAwC0EGXmlEcqwu7RGtCqehfyzebXY5AACTEGTgtf44obesFunz7Rn6dn+O2eUAAExAkIHX6hUfrl8MSpIk/XXRLhmGYXJFAIDWRpCBV7t/TE8F+du09VCuPtvGIHkA0NYQZODVYsMD9avLukqS/rF4t8oqnSZXBABoTQQZeL1pP+2quHC7Dp8s0evf7De7HABAKyLIwOsFB/jp/jHJkqQXVuxVThGD5AFAW0GQgU+Y/JMO6hUfpoLSSj2/PM3scgAArYQgA59gs1r0pwl9JElvrjugH7ILTa4IANAaCDLwGZf0iNblyTGqdDFIHgC0FQQZ+JRHrqwaJG/Jzkyt/+GE2eUAAFoYQQY+pWdcmKYM7ihJ+p/Pd8nlYpA8APBlBBn4nBmjeyokwKZth/P06bajZpcDAGhBBBn4nJgwu+4e0U2S9NTiVJVWMEgeAPgqggx80tRLuirBEagjuSWav2a/2eUAAFoIQQY+KSjApgeqB8l7aeVenSgsM7kiAEBLIMjAZ107sL36tQ9XQVmlZi9jkDwA8EUEGfgsq9WiP1zZW5L01oaD2pvFIHkA4GsIMvBpF3eL1ujesXK6DP39i11mlwMAaGYEGfi834/vLZvVomW7svTNvuNmlwMAaEYEGfi87rGhunFI1SB5f2OQPADwKQQZtAn3juqhMLufdhzJ10dbjphdDgCgmXh0kHn88cdlsVhqLb169TK7LHihqFC7fnN5d0nSM/9NVUk5g+QBgC/w6CAjSX379tWxY8fcy9dff212SfBStw/vrPYRQTqWV6q5X/9gdjkAgGbg8UHGz89P8fHx7iU6OtrskuClAv1temhc1SB5L6/ap+wCBskDAG/n8UEmLS1NiYmJ6tq1q2688UYdPHiw3vZlZWXKz8+vtQA1rh6QqJQODhWVO/XPZXvMLgcAcJ48OsgMGTJECxYs0OLFi/Xyyy8rPT1dl156qQoKCs76mVmzZsnhcLiXpKSkVqwYns5qteiPE/pIkt7ZcFB7Ms/+3xIAwPNZDMPwmmdRc3Nz1alTJz333HOaOnVqnW3KyspUVnbqlkF+fr6SkpKUl5en8PDw1ioVHu5Xb3yrJTszdXlyjObfPtjscgAAP5Kfny+Hw3HO398efUXmxyIiItSzZ0/t3bv3rG3sdrvCw8NrLcCP/X58b/lZLVqZmq2v0xgkDwC8lVcFmcLCQu3bt08JCQlmlwIv1yU6RDcN7SRJ+uui7+VkkDwA8EoeHWQeeOABrV69Wvv379c333yja6+9VjabTTfccIPZpcEH3Duqh8ID/bQ7o0D/2XzY7HIAAE3g0UHm8OHDuuGGG5ScnKzrrrtOUVFRWrdunWJiYswuDT6gXUiAfjuyhyTpmS
WpKi6vNLkiAEBj+ZldQH3eeecds0uAj7vl4k5auG6/DuWU6H+/TNe9o3uYXRIAoBE8+ooM0NLsfjY9PK5q2otXv9ynrPxSkysCADQGQQZt3oT+CRrYMULF5U49+18GyQMAb0KQQZtnsVj0pwm9JUnvbTqkXccYDRoAvAVBBpA0qFOkJvRPkGFIf/t8l9nlAAAaiCADVHtoXLL8bRZ9lXZcq/dkm10OAKABCDJAtU5RIbp1WGdJ0v8s+l6lFU5zCwIAnBNBBjjNb0f2UESwv/ZkFuquhd8SZgDAwxFkgNM4gv316k2DFBxg01dpxzX19Y0qKSfMAICnIsgAPzKka5Rev2OwQgJsWrP3hO5YsJFRfwHAQxFkgDpc1DlSC6cOVqjdT2t/OKHb529UURlhBgA8DUEGOItBnarCTJjdT+vTc3T7/I0qJMwAgEchyAD1+EnHdnrjziEKC/TThv05um3eBhWUVphdFgCgGkEGOIcLkiL0f3cOUXign749cFK3ztugfMIMAHgEggzQAAM6ROitu4bKEeSvzQdzdctcwgwAeAKCDNBA/do79H93DlFEsL+2HsrVza+tV14JYQYAzESQARqhX3uH3rpzqNoF++u7w3m66bX1yi0uN7ssAGizCDJAI/VJDNdbdw1VZEiAth/J042vrdfJIsIMAJiBIAM0Qe+EcL1911BFhQRo59F8/fK19cohzABAqyPIAE2UHB+md6YNVXSoXbuO5euX/7tOJwrLzC4LANoUggxwHnrEVYWZmDC7dmcU6Jf/u17HCTMA0GoIMsB56h4bqnemDVVsmF2pmQW64d/rlF1AmAGA1kCQAZpBt5hQvfurYYoPD1RaVqFu+N91yiooNbssAPB5BBmgmXSJDtE704YqwRGovVmFmvLvdcrMJ8wAQEsiyADNqHN0iN6dNkztI4L0Q3aRpvx7nTLyCDMA0FIIMkAz6xgVrHemDVX7iCClHy/SlH+v1bG8ErPLAgCfRJABWkBSZFWY6dAuSPtPFOv6V9fpSC5hBgCaG0EGaCFJkcF691fD1DEyWAdzijXl32t1+GSx2WUBgE8hyAAtqH1EkN6ZNlSdooJ1KKdE17+6TodyCDMA0FwIMkALS4wI0rvThqlLdIiO5JZoyr/X6cCJIrPLAgCfQJABWkG8I1DvTBuqrjFVYebKf32l55buUUFphdmlAYBXI8gArSQuPFDv3DVUFyRFqKjcqeeXp+mnT63U/375g0ornGaXBwBeyWIYhmF2ES0pPz9fDodDeXl5Cg8PN7scQIZhaPGODD3931T9kF11iynBEah7R/XQzwd1kJ+Nf18AQEN/fxNkAJNUOl36cPMR/XPZHh2rHjSva0yIHhiTrPH94mWxWEyuEADMQ5CpRpCBpyutcOrNdQf00qp9yikqlyT1b+/Qg2OTdWmPaAINgDaJIFONIANvUVBaode+StdrX/2govKqPjPDukbpoXHJGtixncnVAUDrIshUI8jA25woLNOclfv05roDKne6JElj+sTpgbHJ6hkXZnJ1ANA6CDLVCDLwVodPFutfy9L0n82H5TIki0W6dmB7zRjdU0mRwWaXBwAtiiBTjSADb7c3q0DPLNmjxTszJEn+NotuHNJJ0y/vrpgwu8nVAUDLIMhUI8jAV2w9lKunl+zWmr0nJEnBATZNvaSL7vppV4UH+ptcHQA0L4JMNYIMfM2avcf11OLd+u5wniQpIthfvxnRTbcM66xAf5vJ1QFA8yDIVCPIwBcZhqElOzP0zH/3aG9WoSQpPjxQ947uoV8wqB4AH0CQqUaQgS+rdLr04ZYj+teyNB3JLZEkdY0O0S8uTNLlvWKUHBfGODQAvBJBphpBBm1BWaVT/7fuoF5cudc9qJ4kJToCNaJXrC5PjtXF3aIUYvczsUoAaDiCTDWCDNqSwrJKfbTliFbuztI3+46rtMLl3hZgs2pI10iNSI7VyF6x6hIdYmKlAFA/gkw1ggzaqtIKp9b+cEKrdm
dpRWqWDuWU1NreOSpYI5JjdXmvWA3pEklHYQAehSBTjSADVHUO/uF4kVbuztKq1GytTz+hCuepr36Qv00Xd4uqvg0Vow7tGHAPgLkIMtUIMsCZCssqtWbvca1KzdLK3dnKyC+ttb1nXKguT47ViORYXdi5nfx5CgpAKyPIVCPIAPUzDEO7jhVoZWqWVqVmadOBk3Kd9n+FMLufLu0ZrRHJsRrRM0ax4YHmFQugzSDIVCPIAI2TW1yur9KOa2VqllanZuvEaU9BSVKv+DD1SQhX97hQ9YgNU4/YUCVFBstm5TFvAM2HIFONIAM0nctlaNuRvOq+NVnu0YR/LMDPqq7RIeoRVxVsesSGqkdcqDpFhXBbCkCTEGSqEWSA5pNdUKZNB05qb1aB0rIKtbd6Kat01dnez2pRl+gQ9YgLVffqqzfdY0PVJTqEp6QA1IsgU40gA7Qsp8vQkZMlSqsON2mZhe6gU1zurPMzVovUKSpE3U+7etMjNkzdYkIVFEDAAUCQcSPIAOYwDENH80qVllngvnKTllWoPZkFKiitrPMzFouUEB6oDpHB6tAuSB3aBSup+meHdkFKcAQyjxTQRhBkqhFkAM9iGIayC8qqr95UX8WpDjo5P+pY/GM2q0UJjsDTQk5N4AlSh8hgxYcH0ukY8BEN/f3NxCsAWpXFYlFseKBiwwM1vHt0rW0nCst0IKdYh3KKdfhkSfVS9frIyRKVO13u9VLOGfv2s1qUGBHkDjdJ7YLVIfJU6IkNs8tK0AF8CkEGgMeICrUrKtSun3Rsd8Y2l8tQdmGZDp8s1qGcUwGnJuwcyS1RhdPQwZxiHcwprnP/ATar4hx2xYcHKt4RpPhwu+LCA5XgCFK8w654R5Biw+w8aQV4EYIMAK9gtVoUFx6ouPBADep05nany1BWQak72Pw47BzNrbqicyinpHreqZN1/jkWixQVYleCo+rPinfYleAIqg48NesCFcpM4oBHoI8MgDah0ulSZkGZMvJKdCyvVBk1S36pMvNLdSyv6ufpc1DVJ8zupzjHaeEmPFBxjkBFhQQoIthf7YIDFFn92u7Hk1hAY9FHBgBO42ezqn1EkNpHBJ21jctlKKe4vFbIOSPs5JWqoKyyaqnupHwuIQE2RQQHqF1IVcCpWvzVLqT6dUj1+9NeB/nbZLHQnwc4F4IMAFSzWi2KDrUrOtSufu0dZ21XWFZZxxWdEmXml+lkUblOFpfrZHGFcovL5TKkonKnispLdCS3pMG1BPhZFRlc++qOI9hfoXY/BQfYFGr3U0j1Emq3KSSg5vWpn4H+VsIQfB5BBgAaKdTup+7VoxTXx+UyVFBaqZziqnCTW1yunKKqgFMTdmqCT25xhXKKqn6WO10qr3RVXRH60czkjWG1yB1wQuw/Dj9V60LsfgqtbhPob1OAn1X+Novsflb526zV76t+Bpz23t9mOWOdn9VCcEKrI8gAQAuxWi1yBPvLEeyvLgpp0GcMw1BxudMdak7WhJ6icuWVVKqovFKFZZUqql6qXjtPe12pouoRlV2G3LfBWoPFIvnbrLLbrPL3OxV2/G1VgScowKbgAJuC/KtC1OmvgwJsCva3KTjAT8H2OtoF+CkkoKpdgI0rTTiFIAMAHsRisbivmiRFNm0fLpehkgpnraBzKuScCjyF1QGopl1ZZdWVoArnqZ9lNe+dLlVUGu5t5dXrTn9cxDBUta3SJZU1z99HXWxWS1XosVcFnyD/qrBT99Ujyxnrz7zSVDtw+fudHsZqPmOR1WqRn9Uiq8Uim7VqsVqq11W/t1ksslolP6tVVosIXK3AK4LMnDlz9PTTTysjI0MpKSl64YUXNHjwYLPLAgCPZLWeCkOxLfjnGIYhp8twh5wyp1MVTkMVNUGnsiYAVf0sKXeq2L1Uul+XnPa6+EevS8qdKq5wqrjMqXJn1eSkTpdx2pWmFkxMzcBqUT2B51Qw8rNVvfa3WWWzWuRns8rfWrW+5rad32k/a7bVvL
ZZq8JW1X5qXle3t1pks1lls1hks0o2q1U2q04FstODWfX702utqd1WK8RV76c6uLULDlCISUMSeHyQeffddzVz5ky98sorGjJkiGbPnq2xY8cqNTVVsbEt+RUFANTHYjn1y1QBkuTfon9epdPlDjU1gafmylNphbP66pFR66pS+WlXl06tM8648lT+o9cVlUatMFbpdMnpMuQypEqXSy6X5KwOcvVxGZLLaUgyPDxynZ//ubafbhxSxwBPrcDjg8xzzz2nu+66S7fffrsk6ZVXXtGiRYs0b948/f73vze5OgBAa/GzWRVusyo8sGUDU2O5XIYqXYZc1cHGaRin1lW/d7oMuVzVIcgw5HSpOhhVtXO6qkJYpdNQpcvl/llx+k/nae1+3N5lqNJ5alul06j1+vS6auqpWWpqOLVNcrpOBbfT29bah/PUvvyt5o2G7dFBpry8XJs2bdIjjzziXme1WjV69GitXbvWxMoAAKhitVoUwBxepvHoIHP8+HE5nU7FxcXVWh8XF6fdu3fX+ZmysjKVlZ26gJefn9+iNQIAAPP43Mxos2bNksPhcC9JSUlmlwQAAFqIRweZ6Oho2Ww2ZWZm1lqfmZmp+Pj4Oj/zyCOPKC8vz70cOnSoNUoFAAAm8OggExAQoEGDBmn58uXudS6XS8uXL9ewYcPq/Izdbld4eHitBQAA+CaP7iMjSTNnztStt96qCy+8UIMHD9bs2bNVVFTkfooJAAC0XR4fZK6//nplZ2fr0UcfVUZGhi644AItXrz4jA7AAACg7bEYhlH/aD5eLj8/Xw6HQ3l5edxmAgDASzT097dH95EBAACoD0EGAAB4LYIMAADwWgQZAADgtQgyAADAaxFkAACA1yLIAAAAr+XxA+Kdr5phcpgFGwAA71Hze/tcw935fJApKCiQJGbBBgDACxUUFMjhcJx1u8+P7OtyuXT06FGFhYXJYrE0237z8/OVlJSkQ4cOtYkRg9vS8XKsvqstHS/H6rvayvEahqGCggIlJibKaj17TxifvyJjtVrVoUOHFtt/W5thuy0dL8fqu9rS8XKsvqstHG99V2Jq0NkXAAB4LYIMAADwWgSZJrLb7Xrsscdkt9vNLqVVtKXj5Vh9V1s6Xo7Vd7W14z0Xn+/sCwAAfBdXZAAAgNciyAAAAK9FkAEAAF6LIAMAALwWQaYec+bMUefOnRUYGKghQ4Zow4YN9bZ///331atXLwUGBqp///76/PPPW6nS8zNr1ixddNFFCgsLU2xsrCZNmqTU1NR6P7NgwQJZLJZaS2BgYCtV3HSPP/74GXX36tWr3s9463mVpM6dO59xvBaLRdOnT6+zvTed1y+//FJXX321EhMTZbFY9PHHH9fabhiGHn30USUkJCgoKEijR49WWlraOffb2O99a6jvWCsqKvTwww+rf//+CgkJUWJiom655RYdPXq03n025bvQWs51bm+77bYzah83btw59+tt51ZSnd9fi8Wip59++qz79ORz2xIIMmfx7rvvaubMmXrssce0efNmpaSkaOzYscrKyqqz/TfffKMbbrhBU6dO1ZYtWzRp0iRNmjRJO3bsaOXKG2/16tWaPn261q1bp6VLl6qiokJjxoxRUVFRvZ8LDw/XsWPH3MuBAwdaqeLz07dv31p1f/3112dt683nVZI2btxY61iXLl0qSfrFL35x1s94y3ktKipSSkqK5syZU+f2p556Ss8//7xeeeUVrV+/XiEhIRo7dqxKS0vPus/Gfu9bS33HWlxcrM2bN+vPf/6zNm/erA8//FCpqamaOHHiOffbmO9CazrXuZWkcePG1ar97bffrnef3nhuJdU6xmPHjmnevHmyWCyaPHlyvfv11HPbIgzUafDgwcb06dPd751Op5GYmGjMmjWrzvbXXXedMWHChFrrhgwZYvzqV79q0TpbQlZWliHJWL169VnbzJ8/33A4HK1XVDN57LHHjJSUlAa396XzahiGce+99xrdunUzXC
5Xndu99bxKMj766CP3e5fLZcTHxxtPP/20e11ubq5ht9uNt99++6z7aez33gw/Pta6bNiwwZBkHDhw4KxtGvtdMEtdx3vrrbca11xzTaP24yvn9pprrjFGjhxZbxtvObfNhSsydSgvL9emTZs0evRo9zqr1arRo0dr7dq1dX5m7dq1tdpL0tixY8/a3pPl5eVJkiIjI+ttV1hYqE6dOikpKUnXXHONdu7c2Rrlnbe0tDQlJiaqa9euuvHGG3Xw4MGztvWl81peXq4333xTd9xxR70TqHrreT1denq6MjIyap07h8OhIUOGnPXcNeV776ny8vJksVgUERFRb7vGfBc8zapVqxQbG6vk5GTdfffdOnHixFnb+sq5zczM1KJFizR16tRztvXmc9tYBJk6HD9+XE6nU3FxcbXWx8XFKSMjo87PZGRkNKq9p3K5XLrvvvs0fPhw9evX76ztkpOTNW/ePH3yySd688035XK5dPHFF+vw4cOtWG3jDRkyRAsWLNDixYv18ssvKz09XZdeeqkKCgrqbO8r51WSPv74Y+Xm5uq22247axtvPa8/VnN+GnPumvK990SlpaV6+OGHdcMNN9Q7oWBjvwueZNy4cVq4cKGWL1+uf/zjH1q9erXGjx8vp9NZZ3tfObevv/66wsLC9LOf/azedt58bpvC52e/RuNMnz5dO3bsOOf91GHDhmnYsGHu9xdffLF69+6tV199VX/5y19auswmGz9+vPv1gAEDNGTIEHXq1Envvfdeg/6V483mzp2r8ePHKzEx8axtvPW8okpFRYWuu+46GYahl19+ud623vxdmDJlivt1//79NWDAAHXr1k2rVq3SqFGjTKysZc2bN0833njjOTvge/O5bQquyNQhOjpaNptNmZmZtdZnZmYqPj6+zs/Ex8c3qr0nuueee/TZZ59p5cqV6tChQ6M+6+/vr4EDB2rv3r0tVF3LiIiIUM+ePc9aty+cV0k6cOCAli1bpjvvvLNRn/PW81pzfhpz7pryvfckNSHmwIEDWrp0ab1XY+pyru+CJ+vatauio6PPWru3n1tJ+uqrr5Samtro77Dk3ee2IQgydQgICNCgQYO0fPly9zqXy6Xly5fX+tfq6YYNG1arvSQtXbr0rO09iWEYuueee/TRRx9pxYoV6tKlS6P34XQ6tX37diUkJLRAhS2nsLBQ+/btO2vd3nxeTzd//nzFxsZqwoQJjfqct57XLl26KD4+vta5y8/P1/r168967pryvfcUNSEmLS1Ny5YtU1RUVKP3ca7vgic7fPiwTpw4cdbavfnc1pg7d64GDRqklJSURn/Wm89tg5jd29hTvfPOO4bdbjcWLFhgfP/998a0adOMiIgIIyMjwzAMw7j55puN3//+9+72a9asMfz8/IxnnnnG2LVrl/HYY48Z/v7+xvbt2806hAa7++67DYfDYaxatco4duyYeykuLna3+fHxPvHEE8aSJUuMffv2GZs2bTKmTJliBAYGGjt37jTjEBrs/vvvN1atWmWkp6cba9asMUaPHm1ER0cbWVlZhmH41nmt4XQ6jY4dOxoPP/zwGdu8+bwWFBQYW7ZsMbZs2WJIMp577jljy5Yt7id1/v73vxsRERHGJ598Ymzbts245pprjC5duhglJSXufYwcOdJ44YUX3O/P9b03S33HWl5ebkycONHo0KGDsXXr1lrf4bKyMvc+fnys5/oumKm+4y0oKDAeeOABY+3atUZ6erqxbNky4yc/+YnRo0cPo7S01L0PXzi3NfLy8ozg4GDj5ZdfrnMf3nRuWwJBph4vvPCC0bFjRyMgIMAYPHiwsW7dOve2yy67zLj11ltrtX/vvfeMnj17GgEBAUbfvn2NRYsWtXLFTSOpzmX+/PnuNj8+3vvuu8/9dxMXF2dceeWVxubNm1u/+Ea6/vrrjYSEBCMgIMBo3769cf311xt79+51b/el81pjyZIlhiQjNTX1jG3efF5XrlxZ53+3Ncfjcr
mMP//5z0ZcXJxht9uNUaNGnfF30KlTJ+Oxxx6rta6+771Z6jvW9PT0s36HV65c6d7Hj4/1XN8FM9V3vMXFxcaYMWOMmJgYw9/f3+jUqZNx1113nRFIfOHc1nj11VeNoKAgIzc3t859eNO5bQkWwzCMFr3kAwAA0ELoIwMAALwWQQYAAHgtggwAAPBaBBkAAOC1CDIAAMBrEWQAAIDXIsgAAACvRZAB4PNWrVoli8Wi3Nxcs0sB0MwIMgAAwGsRZAAAgNciyABocS6XS7NmzVKXLl0UFBSklJQUffDBB5JO3fZZtGiRBgwYoMDAQA0dOlQ7duyotY///Oc/6tu3r+x2uzp37qxnn3221vaysjI9/PDDSkpKkt1uV/fu3TV37txabTZt2qQLL7xQwcHBuvjii5Wamure9t133+nyyy9XWFiYwsPDNWjQIH377bct9DcCoLkQZAC0uFmzZmnhwoV65ZVXtHPnTs2YMUM33XSTVq9e7W7z4IMP6tlnn9XGjRsVExOjq6++WhUVFZKqAsh1112nKVOmaPv27Xr88cf15z//WQsWLHB//pZbbtHbb7+t559/Xrt27dKrr76q0NDQWnX88Y9/1LPPPqtvv/1Wfn5+uuOOO9zbbrzxRnXo0EEbN27Upk2b9Pvf/17+/v4t+xcD4PyZPWslAN9WWlpqBAcHG998802t9VOnTjVuuOEG9+y/77zzjnvbiRMnjKCgIOPdd981DMMwfvnLXxpXXHFFrc8/+OCDRp8+fQzDMIzU1FRDkrF06dI6a6j5M5YtW+Zet2jRIkOSUVJSYhiGYYSFhRkLFiw4/wMG0Kq4IgOgRe3du1fFxcW64oorFBoa6l4WLlyoffv2udsNGzbM/ToyMlLJycnatWuXJGnXrl0aPnx4rf0OHz5caWlpcjqd2rp1q2w2my677LJ6axkwYID7dUJCgiQpKytLkjRz5kzdeeedGj16tP7+97/Xqg2A5yLIAGhRhYWFkqRFixZp69at7uX7779395M5X0FBQQ1qd/qtIovFIqmq/44kPf7449q5c6cmTJigFStWqE+fPvroo4+apT4ALYcgA6BF9enTR3a7XQcPHlT37t1rLUlJSe5269atc78+efKk9uzZo969e0uSevfurTVr1tTa75o1a9SzZ0/ZbDb1799fLperVp+bpujZs6dmzJih//73v/rZz36m+fPnn9f+ALQ8P7MLAODbwsLC9MADD2jGjBlyuVy65JJLlJeXpzVr1ig8PFydOnWSJD355JOKiopSXFyc/vjHPyo6OlqTJk2SJN1///266KKL9Je//EXXX3+91q5dqxdffFEvvfSSJKlz58669dZbdccdd+j5559XSkqKDhw4oKysLF133XXnrLGkpEQPPvigfv7zn6tLly46fPiwNm7cqMmTJ7fY3wuAZmJ2Jx0Avs/lchmzZ882kpOTDX9/fyMmJsYYO3assXr1andH3E8//dTo27evERAQYAwePNj47rvvau3jgw8+MPr06WP4+/sbHTt2NJ5++ula20tKSowZM2YYCQkJRkBAgNG9e3dj3rx5hmGc6ux78uRJd/stW7YYkoz09HSjrKzMmDJlipGUlGQEBAQYiYmJxj333OPuCAzAc1kMwzBMzlIA2rBVq1bp8ssv18mTJxUREWF2OQC8DH1kAACA1yLIAAAAr8WtJQAA4LW4IgMAALwWQQYAAHgtggwAAPBaBBkAAOC1CDIAAMBrEWQAAIDXIsgAAACvRZABAABeiyADAAC81v8HGLVlUjjYoogAAAAASUVORK5CYII=",
|
| 1554 |
+
"text/plain": [
|
| 1555 |
+
"<Figure size 640x480 with 1 Axes>"
|
| 1556 |
+
]
|
| 1557 |
+
},
|
| 1558 |
+
"metadata": {},
|
| 1559 |
+
"output_type": "display_data"
|
| 1560 |
+
}
|
| 1561 |
+
],
|
| 1562 |
+
"source": [
|
| 1563 |
+
"plt.plot(all_loss)\n",
|
| 1564 |
+
"plt.title(\"loss Graph\")\n",
|
| 1565 |
+
"plt.xlabel(\"epochs\")\n",
|
| 1566 |
+
"plt.ylabel(\"loss\")\n",
|
| 1567 |
+
"plt.show()"
|
| 1568 |
+
]
|
| 1569 |
+
},
|
| 1570 |
+
{
|
| 1571 |
+
"cell_type": "markdown",
|
| 1572 |
+
"metadata": {
|
| 1573 |
+
"id": "g46t9mRLWXdH"
|
| 1574 |
+
},
|
| 1575 |
+
"source": [
|
| 1576 |
+
"## 4.6.Evaluate Model"
|
| 1577 |
+
]
|
| 1578 |
+
},
|
| 1579 |
+
{
|
| 1580 |
+
"cell_type": "code",
|
| 1581 |
+
"execution_count": 158,
|
| 1582 |
+
"metadata": {
|
| 1583 |
+
"colab": {
|
| 1584 |
+
"base_uri": "https://localhost:8080/"
|
| 1585 |
+
},
|
| 1586 |
+
"id": "ZleBUg-Gmudt",
|
| 1587 |
+
"outputId": "74dc7500-02bb-477c-d055-a3a3f3835581"
|
| 1588 |
+
},
|
| 1589 |
+
"outputs": [
|
| 1590 |
+
{
|
| 1591 |
+
"name": "stdout",
|
| 1592 |
+
"output_type": "stream",
|
| 1593 |
+
"text": [
|
| 1594 |
+
"Test Accuracy: 92.61%\n"
|
| 1595 |
+
]
|
| 1596 |
+
}
|
| 1597 |
+
],
|
| 1598 |
+
"source": [
|
| 1599 |
+
"# Evaluate Model\n",
|
| 1600 |
+
"model.eval()\n",
|
| 1601 |
+
"correct, total = 0, 0\n",
|
| 1602 |
+
"with torch.no_grad():\n",
|
| 1603 |
+
" for text, label in test_loader:\n",
|
| 1604 |
+
" text, label = text.to(device), label.to(device)\n",
|
| 1605 |
+
" output = model(text)\n",
|
| 1606 |
+
" _, predicted = torch.max(output, 1)\n",
|
| 1607 |
+
" total += label.size(0)\n",
|
| 1608 |
+
" correct += (predicted == label).sum().item()\n",
|
| 1609 |
+
"accuracy = 100 * correct / total\n",
|
| 1610 |
+
"print(f\"Test Accuracy: {accuracy:.2f}%\")"
|
| 1611 |
+
]
|
| 1612 |
+
},
|
| 1613 |
+
{
|
| 1614 |
+
"cell_type": "markdown",
|
| 1615 |
+
"metadata": {
|
| 1616 |
+
"id": "yel48pLbU6Wo"
|
| 1617 |
+
},
|
| 1618 |
+
"source": [
|
| 1619 |
+
"## 4.7.Save Model"
|
| 1620 |
+
]
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"cell_type": "code",
|
| 1624 |
+
"execution_count": 159,
|
| 1625 |
+
"metadata": {
|
| 1626 |
+
"id": "qT87bZnsUrog"
|
| 1627 |
+
},
|
| 1628 |
+
"outputs": [],
|
| 1629 |
+
"source": [
|
| 1630 |
+
"\n",
|
| 1631 |
+
"torch.save(model.state_dict(), 'spam_classifier_model_lstm.pth')\n"
|
| 1632 |
+
]
|
| 1633 |
+
},
|
| 1634 |
+
{
|
| 1635 |
+
"cell_type": "markdown",
|
| 1636 |
+
"metadata": {
|
| 1637 |
+
"id": "ykX7OC7RWSuW"
|
| 1638 |
+
},
|
| 1639 |
+
"source": [
|
| 1640 |
+
"# 5.Predict Sentiment for New Text"
|
| 1641 |
+
]
|
| 1642 |
+
},
|
| 1643 |
+
{
|
| 1644 |
+
"cell_type": "code",
|
| 1645 |
+
"execution_count": 160,
|
| 1646 |
+
"metadata": {
|
| 1647 |
+
"colab": {
|
| 1648 |
+
"base_uri": "https://localhost:8080/"
|
| 1649 |
+
},
|
| 1650 |
+
"id": "q8qsN1kunFu3",
|
| 1651 |
+
"outputId": "d956e7eb-ee73-4d12-b31a-e44c09d049a5"
|
| 1652 |
+
},
|
| 1653 |
+
"outputs": [
|
| 1654 |
+
{
|
| 1655 |
+
"name": "stdout",
|
| 1656 |
+
"output_type": "stream",
|
| 1657 |
+
"text": [
|
| 1658 |
+
"Prediction: Spam\n"
|
| 1659 |
+
]
|
| 1660 |
+
}
|
| 1661 |
+
],
|
| 1662 |
+
"source": [
|
| 1663 |
+
"def predict_sentiment(text):\n",
|
| 1664 |
+
" model.eval()\n",
|
| 1665 |
+
" text_vectorized = Tfidf_Vectorizer.transform([text]).toarray()\n",
|
| 1666 |
+
" text_tensor = torch.tensor(text_vectorized, dtype=torch.float32).to(device)\n",
|
| 1667 |
+
" with torch.no_grad():\n",
|
| 1668 |
+
" output = model(text_tensor)\n",
|
| 1669 |
+
" pred_label = torch.argmax(output, dim=1).item()\n",
|
| 1670 |
+
" return \"ham\" if pred_label == 1 else \"Spam\"\n",
|
| 1671 |
+
"print(\"Prediction:\", predict_sentiment(\"For ur chance to win £250 cash every wk TXT: PLAY to 83370. T's&C's www.music-trivia.net custcare 08715705022, 1x150p/wk.\"))"
|
| 1672 |
+
]
|
| 1673 |
+
}
|
| 1674 |
+
],
|
| 1675 |
+
"metadata": {
|
| 1676 |
+
"accelerator": "GPU",
|
| 1677 |
+
"colab": {
|
| 1678 |
+
"gpuType": "T4",
|
| 1679 |
+
"provenance": []
|
| 1680 |
+
},
|
| 1681 |
+
"kernelspec": {
|
| 1682 |
+
"display_name": "Python 3",
|
| 1683 |
+
"name": "python3"
|
| 1684 |
+
},
|
| 1685 |
+
"language_info": {
|
| 1686 |
+
"name": "python"
|
| 1687 |
+
}
|
| 1688 |
+
},
|
| 1689 |
+
"nbformat": 4,
|
| 1690 |
+
"nbformat_minor": 0
|
| 1691 |
+
}
|
app.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from joblib import load
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
import re
|
| 6 |
+
import nltk
|
| 7 |
+
from nltk.tokenize import word_tokenize
|
| 8 |
+
from nltk.stem import WordNetLemmatizer
|
| 9 |
+
from nltk.corpus import stopwords
|
| 10 |
+
# Fetch the NLTK resources used by the tokenizer, stopword filter and
# lemmatizer below.
for _nltk_resource in ('stopwords', 'wordnet', 'punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Load the fitted TF-IDF vectorizer that turns a message into model features
TFIDF_vectorizer = load("tfidf_vectorizer.pkl")

# Page title (the previous text was left over from a last-name classifier app)
st.title("Spam Message Classification")
|
| 20 |
+
|
| 21 |
+
# Define LSTM Model
|
| 22 |
+
class SentimentLSTM(nn.Module):
    """LSTM classifier over a single dense feature vector per sample.

    Each input row is treated as a sequence of length one: ``forward``
    inserts a time axis, runs the LSTM, and maps the output of the last
    (only) time step to ``output_dim`` logits.

    Args:
        input_dim: Number of features in each input row.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of output classes (logits).
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits of shape ``(batch, output_dim)`` for ``x``.

        ``x`` is expected as ``(batch, input_dim)``; a length-one time axis
        is added so it matches the LSTM's ``batch_first`` layout.
        """
        x = x.unsqueeze(1)
        lstm_out, _ = self.lstm(x)
        final_hidden = lstm_out[:, -1, :]
        # Dropout was constructed but never applied. It is a no-op in eval
        # mode, so inference results are unchanged; it only regularises the
        # hidden state when the module is in training mode.
        return self.fc(self.dropout(final_hidden))
|
| 34 |
+
# Load Model Weights
# The constructor arguments must match the architecture the checkpoint was
# saved from, otherwise load_state_dict raises on mismatched tensors.
model = SentimentLSTM(input_dim=10000, hidden_dim=64, output_dim=2).to("cpu")
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while loading.
model.load_state_dict(
    torch.load(
        "spam_classifier_model_lstm.pth",
        map_location=torch.device('cpu'),
        weights_only=True,
    )
)
model.eval()
|
| 38 |
+
|
| 39 |
+
# Text input for the message to classify
|
| 40 |
+
Message = st.text_input("Enter Message")
|
| 41 |
+
|
| 42 |
+
def preprocess_text(text):
    """Normalise a raw message into a space-joined string of lemmas.

    The text is case-folded, every non-letter character is replaced by a
    space, the result is tokenized with NLTK, English stopwords (and the
    word "not") are dropped, and the surviving tokens are lemmatized with
    WordNet before being joined with single spaces.
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text.casefold())

    english_stopwords = set(stopwords.words('english'))
    wordnet = WordNetLemmatizer()

    lemmas = []
    for token in word_tokenize(letters_only):
        # Skip stopwords; "not" is excluded explicitly as in the original
        # filter, so the output matches it token for token.
        if token == "not" or token in english_stopwords:
            continue
        lemmas.append(wordnet.lemmatize(token))

    return ' '.join(lemmas)
|
| 59 |
+
|
| 60 |
+
def predict_sentiment(text):
    """Classify one message string and return "ham" or "Spam".

    The text is vectorized with the module-level TF-IDF vectorizer, passed
    through the module-level model on CPU without gradient tracking, and the
    arg-max class index is mapped to a label (index 1 -> "ham", otherwise
    "Spam").
    """
    model.eval()
    features = TFIDF_vectorizer.transform([text]).toarray()
    batch = torch.tensor(features, dtype=torch.float32).to("cpu")
    with torch.no_grad():
        logits = model(batch)
    predicted_class = torch.argmax(logits, dim=1).item()
    if predicted_class == 1:
        return "ham"
    return "Spam"
|
| 68 |
+
|
| 69 |
+
if st.button("Submit"):
    # A blank message has nothing to classify; ask for input instead of
    # reporting a label for empty text.
    if not Message.strip():
        st.warning("Please enter a message to classify.")
    else:
        Message = preprocess_text(Message)
        result = predict_sentiment(Message)
        st.success(result)
|
| 73 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
joblib==1.4.2
|
| 2 |
+
numpy==2.2.1
|
| 3 |
+
pandas==2.2.3
|
| 4 |
+
scikit-learn==1.6.1
|
| 5 |
+
streamlit==1.41.1
|
| 6 |
+
torch == 2.5.1
|
| 7 |
+
nltk
|
spam_classifier_model_lstm.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9118e44066ad24a1a863171f4ac7bb78482d65d11515a2aa4b5d0a510b0fb684
|
| 3 |
+
size 10310982
|
tfidf_vectorizer.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e9be53e196c118608513762a5bcc7939c17cd600f15e45cf6ec31842ff795e1
|
| 3 |
+
size 394924
|