Spaces:
Sleeping
Sleeping
Commit
·
e8afe32
1
Parent(s):
d9f8f61
Upload 14 files
Browse files- .gitattributes +1 -0
- Procfile +1 -0
- README.md +10 -10
- Spam SMS Classifier - Deployment.py +66 -0
- Spam SMS Collection +0 -0
- app.py +25 -0
- cv-transform.pkl +3 -0
- requirements.txt +11 -0
- spam-sms-mnb-model.pkl +3 -0
- static/not-spam.webp +0 -0
- static/spam-favicon.ico +0 -0
- static/spam.webp +3 -0
- static/styles.css +126 -0
- templates/home.html +40 -0
- templates/result.html +43 -0
.gitattributes
CHANGED
|
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
static/spam.webp filter=lfs diff=lfs merge=lfs -text
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: gunicorn app:app
|
README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
| 1 |
+
# Spam SMS Classification - Deployment
|
| 2 |
+
  
|
| 3 |
+
|
| 4 |
+
• This repository contains the files required to deploy a ___Machine Learning Web App___ built with ___Flask___ on the ___Heroku___ platform.
|
| 5 |
+
|
| 6 |
+
• If you want to view the deployed model, click on the following link:<br />
|
| 7 |
+
Deployed at: https://spam-message-predictor21.herokuapp.com/
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
• Please ⭐ the repository if it helped you in any way.
|
Spam SMS Classifier - Deployment.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Train the spam-SMS classifier and pickle the artifacts used by app.py.

Reads the tab-separated 'Spam SMS Collection' file, cleans and stems each
message, fits a bag-of-words CountVectorizer and a Multinomial Naive Bayes
model, and writes 'cv-transform.pkl' and 'spam-sms-mnb-model.pkl'.
"""

# Importing essential libraries
import pickle
import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# The stop-word corpus must be available before stopwords.words() is called
nltk.download('stopwords')

# Loading the dataset
df = pd.read_csv('Spam SMS Collection', sep='\t', names=['label', 'message'])

# Cleaning the messages
corpus = []
ps = PorterStemmer()

# Built once: the original rebuilt this set for every single word, which
# dominated the run time of the cleaning loop.
stop_words = set(stopwords.words('english'))

# Anything that is not an ASCII letter is replaced by a space
non_letters = re.compile(r'[^a-zA-Z]')

for raw_message in df['message']:

    # Cleaning special characters from the message and lower-casing it
    message = non_letters.sub(' ', raw_message).lower()

    # Tokenizing the message by words, removing the stop words and stemming
    words = [ps.stem(word) for word in message.split() if word not in stop_words]

    # Joining the stemmed words and adding the result to the corpus
    corpus.append(' '.join(words))

# Creating the Bag of Words model
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()

# Extracting dependent variable from the dataset.
# Column 1 is the second label in sorted order -- presumably 'spam'
# (ham/spam); confirm against the dataset.
y = pd.get_dummies(df['label'])
y = y.iloc[:, 1].values

# Creating a pickle file for the CountVectorizer
with open('cv-transform.pkl', 'wb') as vectorizer_file:
    pickle.dump(cv, vectorizer_file)

# Model Building
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fitting Naive Bayes to the Training set
classifier = MultinomialNB(alpha=0.3)
classifier.fit(X_train, y_train)

# Creating a pickle file for the Multinomial Naive Bayes model
filename = 'spam-sms-mnb-model.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)
|
Spam SMS Collection
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importing essential libraries
from flask import Flask, render_template, request
import pickle

# Load the Multinomial Naive Bayes model and CountVectorizer object from disk.
# Context managers close the files as soon as the objects are loaded.
# NOTE: unpickling can execute arbitrary code -- only ship trusted .pkl files.
filename = 'spam-sms-mnb-model.pkl'
with open(filename, 'rb') as model_file:
    classifier = pickle.load(model_file)
with open('cv-transform.pkl', 'rb') as vectorizer_file:
    cv = pickle.load(vectorizer_file)

app = Flask(__name__)
|
| 10 |
+
|
| 11 |
+
@app.route('/')
def home():
    """Serve the landing page that contains the message input form."""
    landing_page = render_template('home.html')
    return landing_page
|
| 14 |
+
|
| 15 |
+
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted SMS text and render the result page.

    Reads the ``message`` form field, vectorizes it with the loaded
    CountVectorizer and renders ``result.html`` with ``prediction`` set to
    the classifier's label as a plain int (the template shows the spam
    banner for 1 and the not-spam banner for 0).
    """
    # The route only accepts POST, so no method check is needed; the old
    # check left an implicit ``return None`` path for other methods.
    message = request.form['message']

    # NOTE(review): the training script strips non-letters, removes stop
    # words and stems before fitting the vectorizer; the raw text is used
    # here, so inflected words may miss the stemmed vocabulary -- confirm
    # whether the same cleaning should be applied at serving time.
    vect = cv.transform([message]).toarray()
    my_prediction = classifier.predict(vect)

    # predict() returns a one-element array; hand the template a scalar so
    # ``prediction==1`` compares plain ints instead of a NumPy array.
    return render_template('result.html', prediction=int(my_prediction[0]))
|
| 23 |
+
|
| 24 |
+
if __name__ == '__main__':
    # Local entry point only: the Procfile starts the app with
    # ``gunicorn app:app``, which imports this module and skips this block.
    import os

    # Debug mode enables the interactive Werkzeug debugger, which allows
    # code execution from the browser; keep it opt-in via FLASK_DEBUG=1
    # instead of hard-coding it on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(debug=debug_enabled)
|
cv-transform.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6850efa9440ff2b4cc6346f16adb34412101d83be2129c9ea7ce159f0487341e
|
| 3 |
+
size 179663
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==1.1.1
|
| 2 |
+
gunicorn==19.9.0
|
| 3 |
+
itsdangerous==1.1.0
|
| 4 |
+
Jinja2==2.10.1
|
| 5 |
+
MarkupSafe==1.1.1
|
| 6 |
+
Werkzeug==0.15.5
|
| 7 |
+
numpy>=1.9.2
|
| 8 |
+
scipy>=0.15.1
|
| 9 |
+
scikit-learn>=0.18
|
| 10 |
+
matplotlib>=1.4.3
|
| 11 |
+
pandas>=0.19
|
spam-sms-mnb-model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6cc509111cefe1d973976508cb26039511b2d8a4b7d7832a407cd3333dfe342
|
| 3 |
+
size 80636
|
static/not-spam.webp
ADDED
|
static/spam-favicon.ico
ADDED
|
|
static/spam.webp
ADDED
|
Git LFS Details
|
static/styles.css
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
html{
|
| 2 |
+
height: 100%;
|
| 3 |
+
margin: 0;
|
| 4 |
+
}
|
| 5 |
+
|
| 6 |
+
body{
|
| 7 |
+
font-family: Arial, Helvetica,sans-serif;
|
| 8 |
+
text-align: center;
|
| 9 |
+
margin: 0;
|
| 10 |
+
padding: 0;
|
| 11 |
+
width: 100%;
|
| 12 |
+
height: 100%;
|
| 13 |
+
display: flex;
|
| 14 |
+
flex-direction: column;
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
/* Website Title */
|
| 18 |
+
.container{
|
| 19 |
+
padding: 30px;
|
| 20 |
+
position: relative;
|
| 21 |
+
background: linear-gradient(45deg, #ffffff, #ffffff, #f9f9f9, #eeeeee, #e0e4e1, #d7e1ec);
|
| 22 |
+
background-size: 500% 500%;
|
| 23 |
+
animation: change-gradient 10s ease-in-out infinite;
|
| 24 |
+
}
|
| 25 |
+
@keyframes change-gradient {
|
| 26 |
+
0%{
|
| 27 |
+
background-position: 0 50%;
|
| 28 |
+
}
|
| 29 |
+
50%{
|
| 30 |
+
background-position: 100% 50%;
|
| 31 |
+
}
|
| 32 |
+
100%{
|
| 33 |
+
background-position: 0 50%;
|
| 34 |
+
}
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.container-heading{
|
| 38 |
+
margin: 0;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
.container span{
|
| 42 |
+
color: #ff0000;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.description p{
|
| 46 |
+
font-style: italic;
|
| 47 |
+
font-size: 14px;
|
| 48 |
+
margin: 3px 0 0;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
/* Text Area */
|
| 52 |
+
.ml-container{
|
| 53 |
+
margin: 30px 0;
|
| 54 |
+
flex: 1 0 auto;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
.message-box{
|
| 58 |
+
margin-bottom: 20px;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
/* Predict Button */
|
| 62 |
+
.my-cta-button{
|
| 63 |
+
background: #f9f9f9;
|
| 64 |
+
border: 2px solid #000000;
|
| 65 |
+
border-radius: 1000px;
|
| 66 |
+
box-shadow: 3px 3px #8c8c8c;
|
| 67 |
+
padding: 10px 36px;
|
| 68 |
+
color: #000000;
|
| 69 |
+
display: inline-block;
|
| 70 |
+
font: italic bold 20px/1 "Calibri", sans-serif;
|
| 71 |
+
text-align: center;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.my-cta-button:hover{
|
| 75 |
+
color: #ff0000;
|
| 76 |
+
border: 2px solid #ff0000;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.my-cta-button:active{
|
| 80 |
+
box-shadow: 0 0;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
/* Footer */
|
| 85 |
+
.footer{
|
| 86 |
+
font-size: 14px;
|
| 87 |
+
padding: 20px;
|
| 88 |
+
flex-shrink: 0;
|
| 89 |
+
position: relative;
|
| 90 |
+
background: linear-gradient(45deg, #ffffff, #ffffff, #f9f9f9, #eeeeee, #e0e4e1, #d7e1ec);
|
| 91 |
+
background-size: 500% 500%;
|
| 92 |
+
animation: change-gradient 10s ease-in-out infinite;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
.contact-icon{
|
| 96 |
+
color: #000000;
|
| 97 |
+
padding: 7px;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
.contact-icon:hover{
|
| 102 |
+
color: #8c8c8c;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
.footer-description{
|
| 106 |
+
margin: 0;
|
| 107 |
+
font-size: 12px;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
/* Result */
|
| 111 |
+
.results{
|
| 112 |
+
padding: 30px 0 0;
|
| 113 |
+
flex: 1 0 auto;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
.danger{
|
| 117 |
+
color: #ff0000;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
.safe{
|
| 121 |
+
color: green;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.gif{
|
| 125 |
+
width: 30%;
|
| 126 |
+
}
|
templates/home.html
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
|
| 3 |
+
<html lang="en" dir="ltr">
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="utf-8">
|
| 6 |
+
<title>Spam message predictor</title>
|
| 7 |
+
<link rel="shortcut icon" href="{{ url_for('static', filename='spam-favicon.ico') }}">
|
| 8 |
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='styles.css') }}">
|
| 9 |
+
<script src="https://kit.fontawesome.com/5f3f547070.js" crossorigin="anonymous"></script>
|
| 10 |
+
</head>
|
| 11 |
+
|
| 12 |
+
<body>
|
| 13 |
+
|
| 14 |
+
<!-- Website Title -->
|
| 15 |
+
<div class="container">
|
| 16 |
+
<h2 class='container-heading'><span>Spam Detector</span> for Short Message Service (SMS)</h2>
|
| 17 |
+
<div class='description'>
|
| 18 |
+
<p>A Machine Learning Web App, Built with Flask, Deployed using Heroku.</p>
|
| 19 |
+
</div>
|
| 20 |
+
</div>
|
| 21 |
+
|
| 22 |
+
<!-- Text Area -->
|
| 23 |
+
<div class="ml-container">
|
| 24 |
+
<form action="{{ url_for('predict') }}" method="POST">
|
| 25 |
+
<textarea class='message-box' name="message" rows="15" cols="75" placeholder="Enter Your Message Here..."></textarea><br/>
|
| 26 |
+
<input type="submit" class="my-cta-button" value="Predict">
|
| 27 |
+
</form>
|
| 28 |
+
</div>
|
| 29 |
+
|
| 30 |
+
<!-- Footer -->
|
| 31 |
+
<div class='footer'>
	<div class="contact">
		<!-- rel="noopener noreferrer" keeps the newly opened tab from reaching back into this page through window.opener -->
		<a target="_blank" rel="noopener noreferrer" href="https://github.com/ArchitSharma21/Spam-message-predictor"><i class="fab fa-github fa-lg contact-icon"></i></a>
		<a target="_blank" rel="noopener noreferrer" href="https://www.linkedin.com/in/thearchitsharma/"><i class="fab fa-linkedin fa-lg contact-icon"></i></a>
	</div>
	<p class='footer-description'>Made with ❤️ by Archit Sharma.</p>
</div>
|
| 38 |
+
|
| 39 |
+
</body>
|
| 40 |
+
</html>
|
templates/result.html
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
|
| 3 |
+
<html lang="en" dir="ltr">
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="utf-8">
|
| 6 |
+
<title>Spam message predictor</title>
|
| 7 |
+
<link rel="shortcut icon" href="{{ url_for('static', filename='spam-favicon.ico') }}">
|
| 8 |
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='styles.css') }}">
|
| 9 |
+
<script src="https://kit.fontawesome.com/5f3f547070.js" crossorigin="anonymous"></script>
|
| 10 |
+
</head>
|
| 11 |
+
|
| 12 |
+
<body>
|
| 13 |
+
|
| 14 |
+
<!-- Website Title -->
|
| 15 |
+
<div class="container">
|
| 16 |
+
<h2 class='container-heading'><span>Spam Detector</span> for Short Message Service (SMS)</h2>
|
| 17 |
+
<div class='description'>
|
| 18 |
+
<p>A Machine Learning Web App, Built with Flask, Deployed using Heroku.</p>
|
| 19 |
+
</div>
|
| 20 |
+
</div>
|
| 21 |
+
|
| 22 |
+
<!-- Result -->
|
| 23 |
+
<div class="results">
|
| 24 |
+
{% if prediction==1 %}
|
| 25 |
+
<h1>Prediction: <span class='danger'>Gotcha! This is a SPAM message.</span></h1>
|
| 26 |
+
<img class="gif" src="{{ url_for('static', filename='spam.webp') }}" alt="SPAM Image">
|
| 27 |
+
{% elif prediction==0 %}
|
| 28 |
+
<h1>Prediction: <span class='safe'>Great! This is NOT a spam message.</span></h1>
|
| 29 |
+
<img class="gif" src="{{ url_for('static', filename='not-spam.webp') }}" alt="Not a spam image">
|
| 30 |
+
{% endif %}
|
| 31 |
+
</div>
|
| 32 |
+
|
| 33 |
+
<!-- Footer -->
|
| 34 |
+
<div class='footer'>
	<div class="contact">
		<!-- rel="noopener noreferrer" keeps the newly opened tab from reaching back into this page through window.opener -->
		<a target="_blank" rel="noopener noreferrer" href="https://github.com/ArchitSharma21/Spam-message-predictor"><i class="fab fa-github fa-lg contact-icon"></i></a>
		<a target="_blank" rel="noopener noreferrer" href="https://www.linkedin.com/in/thearchitsharma/"><i class="fab fa-linkedin fa-lg contact-icon"></i></a>
	</div>
	<p class='footer-description'>Made with ❤️ by Archit Sharma.</p>
</div>
|
| 41 |
+
|
| 42 |
+
</body>
|
| 43 |
+
</html>
|