Prasad committed on
Commit
8f1e4ae
·
verified ·
1 Parent(s): 14e5b2a

Upload 11 files

Browse files
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: sh setup.sh && streamlit run app.py
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # email-spam-classifier-new
2
+ End-to-end code for the email spam classifier project
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import string
4
+ import pickle
5
+ from nltk.corpus import stopwords
6
+ import nltk
7
+ from nltk.stem.porter import PorterStemmer
8
# Fetch the NLTK data that transform_text() needs at runtime.
# nltk.download() skips packages that are already present and up to date.
nltk.download('stopwords')  # Downloading stopwords data
nltk.download('punkt')  # Downloading tokenizer data
# Newer NLTK releases make word_tokenize() look up the 'punkt_tab' resource
# instead of the pickled 'punkt' models; without it tokenizing raises
# LookupError. Older releases simply report the package as unknown.
nltk.download('punkt_tab')

# Single shared stemmer instance used by transform_text().
ps = PorterStemmer()
12
+
13
+
14
def transform_text(text):
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The message is lowercased and split with ``nltk.word_tokenize``; only
    alphanumeric tokens that are not English stop words are kept, and each
    survivor is stemmed with the module-level ``PorterStemmer`` instance
    ``ps``.

    Args:
        text: The raw message string.

    Returns:
        The stemmed tokens joined by single spaces (an empty string when no
        token survives the filters).
    """
    # Build the stop-word set once per call. The original re-read the corpus
    # list and scanned it linearly for every single token.
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # isalnum() already rejects every pure-punctuation token, so the
    # original's extra `not in string.punctuation` test could never fire.
    kept = [tok for tok in tokens if tok.isalnum() and tok not in stop_words]

    return " ".join(ps.stem(tok) for tok in kept)
37
+
38
# Load the fitted vectorizer and classifier that ship next to this script.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so these .pkl files must only ever come from a trusted source.
# The `with` blocks close the file handles the bare open() calls used to leak.
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
40
+
41
st.title("Email/SMS Spam Classifier")

input_sms = st.text_area("Enter the message")

predict_clicked = st.button('Predict')
if predict_clicked:
    # Clean the raw message the same way transform_text() defines it,
    # turn it into a one-row feature matrix, and classify that row.
    cleaned_message = transform_text(input_sms)
    features = tfidf.transform([cleaned_message])
    label = model.predict(features)[0]

    # A predicted label of 1 is reported as spam; anything else is not spam.
    st.header("Spam" if label == 1 else "Not Spam")
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:063393928e57a326b661f9db1226438e481dc361964048c57a7cc320b35fe083
3
+ size 96602
nltk.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ stopwords
2
+ punkt
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ nltk
3
+ scikit-learn
4
+ pickle
5
+ string
setup.sh ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Create the Streamlit config directory and write the server settings that
# the Procfile's `streamlit run` picks up.
mkdir -p ~/.streamlit/

# A here-document is used instead of `echo "...\n..."`: whether echo expands
# "\n" is implementation-defined, so under some shells the original would
# write literal backslash-n sequences into config.toml. $PORT is still
# expanded because the EOF delimiter is unquoted.
cat > ~/.streamlit/config.toml <<EOF
[server]
port = $PORT
enableCORS = false
headless = true

EOF
sms-spam-detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
spam-marketing-classification-ml.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
spam.csv ADDED
The diff for this file is too large to render. See raw diff
 
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc65b94b327d6c337ab82d6350660ecce36d02f92bbc08da29ec060ce2485f50
3
+ size 160266