File size: 1,109 Bytes
4805a76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import streamlit as st
import pickle 
import string 
import nltk
nltk.download('punkt')
from nltk.corpus import stopwords
nltk.download('stopwords') 
from nltk.stem import PorterStemmer


# Load the pickled vectorizer and classifier objects from the working directory.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that come from a trusted source.
# The context managers close each file handle as soon as loading finishes
# instead of leaving the bare open() handles to the garbage collector.
with open("vectorizer.pkl", "rb") as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# Single stemmer instance reused by preprocess_data for every token.
stemmer = PorterStemmer()

def preprocess_data(text: str) -> str:
    """Normalize a raw message into a space-joined string of stemmed tokens.

    Steps: lowercase the text, tokenize it with ``nltk.word_tokenize``, keep
    only alphanumeric tokens, drop English stopwords, and stem what remains
    with the module-level ``stemmer``.

    Args:
        text: The raw message string.

    Returns:
        The surviving stemmed tokens joined by single spaces; an empty string
        when no token survives the filters.
    """
    # Build the stopword set once per call: re-reading the NLTK stopword list
    # for every token is loop-invariant work, and a set gives O(1) lookups.
    english_stopwords = set(stopwords.words("english"))

    tokens = nltk.word_tokenize(text.lower())

    # isalnum() already rejects punctuation tokens, and an alphanumeric token
    # can never be a substring of string.punctuation, so no separate
    # punctuation check is needed after this filter.
    kept = [
        token
        for token in tokens
        if token.isalnum() and token not in english_stopwords
    ]

    return " ".join(stemmer.stem(token) for token in kept)

# --- Streamlit page: collect a message and display the classifier's verdict ---
st.title("SMS Spam Classifier")

sms = st.text_area("Enter The Message:")
predict_button = st.button("Predict")


if predict_button:
    if not sms.strip():
        # An empty or whitespace-only message has nothing to classify; ask for
        # input instead of showing a verdict for an empty string.
        st.warning("Please enter a message to classify.")
    else:
        # Apply preprocess_data, then vectorize the cleaned text. transform()
        # is given a one-element list and the first prediction is read back.
        transform_sms = preprocess_data(sms)
        vector_input = tfidf.transform([transform_sms])
        result = model.predict(vector_input)[0]

        # A predicted label of 1 is shown as spam; any other label as not spam.
        if result == 1:
            st.header("Spam")
        else:
            st.header("Not Spam")