Spaces:
Sleeping
Sleeping
File size: 1,109 Bytes
4805a76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import streamlit as st
import pickle
import string
import nltk
nltk.download('punkt')
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.stem import PorterStemmer
# Load the fitted TF-IDF vectorizer and the trained spam classifier.
# Fix: the original used pickle.load(open(...)) which never closes the
# file handles; `with` guarantees they are released.
# SECURITY NOTE: pickle.load executes arbitrary code from the file —
# only load artifacts you produced yourself.
# NOTE(review): paths are relative to the working directory — assumes the
# app is launched from the project root; confirm deployment setup.
with open("vectorizer.pkl", "rb") as f:
    tfidf = pickle.load(f)
with open("model.pkl", "rb") as f:
    model = pickle.load(f)
stemmer = PorterStemmer()  # shared stemmer reused by preprocess_data
def preprocess_data(text):
    """Normalize an SMS message for the TF-IDF vectorizer.

    Pipeline: lowercase -> NLTK word-tokenize -> keep alphanumeric
    tokens -> drop English stopwords -> Porter-stem -> re-join with
    single spaces. Must mirror the preprocessing used at training time.

    Args:
        text: Raw message string.

    Returns:
        A space-joined string of stemmed, filtered tokens (may be ""
        if nothing survives filtering).
    """
    tokens = nltk.word_tokenize(text.lower())
    # Build the stopword set ONCE: the original called
    # stopwords.words("english") inside the per-token loop, re-creating
    # the whole list (and doing O(m) list membership) for every token.
    stop_words = set(stopwords.words("english"))
    # The original also re-checked string.punctuation here, but isalnum()
    # already rejects pure-punctuation tokens, so that test was dead code.
    kept = [tok for tok in tokens if tok.isalnum() and tok not in stop_words]
    return " ".join(stemmer.stem(tok) for tok in kept)
# --- Streamlit UI ---------------------------------------------------------
st.title("SMS Spam Classifier")
sms = st.text_area("Enter The Message:")
predict_button = st.button("Predict")

if predict_button:
    # Robustness: don't classify an empty/whitespace-only message.
    if not sms.strip():
        st.warning("Please enter a message first.")
    else:
        # Apply the identical preprocessing used at training time,
        # vectorize, then classify (1 = spam, anything else = ham).
        transform_sms = preprocess_data(sms)
        vector_input = tfidf.transform([transform_sms])
        result = model.predict(vector_input)[0]
        if result == 1:
            st.header("Spam")
        else:
            st.header("Not Spam")