Spaces:
Sleeping
Sleeping
File size: 2,237 Bytes
import os
import pickle
import streamlit as st
import re
import nltk
import contractions
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK corpora this script relies on: WordNet backs the
# lemmatizer used during preprocessing, and the stopword corpus matches
# the `stopwords` import above.
for corpus_name in ('wordnet', 'stopwords'):
    nltk.download(corpus_name)
## Setting Page Configuration and Header
# Page-level settings (browser tab title, icon, and wide layout).
page_settings = {
    "page_title": "Spam Email Classifier",
    "page_icon": "📧",
    "layout": "wide",
}
st.set_page_config(**page_settings)

# Heading and a one-line usage hint rendered at the top of the page.
st.title("📧 Spam Email Classifier")
st.write("Enter your email content below and the model will predict whether it is Spam or Ham (Not Spam).")
## Preprocessing Function
def preprocess_text(text):
    """Normalize raw email text into a cleaned, space-joined token string.

    The steps run in this fixed order:
      1. lowercase the text;
      2. collapse every whitespace run to a single space and trim the ends;
      3. replace each run of digits with the marker ``<NUM>``;
      4. shorten any character repeated two or more times to exactly two;
      5. expand contractions via ``contractions.fix``;
      6. drop every character that is not a-z, 0-9, or whitespace;
      7. lemmatize each whitespace-separated token with
         ``WordNetLemmatizer`` using its default settings.

    Args:
        text: The raw email content as a string.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    # Steps 1-2: case folding, then whitespace normalization.
    normalized = re.sub(r'\s+', ' ', text.lower()).strip()

    # Step 3: digit runs become a placeholder marker.
    # NOTE(review): the filter in step 6 keeps only a-z, 0-9 and whitespace,
    # so the uppercase '<NUM>' marker looks like it is removed again there.
    # Confirm this matches the preprocessing used when the model was trained
    # before changing it.
    normalized = re.sub(r'\d+', '<NUM>', normalized)

    # Step 4: "soooo" -> "soo"; a doubled character is left as is.
    normalized = re.sub(r'(.)\1+', r'\1\1', normalized)

    # Step 5: e.g. "weren't" -> "were not".
    normalized = contractions.fix(normalized)

    # Step 6: strip punctuation and any non-English characters.
    normalized = re.sub(r'[^a-z0-9\s]', '', normalized)

    # Step 7: lemmatize token by token and stitch the result back together.
    lemmatize = WordNetLemmatizer().lemmatize
    return ' '.join(map(lemmatize, normalized.split()))
## Loading the Model and Vectorizer
# Streamlit re-executes this script on every widget interaction, so loading
# the pickles inline would re-read and unpickle both files on each rerun.
# `st.cache_resource` keeps the loaded objects alive across reruns; if the
# attribute is missing, fall back to a no-op decorator so the objects are
# simply loaded on each run, as before.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code contained in the
    file. Only point this at artifacts produced by this project; never at
    user-supplied or downloaded files.

    Args:
        path: Filesystem path of the pickle file, resolved relative to the
            current working directory when it is not absolute.

    Returns:
        The deserialized object.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = _load_pickle('models/logistic_regression.pkl')
vectorizer = _load_pickle("models/vectorizer.pkl")
## Prediction
# Input box for the email body; the prediction runs only when the button is
# pressed.
email_text = st.text_area("Email Content:")
if st.button("Predict"):
    # Whitespace-only input has nothing to classify, so it gets the same
    # warning as empty input instead of being sent to the model.
    if email_text and email_text.strip():
        # Clean the text, vectorize it, and score the single sample.
        processed_text = preprocess_text(email_text)
        vect_text = vectorizer.transform([processed_text])
        prediction = model.predict(vect_text)[0]
        prediction_proba = model.predict_proba(vect_text)[0]

        st.subheader("Prediction Result:")
        # NOTE(review): this assumes label 1 means spam and that the
        # probability columns are ordered [ham, spam] -- confirm against
        # the trained model's class order.
        if prediction == 1:
            st.error("🚫 This email is Spam")
        else:
            st.success("✅ This email is Not Spam")

        st.subheader("Prediction Probabilities:")
        st.write(f"Ham: {prediction_proba[0]:.2f}, Spam: {prediction_proba[1]:.2f}")
    else:
        st.warning("Please enter email content to predict.")