Hamdy005's picture
Upload 9 files
626deb4 verified
import os
import pickle
import streamlit as st
import re
import nltk
import contractions
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
nltk.download('stopwords')
## Setting Page Configuration and Header
st.set_page_config(
page_title="Spam Email Classifier",
page_icon="πŸ“§",
layout="wide",
)
st.title("πŸ“§ Spam Email Classifier")
st.write("Enter your email content below and the model will predict whether it is Spam or Ham (Not Spam).")
## Preprocessing Function
def preprocess_text(text):
# Converting text to lowercase
text = text.lower()
# Removing Extra Spaces
text = re.sub(r'\s+', ' ', text).strip()
# Replacing Numbers with a Token
text = re.sub(r'\d+', '<NUM>', text)
# Normalize Elongated Words
text = re.sub(r'(.)\1+', r'\1\1', text)
# Expand Contractions (e.g.: weren't => were not)
text = contractions.fix(text)
# Removing Punctuations and Non-English Charachters
text = re.sub(r'[^a-z0-9\s]', '', text)
# Lemmatization
words = text.split()
lemmatizer = WordNetLemmatizer()
words = [lemmatizer.lemmatize(word) for word in words]
# Returning the Cleaned Text
cleaned_text = ' '.join(words)
return cleaned_text
## Loading the Model and Vectorizer
with open('models/logistic_regression.pkl', "rb") as file:
model = pickle.load(file)
with open("models/vectorizer.pkl", "rb") as file:
vectorizer = pickle.load(file)
## Prediction
email_text = st.text_area("Email Content:")
if st.button("Predict"):
if email_text:
processed_text = preprocess_text(email_text)
vect_text = vectorizer.transform([processed_text])
prediction = model.predict(vect_text)[0]
prediction_proba = model.predict_proba(vect_text)[0]
st.subheader("Prediction Result:")
if prediction == 1:
st.error("🚫 This email is Spam")
else:
st.success("βœ… This email is Not Spam")
st.subheader("Prediction Probabilities:")
st.write(f"Ham: {prediction_proba[0]:.2f}, Spam: {prediction_proba[1]:.2f}")
else:
st.warning("Please enter email content to predict.")