# Spaces: CristopherWVSU/SpamDetection
# Status: Sleeping
# File size: 4,200 Bytes

import streamlit as st
import joblib
import re
import string
import nltk
from nltk.corpus import stopwords

# Make sure the NLTK stopwords corpus is available.
# Streamlit re-executes this script on every widget interaction, so only
# call the downloader when the corpus is not already present locally.
try:
    nltk.data.find("corpora/stopwords")
except LookupError:
    nltk.download("stopwords")

# Display name of each classifier -> path of its serialized model.
# This dict is the single source of truth for the sidebar options below,
# so every selectable model is guaranteed to have a path.
model_paths = {
    "Naive Bayes": "MNBspam_classifier_model.pkl",
    "Logistic Regression": "LRspam_classifier_model.pkl",
    "Support Vector Machine": "SVMspam_classifier.pkl"
}

# Sidebar Model Selection
st.sidebar.title("🔍 Choose Model")
model_choice = st.sidebar.radio(
    "Select a model for Spam Detection:",
    tuple(model_paths)
)


@st.cache_resource
def _load_artifact(path):
    """Load a joblib-serialized object from ``path``.

    Decorated with ``st.cache_resource`` so each distinct path is read from
    disk once and the same object is reused on later script reruns, instead
    of being deserialized again on every widget interaction.

    NOTE: the cache is keyed on the path string only; a file replaced on
    disk is not picked up until the cache is cleared or the app restarts.
    """
    return joblib.load(path)


# Load selected model and the vectorizer used to turn text into features
model = _load_artifact(model_paths[model_choice])
vectorizer = _load_artifact("tfidf_vectorizer.pkl")

# Function to preprocess text
def preprocess_text(text: str, stop_words=None) -> str:
    """Normalize a raw message before it is passed to the vectorizer.

    Steps, in order: lowercase, delete every run of digits, delete all ASCII
    punctuation (``string.punctuation``), split on whitespace, drop stop
    words, and re-join the remaining tokens with single spaces.

    Args:
        text: The raw message.
        stop_words: Optional iterable of lowercase words to drop. When
            ``None`` (the default) the NLTK English stopword list is used.
            Pass an empty collection to disable stop-word filtering.

    Returns:
        The cleaned message; an empty string if no tokens remain.
    """
    if stop_words is None:
        # Read the corpus once per call rather than once per token.
        stop_words = stopwords.words("english")
    # A set gives O(1) membership tests instead of scanning a list.
    stop_words = frozenset(stop_words)

    text = text.lower()
    text = re.sub(r"\d+", "", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    # NOTE(review): punctuation is stripped before filtering, so stop words
    # containing an apostrophe cannot match their stripped form. Left as-is
    # on the assumption that training used the same steps - confirm.
    return " ".join(word for word in text.split() if word not in stop_words)

# Two tabs: the interactive classifier and the static evaluation report
app, model_eval = st.tabs(["📩 Application", "📊 Model Evaluation"])

# Spam Detector Application
with app:
    st.title("📩 Spam Detector App")
    st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")

    user_input = st.text_area("Enter your message:")
    check_requested = st.button("Check Spam")

    if check_requested and not user_input.strip():
        # Nothing but whitespace was entered; ask for real input.
        st.warning("Please enter a message to check.")
    elif check_requested:
        # Clean the text, vectorize it, and classify with the selected model.
        features = vectorizer.transform([preprocess_text(user_input)])
        predicted_label = model.predict(features)[0]

        # Label 1 is reported as spam; any other label as not spam.
        result = "Spam" if predicted_label == 1 else "Not Spam"
        st.success(f"Prediction: {result} ({model_choice})")

# Model Evaluation Tab
with model_eval:
    # Static report content, rendered top to bottom in the order listed.
    confusion_legend = (
        "- **True Positives (TP):** Correctly predicted Spam",
        "- **True Negatives (TN):** Correctly predicted Not Spam",
        "- **False Positives (FP):** Predicted Spam but was actually Not Spam (Type I error)",
        "- **False Negatives (FN):** Predicted Not Spam but was actually Spam (Type II error)",
    )
    # Each entry: (section header, description, image file)
    confusion_sections = (
        (
            "Naive Bayes Confusion Matrix",
            "The image below represents the Confusion Matrix of the Naive Bayes model.",
            "MNBconfusion_matrix.png",
        ),
        (
            "Logistic Regression Confusion Matrix",
            "The image below represents the Confusion Matrix of the Logistic Regression model.",
            "LRconfusion_matrix.png",
        ),
        (
            "SVM Confusion Matrix",
            "The image below represents the Confusion Matrix of the SVM model.",
            "SVMconfusion_matrix.png",
        ),
    )
    metric_sections = (
        (
            "Naive Bayes Evaluation Metrics",
            "The image below represents the **Accuracy, F1 score, and classification report** of the Naive Bayes model.",
            "MNBclassification_report.png",
        ),
        (
            "Logistic Regression Evaluation Metrics",
            "The image below represents the **Accuracy, F1 score, and classification report** of the Logistic Regression model.",
            "LRclassification_report.png",
        ),
        (
            "SVM Evaluation Metrics",
            "The image below represents the **Accuracy, F1 score, and classification report** of the SVM model.",
            "SVM_classification_report.png",
        ),
    )

    # Introduction and dataset attribution
    st.header("Model Evaluation")
    st.write("The Spam Detection model was trained to classify messages as 'Spam' or 'Not Spam'. The dataset was taken from Kaggle.")
    st.write("Dataset by Faisal Qureshi: [Kaggle Link](https://www.kaggle.com/datasets/mfaisalqureshi/spam-email)")

    # Confusion Matrix
    st.title("Confusion Matrix")
    st.write("The confusion matrix displays actual vs. predicted labels. Consider the following when interpreting it:")
    for legend_line in confusion_legend:
        st.write(legend_line)

    for section_header, description, image_file in confusion_sections:
        st.header(section_header)
        st.write(description)
        st.image(image_file)

    # Evaluation Metrics
    st.title("Evaluation Metrics")
    st.write("Evaluation metrics help assess the performance of the spam detector.")

    for section_header, description, image_file in metric_sections:
        st.header(section_header)
        st.write(description)
        st.image(image_file)

    # COMPARISON
    st.header("Comparison")
    st.write("Based on the confusion matrix and evaluation metrics, we can assume that out of the three classification algorithms chosen, Naive Bayes performs the best using this dataset")