Spaces:
Sleeping
Sleeping
File size: 4,200 Bytes
78be9d8 884d5f3 78be9d8 884d5f3 78be9d8 884d5f3 78be9d8 884d5f3 78be9d8 884d5f3 404ac46 884d5f3 404ac46 884d5f3 404ac46 884d5f3 404ac46 884d5f3 404ac46 884d5f3 404ac46 884d5f3 404ac46 884d5f3 404ac46 884d5f3 404ac46 884d5f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import streamlit as st
import joblib
import re
import string
import nltk
from nltk.corpus import stopwords
# Make sure the NLTK stop-word corpus is available locally.
# Streamlit re-executes this script from the top on each user interaction,
# so only invoke the downloader when the corpus cannot be found instead of
# calling it unconditionally on every rerun.
try:
    nltk.data.find("corpora/stopwords")
except LookupError:
    nltk.download("stopwords")
# Sidebar: the user picks which trained classifier handles the prediction.
st.sidebar.title("π Choose Model")
model_choice = st.sidebar.radio(
    label="Select a model for Spam Detection:",
    options=("Naive Bayes", "Logistic Regression", "Support Vector Machine"),
)
# Serialized classifier file for each sidebar option.
# NOTE(review): the SVM file name has no "_model" suffix, unlike the other
# two entries — confirm it matches the file actually shipped with the app.
model_paths = {
    "Naive Bayes": "MNBspam_classifier_model.pkl",
    "Logistic Regression": "LRspam_classifier_model.pkl",
    "Support Vector Machine": "SVMspam_classifier.pkl",
}

# Deserialize the chosen classifier and the TF-IDF vectorizer used with it.
selected_model_path = model_paths[model_choice]
model = joblib.load(selected_model_path)
vectorizer = joblib.load("tfidf_vectorizer.pkl")
def preprocess_text(text):
    """Normalize a raw message before it is passed to the vectorizer.

    The text is lowercased, runs of digits are removed, every character in
    ``string.punctuation`` is stripped, and the remainder is split on
    whitespace. Tokens found in NLTK's English stop-word list are dropped.

    Args:
        text: The message to clean, as a ``str``.

    Returns:
        The surviving tokens joined by single spaces (``""`` if none remain).
    """
    text = text.lower()
    text = re.sub(r"\d+", "", text)
    text = text.translate(str.maketrans("", "", string.punctuation))

    # Fetch the stop-word list once per call and keep it in a set. The
    # previous code evaluated stopwords.words("english") inside the
    # comprehension condition, i.e. once for every token, and tested
    # membership against the returned list each time.
    stop_words = set(stopwords.words("english"))
    return " ".join(word for word in text.split() if word not in stop_words)
# Two top-level tabs: the interactive detector and the static evaluation page.
app, model_eval = st.tabs(["π© Application", "π Model Evaluation"])

# --- Spam Detector Application tab ---
with app:
    st.title("π© Spam Detector App")
    st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")
    user_input = st.text_area("Enter your message:")

    if st.button("Check Spam"):
        if not user_input.strip():
            # Nothing but whitespace was entered; ask for real input.
            st.warning("Please enter a message to check.")
        else:
            # Clean the message, vectorize it, and classify it with the
            # model selected in the sidebar.
            cleaned_message = preprocess_text(user_input)
            features = vectorizer.transform([cleaned_message])
            predicted_label = model.predict(features)[0]
            # A predicted label of 1 is reported as spam; anything else is not.
            if predicted_label == 1:
                result = "Spam"
            else:
                result = "Not Spam"
            st.success(f"Prediction: {result} ({model_choice})")
# --- Model Evaluation tab: static write-up plus pre-rendered result images ---
with model_eval:
    st.header("Model Evaluation")
    st.write("The Spam Detection model was trained to classify messages as 'Spam' or 'Not Spam'. The dataset was taken from Kaggle.")
    st.write("Dataset by Faisal Qureshi: [Kaggle Link](https://www.kaggle.com/datasets/mfaisalqureshi/spam-email)")

    # One row per model: display name, confusion-matrix image, report image.
    # NOTE(review): the SVM report file name contains an underscore that the
    # other report names do not — confirm it matches the file on disk.
    evaluated_models = (
        ("Naive Bayes", "MNBconfusion_matrix.png", "MNBclassification_report.png"),
        ("Logistic Regression", "LRconfusion_matrix.png", "LRclassification_report.png"),
        ("SVM", "SVMconfusion_matrix.png", "SVM_classification_report.png"),
    )

    # Confusion matrices: legend first, then one section per model.
    st.title("Confusion Matrix")
    st.write("The confusion matrix displays actual vs. predicted labels. Consider the following when interpreting it:")
    for legend_entry in (
        "- **True Positives (TP):** Correctly predicted Spam",
        "- **True Negatives (TN):** Correctly predicted Not Spam",
        "- **False Positives (FP):** Predicted Spam but was actually Not Spam (Type I error)",
        "- **False Negatives (FN):** Predicted Not Spam but was actually Spam (Type II error)",
    ):
        st.write(legend_entry)

    for display_name, matrix_image, _report_image in evaluated_models:
        st.header(f"{display_name} Confusion Matrix")
        st.write(f"The image below represents the Confusion Matrix of the {display_name} model.")
        st.image(matrix_image)

    # Evaluation metrics: one section per model, in the same order as above.
    st.title("Evaluation Metrics")
    st.write("Evaluation metrics help assess the performance of the spam detector.")
    for display_name, _matrix_image, report_image in evaluated_models:
        st.header(f"{display_name} Evaluation Metrics")
        st.write(f"The image below represents the **Accuracy, F1 score, and classification report** of the {display_name} model.")
        st.image(report_image)

    # Closing comparison of the three models.
    st.header("Comparison")
    st.write("Based on the confusion matrix and evaluation metrics, we can assume that out of the three classification algorithms chosen, Naive Bayes performs the best using this dataset")
|