# Email Spam Detector: a Streamlit app that trains a TF-IDF + Multinomial
# Naive Bayes classifier on a labelled CSV and classifies user-entered text.
import streamlit as st
import pandas as pd
import string
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
# ----------------- STREAMLIT CONFIG -----------------
# The same title is used for the browser tab and the page heading. The leading
# emoji had been mis-encoded into "π§"; it is restored to the e-mail glyph.
APP_TITLE = "📧 Email Spam Detector"

st.set_page_config(page_title=APP_TITLE, layout="centered")
st.title(APP_TITLE)
st.markdown("This app uses **Machine Learning** (Naive Bayes + TF-IDF) to classify emails as **Spam** or **Ham (Not Spam)**.")
# ----------------- DATA LOADING -----------------
@st.cache_data
def load_data(file):
    """Load a labelled spam dataset and return it in normalised form.

    Args:
        file: Anything ``pandas.read_csv`` accepts; this script passes either
            an uploaded file object or the path ``"spam.csv"``. The CSV is
            read as latin-1 and must contain the columns ``v1`` (label text,
            ``ham`` or ``spam``) and ``v2`` (message text).

    Returns:
        A DataFrame with an integer ``label`` column (0 = ham, 1 = spam) and
        a ``message`` column. Rows whose label is not ham/spam or whose
        message is missing are dropped, because they cannot be cleaned or
        used for training.

    Raises:
        KeyError: If the ``v1`` or ``v2`` column is absent.
    """
    raw = pd.read_csv(file, encoding='latin-1')

    # Tolerate case and surrounding-whitespace variations such as "Ham" or
    # " spam "; anything that still does not match becomes NaN.
    labels = (
        raw['v1']
        .astype(str)
        .str.strip()
        .str.lower()
        .map({'ham': 0, 'spam': 1})
    )

    df = pd.DataFrame({'label': labels, 'message': raw['v2']})
    df = df.dropna(subset=['label', 'message'])
    # The NaN placeholders forced a float column; restore integer labels.
    df['label'] = df['label'].astype(int)
    return df.reset_index(drop=True)
# File uploader for user dataset
# NOTE(review): the leading "π" in the subheader looks like a mis-encoded
# emoji; the intended glyph cannot be recovered from this file — confirm and
# restore.
st.subheader("π Upload Dataset")
uploaded_file = st.file_uploader("Upload your spam dataset (CSV format)", type=["csv"])

# Use the uploaded CSV when there is one, otherwise fall back to the bundled
# spam.csv. A missing default file or a CSV without the expected columns is
# reported in the page instead of surfacing as a raw traceback.
try:
    if uploaded_file is not None:
        df = load_data(uploaded_file)
        st.success("✅ Dataset loaded successfully from uploaded file.")
    else:
        st.info("ℹ️ No file uploaded. Using default dataset (spam.csv).")
        df = load_data("spam.csv")
except (FileNotFoundError, KeyError, ValueError) as exc:
    st.error(f"Could not load the dataset: {exc}")
    # Nothing below can run without a dataset, so end this script run here.
    st.stop()
# ----------------- PREPROCESS FUNCTION -----------------
# Built once at import time: deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(text):
    """Normalise a message for vectorisation.

    Lower-cases the text, removes ASCII punctuation and trims surrounding
    whitespace.

    Args:
        text: The raw message. ``None`` and float NaN (how pandas represents
            an empty CSV cell) are treated as an empty message; any other
            non-string value is converted with ``str()``.

    Returns:
        The cleaned string (possibly empty).
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ""
        text = str(text)

    text = text.lower().translate(_PUNCTUATION_TABLE)
    # Strip last so whitespace exposed by removed punctuation is trimmed too.
    return text.strip()
# Run every message through the same cleaner that is used at prediction time.
df['message'] = df['message'].map(clean_text)

# ----------------- TRAIN / TEST SPLIT -----------------
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    **split_options,
)

# ----------------- TF-IDF VECTORIZATION -----------------
# The vocabulary and IDF weights are learned from the training messages only;
# the held-out messages are projected onto that fitted vocabulary.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)
# ----------------- MODEL TRAINING -----------------
# fit() returns the estimator itself, so construction and training are chained.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# ----------------- METRICS -----------------
# Score the held-out split with each metric in turn.
y_pred = model.predict(X_test_tfidf)
accuracy, precision, recall, f1 = (
    score(y_test, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
# ----------------- SIDEBAR METRICS -----------------
# Show the held-out evaluation scores as percentages in the sidebar.
# NOTE(review): the leading "π" in the header looks like a mis-encoded emoji;
# the intended glyph cannot be recovered from this file — confirm and restore.
st.sidebar.header("π Model Performance")
st.sidebar.write(f"**Accuracy:** {accuracy:.2%}")
st.sidebar.write(f"**Precision:** {precision:.2%}")
st.sidebar.write(f"**Recall:** {recall:.2%}")
st.sidebar.write(f"**F1 Score:** {f1:.2%}")
# Markdown only turns "\n" into a visible line break when the line ends with
# two spaces; with a single space both entries ran together on one line.
st.sidebar.markdown("Model: `Multinomial Naive Bayes`  \nVectorizer: `TF-IDF`")
# Confusion Matrix
# Pin the class order to 0 (ham), 1 (spam) so the matrix is always 2x2 and
# lines up with the tick labels, even if the held-out split lacks one class.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
fig, ax = plt.subplots()
# Draw on this figure's own axes instead of relying on pyplot's global
# "current axes" state.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=["Ham", "Spam"],
    yticklabels=["Ham", "Spam"],
    ax=ax,
)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
st.sidebar.pyplot(fig)
# The script reruns on every interaction; close the figure so pyplot does not
# keep one more open figure per rerun.
plt.close(fig)
# ----------------- PREDICT FUNCTION -----------------
def predict_message(msg):
    """Classify one message and report the model's confidence in that class.

    The message is cleaned with ``clean_text``, vectorised with the fitted
    module-level ``vectorizer`` and scored by the module-level ``model``.

    Args:
        msg: The raw message text entered by the user.

    Returns:
        A ``(label, probability)`` tuple, where ``label`` is the display
        string for spam or ham and ``probability`` is the predicted
        probability of the chosen class.
    """
    msg_clean = clean_text(msg)
    vect_msg = vectorizer.transform([msg_clean])

    # A single predict_proba call yields both the winning class and its
    # probability. The class is looked up through model.classes_ so the label
    # value is never assumed to double as a column index.
    proba = model.predict_proba(vect_msg)[0]
    best = proba.argmax()
    pred = model.classes_[best]
    prob = proba[best]

    return ("🚫 Spam", prob) if pred == 1 else ("✅ Ham (Not Spam)", prob)
# ----------------- USER INPUT -----------------
# Free-text box plus a button; the message is classified only when the button
# is pressed and the box contains something other than whitespace.
# NOTE(review): the leading "βοΈ" in the subheader looks like a mis-encoded
# emoji; the intended glyph cannot be recovered from this file — confirm and
# restore.
st.subheader("βοΈ Test Your Message")
user_input = st.text_area("Enter your email message here:")
if st.button("Detect"):
    if not user_input.strip():
        st.warning("⚠️ Please enter a message to classify.")
    else:
        result, confidence = predict_message(user_input)
        # Two spaces before "\n" are what make Markdown render a line break.
        st.success(f"Prediction: **{result}**  \nConfidence: **{confidence:.2%}**")
# ----------------- SAMPLE DATA -----------------
# NOTE(review): the leading "π" in the expander label looks like a mis-encoded
# emoji; the intended glyph cannot be recovered from this file — confirm and
# restore.
with st.expander("π View Sample Dataset"):
    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so cap the sample size for small datasets.
    st.dataframe(df.sample(min(10, len(df))))
|