Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import string | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix | |
# ----------------- STREAMLIT CONFIG -----------------
# Page chrome: browser-tab title, centered layout, heading and a short blurb.
APP_TITLE = "π§ Email Spam Detector"
APP_BLURB = (
    "This app uses **Machine Learning** (Naive Bayes + TF-IDF) "
    "to classify emails as **Spam** or **Ham (Not Spam)**."
)

st.set_page_config(page_title=APP_TITLE, layout="centered")
st.title(APP_TITLE)
st.markdown(APP_BLURB)
| # ----------------- DATA LOADING ----------------- | |
def load_data(file):
    """Load a spam/ham CSV and return it as a two-column DataFrame.

    The CSV must provide a ``v1`` column holding the class (``ham`` or
    ``spam``) and a ``v2`` column holding the message text; any other
    columns are ignored.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with an integer ``label`` column (0 = ham, 1 = spam) and a
        string ``message`` column. Rows whose label is not ham/spam or whose
        message is missing are dropped, and the index is renumbered.

    Raises:
        KeyError: If the ``v1`` or ``v2`` column is absent.
    """
    raw = pd.read_csv(file, encoding='latin-1')
    missing = [col for col in ('v1', 'v2') if col not in raw.columns]
    if missing:
        raise KeyError(f"Dataset is missing required column(s): {missing}")

    # Work on a copy so the column assignments below never target a view.
    df = raw[['v1', 'v2']].copy()
    df.columns = ['label', 'message']
    # Tolerate stray whitespace / capitalisation in the class column.
    normalized = df['label'].astype(str).str.strip().str.lower()
    df['label'] = normalized.map({'ham': 0, 'spam': 1})
    # Unmapped labels become NaN and blank messages are read as NaN; either
    # would break text cleaning and model fitting, so drop those rows.
    df = df.dropna(subset=['label', 'message']).reset_index(drop=True)
    df['label'] = df['label'].astype(int)
    df['message'] = df['message'].astype(str)
    return df
# File uploader for user dataset
st.subheader("π Upload Dataset")
uploaded_file = st.file_uploader("Upload your spam dataset (CSV format)", type=["csv"])
# Fall back to the bundled spam.csv when the user has not uploaded anything.
data_source = uploaded_file if uploaded_file is not None else "spam.csv"
try:
    df = load_data(data_source)
except (KeyError, ValueError, OSError) as exc:
    # KeyError: the expected v1/v2 columns are absent. ValueError: pandas could
    # not parse the CSV (its parser and empty-data errors derive from
    # ValueError). OSError: the default file is missing or unreadable.
    # Report the problem in the page and halt this run instead of surfacing a
    # raw traceback and continuing without a dataset.
    st.error(f"Could not load the dataset: {exc}")
    st.stop()
if uploaded_file is not None:
    st.success("β Dataset loaded successfully from uploaded file.")
else:
    st.info("βΉοΈ No file uploaded. Using default dataset (spam.csv).")
| # ----------------- PREPROCESS FUNCTION ----------------- | |
# Built once at import time: deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(text):
    """Normalize a message for vectorization.

    Lower-cases the text, removes ASCII punctuation and trims surrounding
    whitespace.

    Args:
        text: The raw message. ``None`` and float NaN are treated as an empty
            message; any other non-string value is converted with ``str``.

    Returns:
        The cleaned string (possibly empty).
    """
    if text is None:
        return ""
    if isinstance(text, float) and text != text:  # NaN compares unequal to itself
        return ""
    if not isinstance(text, str):
        text = str(text)
    # Trim after deleting punctuation so input such as "hi !" does not keep a
    # dangling space where the punctuation used to be.
    return text.lower().translate(_PUNCTUATION_TABLE).strip()
# Clean every message in place before the data is split.
df['message'] = df['message'].map(clean_text)

# ----------------- TRAIN / TEST SPLIT -----------------
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# ----------------- TF-IDF VECTORIZATION -----------------
# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that fitted vectorizer.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# ----------------- MODEL TRAINING -----------------
# fit() returns the estimator itself, so construction and training chain.
model = MultinomialNB().fit(X_train_tfidf, y_train)
# ----------------- METRICS -----------------
# Score the held-out split. zero_division=0 makes precision/recall/F1 an
# explicit 0 (rather than a warning) when a denominator is empty, e.g. when
# the model predicts no spam at all on a small uploaded dataset.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)
# ----------------- SIDEBAR METRICS -----------------
st.sidebar.header("π Model Performance")
st.sidebar.write(f"**Accuracy:** {accuracy:.2%}")
st.sidebar.write(f"**Precision:** {precision:.2%}")
st.sidebar.write(f"**Recall:** {recall:.2%}")
st.sidebar.write(f"**F1 Score:** {f1:.2%}")
st.sidebar.markdown("Model: `Multinomial Naive Bayes` \nVectorizer: `TF-IDF`")

# Confusion Matrix
# labels=[0, 1] pins the matrix to 2x2 (ham, spam) even if the test split
# happens to contain a single class, so it always matches the tick labels.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
fig, ax = plt.subplots()
# Draw on the axes created above instead of relying on pyplot's implicit
# "current axes" state.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=["Ham", "Spam"],
    yticklabels=["Ham", "Spam"],
    ax=ax,
)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
st.sidebar.pyplot(fig)
# pyplot keeps every figure it creates registered until it is closed, and
# Streamlit re-executes this script on each interaction, so release the
# figure once it has been rendered.
plt.close(fig)
| # ----------------- PREDICT FUNCTION ----------------- | |
def predict_message(msg):
    """Classify a single message as spam or ham.

    Args:
        msg: Raw message text; it is cleaned with ``clean_text`` and
            vectorized with the fitted module-level ``vectorizer``.

    Returns:
        A ``(label, probability)`` tuple: the display label for the predicted
        class and the model's probability for that class.
    """
    msg_clean = clean_text(msg)
    vect_msg = vectorizer.transform([msg_clean])
    # A single predict_proba call yields both the winning class and its
    # probability. Its columns follow model.classes_, so look the class up
    # there rather than using the predicted label itself as a column index.
    probabilities = model.predict_proba(vect_msg)[0]
    best = probabilities.argmax()
    pred = model.classes_[best]
    prob = probabilities[best]
    return ("π« Spam", prob) if pred == 1 else ("β Ham (Not Spam)", prob)
# ----------------- USER INPUT -----------------
st.subheader("βοΈ Test Your Message")
user_input = st.text_area("Enter your email message here:")

# Classify only when the button is pressed; whitespace-only input is
# treated as empty and gets a warning instead of a prediction.
if st.button("Detect"):
    if user_input.strip():
        result, confidence = predict_message(user_input)
        st.success(f"Prediction: **{result}** \nConfidence: **{confidence:.2%}**")
    else:
        st.warning("β οΈ Please enter a message to classify.")
# ----------------- SAMPLE DATA -----------------
with st.expander("π View Sample Dataset"):
    # DataFrame.sample raises when asked for more rows than exist, so cap the
    # preview at the dataset size to keep small uploads from crashing here.
    st.dataframe(df.sample(min(10, len(df))))