| import streamlit as st | |
| import joblib | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| import string | |
| import re | |
# Fetch the NLTK data used by preprocess_text(): tokenizer models for
# word_tokenize and the English stop-word corpus.
# 'punkt_tab' is requested in addition to 'punkt' because newer NLTK releases
# load the tokenizer from 'punkt_tab', while older releases still read 'punkt';
# asking for both keeps word_tokenize working on either.
# quiet=True suppresses the progress log that would otherwise be printed each
# time this script is executed.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)
def preprocess_text(text):
    """Normalize a raw message into a space-separated string of tokens.

    Steps, in order: lowercase the text, delete every character found in
    ``string.punctuation``, delete runs of digits, collapse whitespace,
    tokenize with NLTK's ``word_tokenize`` and drop English stop words.

    Args:
        text: The raw message string.

    Returns:
        The remaining tokens joined by single spaces (may be empty).

    NOTE(review): presumably this must stay identical to the preprocessing
    used when the vectorizer was fitted -- confirm before changing any step.
    """
    lowered = text.lower()
    # One translate() pass removes the same characters as a per-char filter.
    without_punct = lowered.translate(str.maketrans('', '', string.punctuation))
    without_digits = re.sub(r'\d+', '', without_punct)
    normalized = ' '.join(without_digits.split())

    words = word_tokenize(normalized)
    english_stop_words = set(stopwords.words('english'))
    kept_words = filter(lambda word: word not in english_stop_words, words)
    return ' '.join(kept_words)
# Load the persisted classifier and (judging by the file name) its TF-IDF
# vectorizer from the current working directory; the classifier is loaded
# first, then the vectorizer.
# NOTE(review): Streamlit re-executes the script on each interaction, so both
# files are presumably re-read every time -- consider caching if the installed
# Streamlit version offers a resource cache.
model, vectorizer = (
    joblib.load('spam_detector_model.joblib'),
    joblib.load('tfidf_vectorizer.joblib'),
)
# ---- Main page: title, instructions, message input and analysis result ----
st.title("📧 Spam Message Detector")
st.write("""
This app detects whether a message is spam or not.
Enter your message below and click 'Analyze' to check!
""")
message = st.text_area("Enter your message:", height=100)

if st.button("Analyze"):
    # Whitespace-only input is treated like empty input: it would preprocess
    # to an empty string and the model would still report a verdict for a
    # message that has no content.
    if message and message.strip():
        # Apply the text normalization, then vectorize the single message.
        processed_text = preprocess_text(message)
        text_vectorized = vectorizer.transform([processed_text])

        # Both calls return one row per input; [0] selects the only row.
        prediction = model.predict(text_vectorized)[0]
        probability = model.predict_proba(text_vectorized)[0]

        st.markdown("### Analysis Result")
        # Label 1 is treated as spam. The indexing below assumes the
        # probability columns are ordered (legitimate, spam) -- TODO confirm
        # against model.classes_.
        if prediction == 1:
            st.error("🚨 This message is likely SPAM!")
            st.write(f"Confidence: {probability[1]:.2%}")
        else:
            st.success("✅ This message appears to be legitimate.")
            st.write(f"Confidence: {probability[0]:.2%}")

        # Let the user inspect what the model actually received.
        with st.expander("See preprocessing steps"):
            st.write("Original message:", message)
            st.write("Processed message:", processed_text)
    else:
        st.warning("Please enter a message to analyze.")
# ---- Sidebar: static description of the model and its reported accuracy ----
# Uses the st.sidebar.<element> object notation, which places elements in the
# sidebar just like a `with st.sidebar:` block.
st.sidebar.header("About the Model")
st.sidebar.write("""
This spam detector uses an XGBoost classifier trained on a dataset of spam and legitimate messages.
Model Performance:
- Training Accuracy: 99.7%
- Testing Accuracy: 98.9%
""")