Upload 6 files

Browse files

Files changed (6) hide show

README.md +2 -0
main.ipynb +0 -0
spam_app.py +89 -0
spam_detector_model.joblib +3 -0
spam_ham_india.csv +0 -0
tfidf_vectorizer.joblib +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Spam-message-detection
2	+ Building model to detect spam messages specifically for India most probably by using NNs and ML

main.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

spam_app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import streamlit as st
+import joblib
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+import string
+import re
+# Download NLTK data
+nltk.download('punkt')
+nltk.download('stopwords')
+def preprocess_text(text):
+    # Convert to lowercase
+    text = text.lower()
+    # Remove punctuation
+    text = ''.join([char for char in text if char not in string.punctuation])
+    # Remove numbers
+    text = re.sub(r'\d+', '', text)
+    # Remove extra whitespace
+    text = ' '.join(text.split())
+    # Tokenization
+    tokens = word_tokenize(text)
+    # Remove stopwords
+    stop_words = set(stopwords.words('english'))
+    tokens = [token for token in tokens if token not in stop_words]
+    # Join tokens back into text
+    return ' '.join(tokens)
+# Load the saved model and vectorizer
+model = joblib.load('spam_detector_model.joblib')
+vectorizer = joblib.load('tfidf_vectorizer.joblib')
+# Create the Streamlit interface
+st.title("📧 Spam Message Detector")
+st.write("""
+This app detects whether a message is spam or not.
+Enter your message below and click 'Analyze' to check!
+""")
+# Create text input
+message = st.text_area("Enter your message:", height=100)
+if st.button("Analyze"):
+    if message:
+        # Preprocess the input
+        processed_text = preprocess_text(message)
+        # Vectorize the text
+        text_vectorized = vectorizer.transform([processed_text])
+        # Make prediction
+        prediction = model.predict(text_vectorized)[0]
+        probability = model.predict_proba(text_vectorized)[0]
+        # Display result
+        st.markdown("### Analysis Result")
+        if prediction == 1:
+            st.error("🚨 This message is likely SPAM!")
+            st.write(f"Confidence: {probability[1]:.2%}")
+        else:
+            st.success("✅ This message appears to be legitimate.")
+            st.write(f"Confidence: {probability[0]:.2%}")
+        # Show preprocessing details
+        with st.expander("See preprocessing steps"):
+            st.write("Original message:", message)
+            st.write("Processed message:", processed_text)
+    else:
+        st.warning("Please enter a message to analyze.")
+# Add sidebar information
+with st.sidebar:
+    st.header("About the Model")
+    st.write("""
+    This spam detector uses an XGBoost classifier trained on a dataset of spam and legitimate messages.
+    Model Performance:
+    - Training Accuracy: 99.7%
+    - Testing Accuracy: 98.9%
+    """)

spam_detector_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b493180f86a79d71c0f8bb56f09c25627feb48b89e9bd20df1740295abd902d
+size 339087

spam_ham_india.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

tfidf_vectorizer.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae27e1b70184ff42809e70f62e4f58bd9342ed9a2cf438508dd9991b08e6969c
+size 94260