# spam_detection / app.py
# Author: mrciomnl — "major changes made" (commit 6ae6f57)
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import string
import re
from wordcloud import WordCloud
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
def load_data():
    """Read spam.csv (latin-1 encoded) and return a DataFrame with
    two columns: 'label' (ham/spam) and 'message' (raw SMS text)."""
    raw = pd.read_csv("spam.csv", encoding="latin-1")
    # Keep only the two meaningful columns and give them readable names.
    return raw[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})
# Preprocess text
def preprocess_text(text):
    """Lowercase *text* and remove all ASCII punctuation.

    Uses str.translate with a deletion table instead of the previous
    re.sub(f"[{string.punctuation}]", ...): string.punctuation contains
    regex metacharacters (']', '\\', '^', '-') that were unescaped inside
    the character class and only parsed correctly by coincidence of
    character ordering. translate is robust and does one C-level pass.
    """
    return text.lower().translate(str.maketrans("", "", string.punctuation))
def train_model(X_train, y_train):
    """Fit a TF-IDF + Multinomial Naive Bayes spam classifier.

    Returns a (model, vectorizer) pair; the vectorizer must be reused
    to transform any text given to the model later.
    """
    # Vectorize first: the classifier trains on TF-IDF weights, not raw text.
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(X_train)
    # MultinomialNB.fit returns self, so this binds the fitted classifier.
    classifier = MultinomialNB().fit(features, y_train)
    return classifier, vectorizer
# Streamlit app navigation: the sidebar selection drives which branch renders.
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to:", ["Data Exploration", "Model Training & Evaluation", "Message Prediction"])

# Load the dataset once per rerun and attach the normalized text column
# that both training and prediction rely on.
df = load_data()
df['message_clean'] = df['message'].apply(preprocess_text)

if page == "Data Exploration":
    # --- Dataset overview and visualizations ---
    st.title("📊 Data Exploration")
    st.write("This page provides an overview of the dataset, including distributions and key insights.")
    st.subheader("Dataset Overview")
    st.write(df.head())
    st.write("Total messages:", df.shape[0])
    st.write(df['label'].value_counts())

    # Class balance: counts of spam vs. ham messages.
    st.subheader("Spam vs. Ham Distribution")
    dist_fig, dist_ax = plt.subplots()
    sns.countplot(x=df['label'], palette='coolwarm', ax=dist_ax)
    st.pyplot(dist_fig)

    # Most frequent terms across all spam messages, as a word cloud.
    st.subheader("Word Cloud for Spam Messages")
    spam_words = " ".join(df[df['label'] == 'spam']['message_clean'])
    wordcloud = WordCloud(width=500, height=300, background_color='black').generate(spam_words)
    cloud_fig, cloud_ax = plt.subplots()
    cloud_ax.imshow(wordcloud, interpolation='bilinear')
    cloud_ax.axis("off")
    st.pyplot(cloud_fig)

elif page == "Model Training & Evaluation":
    # --- Fit the classifier and report held-out performance ---
    st.title("📈 Model Training & Evaluation")
    st.write("This page shows the model training process and performance evaluation.")

    # 80/20 split on the *cleaned* text; fixed seed keeps reruns reproducible.
    X_train, X_test, y_train, y_test = train_test_split(df['message_clean'], df['label'], test_size=0.2, random_state=42)
    model, vectorizer = train_model(X_train, y_train)

    # Persist the fitted artifacts so the prediction page can reuse them.
    st.session_state['model'] = model
    st.session_state['vectorizer'] = vectorizer

    # Evaluate on the held-out test set.
    X_test_tfidf = vectorizer.transform(X_test)
    y_pred = model.predict(X_test_tfidf)
    accuracy = accuracy_score(y_test, y_pred)

    st.subheader("Model Performance")
    st.write("The model is evaluated using accuracy and a classification report.")
    st.write(f"**Accuracy:** {accuracy:.2f}")
    st.text("Classification Report:")
    st.text(classification_report(y_test, y_pred))
    st.write("**Explanation:** The accuracy score represents the proportion of correctly classified messages. The classification report provides precision, recall, and F1-score for spam and ham categories, helping us understand the model's performance in more detail.")

elif page == "Message Prediction":
    # --- Interactive classification of a user-supplied message ---
    st.title("✉ Message Prediction")
    st.write("Test the model by entering an SMS message to classify it as spam or ham.")

    # The model only exists after the training page has run in this session.
    if 'model' in st.session_state and 'vectorizer' in st.session_state:
        model = st.session_state['model']
        vectorizer = st.session_state['vectorizer']

        user_input = st.text_area("Enter an SMS message:")
        if st.button("Predict"):
            # BUG FIX: the model was trained on preprocess_text()-cleaned
            # messages, but raw input was previously vectorized as-is
            # (train/serve skew). Apply the same preprocessing here.
            user_input_tfidf = vectorizer.transform([preprocess_text(user_input)])
            prediction = model.predict(user_input_tfidf)[0]
            st.success(f"This message is classified as: **{prediction.upper()}**")
            st.write("**Explanation:** The model analyzes the text and classifies it as spam or ham based on learned patterns. Spam messages typically contain promotional content, urgent requests, or suspicious links, while ham messages are normal communications.")
    else:
        st.warning("⚠️ Please train the model first in the 'Model Training & Evaluation' page.")