Spaces:

Gillie2004
/

SMS_Spam_Detection_using_SVM

Build error

App Files Files Community

SMS_Spam_Detection_using_SVM / app.py

3v324v23

app

eabb121 10 months ago

raw

history blame contribute delete

5.16 kB

	import streamlit as st
	import joblib
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.svm import SVC
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score, confusion_matrix

	# Browser-tab title, icon and wide layout for the whole app.
	st.set_page_config(
	    page_title="SMS Spam Detector",
	    page_icon="📩",
	    layout="wide",
	)

	# Stylesheet injected into the page: helper classes for centred and padded
	# containers, a centring rule for .stDataFrame and a size rule for <img>.
	_PAGE_CSS = """
	<style>
	.centered-container {
	display: flex;
	justify-content: center;
	align-items: center;
	flex-direction: column;
	text-align: center;
	width: 80%;
	}
	.padded-container {
	padding: 20px;
	}
	.big-dataset {
	font-size: 12px;
	max-width: 100%;
	margin: auto;
	}
	.stDataFrame {
	display: flex;
	justify-content: center;
	align-items: center;
	}
	img {
	max-width: 150px;
	height: 600px;
	}
	</style>
	"""
	# unsafe_allow_html is required for the raw <style> tag to be emitted as HTML.
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# Page heading
	st.title("📩 SMS Spam Detector")

	# Load dataset
	@st.cache_data
	def load_data():
	    """Read ``spam.csv`` and return it as a two-column labelled frame.

	    Only the ``v1`` and ``v2`` columns of the CSV are kept; they are renamed
	    to ``label`` and ``message``. The textual labels are then mapped to
	    integers: ``'ham'`` becomes 0 and ``'spam'`` becomes 1.

	    Returns:
	        pandas.DataFrame: columns ``label`` and ``message``.
	    """
	    raw = pd.read_csv("spam.csv", encoding='latin-1')
	    frame = raw[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})
	    frame['label'] = frame['label'].map({'ham': 0, 'spam': 1})
	    return frame


	df = load_data()

	# Train and save model
	@st.cache_resource
	def train_and_save_model():
	    """Fit the TF-IDF + linear SVM spam classifier and persist it.

	    The module-level ``df`` is split 80/20 with ``random_state=42``. A
	    ``TfidfVectorizer`` (English stop words, at most 5000 features) is fitted
	    on the training messages, a linear-kernel ``SVC`` is trained on those
	    features, and accuracy is measured on the held-out 20%.

	    Side effects:
	        Writes ``svm_sms_spam.pkl`` and ``vectorizer.pkl`` via ``joblib.dump``.

	    Returns:
	        tuple: ``(svm_model, vectorizer, accuracy)``.
	    """
	    train_text, test_text, train_labels, test_labels = train_test_split(
	        df['message'], df['label'], test_size=0.2, random_state=42
	    )

	    vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
	    train_features = vectorizer.fit_transform(train_text)

	    svm_model = SVC(kernel='linear')
	    svm_model.fit(train_features, train_labels)

	    # Score on the held-out messages, vectorised with the vocabulary
	    # learnt from the training split only.
	    predictions = svm_model.predict(vectorizer.transform(test_text))
	    accuracy = accuracy_score(test_labels, predictions)

	    # Persist the classifier first, then the vectorizer.
	    for artefact, filename in (
	        (svm_model, "svm_sms_spam.pkl"),
	        (vectorizer, "vectorizer.pkl"),
	    ):
	        joblib.dump(artefact, filename)

	    return svm_model, vectorizer, accuracy


	svm_model, vectorizer, accuracy = train_and_save_model()

	# Create tabs
	tab1, tab2, tab3 = st.tabs(["📊 Data Overview", "📈 Data Visualization", "🔍 Spam Detector"])

	# Tab 1: raw dataset, class-balance chart and the model's held-out accuracy.
	with tab1:
	    st.subheader("Dataset Overview")
	    st.markdown('<div class="centered-container">', unsafe_allow_html=True)
	    st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
	    st.dataframe(df, height=300, width=1000)
	    st.markdown('</div>', unsafe_allow_html=True)
	    st.markdown('</div>', unsafe_allow_html=True)

	    # Class distribution, drawn deliberately small (figure, fonts and bars).
	    st.subheader("Class Distribution")
	    fig, ax = plt.subplots(figsize=(2, 2))
	    sns.countplot(
	        # Show readable names on the axis instead of the 0/1 codes.
	        x=df['label'].map({0: 'Not Spam', 1: 'Spam'}),
	        palette='coolwarm',
	        ax=ax,
	        width=0.2,
	    )
	    ax.set_title("Distribution of Spam vs. Not Spam Messages", fontsize=8)
	    ax.set_xlabel("Message Type", fontsize=5)
	    ax.set_ylabel("Count", fontsize=5)
	    ax.tick_params(axis='both', labelsize=5)
	    st.pyplot(fig)
	    # Figures made with plt.subplots stay registered with pyplot until they
	    # are closed; release this one now that it has been rendered.
	    plt.close(fig)

	    # accuracy comes from accuracy_score as a fraction, so scale it to a
	    # percentage; the surrounding "**" renders the value in bold.
	    st.markdown(f"### 📊 Model Accuracy: **{accuracy * 100:.2f}%**")

	# Tab 2: confusion matrix on the held-out split and a length/label
	# correlation heatmap.
	with tab2:
	    st.subheader("Data Visualizations")

	    # Confusion Matrix
	    st.markdown("### Confusion Matrix")
	    # Same arguments and random_state as the split used for training, so this
	    # reproduces the held-out messages; only the test halves are needed here.
	    _, X_test, _, y_test = train_test_split(
	        df['message'], df['label'], test_size=0.2, random_state=42
	    )
	    X_test_tfidf = vectorizer.transform(X_test)
	    y_pred = svm_model.predict(X_test_tfidf)

	    cm = confusion_matrix(y_test, y_pred)
	    fig, ax = plt.subplots(figsize=(5, 3))
	    sns.heatmap(
	        cm,
	        annot=True,
	        fmt='d',
	        cmap='Blues',
	        xticklabels=['Not Spam', 'Spam'],
	        yticklabels=['Not Spam', 'Spam'],
	        # Draw on the axes created above rather than on whatever pyplot
	        # currently considers the active axes.
	        ax=ax,
	    )
	    ax.set_xlabel("Predicted")
	    ax.set_ylabel("Actual")
	    ax.set_title("Confusion Matrix")
	    st.pyplot(fig)
	    # Release the figure once rendered so open figures do not accumulate.
	    plt.close(fig)

	    # Heatmap
	    st.markdown("### Heatmap of Feature Correlations")
	    # Adds a character-count column to df and correlates it with the label.
	    df['message_length'] = df['message'].apply(len)
	    correlation_matrix = df[['message_length', 'label']].corr()
	    fig, ax = plt.subplots(figsize=(5, 3))
	    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
	    ax.set_title("Feature Correlation Heatmap")
	    st.pyplot(fig)
	    plt.close(fig)

	# Tab 3: classify a single message typed by the user.
	with tab3:
	    st.subheader("Check SMS Message")
	    st.write("Enter an SMS message below to check if it's spam or not.")
	    user_input = st.text_area("Enter SMS Message:")

	    if st.button("Check Message"):
	        # Whitespace-only input carries no text to classify, so treat it the
	        # same as an empty box instead of sending it to the model.
	        message = user_input.strip()
	        if message:
	            # Vectorise with the fitted TF-IDF vocabulary; the model expects
	            # a batch, hence the single-element list.
	            input_features = vectorizer.transform([message])
	            prediction = svm_model.predict(input_features)

	            # Label 1 is spam, 0 is ham (see the mapping in load_data).
	            if prediction[0] == 1:
	                st.error("🚨 This message is Spam!")
	            else:
	                st.success("✅ This message is NOT Spam!")
	        else:
	            st.warning("Please enter a message before checking.")