Spaces:

Gillie2004
/

SMS_Spam_Detection_using_SVM

Build error

App Files Files Community

3v324v23 committed on Mar 3, 2025

Commit

136950c

1 Parent(s): 45bb8fa

final changes

Browse files

Files changed (5) hide show

app.py +155 -0
requirements.txt +7 -0
spam.csv +0 -0
svm_sms_spam.pkl +3 -0
vectorizer.pkl +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,155 @@

+from pathlib import Path
+
+import joblib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import streamlit as st
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics import accuracy_score, confusion_matrix
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC
+# Set Streamlit page config
+st.set_page_config(page_title="SMS Spam Detector", page_icon="📩", layout="wide")
+# Custom CSS for centering and styling
+st.markdown("""
+    <style>
+        .centered-container {
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            flex-direction: column;
+            text-align: center;
+            width: 80%;
+        }
+        .padded-container {
+            padding: 20px;
+        }
+        .big-dataset {
+            font-size: 12px;
+            max-width: 100%;
+            margin: auto;
+        }
+        .stDataFrame {
+            display: flex;
+            justify-content: center;
+            align-items: center;
+        }
+        img {
+            max-width: 150px;
+            height: 600px;
+        }
+    </style>
+""", unsafe_allow_html=True)
+# Title
+st.title("📩 SMS Spam Detector")
+# Load dataset
+@st.cache_data
+def load_data():
+    dataset_path = "D:/CCS229 - Intelligent System/SMS_Spam_Detection_using_SVM/spam.csv"
+    df = pd.read_csv(dataset_path, encoding='latin-1')[['v1', 'v2']]
+    df.columns = ['label', 'message']
+    df['label'] = df['label'].map({'ham': 0, 'spam': 1})
+    return df
+df = load_data()
+# Train and save model
+@st.cache_resource
+def train_and_save_model():
+    X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)
+    vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
+    X_train_tfidf = vectorizer.fit_transform(X_train)
+    X_test_tfidf = vectorizer.transform(X_test)
+    svm_model = SVC(kernel='linear')
+    svm_model.fit(X_train_tfidf, y_train)
+    y_pred = svm_model.predict(X_test_tfidf)
+    accuracy = accuracy_score(y_test, y_pred)
+    joblib.dump(svm_model, "D:/CCS229 - Intelligent System/SMS_Spam_Detection_using_SVM/svm_sms_spam.pkl")
+    joblib.dump(vectorizer, "D:/CCS229 - Intelligent System/SMS_Spam_Detection_using_SVM/vectorizer.pkl")
+    return svm_model, vectorizer, accuracy
+svm_model, vectorizer, accuracy = train_and_save_model()
+# Create tabs
+tab1, tab2, tab3 = st.tabs(["📊 Data Overview", "📈 Data Visualization", "🔍 Spam Detector"])
+# Tab 1: Data Overview
+with tab1:
+    st.subheader("Dataset Overview")
+    st.markdown('<div class="centered-container">', unsafe_allow_html=True)
+    st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
+    st.dataframe(df, height=300, width=1000)
+    st.markdown('</div>', unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)
+    # Smaller class distribution title
+    st.subheader("Class Distribution")
+    fig, ax = plt.subplots(figsize=(2, 2))  # Smaller figure size
+    sns.countplot(
+        x=df['label'].map({0: 'Not Spam', 1: 'Spam'}),
+        palette='coolwarm',
+        ax=ax,
+        width=0.2
+    )
+    ax.set_title("Distribution of Spam vs. Not Spam Messages", fontsize=8)  # Smaller title
+    ax.set_xlabel("Message Type", fontsize=5)  # Smaller x-axis label
+    ax.set_ylabel("Count", fontsize=5)  # Smaller y-axis label
+    ax.tick_params(axis='both', labelsize=5)  # Smaller tick labels
+    st.pyplot(fig)
+    st.markdown(f"### 📊 Model Accuracy: **{accuracy * 100:.2f}%**")
+# Tab 2: Data Visualization
+with tab2:
+    st.subheader("Data Visualizations")
+    # Confusion Matrix
+    st.markdown("### Confusion Matrix")
+    X_train, X_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)
+    X_test_tfidf = vectorizer.transform(X_test)
+    y_pred = svm_model.predict(X_test_tfidf)
+    cm = confusion_matrix(y_test, y_pred)
+    fig, ax = plt.subplots(figsize=(5, 3))
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Not Spam', 'Spam'], yticklabels=['Not Spam', 'Spam'])
+    ax.set_xlabel("Predicted")
+    ax.set_ylabel("Actual")
+    ax.set_title("Confusion Matrix")
+    st.pyplot(fig)
+    # Heatmap
+    st.markdown("### Heatmap of Feature Correlations")
+    df['message_length'] = df['message'].apply(len)
+    correlation_matrix = df[['message_length', 'label']].corr()
+    fig, ax = plt.subplots(figsize=(5, 3))
+    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
+    ax.set_title("Feature Correlation Heatmap")
+    st.pyplot(fig)
+    st.markdown('</div>', unsafe_allow_html=True)
+# Tab 3: Spam Detector
+with tab3:
+    st.subheader("Check SMS Message")
+    st.write("Enter an SMS message below to check if it's spam or not.")
+    user_input = st.text_area("Enter SMS Message:")
+    if st.button("Check Message"):
+        if user_input:
+            input_features = vectorizer.transform([user_input])
+            prediction = svm_model.predict(input_features)
+            if prediction[0] == 1:
+                st.error("🚨 This message is Spam!")
+            else:
+                st.success("✅ This message is NOT Spam!")
+        else:
+            st.warning("Please enter a message before checking.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+joblib
+pandas
+matplotlib  # distribution name; "matplotlib.pyplot" is an import path, not an installable package
+seaborn
+scikit-learn  # provides the "sklearn" modules; "sklearn.metrics" is not an installable package
+numpy

spam.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

svm_sms_spam.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e71a5173c59e56448ec3ffe20e45b1acef918074915f47ceaca4b0013f79ccaf
+size 133483

vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2892dd114db5bb43346bd34b5d092cb9e83225d9f2b519513efae2d6443ec153
+size 180007