Spaces:

Pravincoder
/

Spam_Message_Detection_NLP

Build error

App Files Files Community

Pravincoder committed on Jan 19, 2024

Commit

3e7fad6

verified ·

1 Parent(s): 5282b3f

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -64

app.py CHANGED Viewed

@@ -1,67 +1,11 @@
-## Imports
 import tensorflow as tf
 import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
-import gradio
-## Load Data
-dataset = pd.read_csv('./SPAMtextmessage.csv')
-## Data Preprocessing
-# Convert ham to 0 and spam to 1
-dataset['Category']= dataset['Category'].str.replace('ham','0')
-dataset['Category']= dataset['Category'].str.replace('spam','1')
-dataset['Category']= dataset['Category'].astype(int)
-sentences = dataset['Message'].tolist()
-labels = dataset['Category'].tolist()
-# Separate out the sentences and labels into training and test sets
-training_size = int(len(sentences) * 0.8)
-# Sentence variables
-training_sentences = sentences[0:training_size]
-testing_sentences = sentences[training_size:]
-# Labels variables
-training_labels = labels[0:training_size]
-testing_labels = labels[training_size:]
-# Make labels into numpy arrays for use with the network later
-training_labels_final = np.array(training_labels)
-testing_labels_final = np.array(testing_labels)
-## Text Preprocessing
-vocab_size = 1000
-embedding_dim = 16
-max_length = 100
-trunc_type='post'
-padding_type='post'
-oov_tok = ""
-tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
-tokenizer.fit_on_texts(training_sentences)
-word_index = tokenizer.word_index
-sequences = tokenizer.texts_to_sequences(training_sentences)
-padded = pad_sequences(sequences,maxlen=max_length, padding=padding_type,
-                       truncating=trunc_type)
-testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
-testing_padded = pad_sequences(testing_sequences,maxlen=max_length,
-                               padding=padding_type, truncating=trunc_type)
-## Modeling
-# Set lr = 0.01
-model = tf.keras.Sequential([
-    tf.keras.layers.Embedding(vocab_size,embedding_dim,input_length=max_length),
-    tf.keras.layers.Flatten(),
-    tf.keras.layers.Dense(20,activation='relu'),
-    tf.keras.layers.Dense(10,activation= 'relu'),
-    tf.keras.layers.Dense(1,activation= 'sigmoid')
-])
-model.compile(loss='binary_crossentropy',metrics=['accuracy'],
-              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
-model.fit(padded,training_labels_final,batch_size=128,epochs=50,
-           validation_data=(testing_padded,testing_labels_final))
-## Gradio App
 def spam_detection(message):
     # Preprocess the input message
     sequence = tokenizer.texts_to_sequences([message])
@@ -71,7 +15,7 @@ def spam_detection(message):
     prediction = model.predict(padded_sequence)[0, 0]
     # Return the result
-    return "Spam" if prediction >= 0.5 else "Not Spam"
 # Gradio Interface
 iface = gr.Interface(
@@ -80,9 +24,23 @@ iface = gr.Interface(
     outputs="text",
     live=True,
     theme="huggingface",
-    title="Spam Message Detection",
-    description="A demo app for learning purposes. Detects spam messages with 98% accuracy based on the dataset."
-)
 # Launch the app
-iface.launch()

+import gradio as gr
 import tensorflow as tf
 import numpy as np
 from tensorflow.keras.preprocessing.sequence import pad_sequences
+# Load the trained model
+model = tf.keras.models.load_model('./saved_model.pb')
 def spam_detection(message):
     # Preprocess the input message
     sequence = tokenizer.texts_to_sequences([message])
     prediction = model.predict(padded_sequence)[0, 0]
     # Return the result
+    return "Spam" if prediction >= 0.5 else "Ham"
 # Gradio Interface
 iface = gr.Interface(
     outputs="text",
     live=True,
     theme="huggingface",
+    title='🚫 Spam Message Detection 🕵️‍♂️',
+    description="
+    Welcome to the Spam Message Detection app—a powerful demo designed for learning purposes. 🎓 This application employs advanced machine learning techniques to identify and flag spam messages with remarkable accuracy. 🤖 With a training set accuracy of 99.89% and a validation/test set accuracy of 98.39%, the model has been fine-tuned using a comprehensive dataset.
+    **🔍 Key Features:**
+    - State-of-the-art machine learning model
+    - High accuracy: 99.89% on the training set, 98.39% on the validation/test set
+    - Intuitive user interface for easy interaction
+    - Ideal for educational purposes and exploring spam detection techniques
+    **📝 Instructions:**
+    1. Enter a text message in the provided input box.
+    2. Click the "Detect" button to initiate the spam detection process.
+    3. Receive instant feedback on whether the input message is classified as spam or not.
+    **📌 Note: **
+    This app is a demonstration and educational tool. It showcases the effectiveness of machine learning in identifying spam messages. Enjoy exploring the world of spam detection with our highly accurate model! 🚀"
 # Launch the app
+iface.launch()