Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,67 +1,11 @@
|
|
| 1 |
-
|
| 2 |
import tensorflow as tf
|
| 3 |
import numpy as np
|
| 4 |
-
import pandas as pd
|
| 5 |
-
import matplotlib.pyplot as plt
|
| 6 |
-
from tensorflow.keras.preprocessing.text import Tokenizer
|
| 7 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| 8 |
-
import gradio
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
|
| 13 |
-
## Data Preprocessing
|
| 14 |
-
# Convert ham to 0 and spam to 1
|
| 15 |
-
dataset['Category']= dataset['Category'].str.replace('ham','0')
|
| 16 |
-
dataset['Category']= dataset['Category'].str.replace('spam','1')
|
| 17 |
-
dataset['Category']= dataset['Category'].astype(int)
|
| 18 |
-
sentences = dataset['Message'].tolist()
|
| 19 |
-
labels = dataset['Category'].tolist()
|
| 20 |
-
# Separate out the sentences and labels into training and test sets
|
| 21 |
-
training_size = int(len(sentences) * 0.8)
|
| 22 |
-
# Sentence variables
|
| 23 |
-
training_sentences = sentences[0:training_size]
|
| 24 |
-
testing_sentences = sentences[training_size:]
|
| 25 |
-
# Labels variables
|
| 26 |
-
training_labels = labels[0:training_size]
|
| 27 |
-
testing_labels = labels[training_size:]
|
| 28 |
-
# Make labels into numpy arrays for use with the network later
|
| 29 |
-
training_labels_final = np.array(training_labels)
|
| 30 |
-
testing_labels_final = np.array(testing_labels)
|
| 31 |
-
|
| 32 |
-
## Text Preprocessing
|
| 33 |
-
vocab_size = 1000
|
| 34 |
-
embedding_dim = 16
|
| 35 |
-
max_length = 100
|
| 36 |
-
trunc_type='post'
|
| 37 |
-
padding_type='post'
|
| 38 |
-
oov_tok = ""
|
| 39 |
-
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
|
| 40 |
-
tokenizer.fit_on_texts(training_sentences)
|
| 41 |
-
word_index = tokenizer.word_index
|
| 42 |
-
sequences = tokenizer.texts_to_sequences(training_sentences)
|
| 43 |
-
padded = pad_sequences(sequences,maxlen=max_length, padding=padding_type,
|
| 44 |
-
truncating=trunc_type)
|
| 45 |
-
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
|
| 46 |
-
testing_padded = pad_sequences(testing_sequences,maxlen=max_length,
|
| 47 |
-
padding=padding_type, truncating=trunc_type)
|
| 48 |
-
|
| 49 |
-
## Modeling
|
| 50 |
-
# Set lr = 0.01
|
| 51 |
-
model = tf.keras.Sequential([
|
| 52 |
-
tf.keras.layers.Embedding(vocab_size,embedding_dim,input_length=max_length),
|
| 53 |
-
tf.keras.layers.Flatten(),
|
| 54 |
-
tf.keras.layers.Dense(20,activation='relu'),
|
| 55 |
-
tf.keras.layers.Dense(10,activation= 'relu'),
|
| 56 |
-
tf.keras.layers.Dense(1,activation= 'sigmoid')
|
| 57 |
-
])
|
| 58 |
-
|
| 59 |
-
model.compile(loss='binary_crossentropy',metrics=['accuracy'],
|
| 60 |
-
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
|
| 61 |
-
model.fit(padded,training_labels_final,batch_size=128,epochs=50,
|
| 62 |
-
validation_data=(testing_padded,testing_labels_final))
|
| 63 |
-
|
| 64 |
-
## Gradio App
|
| 65 |
def spam_detection(message):
|
| 66 |
# Preprocess the input message
|
| 67 |
sequence = tokenizer.texts_to_sequences([message])
|
|
@@ -71,7 +15,7 @@ def spam_detection(message):
|
|
| 71 |
prediction = model.predict(padded_sequence)[0, 0]
|
| 72 |
|
| 73 |
# Return the result
|
| 74 |
-
return "Spam" if prediction >= 0.5 else "
|
| 75 |
|
| 76 |
# Gradio Interface
|
| 77 |
iface = gr.Interface(
|
|
@@ -80,9 +24,23 @@ iface = gr.Interface(
|
|
| 80 |
outputs="text",
|
| 81 |
live=True,
|
| 82 |
theme="huggingface",
|
| 83 |
-
title=
|
| 84 |
-
description="
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
# Launch the app
|
| 88 |
-
iface.launch()
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
import tensorflow as tf
|
| 3 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
| 4 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
|
|
| 5 |
|
| 6 |
+
# Load the trained model
|
| 7 |
+
model = tf.keras.models.load_model('./saved_model.pb')
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def spam_detection(message):
|
| 10 |
# Preprocess the input message
|
| 11 |
sequence = tokenizer.texts_to_sequences([message])
|
|
|
|
| 15 |
prediction = model.predict(padded_sequence)[0, 0]
|
| 16 |
|
| 17 |
# Return the result
|
| 18 |
+
return "Spam" if prediction >= 0.5 else "Ham"
|
| 19 |
|
| 20 |
# Gradio Interface
|
| 21 |
iface = gr.Interface(
|
|
|
|
| 24 |
outputs="text",
|
| 25 |
live=True,
|
| 26 |
theme="huggingface",
|
| 27 |
+
title='🚫 Spam Message Detection 🕵️‍♂️',
|
| 28 |
+
description="
|
| 29 |
+
Welcome to the Spam Message Detection app—a powerful demo designed for learning purposes. π This application employs advanced machine learning techniques to identify and flag spam messages with remarkable accuracy. 🤖 With a training set accuracy of 99.89% and a validation/test set accuracy of 98.39%, the model has been fine-tuned using a comprehensive dataset.
|
| 30 |
+
|
| 31 |
+
**π Key Features:**
|
| 32 |
+
- State-of-the-art machine learning model
|
| 33 |
+
- High accuracy: 99.89% on the training set, 98.39% on the validation/test set
|
| 34 |
+
- Intuitive user interface for easy interaction
|
| 35 |
+
- Ideal for educational purposes and exploring spam detection techniques
|
| 36 |
+
|
| 37 |
+
**π Instructions:**
|
| 38 |
+
1. Enter a text message in the provided input box.
|
| 39 |
+
2. Click the "Detect" button to initiate the spam detection process.
|
| 40 |
+
3. Receive instant feedback on whether the input message is classified as spam or not.
|
| 41 |
+
|
| 42 |
+
**π Note:**
|
| 43 |
+
This app is a demonstration and educational tool. It showcases the effectiveness of machine learning in identifying spam messages. Enjoy exploring the world of spam detection with our highly accurate model! π"
|
| 44 |
|
| 45 |
# Launch the app
|
| 46 |
+
iface.launch()
|