Spaces:
Build error
Build error
| ## Imports | |
| import tensorflow as tf | |
| import numpy as np | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| import gradio | |
## Load Data
# Raw dataset; the code below reads its 'Category' and 'Message' columns.
dataset = pd.read_csv('./SPAMtextmessage.csv')

## Data Preprocessing
# Encode the labels numerically (ham -> 0, spam -> 1) and cast them to int.
dataset['Category'] = (
    dataset['Category']
    .str.replace('ham', '0')
    .str.replace('spam', '1')
    .astype(int)
)

# Plain Python lists of the integer labels and the message texts.
labels = dataset['Category'].tolist()
sentences = dataset['Message'].tolist()
# Split sentences and labels into training and test sets: the first 80% of
# the rows (in file order) are used for training, the remainder for testing.
training_size = int(len(sentences) * 0.8)

# Sentence variables
training_sentences, testing_sentences = (
    sentences[:training_size],
    sentences[training_size:],
)

# Labels variables
training_labels, testing_labels = (
    labels[:training_size],
    labels[training_size:],
)

# The network is fed numpy arrays later on, not Python lists.
training_labels_final = np.asarray(training_labels)
testing_labels_final = np.asarray(testing_labels)
## Text Preprocessing
# Hyper-parameters shared by the tokenizer, the padding step and the model.
vocab_size = 1000
embedding_dim = 16
max_length = 100
trunc_type = 'post'
padding_type = 'post'
# NOTE(review): an empty out-of-vocabulary token looks unintended (a
# markup-like literal such as "<OOV>" may have been lost) — confirm.
oov_tok = ""

# The vocabulary is fitted on the training sentences only.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Training and test sequences are padded/truncated with identical settings.
_pad_options = dict(
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences, **_pad_options)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, **_pad_options)
## Modeling
# Embedding -> Flatten -> Dense(20, relu) -> Dense(10, relu) -> Dense(1, sigmoid)
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length)
)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(20, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Binary classification with Adam, lr = 0.01
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train on the padded training sequences; the held-out split is used as
# validation data after every epoch.
model.fit(
    padded,
    training_labels_final,
    batch_size=128,
    epochs=50,
    validation_data=(testing_padded, testing_labels_final),
)
## Gradio App
def spam_detection(message):
    """Classify a single text message as spam or not spam.

    The message is tokenized with the module-level ``tokenizer``, padded to
    ``max_length`` using the same padding/truncation settings as the training
    data, and scored by ``model``.

    Args:
        message: Text to classify. ``None`` is treated as an empty message.

    Returns:
        ``"Spam"`` when the predicted score is at least 0.5, otherwise
        ``"Not Spam"``.
    """
    # The tokenizer expects text; a None payload would otherwise raise
    # inside it, so fall back to an empty message.
    text = "" if message is None else str(message)

    # Preprocess the input message
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(
        sequence,
        maxlen=max_length,
        padding=padding_type,
        truncating=trunc_type,
    )

    # Make prediction; verbose=0 keeps predict() from printing a progress
    # bar on every call.
    prediction = model.predict(padded_sequence, verbose=0)[0, 0]

    # Return the result
    return "Spam" if prediction >= 0.5 else "Not Spam"
# Gradio Interface
# The package is imported as plain `gradio` (no `gr` alias), so it is
# referenced by its full name here; `gr.Interface` would raise NameError.
iface = gradio.Interface(
    fn=spam_detection,
    # Textbox takes its caption through `label`; it has no `prompt` argument.
    inputs=gradio.Textbox(label="Enter a message:"),
    outputs="text",
    # Re-run the classifier whenever the input changes.
    live=True,
    # NOTE(review): support for string theme names varies between Gradio
    # releases — confirm against the installed version.
    theme="huggingface",
    title="Spam Message Detection",
    description="A demo app for learning purposes. Detects spam messages with 98% accuracy based on the dataset.",
)

# Launch the app
iface.launch()