# SpamDetection / app.py
# Hugging Face Space by liamfrank15 (commit ae1aa36)
import pandas as pd
import numpy as np
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import gradio as gr
# Fetch the NLTK resources the preprocessing pipeline depends on.
for resource in ("punkt", "stopwords", "wordnet"):
    nltk.download(resource)

# Restore the fitted vectorizer and Naive Bayes classifier from disk.
# NOTE(review): pickle.load is only safe on trusted files — these are
# bundled with the app, not user-supplied.
with open("count_vectorizer_spam.pkl", "rb") as f:
    vectorizer = pickle.load(f)
with open("nb_model_spam.pkl", "rb") as f:
    nb_model = pickle.load(f)
def preprocess_text(text):
    """Clean raw email text for the count vectorizer.

    Pipeline (order matches the trained model's preprocessing):
    tokenize, drop punctuation-only tokens, lowercase, remove English
    stopwords, lemmatize, then strip any character outside ``[a-z]``
    and whitespace (which also discards digit-only tokens).

    Args:
        text: Raw email body as a single string.

    Returns:
        A space-joined string of cleaned, lemmatized tokens.
    """
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Single tokenization pass. The original tokenized three times
    # (tokenize -> join -> re-tokenize -> join -> re-tokenize) for the
    # same result; one pass applies the same filters in the same order.
    lemmatized_words = [
        lemmatizer.lemmatize(word.lower())
        for word in word_tokenize(text)
        if word.isalnum() and word.lower() not in stop_words
    ]
    lemmatized_text = ' '.join(lemmatized_words)

    # Final sweep: keep only lowercase letters and whitespace, e.g.
    # numeric tokens that survived isalnum() are removed here.
    return re.sub(r'[^a-z\s]', '', lemmatized_text)
def predict_spam(text):
    """Classify an email message with the loaded Naive Bayes model.

    Args:
        text: Raw email body typed into the UI.

    Returns:
        "Spam" or "Non-Spam", or a prompt string for blank input.
    """
    # Guard clause: reject empty / whitespace-only submissions early.
    if not text.strip():
        return "Please enter an email!"

    cleaned = preprocess_text(text)
    features = vectorizer.transform([cleaned])
    label = nb_model.predict(features)[0]
    return "Spam" if label == 1 else "Non-Spam"
# Wire the classifier into a Gradio UI: one multiline textbox in,
# a plain-text verdict out, with flagging disabled.
iface = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(lines=5, placeholder="Enter email here..."),
    outputs="text",
    title="Spam Detection",
    description="Enter an email message to detect if it's Spam or Non-Spam.",
    flagging_mode="never",
)

iface.launch()