"""Spam-detection web app.

Loads a pre-fitted CountVectorizer and naive-Bayes classifier from pickle
files and serves a Gradio text interface that labels an email message as
"Spam" or "Non-Spam".
"""

import pandas as pd  # noqa: F401 -- kept: may be needed elsewhere / by unpickling
import numpy as np  # noqa: F401
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer  # noqa: F401 -- sklearn must be importable for pickle.load
import pickle
import gradio as gr

# Resources required by word_tokenize, stopwords and WordNetLemmatizer.
nltk.download("punkt")
nltk.download("punkt_tab")  # required by word_tokenize on NLTK >= 3.8.2; harmless no-op on older versions
nltk.download("stopwords")
nltk.download("wordnet")

# NOTE(review): pickle.load executes arbitrary code from the file -- only
# load model files from a trusted source.
with open("count_vectorizer_spam.pkl", "rb") as f:
    vectorizer = pickle.load(f)

with open("nb_model_spam.pkl", "rb") as f:
    nb_model = pickle.load(f)

# Hoisted out of preprocess_text: the original rebuilt the stop-word set and
# the lemmatizer on every call, and compiled the regex per call.
_STOP_WORDS = set(stopwords.words("english"))
_LEMMATIZER = WordNetLemmatizer()
_NON_ALPHA_RE = re.compile(r"[^a-z\s]")


def preprocess_text(text):
    """Normalize raw email text for the vectorizer.

    Pipeline (same result as the original three-pass version):
    tokenize -> drop punctuation-only tokens -> lowercase ->
    drop English stop words -> lemmatize -> strip any remaining
    character outside [a-z] and whitespace (digits included).

    Returns the cleaned text as a single space-joined string.
    """
    # Single tokenization pass; kept tokens are alphanumeric, so re-tokenizing
    # the joined string (as the original did twice more) cannot change them.
    tokens = [word.lower() for word in word_tokenize(text) if word.isalnum()]
    content_words = [word for word in tokens if word not in _STOP_WORDS]
    lemmatized = " ".join(_LEMMATIZER.lemmatize(word) for word in content_words)
    return _NON_ALPHA_RE.sub("", lemmatized)


def predict_spam(text):
    """Classify *text*; returns "Spam", "Non-Spam", or a prompt when blank."""
    if not text.strip():
        return "Please enter an email!"
    features = vectorizer.transform([preprocess_text(text)])
    prediction = nb_model.predict(features)[0]
    return "Spam" if prediction == 1 else "Non-Spam"


iface = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(lines=5, placeholder="Enter email here..."),
    outputs="text",
    title="Spam Detection",
    description="Enter an email message to detect if it's Spam or Non-Spam.",
    flagging_mode="never",
)

# Guarded so importing this module (e.g. for testing) does not start the server.
if __name__ == "__main__":
    iface.launch()