# Hugging Face Space: spam-detection demo (Gradio app)
| import pandas as pd | |
| import numpy as np | |
| import re | |
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| import pickle | |
| import gradio as gr | |
# Fetch the NLTK resources the preprocessing pipeline relies on.
for resource in ("punkt", "stopwords", "wordnet"):
    nltk.download(resource)


def _load_pickle(path):
    """Deserialize one pickled artifact from *path* (trusted local file)."""
    # NOTE(review): pickle.load is only safe on trusted files — these are
    # artifacts shipped with the Space, not user input.
    with open(path, "rb") as f:
        return pickle.load(f)


# Fitted CountVectorizer and Naive Bayes classifier produced at training time.
vectorizer = _load_pickle("count_vectorizer_spam.pkl")
nb_model = _load_pickle("nb_model_spam.pkl")
def preprocess_text(text):
    """Normalize raw email text into the form the CountVectorizer was fit on.

    Pipeline: tokenize, drop non-alphanumeric tokens, lowercase, remove
    English stopwords, lemmatize, then strip every character that is not
    a lowercase letter or whitespace (this final scrub drops digits that
    survived the ``isalnum()`` filter).

    Args:
        text: Raw email body as a string.

    Returns:
        A cleaned, space-joined string of lemmatized lowercase tokens.
    """
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Single tokenization pass. The previous version round-tripped through
    # join/word_tokenize three times, which is redundant: after the alnum
    # filter the tokens are plain words separated by single spaces, so
    # re-tokenizing reproduces them unchanged. The .lower() in the stopword
    # test was also redundant — tokens are already lowercased here.
    tokens = [word.lower() for word in word_tokenize(text) if word.isalnum()]
    tokens = [word for word in tokens if word not in stop_words]
    tokens = [lemmatizer.lemmatize(word) for word in tokens]

    return re.sub(r'[^a-z\s]', '', ' '.join(tokens))
def predict_spam(text):
    """Classify an email message as spam or non-spam.

    Args:
        text: Email body from the UI textbox; may be None or blank.

    Returns:
        "Spam" or "Non-Spam", or a prompt string when the input is empty.
    """
    # Guard against an empty or cleared textbox. Gradio can pass None for a
    # cleared component, which would crash the original `text.strip()` call.
    if not text or not text.strip():
        return "Please enter an email!"
    cleaned_text = preprocess_text(text)
    X_input = vectorizer.transform([cleaned_text])
    # The model emits label 1 for spam, 0 otherwise.
    prediction = nb_model.predict(X_input)[0]
    return "Spam" if prediction == 1 else "Non-Spam"
# Wire the classifier into a minimal Gradio UI and start serving.
email_box = gr.Textbox(lines=5, placeholder="Enter email here...")

iface = gr.Interface(
    fn=predict_spam,
    inputs=email_box,
    outputs="text",
    title="Spam Detection",
    description="Enter an email message to detect if it's Spam or Non-Spam.",
    flagging_mode="never",
)

iface.launch()