Spaces:

liamfrank15
/

SpamDetection

Sleeping

File size: 1,813 Bytes

import pandas as pd
import numpy as np
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import gradio as gr

# Fetch the NLTK data used by preprocess_text: the Punkt sentence tokenizer
# (needed by word_tokenize), the stop word lists, and WordNet for the
# lemmatizer.
# Recent NLTK releases (3.9 and later) load the "punkt_tab" resource instead
# of the pickled "punkt" models, so both are requested; whichever one the
# installed NLTK version does not use is simply left unused on disk.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("stopwords")
nltk.download("wordnet")

def _load_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Objects used at prediction time: `vectorizer` turns cleaned text into a
# feature matrix and `nb_model` classifies it.
# NOTE: pickle.load can execute arbitrary code contained in the file, so these
# files must only ever come from a trusted source.
vectorizer = _load_pickle("count_vectorizer_spam.pkl")
nb_model = _load_pickle("nb_model_spam.pkl")

def preprocess_text(text):
    """Normalize raw message text into the cleaned string given to the vectorizer.

    Steps, in order:
      1. Tokenize with NLTK's ``word_tokenize`` and keep only tokens for
         which ``str.isalnum()`` is true (punctuation and mixed tokens such
         as ``n't`` or ``e-mail`` are dropped).
      2. Lowercase the surviving tokens.
      3. Drop English stop words.
      4. Lemmatize each remaining token with ``WordNetLemmatizer`` (default
         part of speech).
      5. Join the lemmas with single spaces and delete every character that
         is not ``a-z`` or whitespace (digits and non-ASCII letters are
         removed at this point).

    Args:
        text: Raw message text as a string.

    Returns:
        The cleaned, lowercase string. It may be empty when nothing survives
        the filters.
    """
    # NOTE(review): the pickled vectorizer was presumably fitted on text
    # cleaned by these same steps -- keep the step order in sync with the
    # training pipeline.

    # Tokenize once. Tokens that pass isalnum() contain no punctuation or
    # whitespace, so joining them and running word_tokenize again between the
    # later steps would return the same tokens; the pipeline therefore works
    # on the token list directly.
    tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    lemmas = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    ]

    # Final scrub: only lowercase ASCII letters and whitespace remain.
    return re.sub(r'[^a-z\s]', '', ' '.join(lemmas))

def predict_spam(text):
    """Classify a message as spam or not using the loaded vectorizer and model.

    Args:
        text: Message text. ``None`` and empty or whitespace-only strings are
            treated as missing input.

    Returns:
        ``"Please enter an email!"`` when there is no input; otherwise
        ``"Spam"`` if the model predicts label ``1`` and ``"Non-Spam"`` for
        any other label.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError instead of showing the prompt.
    if text is None or not text.strip():
        return "Please enter an email!"

    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([preprocess_text(text)])
    label = nb_model.predict(features)[0]

    return "Spam" if label == 1 else "Non-Spam"

# Web UI: one multi-line textbox as input, the verdict string from
# predict_spam as plain-text output, with flagging turned off.
email_input = gr.Textbox(placeholder="Enter email here...", lines=5)

iface = gr.Interface(
    fn=predict_spam,
    inputs=email_input,
    outputs="text",
    title="Spam Detection",
    description="Enter an email message to detect if it's Spam or Non-Spam.",
    flagging_mode="never",
)

# Start serving the interface as soon as this script is executed.
iface.launch()