liamfrank15 committed on
Commit
0a5a7e7
·
verified ·
1 Parent(s): ba4dd83

Upload 4 files

Browse files
count_vectorizer_spam.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab21921805595cfe88566f589a4a6d3340ce4aa1e15928ca204a67a58ce31c9c
3
+ size 546064
gradioapp.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import re
4
+ import nltk
5
+ from nltk.tokenize import word_tokenize
6
+ from nltk.corpus import stopwords
7
+ from nltk.stem import WordNetLemmatizer
8
+ from sklearn.feature_extraction.text import CountVectorizer
9
+ import pickle
10
+ import gradio as gr
11
+
12
# Download NLTK data
# word_tokenize on newer NLTK releases (3.9+) looks up the "punkt_tab"
# resource instead of "punkt"; both are requested so the app starts on
# either generation of the library.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("stopwords")
nltk.download("wordnet")

# Load vectorizer and model
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file. Only ship .pkl artifacts that were produced by a trusted training run.
with open("count_vectorizer_spam.pkl", "rb") as f:
    vectorizer = pickle.load(f)

with open("nb_model_spam.pkl", "rb") as f:
    nb_model = pickle.load(f)
23
+
24
+ # Preprocessing function
25
def preprocess_text(text):
    """Normalize raw message text into the form fed to the vectorizer.

    The text goes through three tokenize/re-join passes, in this order:
    keep only alphanumeric tokens and lowercase the result, drop English
    stop words, then lemmatize each remaining token with WordNet. Finally
    every character other than ``a``-``z`` and whitespace is removed.

    Args:
        text: The raw message string.

    Returns:
        The cleaned, space-joined string.
    """
    # Pass 1: discard tokens containing punctuation, then lowercase.
    alnum_tokens = filter(str.isalnum, word_tokenize(text))
    lowered = ' '.join(alnum_tokens).lower()

    # Pass 2: remove English stop words.
    english_stops = set(stopwords.words('english'))
    kept_tokens = []
    for token in word_tokenize(lowered):
        if token.lower() in english_stops:
            continue
        kept_tokens.append(token)
    without_stops = ' '.join(kept_tokens)

    # Pass 3: lemmatize whatever is left.
    wordnet = WordNetLemmatizer()
    lemmatized = ' '.join(map(wordnet.lemmatize, word_tokenize(without_stops)))

    # Strip digits and any other character outside a-z / whitespace.
    return re.sub(r'[^a-z\s]', '', lemmatized)
43
+
44
+ # Prediction function for Gradio
45
def predict_spam(text):
    """Classify a message as spam or not for the Gradio interface.

    Args:
        text: Raw message text from the input textbox. ``None`` is treated
            the same as an empty string.

    Returns:
        ``"Please enter an email!"`` when there is no non-whitespace input;
        otherwise ``"Spam"`` if the model predicts label ``1`` and
        ``"Non-Spam"`` for any other label.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError before any message could be shown.
    if text is None or not text.strip():
        return "Please enter an email!"

    cleaned_text = preprocess_text(text)
    X_input = vectorizer.transform([cleaned_text])
    prediction = nb_model.predict(X_input)[0]

    return "Spam" if prediction == 1 else "Non-Spam"
54
+
55
# Create Gradio interface
iface = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(lines=5, placeholder="Enter email here..."),
    outputs="text",
    title="Spam Detection",
    description="Enter an email message to detect if it's Spam or Non-Spam.",
    # Gradio takes a string mode here ("never", "auto" or "manual"); "never"
    # is the documented way to disable flagging, where the original passed
    # the boolean False.
    allow_flagging="never",
)

# Launch the app
iface.launch()
nb_model_spam.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a35cb6fa9ca2a7195d51346b1c873f8b268ae5e587bd5aaff8d4d7e1f46eb7f
3
+ size 1262542
requirements.txt ADDED
Binary file (4.82 kB). View file