arifa-batool committed on
Commit
ec159e6
·
verified ·
1 Parent(s): eeaa930

Email Spam Classifier using ML and UI in Gradio with Jupyter Notebook

Browse files
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import gradio as gr
4
+ from utils.model_loader import load_models
5
+ from utils.predict import predict
6
+
7
+ vectorizer, model = load_models()
8
+
9
def classify_email(text):
    """Classify an email and return a label-to-confidence mapping.

    The returned dict is what the ``gr.Label`` output component displays.

    Args:
        text: Raw email text taken from the input textbox. ``None`` and
            blank / whitespace-only strings are treated as "nothing to
            classify".

    Returns:
        ``{"Spam": 1.0}`` or ``{"Not Spam": 1.0}`` for non-blank input, or
        the placeholder ``{"__not_spam__": 0.5}`` when there is no text.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError instead of returning the placeholder.
    if text is None or not text.strip():
        return {"__not_spam__": 0.5}

    # predict() returns the string "Spam" or "Not Spam"; it is a hard label,
    # so the reported confidence is always 1.0.
    verdict = predict(text, vectorizer, model)
    return {"Spam": 1.0} if verdict == "Spam" else {"Not Spam": 1.0}
19
+
20
+
21
# Sample emails offered under the form; each inner list is one row of inputs.
EXAMPLE_EMAILS = [
    ["Win a free iPhone! Click here now!!! Limited time offer."],
    ["Earn money from home with this simple trick. Start today."],
    ["Hey, are we still meeting for lunch tomorrow?"],
    ["Meeting rescheduled to 3 PM. See you then!"],
]

# Assemble the page: a heading, the text input next to the prediction label,
# a row with the Classify / Clear buttons, and the clickable examples.
with gr.Blocks(theme="soft", css="footer {display: none !important}") as demo:
    gr.Markdown(
        """
        # 🚨 Spam Email Classifier
        Classify emails as **Spam** or **Not Spam** using TF-IDF + SVM
        """
    )

    with gr.Row():
        # Wide column: the email text to classify.
        with gr.Column(scale=4):
            input_text = gr.Textbox(
                label="Email Text",
                info="Include subject and body for better accuracy",
                placeholder="Paste the full email content here...",
                lines=10,
            )
        # Narrow column: the predicted class.
        with gr.Column(scale=1, min_width=200):
            output_label = gr.Label(label="Prediction", num_top_classes=1)

    with gr.Row():
        submit_btn = gr.Button("Classify", variant="primary", size="lg")
        clear_btn = gr.ClearButton([input_text, output_label], value="Clear")

    # Run the classifier on the textbox content when "Classify" is clicked.
    submit_btn.click(fn=classify_email, inputs=input_text, outputs=output_label)

    gr.Markdown("### Examples (click to load)")
    examples = gr.Examples(
        examples=EXAMPLE_EMAILS,
        inputs=input_text,
        outputs=output_label,
        fn=classify_email,
        cache_examples=False,
    )


if __name__ == "__main__":
    demo.launch()
datasets/emails.csv ADDED
The diff for this file is too large to render. See raw diff
 
notebook/spam_email_classification.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ nltk
3
+ scikit-learn
saved_models/SVM_TF-IDF.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08d47efb74837ab4280b983d375b85d1a21fd3ef5036fd8eb29448901a50a5e1
3
+ size 738740
saved_models/vectorizer_TF-IDF.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f021f2b6366aa0e24654bca2a802ed56471a47b60455e8f0a01853bef3b184b4
3
+ size 182801
utils/model_loader.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+
3
def _read_pickle(path):
    """Return the object unpickled from the binary file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


def load_models(vectorizer_path="saved_models/vectorizer_TF-IDF.pkl",
                model_path="saved_models/SVM_TF-IDF.pkl"):
    """Load the pickled vectorizer and classifier from disk.

    Args:
        vectorizer_path: Path of the pickled vectorizer file.
        model_path: Path of the pickled model file.

    Returns:
        A ``(vectorizer, model)`` tuple, in that order.

    Note:
        ``pickle.load`` can execute arbitrary code, so only point these
        paths at files from a trusted source.
    """
    # The vectorizer is read first, then the model, matching the tuple order.
    return _read_pickle(vectorizer_path), _read_pickle(model_path)
utils/predict.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from .preprocessing import preprocess_text
2
+
3
def predict(text: str, vectorizer, model) -> str:
    """Classify *text* and return ``"Spam"`` or ``"Not Spam"``.

    The text is cleaned with ``preprocess_text``, handed to
    ``vectorizer.transform`` as a one-item batch, and the first element of
    ``model.predict`` is compared against ``1``.

    Args:
        text: Raw email text.
        vectorizer: Object exposing ``transform``.
        model: Object exposing ``predict``.

    Returns:
        ``"Spam"`` when the predicted label equals ``1``, otherwise
        ``"Not Spam"``.
    """
    features = vectorizer.transform([preprocess_text(text)])
    label = model.predict(features)[0]
    if label == 1:
        return "Spam"
    return "Not Spam"
utils/preprocessing.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import string
3
+ import nltk
4
+ from nltk.corpus import stopwords
5
+ from nltk.stem import WordNetLemmatizer
6
+
7
# Fetch the NLTK data packages this module relies on at import time.
nltk.download('punkt')
# Newer NLTK releases (3.9+) make word_tokenize() load the 'punkt_tab'
# tables instead of the pickled 'punkt' model; without this download the
# tokenizer raises LookupError there. 'punkt' is kept for older releases.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# English stop words kept in a set for fast membership tests.
stop_words = set(stopwords.words('english'))
# One shared lemmatizer instance reused for every call.
lemmatizer = WordNetLemmatizer()
13
+
14
def preprocess_text(text: str) -> str:
    """Normalize raw text into a space-separated string of lemmatized tokens.

    Steps: lowercase the text, replace every character that is not an ASCII
    letter, a digit or whitespace with a space, tokenize with
    ``nltk.word_tokenize``, drop stop words and punctuation tokens, and
    lemmatize what remains.

    Args:
        text: Raw input text.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    lowered = text.lower()
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', ' ', lowered)

    kept = []
    for token in nltk.word_tokenize(cleaned):
        # Skip stop words and anything found in string.punctuation.
        if token in stop_words or token in string.punctuation:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)