Spaces:

arifa-batool
/

spam-filter-app

Runtime error

App Files Files Community

arifa-batool committed on Dec 24, 2025

Commit

ec159e6

verified ·

1 Parent(s): eeaa930

Email Spam Classifier using ML and UI in Gradio with Jupyter Notebook

Browse files

Files changed (9) hide show

app.py +69 -0
datasets/emails.csv +0 -0
notebook/spam_email_classification.ipynb +0 -0
requirements.txt +3 -0
saved_models/SVM_TF-IDF.pkl +3 -0
saved_models/vectorizer_TF-IDF.pkl +3 -0
utils/model_loader.py +10 -0
utils/predict.py +8 -0
utils/preprocessing.py +21 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import gradio as gr
+from utils.model_loader import load_models
+from utils.predict import predict
+# Unpickle the vectorizer and classifier once at import time; classify_email
+# reads these module globals on every request instead of reloading the files.
+vectorizer, model = load_models()
+def classify_email(text):
+    if not text.strip():
+        return {"__not_spam__": 0.5}
+    result = predict(text, vectorizer, model)
+    if result == "Spam":
+        return {"Spam": 1.0}
+    else:
+        return {"Not Spam": 1.0}
+# Build the UI: heading, input/output row, action buttons, clickable examples.
+# The custom CSS rule hides the page's footer element.
+# NOTE(review): newer Gradio releases reportedly expect theme/css to be passed
+# to launch() rather than Blocks() - confirm against the installed version.
+with gr.Blocks(theme="soft", css="footer {display: none !important}") as demo:
+    gr.Markdown(
+        """
+        # 🚨 Spam Email Classifier
+        Classify emails as **Spam** or **Not Spam** using TF-IDF + SVM
+        """
+    )
+    # Wide column for the email text, narrow column for the prediction.
+    with gr.Row():
+        with gr.Column(scale=4):
+            input_text = gr.Textbox(
+                lines=10,
+                placeholder="Paste the full email content here...",
+                label="Email Text",
+                info="Include subject and body for better accuracy"
+            )
+        with gr.Column(scale=1, min_width=200):
+            # classify_email returns a one-entry dict, so one class is enough.
+            output_label = gr.Label(
+                label="Prediction",
+                num_top_classes=1
+            )
+    with gr.Row():
+        submit_btn = gr.Button("Classify", variant="primary", size="lg")
+        # Resets both the textbox and the prediction label.
+        clear_btn = gr.ClearButton([input_text, output_label], value="Clear")
+    # Run the classifier on the textbox content when "Classify" is clicked.
+    submit_btn.click(
+        fn=classify_email,
+        inputs=input_text,
+        outputs=output_label
+    )
+    gr.Markdown("### Examples (click to load)")
+    # Sample inputs; cache_examples=False means their outputs are not
+    # precomputed when the app starts.
+    examples = gr.Examples(
+        examples=[
+            ["Win a free iPhone! Click here now!!! Limited time offer."],
+            ["Earn money from home with this simple trick. Start today."],
+            ["Hey, are we still meeting for lunch tomorrow?"],
+            ["Meeting rescheduled to 3 PM. See you then!"],
+        ],
+        inputs=input_text,
+        outputs=output_label,
+        fn=classify_email,
+        cache_examples=False
+                )
+# Start the server only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

datasets/emails.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

notebook/spam_email_classification.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+nltk
+scikit-learn

saved_models/SVM_TF-IDF.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08d47efb74837ab4280b983d375b85d1a21fd3ef5036fd8eb29448901a50a5e1
+size 738740

saved_models/vectorizer_TF-IDF.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f021f2b6366aa0e24654bca2a802ed56471a47b60455e8f0a01853bef3b184b4
+size 182801

utils/model_loader.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import pickle
+def load_models(vectorizer_path="saved_models/vectorizer_TF-IDF.pkl",
+                model_path="saved_models/SVM_TF-IDF.pkl"):
+    """Load vectorizer and SVM model."""
+    with open(vectorizer_path, "rb") as f:
+        vectorizer = pickle.load(f)
+    with open(model_path, "rb") as f:
+        model = pickle.load(f)
+    return vectorizer, model

utils/predict.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from .preprocessing import preprocess_text
+def predict(text: str, vectorizer, model) -> str:
+    """Preprocess text, vectorize, and predict Spam/Ham."""
+    processed = preprocess_text(text)
+    vectorized = vectorizer.transform([processed])
+    result = model.predict(vectorized)[0]
+    return "Spam" if result == 1 else "Not Spam"

utils/preprocessing.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import re
+import string
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+nltk.download('punkt')
+nltk.download('stopwords')
+nltk.download('wordnet')
+stop_words = set(stopwords.words('english'))
+lemmatizer = WordNetLemmatizer()
+def preprocess_text(text: str) -> str:
+    """Clean and preprocess input text."""
+    text = text.lower()
+    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
+    tokens = nltk.word_tokenize(text)
+    tokens = [word for word in tokens if word not in stop_words and word not in string.punctuation]
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]
+    return " ".join(tokens)