File size: 2,237 Bytes
626deb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import pickle
import streamlit as st 
import re
import nltk
import contractions
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Streamlit re-executes this script on every user interaction, so only call
# the NLTK downloader when a corpus cannot be found locally. If the lookup
# fails for any reason we fall back to the original unconditional download.
for _package, _resource in (
    ("wordnet", "corpora/wordnet"),
    ("stopwords", "corpora/stopwords"),
):
    try:
        nltk.data.find(_resource)
    except LookupError:
        nltk.download(_package)

## Setting Page Configuration and Header
# Page-wide settings are collected in one mapping and passed as keyword
# arguments to set_page_config.
_page_settings = {
    "page_title": "Spam Email Classifier",
    "page_icon": "📧",
    "layout": "wide",
}
st.set_page_config(**_page_settings)

# Heading followed by a one-line usage hint.
st.title("📧 Spam Email Classifier")
st.write("Enter your email content below and the model will predict whether it is Spam or Ham (Not Spam).")


## Preprocessing Function
def preprocess_text(text):
    """Clean raw email text into the form handed to the vectorizer.

    The steps run in a fixed order:

    1. lowercase the text;
    2. collapse every whitespace run to a single space and trim the ends;
    3. replace each run of digits with the placeholder ``<NUM>``;
    4. shorten any character repeated more than twice in a row to two copies;
    5. expand contractions via ``contractions.fix``;
    6. drop every character that is not ``a-z``, ``0-9`` or whitespace;
    7. lemmatize each whitespace-separated token with ``WordNetLemmatizer``.

    Args:
        text: Raw email content as a string.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    # Steps 1-2: case folding and whitespace normalisation.
    normalised = re.sub(r'\s+', ' ', text.lower()).strip()

    # Step 3: digit runs become a placeholder.
    # NOTE(review): '<NUM>' consists solely of characters that the filter in
    # step 6 removes, so numbers end up deleted rather than tokenised. Left
    # untouched here because the saved vectorizer was presumably fit on text
    # cleaned the same way -- confirm against the training code before changing.
    normalised = re.sub(r'\d+', '<NUM>', normalised)

    # Step 4: squeeze elongated sequences (three or more repeats) down to two.
    normalised = re.sub(r'(.)\1+', r'\1\1', normalised)

    # Step 5: expand contractions (e.g. weren't => were not).
    expanded = contractions.fix(normalised)

    # Step 6: keep only lowercase letters, digits and whitespace.
    filtered = re.sub(r'[^a-z0-9\s]', '', expanded)

    # Step 7: lemmatize token by token and stitch the result back together.
    lemmatize = WordNetLemmatizer().lemmatize
    return ' '.join(map(lemmatize, filtered.split()))
    

## Loading the Model and Vectorizer
# Resolve the artifacts relative to this script so the app does not depend on
# the directory it happens to be launched from.
_MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")

# Streamlit reruns the whole script on every interaction; caching keeps the
# unpickled objects in memory instead of re-reading them each time. Older
# Streamlit releases lack st.cache_resource, in which case we simply load
# without caching (the original behaviour).
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_pickle(filename):
    """Unpickle and return the object stored in ``models/<filename>``.

    NOTE(security): ``pickle.load`` can execute arbitrary code contained in
    the file, so these artifacts must only come from a trusted source.

    Args:
        filename: File name inside the ``models`` directory.

    Returns:
        The unpickled Python object.

    Raises:
        FileNotFoundError: If the artifact does not exist.
    """
    with open(os.path.join(_MODELS_DIR, filename), "rb") as file:
        return pickle.load(file)


model = _load_pickle("logistic_regression.pkl")
vectorizer = _load_pickle("vectorizer.pkl")


## Prediction
# Free-text input for the email body; the classifier runs only when the
# "Predict" button is pressed.
email_text = st.text_area("Email Content:")

if st.button("Predict"):

    # Whitespace-only input carries no content, so treat it like an empty box
    # instead of classifying an empty document.
    if not email_text.strip():
        st.warning("Please enter email content to predict.")
    else:
        # Clean the text the same way as in preprocess_text, then vectorize it
        # as a single-document batch.
        processed_text = preprocess_text(email_text)
        vect_text = vectorizer.transform([processed_text])

        prediction = model.predict(vect_text)[0]
        prediction_proba = model.predict_proba(vect_text)[0]

        # NOTE(review): assumes label 1 means spam and that the probability
        # columns are ordered [ham, spam] -- confirm against model.classes_.
        st.subheader("Prediction Result:")
        if prediction == 1:
            st.error("🚫 This email is Spam")
        else:
            st.success("✅ This email is Not Spam")

        st.subheader("Prediction Probabilities:")
        st.write(f"Ham: {prediction_proba[0]:.2f}, Spam: {prediction_proba[1]:.2f}")