Commit ·
aafddbb
0
Parent(s):
first commit
Browse files- .gitattributes +1 -0
- .gitignore +1 -0
- app.py +70 -0
- count_vectorizer.pkl +3 -0
- model.pkl +3 -0
- requirements.txt +52 -0
.gitattributes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.venv
|
app.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pickle
|
| 3 |
+
import PyPDF2
|
| 4 |
+
import docx
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
import re
|
| 7 |
+
import string
|
| 8 |
+
import nltk
|
| 9 |
+
import sklearn
|
| 10 |
+
from nltk.corpus import stopwords
|
| 11 |
+
from nltk.stem import WordNetLemmatizer
|
| 12 |
+
from nltk.tokenize import word_tokenize
|
| 13 |
+
from contractions import fix
|
| 14 |
+
|
| 15 |
+
# --- Page setup, model loading, and NLP resources (top-level script) ---

# Page title (typo fixed: "Dection" -> "Detection").
st.title('Spam Email Detection System')
st.markdown(
    """
    <style>
    .stApp {
        background-color: #0066cc;
    }
    </style>
    """,
    unsafe_allow_html=True
)

# Load the trained classifier and its fitted CountVectorizer.
# FIX: the original `pickle.load(open(...))` never closed the file handles;
# use context managers so they are released deterministically.
# NOTE(review): pickle.load assumes these are trusted artifacts shipped with
# the app — never load untrusted pickle files.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('count_vectorizer.pkl', 'rb') as vectorizer_file:
    count_vectorizer = pickle.load(vectorizer_file)

# Preprocessing resources shared by clean_text_advance().
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
|
| 32 |
+
|
| 33 |
+
def clean_text_advance(text):
    """Normalize raw email text for vectorization.

    Steps: lowercase, expand contractions, strip URLs and HTML tags,
    remove punctuation, tokenize, drop English stopwords, lemmatize.

    Args:
        text: Raw email body as a string.

    Returns:
        The cleaned text as a single space-separated string of tokens.
    """
    text = text.lower()
    # BUG FIX: fix()'s result was assigned to a misspelled variable
    # ('test'), silently discarding the contraction expansion.
    text = fix(text)
    # BUG FIX: r'http\\S+' matched a literal backslash followed by 'S',
    # so URLs were never removed; r'http\S+' matches the URL itself.
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'<.*?>', '', text)  # strip HTML tags
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(text)
    clean_tokens = [
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    ]
    # BUG FIX: ''.join glued all tokens into one word ("freemoneynow"),
    # so the CountVectorizer could never match its vocabulary; join with
    # spaces so it sees individual words.
    return ' '.join(clean_tokens)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def detection_email(text):
    """Classify an email body as spam or not spam.

    Args:
        text: Raw email text entered by the user.

    Returns:
        'Spam' if the model predicts class 1, otherwise 'Not Spam'.
    """
    cleaned_text = clean_text_advance(text)
    features = count_vectorizer.transform([cleaned_text])
    results = model.predict(features)
    # FIX: predict() returns an array; index the single prediction
    # explicitly instead of comparing the whole array with `==`.
    if results[0] == 0:
        return 'Not Spam'
    return 'Spam'
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# --- User input and detection trigger (top-level script) ---
search_query = st.text_input('Enter your email text')

if st.button('Detect Spam'):
    if search_query:
        # BUG FIX: detection_email() returns the strings 'Not Spam'/'Spam',
        # but the old code compared the result with 0 — the comparison was
        # never true, so every email was displayed as spam. Display the
        # returned label directly.
        results = detection_email(search_query)
        st.write(results)
    else:
        # Guide the user instead of silently doing nothing on empty input.
        st.write('Please enter some email text first.')
|
| 70 |
+
|
count_vectorizer.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f1c227783fe9f6612b8d94fd12baa223853ed728ea4bf9ba3a5af730121f050
|
| 3 |
+
size 4762473
|
model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c383d63f60b322f9768a19791612f8768c09bc55cd05b4020e7b074988167b3
|
| 3 |
+
size 48572084
|
requirements.txt
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
altair==6.0.0
|
| 2 |
+
anyascii==0.3.3
|
| 3 |
+
attrs==25.4.0
|
| 4 |
+
blinker==1.9.0
|
| 5 |
+
cachetools==7.0.5
|
| 6 |
+
certifi==2026.2.25
|
| 7 |
+
charset-normalizer==3.4.5
|
| 8 |
+
click==8.3.1
|
| 9 |
+
colorama==0.4.6
|
| 10 |
+
contractions==0.1.73
|
| 11 |
+
gitdb==4.0.12
|
| 12 |
+
GitPython==3.1.46
|
| 13 |
+
idna==3.11
|
| 14 |
+
Jinja2==3.1.6
|
| 15 |
+
joblib==1.5.3
|
| 16 |
+
jsonschema==4.26.0
|
| 17 |
+
jsonschema-specifications==2025.9.1
|
| 18 |
+
lxml==6.0.2
|
| 19 |
+
MarkupSafe==3.0.3
|
| 20 |
+
narwhals==2.18.0
|
| 21 |
+
nltk==3.9.3
|
| 22 |
+
numpy==2.4.3
|
| 23 |
+
packaging==26.0
|
| 24 |
+
pandas==2.3.3
|
| 25 |
+
pillow==12.1.1
|
| 26 |
+
protobuf==6.33.5
|
| 27 |
+
pyahocorasick==2.3.0
|
| 28 |
+
pyarrow==23.0.1
|
| 29 |
+
pydeck==0.9.1
|
| 30 |
+
PyPDF2==3.0.1
|
| 31 |
+
python-dateutil==2.9.0.post0
|
| 32 |
+
python-docx==1.2.0
|
| 33 |
+
pytz==2026.1.post1
|
| 34 |
+
referencing==0.37.0
|
| 35 |
+
regex==2026.2.28
|
| 36 |
+
requests==2.32.5
|
| 37 |
+
rpds-py==0.30.0
|
| 38 |
+
scikit-learn==1.8.0
|
| 39 |
+
scipy==1.17.1
|
| 40 |
+
six==1.17.0
|
| 41 |
+
smmap==5.0.3
|
| 42 |
+
streamlit==1.55.0
|
| 43 |
+
tenacity==9.1.4
|
| 44 |
+
textsearch==0.0.24
|
| 45 |
+
threadpoolctl==3.6.0
|
| 46 |
+
toml==0.10.2
|
| 47 |
+
tornado==6.5.5
|
| 48 |
+
tqdm==4.67.3
|
| 49 |
+
typing_extensions==4.15.0
|
| 50 |
+
tzdata==2025.3
|
| 51 |
+
urllib3==2.6.3
|
| 52 |
+
watchdog==6.0.0
|