Asgharkhan9275 commited on
Commit
aafddbb
·
0 Parent(s):

first commit

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. .gitignore +1 -0
  3. app.py +70 -0
  4. count_vectorizer.pkl +3 -0
  5. model.pkl +3 -0
  6. requirements.txt +52 -0
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import PyPDF2
4
+ import docx
5
+ from io import BytesIO
6
+ import re
7
+ import string
8
+ import nltk
9
+ import sklearn
10
+ from nltk.corpus import stopwords
11
+ from nltk.stem import WordNetLemmatizer
12
+ from nltk.tokenize import word_tokenize
13
+ from contractions import fix
14
+
15
# --- Page setup, model loading, and shared NLP resources (module level) ---

# Page title (fixed typo: "Dection" -> "Detection").
st.title('Spam Email Detection System')

# Apply a solid blue background to the whole Streamlit app.
st.markdown(
    """
    <style>
    .stApp {
        background-color: #0066cc;
    }
    </style>
    """,
    unsafe_allow_html=True
)

# Load the trained classifier and its fitted CountVectorizer.
# Using `with` closes the file handles (the original leaked them via
# pickle.load(open(...))).
# NOTE(review): unpickling is only safe because these files ship with the
# app — never pickle.load untrusted input.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('count_vectorizer.pkl', 'rb') as vectorizer_file:
    count_vectorizer = pickle.load(vectorizer_file)

# English stopword set and lemmatizer reused by clean_text_advance().
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
32
+
33
def clean_text_advance(text):
    """Normalize raw email text for the CountVectorizer.

    Steps: lowercase, expand contractions, strip URLs and HTML tags,
    remove punctuation, tokenize, drop stopwords, and lemmatize.

    Args:
        text: Raw email body as a string.

    Returns:
        A single space-separated string of cleaned tokens.
    """
    text = text.lower()
    # Bug fix: the original assigned fix(text) to a throwaway name `test`,
    # so contraction expansion was silently discarded.
    text = fix(text)
    # Bug fix: r'http\\S+' matched a literal backslash after "http";
    # r'http\S+' actually strips URLs.
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'<.*?>', '', text)  # drop HTML tags
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(text)
    clean_tokens = [lemmatizer.lemmatize(word) for word in tokens
                    if word not in stop_words]
    # Bug fix: ''.join fused every token into one long word, which defeats
    # the vectorizer's token matching; join with spaces instead.
    # NOTE(review): confirm the CountVectorizer was fitted on text cleaned
    # the same way, otherwise retrain with this corrected pipeline.
    return ' '.join(clean_tokens)
42
+
43
+
44
+
45
+
46
def detection_email(text):
    """Classify an email body as spam or not spam.

    Args:
        text: Raw email text entered by the user.

    Returns:
        'Not Spam' if the model predicts class 0, otherwise 'Spam'.
    """
    cleaned_text = clean_text_advance(text)
    features = count_vectorizer.transform([cleaned_text])
    results = model.predict(features)
    # predict() returns an array of one prediction; index it explicitly
    # instead of relying on numpy array truthiness in `if results == 0`.
    if results[0] == 0:
        return 'Not Spam'
    else:
        return 'Spam'
54
+
55
+
56
+
57
# --- User interface: input box and detection trigger ---

search_query = st.text_input('Enter your email text')

if st.button('Detect Spam'):
    if search_query:
        # Bug fix: detection_email() returns the strings 'Not Spam'/'Spam',
        # but the original compared its result to the integer 0 — never
        # equal — so every email was reported as spam. Display the label
        # the classifier actually returned.
        st.write(detection_email(search_query))
    else:
        # Robustness: tell the user why nothing happened on empty input.
        st.warning('Please enter some email text first.')
70
+
count_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f1c227783fe9f6612b8d94fd12baa223853ed728ea4bf9ba3a5af730121f050
3
+ size 4762473
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c383d63f60b322f9768a19791612f8768c09bc55cd05b4020e7b074988167b3
3
+ size 48572084
requirements.txt ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==6.0.0
2
+ anyascii==0.3.3
3
+ attrs==25.4.0
4
+ blinker==1.9.0
5
+ cachetools==7.0.5
6
+ certifi==2026.2.25
7
+ charset-normalizer==3.4.5
8
+ click==8.3.1
9
+ colorama==0.4.6
10
+ contractions==0.1.73
11
+ gitdb==4.0.12
12
+ GitPython==3.1.46
13
+ idna==3.11
14
+ Jinja2==3.1.6
15
+ joblib==1.5.3
16
+ jsonschema==4.26.0
17
+ jsonschema-specifications==2025.9.1
18
+ lxml==6.0.2
19
+ MarkupSafe==3.0.3
20
+ narwhals==2.18.0
21
+ nltk==3.9.3
22
+ numpy==2.4.3
23
+ packaging==26.0
24
+ pandas==2.3.3
25
+ pillow==12.1.1
26
+ protobuf==6.33.5
27
+ pyahocorasick==2.3.0
28
+ pyarrow==23.0.1
29
+ pydeck==0.9.1
30
+ PyPDF2==3.0.1
31
+ python-dateutil==2.9.0.post0
32
+ python-docx==1.2.0
33
+ pytz==2026.1.post1
34
+ referencing==0.37.0
35
+ regex==2026.2.28
36
+ requests==2.32.5
37
+ rpds-py==0.30.0
38
+ scikit-learn==1.8.0
39
+ scipy==1.17.1
40
+ six==1.17.0
41
+ smmap==5.0.3
42
+ streamlit==1.55.0
43
+ tenacity==9.1.4
44
+ textsearch==0.0.24
45
+ threadpoolctl==3.6.0
46
+ toml==0.10.2
47
+ tornado==6.5.5
48
+ tqdm==4.67.3
49
+ typing_extensions==4.15.0
50
+ tzdata==2025.3
51
+ urllib3==2.6.3
52
+ watchdog==6.0.0