Shiva7706 committed on
Commit
3c1717d
·
verified ·
1 Parent(s): 98f67e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -89
app.py CHANGED
@@ -1,89 +1,88 @@
1
- import streamlit as st
2
- import joblib
3
- import nltk
4
- from nltk.corpus import stopwords
5
- from nltk.tokenize import word_tokenize
6
- import string
7
- import re
8
-
9
- # Download NLTK data
10
- nltk.download('punkt')
11
- nltk.download('stopwords')
12
-
13
- def preprocess_text(text):
14
- # Convert to lowercase
15
- text = text.lower()
16
-
17
- # Remove punctuation
18
- text = ''.join([char for char in text if char not in string.punctuation])
19
-
20
- # Remove numbers
21
- text = re.sub(r'\d+', '', text)
22
-
23
- # Remove extra whitespace
24
- text = ' '.join(text.split())
25
-
26
- # Tokenization
27
- tokens = word_tokenize(text)
28
-
29
- # Remove stopwords
30
- stop_words = set(stopwords.words('english'))
31
- tokens = [token for token in tokens if token not in stop_words]
32
-
33
- # Join tokens back into text
34
- return ' '.join(tokens)
35
-
36
- # Load the saved model and vectorizer
37
- model = joblib.load('spam_detector_model.joblib')
38
- vectorizer = joblib.load('tfidf_vectorizer.joblib')
39
-
40
- # Create the Streamlit interface
41
- st.title("📧 Spam Message Detector")
42
-
43
- st.write("""
44
- This app detects whether a message is spam or not.
45
- Enter your message below and click 'Analyze' to check!
46
- """)
47
-
48
- # Create text input
49
- message = st.text_area("Enter your message:", height=100)
50
-
51
- if st.button("Analyze"):
52
- if message:
53
- # Preprocess the input
54
- processed_text = preprocess_text(message)
55
-
56
- # Vectorize the text
57
- text_vectorized = vectorizer.transform([processed_text])
58
-
59
- # Make prediction
60
- prediction = model.predict(text_vectorized)[0]
61
- probability = model.predict_proba(text_vectorized)[0]
62
-
63
- # Display result
64
- st.markdown("### Analysis Result")
65
-
66
- if prediction == 1:
67
- st.error("🚨 This message is likely SPAM!")
68
- st.write(f"Confidence: {probability[1]:.2%}")
69
- else:
70
- st.success(" This message appears to be legitimate.")
71
- st.write(f"Confidence: {probability[0]:.2%}")
72
-
73
- # Show preprocessing details
74
- with st.expander("See preprocessing steps"):
75
- st.write("Original message:", message)
76
- st.write("Processed message:", processed_text)
77
- else:
78
- st.warning("Please enter a message to analyze.")
79
-
80
- # Add sidebar information
81
- with st.sidebar:
82
- st.header("About the Model")
83
- st.write("""
84
- This spam detector uses an XGBoost classifier trained on a dataset of spam and legitimate messages.
85
-
86
- Model Performance:
87
- - Training Accuracy: 99.7%
88
- - Testing Accuracy: 98.9%
89
- """)
 
1
+ import streamlit as st
2
+ import joblib
3
+ import nltk
4
+ from nltk.corpus import stopwords
5
+ from nltk.tokenize import word_tokenize
6
+ import string
7
+ import re
8
+
9
+
10
# NLTK resources the app needs, as (lookup path, download id) pairs.
# 'punkt_tab' is included because newer NLTK releases load the tokenizer
# tables from it instead of the pickled 'punkt' models; 'punkt' is kept for
# older releases.
_NLTK_RESOURCES = (
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/stopwords', 'stopwords'),
)

# Streamlit re-executes this script on every interaction, so only hit the
# network when a resource is actually missing from the local NLTK data path.
for _resource_path, _resource_id in _NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_resource_id, quiet=True)
12
+
13
# Translation table that deletes every ASCII punctuation character in a
# single C-level pass (replaces the per-character Python loop).
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Runs of digits are dropped entirely; compiled once at import time.
_DIGITS_RE = re.compile(r'\d+')

# Lazily populated cache of the English stop words (see _english_stop_words).
_STOP_WORDS = None


def _english_stop_words():
    """Return the English stop-word set, reading the NLTK corpus only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_text(text):
    """Normalize a raw message into the cleaned form fed to the vectorizer.

    Steps, in order: lowercase, strip ASCII punctuation, remove digit runs,
    collapse whitespace, tokenize with NLTK's ``word_tokenize`` and drop
    English stop words.

    Args:
        text: The raw message string.

    Returns:
        The remaining tokens joined by single spaces. This is an empty
        string when nothing survives the cleaning.
    """
    text = text.lower().translate(_PUNCTUATION_TABLE)
    text = _DIGITS_RE.sub('', text)

    # Collapse any whitespace runs left behind by the removals above.
    text = ' '.join(text.split())

    stop_words = _english_stop_words()
    return ' '.join(
        token for token in word_tokenize(text) if token not in stop_words
    )
35
+
36
+
37
@st.cache_resource
def _load_artifacts():
    """Load the classifier and the vectorizer from disk once per process.

    Streamlit re-runs the whole script on every widget interaction; caching
    keeps the two artifacts in memory instead of unpickling them each time.

    NOTE(review): joblib files are pickles, so loading them can execute
    arbitrary code. Only ship artifacts from a trusted source with this app.
    """
    return (
        joblib.load('spam_detector_model.joblib'),
        joblib.load('tfidf_vectorizer.joblib'),
    )


model, vectorizer = _load_artifacts()

# ---- Main page -----------------------------------------------------------
st.title("📧 Spam Message Detector")

st.write("""
This app detects whether a message is spam or not.
Enter your message below and click 'Analyze' to check!
""")

message = st.text_area("Enter your message:", height=100)

if st.button("Analyze"):
    # Strip first so a message made only of spaces/newlines is treated as
    # empty instead of being sent to the classifier.
    if message.strip():
        processed_text = preprocess_text(message)
        text_vectorized = vectorizer.transform([processed_text])

        # A single-row batch goes in, so take the first (only) result.
        prediction = model.predict(text_vectorized)[0]
        probability = model.predict_proba(text_vectorized)[0]

        st.markdown("### Analysis Result")

        # The code treats label 1 as spam and reads the matching
        # probability column for the reported confidence.
        if prediction == 1:
            st.error("🚨 This message is likely SPAM!")
            st.write(f"Confidence: {probability[1]:.2%}")
        else:
            st.success("✅ This message appears to be legitimate.")
            st.write(f"Confidence: {probability[0]:.2%}")

        # Let the user inspect what the model actually received.
        with st.expander("See preprocessing steps"):
            st.write("Original message:", message)
            st.write("Processed message:", processed_text)
    else:
        st.warning("Please enter a message to analyze.")

# ---- Sidebar ---------------------------------------------------------------
with st.sidebar:
    st.header("About the Model")
    st.write("""
    This spam detector uses an XGBoost classifier trained on a dataset of spam and legitimate messages.

    Model Performance:
    - Training Accuracy: 99.7%
    - Testing Accuracy: 98.9%
    """)