meesamraza committed on
Commit
1dc2d2c
·
verified ·
1 Parent(s): 27f8c63

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +49 -22
src/streamlit_app.py CHANGED
@@ -1,18 +1,19 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
 
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.naive_bayes import MultinomialNB
6
- from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
7
 
8
- # Set page config
9
  st.set_page_config(page_title="πŸ“§ Email Spam Detector", layout="centered")
10
-
11
- # Title and Description
12
  st.title("πŸ“§ Email Spam Detector")
13
- st.markdown("This app uses **Machine Learning** to classify emails as **Spam** or **Ham (Not Spam)**. Just type or paste your message below to test!")
14
 
15
- # Load and preprocess dataset
16
  @st.cache_data
17
  def load_data():
18
  df = pd.read_csv("spam.csv", encoding='latin-1')[['v1', 'v2']]
@@ -22,44 +23,70 @@ def load_data():
22
 
23
  df = load_data()
24
 
25
- # Train/test split
 
 
 
 
 
 
 
 
26
  X_train, X_test, y_train, y_test = train_test_split(
27
- df['message'], df['label'], test_size=0.2, random_state=42)
 
28
 
29
- # Vectorize with TF-IDF
30
  vectorizer = TfidfVectorizer(stop_words='english')
31
  X_train_tfidf = vectorizer.fit_transform(X_train)
32
  X_test_tfidf = vectorizer.transform(X_test)
33
 
34
- # Train model
35
  model = MultinomialNB()
36
  model.fit(X_train_tfidf, y_train)
37
 
38
- # Accuracy (optional to show)
39
- accuracy = accuracy_score(y_test, model.predict(X_test_tfidf))
 
 
 
 
40
 
41
- # Sidebar
42
- st.sidebar.header("πŸ” Model Info")
43
  st.sidebar.write(f"**Accuracy:** {accuracy:.2%}")
 
 
 
44
  st.sidebar.markdown("Model: `Multinomial Naive Bayes` \nVectorizer: `TF-IDF`")
45
 
46
- # Predict function
 
 
 
 
 
 
 
 
47
  def predict_message(msg):
48
- vect_msg = vectorizer.transform([msg])
 
49
  pred = model.predict(vect_msg)[0]
50
- return "🚫 Spam" if pred == 1 else "βœ… Ham (Not Spam)"
 
51
 
52
- # Input section
53
  st.subheader("βœ‰οΈ Test Your Message")
54
  user_input = st.text_area("Enter your email message here:")
55
 
56
  if st.button("Detect"):
57
  if user_input.strip() == "":
58
- st.warning("Please enter a message to classify.")
59
  else:
60
- result = predict_message(user_input)
61
- st.success(f"Prediction: **{result}**")
62
 
63
- # Optional: Show raw data
64
  with st.expander("πŸ“‚ View Sample Dataset"):
65
  st.dataframe(df.sample(10))
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import string
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
  from sklearn.model_selection import train_test_split
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.naive_bayes import MultinomialNB
9
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
10
 
11
+ # ----------------- STREAMLIT CONFIG -----------------
12
  st.set_page_config(page_title="πŸ“§ Email Spam Detector", layout="centered")
 
 
13
  st.title("πŸ“§ Email Spam Detector")
14
+ st.markdown("This app uses **Machine Learning** (Naive Bayes + TF-IDF) to classify emails as **Spam** or **Ham (Not Spam)**.")
15
 
16
+ # ----------------- DATA LOADING -----------------
17
  @st.cache_data
18
  def load_data():
19
  df = pd.read_csv("spam.csv", encoding='latin-1')[['v1', 'v2']]
 
23
 
24
  df = load_data()
25
 
26
+ # ----------------- PREPROCESS FUNCTION -----------------
27
# Translation table is built once at import time: clean_text runs on every
# dataset row, so rebuilding the table per call is wasted work.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(text):
    """Normalize a message: lowercase, drop ASCII punctuation, trim whitespace.

    Args:
        text: Raw message string.

    Returns:
        The lowercased message with every character in ``string.punctuation``
        removed and no leading or trailing whitespace.
    """
    # Strip *after* removing punctuation: stripping first leaves stray
    # whitespace behind when the message ends in " !!!" or similar.
    return text.lower().translate(_PUNCT_TABLE).strip()
31
+
32
+ df['message'] = df['message'].apply(clean_text)
33
+
34
+ # ----------------- TRAIN / TEST SPLIT -----------------
35
  X_train, X_test, y_train, y_test = train_test_split(
36
+ df['message'], df['label'], test_size=0.2, random_state=42
37
+ )
38
 
39
+ # ----------------- TF-IDF VECTORIZATION -----------------
40
  vectorizer = TfidfVectorizer(stop_words='english')
41
  X_train_tfidf = vectorizer.fit_transform(X_train)
42
  X_test_tfidf = vectorizer.transform(X_test)
43
 
44
+ # ----------------- MODEL TRAINING -----------------
45
  model = MultinomialNB()
46
  model.fit(X_train_tfidf, y_train)
47
 
48
+ # ----------------- METRICS -----------------
49
+ y_pred = model.predict(X_test_tfidf)
50
+ accuracy = accuracy_score(y_test, y_pred)
51
+ precision = precision_score(y_test, y_pred)
52
+ recall = recall_score(y_test, y_pred)
53
+ f1 = f1_score(y_test, y_pred)
54
 
55
+ # ----------------- SIDEBAR METRICS -----------------
56
+ st.sidebar.header("πŸ” Model Performance")
57
  st.sidebar.write(f"**Accuracy:** {accuracy:.2%}")
58
+ st.sidebar.write(f"**Precision:** {precision:.2%}")
59
+ st.sidebar.write(f"**Recall:** {recall:.2%}")
60
+ st.sidebar.write(f"**F1 Score:** {f1:.2%}")
61
  st.sidebar.markdown("Model: `Multinomial Naive Bayes` \nVectorizer: `TF-IDF`")
62
 
63
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
# Draw on the explicit Axes rather than pyplot's implicit "current axes".
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=["Ham", "Spam"],
    yticklabels=["Ham", "Spam"],
    ax=ax,
)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
st.sidebar.pyplot(fig)
# Streamlit re-executes this script on every interaction; close the figure so
# pyplot's figure registry does not gain one more open figure per rerun.
plt.close(fig)
70
+
71
# ----------------- PREDICT FUNCTION -----------------
def predict_message(msg):
    """Classify one message and report the model's confidence.

    Args:
        msg: Raw message text entered by the user.

    Returns:
        A ``(label, probability)`` tuple. ``label`` is the display string for
        spam or ham; ``probability`` is the ``model.predict_proba`` estimate
        for the predicted class.
    """
    msg_clean = clean_text(msg)
    vect_msg = vectorizer.transform([msg_clean])
    # Pick the winning class by column position and map it back through
    # model.classes_. Indexing the probability row with the label value itself
    # is only correct when labels happen to equal their column positions.
    probs = model.predict_proba(vect_msg)[0]
    best = probs.argmax()
    pred = model.classes_[best]
    prob = probs[best]
    label = "🚫 Spam" if pred == 1 else "✅ Ham (Not Spam)"
    return label, prob
78
 
79
+ # ----------------- USER INPUT -----------------
80
  st.subheader("βœ‰οΈ Test Your Message")
81
  user_input = st.text_area("Enter your email message here:")
82
 
83
  if st.button("Detect"):
84
  if user_input.strip() == "":
85
+ st.warning("⚠️ Please enter a message to classify.")
86
  else:
87
+ result, confidence = predict_message(user_input)
88
+ st.success(f"Prediction: **{result}** \nConfidence: **{confidence:.2%}**")
89
 
90
+ # ----------------- SAMPLE DATA -----------------
91
  with st.expander("πŸ“‚ View Sample Dataset"):
92
  st.dataframe(df.sample(10))