MrUtakata committed on
Commit
e9422ac
·
verified ·
1 Parent(s): cdf1735

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -23
app.py CHANGED
@@ -5,6 +5,11 @@ import joblib
5
  import pandas as pd
6
  import streamlit as st
7
 
 
 
 
 
 
8
  # ———————————————————————————
9
  # 1) Text cleaning & feature functions
10
  # ———————————————————————————
@@ -20,8 +25,8 @@ def featurize(title: str, body: str) -> pd.DataFrame:
20
  raw = f"{title or ''} {body or ''}"
21
  txt = clean_text(raw.lower())
22
  return pd.DataFrame([{
23
- 'content': txt,
24
- 'msg_len': len(txt),
25
  'digit_count': len(re.findall(r'\d', txt)),
26
  'url_count': txt.count('URL'),
27
  'key_flag': int(
@@ -31,15 +36,17 @@ def featurize(title: str, body: str) -> pd.DataFrame:
31
  }])
32
 
33
  # ———————————————————————————
34
- # 2) Load models/artifacts
35
  # ———————————————————————————
36
-
37
- @st.cache(allow_output_mutation=True)
38
  def load_models():
39
- # adjust paths if needed
40
- prep_clf_pipe = joblib.load('spam_deploy_pipeline.pkl')
41
- threshold = joblib.load('spam_threshold.pkl')
42
- return prep_clf_pipe, threshold
 
 
 
43
 
44
  pipe, thresh = load_models()
45
 
@@ -47,10 +54,9 @@ pipe, thresh = load_models()
47
  # 3) Streamlit UI
48
  # ———————————————————————————
49
 
50
- st.set_page_config(page_title="E-mail Spam Detection", layout="centered")
51
  st.title("📧 E-mail Spam Detector")
52
  st.markdown(
53
- "Enter an e-mail subject and body below, then hit **Predict** "
54
  "to see the spam probability and label."
55
  )
56
 
@@ -60,20 +66,21 @@ with st.form("input_form"):
60
  submitted = st.form_submit_button("Predict")
61
 
62
  if submitted:
63
- # featurize
64
  X = featurize(subj, body)
65
- # run through preprocessing + calibrated classifier
66
- proba = pipe.predict_proba(X)[0,1]
67
- label = "🚫 SPAM" if proba >= thresh else "✅ Not Spam"
68
- st.metric("Spam probability", f"{proba:.1%}", delta=None)
69
- st.subheader(label)
70
- if label.startswith("🚫"):
 
 
 
71
  st.warning("This message is classified as spam. Proceed with caution!")
72
  else:
 
73
  st.success("This message looks clean.")
74
 
75
- st.write("---")
76
- st.markdown(
77
- "Threshold for spam vs not-spam was set to "
78
- f"**{thresh:.2f}** (optimized for F₂ score)."
79
- )
 
5
  import pandas as pd
6
  import streamlit as st
7
 
8
+ # ———————————————————————————
9
+ # 0) Must set page config first
10
+ # ———————————————————————————
11
+ st.set_page_config(page_title="E-mail Spam Detection", layout="centered")
12
+
13
  # ———————————————————————————
14
  # 1) Text cleaning & feature functions
15
  # ———————————————————————————
 
25
  raw = f"{title or ''} {body or ''}"
26
  txt = clean_text(raw.lower())
27
  return pd.DataFrame([{
28
+ 'content': txt,
29
+ 'msg_len': len(txt),
30
  'digit_count': len(re.findall(r'\d', txt)),
31
  'url_count': txt.count('URL'),
32
  'key_flag': int(
 
36
  }])
37
 
38
  # ———————————————————————————
39
+ # 2) Load models/artifacts once
40
  # ———————————————————————————
41
+ @st.cache_resource
 
42
  def load_models():
43
+ """
44
+ Loads the deploy pipeline (preprocessor + calibrated RF) and threshold.
45
+ Cached by st.cache_resource: this runs once per server process, and the result is shared across sessions and reruns.
46
+ """
47
+ deploy_pipe = joblib.load('spam_deploy_pipeline.pkl')
48
+ threshold = joblib.load('spam_threshold.pkl')
49
+ return deploy_pipe, threshold
50
 
51
  pipe, thresh = load_models()
52
 
 
54
  # 3) Streamlit UI
55
  # ———————————————————————————
56
 
 
57
  st.title("📧 E-mail Spam Detector")
58
  st.markdown(
59
+ "Enter an e-mail **Subject** and **Body** below, then click **Predict** "
60
  "to see the spam probability and label."
61
  )
62
 
 
66
  submitted = st.form_submit_button("Predict")
67
 
68
  if submitted:
69
+ # 1) featurize
70
  X = featurize(subj, body)
71
+
72
+ # 2) predict
73
+ proba = pipe.predict_proba(X)[0, 1]
74
+ is_spam = (proba >= thresh)
75
+
76
+ # 3) display
77
+ st.metric("Spam probability", f"{proba:.1%}")
78
+ if is_spam:
79
+ st.subheader("🚫 SPAM")
80
  st.warning("This message is classified as spam. Proceed with caution!")
81
  else:
82
+ st.subheader("✅ Not Spam")
83
  st.success("This message looks clean.")
84
 
85
+ st.divider()
86
+ st.caption(f"Decision threshold (F₂-optimized): {thresh:.2f}")