Spaces:

MrUtakata
/

Email_spam_detection

Sleeping

App Files Files Community

MrUtakata committed on Apr 30, 2025

Commit

e9422ac

verified ·

1 Parent(s): cdf1735

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -23

app.py CHANGED Viewed

@@ -5,6 +5,11 @@ import joblib
 import pandas as pd
 import streamlit as st
 # ———————————————————————————
 # 1) Text cleaning & feature functions
 # ———————————————————————————
@@ -20,8 +25,8 @@ def featurize(title: str, body: str) -> pd.DataFrame:
     raw = f"{title or ''} {body or ''}"
     txt = clean_text(raw.lower())
     return pd.DataFrame([{
-        'content':    txt,
-        'msg_len':    len(txt),
         'digit_count': len(re.findall(r'\d', txt)),
         'url_count':   txt.count('URL'),
         'key_flag':    int(
@@ -31,15 +36,17 @@ def featurize(title: str, body: str) -> pd.DataFrame:
     }])
 # ———————————————————————————
-# 2) Load models/artifacts
 # ———————————————————————————
-@st.cache(allow_output_mutation=True)
 def load_models():
-    # adjust paths if needed
-    prep_clf_pipe = joblib.load('spam_deploy_pipeline.pkl')
-    threshold     = joblib.load('spam_threshold.pkl')
-    return prep_clf_pipe, threshold
 pipe, thresh = load_models()
@@ -47,10 +54,9 @@ pipe, thresh = load_models()
 # 3) Streamlit UI
 # ———————————————————————————
-st.set_page_config(page_title="E-mail Spam Detection", layout="centered")
 st.title("📧 E-mail Spam Detector")
 st.markdown(
-    "Enter an e-mail subject and body below, then hit **Predict** "
     "to see the spam probability and label."
 )
@@ -60,20 +66,21 @@ with st.form("input_form"):
     submitted = st.form_submit_button("Predict")
 if submitted:
-    # featurize
     X = featurize(subj, body)
-    # run through preprocessing + calibrated classifier
-    proba = pipe.predict_proba(X)[0,1]
-    label = "🚫 SPAM" if proba >= thresh else "✅ Not Spam"
-    st.metric("Spam probability", f"{proba:.1%}", delta=None)
-    st.subheader(label)
-    if label.startswith("🚫"):
         st.warning("This message is classified as spam. Proceed with caution!")
     else:
         st.success("This message looks clean.")
-    st.write("---")
-    st.markdown(
-        "Threshold for spam vs not-spam was set to "
-        f"**{thresh:.2f}** (optimized for F₂ score)."
-    )

 import pandas as pd
 import streamlit as st
+# ———————————————————————————
+# 0) Must set page config first
+# ———————————————————————————
+st.set_page_config(page_title="E-mail Spam Detection", layout="centered")
 # ———————————————————————————
 # 1) Text cleaning & feature functions
 # ———————————————————————————
     raw = f"{title or ''} {body or ''}"
     txt = clean_text(raw.lower())
     return pd.DataFrame([{
+        'content':     txt,
+        'msg_len':     len(txt),
         'digit_count': len(re.findall(r'\d', txt)),
         'url_count':   txt.count('URL'),
         'key_flag':    int(
     }])
 # ———————————————————————————
+# 2) Load models/artifacts once
 # ———————————————————————————
+@st.cache_resource
 def load_models():
+    """
+    Loads the deploy pipeline (preprocessor + calibrated RF) and threshold.
+    This runs only once per server process; the cached objects are shared across all sessions and reruns.
+    """
+    deploy_pipe = joblib.load('spam_deploy_pipeline.pkl')
+    threshold   = joblib.load('spam_threshold.pkl')
+    return deploy_pipe, threshold
 pipe, thresh = load_models()
 # 3) Streamlit UI
 # ———————————————————————————
 st.title("📧 E-mail Spam Detector")
 st.markdown(
+    "Enter an e-mail **Subject** and **Body** below, then click **Predict** "
     "to see the spam probability and label."
 )
     submitted = st.form_submit_button("Predict")
 if submitted:
+    # 1) featurize
     X = featurize(subj, body)
+    # 2) predict
+    proba = pipe.predict_proba(X)[0, 1]
+    is_spam = (proba >= thresh)
+    # 3) display
+    st.metric("Spam probability", f"{proba:.1%}")
+    if is_spam:
+        st.subheader("🚫 SPAM")
         st.warning("This message is classified as spam. Proceed with caution!")
     else:
+        st.subheader("✅ Not Spam")
         st.success("This message looks clean.")
+    st.divider()
+    st.caption(f"Decision threshold (F₂-optimized): {thresh:.2f}")