Spaces:

MrUtakata
/

Email_spam_detection

Sleeping

App Files Files Community

MrUtakata committed on Apr 30, 2025

Commit

cdf1735

verified ·

1 Parent(s): 71b4366

Create app.py

Browse files

Files changed (1) hide show

app.py +79 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+# app.py
+import re
+import joblib
+import pandas as pd
+import streamlit as st
+# ———————————————————————————
+# 1) Text cleaning & feature functions
+# ———————————————————————————
+def clean_text(text: str) -> str:
+    """Normalise raw e-mail text into a single space-separated string.
+
+    The substitutions run in order:
+      1. runs of CR / LF / TAB become one space,
+      2. http(s) links become the placeholder ' URL ',
+      3. every character outside a-z, 0-9 and whitespace becomes a space,
+      4. runs of two or more whitespace characters collapse to one space.
+    The result is stripped of leading and trailing whitespace.
+
+    NOTE(review): step 3 also replaces uppercase letters, so the ' URL '
+    placeholder written by step 2 is erased again and never appears in the
+    returned string. Left as-is here because changing it would alter the
+    features fed to the already-trained model -- confirm against the
+    training code before fixing.
+    """
+    substitutions = (
+        (r'[\r\n\t]+', ' '),
+        (r'https?://\S+', ' URL '),
+        (r'[^a-z0-9\s]', ' '),
+        (r'\s{2,}', ' '),
+    )
+    for pattern, replacement in substitutions:
+        text = re.sub(pattern, replacement, text)
+    return text.strip()
+def featurize(title: str, body: str) -> pd.DataFrame:
+    """Build the single-row feature frame for one e-mail.
+
+    The subject and body (falsy values are treated as empty strings) are
+    joined with a space, lower-cased and passed through clean_text. The
+    returned DataFrame has one row with these columns, in this order:
+
+      content      cleaned text
+      msg_len      number of characters in the cleaned text
+      digit_count  number of digit characters in the cleaned text
+      url_count    occurrences of the literal 'URL' in the cleaned text
+      key_flag     1 if the text contains opportunity/reward/service AND
+                   (a digit OR the literal 'URL'), else 0
+
+    NOTE(review): clean_text replaces every character outside a-z, 0-9 and
+    whitespace, so an uppercase 'URL' cannot survive in its output; as
+    written, url_count is always 0 and the 'URL' half of key_flag never
+    fires. Confirm against the training pipeline before changing this.
+    """
+    combined = f"{title or ''} {body or ''}".lower()
+    txt = clean_text(combined)
+
+    has_keyword = re.search(r'(opportunity|reward|service)', txt) is not None
+    has_digit = re.search(r'\d', txt) is not None
+    has_url = 'URL' in txt
+
+    row = {
+        'content': txt,
+        'msg_len': len(txt),
+        'digit_count': len(re.findall(r'\d', txt)),
+        'url_count': txt.count('URL'),
+        'key_flag': int(has_keyword and (has_digit or has_url)),
+    }
+    return pd.DataFrame([row])
+# ———————————————————————————
+# 2) Load models/artifacts
+# ———————————————————————————
+@st.cache_resource
+def load_models():
+    """Load the fitted pipeline and decision threshold from disk, once.
+
+    st.cache_resource replaces the deprecated st.cache(allow_output_mutation=True):
+    it keeps a single shared instance of the loaded objects across reruns
+    and sessions instead of re-reading the pickles on every interaction.
+
+    Returns:
+        (prep_clf_pipe, threshold): the object stored in
+        'spam_deploy_pipeline.pkl' and the value stored in
+        'spam_threshold.pkl'.
+
+    Raises:
+        FileNotFoundError: if either artifact is missing from the working
+        directory.
+    """
+    # SECURITY: joblib.load unpickles arbitrary objects -- only ever point
+    # these paths at artifacts you produced and trust.
+    # adjust paths if needed
+    prep_clf_pipe = joblib.load('spam_deploy_pipeline.pkl')
+    threshold = joblib.load('spam_threshold.pkl')
+    return prep_clf_pipe, threshold
+# Page configuration is issued before anything else: Streamlit documents
+# set_page_config as needing to be the first Streamlit command on a page,
+# and the cached loader below may render a spinner while it runs.
+st.set_page_config(page_title="E-mail Spam Detection", layout="centered")
+pipe, thresh = load_models()
+# ———————————————————————————
+# 3) Streamlit UI
+# ———————————————————————————
+st.title("📧 E-mail Spam Detector")
+st.markdown(
+    "Enter an e-mail subject and body below, then hit **Predict** "
+    "to see the spam probability and label."
+)
+# A form batches the two inputs so the script only reruns the prediction
+# when the button is pressed, not on every keystroke.
+with st.form("input_form"):
+    subj = st.text_input("Subject / Title")
+    body = st.text_area("Body text", height=200)
+    submitted = st.form_submit_button("Predict")
+if submitted:
+    # Build the one-row feature frame the pipeline expects.
+    X = featurize(subj, body)
+    # Row 0, column 1 of predict_proba is used as the spam probability.
+    proba = pipe.predict_proba(X)[0, 1]
+    is_spam = proba >= thresh
+    label = "🚫 SPAM" if is_spam else "✅ Not Spam"
+    st.metric("Spam probability", f"{proba:.1%}", delta=None)
+    st.subheader(label)
+    if is_spam:
+        st.warning("This message is classified as spam. Proceed with caution!")
+    else:
+        st.success("This message looks clean.")
+    st.write("---")
+    st.markdown(
+        "Threshold for spam vs not-spam was set to "
+        f"**{thresh:.2f}** (optimized for F₂ score)."
+    )