Spaces:
Sleeping
Sleeping
| # app.py | |
| import re | |
| import joblib | |
| import pandas as pd | |
| import streamlit as st | |
# ───────────────────────────
# 0) Page configuration: keep this ahead of every other Streamlit call
# ───────────────────────────
st.set_page_config(
    layout="centered",
    page_title="E-mail Spam Detection",
)
# ───────────────────────────
# 1) Text cleaning & feature functions
# ───────────────────────────
def clean_text(text: str) -> str:
    """Normalize raw e-mail text into a single-spaced token string.

    The substitutions below are applied in order:

    1. runs of carriage returns, newlines and tabs become one space;
    2. ``http://`` / ``https://`` URLs become the placeholder `` URL ``;
    3. every character that is not a lowercase ASCII letter, a digit or
       whitespace becomes a space;
    4. runs of two or more whitespace characters collapse to one space.

    The result is stripped of leading and trailing whitespace.  Uppercase
    letters are *removed*, not lowercased, so callers are expected to
    lowercase the text before calling.

    NOTE(review): step 3 runs after step 2 and rejects uppercase letters,
    so the ``URL`` placeholder inserted by step 2 is itself blanked out and
    never appears in the returned string.  Left unchanged here because any
    model fitted on this function's output would see different features if
    it were fixed -- confirm against the training code before changing.
    """
    substitutions = (
        (r'[\r\n\t]+', ' '),
        (r'https?://\S+', ' URL '),
        (r'[^a-z0-9\s]', ' '),
        (r'\s{2,}', ' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
def featurize(title: str, body: str) -> pd.DataFrame:
    """Build the one-row feature frame for a single e-mail.

    ``title`` and ``body`` (either may be empty or ``None``) are joined
    with a space, lowercased and passed through ``clean_text``.

    Returns:
        A single-row DataFrame with columns, in this order:
        ``content`` (cleaned text), ``msg_len`` (its length),
        ``digit_count`` (number of digit characters), ``url_count``
        (occurrences of the literal ``URL``) and ``key_flag`` (1 when one
        of the keywords opportunity/reward/service appears together with
        a digit or the literal ``URL``, else 0).

    NOTE(review): ``clean_text`` as written strips uppercase letters, so
    the literal ``URL`` never survives cleaning; ``url_count`` is then
    always 0 and ``key_flag`` effectively depends on digits only.
    """
    combined = f"{title or ''} {body or ''}"
    txt = clean_text(combined.lower())

    digits = re.findall(r'\d', txt)
    has_keyword = re.search(r'(opportunity|reward|service)', txt) is not None
    has_digit_or_url = bool(digits) or 'URL' in txt

    row = {
        'content': txt,
        'msg_len': len(txt),
        'digit_count': len(digits),
        'url_count': txt.count('URL'),
        'key_flag': int(has_keyword and has_digit_or_url),
    }
    return pd.DataFrame([row])
# ───────────────────────────
# 2) Load models/artifacts once
# ───────────────────────────
@st.cache_resource
def load_models():
    """Load the deployed spam pipeline and its decision threshold.

    Decorated with ``st.cache_resource`` so the two artifacts are
    deserialized once and reused across Streamlit reruns, instead of being
    reloaded from disk on every widget interaction.

    Returns:
        A ``(deploy_pipe, threshold)`` tuple: the object stored in
        ``spam_deploy_pipeline.pkl`` (preprocessor + calibrated RF) and the
        value stored in ``spam_threshold.pkl``.

    Raises:
        FileNotFoundError: if either artifact is missing from the working
            directory.
    """
    # joblib.load unpickles arbitrary objects: only ship trusted artifacts
    # alongside the app.
    deploy_pipe = joblib.load('spam_deploy_pipeline.pkl')
    threshold = joblib.load('spam_threshold.pkl')
    return deploy_pipe, threshold


pipe, thresh = load_models()
# ───────────────────────────
# 3) Streamlit UI
# ───────────────────────────
st.title("📧 E-mail Spam Detector")
st.markdown(
    "Enter an e-mail **Subject** and **Body** below, then click **Predict** "
    "to see the spam probability and label."
)

# Widgets inside a form are only sent to the script when the submit button
# is pressed, so typing does not trigger a prediction on every keystroke.
with st.form("input_form"):
    subj = st.text_input("Subject / Title")
    body = st.text_area("Body text", height=200)
    submitted = st.form_submit_button("Predict")

if submitted:
    if not ((subj or "").strip() or (body or "").strip()):
        # Nothing to classify: scoring an empty message would still print
        # a probability and a label, which is misleading.
        st.warning("Please enter a subject or body text before predicting.")
    else:
        # 1) featurize
        X = featurize(subj, body)

        # 2) predict: column 1 of predict_proba is taken as the spam class
        proba = pipe.predict_proba(X)[0, 1]
        is_spam = proba >= thresh

        # 3) display
        st.metric("Spam probability", f"{proba:.1%}")
        if is_spam:
            st.subheader("🚫 SPAM")
            st.warning("This message is classified as spam. Proceed with caution!")
        else:
            st.subheader("✅ Not Spam")
            st.success("This message looks clean.")

        st.divider()
        # NOTE(review): the "β" below looks like a mis-encoded subscript
        # (e.g. F1/F2); left as-is until the intended metric is confirmed.
        st.caption(f"Decision threshold (Fβ-optimized): {thresh:.2f}")