# farouk04's picture
# Upload folder using huggingface_hub
# ca898cf verified
"""
SMS Spam Detector | Deployable Demo
Models & Contributors
---------------------
Linear SVM : Sanjivan Thiyageswaran (TP070073)
XGBoost : Mohamud Farah (TP076875)
Logistic Regression : Farouk Elouazzani (TP075438)
Multinomial NB : Devara Alandra Wicaksono (TP073570)
"""
import os
import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from xgboost import XGBClassifier
# ---------------------------------------------------------------------------
# Contributors (name, student ID, model, best tuned params)
# ---------------------------------------------------------------------------
# Display metadata for each model, keyed by the model name that is also used
# as the key in MODELS and METRICS.  Every entry carries the contributor's
# name and student ID, a human-readable summary of the tuned hyper-parameters,
# and the emoji rendered at the top of that model's result card.
CONTRIBUTORS = {
    "Linear SVM": {
        "name": "Sanjivan Thiyageswaran",
        "id": "TP070073",
        "params": "C=10, ngram_range=(1,2)",
        "emoji": "⚡",
    },
    "XGBoost": {
        "name": "Mohamud Farah",
        "id": "TP076875",
        "params": "n_estimators=200, max_depth=3",
        "emoji": "🌲",
    },
    "Logistic Regression": {
        "name": "Farouk Elouazzani",
        "id": "TP075438",
        "params": "C=10, ngram_range=(1,2)",
        "emoji": "📊",
    },
    "Multinomial NB": {
        "name": "Devara Alandra Wicaksono",
        "id": "TP073570",
        "params": "alpha=0.01, ngram_range=(1,2), min_df=2",
        "emoji": "🔢",
    },
}
# ---------------------------------------------------------------------------
# Train all four models once at startup
# ---------------------------------------------------------------------------
# The training data is expected to sit in the same directory as this script,
# so the path is resolved relative to the module file rather than the CWD.
_APP_DIR = os.path.dirname(__file__)
CSV_PATH = os.path.join(_APP_DIR, "spam_cleaned.csv")
def _tfidf_pipeline(classifier, **tfidf_kwargs):
    """Return a Pipeline of a TF-IDF vectoriser ("tfidf") followed by *classifier* ("clf")."""
    return Pipeline([
        ("tfidf", TfidfVectorizer(**tfidf_kwargs)),
        ("clf", classifier),
    ])


def train_models():
    """Train the four spam classifiers and score them on a held-out split.

    The data is read from ``CSV_PATH``.  It must provide a ``text`` column and
    a label column named either ``y`` or ``label``.  Labels are integer-encoded
    with ``LabelEncoder``; the rest of the app treats code 1 as spam.

    The data is split 80/20 (stratified, ``random_state=42``); every model is
    fitted on the training part and evaluated on the test part.

    Returns:
        A ``(models, metrics)`` tuple.  ``models`` maps model name to the
        fitted pipeline; ``metrics`` maps model name to a dict with the
        test-set ``"accuracy"`` and ``"f1"``.

    Raises:
        KeyError: if the CSV has no ``text`` column, or neither a ``y`` nor a
            ``label`` column.
    """
    df = pd.read_csv(CSV_PATH)

    # Messages that are empty after cleaning come back from the CSV as NaN,
    # which TfidfVectorizer rejects; treat them as empty documents instead.
    X = df["text"].fillna("").astype(str)

    if "y" in df.columns:
        raw_labels = df["y"]
    elif "label" in df.columns:
        raw_labels = df["label"]
    else:
        raise KeyError(
            f"{CSV_PATH} needs a 'y' or 'label' column; found {list(df.columns)}"
        )
    y = LabelEncoder().fit_transform(raw_labels)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Insertion order matters: it is the order in which result cards are shown.
    models = {
        # best params: C=10, ngram_range=(1,2)
        "Linear SVM": _tfidf_pipeline(
            LinearSVC(C=10, random_state=42),
            stop_words="english",
            ngram_range=(1, 2),
        ),
        # best params: C=10, ngram_range=(1,2)
        "Logistic Regression": _tfidf_pipeline(
            LogisticRegression(C=10, random_state=42, max_iter=1000),
            stop_words="english",
            ngram_range=(1, 2),
        ),
        # best params: alpha=0.01, min_df=2, ngram_range=(1,2)
        "Multinomial NB": _tfidf_pipeline(
            MultinomialNB(alpha=0.01),
            ngram_range=(1, 2),
            min_df=2,
            sublinear_tf=False,
        ),
        # best params: n_estimators=200, max_depth=3
        "XGBoost": _tfidf_pipeline(
            XGBClassifier(
                n_estimators=200,
                max_depth=3,
                eval_metric="logloss",
                random_state=42,
            ),
            stop_words="english",
        ),
    }
    for pipeline in models.values():
        pipeline.fit(X_train, y_train)

    # Test-set metrics shown in the About tab.
    metrics = {}
    for name, pipeline in models.items():
        y_pred = pipeline.predict(X_test)
        metrics[name] = {
            "accuracy": accuracy_score(y_test, y_pred),
            "f1": f1_score(y_test, y_pred),
        }
    return models, metrics
# Import-time side effect: fit every model once so that request handlers can
# reuse them.  MODELS maps model name -> fitted pipeline; METRICS maps model
# name -> {"accuracy", "f1"} as returned by train_models().
print("Training models … (this takes a few seconds)")
MODELS, METRICS = train_models()
print("All models ready.")
# ---------------------------------------------------------------------------
# Prediction helper
# ---------------------------------------------------------------------------
# Display attributes for a result card, keyed by predicted class code
# (0 = ham, 1 = spam): headline text, card background, card border colour and
# headline text colour.
LABEL_MAP = {0: "✅ Ham (not spam)", 1: "🚨 SPAM"}
BG_MAP = {0: "#1a3a1a", 1: "#3a1a1a"}
BORDER_MAP = {0: "#2ecc71", 1: "#e74c3c"}
COLOR_MAP = {0: "#2ecc71", 1: "#e74c3c"}
def predict_sms(text: str) -> str:
    """Classify *text* with every trained model and return the result as HTML.

    A missing or whitespace-only message yields a grey hint paragraph.
    Otherwise one card per entry in ``MODELS`` is rendered (model emoji and
    name, verdict, contributor name and ID) and the cards are wrapped in a
    single flex container.
    """
    if not (text and text.strip()):
        return "<p style='color:#888; padding:12px;'>⬆️ Enter a message above and click <b>Classify</b>.</p>"

    def render_card(model_name, model):
        # The pipelines expect an iterable of documents, hence the 1-item list.
        verdict = int(model.predict([text])[0])
        person = CONTRIBUTORS[model_name]
        pieces = [
            "<div style='",
            f"background:{BG_MAP[verdict]}; border:2px solid {BORDER_MAP[verdict]}; border-radius:12px;",
            "padding:16px 20px; flex:1; min-width:180px;'>",
            f"<div style='font-size:1.6em; margin-bottom:4px;'>{person['emoji']}</div>",
            f"<div style='font-weight:700; font-size:1.05em; color:#fff;'>{model_name}</div>",
            f"<div style='color:{COLOR_MAP[verdict]}; font-size:1.25em; font-weight:800; margin:8px 0;'>{LABEL_MAP[verdict]}</div>",
            f"<div style='color:#aaa; font-size:0.8em;'>{person['name']}<br>{person['id']}</div>",
            "</div>",
        ]
        return "".join(pieces)

    cards_html = "".join(render_card(name, model) for name, model in MODELS.items())
    return (
        "<div style='display:flex; gap:12px; flex-wrap:wrap; margin-top:8px;'>"
        + cards_html
        + "</div>"
    )
# ---------------------------------------------------------------------------
# Build Gradio interface
# ---------------------------------------------------------------------------
# Each sub-list holds exactly one value, the message text, because the
# examples feed a single input component.  The category comments below are
# for readers of this file only; they are not part of the data.
SPAM_EXAMPLES = [
    # Prize / lottery scams
    ["WINNER!! You've been selected to receive a £1,000 cash prize! Call 09061701461 NOW to claim!"],
    ["Congratulations! You have won a 2-week holiday to Benidorm. To claim call 08718726971."],
    ["FREE entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121."],
    # Phishing / account alerts
    ["Your account has been compromised. Verify your identity immediately: http://bit.ly/secure-login"],
    ["ALERT: Unusual sign-in detected on your PayPal account. Click here to secure it now: http://pp-verify.net"],
    ["Your Apple ID has been locked. Confirm your details at http://apple-id-verify.support or lose access."],
    # Financial / loan fraud
    ["You are entitled to up to £3,750 in compensation from your PPI claim! Reply YES to find out more."],
    ["Urgent! You qualify for a £5,000 loan even with bad credit. No fees. Call 0800-FREE-LOAN today!"],
    ["Earn £500/day working from home. No experience needed. Start today: www.easymoney247.co.uk"],
    # Delivery / package scams
    ["Your parcel could not be delivered. Pay the £1.99 redelivery fee here: http://royalmail-redeliver.com"],
    ["DHL NOTICE: Your package is on hold. Confirm delivery address to avoid return: http://dhl-confirm.net"],
    # Urgency / limited-time offers
    ["LAST CHANCE: Your Sky subscription expires TODAY. Renew now and get 3 months FREE. Call 0800123456."],
    ["Act now! Get a FREE iPhone 15 when you upgrade your plan. Limited stock. Text IPHONE to 88833."],
    # Adult / premium rate
    ["Hi babe, I'm lonely tonight… call me on 09065743876 (18+ only, £1.50/min). xoxo"],
]
# Legitimate ("ham") sample messages.  Each sub-list holds exactly one value,
# the message text; the category comments are for readers of this file only.
HAM_EXAMPLES = [
    # Casual chat
    ["Hey, are we still on for lunch at 1pm tomorrow? Let me know!"],
    ["I'll be home late tonight, don't wait up for dinner."],
    ["Can you pick up some milk on your way home? We're almost out."],
    ["Lol that was so funny last night, can't believe you said that 😂"],
    ["Happy birthday!! Hope you have an amazing day 🎉🎂"],
    # Family
    ["Mum, I've landed safely. Will call you when I get to the hotel. Love you!"],
    ["Dad, can you transfer me £30 for groceries? I'll pay you back on Friday."],
    ["Don't forget grandma's birthday dinner is on Sunday at 6pm, everyone is coming."],
    # Work / professional
    ["Hi, just a reminder that the team meeting is moved to 3pm this afternoon."],
    ["Please review the report I sent over and let me know if you need any changes by EOD."],
    ["The client confirmed the call for Thursday at 10am. Can you send the agenda?"],
    # Plans / meetups
    ["Movie tonight? I was thinking 7pm at the Odeon, the new Marvel one is out!"],
    ["Running a bit late, be there in 10 mins. Order me a coffee?"],
    ["Are you free this weekend? Thinking of going hiking if the weather is good."],
    # Reminders / errands
    ["Your dentist appointment is confirmed for Thursday 14th at 2:30pm."],
    ["Don't forget to charge your laptop before the presentation tomorrow!"],
]
# Rows of the "Models & Contributors" table in display order:
# (model name as used in CONTRIBUTORS and METRICS, short parameter summary).
_ABOUT_TABLE_MODELS = (
    ("Linear SVM", "C=10, ngram=(1,2)"),
    ("XGBoost", "n_est=200, depth=3"),
    ("Logistic Regression", "C=10, ngram=(1,2)"),
    ("Multinomial NB", "alpha=0.01, ngram=(1,2)"),
)

# Emoji, contributor name and student ID come from CONTRIBUTORS so the table
# cannot drift from the result cards; the scores come from METRICS.
_ABOUT_TABLE_ROWS = "\n".join(
    "| {n} | {emoji} {model} | {name} | {sid} | {params} | {acc:.4f} | {f1:.4f} |".format(
        n=n,
        emoji=CONTRIBUTORS[model]["emoji"],
        model=model,
        name=CONTRIBUTORS[model]["name"],
        sid=CONTRIBUTORS[model]["id"],
        params=params,
        acc=METRICS[model]["accuracy"],
        f1=METRICS[model]["f1"],
    )
    for n, (model, params) in enumerate(_ABOUT_TABLE_MODELS, start=1)
)

# Markdown shown in the About tab.  Blank lines separate the blocks so the
# closing italic line renders as its own paragraph instead of continuing the
# last list item.
ABOUT_MD = """
## About This Demo

This app classifies SMS messages as **Ham** (legitimate) or **Spam** using four machine-learning models,
all trained on the [UCI SMS Spam Collection](https://archive.ics.uci.edu/ml/datasets/sms+spam+collection) dataset.

### Models & Contributors

| # | Model | Contributor | Student ID | Best Params | Test Accuracy | Test F1 |
|---|-------|-------------|------------|-------------|:---:|:---:|
{table_rows}

### How It Works

1. Input an SMS message in the text box.
2. Click **Classify**.
3. All four models independently predict whether the message is spam or ham.
4. Results are shown side-by-side with the responsible contributor.

*Built for TXSA Group Assignment — Asia Pacific University (APU)*
""".format(table_rows=_ABOUT_TABLE_ROWS)
# Build the UI: a header, a "Classify" tab (input box, button, result area and
# two collapsible example lists), an "About" tab, and a footer line.
with gr.Blocks(title="SMS Spam Detector") as demo:
    gr.Markdown(
        "\n# 📱 SMS Spam Detector\n"
        "### TXSA Group Assignment | Asia Pacific University (APU)\n"
        "Enter any SMS message and all four classifiers will vote on whether it's **spam** or **ham**.\n"
    )
    with gr.Tabs():
        # --- Classify tab ---
        with gr.Tab("🔍 Classify"):
            with gr.Row():
                sms_input = gr.Textbox(
                    label="SMS Message",
                    placeholder="Type or paste an SMS message here…",
                    lines=4,
                    scale=3,
                )
                classify_btn = gr.Button("Classify ▶", variant="primary", size="lg")
            # Starts with the same hint that predict_sms returns for empty input.
            results_out = gr.HTML(
                value="<p style='color:#888; padding:12px;'>⬆️ Enter a message above and click <b>Classify</b>.</p>"
            )
            with gr.Accordion("🚨 Spam Examples — click any to load", open=False):
                gr.Examples(
                    examples=SPAM_EXAMPLES,
                    inputs=sms_input,
                    label="Spam messages (prize scams · phishing · financial fraud · delivery scams)",
                    examples_per_page=5,
                )
            with gr.Accordion("✅ Ham Examples — click any to load", open=False):
                gr.Examples(
                    examples=HAM_EXAMPLES,
                    inputs=sms_input,
                    label="Ham messages (casual chat · family · work · plans · reminders)",
                    examples_per_page=5,
                )
            # Clicking the button and submitting the textbox run the same handler.
            for register in (classify_btn.click, sms_input.submit):
                register(
                    fn=predict_sms,
                    inputs=sms_input,
                    outputs=results_out,
                )
        # --- About tab ---
        with gr.Tab("ℹ️ About"):
            gr.Markdown(ABOUT_MD)
    gr.Markdown(
        "<center><small>© 2026 APU TXSA Group — Sanjivan · Mohamud Farah · Farouk · Devara</small></center>"
    )
if __name__ == "__main__":
    # Script entry point: serve the app with the Soft theme and one extra CSS
    # rule for elements carrying the "result-box" class.
    # NOTE(review): theme/css are passed to launch(), not gr.Blocks(); confirm
    # the pinned Gradio version accepts them there.
    launch_options = {
        "theme": gr.themes.Soft(),
        "css": ".result-box { font-size: 1.1em; font-weight: bold; text-align: center; }",
    }
    demo.launch(**launch_options)