"""
SMS Spam Detector | Deployable Demo

Models & Contributors
---------------------
Linear SVM          : Sanjivan Thiyageswaran (TP070073)
XGBoost             : Mohamud Farah (TP076875)
Logistic Regression : Farouk Elouazzani (TP075438)
Multinomial NB      : Devara Alandra Wicaksono (TP073570)
"""

import os
import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from xgboost import XGBClassifier

# ---------------------------------------------------------------------------
# Contributors (name, student ID, model, best tuned params)
# ---------------------------------------------------------------------------
CONTRIBUTORS = {
    "Linear SVM": {
        "name": "Sanjivan Thiyageswaran",
        "id": "TP070073",
        "params": "C=10, ngram_range=(1,2)",
        "emoji": "⚡",
    },
    "XGBoost": {
        "name": "Mohamud Farah",
        "id": "TP076875",
        "params": "n_estimators=200, max_depth=3",
        "emoji": "🌲",
    },
    "Logistic Regression": {
        "name": "Farouk Elouazzani",
        "id": "TP075438",
        "params": "C=10, ngram_range=(1,2)",
        "emoji": "📊",
    },
    "Multinomial NB": {
        "name": "Devara Alandra Wicaksono",
        "id": "TP073570",
        "params": "alpha=0.01, ngram_range=(1,2), min_df=2",
        # NOTE(review): restored from a mis-decoded character; confirm this is
        # the intended emoji.
        "emoji": "🔢",
    },
}

# ---------------------------------------------------------------------------
# Train all four models once at startup
# ---------------------------------------------------------------------------
CSV_PATH = os.path.join(os.path.dirname(__file__), "spam_cleaned.csv")


def train_models() -> tuple[dict, dict]:
    """Fit the four classifiers on ``CSV_PATH`` and score them on a hold-out split.

    The CSV must provide a ``text`` column and a target column named ``y``
    (preferred) or ``label``. The target is integer-encoded with
    ``LabelEncoder``; the rest of this module treats class ``1`` as spam.

    Returns:
        A ``(models, metrics)`` pair. ``models`` maps a display name to a
        fitted ``Pipeline``; ``metrics`` maps the same names to a dict with
        ``"accuracy"`` and ``"f1"`` computed on the 20% stratified test split.
    """
    df = pd.read_csv(CSV_PATH)

    # Empty cells are read back as NaN, and TfidfVectorizer rejects NaN
    # documents, so coerce every message to a (possibly empty) string.
    X = df["text"].fillna("").astype(str)

    le = LabelEncoder()
    y = le.fit_transform(df["y"] if "y" in df.columns else df["label"])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # --- Linear SVM (best params: C=10, ngram_range=(1,2)) ---
    svm = Pipeline([
        ("tfidf", TfidfVectorizer(stop_words="english", ngram_range=(1, 2))),
        ("clf", LinearSVC(C=10, random_state=42)),
    ])
    svm.fit(X_train, y_train)

    # --- Logistic Regression (best params: C=10, ngram_range=(1,2)) ---
    lr = Pipeline([
        ("tfidf", TfidfVectorizer(stop_words="english", ngram_range=(1, 2))),
        ("clf", LogisticRegression(C=10, random_state=42, max_iter=1000)),
    ])
    lr.fit(X_train, y_train)

    # --- Multinomial NB (best params: alpha=0.01, min_df=2, ngram_range=(1,2)) ---
    nb = Pipeline([
        ("tfidf", TfidfVectorizer(ngram_range=(1, 2), min_df=2, sublinear_tf=False)),
        ("clf", MultinomialNB(alpha=0.01)),
    ])
    nb.fit(X_train, y_train)

    # --- XGBoost (best params: n_estimators=200, max_depth=3) ---
    xgb = Pipeline([
        ("tfidf", TfidfVectorizer(stop_words="english")),
        ("clf", XGBClassifier(
            n_estimators=200,
            max_depth=3,
            eval_metric="logloss",
            random_state=42,
        )),
    ])
    xgb.fit(X_train, y_train)

    models = {
        "Linear SVM": svm,
        "Logistic Regression": lr,
        "Multinomial NB": nb,
        "XGBoost": xgb,
    }

    # Compute test-set metrics for the about tab
    metrics = {}
    for name, model in models.items():
        y_pred = model.predict(X_test)
        metrics[name] = {
            "accuracy": accuracy_score(y_test, y_pred),
            "f1": f1_score(y_test, y_pred),
        }

    return models, metrics


print("Training models … (this takes a few seconds)")
MODELS, METRICS = train_models()
print("All models ready.")

# ---------------------------------------------------------------------------
# Prediction helper
# ---------------------------------------------------------------------------
LABEL_MAP = {0: "✅ Ham (not spam)", 1: "🚨 SPAM"}
BG_MAP = {0: "#1a3a1a", 1: "#3a1a1a"}
BORDER_MAP = {0: "#2ecc71", 1: "#e74c3c"}
COLOR_MAP = {0: "#2ecc71", 1: "#e74c3c"}

# Shown before the first classification and whenever the input is blank.
# NOTE(review): the inline styles in this module's HTML snippets are a
# reconstruction; adjust them to match the intended design.
PLACEHOLDER_HTML = (
    "<div style='text-align:center;padding:24px;color:#888;'>"
    "<p>⬆️ Enter a message above and click Classify.</p>"
    "</div>"
)


def _render_card(model_name: str, pred: int) -> str:
    """Return the HTML card showing one model's verdict and its contributor."""
    contrib = CONTRIBUTORS[model_name]
    label = LABEL_MAP[pred]
    bg = BG_MAP[pred]
    border = BORDER_MAP[pred]
    colour = COLOR_MAP[pred]
    return (
        f"<div style='background:{bg};border:2px solid {border};"
        "border-radius:12px;padding:16px;text-align:center;'>"
        f"<div style='font-size:2em;'>{contrib['emoji']}</div>"
        f"<div style='font-weight:bold;margin:4px 0;'>{model_name}</div>"
        # ``result-box`` is the class styled by the css passed to launch().
        f"<div class='result-box' style='color:{colour};'>{label}</div>"
        f"<div style='font-size:0.85em;opacity:0.8;margin-top:8px;'>"
        f"{contrib['name']}<br>{contrib['id']}</div>"
        "</div>"
    )


def predict_sms(text: str) -> str:
    """Classify *text* with every trained model and return the result as HTML.

    Args:
        text: The SMS message typed by the user.

    Returns:
        ``PLACEHOLDER_HTML`` when *text* is empty or whitespace-only;
        otherwise a grid with one card per entry in ``MODELS``.
    """
    if not text or not text.strip():
        return PLACEHOLDER_HTML

    cards = [
        _render_card(model_name, int(model.predict([text])[0]))
        for model_name, model in MODELS.items()
    ]

    return (
        "<div style='display:grid;"
        "grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:16px;'>"
        + "".join(cards)
        + "</div>"
    )


# ---------------------------------------------------------------------------
# Build Gradio interface
# ---------------------------------------------------------------------------
# Each sub-list holds a single value (the message text) for the one input
# component passed to gr.Examples; the category comments are for readers only.
SPAM_EXAMPLES = [
    # Prize / lottery scams
    ["WINNER!! You've been selected to receive a £1,000 cash prize! Call 09061701461 NOW to claim!"],
    ["Congratulations! You have won a 2-week holiday to Benidorm. To claim call 08718726971."],
    ["FREE entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121."],
    # Phishing / account alerts
    ["Your account has been compromised. Verify your identity immediately: http://bit.ly/secure-login"],
    ["ALERT: Unusual sign-in detected on your PayPal account. Click here to secure it now: http://pp-verify.net"],
    ["Your Apple ID has been locked. Confirm your details at http://apple-id-verify.support or lose access."],
    # Financial / loan fraud
    ["You are entitled to up to £3,750 in compensation from your PPI claim! Reply YES to find out more."],
    ["Urgent! You qualify for a £5,000 loan even with bad credit. No fees. Call 0800-FREE-LOAN today!"],
    ["Earn £500/day working from home. No experience needed. Start today: www.easymoney247.co.uk"],
    # Delivery / package scams
    ["Your parcel could not be delivered. Pay the £1.99 redelivery fee here: http://royalmail-redeliver.com"],
    ["DHL NOTICE: Your package is on hold. Confirm delivery address to avoid return: http://dhl-confirm.net"],
    # Urgency / limited-time offers
    ["LAST CHANCE: Your Sky subscription expires TODAY. Renew now and get 3 months FREE. Call 0800123456."],
    ["Act now! Get a FREE iPhone 15 when you upgrade your plan. Limited stock. Text IPHONE to 88833."],
    # Adult / premium rate
    ["Hi babe, I'm lonely tonight… call me on 09065743876 (18+ only, £1.50/min). xoxo"],
]

HAM_EXAMPLES = [
    # Casual chat
    ["Hey, are we still on for lunch at 1pm tomorrow? Let me know!"],
    ["I'll be home late tonight, don't wait up for dinner."],
    ["Can you pick up some milk on your way home? We're almost out."],
    ["Lol that was so funny last night, can't believe you said that 😂"],
    ["Happy birthday!! Hope you have an amazing day 🎉🎂"],
    # Family
    ["Mum, I've landed safely. Will call you when I get to the hotel. Love you!"],
    ["Dad, can you transfer me £30 for groceries? I'll pay you back on Friday."],
    ["Don't forget grandma's birthday dinner is on Sunday at 6pm, everyone is coming."],
    # Work / professional
    ["Hi, just a reminder that the team meeting is moved to 3pm this afternoon."],
    ["Please review the report I sent over and let me know if you need any changes by EOD."],
    ["The client confirmed the call for Thursday at 10am. Can you send the agenda?"],
    # Plans / meetups
    ["Movie tonight? I was thinking 7pm at the Odeon, the new Marvel one is out!"],
    ["Running a bit late, be there in 10 mins. Order me a coffee?"],
    ["Are you free this weekend? Thinking of going hiking if the weather is good."],
    # Reminders / errands
    ["Your dentist appointment is confirmed for Thursday 14th at 2:30pm."],
    ["Don't forget to charge your laptop before the presentation tomorrow!"],
]

ABOUT_MD = """
## About This Demo

This app classifies SMS messages as **Ham** (legitimate) or **Spam** using four
machine-learning models, all trained on the
[UCI SMS Spam Collection](https://archive.ics.uci.edu/ml/datasets/sms+spam+collection) dataset.

### Models & Contributors

| # | Model | Contributor | Student ID | Best Params | Test Accuracy | Test F1 |
|---|-------|-------------|------------|-------------|:---:|:---:|
| 1 | ⚡ Linear SVM | Sanjivan Thiyageswaran | TP070073 | C=10, ngram=(1,2) | {svm_acc} | {svm_f1} |
| 2 | 🌲 XGBoost | Mohamud Farah | TP076875 | n_est=200, depth=3 | {xgb_acc} | {xgb_f1} |
| 3 | 📊 Logistic Regression | Farouk Elouazzani | TP075438 | C=10, ngram=(1,2) | {lr_acc} | {lr_f1} |
| 4 | 🔢 Multinomial NB | Devara Alandra Wicaksono | TP073570 | alpha=0.01, ngram=(1,2) | {nb_acc} | {nb_f1} |

### How It Works

1. Input an SMS message in the text box.
2. Click **Classify**.
3. All four models independently predict whether the message is spam or ham.
4. Results are shown side-by-side with the responsible contributor.

*Built for TXSA Group Assignment — Asia Pacific University (APU)*
""".format(
    svm_acc=f"{METRICS['Linear SVM']['accuracy']:.4f}",
    svm_f1=f"{METRICS['Linear SVM']['f1']:.4f}",
    xgb_acc=f"{METRICS['XGBoost']['accuracy']:.4f}",
    xgb_f1=f"{METRICS['XGBoost']['f1']:.4f}",
    lr_acc=f"{METRICS['Logistic Regression']['accuracy']:.4f}",
    lr_f1=f"{METRICS['Logistic Regression']['f1']:.4f}",
    nb_acc=f"{METRICS['Multinomial NB']['accuracy']:.4f}",
    nb_f1=f"{METRICS['Multinomial NB']['f1']:.4f}",
)

# Kept at module level so the Markdown is not indented by the nested ``with``
# blocks below.
HEADER_MD = """
# 📱 SMS Spam Detector
### TXSA Group Assignment | Asia Pacific University (APU)

Enter any SMS message and all four classifiers will vote on whether it's **spam** or **ham**.
"""

FOOTER_MD = (
    "<p style='text-align:center;font-size:0.85em;opacity:0.7;'>"
    "© 2026 APU TXSA Group — Sanjivan · Mohamud Farah · Farouk · Devara"
    "</p>"
)

with gr.Blocks(title="SMS Spam Detector") as demo:
    gr.Markdown(HEADER_MD)

    with gr.Tabs():
        # ── Classify tab ──────────────────────────────────────────────────
        with gr.Tab("🔍 Classify"):
            # NOTE(review): the button is assumed to share the row with the
            # textbox (scale=3 implies a sibling); confirm the intended layout.
            with gr.Row():
                sms_input = gr.Textbox(
                    label="SMS Message",
                    placeholder="Type or paste an SMS message here…",
                    lines=4,
                    scale=3,
                )
                classify_btn = gr.Button("Classify ▶", variant="primary", size="lg")

            results_out = gr.HTML(value=PLACEHOLDER_HTML)

            with gr.Accordion("🚨 Spam Examples — click any to load", open=False):
                gr.Examples(
                    examples=SPAM_EXAMPLES,
                    inputs=sms_input,
                    label="Spam messages (prize scams · phishing · financial fraud · delivery scams)",
                    examples_per_page=5,
                )

            with gr.Accordion("✅ Ham Examples — click any to load", open=False):
                gr.Examples(
                    examples=HAM_EXAMPLES,
                    inputs=sms_input,
                    label="Ham messages (casual chat · family · work · plans · reminders)",
                    examples_per_page=5,
                )

            # Both the button and pressing Enter in the textbox classify.
            classify_btn.click(
                fn=predict_sms,
                inputs=sms_input,
                outputs=results_out,
            )
            sms_input.submit(
                fn=predict_sms,
                inputs=sms_input,
                outputs=results_out,
            )

        # ── About tab ─────────────────────────────────────────────────────
        with gr.Tab("ℹ️ About"):
            gr.Markdown(ABOUT_MD)

    gr.Markdown(FOOTER_MD)

if __name__ == "__main__":
    demo.launch(
        theme=gr.themes.Soft(),
        css=".result-box { font-size: 1.1em; font-weight: bold; text-align: center; }",
    )