Spaces:
Sleeping
Sleeping
| """ | |
| SMS Spam Detector | Deployable Demo | |
| Models & Contributors | |
| --------------------- | |
| Linear SVM : Sanjivan Thiyageswaran (TP070073) | |
| XGBoost : Mohamud Farah (TP076875) | |
| Logistic Regression : Farouk Elouazzani (TP075438) | |
| Multinomial NB : Devara Alandra Wicaksono (TP073570) | |
| """ | |
| import os | |
| import pandas as pd | |
| import gradio as gr | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.svm import LinearSVC | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.metrics import accuracy_score, f1_score | |
| from xgboost import XGBClassifier | |
# ---------------------------------------------------------------------------
# Team roster: model display name -> who tuned it and with which settings
# ---------------------------------------------------------------------------
# Every row below is (contributor name, student ID, best tuned params as
# display text, emoji shown on the result card), in that order.
_CONTRIBUTOR_FIELDS = ("name", "id", "params", "emoji")

CONTRIBUTORS = {
    model_name: dict(zip(_CONTRIBUTOR_FIELDS, row))
    for model_name, row in (
        (
            "Linear SVM",
            ("Sanjivan Thiyageswaran", "TP070073", "C=10, ngram_range=(1,2)", "β‘"),
        ),
        (
            "XGBoost",
            ("Mohamud Farah", "TP076875", "n_estimators=200, max_depth=3", "π²"),
        ),
        (
            "Logistic Regression",
            ("Farouk Elouazzani", "TP075438", "C=10, ngram_range=(1,2)", "π"),
        ),
        (
            "Multinomial NB",
            (
                "Devara Alandra Wicaksono",
                "TP073570",
                "alpha=0.01, ngram_range=(1,2), min_df=2",
                "π’",
            ),
        ),
    )
}
# ---------------------------------------------------------------------------
# Train all four models once at startup
# ---------------------------------------------------------------------------
# The dataset is expected to sit next to this file.
CSV_PATH = os.path.join(os.path.dirname(__file__), "spam_cleaned.csv")


def _load_dataset(csv_path):
    """Read the training CSV and return ``(texts, encoded_labels)``.

    The message column must be called ``text``; the label column may be
    called ``y`` or ``label`` (``y`` wins when both exist).

    Raises:
        KeyError: if the text column or both label columns are missing.
    """
    df = pd.read_csv(csv_path)

    if "text" not in df.columns:
        raise KeyError(f"'text' column not found in {csv_path}")
    label_col = next((col for col in ("y", "label") if col in df.columns), None)
    if label_col is None:
        raise KeyError(f"neither 'y' nor 'label' column found in {csv_path}")

    # pandas reads empty CSV fields as NaN, and TfidfVectorizer refuses NaN
    # documents, so a single message that was cleaned down to nothing would
    # abort training. Such rows carry no signal; drop them up front.
    df = df.dropna(subset=["text", label_col])

    texts = df["text"].astype(str)
    labels = LabelEncoder().fit_transform(df[label_col])
    return texts, labels


def _build_pipelines():
    """Return the four unfitted TF-IDF + classifier pipelines keyed by display name."""
    return {
        # best params: C=10, ngram_range=(1,2)
        "Linear SVM": Pipeline([
            ("tfidf", TfidfVectorizer(stop_words="english", ngram_range=(1, 2))),
            ("clf", LinearSVC(C=10, random_state=42)),
        ]),
        # best params: C=10, ngram_range=(1,2)
        "Logistic Regression": Pipeline([
            ("tfidf", TfidfVectorizer(stop_words="english", ngram_range=(1, 2))),
            ("clf", LogisticRegression(C=10, random_state=42, max_iter=1000)),
        ]),
        # best params: alpha=0.01, min_df=2, ngram_range=(1,2)
        "Multinomial NB": Pipeline([
            ("tfidf", TfidfVectorizer(ngram_range=(1, 2), min_df=2, sublinear_tf=False)),
            ("clf", MultinomialNB(alpha=0.01)),
        ]),
        # best params: n_estimators=200, max_depth=3
        "XGBoost": Pipeline([
            ("tfidf", TfidfVectorizer(stop_words="english")),
            ("clf", XGBClassifier(
                n_estimators=200, max_depth=3,
                eval_metric="logloss", random_state=42,
            )),
        ]),
    }


def train_models():
    """Fit all four classifiers on ``CSV_PATH`` and score them on a held-out split.

    The data is split 80/20 (stratified, ``random_state=42``); every pipeline
    is fitted on the training part and evaluated on the test part.

    Returns:
        ``(models, metrics)`` where ``models`` maps display name -> fitted
        pipeline and ``metrics`` maps display name ->
        ``{"accuracy": float, "f1": float}``.

    Raises:
        KeyError: if the CSV lacks the ``text`` column or a ``y``/``label`` column.
    """
    texts, labels = _load_dataset(CSV_PATH)
    X_train, X_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42, stratify=labels
    )

    models = _build_pipelines()
    metrics = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # NOTE(review): f1_score is called with its defaults, so it scores
        # encoded class 1 -- presumably "spam" after LabelEncoder's sorted
        # encoding; confirm against the label values in the CSV.
        metrics[name] = {
            "accuracy": accuracy_score(y_test, y_pred),
            "f1": f1_score(y_test, y_pred),
        }
    return models, metrics
# Runs at import time: fit the models once and keep the fitted pipelines and
# their test-set metrics in module globals for the code below.
print("Training models β¦ (this takes a few seconds)")
_trained = train_models()
MODELS, METRICS = _trained
print("All models ready.")
# ---------------------------------------------------------------------------
# Prediction helper
# ---------------------------------------------------------------------------
# Presentation lookups keyed by the predicted class (0 or 1).
LABEL_MAP = {
    0: "β Ham (not spam)",
    1: "π¨ SPAM",
}
# Card background colour per class.
BG_MAP = dict(enumerate(("#1a3a1a", "#3a1a1a")))
# Card border colour per class.
BORDER_MAP = dict(enumerate(("#2ecc71", "#e74c3c")))
# Verdict text colour per class; same values as the border, kept as its own dict.
COLOR_MAP = dict(BORDER_MAP)
def predict_sms(text: str):
    """Run every trained model on one SMS and render the verdicts as HTML.

    Args:
        text: The message entered by the user.

    Returns:
        An HTML string. For an empty or whitespace-only ``text`` it is a grey
        prompt paragraph; otherwise it is a flex container holding one card
        per entry of ``MODELS`` (emoji, model name, predicted label,
        contributor name and student ID).
    """
    if not (text and text.strip()):
        return "<p style='color:#888; padding:12px;'>β¬οΈ Enter a message above and click <b>Classify</b>.</p>"

    def render_card(model_name, model):
        # The pipelines take a list of documents; wrap the single message.
        verdict = int(model.predict([text])[0])
        author = CONTRIBUTORS[model_name]
        pieces = (
            "<div style='",
            f"background:{BG_MAP[verdict]}; border:2px solid {BORDER_MAP[verdict]}; border-radius:12px;",
            "padding:16px 20px; flex:1; min-width:180px;'>",
            f"<div style='font-size:1.6em; margin-bottom:4px;'>{author['emoji']}</div>",
            f"<div style='font-weight:700; font-size:1.05em; color:#fff;'>{model_name}</div>",
            f"<div style='color:{COLOR_MAP[verdict]}; font-size:1.25em; font-weight:800; margin:8px 0;'>{LABEL_MAP[verdict]}</div>",
            f"<div style='color:#aaa; font-size:0.8em;'>{author['name']}<br>{author['id']}</div>",
            "</div>",
        )
        return "".join(pieces)

    all_cards = "".join(render_card(name, model) for name, model in MODELS.items())
    return (
        "<div style='display:flex; gap:12px; flex-wrap:wrap; margin-top:8px;'>"
        + all_cards
        + "</div>"
    )
# ---------------------------------------------------------------------------
# Build Gradio interface
# ---------------------------------------------------------------------------
# Clickable spam samples. Each row is a one-element list holding only the
# message text; the category comments are for readers and are not part of
# the data.
SPAM_EXAMPLES = [
    [message]
    for message in (
        # Prize / lottery scams
        "WINNER!! You've been selected to receive a Β£1,000 cash prize! Call 09061701461 NOW to claim!",
        "Congratulations! You have won a 2-week holiday to Benidorm. To claim call 08718726971.",
        "FREE entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121.",
        # Phishing / account alerts
        "Your account has been compromised. Verify your identity immediately: http://bit.ly/secure-login",
        "ALERT: Unusual sign-in detected on your PayPal account. Click here to secure it now: http://pp-verify.net",
        "Your Apple ID has been locked. Confirm your details at http://apple-id-verify.support or lose access.",
        # Financial / loan fraud
        "You are entitled to up to Β£3,750 in compensation from your PPI claim! Reply YES to find out more.",
        "Urgent! You qualify for a Β£5,000 loan even with bad credit. No fees. Call 0800-FREE-LOAN today!",
        "Earn Β£500/day working from home. No experience needed. Start today: www.easymoney247.co.uk",
        # Delivery / package scams
        "Your parcel could not be delivered. Pay the Β£1.99 redelivery fee here: http://royalmail-redeliver.com",
        "DHL NOTICE: Your package is on hold. Confirm delivery address to avoid return: http://dhl-confirm.net",
        # Urgency / limited-time offers
        "LAST CHANCE: Your Sky subscription expires TODAY. Renew now and get 3 months FREE. Call 0800123456.",
        "Act now! Get a FREE iPhone 15 when you upgrade your plan. Limited stock. Text IPHONE to 88833.",
        # Adult / premium rate
        "Hi babe, I'm lonely tonight⦠call me on 09065743876 (18+ only, £1.50/min). xoxo",
    )
]
# Clickable legitimate ("ham") samples, in the same one-element-row shape as
# the spam samples; the category comments are for readers only.
HAM_EXAMPLES = [
    [message]
    for message in (
        # Casual chat
        "Hey, are we still on for lunch at 1pm tomorrow? Let me know!",
        "I'll be home late tonight, don't wait up for dinner.",
        "Can you pick up some milk on your way home? We're almost out.",
        "Lol that was so funny last night, can't believe you said that π",
        "Happy birthday!! Hope you have an amazing day ππ",
        # Family
        "Mum, I've landed safely. Will call you when I get to the hotel. Love you!",
        "Dad, can you transfer me Β£30 for groceries? I'll pay you back on Friday.",
        "Don't forget grandma's birthday dinner is on Sunday at 6pm, everyone is coming.",
        # Work / professional
        "Hi, just a reminder that the team meeting is moved to 3pm this afternoon.",
        "Please review the report I sent over and let me know if you need any changes by EOD.",
        "The client confirmed the call for Thursday at 10am. Can you send the agenda?",
        # Plans / meetups
        "Movie tonight? I was thinking 7pm at the Odeon, the new Marvel one is out!",
        "Running a bit late, be there in 10 mins. Order me a coffee?",
        "Are you free this weekend? Thinking of going hiking if the weather is good.",
        # Reminders / errands
        "Your dentist appointment is confirmed for Thursday 14th at 2:30pm.",
        "Don't forget to charge your laptop before the presentation tomorrow!",
    )
]
# Markdown for the About tab. The {..._acc} / {..._f1} placeholders are filled
# with the test-set scores from METRICS, formatted to four decimals.
_ABOUT_TEMPLATE = """
## About This Demo
This app classifies SMS messages as **Ham** (legitimate) or **Spam** using four machine-learning models,
all trained on the [UCI SMS Spam Collection](https://archive.ics.uci.edu/ml/datasets/sms+spam+collection) dataset.
### Models & Contributors
| # | Model | Contributor | Student ID | Best Params | Test Accuracy | Test F1 |
|---|-------|-------------|------------|-------------|:---:|:---:|
| 1 | β‘ Linear SVM | Sanjivan Thiyageswaran | TP070073 | C=10, ngram=(1,2) | {svm_acc} | {svm_f1} |
| 2 | π² XGBoost | Mohamud Farah | TP076875 | n_est=200, depth=3 | {xgb_acc} | {xgb_f1} |
| 3 | π Logistic Regression | Farouk Elouazzani | TP075438 | C=10, ngram=(1,2) | {lr_acc} | {lr_f1} |
| 4 | π’ Multinomial NB | Devara Alandra Wicaksono | TP073570 | alpha=0.01, ngram=(1,2) | {nb_acc} | {nb_f1} |
### How It Works
1. Input an SMS message in the text box.
2. Click **Classify**.
3. All four models independently predict whether the message is spam or ham.
4. Results are shown side-by-side with the responsible contributor.
*Built for TXSA Group Assignment β Asia Pacific University (APU)*
"""

ABOUT_MD = _ABOUT_TEMPLATE.format(**{
    # placeholder prefix + suffix -> METRICS[model][metric]
    f"{prefix}_{suffix}": f"{METRICS[model][metric]:.4f}"
    for prefix, model in (
        ("svm", "Linear SVM"),
        ("xgb", "XGBoost"),
        ("lr", "Logistic Regression"),
        ("nb", "Multinomial NB"),
    )
    for suffix, metric in (("acc", "accuracy"), ("f1", "f1"))
})
# Page layout. NOTE(review): the nesting below was reconstructed from syntax
# (the Row holding the textbox and button, everything else directly in its
# tab, the footer after the tabs) -- confirm against the intended layout.
with gr.Blocks(title="SMS Spam Detector") as demo:
    # Page header.
    gr.Markdown(
        "\n"
        "# π± SMS Spam Detector\n"
        "### TXSA Group Assignment | Asia Pacific University (APU)\n"
        "Enter any SMS message and all four classifiers will vote on whether it's **spam** or **ham**.\n"
    )

    with gr.Tabs():
        # -- Classify tab ---------------------------------------------------
        with gr.Tab("π Classify"):
            with gr.Row():
                sms_input = gr.Textbox(
                    label="SMS Message",
                    placeholder="Type or paste an SMS message hereβ¦",
                    lines=4,
                    scale=3,
                )
                classify_btn = gr.Button("Classify βΆ", variant="primary", size="lg")

            # Starts out with the same prompt predict_sms returns for empty input.
            results_out = gr.HTML(
                value="<p style='color:#888; padding:12px;'>β¬οΈ Enter a message above and click <b>Classify</b>.</p>"
            )

            # Two collapsed example pickers (spam first, then ham); both
            # load the chosen message into the textbox.
            for _heading, _rows, _caption in (
                (
                    "π¨ Spam Examples β click any to load",
                    SPAM_EXAMPLES,
                    "Spam messages (prize scams Β· phishing Β· financial fraud Β· delivery scams)",
                ),
                (
                    "β Ham Examples β click any to load",
                    HAM_EXAMPLES,
                    "Ham messages (casual chat Β· family Β· work Β· plans Β· reminders)",
                ),
            ):
                with gr.Accordion(_heading, open=False):
                    gr.Examples(
                        examples=_rows,
                        inputs=sms_input,
                        label=_caption,
                        examples_per_page=5,
                    )

            # The button click and the textbox submit event run the same handler.
            _classify_wiring = {
                "fn": predict_sms,
                "inputs": sms_input,
                "outputs": results_out,
            }
            classify_btn.click(**_classify_wiring)
            sms_input.submit(**_classify_wiring)

        # -- About tab ------------------------------------------------------
        with gr.Tab("βΉοΈ About"):
            gr.Markdown(ABOUT_MD)

    # Footer.
    gr.Markdown(
        "<center><small>Β© 2026 APU TXSA Group β Sanjivan Β· Mohamud Farah Β· Farouk Β· Devara</small></center>"
    )
if __name__ == "__main__":
    # NOTE(review): passing theme/css to launch() only works on Gradio
    # releases that accept these options there rather than on gr.Blocks();
    # confirm against the pinned Gradio version. The .result-box class is
    # not referenced by any component in the code shown here.
    launch_options = {
        "theme": gr.themes.Soft(),
        "css": ".result-box { font-size: 1.1em; font-weight: bold; text-align: center; }",
    }
    demo.launch(**launch_options)