Spaces:
Running
Running
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
# Similarity cut-off intended for the "spam" label.
# NOTE(review): predict_spam currently compares against its own 0.55 value
# rather than this constant -- the two should be reconciled.
SPAM_THRESHOLD = 0.65  # tuned default

# Sentence-embedding checkpoint; instantiated once at import time and reused
# for both the reference phrases and every incoming text.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)
# Reference phrases whose embeddings are averaged into the spam centroid.
# Every entry must be unique: the centroid is a plain mean, so a repeated
# phrase would silently count twice and pull the centroid toward it.
SPAM_PHRASES = [
    "free money offer",
    "win cash prize",
    "claim your reward",
    "urgent action required",
    "limited time offer",
    "cheap loan available",
    "exclusive deal just for you",
    "click the link to claim",
    "account selected for reward",
    "lottery winner notification",
    "congratulations you have won",
    "instant approval loan",
    "low interest personal loan",
    "act now offer expires",
    "verify your account immediately",
    "earn money from home",
    "risk free investment",
    "double your money fast",
    "free gift voucher",
    "special promotion offer",
    "win big cash today",
    "huge cash reward offer",
    "prize money offer",
    "win large amount of money",
    "huge cash reward",
    "cash reward available",
    "earn big money fast",
]
# Embed every reference phrase; normalize_embeddings=True asks the model for
# unit-length vectors.
spam_embeddings = model.encode(SPAM_PHRASES, normalize_embeddings=True)

# Average the phrase vectors into a single centroid, then rescale it to unit
# length so that a dot product with another unit vector is a cosine similarity.
spam_centroid = np.mean(spam_embeddings, axis=0)
spam_centroid = spam_centroid / np.linalg.norm(spam_centroid)
def predict_spam(text: str, threshold: float = 0.55) -> dict:
    """Label ``text`` as spam or ham by its similarity to the spam centroid.

    The text is embedded with the module-level ``model`` (unit-normalised) and
    dotted with the unit-length ``spam_centroid``; the result is compared
    against ``threshold``.

    Args:
        text: Message to classify.
        threshold: Minimum similarity (inclusive) for the "spam" label.
            Defaults to 0.55, the cut-off this function has always applied.
            NOTE(review): the module constant SPAM_THRESHOLD is 0.65 and is
            not consulted here -- confirm which value is the intended default.

    Returns:
        A dict with ``label`` ("spam" or "ham"), ``score`` (similarity rounded
        to 4 decimals) and ``threshold`` (the cut-off that was applied).
    """
    # Nothing to classify: skip the model call instead of scoring the
    # embedding of an empty string, which carries no meaningful signal.
    if text is None or (isinstance(text, str) and not text.strip()):
        return {"label": "ham", "score": 0.0, "threshold": threshold}

    text_embedding = model.encode(text, normalize_embeddings=True, convert_to_numpy=True)
    score = float(text_embedding @ spam_centroid)
    # The label is decided on the unrounded score; rounding is display-only.
    label = "spam" if score >= threshold else "ham"
    return {"label": label, "score": round(score, 4), "threshold": threshold}