spam-detection / app /detector.py
abhinavvvvv's picture
optimising end point
d44da7b
import numpy as np
from sentence_transformers import SentenceTransformer
# Sentence-embedding model used for both the reference phrases and the
# incoming text. It is instantiated here, at module import time.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# NOTE(review): predict_spam uses its own 0.55 value rather than this
# constant -- confirm which of the two is intended.
SPAM_THRESHOLD = 0.65  # tuned default

# Reference phrases whose embeddings are averaged into the spam centroid.
# NOTE(review): "win big cash today", "prize money offer" and
# "win large amount of money" each appear twice, which gives them extra
# weight in the mean below -- confirm this is intended before deduplicating,
# since removing them would shift every score.
SPAM_PHRASES = [
    "free money offer",
    "win cash prize",
    "claim your reward",
    "urgent action required",
    "limited time offer",
    "cheap loan available",
    "exclusive deal just for you",
    "click the link to claim",
    "account selected for reward",
    "lottery winner notification",
    "congratulations you have won",
    "instant approval loan",
    "low interest personal loan",
    "act now offer expires",
    "verify your account immediately",
    "earn money from home",
    "risk free investment",
    "double your money fast",
    "free gift voucher",
    "special promotion offer",
    "win big cash today",
    "huge cash reward offer",
    "prize money offer",
    "win large amount of money",
    "win big cash today",
    "huge cash reward",
    "prize money offer",
    "win large amount of money",
    "cash reward available",
    "earn big money fast",
]

# Encode every phrase as a unit-length vector, average them, then rescale the
# average back to unit length so a dot product against another normalised
# embedding is a cosine similarity.
spam_embeddings = model.encode(SPAM_PHRASES, normalize_embeddings=True)
spam_centroid = spam_embeddings.mean(axis=0)
spam_centroid = spam_centroid / np.linalg.norm(spam_centroid)
def predict_spam(text: str, threshold: float = 0.55) -> dict:
    """Classify *text* as spam or ham by similarity to the spam centroid.

    The text is embedded with the module-level ``model`` and compared against
    the module-level ``spam_centroid``.

    Args:
        text: The message to classify.
        threshold: Minimum similarity score at which the text is labelled
            ``"spam"``. Defaults to 0.55, the value this function has always
            applied. NOTE(review): the module-level ``SPAM_THRESHOLD`` (0.65)
            is not used here -- confirm which value is intended.

    Returns:
        A dict with keys ``"label"`` (``"spam"`` or ``"ham"``), ``"score"``
        (the similarity rounded to 4 decimal places) and ``"threshold"``
        (the cutoff that was applied).
    """
    text_embedding = model.encode(
        text, normalize_embeddings=True, convert_to_numpy=True
    )
    # Both vectors are unit-normalised, so the dot product is their cosine
    # similarity.
    score = float(text_embedding @ spam_centroid)
    # The comparison uses the unrounded score; rounding is for display only.
    label = "spam" if score >= threshold else "ham"
    return {"label": label, "score": round(score, 4), "threshold": threshold}