Spaces:

abhinavdread
/

spam-detection

Sleeping

abhinavvvvv committed on Feb 4

Commit

6cbee4e

1 Parent(s): 1cb74ae

updated approach

Files changed (5) hide show

app/detector.py ADDED Viewed

+import numpy as np
+from sentence_transformers import SentenceTransformer
+# Identifier of the sentence-transformers model loaded below.
+MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+# predict_spam labels a text "spam" when its best similarity is >= this value.
+SPAM_THRESHOLD = 0.65  # tuned default
+# The model is instantiated at import time and shared by every call.
+model = SentenceTransformer(MODEL_NAME)
+# Reference phrases: an input is scored by its similarity to the closest one.
+SPAM_PHRASES = [
+    "free money offer",
+    "win cash prize",
+    "claim your reward",
+    "urgent action required",
+    "limited time offer",
+    "cheap loan available",
+    "exclusive deal just for you",
+    "click the link to claim",
+    "account selected for reward",
+    "lottery winner notification",
+    "congratulations you have won",
+    "instant approval loan",
+    "low interest personal loan",
+    "act now offer expires",
+    "verify your account immediately",
+    "earn money from home",
+    "risk free investment",
+    "double your money fast",
+    "free gift voucher",
+    "special promotion offer"
+]
+# Encoded once at import so predict_spam only has to embed the incoming text.
+# normalize_embeddings=True is requested here and in predict_spam, whose
+# docstring reports the resulting dot products as cosine similarities.
+spam_embeddings = model.encode(
+    SPAM_PHRASES,
+    normalize_embeddings=True
+)
+def predict_spam(text: str):
+    """
+    Classify a message by its similarity to the reference spam phrases.
+
+    The text is embedded with the module-level model and compared against
+    every row of spam_embeddings; the highest similarity decides the label.
+
+    Args:
+      text: message to classify.
+
+    Returns:
+      dict with keys:
+        label: "spam" if the best similarity is >= SPAM_THRESHOLD, else "ham"
+        score: max cosine similarity, rounded to 4 decimal places
+        threshold: threshold used (SPAM_THRESHOLD)
+    """
+    # encode() is given a one-item list, so element 0 is this text's embedding.
+    text_embedding = model.encode(
+        [text],
+        normalize_embeddings=True
+    )[0]
+    # One dot product per reference phrase; both sides were encoded with
+    # normalize_embeddings=True, so these are treated as cosine similarities.
+    similarities = spam_embeddings @ text_embedding
+    max_similarity = float(np.max(similarities))
+    # The label uses the unrounded value; rounding only affects the reported score.
+    label = "spam" if max_similarity >= SPAM_THRESHOLD else "ham"
+    return {
+        "label": label,
+        "score": round(max_similarity, 4),
+        "threshold": SPAM_THRESHOLD
+    }

app/main.py CHANGED Viewed

@@ -1,13 +1,23 @@
 from fastapi import FastAPI
-from app.schemas import TextRequest, PredictionResponse
-from app.model import predict
-app = FastAPI(title="SMS Spam Detector")
-@app.get("/")
-def health():
-    return {"status": "ok"}
-@app.post("/predict", response_model=PredictionResponse)
-def predict_spam(request: TextRequest):
-    return predict(request.text)

 from fastapi import FastAPI
+from .schemas import PredictRequest, PredictResponse
+from .detector import predict_spam
+# Application object that the route decorators below register against.
+app = FastAPI(
+    title="Semantic SMS Spam Detection API",
+    description="Embedding-based spam detection using sentence transformers",
+    version="1.0.0"
+)
+# GET /status: returns a fixed payload; it does not call the detector.
+# NOTE(review): "model" is a separate string literal, not read from
+# detector.MODEL_NAME, so it must be updated by hand if the model changes.
+@app.get("/status")
+def status():
+    return {
+        "status": "ok",
+        "model": "all-MiniLM-L6-v2",
+        "method": "semantic similarity"
+    }
+# POST /predict: passes request.text to predict_spam and returns its dict
+# (label, score, threshold), declared here as a PredictResponse.
+@app.post("/predict", response_model=PredictResponse)
+def predict(request: PredictRequest):
+    return predict_spam(request.text)

app/model.py DELETED Viewed

@@ -1,29 +0,0 @@
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-MODEL_NAME = "mrm8488/bert-tiny-finetuned-sms-spam-detection"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
-model.eval()
-LABELS = ["ham", "spam"]
-def predict(text: str):
-    inputs = tokenizer(
-        text,
-        return_tensors="pt",
-        truncation=True,
-        padding=True
-    )
-    with torch.no_grad():
-        outputs = model(**inputs)
-        probs = torch.softmax(outputs.logits, dim=1)
-        label_id = torch.argmax(probs, dim=1).item()
-    return {
-        "label": LABELS[label_id],
-        "confidence": float(probs[0][label_id])
-    }

app/schemas.py CHANGED Viewed

@@ -1,8 +1,11 @@
 from pydantic import BaseModel
-class TextRequest(BaseModel):
     text: str
-class PredictionResponse(BaseModel):
     label: str
-    confidence: float

 from pydantic import BaseModel
+# Request model for POST /predict: a single string field, text.
+class PredictRequest(BaseModel):
     text: str
+# Response model for POST /predict; fields mirror the dict keys returned by
+# detector.predict_spam (label, score, threshold).
+class PredictResponse(BaseModel):
     label: str
+    score: float
+    threshold: float

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 fastapi
 uvicorn
-torch
-transformers
 pydantic

 fastapi
 uvicorn
+sentence-transformers
+numpy
 pydantic