abhinavvvvv committed on
Commit
6cbee4e
·
1 Parent(s): 1cb74ae

updated approach

Browse files
Files changed (5) hide show
  1. app/detector.py +61 -0
  2. app/main.py +19 -9
  3. app/model.py +0 -29
  4. app/schemas.py +6 -3
  5. requirements.txt +2 -2
app/detector.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer
3
+
4
+
5
+ MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
6
+ SPAM_THRESHOLD = 0.65 # tuned default
7
+
8
+
9
+ model = SentenceTransformer(MODEL_NAME)
10
+
11
+ SPAM_PHRASES = [
12
+ "free money offer",
13
+ "win cash prize",
14
+ "claim your reward",
15
+ "urgent action required",
16
+ "limited time offer",
17
+ "cheap loan available",
18
+ "exclusive deal just for you",
19
+ "click the link to claim",
20
+ "account selected for reward",
21
+ "lottery winner notification",
22
+ "congratulations you have won",
23
+ "instant approval loan",
24
+ "low interest personal loan",
25
+ "act now offer expires",
26
+ "verify your account immediately",
27
+ "earn money from home",
28
+ "risk free investment",
29
+ "double your money fast",
30
+ "free gift voucher",
31
+ "special promotion offer"
32
+ ]
33
+
34
+ spam_embeddings = model.encode(
35
+ SPAM_PHRASES,
36
+ normalize_embeddings=True
37
+ )
38
+
39
+ def predict_spam(text: str):
40
+ """
41
+ Returns:
42
+ label: spam | ham
43
+ score: max cosine similarity
44
+ threshold: threshold used
45
+ """
46
+
47
+ text_embedding = model.encode(
48
+ [text],
49
+ normalize_embeddings=True
50
+ )[0]
51
+
52
+ similarities = spam_embeddings @ text_embedding
53
+ max_similarity = float(np.max(similarities))
54
+
55
+ label = "spam" if max_similarity >= SPAM_THRESHOLD else "ham"
56
+
57
+ return {
58
+ "label": label,
59
+ "score": round(max_similarity, 4),
60
+ "threshold": SPAM_THRESHOLD
61
+ }
app/main.py CHANGED
@@ -1,13 +1,23 @@
1
  from fastapi import FastAPI
2
- from app.schemas import TextRequest, PredictionResponse
3
- from app.model import predict
4
 
5
- app = FastAPI(title="SMS Spam Detector")
 
 
 
 
6
 
7
- @app.get("/")
8
- def health():
9
- return {"status": "ok"}
10
 
11
- @app.post("/predict", response_model=PredictionResponse)
12
- def predict_spam(request: TextRequest):
13
- return predict(request.text)
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
+ from .schemas import PredictRequest, PredictResponse
3
+ from .detector import predict_spam
4
 
5
+ app = FastAPI(
6
+ title="Semantic SMS Spam Detection API",
7
+ description="Embedding-based spam detection using sentence transformers",
8
+ version="1.0.0"
9
+ )
10
 
 
 
 
11
 
12
+ @app.get("/status")
13
+ def status():
14
+ return {
15
+ "status": "ok",
16
+ "model": "all-MiniLM-L6-v2",
17
+ "method": "semantic similarity"
18
+ }
19
+
20
+
21
+ @app.post("/predict", response_model=PredictResponse)
22
+ def predict(request: PredictRequest):
23
+ return predict_spam(request.text)
app/model.py DELETED
@@ -1,29 +0,0 @@
1
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
2
- import torch
3
-
4
- MODEL_NAME = "mrm8488/bert-tiny-finetuned-sms-spam-detection"
5
-
6
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
7
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
8
-
9
- model.eval()
10
-
11
- LABELS = ["ham", "spam"]
12
-
13
- def predict(text: str):
14
- inputs = tokenizer(
15
- text,
16
- return_tensors="pt",
17
- truncation=True,
18
- padding=True
19
- )
20
-
21
- with torch.no_grad():
22
- outputs = model(**inputs)
23
- probs = torch.softmax(outputs.logits, dim=1)
24
- label_id = torch.argmax(probs, dim=1).item()
25
-
26
- return {
27
- "label": LABELS[label_id],
28
- "confidence": float(probs[0][label_id])
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/schemas.py CHANGED
@@ -1,8 +1,11 @@
1
  from pydantic import BaseModel
2
 
3
- class TextRequest(BaseModel):
 
4
  text: str
5
 
6
- class PredictionResponse(BaseModel):
 
7
  label: str
8
- confidence: float
 
 
1
  from pydantic import BaseModel
2
 
3
+
4
+ class PredictRequest(BaseModel):
5
  text: str
6
 
7
+
8
+ class PredictResponse(BaseModel):
9
  label: str
10
+ score: float
11
+ threshold: float
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  fastapi
2
  uvicorn
3
- torch
4
- transformers
5
  pydantic
 
1
  fastapi
2
  uvicorn
3
+ sentence-transformers
4
+ numpy
5
  pydantic