Spaces:

Nick-2x
/

email2

Sleeping

App Files Files Community

Nick-2x committed on Mar 31

Commit

b2ed807

verified ·

1 Parent(s): 0717aa2

Create app.py

Browse files

Files changed (1) hide show

app.py +49 -0

app.py ADDED Viewed

	@@ -0,0 +1,49 @@

+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# ASGI application object; the /predict route below is registered on it.
+app = FastAPI()
+# 1. SWAP MODEL ID HERE
+# MODEL_ID is the model identifier passed to both from_pretrained() calls
+# below; change this one constant to switch classifiers.
+# Option A: dima806/phishing-email-detection (Good for Phishing)
+# Option B: AntiSpamInstitute/spam-detector-bert-MoE-v2.2 (Good for Spam)
+MODEL_ID = "dima806/phishing-email-detection"
+# Loaded once at import time so every request reuses the same tokenizer and
+# model objects instead of reloading them per call.
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
+# Request body schema for POST /predict: an object with a single "text" field.
+class EmailInput(BaseModel):
+    # Raw email text to classify.
+    text: str
+@app.post("/predict")
+async def predict_email(data: EmailInput):
+    # PRE-PROCESS: Handle very short text manually to avoid "Model Hallucinations"
+    if len(data.text.strip().split()) < 3:
+        return {"prediction": "legitimate", "confidence": 1.0, "is_phishing": False, "note": "Text too short for analysis"}
+    inputs = tokenizer(data.text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = model(**inputs)
+        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    probs = predictions[0].tolist()
+    # 2. DYNAMIC LABEL MAPPING
+    # This automatically gets labels like 'LABEL_0', 'phishing', etc., from the model config
+    confidences = {model.config.id2label[i]: prob for i, prob in enumerate(probs)}
+    # Determine the top result
+    max_label = max(confidences.items(), key=lambda x: x[1])
+    return {
+        "prediction": max_label[0],
+        "confidence": round(max_label[1], 4),
+        "all_scores": confidences,
+        "is_phishing": "phishing" in max_label[0].lower() or "spam" in max_label[0].lower()
+    }
+# Script entry point: when this file is executed directly (rather than
+# imported), serve `app` with uvicorn on all interfaces, port 7860.
+# NOTE(review): 7860 presumably matches the port the hosting Space expects — confirm.
+if __name__ == "__main__":
+    # Imported here so uvicorn is only required when run as a script.
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)