Spaces:

Nick-2x
/

email2

Sleeping

App Files Files Community

email2 / app.py

Nick-2x

Update app.py

0fcd01c verified 2 months ago

raw

history blame contribute delete

1.84 kB

	from fastapi import FastAPI
	from pydantic import BaseModel
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch

	app = FastAPI()

	# 1. SWAP MODEL ID HERE
	# Option A: dima806/phishing-email-detection (Good for Phishing)
	# Option B: AntiSpamInstitute/spam-detector-bert-MoE-v2.2 (Good for Spam)
	MODEL_ID = "dima806/phishing-email-detection"

	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)

	class EmailInput(BaseModel):
	text: str

	@app.post("/")
	async def testingFunction():
	return "Api working fine..."

	@app.post("/predict")
	async def predict_email(data: EmailInput):
	# PRE-PROCESS: Handle very short text manually to avoid "Model Hallucinations"
	if len(data.text.strip().split()) < 3:
	return {"prediction": "legitimate", "confidence": 1.0, "is_phishing": False, "note": "Text too short for analysis"}

	inputs = tokenizer(data.text, return_tensors="pt", truncation=True, max_length=512)

	with torch.no_grad():
	outputs = model(**inputs)
	predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

	probs = predictions[0].tolist()

	# 2. DYNAMIC LABEL MAPPING
	# This automatically gets labels like 'LABEL_0', 'phishing', etc., from the model config
	confidences = {model.config.id2label[i]: prob for i, prob in enumerate(probs)}

	# Determine the top result
	max_label = max(confidences.items(), key=lambda x: x[1])

	return {
	"prediction": max_label[0],
	"confidence": round(max_label[1], 4),
	"all_scores": confidences,
	"is_phishing": "phishing" in max_label[0].lower() or "spam" in max_label[0].lower()
	}

	if __name__ == "__main__":
	import uvicorn
	uvicorn.run(app, host="0.0.0.0", port=7860)