File size: 2,603 Bytes
cfc63cb
a3ff60f
d4c1eaa
 
cfc63cb
63591e5
 
 
 
 
 
cfc63cb
63591e5
4910b5a
63591e5
4910b5a
63591e5
 
 
d4c1eaa
ae80ca8
63591e5
 
cfc63cb
63591e5
 
 
972ddaf
c6e66b7
 
 
 
 
 
 
63591e5
 
d4c1eaa
 
c6e66b7
63591e5
c6e66b7
 
972ddaf
 
 
c6e66b7
 
972ddaf
c6e66b7
 
cfc63cb
63591e5
 
 
c6e66b7
63591e5
 
 
 
 
 
 
 
 
4910b5a
 
63591e5
972ddaf
 
 
 
 
 
a3ff60f
63591e5
a3ff60f
cfc63cb
 
 
63591e5
d4c1eaa
4910b5a
 
d4c1eaa
 
58ba342
cfc63cb
a3ff60f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from fastapi import FastAPI, Form
from fastapi.responses import Response
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from twilio.twiml.messaging_response import MessagingResponse
import os

# -----------------------------
# Environment-safe cache path
# -----------------------------
# Honour HF_HOME when the platform sets it; otherwise fall back to /tmp,
# which is writable on most container hosts.
HF_CACHE_DIR = os.getenv("HF_HOME", "/tmp/hf_cache")

# -----------------------------
# Load regression model from Hugging Face
# -----------------------------
model_id = "ST-THOMAS-OF-AQUINAS/impersonation-bart"

# Prefer GPU when present; everything below is placed on this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=HF_CACHE_DIR)
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, cache_dir=HF_CACHE_DIR
).to(device)
model.eval()  # inference only — disables dropout etc.

# -----------------------------
# Helper function
# -----------------------------
def predict_score(text: str, debug: bool = False):
    """Score *text* with the regression head.

    Returns the sigmoid of the model's single logit, i.e. a probability in
    [0, 1], rounded to 3 decimals. With ``debug=True`` a dict holding the
    raw logit and the unrounded sigmoid score is returned instead.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256,
    )
    # Move every input tensor onto the same device as the model.
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**batch).logits.squeeze()
        # Sigmoid maps the raw regression logit into a [0, 1] probability.
        probability = torch.sigmoid(logits).item()

    if debug:
        return {
            "raw_logits": logits.item(),
            "sigmoid_score": probability,
        }

    return round(probability, 3)

# -----------------------------
# FastAPI app
# -----------------------------
# ASGI application instance; served by an ASGI server (e.g. uvicorn) in deployment.
app = FastAPI(title="Impersonation Detector st thomas of aquinas")

# Health-check route
@app.get("/")
async def health_check():
    """Liveness probe — confirms the service is up and responding."""
    payload = {"status": "✅ API is running"}
    return payload

# Simple GET test
@app.get("/predict")
async def get_predict(text: str):
    """Return the rounded impersonation probability for *text*."""
    return {"impersonation_score": predict_score(text)}

# Debugging route
@app.get("/debug")
async def debug_predict(text: str):
    """Expose the raw logit alongside the sigmoid score for inspection."""
    details = predict_score(text, debug=True)
    return {"debug_output": details}

# -----------------------------
# Twilio WhatsApp POST
# -----------------------------
@app.post("/whatsapp")
async def whatsapp_reply(Body: str = Form(...)):
    """Twilio WhatsApp webhook: score the inbound message and answer in TwiML.

    Twilio POSTs the message text in the form field ``Body``; the response
    must be TwiML XML, hence the explicit ``application/xml`` media type.
    """
    if Body.strip():
        score = predict_score(Body)
        reply = f"Impersonation Score: {score}\n(0.0 = genuine, 1.0 = impersonation)"
    else:
        reply = "⚠️ No text detected."

    twiml = MessagingResponse()
    twiml.message(reply)

    # Return proper TwiML XML
    return Response(content=str(twiml), media_type="application/xml")