toxicity-engine / main.py
moseleydev's picture
uploaded to hugging face
8514a3c verified
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import pipeline
import re
import time
# ASGI application object; every route and middleware below is registered on it.
# The title and description are handed to FastAPI as service metadata.
app = FastAPI(
    description="A microservice for detecting hate speech in social media text.",
    title="Toxicity Classification API",
)
# 1. ADD CORS MIDDLEWARE
# The API is open to browser clients on any origin, with any method and any
# request header. Credentials (cookies / Authorization headers) are NOT
# allowed: the FastAPI CORS documentation states that wildcard origins,
# methods and headers cannot be combined with allow_credentials=True, and
# nothing in this service reads cookies or auth headers.
# NOTE(review): if a frontend really needs credentialed requests, replace the
# "*" origins with an explicit list of trusted origins and re-enable
# allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Build the Cardiff NLP Twitter RoBERTa hate-speech classifier once, at import
# time, so that request handlers can reuse the same pipeline object.
print("Loading RoBERTa Model... This may take a moment.")
classifier = pipeline(
    task="text-classification",
    model="cardiffnlp/twitter-roberta-base-hate-latest",
)
print("Model loaded successfully!")
# Request body accepted by POST /api/detect.
class TweetRequest(BaseModel):
    # Raw tweet text exactly as submitted by the client (required).
    text: str
# Anything starting with "http" (which also covers "https") or "www", up to
# the next whitespace character.
_URL_PATTERN = re.compile(r"http\S+|www\S+")

# An @mention: "@" followed by word characters, but only when the "@" is not
# glued to a preceding word character, so e-mail addresses such as
# "bob@example.com" are left untouched.
_MENTION_PATTERN = re.compile(r"(?<!\w)@\w+")


def preprocess_tweet(text: str) -> str:
    """Normalise a raw tweet before it is passed to the classifier.

    Steps, in order:
      1. Remove URLs (tokens starting with ``http`` or ``www``).
      2. Replace every @mention with the placeholder ``@user``.
      3. Strip leading and trailing whitespace.

    Whitespace inside the text is left as-is, so removing a URL from the
    middle of a sentence can leave two consecutive spaces.

    Args:
        text: The raw tweet text.

    Returns:
        The cleaned text; an empty string if nothing but URLs/whitespace
        was present.
    """
    without_urls = _URL_PATTERN.sub("", text)
    anonymised = _MENTION_PATTERN.sub("@user", without_urls)
    return anonymised.strip()
@app.post("/api/detect")
def detect_toxicity(request: TweetRequest):
    """Classify a tweet with the hate-speech model and report the result.

    The tweet is cleaned with ``preprocess_tweet`` and, if any text remains,
    passed to the module-level ``classifier`` pipeline.

    Args:
        request: Request body carrying the raw tweet in ``request.text``.

    Returns:
        On success, a dict with:
          * ``original_tweet``: the text as received,
          * ``clean_text``: the text after preprocessing,
          * ``prediction``: ``label`` (upper-cased model label),
            ``confidence_score`` (percentage string, two decimals) and
            ``is_toxic`` (True when the label is ``"HATE"``),
          * ``metadata``: ``processing_time_ms`` and ``model_used``.
        If preprocessing leaves no text, a dict with a single ``error`` key
        is returned as a normal response body (no HTTPException is raised).

    Raises:
        HTTPException: status 500 when the classifier call or the parsing of
            its output raises any exception.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    started_at = time.perf_counter()
    clean_text = preprocess_tweet(request.text)

    # Edge case: If the tweet was only a link and is now empty
    if not clean_text:
        return {"error": "Tweet contains no readable text after preprocessing."}

    try:
        # 2. ADD TRUNCATION: Prevents the API from crashing if the text is over 512 tokens
        result = classifier(clean_text, truncation=True, max_length=512)[0]
        # Clean up the output for the frontend
        label = result["label"].upper()
        confidence = round(result["score"] * 100, 2)
    except Exception as e:
        # Catch any unexpected model errors cleanly instead of crashing the
        # server; chain the cause so the original traceback stays in the logs.
        raise HTTPException(
            status_code=500, detail=f"Model inference failed: {str(e)}"
        ) from e

    process_time = round((time.perf_counter() - started_at) * 1000, 2)

    return {
        "original_tweet": request.text,
        "clean_text": clean_text,
        "prediction": {
            "label": label,
            "confidence_score": f"{confidence}%",
            "is_toxic": label == "HATE",
        },
        "metadata": {
            "processing_time_ms": process_time,
            "model_used": "cardiffnlp/twitter-roberta-base-hate-latest",
        },
    }