Spaces:

moseleydev
/

oxicity-engine

Running

App Files Files Community

oxicity-engine / main.py

moseleydev

uploaded to hugging face

8514a3c verified 12 days ago

raw

history blame contribute delete

2.36 kB

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	from transformers import pipeline
	import re
	import time

	# Application object that the middleware and route below are registered on.
	app = FastAPI(
	    title="Toxicity Classification API",
	    description="A microservice for detecting hate speech in social media text.",
	)

	# 1. ADD CORS MIDDLEWARE
	# Any origin, method and header may call this API. Credentials stay disabled
	# because a wildcard origin must not be combined with allow_credentials=True;
	# if cookies or auth headers are ever needed, list the allowed origins
	# explicitly instead of using "*".
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Build the Cardiff NLP Twitter RoBERTa hate-speech pipeline once, at import
	# time, so the request handler can reuse the already-loaded model.
	print("Loading RoBERTa Model... This may take a moment.")
	classifier = pipeline(
	    task="text-classification",
	    model="cardiffnlp/twitter-roberta-base-hate-latest",
	)
	print("Model loaded successfully!")

	class TweetRequest(BaseModel):
	    """Request body for the detection endpoint: the raw tweet text."""

	    text: str

	# Links to drop: any run of non-space characters starting with "http"
	# (which also covers "https") or with "www".
	_URL_PATTERN = re.compile(r"http\S+|www\S+")
	# Handles such as "@some_user", replaced by one shared placeholder.
	_MENTION_PATTERN = re.compile(r"@\w+")


	def preprocess_tweet(text: str) -> str:
	    """Strip links and anonymise mentions in a tweet.

	    Args:
	        text: Raw tweet text.

	    Returns:
	        The text with every link removed, every ``@handle`` replaced by
	        ``@user``, and leading/trailing whitespace stripped. The result is
	        an empty string when the tweet contained nothing but links and
	        whitespace.
	    """
	    # The alternation bars must be unescaped: an escaped bar only matches a
	    # literal "|" character, which would leave every real URL in place.
	    without_links = _URL_PATTERN.sub("", text)
	    anonymised = _MENTION_PATTERN.sub("@user", without_links)
	    return anonymised.strip()

	@app.post("/api/detect")
	def detect_toxicity(request: TweetRequest):
	    """Classify a tweet and report the label, confidence and timing.

	    The text is cleaned with ``preprocess_tweet`` and then passed to the
	    module-level ``classifier`` pipeline.

	    Args:
	        request: Request body carrying the raw tweet in ``request.text``.

	    Returns:
	        A dict with the original and cleaned text, a ``prediction`` section
	        (upper-cased label, confidence as a percentage string, and an
	        ``is_toxic`` flag that is true when the label is ``HATE``) and a
	        ``metadata`` section (processing time in milliseconds and the model
	        name). If nothing is left after preprocessing, a dict with a single
	        ``error`` key is returned instead.

	    Raises:
	        HTTPException: With status 500 when the classifier call fails.
	    """
	    # perf_counter is a monotonic clock meant for measuring short durations;
	    # the wall clock can jump if the system time is adjusted mid-request.
	    start_time = time.perf_counter()

	    clean_text = preprocess_tweet(request.text)

	    # Edge case: the tweet was only a link and is now empty.
	    # NOTE(review): this is sent as a normal response rather than an HTTP
	    # error; kept so clients that check the "error" key keep working.
	    if not clean_text:
	        return {"error": "Tweet contains no readable text after preprocessing."}

	    # Only the model call is guarded, so a bug in the response formatting
	    # below is not misreported as an inference failure.
	    try:
	        # 2. ADD TRUNCATION: cap the input at 512 tokens so over-long text
	        # does not make the model call fail.
	        result = classifier(clean_text, truncation=True, max_length=512)[0]
	    except Exception as e:
	        # Surface model errors as a clean 500 and keep the original cause
	        # attached for server-side tracebacks.
	        raise HTTPException(
	            status_code=500,
	            detail=f"Model inference failed: {e}",
	        ) from e

	    # Clean up the output for the frontend
	    label = result["label"].upper()
	    confidence = round(result["score"] * 100, 2)
	    process_time = round((time.perf_counter() - start_time) * 1000, 2)

	    return {
	        "original_tweet": request.text,
	        "clean_text": clean_text,
	        "prediction": {
	            "label": label,
	            "confidence_score": f"{confidence}%",
	            "is_toxic": label == "HATE",
	        },
	        "metadata": {
	            "processing_time_ms": process_time,
	            "model_used": "cardiffnlp/twitter-roberta-base-hate-latest",
	        },
	    }