Spaces:

fallinluv8
/

nexore-toxic-api

Sleeping

App Files Files Community

nexore-toxic-api / main.py

fallinluv8

Update main.py

7ff6bb3 verified 3 months ago

raw

history blame contribute delete

4.37 kB

	import uvicorn
	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	# IMPORTANT: import XLMRobertaForTokenClassification explicitly to fix the "Unrecognized model" error
	from transformers import AutoTokenizer, XLMRobertaForTokenClassification, pipeline
	import torch
	import time

	# FastAPI application instance; the startup hook and route decorators in this file attach to it.
	app = FastAPI()

	# Hub repository id passed to from_pretrained() for both the tokenizer and the model.
	MODEL_PATH = "fallinluv8/nexore-toxic-model"

	# Hard-coded blacklist consulted by /predict before the model runs; any hit
	# marks the text toxic with score 1.0. Add further entries here as needed.
	# NOTE(review): the very short entries ("l", "c", "đ", "bu") will match a
	# great deal of ordinary text - confirm they are intended and were not
	# truncated from longer words.
	HARD_BLACKLIST = [
	"đéo", "đ**", "dm", "dcm", "đm", "vcl", "vl",
	"cc", "cmn", "cmm", "lồn", "l", "đĩ", "đ",
	"cặc", "c", "buồi", "bu", "đụ", "đụ mẹ",
	"đụ bố", "đụ con mẹ mày", "đụ con", "đụ mày"
	]

	# Token-classification pipeline assigned by load_model(); stays None until loading succeeds.
	classifier = None
	# Text of the exception raised while loading the model, or None when no failure was recorded.
	startup_error = None

	@app.on_event("startup")
	def load_model():
	    """Load tokenizer and model from the Hub and build the CPU pipeline.

	    Registered as a FastAPI startup handler. On success the module-level
	    ``classifier`` holds the token-classification pipeline and
	    ``startup_error`` is cleared. On any failure the exception text is
	    stored in ``startup_error`` and logged; the exception is not re-raised,
	    so the app still starts and can report the problem.
	    """
	    global classifier, startup_error
	    print(f" [STARTUP] Đang tải Model từ Hub: {MODEL_PATH}...")
	    try:
	        # Tokenizer first, then the model, both from the same repository.
	        hub_tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
	        hub_model = XLMRobertaForTokenClassification.from_pretrained(MODEL_PATH)

	        pipeline_options = {
	            "model": hub_model,
	            "tokenizer": hub_tokenizer,
	            "aggregation_strategy": "simple",
	            "device": -1,  # run on CPU
	        }
	        classifier = pipeline("token-classification", **pipeline_options)
	        print(" [STARTUP] Model đã tải thành công!")
	        startup_error = None
	    except Exception as load_failure:
	        # Keep the message so the endpoints can surface it to callers.
	        startup_error = str(load_failure)
	        print(f" [STARTUP] Lỗi tải model: {load_failure}")

	class ToxicRequest(BaseModel):
	    # Request body for POST /predict: the raw text to screen for toxicity.
	    text: str

	@app.get("/")
	def home():
	    """Report service status, exposing any startup failure for debugging.

	    Returns:
	        A dict with a ``status`` message. When model loading failed it
	        also carries the recorded ``error`` text and a ``suggestion``.
	    """
	    if not startup_error:
	        # No recorded failure: the pipeline is either ready or still loading.
	        if classifier:
	            state = "Ready"
	        else:
	            state = "Loading..."
	        return {"status": f"AI NER Service is {state}"}

	    # Startup failed: show the error on the home page so it is easy to debug.
	    failure = {"status": "Model Failed to Load"}
	    failure["error"] = startup_error
	    failure["suggestion"] = "Check config.json or use XLMRobertaForTokenClassification"
	    return failure

	def _match_blacklist(text, blacklist):
	    """Return the blacklist entries that occur in *text* as whole words or phrases.

	    Matching is case-insensitive and done on NFC-normalized text, so input
	    typed with decomposed Vietnamese diacritics still matches. An entry only
	    counts when it is not embedded inside a longer word; a plain substring
	    test would flag "account" for "cc" or "admin" for "dm".

	    Args:
	        text: The raw user text.
	        blacklist: Iterable of forbidden words or phrases.

	    Returns:
	        A list of ``{"word": entry, "score": 1.0}`` dicts, in blacklist
	        order, with at most one item per distinct entry.
	    """
	    import re
	    import unicodedata

	    haystack = unicodedata.normalize("NFC", text).lower()
	    hits = []
	    seen = set()
	    for entry in blacklist:
	        needle = unicodedata.normalize("NFC", entry).lower()
	        if not needle or needle in seen:
	            continue
	        # Lookarounds instead of \b so entries ending in punctuation
	        # (e.g. "đ**") still get a sensible boundary on both sides.
	        if re.search(rf"(?<!\w){re.escape(needle)}(?!\w)", haystack):
	            seen.add(needle)
	            hits.append({"word": entry, "score": 1.0})
	    return hits


	@app.post("/predict")
	async def predict_toxicity(request: ToxicRequest):
	    """Classify the request text as toxic or not.

	    Two checks are combined:

	    1. Whole-word lookup against ``HARD_BLACKLIST``; any hit marks the text
	       toxic with score 1.0.
	    2. The token-classification pipeline; entities labelled ``BAD``,
	       ``TOXIC`` or ``LABEL_1`` with score above 0.6 are reported, and the
	       text is marked toxic once the best score exceeds 0.9.

	    Args:
	        request: Body carrying the ``text`` to analyse.

	    Returns:
	        A dict with ``is_toxic`` (bool), ``score`` (float, highest score
	        seen) and ``toxic_words`` (list of ``{"word", "score"}`` dicts).

	    Raises:
	        HTTPException: 503 when the model is not loaded, 400 when ``text``
	            is empty, 500 when the model fails and the blacklist found
	            nothing to fall back on.
	    """
	    # The pipeline is only read here, so no ``global`` statement is needed.
	    if classifier is None:
	        detail = startup_error if startup_error else "Model is loading..."
	        raise HTTPException(status_code=503, detail=f"Service unavailable: {detail}")

	    if not request.text:
	        raise HTTPException(status_code=400, detail="Thiếu nội dung text")

	    # 1. Blacklist check (always done first because it is cheap).
	    toxic_words = _match_blacklist(request.text, HARD_BLACKLIST)
	    is_toxic = bool(toxic_words)
	    max_score = 1.0 if is_toxic else 0.0

	    # 2. Model check: runs even after a blacklist hit so that additional
	    # offending words can be reported.
	    try:
	        results = classifier(request.text)
	        print(f" [AI SEE] {results}")  # debug log

	        reported = {item["word"].lower() for item in toxic_words}
	        for entity in results:
	            score = float(entity['score'])
	            word = entity['word']
	            group = entity['entity_group']

	            # Only toxic labels above the reporting threshold count. The
	            # label name depends on how the model was trained.
	            if not (group in ('BAD', 'TOXIC', 'LABEL_1') and score > 0.6):
	                continue

	            key = word.lower()
	            if key not in reported:
	                reported.add(key)
	                toxic_words.append({
	                    "word": word,
	                    "score": score
	                })

	            # Track the best score among toxic entities only, so a confident
	            # non-toxic entity can never mark the text as toxic.
	            if score > max_score:
	                max_score = score

	        # A high-confidence toxic entity marks the whole text as toxic.
	        if max_score > 0.9:
	            is_toxic = True

	    except Exception as e:
	        print(f" [AI ERROR] {e}")
	        # When the blacklist already caught something, ignore the model
	        # failure and return the blacklist result.
	        if not is_toxic:
	            raise HTTPException(status_code=500, detail=f"AI Error: {str(e)}") from e

	    return {
	        "is_toxic": is_toxic,
	        "score": float(max_score),
	        "toxic_words": toxic_words
	    }

	if __name__ == "__main__":
	    # Direct-execution entry point: serve the app on all interfaces, port 7860.
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=7860,
	    )