import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# IMPORTANT: import XLMRobertaForTokenClassification explicitly to fix the "Unrecognized model" error
from transformers import AutoTokenizer, XLMRobertaForTokenClassification, pipeline
import torch
import time

# FastAPI application instance; the startup hook and the routes in this file
# register themselves on it through its decorators.
app = FastAPI()

# Model identifier handed to from_pretrained() for both the tokenizer and the
# model (presumably a Hugging Face Hub repository id - confirm).
MODEL_PATH = "fallinluv8/nexore-toxic-model"

# Words and phrases that are always reported with score 1.0, independent of
# the model. Add further entries here as needed.
# Entries are compared against the lower-cased request text, so keep them in
# lower case. The asterisks in masked entries such as "l**" are literal
# characters, not wildcards.
# NOTE(review): "đ**" is listed twice (second and third row).
HARD_BLACKLIST = [
    "đéo", "đ**", "dm", "dcm", "đm", "vcl", "vl", 
    "cc", "cmn", "cmm", "lồn", "l**", "đĩ", "đ**", 
    "cặc", "c**", "buồi", "bu**", "đụ", "đụ mẹ",
    "đụ bố", "đụ con mẹ mày", "đụ con", "đụ mày"
]

# Token-classification pipeline; stays None until load_model() succeeds.
classifier = None
# str() of the exception raised while loading the model, or None if no
# failure has been recorded.
startup_error = None  

@app.on_event("startup")
def load_model():
    """Build the token-classification pipeline when the application starts.

    Loads the tokenizer and the XLM-RoBERTa token-classification model
    identified by ``MODEL_PATH`` and stores the resulting pipeline in the
    module-level ``classifier``.

    Any exception raised along the way is caught rather than propagated: its
    text is stored in the module-level ``startup_error`` and ``classifier`` is
    left untouched, so the application still starts and the other endpoints
    can report the failure.
    """
    global classifier, startup_error
    print(f" [STARTUP] Đang tải Model từ Hub: {MODEL_PATH}...")
    try:
        # Tokenizer first, then the model weights.
        hub_tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        hub_model = XLMRobertaForTokenClassification.from_pretrained(MODEL_PATH)

        pipeline_options = {
            "model": hub_model,
            "tokenizer": hub_tokenizer,
            "aggregation_strategy": "simple",
            "device": -1,  # run on CPU
        }
        classifier = pipeline("token-classification", **pipeline_options)

        print(" [STARTUP] Model đã tải thành công!")
        startup_error = None
    except Exception as load_failure:
        # Remember the failure so it can be reported to clients later.
        startup_error = str(load_failure)
        print(f" [STARTUP] Lỗi tải model: {load_failure}")

class ToxicRequest(BaseModel):
    """Request body accepted by the POST /predict endpoint."""

    # Text to screen for toxic words. Required; an empty string is rejected
    # by the endpoint itself with HTTP 400.
    text: str

@app.get("/")
def home():
    """Report whether the service is ready.

    Returns:
        A dict with a single ``status`` message saying whether the classifier
        is ready or still loading. If loading the model failed at startup,
        the dict instead carries a failure ``status``, the recorded ``error``
        text and a troubleshooting ``suggestion``.
    """
    if not startup_error:
        readiness = "Loading..." if not classifier else "Ready"
        return {"status": f"AI NER Service is {readiness}"}

    # A startup failure is shown right on the root page to ease debugging.
    failure_report = {
        "status": "Model Failed to Load",
        "error": startup_error,
        "suggestion": "Check config.json or use XLMRobertaForTokenClassification",
    }
    return failure_report

# Entity groups from the model that count as toxic. Which of these the model
# actually emits depends on how it was trained, so all three are accepted.
_TOXIC_ENTITY_GROUPS = frozenset({"BAD", "TOXIC", "LABEL_1"})
# A model entity is listed in "toxic_words" when its score exceeds this value.
_REPORT_THRESHOLD = 0.6
# The model alone only flags the whole text as toxic above this stricter value,
# so a response can list low-confidence words while "is_toxic" stays False.
_FLAG_THRESHOLD = 0.9


def _find_blacklisted_words(text):
    """Return the HARD_BLACKLIST entries that occur in *text* as whole words.

    Matching is case-insensitive. Both the text and the entries are
    NFC-normalised first, so input that uses decomposed diacritics still
    matches the blacklist.

    An entry only counts when it is not embedded in a longer word, so short
    entries such as "cc" or "dm" do not fire on "account" or "admin".
    Entries are matched literally: the asterisks in masked entries such as
    "l**" are plain characters, not wildcards.

    Returns:
        The matching entries in blacklist order, without duplicates.
    """
    # Imported locally so this block does not depend on the file's import header.
    import re
    import unicodedata

    haystack = unicodedata.normalize("NFC", text).lower()
    hits = []
    for bad_word in HARD_BLACKLIST:
        if bad_word in hits:
            # The blacklist may contain the same entry more than once.
            continue
        needle = re.escape(unicodedata.normalize("NFC", bad_word))
        # \w is Unicode-aware for str patterns, so Vietnamese letters count as
        # word characters on both sides of the match.
        if re.search(rf"(?<!\w){needle}(?!\w)", haystack):
            hits.append(bad_word)
    return hits


@app.post("/predict")
async def predict_toxicity(request: ToxicRequest):
    """Screen a text for toxic words using the blacklist and the NER model.

    The blacklist is checked first; every hit is reported with score 1.0 and
    marks the text as toxic. The model is then always run on the original
    text to add further words: entities in a toxic group are listed when
    their score is above 0.6, and the text is marked toxic when the highest
    score is above 0.9.

    Returns:
        A dict with ``is_toxic`` (bool), ``score`` (highest score seen, 0.0
        when nothing was found) and ``toxic_words`` (list of
        ``{"word", "score"}`` dicts, unique by lower-cased word).

    Raises:
        HTTPException: 503 when the model is not loaded (still loading or
            failed at startup), 400 when ``text`` is empty, 500 when the
            model call fails and the blacklist found nothing.
    """
    # Fail fast while the model is still loading or failed to load.
    if classifier is None:
        detail = startup_error if startup_error else "Model is loading..."
        raise HTTPException(status_code=503, detail=f"Service unavailable: {detail}")

    if not request.text:
        raise HTTPException(status_code=400, detail="Thiếu nội dung text")

    # 1. Blacklist pass (cheap, runs first).
    toxic_words = [
        {"word": bad_word, "score": 1.0}
        for bad_word in _find_blacklisted_words(request.text)
    ]
    is_toxic = bool(toxic_words)
    max_score = 1.0 if is_toxic else 0.0
    # Lower-cased words already reported, so the model does not repeat them.
    seen_words = {entry["word"].lower() for entry in toxic_words}

    # 2. Model pass.
    # NOTE(review): this is a synchronous call inside an async handler, so the
    # event loop is blocked while the model runs. Moving it to a worker thread
    # would need the pipeline's thread-safety confirmed first.
    try:
        results = classifier(request.text)
        print(f" [AI SEE] {results}")  # debug log

        for entity in results:
            score = float(entity['score'])
            word = entity['word']
            group = entity['entity_group']

            if group in _TOXIC_ENTITY_GROUPS and score > _REPORT_THRESHOLD:
                if word.lower() not in seen_words:
                    seen_words.add(word.lower())
                    toxic_words.append({
                        "word": word,
                        "score": score
                    })

                if score > max_score:
                    max_score = score

        if max_score > _FLAG_THRESHOLD:
            is_toxic = True

    except Exception as e:
        print(f" [AI ERROR] {e}")
        # A blacklist hit is already a definitive answer, so a model failure
        # is only reported to the client when the blacklist found nothing.
        if not is_toxic:
            raise HTTPException(status_code=500, detail=f"AI Error: {str(e)}") from e

    return {
        "is_toxic": is_toxic,
        "score": float(max_score),
        "toxic_words": toxic_words
    }

# Script entry point: when this file is executed directly, serve the app with
# uvicorn on all network interfaces (0.0.0.0) at port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)