# serve-gradio/app.py

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ─── Model loading ───
# Checkpoint that is already published on the Hugging Face Hub.
MODEL_ID = "CLOUDYUL/cleaner-detector"
device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Load the classifier, move it to `device`, and switch it to eval mode.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID).to(device).eval()

def predict_toxicity(texts):
    """Classify one or more sentences with the toxicity model.

    Args:
        texts: A single string, or an iterable of strings. ``None`` is
            treated as "no input" and yields an empty result list.

    Returns:
        A list with one dict per input sentence, in input order::

            {"text": <input sentence>, "label": 0 or 1, "score": <float>}

        ``label`` is the index of the highest-probability class
        (0: normal, 1: toxic comment) and ``score`` is the softmax
        probability of that class.
    """
    if texts is None:
        return []
    if isinstance(texts, str):
        texts = [texts]
    else:
        # Materialize so that generators and other iterables can be sliced.
        texts = list(texts)

    max_length = 128
    # Upper bound on sentences per forward pass, to keep memory use bounded.
    batch_size = 32

    results = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]

        # Tokenize the whole batch at once. padding=True pads only up to the
        # longest sequence in this batch instead of always to `max_length`,
        # so short inputs do not pay for a full-length forward pass; padded
        # positions are flagged in the attention mask.
        encoding = tokenizer(
            batch,
            truncation=True,
            padding=True,
            max_length=max_length,
            return_attention_mask=True,
            return_tensors="pt",
        )
        input_ids = encoding["input_ids"].to(device)
        attention_mask = encoding["attention_mask"].to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

        # Softmax over the class dimension turns logits into probabilities.
        batch_probs = torch.softmax(logits, dim=-1).cpu().tolist()
        for text, probs in zip(batch, batch_probs):
            score = max(probs)
            # list.index returns the first maximum, so ties resolve to the
            # lowest class index.
            label = probs.index(score)
            results.append({"text": text, "label": int(label), "score": float(score)})
    return results

# ─── Gradio interface definition ───
# Two-line text box for the sentence to classify.
_sentence_input = gr.Textbox(lines=2, placeholder="여기에 테스트 문장을 입력하세요")
# JSON viewer that shows the list returned by predict_toxicity.
_prediction_output = gr.JSON(label="Predictions")

demo = gr.Interface(
    title="AGaRiCleaner Toxicity Detector",
    description="문장을 입력하면 악플 여부(label=0 또는 1)와 확률(score)을 반환합니다.",
    fn=predict_toxicity,
    inputs=_sentence_input,
    outputs=_prediction_output,
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()