CLOUDYUL's picture
Add Gradio app.py and requirements.txt
2db78dd
# serve-gradio/app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ─── Model loading ───
# Checkpoint that is already uploaded to the Hugging Face Hub.
MODEL_ID = "CLOUDYUL/cleaner-detector"
device = torch.device("cpu")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Load the classifier, move it to `device`, and switch it to eval mode.
# Both .to() and .eval() return the module, so the calls can be chained.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID).to(device).eval()
def predict_toxicity(texts):
    """Classify one or more sentences with the module-level model.

    Args:
        texts: A single string or a list of strings.

    Returns:
        A list with one dict per input, in input order::

            {"text": <input sentence>, "label": <class index>, "score": <probability>}

        ``label`` is the index of the highest-probability class (noted by the
        original author as 0: normal, 1: malicious comment) and ``score`` is
        the softmax probability of that class.
    """
    # A bare string is handled as a one-element list.
    batch = [texts] if isinstance(texts, str) else texts

    predictions = []
    for sentence in batch:
        # Tokenization: truncate / pad every input to exactly 128 tokens.
        encoded = tokenizer(
            sentence,
            truncation=True,
            padding="max_length",
            max_length=128,
            return_attention_mask=True,
            return_tensors="pt",
        )

        # Model inference with gradient tracking disabled.
        with torch.no_grad():
            output = model(
                input_ids=encoded["input_ids"].to(device),
                attention_mask=encoded["attention_mask"].to(device),
            )
        # One sentence per call, so only row 0 of the logits is used.
        row_logits = output.logits[0]

        # Softmax converts the logits into class probabilities.
        class_probs = torch.softmax(row_logits, dim=-1).cpu().tolist()

        # First index holding the largest probability, plus that probability.
        label, score = max(enumerate(class_probs), key=lambda pair: pair[1])
        predictions.append({"text": sentence, "label": label, "score": score})

    return predictions
# ─── Gradio interface definition ───
# One text box in, JSON out: the submitted string is passed to
# predict_toxicity and the list of dicts it returns is shown as JSON.
#
# The placeholder and description are user-facing Korean strings; they are
# written here as proper UTF-8 text (repairing a mis-encoded form).
#   placeholder: "Enter a test sentence here"
#   description: "Enter a sentence to get whether it is a malicious comment
#                 (label=0 or 1) and the probability (score)."
demo = gr.Interface(
    fn=predict_toxicity,
    inputs=gr.Textbox(lines=2, placeholder="여기에 테스트 문장을 입력하세요"),
    outputs=gr.JSON(label="Predictions"),
    title="AGaRiCleaner Toxicity Detector",
    description="문장을 입력하면 악플 여부(label=0 또는 1)와 확률(score)을 반환합니다.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()