Spaces:
Sleeping
Sleeping
File size: 1,970 Bytes
2db78dd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | # serve-gradio/app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ─── Model loading ───
# Checkpoint already published on the Hugging Face Hub.
MODEL_ID = "CLOUDYUL/cleaner-detector"

device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID).to(device)
model.eval()  # inference mode: disables dropout / training-only behavior
def predict_toxicity(texts):
    """Classify one or more sentences as normal (0) or toxic (1).

    Args:
        texts: a single string or a list of strings.

    Returns:
        list of dicts, one per input sentence:
        [{"text": sentence, "label": 0 or 1, "score": probability (float)}, ...]
        where ``score`` is the softmax probability of the predicted label.
        Label semantics per the original author: 0 = normal, 1 = toxic.
    """
    if isinstance(texts, str):
        texts = [texts]
    # Batched tokenizer call rejects an empty list; the result is trivially [].
    if not texts:
        return []

    # Tokenize and run the model over the whole batch in ONE forward pass
    # instead of one model call per sentence.
    encoding = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    # Softmax over the class dimension -> (batch, num_labels) probabilities.
    probs = torch.softmax(logits, dim=-1)
    scores, labels = probs.max(dim=-1)  # per-row winning probability and class index

    return [
        {"text": t, "label": int(label), "score": float(score)}
        for t, label, score in zip(texts, labels, scores)
    ]
# ─── Gradio interface definition ───
# NOTE(review): the original Korean UI strings were mojibake-corrupted (and split
# mid-literal, which is a syntax error) in this copy of the file; they are
# reconstructed best-effort below — confirm the exact wording with the author.
demo = gr.Interface(
    fn=predict_toxicity,
    inputs=gr.Textbox(lines=2, placeholder="여기에 테스트 문장을 입력하세요"),
    outputs=gr.JSON(label="Predictions"),
    title="AGaRiCleaner Toxicity Detector",
    description="문장을 입력하면 유해 여부(label=0 또는 1)와 확률(score)을 반환합니다.",
)
if __name__ == "__main__":
    # Script entry point: start the Gradio server (Spaces runs this file directly).
    demo.launch()
|