# serve-gradio/app.py
#
# Gradio demo that classifies input sentences with a sequence-classification
# model hosted on the Hugging Face Hub.

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# --- Model loading ---
# Model already published on the Hugging Face Hub.
MODEL_ID = "CLOUDYUL/cleaner-detector"

# CPU-only inference (Spaces free tier has no GPU).
device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.to(device)
model.eval()  # inference mode: disables dropout etc.
def predict_toxicity(texts):
    """Classify one or more sentences for toxicity.

    Args:
        texts: a single string, or a list of strings.

    Returns:
        list[dict]: one entry per input sentence, each of the form
        ``{"text": <input sentence>, "label": 0 or 1, "score": <probability>}``.
        The label is the argmax class index and the score its softmax
        probability. (Presumably 0 = clean, 1 = toxic — confirm against the
        model card.)
    """
    if isinstance(texts, str):
        texts = [texts]
    if not texts:
        # Nothing to classify; avoid calling the tokenizer with an empty batch.
        return []

    # Tokenize the whole batch in ONE call instead of once per sentence.
    # Because padding="max_length" pads every row to the same fixed length,
    # each encoded row is identical to the per-sentence encoding, so the
    # batched forward pass produces exactly the same logits.
    encoding = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    # Single forward pass for the batch; no gradients needed at inference.
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    # Softmax over the class dimension, then take the top class per sentence.
    probs = torch.softmax(logits, dim=-1)
    scores, labels = probs.max(dim=-1)

    return [
        {"text": t, "label": int(label), "score": float(score)}
        for t, label, score in zip(texts, labels.tolist(), scores.tolist())
    ]
# --- Gradio interface definition ---
# NOTE(review): the original placeholder/description strings were
# mojibake-corrupted Korean; replaced with English equivalents.
demo = gr.Interface(
    fn=predict_toxicity,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence to test here"),
    outputs=gr.JSON(label="Predictions"),
    title="AGaRiCleaner Toxicity Detector",
    description=(
        "Enter a sentence to get its toxicity label (0 or 1) "
        "and probability score."
    ),
)

if __name__ == "__main__":
    demo.launch()