# Hugging Face Space: ZidanePMSE/s2t_classify (status: Sleeping)
# File size: 1,827 bytes

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

# ==============================
# 1. Load Model
# ==============================
# Checkpoint identifier passed to both the tokenizer and the classifier loaders.
MODEL_ID = "anhgf/visec-phobert-sentiment-vi"

# Run on CUDA when torch reports it as available, otherwise on the CPU.
DEVICE = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Load the classifier, move it to DEVICE and switch it to evaluation mode.
# Module.to() and Module.eval() both return the module, so the calls chain.
model = (
    AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    .to(DEVICE)
    .eval()
)

# Class names in the same index order that classify_sentiment reads the
# softmax output (index 0 -> "neg", index 1 -> "pos").
LABELS = ["neg", "pos"]

# ==============================
# 2. Inference Function
# ==============================
def classify_sentiment(text):
    """Classify a piece of text as negative or positive.

    Args:
        text: The text to classify. Falsy or whitespace-only values are
            treated as empty input.

    Returns:
        ``{"label": "neg"}`` or ``{"label": "pos"}`` for non-empty text, or
        ``{"label": "empty input", "probabilities": {}}`` when there is
        nothing to classify.
    """
    # Guard clause: falsy values (None, "") and whitespace-only strings never
    # reach the tokenizer or the model.
    stripped = text.strip() if text else ""
    if not stripped:
        return {"label": "empty input", "probabilities": {}}

    # Tokenize the original (unstripped) text and move every tensor to the
    # device the model lives on.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    encoded = {name: tensor.to(DEVICE) for name, tensor in encoded.items()}

    # Forward pass without gradient tracking; keep the first (only) row of
    # the batch as a plain Python list of class probabilities.
    with torch.no_grad():
        scores = model(**encoded).logits
        probs = F.softmax(scores, dim=-1)[0].cpu().tolist()

    # Index 0 is read as the negative class and index 1 as the positive class.
    neg_p, pos_p = probs[0], probs[1]

    # Strictly-greater comparison: a tie resolves to "pos".
    if neg_p > pos_p:
        label = "neg"
    else:
        label = "pos"

    # Only the label is returned; the per-class probabilities are not exposed.
    return {"label": label}

# ==============================
# 3. Gradio UI
# ==============================
# Input widget: a three-line textbox for the text to classify.
_text_input = gr.Textbox(lines=3, label="Nhập văn bản tiếng Việt")

# Output widget: a JSON viewer that shows the dict returned by
# classify_sentiment.
_json_output = gr.JSON(label="Kết quả phân tích cảm xúc (POS / NEG)")

app = gr.Interface(
    fn=classify_sentiment,
    inputs=_text_input,
    outputs=_json_output,
    title="Vietnamese Sentiment Classification (PhoBERT)",
    description="Model này chỉ có 2 lớp: Positive và Negative.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()