import gradio as gr
import torch
from torch.nn.functional import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Hugging Face Hub checkpoint for Turkish sentiment classification.
MODEL_ID = "celalkartoglu/tr-sentiment-bert-win-v1"
# Class names indexed by logit position — assumed to match the checkpoint's
# id2label ordering; TODO confirm against the model config.
LABELS = ["Negative","Notr","Positive"]
# Default tokenizer truncation length; overridable via the UI slider.
MAX_LEN_DEFAULT = 256
# Loaded once at startup (downloads on first run); eval() disables dropout
# for deterministic inference.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()
def postprocess_label(probs, min_conf=0.55, close_gap=0.10):
    """Map a 3-way probability vector to a label, biasing uncertain cases to "Notr".

    Args:
        probs: three probabilities in [Negative, Notr, Positive] order. Accepts
            any iterable of floats (list, tuple, numpy array, torch tensor) —
            previously a torch tensor was required because of ``.argmax()``.
        min_conf: below this top probability, force the prediction to "Notr".
        close_gap: if |P(Negative) - P(Positive)| is under this gap (and the
            Notr mass is substantial or confidence is modest), return "Notr".

    Returns:
        One of "Negative", "Notr", "Positive".
    """
    neg, notr, pos = (float(p) for p in probs)
    scores = [neg, notr, pos]
    # First index wins ties, matching torch.argmax semantics.
    top = max(range(3), key=scores.__getitem__)
    # Low confidence: fall back to the neutral class.
    if scores[top] < min_conf:
        return "Notr"
    # Negative and Positive nearly tied: treat as neutral when Notr mass is
    # substantial or the winner is not decisively confident.
    if abs(neg - pos) < close_gap and (notr >= 0.30 or scores[top] < 0.60):
        return "Notr"
    # Label strings are hardcoded here, consistent with the "Notr" literals above.
    return ("Negative", "Notr", "Positive")[top]
@torch.inference_mode()
def infer_one(text, max_len=MAX_LEN_DEFAULT, use_rule=False, min_conf=0.55, close_gap=0.10):
    """Classify a single sentence with the sentiment model.

    Args:
        text: input sentence; None/blank short-circuits to empty outputs.
        max_len: tokenizer truncation length.
        use_rule: when True, apply the postprocess_label "Notr" heuristic.
        min_conf: confidence threshold forwarded to postprocess_label.
        close_gap: Neg/Pos gap threshold forwarded to postprocess_label.

    Returns:
        (label, scores) where scores maps each class name to its probability.
    """
    text = (text or "").strip()
    if not text:
        # Nothing to classify; keep the UI components in a sane empty state.
        return "", {"Negative": 0.0, "Notr": 0.0, "Positive": 0.0}
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=int(max_len))
    logits = model(**inputs).logits
    probs = softmax(logits, dim=-1).squeeze().tolist()
    scores = {label: float(p) for label, p in zip(LABELS, probs)}
    if use_rule:
        label = postprocess_label(torch.tensor(probs), min_conf, close_gap)
    else:
        # argmax of a plain list — no need to rebuild a torch tensor for this.
        label = LABELS[probs.index(max(probs))]
    return label, scores
def infer_batch(texts, max_len=MAX_LEN_DEFAULT, use_rule=False, min_conf=0.55, close_gap=0.10):
    """Run infer_one over many sentences and collect rows for the results table.

    Args:
        texts: either a newline-separated string (blank lines skipped) or a
            list of sentences (None is treated as empty).
        max_len, use_rule, min_conf, close_gap: forwarded to infer_one.

    Returns:
        List of rows [text, label, P(Negative), P(Notr), P(Positive)].
    """
    if isinstance(texts, str):
        sentences = [line.strip() for line in texts.split("\n") if line.strip()]
    else:
        sentences = texts or []
    table = []
    for sentence in sentences:
        label, probs = infer_one(sentence, max_len, use_rule, min_conf, close_gap)
        table.append([sentence, label, probs["Negative"], probs["Notr"], probs["Positive"]])
    return table
def export_csv(table):
    """Write the results table to a temporary CSV file and reveal the download widget.

    Args:
        table: rows of [text, label, Negative, Notr, Positive] as produced
            by infer_batch.

    Returns:
        A Gradio component update pointing the (hidden) gr.File at the CSV path.
    """
    import tempfile

    import pandas as pd

    df = pd.DataFrame(table, columns=["text", "label", "Negative", "Notr", "Positive"])
    # gr.File expects a filesystem path as its value, not raw CSV bytes, and
    # gr.File.update() was removed in Gradio 4.x — write a temp file and use
    # the generic gr.update() instead.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, encoding="utf-8", newline=""
    ) as tmp:
        df.to_csv(tmp, index=False)
        path = tmp.name
    return gr.update(value=path, visible=True)
# UI theme: Gradio's Soft preset with blue accents on a slate neutral palette.
theme = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")
# Extra CSS injected into the page: bolder title heading, dimmed footer.
CUSTOM_CSS = """
#title h1 { font-weight: 800; letter-spacing: -0.02em; }
footer {opacity:.7}
"""
with gr.Blocks(theme=theme, css=CUSTOM_CSS, title="Türkçe Duygu Analizi | BERT") as demo:
    # Header row: title/model info on the left, post-processing controls on the right.
    with gr.Row():
        with gr.Column(scale=7):
            gr.Markdown(
                "<div id='title'>"
                "<h1>🇹🇷 Türkçe Duygu Analizi</h1>"
                "<p>Model: <b>celalkartoglu/tr-sentiment-bert-win-v1</b> | Sınıflar: <b>Negative</b>, <b>Notr</b>, <b>Positive</b></p>"
                "</div>"
            )
        with gr.Column(scale=5, min_width=280):
            with gr.Group():
                # These controls feed directly into infer_one / infer_batch.
                use_rule = gr.Checkbox(value=False, label="Notr kuralını uygula (düşük güven & Neg-Pos yakınlığı)")
                min_conf = gr.Slider(0.30, 0.85, value=0.55, step=0.01, label="Min güven (Notr eşiği)")
                close_gap = gr.Slider(0.00, 0.50, value=0.10, step=0.01, label="Neg-Pos yakınlık eşiği")
                max_len = gr.Slider(64, 384, value=MAX_LEN_DEFAULT, step=8, label="Maks. token uzunluğu")

    with gr.Tabs():
        # Tab 1: single-sentence inference.
        with gr.Tab("Tek Cümle"):
            with gr.Row():
                with gr.Column(scale=7):
                    txt = gr.Textbox(label="Cümle gir", placeholder="Örn: Film gerçekten çok kötüydü.", lines=3)
                    with gr.Row():
                        btn = gr.Button("Tahmin Et", variant="primary")
                        clr = gr.Button("Temizle")
                with gr.Column(scale=5):
                    out_label = gr.Label(label="Tahmin")
                    out_scores = gr.Label(label="Olasılıklar (P)")
            gr.Markdown("Örnekler")
            gr.Examples(
                examples=[
                    ["Bu yemek harikaydı, ellerinize sağlık."],
                    ["Ne çok iyi ne de çok kötüydü."],
                    ["Film gerçekten çok kötüydü."],
                    ["Ürün zamanında teslim edildi, paketleme normaldi."],
                    ["Beklediğim gibi değildi ama kötü de sayılmaz."]
                ],
                inputs=txt
            )
            btn.click(
                fn=infer_one,
                inputs=[txt, max_len, use_rule, min_conf, close_gap],
                outputs=[out_label, out_scores]
            )
            # Reset both outputs to their empty state.
            clr.click(lambda: ("", {"Negative":0.0,"Notr":0.0,"Positive":0.0}), outputs=[out_label, out_scores])

        # Tab 2: batch inference (one sentence per line) with CSV export.
        with gr.Tab("Çoklu Cümle (satır satır)"):
            with gr.Row():
                with gr.Column(scale=7):
                    multi = gr.Textbox(lines=10, label="Her satıra bir cümle yaz")
                    btn2 = gr.Button("Toplu Tahmin", variant="primary")
                with gr.Column(scale=5):
                    table = gr.Dataframe(headers=["text","label","Negative","Notr","Positive"], wrap=True, interactive=False, label="Sonuçlar")
                    with gr.Row():
                        to_csv = gr.Button("CSV Olarak İndir")
                        # Hidden until export_csv produces a file to download.
                        csv_file = gr.File(label="İndirilebilir CSV", visible=False)
            btn2.click(
                fn=infer_batch,
                inputs=[multi, max_len, use_rule, min_conf, close_gap],
                outputs=table
            )
            to_csv.click(export_csv, inputs=table, outputs=csv_file)

    gr.Markdown("---")
    gr.Markdown("💡 İpucu: Nötr cümleler negatif/pozitife kayıyorsa, sağdaki <b>Notr kuralı</b> ayarlarını kullanarak dengeleyebilirsin.")
    gr.Markdown("© 2025 • Gradio ile oluşturuldu")

# queue() enables request queuing for concurrent users before launch.
demo.queue().launch()