Spaces:

narutoSiskovich
/

classifier

Sleeping

App Files Files Community

narutoSiskovich committed on Jan 25

Commit

900140b

verified ·

1 Parent(s): 1c8f881

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -66

app.py CHANGED Viewed

@@ -12,7 +12,22 @@ from transformers import (
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # =====================
-# 1) Agreement (MNLI)
 # =====================
 MNLI_MODEL = "facebook/bart-large-mnli"
 mnli_tokenizer = None
@@ -26,28 +41,22 @@ def load_mnli():
         mnli_model.to(DEVICE)
         mnli_model.eval()
-def agreement_raw_score(msg1: str, msg2: str) -> float:
     """
-    Возвращает "сырое" согласие в диапазоне [-1..+1]
-    по формуле entailment - contradiction.
     """
     load_mnli()
     inputs = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True).to(DEVICE)
     with torch.no_grad():
         logits = mnli_model(**inputs).logits
     probs = torch.softmax(logits, dim=-1)[0]
-    raw = (probs[2] - probs[0]).item()  # [-1..+1]
-    return raw
-def agreement_score_minus5_plus5(msg1: str, msg2: str) -> float:
-    """
-    Agreement в шкале [-5..+5]
-    """
-    raw = agreement_raw_score(msg1, msg2)
-    return round(raw * 5, 2)
 # =====================
-# 2) Sentiment (-5..+5)
 # =====================
 SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
 sent_tokenizer = None
@@ -63,7 +72,7 @@ def load_sentiment():
 def analyze_sentiment(text: str) -> float:
     """
-    Модель даёт 1..5 звёзд -> переводим в [-5..+5]
     """
     load_sentiment()
     inputs = sent_tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
@@ -71,14 +80,12 @@ def analyze_sentiment(text: str) -> float:
         logits = sent_model(**inputs).logits
     probs = torch.softmax(logits, dim=-1)
     stars = torch.argmax(probs, dim=-1).item() + 1  # 1..5
-    score = (stars - 3) * 2.5  # -5..+5
-    return round(score, 2)
 # =====================
-# 3) Sarcasm / Irony (-5..+5)
 # =====================
-# Можно заменить модель на другую, если хочешь.
-# Эта модель популярна для сарказма.
 SARCASM_MODEL = "cardiffnlp/twitter-roberta-base-irony"
 sarcasm_pipe = None
@@ -94,56 +101,89 @@ def load_sarcasm():
 def sarcasm_score(text: str) -> float:
     """
-    Возвращает рейтинг сарказма в [-5..+5]
-    (чем выше, тем больше сарказма/иронии)
     """
     load_sarcasm()
     res = sarcasm_pipe(text)[0]
-    # Обычно метки: "irony" / "non_irony"
     label = res["label"].lower()
     conf = float(res["score"])  # 0..1
     if "irony" in label:
-        # 0..1 -> 0..+5
-        return round(conf * 5, 2)
-    else:
-        # 0..1 -> 0..-5
-        return round(-conf * 5, 2)
 # =====================
-# 4) Agreement + Sarcasm
 # =====================
 def agreement_with_irony(msg1: str, msg2: str) -> float:
     """
-    Идея:
-    - считаем agreement [-5..+5]
-    - считаем сарказм msg2 (обычно сарказм в ответе важнее)
-    - если сарказм высокий, уменьшаем "уверенность" agreement
-    Это НЕ идеальная логика, но работает лучше, чем игнорировать иронию.
     """
     base = agreement_score_minus5_plus5(msg1, msg2)
-    s2 = sarcasm_score(msg2)  # [-5..+5]
-    sarcasm_strength = abs(s2) / 5.0  # 0..1
-    # Чем больше сарказм, тем сильнее "сжимаем" agreement к нулю
-    # 0 сарказма -> множитель 1
-    # сильный сарказм -> множитель ~0.35
     multiplier = 1.0 - 0.65 * sarcasm_strength
     final_score = base * multiplier
-    return round(final_score, 2)
 # =====================
-# 5) Zero-Shot Multilabel -> [-5..+5]
 # =====================
 ZS_MODEL = "facebook/bart-large-mnli"
 zs_classifier = None
 CATEGORIES = [
-    "politique", "woke", "racism", "crime",
-    "police_abuse", "corruption", "hate_speech", "activism"
 ]
 def load_zero_shot():
@@ -157,19 +197,18 @@ def load_zero_shot():
 def classify_message(text: str) -> dict:
     """
-    Возвращает рейтинг категорий в [-5..+5]
-    (0.5 = нейтрально, >0.5 = ближе к +5, <0.5 = ближе к -5)
     """
     load_zero_shot()
-    result = zs_classifier(text, candidate_labels=CATEGORIES)
     labels = result["labels"]
     scores = result["scores"]
-    # score 0..1 -> [-5..+5]
     out = {}
     for label, score in zip(labels, scores):
-        rating = (float(score) - 0.5) * 10
-        out[label] = round(rating, 2)
     return out
 # =====================
@@ -179,16 +218,17 @@ with gr.Blocks(title="Unified NLP API (-5..+5)") as demo:
     gr.Markdown("## 📈 Unified NLP API (all scores: -5 .. +5)")
     gr.Markdown(
         """
-**Что есть что:**
 - **Agreement**: -5 = сильное противоречие, +5 = сильное согласие
 - **Sentiment**: -5 = негатив, +5 = позитив
-- **Sarcasm**: -5 = уверенно *не сарказм*, +5 = уверенно *сарказм/ирония*
-- **Categories**: рейтинг уверенности (0.5 → 0, 1.0 → +5, 0.0 → -5)
         """
     )
-    # ----- Agreement Tab -----
-    with gr.Tab("Agreement (-5..+5)"):
         msg1 = gr.Textbox(label="Message 1")
         msg2 = gr.Textbox(label="Message 2")
@@ -196,30 +236,35 @@ with gr.Blocks(title="Unified NLP API (-5..+5)") as demo:
         out_agree = gr.Number(label="Agreement Score (-5..+5)")
         btn_agree.click(fn=agreement_score_minus5_plus5, inputs=[msg1, msg2], outputs=out_agree)
-        gr.Markdown("### Agreement with Irony adjustment")
         btn_agree_irony = gr.Button("Check Agreement (with irony)")
         out_agree_irony = gr.Number(label="Agreement Score (irony-aware) (-5..+5)")
         btn_agree_irony.click(fn=agreement_with_irony, inputs=[msg1, msg2], outputs=out_agree_irony)
-    # ----- Sentiment Tab -----
-    with gr.Tab("Sentiment (-5..+5)"):
         text_sent = gr.Textbox(label="Text")
         btn_sent = gr.Button("Analyze Sentiment")
         out_sent = gr.Number(label="Sentiment Score (-5..+5)")
         btn_sent.click(fn=analyze_sentiment, inputs=text_sent, outputs=out_sent)
-    # ----- Sarcasm Tab -----
-    with gr.Tab("Sarcasm / Irony (-5..+5)"):
         text_sarc = gr.Textbox(label="Text")
         btn_sarc = gr.Button("Analyze Sarcasm")
         out_sarc = gr.Number(label="Sarcasm Score (-5..+5)")
         btn_sarc.click(fn=sarcasm_score, inputs=text_sarc, outputs=out_sarc)
-    # ----- Multilabel (Zero-Shot) Classification Tab -----
-    with gr.Tab("Multilabel Classification (-5..+5)"):
         text_clf = gr.Textbox(label="Text")
         btn_clf = gr.Button("Classify")
         out_clf = gr.Label(label="Categories & Scores (-5..+5)")
-        btn_clf.click(fn=classify_message, inputs=text_clf, outputs=out_clf)
-demo.launch()

 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # =====================
+# Helpers
+# =====================
+def clamp(x: float, lo: float = -5.0, hi: float = 5.0) -> float:
+    """Limit ``x`` to the closed interval ``[lo, hi]`` (default -5..+5)."""
+    capped = min(hi, x)
+    return max(lo, capped)
+def score01_to_minus5_plus5(p: float) -> float:
+    """
+    Map a probability in 0..1 onto the -5..+5 scale:
+    0.0 -> -5
+    0.5 -> 0
+    1.0 -> +5
+    """
+    centered = float(p) - 0.5
+    return clamp(centered * 10)
+# =====================
+# 1) Agreement (MNLI) -> [-5..+5]
 # =====================
 MNLI_MODEL = "facebook/bart-large-mnli"
 mnli_tokenizer = None
         mnli_model.to(DEVICE)
         mnli_model.eval()
+def agreement_score_minus5_plus5(msg1: str, msg2: str) -> float:
     """
+    Agreement: -5 = contradiction, +5 = agreement.
+    Takes entailment - contradiction => [-1..+1], then *5 => [-5..+5].
+
+    msg1 and msg2 are tokenized together as a sentence pair and scored by
+    the MNLI model; the result is clamped and rounded to two decimals.
     """
     load_mnli()
     inputs = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True).to(DEVICE)
     with torch.no_grad():
         logits = mnli_model(**inputs).logits
     probs = torch.softmax(logits, dim=-1)[0]
+    # assumes index 2 = entailment and index 0 = contradiction in the model's
+    # label order -- TODO confirm against the model config
+    raw = (probs[2] - probs[0]).item()  # [-1..+1]
+    return round(clamp(raw * 5), 2)
 # =====================
+# 2) Sentiment -> [-5..+5]
 # =====================
 SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
 sent_tokenizer = None
 def analyze_sentiment(text: str) -> float:
     """
+    1..5 stars -> [-5..+5]
     """
     load_sentiment()
     inputs = sent_tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
         logits = sent_model(**inputs).logits
     probs = torch.softmax(logits, dim=-1)
     stars = torch.argmax(probs, dim=-1).item() + 1  # 1..5
+    score = (stars - 3) * 2.5
+    return round(clamp(score), 2)
 # =====================
+# 3) Sarcasm / Irony -> [-5..+5]
 # =====================
 SARCASM_MODEL = "cardiffnlp/twitter-roberta-base-irony"
 sarcasm_pipe = None
 def sarcasm_score(text: str) -> float:
     """
+    Rate how sarcastic/ironic ``text`` is on the -5..+5 scale.
+
+    +5 = very likely sarcasm/irony
+    -5 = very likely NOT sarcasm
+
+    The pipeline returns one label with a confidence in 0..1; the confidence
+    is scaled by 5 and its sign is taken from the label.
     """
     load_sarcasm()
     res = sarcasm_pipe(text)[0]
     label = res["label"].lower()
     conf = float(res["score"])  # 0..1
+    # "non_irony" also contains the substring "irony", so a plain substring
+    # test would treat both labels as ironic and never return a negative
+    # score. Rule out the negated label explicitly.
+    is_irony = "irony" in label and not label.startswith(("non", "not"))
+    signed = conf if is_irony else -conf
+    return round(clamp(signed * 5), 2)
+# =====================
+# 4) Neutrality -> [-5..+5]
+# =====================
+# +5 = maximally neutral
+# -5 = maximally charged / emotional / polarised
+#
+# Simple logic:
+# neutrality = 5 - (|sentiment| + max(0, sarcasm))
+# (sarcasm makes the text less neutral)
+def neutrality_score(text: str) -> float:
+    """
+    Rate how neutral ``text`` is on the -5..+5 scale.
+
+    Starts from +5 and subtracts the absolute sentiment score and the
+    positive part of the sarcasm score, then clamps and rounds to two
+    decimals.
+    """
+    sent = abs(analyze_sentiment(text))          # 0..5
+    sarc = max(0.0, sarcasm_score(text))         # 0..5 (only when sarcasm is detected)
+    # Both penalties are 0..5, so their sum is 0..10 and the result spans the
+    # full -5..+5 scale. Halving the sum would keep the score in 0..5 and make
+    # the documented "-5 = maximally charged" end unreachable.
+    neutrality = 5.0 - (sent + sarc)
+    return round(clamp(neutrality), 2)
 # =====================
+# 5) Agreement with irony adjustment
 # =====================
 def agreement_with_irony(msg1: str, msg2: str) -> float:
     """
+    Agreement score between msg1 and msg2, damped by irony found in msg2.
+
+    Irony lowers the "confidence" of the agreement: the base score is
+    multiplied by a factor that falls linearly from 1.0 (no irony) to 0.35
+    (maximum irony). The result is clamped and rounded to two decimals.
     """
     base = agreement_score_minus5_plus5(msg1, msg2)
+    # Only positive sarcasm scores count; a negative score contributes 0.
+    s2 = max(0.0, sarcasm_score(msg2))  # 0..5
+    sarcasm_strength = s2 / 5.0         # 0..1
     multiplier = 1.0 - 0.65 * sarcasm_strength
     final_score = base * multiplier
+    return round(clamp(final_score), 2)
 # =====================
+# 6) Zero-Shot Multilabel Classification -> [-5..+5]
 # =====================
 ZS_MODEL = "facebook/bart-large-mnli"
 zs_classifier = None
+# Your categories plus an extension for Twitter/X discussions
 CATEGORIES = [
+    # base categories
+    "politique",
+    "woke",
+    "racism",
+    "crime",
+    "police_abuse",
+    "corruption",
+    "hate_speech",
+    "activism",
+    # typical Twitter discussions
+    "outrage / moral outrage",
+    "cancel culture",
+    "culture war",
+    "polarization / us vs them",
+    "misinformation / fake news",
+    "conspiracy / deep state",
+    "propaganda / spin",
+    "whataboutism",
+    "virtue signaling",
+    "dogwhistle / coded language",
+    "trolling / bait",
+    "ragebait",
+    "harassment / bullying",
+    "callout / public shaming",
+    "ratio / pile-on",
+    "stan / fandom war",
+    "hot take",
+    "doomposting",
+    "memes / shitposting",
+    "political satire",
+    "debunking / fact-checking",
+    "support / solidarity",
 ]
 def load_zero_shot():
 def classify_message(text: str) -> dict:
     """
+    Return a dict {label: rating} where rating is in [-5..+5].
+    Important: this is not "truth" but the model's confidence in the label.
+
+    Each score returned by the zero-shot classifier for the CATEGORIES
+    labels is mapped with score01_to_minus5_plus5 and rounded to two
+    decimals.
     """
     load_zero_shot()
+    result = zs_classifier(text, candidate_labels=CATEGORIES, multi_label=True)
     labels = result["labels"]
     scores = result["scores"]
     out = {}
     for label, score in zip(labels, scores):
+        out[label] = round(score01_to_minus5_plus5(score), 2)
     return out
 # =====================
     gr.Markdown("## 📈 Unified NLP API (all scores: -5 .. +5)")
     gr.Markdown(
         """
+**Шкалы:**
 - **Agreement**: -5 = сильное противоречие, +5 = сильное согласие
 - **Sentiment**: -5 = негатив, +5 = позитив
+- **Sarcasm**: -5 = уверенно НЕ сарказм, +5 = уверенно сарказм/ирония
+- **Neutrality**: +5 = максимально нейтрально, -5 = максимально “заряжено”
+- **Multilabel**: рейтинг уверенности метки в шкале -5..+5 (0.5 → 0)
         """
     )
+    # Agreement
+    with gr.Tab("Agreement"):
         msg1 = gr.Textbox(label="Message 1")
         msg2 = gr.Textbox(label="Message 2")
         out_agree = gr.Number(label="Agreement Score (-5..+5)")
         btn_agree.click(fn=agreement_score_minus5_plus5, inputs=[msg1, msg2], outputs=out_agree)
+        gr.Markdown("### Agreement (irony-aware)")
         btn_agree_irony = gr.Button("Check Agreement (with irony)")
         out_agree_irony = gr.Number(label="Agreement Score (irony-aware) (-5..+5)")
         btn_agree_irony.click(fn=agreement_with_irony, inputs=[msg1, msg2], outputs=out_agree_irony)
+    # Sentiment
+    with gr.Tab("Sentiment"):
         text_sent = gr.Textbox(label="Text")
         btn_sent = gr.Button("Analyze Sentiment")
         out_sent = gr.Number(label="Sentiment Score (-5..+5)")
         btn_sent.click(fn=analyze_sentiment, inputs=text_sent, outputs=out_sent)
+    # Sarcasm
+    with gr.Tab("Sarcasm / Irony"):
         text_sarc = gr.Textbox(label="Text")
         btn_sarc = gr.Button("Analyze Sarcasm")
         out_sarc = gr.Number(label="Sarcasm Score (-5..+5)")
         btn_sarc.click(fn=sarcasm_score, inputs=text_sarc, outputs=out_sarc)
+    # Neutrality
+    with gr.Tab("Neutrality"):
+        text_neu = gr.Textbox(label="Text")
+        btn_neu = gr.Button("Analyze Neutrality")
+        out_neu = gr.Number(label="Neutrality Score (-5..+5)")
+        btn_neu.click(fn=neutrality_score, inputs=text_neu, outputs=out_neu)
+    # Multilabel
+    with gr.Tab("Multilabel Classification"):
         text_clf = gr.Textbox(label="Text")
         btn_clf = gr.Button("Classify")
         out_clf = gr.Label(label="Categories & Scores (-5..+5)")
+        btn_clf.click(fn=classify_message, inputs=text_clf, outputs=out_cl