Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,22 @@ from transformers import (
|
|
| 12 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
|
| 14 |
# =====================
|
| 15 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# =====================
|
| 17 |
MNLI_MODEL = "facebook/bart-large-mnli"
|
| 18 |
mnli_tokenizer = None
|
|
@@ -26,28 +41,22 @@ def load_mnli():
|
|
| 26 |
mnli_model.to(DEVICE)
|
| 27 |
mnli_model.eval()
|
| 28 |
|
| 29 |
-
def
|
| 30 |
"""
|
| 31 |
-
|
| 32 |
-
|
| 33 |
"""
|
| 34 |
load_mnli()
|
| 35 |
inputs = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True).to(DEVICE)
|
| 36 |
with torch.no_grad():
|
| 37 |
logits = mnli_model(**inputs).logits
|
| 38 |
probs = torch.softmax(logits, dim=-1)[0]
|
| 39 |
-
raw = (probs[2] - probs[0]).item() # [-1..+1]
|
| 40 |
-
return raw
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
Agreement в шкале [-5..+5]
|
| 45 |
-
"""
|
| 46 |
-
raw = agreement_raw_score(msg1, msg2)
|
| 47 |
-
return round(raw * 5, 2)
|
| 48 |
|
| 49 |
# =====================
|
| 50 |
-
# 2) Sentiment
|
| 51 |
# =====================
|
| 52 |
SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
|
| 53 |
sent_tokenizer = None
|
|
@@ -63,7 +72,7 @@ def load_sentiment():
|
|
| 63 |
|
| 64 |
def analyze_sentiment(text: str) -> float:
|
| 65 |
"""
|
| 66 |
-
|
| 67 |
"""
|
| 68 |
load_sentiment()
|
| 69 |
inputs = sent_tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
|
|
@@ -71,14 +80,12 @@ def analyze_sentiment(text: str) -> float:
|
|
| 71 |
logits = sent_model(**inputs).logits
|
| 72 |
probs = torch.softmax(logits, dim=-1)
|
| 73 |
stars = torch.argmax(probs, dim=-1).item() + 1 # 1..5
|
| 74 |
-
score = (stars - 3) * 2.5
|
| 75 |
-
return round(score, 2)
|
| 76 |
|
| 77 |
# =====================
|
| 78 |
-
# 3) Sarcasm / Irony
|
| 79 |
# =====================
|
| 80 |
-
# Можно заменить модель на другую, если хочешь.
|
| 81 |
-
# Эта модель популярна для сарказма.
|
| 82 |
SARCASM_MODEL = "cardiffnlp/twitter-roberta-base-irony"
|
| 83 |
sarcasm_pipe = None
|
| 84 |
|
|
@@ -94,56 +101,89 @@ def load_sarcasm():
|
|
| 94 |
|
| 95 |
def sarcasm_score(text: str) -> float:
|
| 96 |
"""
|
| 97 |
-
|
| 98 |
-
|
| 99 |
"""
|
| 100 |
load_sarcasm()
|
| 101 |
res = sarcasm_pipe(text)[0]
|
| 102 |
-
# Обычно метки: "irony" / "non_irony"
|
| 103 |
label = res["label"].lower()
|
| 104 |
conf = float(res["score"]) # 0..1
|
| 105 |
|
| 106 |
if "irony" in label:
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
# =====================
|
| 114 |
-
#
|
| 115 |
# =====================
|
| 116 |
def agreement_with_irony(msg1: str, msg2: str) -> float:
|
| 117 |
"""
|
| 118 |
-
И
|
| 119 |
-
- считаем agreement [-5..+5]
|
| 120 |
-
- считаем сарказм msg2 (обычно сарказм в ответе важнее)
|
| 121 |
-
- если сарказм высокий, уменьшаем "уверенность" agreement
|
| 122 |
-
|
| 123 |
-
Это НЕ идеальная логика, но работает лучше, чем игнорировать иронию.
|
| 124 |
"""
|
| 125 |
base = agreement_score_minus5_plus5(msg1, msg2)
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
s2 = sarcasm_score(msg2) # [-5..+5]
|
| 128 |
-
sarcasm_strength = abs(s2) / 5.0 # 0..1
|
| 129 |
-
|
| 130 |
-
# Чем больше сарказм, тем сильнее "сжимаем" agreement к нулю
|
| 131 |
-
# 0 сарказма -> множитель 1
|
| 132 |
-
# сильный сарказм -> множитель ~0.35
|
| 133 |
multiplier = 1.0 - 0.65 * sarcasm_strength
|
| 134 |
-
|
| 135 |
final_score = base * multiplier
|
| 136 |
-
return round(final_score, 2)
|
| 137 |
|
| 138 |
# =====================
|
| 139 |
-
#
|
| 140 |
# =====================
|
| 141 |
ZS_MODEL = "facebook/bart-large-mnli"
|
| 142 |
zs_classifier = None
|
| 143 |
|
|
|
|
| 144 |
CATEGORIES = [
|
| 145 |
-
|
| 146 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
]
|
| 148 |
|
| 149 |
def load_zero_shot():
|
|
@@ -157,19 +197,18 @@ def load_zero_shot():
|
|
| 157 |
|
| 158 |
def classify_message(text: str) -> dict:
|
| 159 |
"""
|
| 160 |
-
Возвращает р
|
| 161 |
-
|
| 162 |
"""
|
| 163 |
load_zero_shot()
|
| 164 |
-
result = zs_classifier(text, candidate_labels=CATEGORIES)
|
|
|
|
| 165 |
labels = result["labels"]
|
| 166 |
scores = result["scores"]
|
| 167 |
|
| 168 |
-
# score 0..1 -> [-5..+5]
|
| 169 |
out = {}
|
| 170 |
for label, score in zip(labels, scores):
|
| 171 |
-
|
| 172 |
-
out[label] = round(rating, 2)
|
| 173 |
return out
|
| 174 |
|
| 175 |
# =====================
|
|
@@ -179,16 +218,17 @@ with gr.Blocks(title="Unified NLP API (-5..+5)") as demo:
|
|
| 179 |
gr.Markdown("## 📈 Unified NLP API (all scores: -5 .. +5)")
|
| 180 |
gr.Markdown(
|
| 181 |
"""
|
| 182 |
-
**
|
| 183 |
- **Agreement**: -5 = сильное противоречие, +5 = сильное согласие
|
| 184 |
- **Sentiment**: -5 = негатив, +5 = позитив
|
| 185 |
-
- **Sarcasm**: -5 = уверенно
|
| 186 |
-
- **
|
|
|
|
| 187 |
"""
|
| 188 |
)
|
| 189 |
|
| 190 |
-
#
|
| 191 |
-
with gr.Tab("Agreement
|
| 192 |
msg1 = gr.Textbox(label="Message 1")
|
| 193 |
msg2 = gr.Textbox(label="Message 2")
|
| 194 |
|
|
@@ -196,30 +236,35 @@ with gr.Blocks(title="Unified NLP API (-5..+5)") as demo:
|
|
| 196 |
out_agree = gr.Number(label="Agreement Score (-5..+5)")
|
| 197 |
btn_agree.click(fn=agreement_score_minus5_plus5, inputs=[msg1, msg2], outputs=out_agree)
|
| 198 |
|
| 199 |
-
gr.Markdown("### Agreement
|
| 200 |
btn_agree_irony = gr.Button("Check Agreement (with irony)")
|
| 201 |
out_agree_irony = gr.Number(label="Agreement Score (irony-aware) (-5..+5)")
|
| 202 |
btn_agree_irony.click(fn=agreement_with_irony, inputs=[msg1, msg2], outputs=out_agree_irony)
|
| 203 |
|
| 204 |
-
#
|
| 205 |
-
with gr.Tab("Sentiment
|
| 206 |
text_sent = gr.Textbox(label="Text")
|
| 207 |
btn_sent = gr.Button("Analyze Sentiment")
|
| 208 |
out_sent = gr.Number(label="Sentiment Score (-5..+5)")
|
| 209 |
btn_sent.click(fn=analyze_sentiment, inputs=text_sent, outputs=out_sent)
|
| 210 |
|
| 211 |
-
#
|
| 212 |
-
with gr.Tab("Sarcasm / Irony
|
| 213 |
text_sarc = gr.Textbox(label="Text")
|
| 214 |
btn_sarc = gr.Button("Analyze Sarcasm")
|
| 215 |
out_sarc = gr.Number(label="Sarcasm Score (-5..+5)")
|
| 216 |
btn_sarc.click(fn=sarcasm_score, inputs=text_sarc, outputs=out_sarc)
|
| 217 |
|
| 218 |
-
#
|
| 219 |
-
with gr.Tab("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
text_clf = gr.Textbox(label="Text")
|
| 221 |
btn_clf = gr.Button("Classify")
|
| 222 |
out_clf = gr.Label(label="Categories & Scores (-5..+5)")
|
| 223 |
-
btn_clf.click(fn=classify_message, inputs=text_clf, outputs=
|
| 224 |
-
|
| 225 |
-
demo.launch()
|
|
|
|
| 12 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
|
| 14 |
# =====================
|
| 15 |
+
# Helpers
|
| 16 |
+
# =====================
|
| 17 |
+
def clamp(x: float, lo: float = -5.0, hi: float = 5.0) -> float:
    """Limit ``x`` to the closed interval [``lo``, ``hi``].

    Args:
        x: Value to bound.
        lo: Lower bound; defaults to -5.0.
        hi: Upper bound; defaults to 5.0.

    Returns:
        ``lo`` when ``x`` is below it, ``hi`` when ``x`` is above it,
        otherwise ``x`` itself.
    """
    capped_above = min(hi, x)
    return max(lo, capped_above)
|
| 19 |
+
|
| 20 |
+
def score01_to_minus5_plus5(p: float) -> float:
    """Map a probability in 0..1 onto the -5..+5 scale.

    The mapping is linear:
        0.0 -> -5
        0.5 ->  0
        1.0 -> +5

    The result goes through ``clamp``, so inputs outside 0..1 saturate at
    -5 / +5 instead of leaving the scale.
    """
    offset_from_midpoint = float(p) - 0.5
    return clamp(offset_from_midpoint * 10)
|
| 28 |
+
|
| 29 |
+
# =====================
|
| 30 |
+
# 1) Agreement (MNLI) -> [-5..+5]
|
| 31 |
# =====================
|
| 32 |
MNLI_MODEL = "facebook/bart-large-mnli"
|
| 33 |
mnli_tokenizer = None
|
|
|
|
| 41 |
mnli_model.to(DEVICE)
|
| 42 |
mnli_model.eval()
|
| 43 |
|
| 44 |
+
def agreement_score_minus5_plus5(msg1: str, msg2: str) -> float:
    """Score how strongly ``msg2`` agrees with ``msg1`` on a -5..+5 scale.

    -5 = contradiction, +5 = agreement. The score is
    P(entailment) - P(contradiction), which lies in [-1..+1], multiplied by 5.

    Args:
        msg1: First message (passed to the tokenizer as the first sequence).
        msg2: Second message (passed as the paired sequence).

    Returns:
        Agreement score rounded to two decimals, clamped to [-5..+5].
    """
    load_mnli()

    # Look the class positions up in the model config instead of assuming a
    # fixed order: NLI checkpoints do not all order their labels the same
    # way, and swapping MNLI_MODEL would otherwise silently flip the score.
    # Falls back to the previously hard-coded positions (2 and 0) when the
    # config does not name its labels.
    label2id = getattr(getattr(mnli_model, "config", None), "label2id", None) or {}
    index_by_label = {str(name).lower(): int(idx) for name, idx in label2id.items()}
    entailment_idx = index_by_label.get("entailment", 2)
    contradiction_idx = index_by_label.get("contradiction", 0)

    inputs = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True).to(DEVICE)
    with torch.no_grad():
        logits = mnli_model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]

    raw = (probs[entailment_idx] - probs[contradiction_idx]).item()  # [-1..+1]
    return round(clamp(raw * 5), 2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
# =====================
|
| 59 |
+
# 2) Sentiment -> [-5..+5]
|
| 60 |
# =====================
|
| 61 |
SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
|
| 62 |
sent_tokenizer = None
|
|
|
|
| 72 |
|
| 73 |
def analyze_sentiment(text: str) -> float:
|
| 74 |
"""
|
| 75 |
+
1..5 stars -> [-5..+5]
|
| 76 |
"""
|
| 77 |
load_sentiment()
|
| 78 |
inputs = sent_tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
|
|
|
|
| 80 |
logits = sent_model(**inputs).logits
|
| 81 |
probs = torch.softmax(logits, dim=-1)
|
| 82 |
stars = torch.argmax(probs, dim=-1).item() + 1 # 1..5
|
| 83 |
+
score = (stars - 3) * 2.5
|
| 84 |
+
return round(clamp(score), 2)
|
| 85 |
|
| 86 |
# =====================
|
| 87 |
+
# 3) Sarcasm / Irony -> [-5..+5]
|
| 88 |
# =====================
|
|
|
|
|
|
|
| 89 |
SARCASM_MODEL = "cardiffnlp/twitter-roberta-base-irony"
|
| 90 |
sarcasm_pipe = None
|
| 91 |
|
|
|
|
| 101 |
|
| 102 |
def sarcasm_score(text: str) -> float:
    """Score how likely ``text`` is sarcastic/ironic on a -5..+5 scale.

    +5 = very likely sarcasm/irony, -5 = very likely NOT sarcasm.

    Args:
        text: The text to classify.

    Returns:
        The classifier confidence (0..1) scaled by 5, positive when the
        predicted label is the irony class and negative otherwise, rounded
        to two decimals.
    """
    load_sarcasm()
    res = sarcasm_pipe(text)[0]
    label = res["label"].lower()
    conf = float(res["score"])  # 0..1

    # A negative label such as "non_irony" still contains the substring
    # "irony", so a bare substring test would score confident non-irony as
    # sarcasm. Rule out negated labels before accepting the match.
    negated = label.startswith(("non", "not", "no_", "no-"))
    is_irony = "irony" in label and not negated

    signed = conf * 5 if is_irony else -conf * 5
    return round(clamp(signed), 2)
|
| 115 |
+
|
| 116 |
+
# =====================
|
| 117 |
+
# 4) Neutrality -> [-5..+5]
|
| 118 |
+
# =====================
|
| 119 |
+
def neutrality_score(text: str) -> float:
    """Score how neutral ``text`` is on a -5..+5 scale.

    +5 = maximally neutral, -5 = maximally charged / emotional / polarised.

    The "charge" of a text is the mean of its absolute sentiment (0..5) and
    its sarcasm score floored at zero (0..5), so sarcasm makes a text less
    neutral while a "not sarcasm" verdict adds nothing. The 0..5 charge is
    then mapped linearly onto the scale:

        neutrality = 5 - 2 * charge = 5 - (|sentiment| + max(0, sarcasm))

    Args:
        text: The text to analyse.

    Returns:
        Neutrality score rounded to two decimals, clamped to [-5..+5].
    """
    sent = abs(analyze_sentiment(text))  # 0..5
    sarc = max(0.0, sarcasm_score(text))  # 0..5 (counts only when sarcasm is present)
    charge = (sent + sarc) / 2.0  # 0..5

    # Subtracting the charge only once would confine the result to 0..+5 and
    # the documented -5 end of the scale could never be reached; doubling it
    # spreads the result over the whole -5..+5 range.
    neutrality = 5.0 - 2.0 * charge
    return round(clamp(neutrality), 2)
|
| 130 |
|
| 131 |
# =====================
|
| 132 |
+
# 5) Agreement with irony adjustment
|
| 133 |
# =====================
|
| 134 |
def agreement_with_irony(msg1: str, msg2: str) -> float:
    """Return the agreement of ``msg2`` with ``msg1``, damped by irony.

    Irony in the reply lowers the "confidence" of the agreement score: the
    plain agreement score is multiplied by a factor that shrinks linearly
    from 1.0 (no sarcasm detected in ``msg2``) to 0.35 (sarcasm score of +5).
    """
    plain_agreement = agreement_score_minus5_plus5(msg1, msg2)

    # Only a positive sarcasm score counts; "not sarcasm" verdicts are
    # treated as zero irony.
    irony_level = max(0.0, sarcasm_score(msg2)) / 5.0  # 0..1

    damped = plain_agreement * (1.0 - 0.65 * irony_level)
    return round(clamp(damped), 2)
|
| 145 |
|
| 146 |
# =====================
|
| 147 |
+
# 6) Zero-Shot Multilabel Classification -> [-5..+5]
|
| 148 |
# =====================
|
| 149 |
ZS_MODEL = "facebook/bart-large-mnli"
|
| 150 |
zs_classifier = None
|
| 151 |
|
| 152 |
+
# Candidate labels for zero-shot classification: the base categories plus an
# extension covering typical Twitter/X discussions.
CATEGORIES = [
    # base categories
    "politique", "woke", "racism", "crime",
    "police_abuse", "corruption", "hate_speech", "activism",

    # typical Twitter discussions
    "outrage / moral outrage", "cancel culture", "culture war",
    "polarization / us vs them", "misinformation / fake news",
    "conspiracy / deep state", "propaganda / spin", "whataboutism",
    "virtue signaling", "dogwhistle / coded language", "trolling / bait",
    "ragebait", "harassment / bullying", "callout / public shaming",
    "ratio / pile-on", "stan / fandom war", "hot take", "doomposting",
    "memes / shitposting", "political satire", "debunking / fact-checking",
    "support / solidarity",
]
|
| 188 |
|
| 189 |
def load_zero_shot():
|
|
|
|
| 197 |
|
| 198 |
def classify_message(text: str) -> dict:
    """Rate ``text`` against every label in ``CATEGORIES``.

    Returns a dict ``{label: rating}`` where each rating is on the -5..+5
    scale. Note that a rating is not "the truth": it expresses the model's
    confidence in that label.
    """
    load_zero_shot()
    prediction = zs_classifier(text, candidate_labels=CATEGORIES, multi_label=True)

    return {
        name: round(score01_to_minus5_plus5(confidence), 2)
        for name, confidence in zip(prediction["labels"], prediction["scores"])
    }
|
| 213 |
|
| 214 |
# =====================
|
|
|
|
| 218 |
gr.Markdown("## 📈 Unified NLP API (all scores: -5 .. +5)")
|
| 219 |
gr.Markdown(
|
| 220 |
"""
|
| 221 |
+
**Шкалы:**
|
| 222 |
- **Agreement**: -5 = сильное противоречие, +5 = сильное согласие
|
| 223 |
- **Sentiment**: -5 = негатив, +5 = позитив
|
| 224 |
+
- **Sarcasm**: -5 = уверенно НЕ сарказм, +5 = уверенно сарказм/ирония
|
| 225 |
+
- **Neutrality**: +5 = максимально нейтрально, -5 = максимально “заряжено”
|
| 226 |
+
- **Multilabel**: рейтинг уверенности метки в шкале -5..+5 (0.5 → 0)
|
| 227 |
"""
|
| 228 |
)
|
| 229 |
|
| 230 |
+
# Agreement
|
| 231 |
+
with gr.Tab("Agreement"):
|
| 232 |
msg1 = gr.Textbox(label="Message 1")
|
| 233 |
msg2 = gr.Textbox(label="Message 2")
|
| 234 |
|
|
|
|
| 236 |
out_agree = gr.Number(label="Agreement Score (-5..+5)")
|
| 237 |
btn_agree.click(fn=agreement_score_minus5_plus5, inputs=[msg1, msg2], outputs=out_agree)
|
| 238 |
|
| 239 |
+
gr.Markdown("### Agreement (irony-aware)")
|
| 240 |
btn_agree_irony = gr.Button("Check Agreement (with irony)")
|
| 241 |
out_agree_irony = gr.Number(label="Agreement Score (irony-aware) (-5..+5)")
|
| 242 |
btn_agree_irony.click(fn=agreement_with_irony, inputs=[msg1, msg2], outputs=out_agree_irony)
|
| 243 |
|
| 244 |
+
# Sentiment
|
| 245 |
+
with gr.Tab("Sentiment"):
|
| 246 |
text_sent = gr.Textbox(label="Text")
|
| 247 |
btn_sent = gr.Button("Analyze Sentiment")
|
| 248 |
out_sent = gr.Number(label="Sentiment Score (-5..+5)")
|
| 249 |
btn_sent.click(fn=analyze_sentiment, inputs=text_sent, outputs=out_sent)
|
| 250 |
|
| 251 |
+
# Sarcasm
|
| 252 |
+
with gr.Tab("Sarcasm / Irony"):
|
| 253 |
text_sarc = gr.Textbox(label="Text")
|
| 254 |
btn_sarc = gr.Button("Analyze Sarcasm")
|
| 255 |
out_sarc = gr.Number(label="Sarcasm Score (-5..+5)")
|
| 256 |
btn_sarc.click(fn=sarcasm_score, inputs=text_sarc, outputs=out_sarc)
|
| 257 |
|
| 258 |
+
# Neutrality
|
| 259 |
+
with gr.Tab("Neutrality"):
|
| 260 |
+
text_neu = gr.Textbox(label="Text")
|
| 261 |
+
btn_neu = gr.Button("Analyze Neutrality")
|
| 262 |
+
out_neu = gr.Number(label="Neutrality Score (-5..+5)")
|
| 263 |
+
btn_neu.click(fn=neutrality_score, inputs=text_neu, outputs=out_neu)
|
| 264 |
+
|
| 265 |
+
# Multilabel
|
| 266 |
+
with gr.Tab("Multilabel Classification"):
|
| 267 |
text_clf = gr.Textbox(label="Text")
|
| 268 |
btn_clf = gr.Button("Classify")
|
| 269 |
out_clf = gr.Label(label="Categories & Scores (-5..+5)")
|
| 270 |
+
btn_clf.click(fn=classify_message, inputs=text_clf, outputs=out_cl
|
|
|
|
|
|