narutoSiskovich committed on
Commit
900140b
·
verified ·
1 Parent(s): 1c8f881

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -66
app.py CHANGED
@@ -12,7 +12,22 @@ from transformers import (
12
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
13
 
14
  # =====================
15
- # 1) Agreement (MNLI)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # =====================
17
  MNLI_MODEL = "facebook/bart-large-mnli"
18
  mnli_tokenizer = None
@@ -26,28 +41,22 @@ def load_mnli():
26
  mnli_model.to(DEVICE)
27
  mnli_model.eval()
28
 
29
- def agreement_raw_score(msg1: str, msg2: str) -> float:
30
  """
31
- Возвращает "сырое" согласие в диапазоне [-1..+1]
32
- по формуле entailment - contradiction.
33
  """
34
  load_mnli()
35
  inputs = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True).to(DEVICE)
36
  with torch.no_grad():
37
  logits = mnli_model(**inputs).logits
38
  probs = torch.softmax(logits, dim=-1)[0]
39
- raw = (probs[2] - probs[0]).item() # [-1..+1]
40
- return raw
41
 
42
- def agreement_score_minus5_plus5(msg1: str, msg2: str) -> float:
43
- """
44
- Agreement в шкале [-5..+5]
45
- """
46
- raw = agreement_raw_score(msg1, msg2)
47
- return round(raw * 5, 2)
48
 
49
  # =====================
50
- # 2) Sentiment (-5..+5)
51
  # =====================
52
  SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
53
  sent_tokenizer = None
@@ -63,7 +72,7 @@ def load_sentiment():
63
 
64
  def analyze_sentiment(text: str) -> float:
65
  """
66
- Модель даёт 1..5 звёзд -> переводим в [-5..+5]
67
  """
68
  load_sentiment()
69
  inputs = sent_tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
@@ -71,14 +80,12 @@ def analyze_sentiment(text: str) -> float:
71
  logits = sent_model(**inputs).logits
72
  probs = torch.softmax(logits, dim=-1)
73
  stars = torch.argmax(probs, dim=-1).item() + 1 # 1..5
74
- score = (stars - 3) * 2.5 # -5..+5
75
- return round(score, 2)
76
 
77
  # =====================
78
- # 3) Sarcasm / Irony (-5..+5)
79
  # =====================
80
- # Можно заменить модель на другую, если хочешь.
81
- # Эта модель популярна для сарказма.
82
  SARCASM_MODEL = "cardiffnlp/twitter-roberta-base-irony"
83
  sarcasm_pipe = None
84
 
@@ -94,56 +101,89 @@ def load_sarcasm():
94
 
95
  def sarcasm_score(text: str) -> float:
96
  """
97
- Возвращает рейтинг сарказма в [-5..+5]
98
- (чем выше, тем больше сарказма/иронии)
99
  """
100
  load_sarcasm()
101
  res = sarcasm_pipe(text)[0]
102
- # Обычно метки: "irony" / "non_irony"
103
  label = res["label"].lower()
104
  conf = float(res["score"]) # 0..1
105
 
106
  if "irony" in label:
107
- # 0..1 -> 0..+5
108
- return round(conf * 5, 2)
109
- else:
110
- # 0..1 -> 0..-5
111
- return round(-conf * 5, 2)
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  # =====================
114
- # 4) Agreement + Sarcasm
115
  # =====================
116
  def agreement_with_irony(msg1: str, msg2: str) -> float:
117
  """
118
- Идея:
119
- - считаем agreement [-5..+5]
120
- - считаем сарказм msg2 (обычно сарказм в ответе важнее)
121
- - если сарказм высокий, уменьшаем "уверенность" agreement
122
-
123
- Это НЕ идеальная логика, но работает лучше, чем игнорировать иронию.
124
  """
125
  base = agreement_score_minus5_plus5(msg1, msg2)
 
 
126
 
127
- s2 = sarcasm_score(msg2) # [-5..+5]
128
- sarcasm_strength = abs(s2) / 5.0 # 0..1
129
-
130
- # Чем больше сарказм, тем сильнее "сжимаем" agreement к нулю
131
- # 0 сарказма -> множитель 1
132
- # сильный сарказм -> множитель ~0.35
133
  multiplier = 1.0 - 0.65 * sarcasm_strength
134
-
135
  final_score = base * multiplier
136
- return round(final_score, 2)
137
 
138
  # =====================
139
- # 5) Zero-Shot Multilabel -> [-5..+5]
140
  # =====================
141
  ZS_MODEL = "facebook/bart-large-mnli"
142
  zs_classifier = None
143
 
 
144
  CATEGORIES = [
145
- "politique", "woke", "racism", "crime",
146
- "police_abuse", "corruption", "hate_speech", "activism"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  ]
148
 
149
  def load_zero_shot():
@@ -157,19 +197,18 @@ def load_zero_shot():
157
 
158
  def classify_message(text: str) -> dict:
159
  """
160
- Возвращает рейтинг категорий в [-5..+5]
161
- (0.5 = нейтрально, >0.5 = ближе к +5, <0.5 = ближе к -5)
162
  """
163
  load_zero_shot()
164
- result = zs_classifier(text, candidate_labels=CATEGORIES)
 
165
  labels = result["labels"]
166
  scores = result["scores"]
167
 
168
- # score 0..1 -> [-5..+5]
169
  out = {}
170
  for label, score in zip(labels, scores):
171
- rating = (float(score) - 0.5) * 10
172
- out[label] = round(rating, 2)
173
  return out
174
 
175
  # =====================
@@ -179,16 +218,17 @@ with gr.Blocks(title="Unified NLP API (-5..+5)") as demo:
179
  gr.Markdown("## 📈 Unified NLP API (all scores: -5 .. +5)")
180
  gr.Markdown(
181
  """
182
- **Что есть что:**
183
  - **Agreement**: -5 = сильное противоречие, +5 = сильное согласие
184
  - **Sentiment**: -5 = негатив, +5 = позитив
185
- - **Sarcasm**: -5 = уверенно *не сарказм*, +5 = уверенно *сарказм/ирония*
186
- - **Categories**: рейтинг уверенности (0.5 → 0, 1.0 → +5, 0.0 -5)
 
187
  """
188
  )
189
 
190
- # ----- Agreement Tab -----
191
- with gr.Tab("Agreement (-5..+5)"):
192
  msg1 = gr.Textbox(label="Message 1")
193
  msg2 = gr.Textbox(label="Message 2")
194
 
@@ -196,30 +236,35 @@ with gr.Blocks(title="Unified NLP API (-5..+5)") as demo:
196
  out_agree = gr.Number(label="Agreement Score (-5..+5)")
197
  btn_agree.click(fn=agreement_score_minus5_plus5, inputs=[msg1, msg2], outputs=out_agree)
198
 
199
- gr.Markdown("### Agreement with Irony adjustment")
200
  btn_agree_irony = gr.Button("Check Agreement (with irony)")
201
  out_agree_irony = gr.Number(label="Agreement Score (irony-aware) (-5..+5)")
202
  btn_agree_irony.click(fn=agreement_with_irony, inputs=[msg1, msg2], outputs=out_agree_irony)
203
 
204
- # ----- Sentiment Tab -----
205
- with gr.Tab("Sentiment (-5..+5)"):
206
  text_sent = gr.Textbox(label="Text")
207
  btn_sent = gr.Button("Analyze Sentiment")
208
  out_sent = gr.Number(label="Sentiment Score (-5..+5)")
209
  btn_sent.click(fn=analyze_sentiment, inputs=text_sent, outputs=out_sent)
210
 
211
- # ----- Sarcasm Tab -----
212
- with gr.Tab("Sarcasm / Irony (-5..+5)"):
213
  text_sarc = gr.Textbox(label="Text")
214
  btn_sarc = gr.Button("Analyze Sarcasm")
215
  out_sarc = gr.Number(label="Sarcasm Score (-5..+5)")
216
  btn_sarc.click(fn=sarcasm_score, inputs=text_sarc, outputs=out_sarc)
217
 
218
- # ----- Multilabel (Zero-Shot) Classification Tab -----
219
- with gr.Tab("Multilabel Classification (-5..+5)"):
 
 
 
 
 
 
 
220
  text_clf = gr.Textbox(label="Text")
221
  btn_clf = gr.Button("Classify")
222
  out_clf = gr.Label(label="Categories & Scores (-5..+5)")
223
- btn_clf.click(fn=classify_message, inputs=text_clf, outputs=out_clf)
224
-
225
- demo.launch()
 
12
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
13
 
14
  # =====================
15
+ # Helpers
16
+ # =====================
17
def clamp(x: float, lo: float = -5.0, hi: float = 5.0) -> float:
    """Limit ``x`` to the closed interval ``[lo, hi]``.

    Args:
        x: Value to limit.
        lo: Lower bound of the interval (defaults to -5.0).
        hi: Upper bound of the interval (defaults to 5.0).

    Returns:
        ``hi`` when ``x`` is not below it, ``lo`` when the capped value is
        not above it, otherwise ``x`` unchanged (assuming ``lo <= hi``).
    """
    # Cap from above first, then from below -- same order as max(lo, min(hi, x)).
    capped = x if x < hi else hi
    return capped if capped > lo else lo
19
+
20
def score01_to_minus5_plus5(p: float) -> float:
    """Map a 0..1 score linearly onto the -5..+5 scale.

    Reference points:
        0.0 -> -5
        0.5 ->  0
        1.0 -> +5

    The result is passed through ``clamp``, so it never leaves -5..+5.
    """
    centered = float(p) - 0.5  # shift so that 0.5 becomes the zero point
    return clamp(centered * 10)
28
+
29
+ # =====================
30
+ # 1) Agreement (MNLI) -> [-5..+5]
31
  # =====================
32
  MNLI_MODEL = "facebook/bart-large-mnli"
33
  mnli_tokenizer = None
 
41
  mnli_model.to(DEVICE)
42
  mnli_model.eval()
43
 
44
def agreement_score_minus5_plus5(msg1: str, msg2: str) -> float:
    """Score the agreement between two messages on the -5..+5 scale.

    -5 means contradiction, +5 means agreement. The score is the MNLI
    "entailment minus contradiction" probability difference (in -1..+1),
    scaled by 5, clamped and rounded to two decimals.

    Args:
        msg1: First message, passed to the tokenizer as the first sequence.
        msg2: Second message, passed to the tokenizer as the second sequence.

    Returns:
        The agreement score in -5..+5.
    """
    load_mnli()
    encoded = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True).to(DEVICE)
    with torch.no_grad():
        class_probs = torch.softmax(mnli_model(**encoded).logits, dim=-1)[0]

    # NOTE(review): assumes index 0 is "contradiction" and index 2 is
    # "entailment" for MNLI_MODEL -- confirm against the model config.
    entail_minus_contra = (class_probs[2] - class_probs[0]).item()  # [-1..+1]
    return round(clamp(entail_minus_contra * 5), 2)
 
 
 
 
57
 
58
  # =====================
59
+ # 2) Sentiment -> [-5..+5]
60
  # =====================
61
  SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
62
  sent_tokenizer = None
 
72
 
73
  def analyze_sentiment(text: str) -> float:
74
  """
75
+ 1..5 stars -> [-5..+5]
76
  """
77
  load_sentiment()
78
  inputs = sent_tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
 
80
  logits = sent_model(**inputs).logits
81
  probs = torch.softmax(logits, dim=-1)
82
  stars = torch.argmax(probs, dim=-1).item() + 1 # 1..5
83
+ score = (stars - 3) * 2.5
84
+ return round(clamp(score), 2)
85
 
86
  # =====================
87
+ # 3) Sarcasm / Irony -> [-5..+5]
88
  # =====================
 
 
89
  SARCASM_MODEL = "cardiffnlp/twitter-roberta-base-irony"
90
  sarcasm_pipe = None
91
 
 
101
 
102
def sarcasm_score(text: str) -> float:
    """Rate how sarcastic/ironic ``text`` is on the -5..+5 scale.

    +5 = very likely sarcasm/irony
    -5 = very likely NOT sarcasm

    Args:
        text: The text to analyse.

    Returns:
        The classifier confidence (0..1) scaled by 5, positive when the top
        label is the irony class and negative otherwise, clamped to -5..+5
        and rounded to two decimals.
    """
    load_sarcasm()
    top = sarcasm_pipe(text)[0]

    label = top["label"].lower()
    conf = float(top["score"])  # 0..1

    # A bare substring test is not enough: the negative label "non_irony"
    # also contains "irony", which would make every prediction count as
    # sarcastic. Negated labels must be excluded explicitly.
    is_irony = "irony" in label and not label.startswith(("non", "not"))

    signed_conf = conf if is_irony else -conf
    return round(clamp(signed_conf * 5), 2)
115
+
116
+ # =====================
117
+ # 4) Neutrality -> [-5..+5]
118
+ # =====================
119
# +5 = as neutral as possible
# -5 = as charged / emotional / polarised as possible
#
# Simple logic:
#   charge     = (|sentiment| + max(0, sarcasm)) / 2    -> 0..5
#   neutrality = 5 - 2 * charge                         -> -5..+5
# (sarcasm makes a text less neutral)
def neutrality_score(text: str) -> float:
    """Rate how neutral ``text`` is on the -5..+5 scale.

    The "charge" of the text is the mean of its absolute sentiment score and
    its sarcasm score (negative sarcasm scores, i.e. "not sarcasm", count as
    zero). The charge (0..5) is then mapped linearly onto the full scale so
    that zero charge gives +5 and maximal charge gives -5.

    Args:
        text: The text to analyse.

    Returns:
        The neutrality score in -5..+5, rounded to two decimals.
    """
    sent = abs(analyze_sentiment(text))  # 0..5
    sarc = max(0.0, sarcasm_score(text))  # 0..5 (only when sarcasm is detected)
    charge = (sent + sarc) / 2.0  # 0..5

    # The factor 2 stretches the result over the whole documented scale;
    # subtracting the charge only once would confine the score to 0..+5 and
    # make the "-5 = maximally charged" end unreachable.
    neutrality = 5.0 - 2.0 * charge
    return round(clamp(neutrality), 2)
130
 
131
  # =====================
132
+ # 5) Agreement with irony adjustment
133
  # =====================
134
def agreement_with_irony(msg1: str, msg2: str) -> float:
    """Agreement score for two messages, dampened by irony in the reply.

    Irony lowers the "confidence" of the agreement: the plain agreement score
    is multiplied by a factor that falls from 1.0 (no irony detected in
    ``msg2``) to 0.35 (maximal irony).

    Args:
        msg1: First message.
        msg2: Second message; only this one is checked for sarcasm.

    Returns:
        The dampened agreement score, clamped to -5..+5 and rounded to two
        decimals.
    """
    plain_agreement = agreement_score_minus5_plus5(msg1, msg2)

    # Negative sarcasm scores ("not sarcasm") are treated as no irony at all.
    irony_level = max(0.0, sarcasm_score(msg2)) / 5.0  # 0..1
    damping = 1.0 - 0.65 * irony_level

    return round(clamp(plain_agreement * damping), 2)
145
 
146
  # =====================
147
+ # 6) Zero-Shot Multilabel Classification -> [-5..+5]
148
  # =====================
149
  ZS_MODEL = "facebook/bart-large-mnli"
150
  zs_classifier = None
151
 
152
# Candidate labels for the zero-shot classifier: the original categories
# plus an extension aimed at Twitter/X discussions.
CATEGORIES = [
    # base categories
    "politique",
    "woke",
    "racism",
    "crime",
    "police_abuse",
    "corruption",
    "hate_speech",
    "activism",

    # typical Twitter discussion patterns
    "outrage / moral outrage",
    "cancel culture",
    "culture war",
    "polarization / us vs them",
    "misinformation / fake news",
    "conspiracy / deep state",
    "propaganda / spin",
    "whataboutism",
    "virtue signaling",
    "dogwhistle / coded language",
    "trolling / bait",
    "ragebait",
    "harassment / bullying",
    "callout / public shaming",
    "ratio / pile-on",
    "stan / fandom war",
    "hot take",
    "doomposting",
    "memes / shitposting",
    "political satire",
    "debunking / fact-checking",
    "support / solidarity",
]
188
 
189
  def load_zero_shot():
 
197
 
198
def classify_message(text: str) -> dict:
    """Rate ``text`` against every entry of ``CATEGORIES``.

    Returns a dict ``{label: rating}`` where each rating is on the -5..+5
    scale, rounded to two decimals. Important: a rating is not "the truth",
    it is the model's confidence about that label.

    Args:
        text: The text to classify.

    Returns:
        Mapping from each label reported by the classifier to its rating.
    """
    load_zero_shot()
    prediction = zs_classifier(text, candidate_labels=CATEGORIES, multi_label=True)

    return {
        name: round(score01_to_minus5_plus5(prob), 2)
        for name, prob in zip(prediction["labels"], prediction["scores"])
    }
213
 
214
  # =====================
 
218
  gr.Markdown("## 📈 Unified NLP API (all scores: -5 .. +5)")
219
  gr.Markdown(
220
  """
221
+ **Шкалы:**
222
  - **Agreement**: -5 = сильное противоречие, +5 = сильное согласие
223
  - **Sentiment**: -5 = негатив, +5 = позитив
224
+ - **Sarcasm**: -5 = уверенно НЕ сарказм, +5 = уверенно сарказм/ирония
225
+ - **Neutrality**: +5 = максимально нейтрально, -5 = максимально “заряжено”
226
+ - **Multilabel**: рейтинг уверенности метки в шкале -5..+5 (0.5 → 0)
227
  """
228
  )
229
 
230
+ # Agreement
231
+ with gr.Tab("Agreement"):
232
  msg1 = gr.Textbox(label="Message 1")
233
  msg2 = gr.Textbox(label="Message 2")
234
 
 
236
  out_agree = gr.Number(label="Agreement Score (-5..+5)")
237
  btn_agree.click(fn=agreement_score_minus5_plus5, inputs=[msg1, msg2], outputs=out_agree)
238
 
239
+ gr.Markdown("### Agreement (irony-aware)")
240
  btn_agree_irony = gr.Button("Check Agreement (with irony)")
241
  out_agree_irony = gr.Number(label="Agreement Score (irony-aware) (-5..+5)")
242
  btn_agree_irony.click(fn=agreement_with_irony, inputs=[msg1, msg2], outputs=out_agree_irony)
243
 
244
+ # Sentiment
245
+ with gr.Tab("Sentiment"):
246
  text_sent = gr.Textbox(label="Text")
247
  btn_sent = gr.Button("Analyze Sentiment")
248
  out_sent = gr.Number(label="Sentiment Score (-5..+5)")
249
  btn_sent.click(fn=analyze_sentiment, inputs=text_sent, outputs=out_sent)
250
 
251
+ # Sarcasm
252
+ with gr.Tab("Sarcasm / Irony"):
253
  text_sarc = gr.Textbox(label="Text")
254
  btn_sarc = gr.Button("Analyze Sarcasm")
255
  out_sarc = gr.Number(label="Sarcasm Score (-5..+5)")
256
  btn_sarc.click(fn=sarcasm_score, inputs=text_sarc, outputs=out_sarc)
257
 
258
+ # Neutrality
259
+ with gr.Tab("Neutrality"):
260
+ text_neu = gr.Textbox(label="Text")
261
+ btn_neu = gr.Button("Analyze Neutrality")
262
+ out_neu = gr.Number(label="Neutrality Score (-5..+5)")
263
+ btn_neu.click(fn=neutrality_score, inputs=text_neu, outputs=out_neu)
264
+
265
+ # Multilabel
266
+ with gr.Tab("Multilabel Classification"):
267
  text_clf = gr.Textbox(label="Text")
268
  btn_clf = gr.Button("Classify")
269
  out_clf = gr.Label(label="Categories & Scores (-5..+5)")
270
+ btn_clf.click(fn=classify_message, inputs=text_clf, outputs=out_cl