Files changed (7) hide show
  1. README.md +7 -43
  2. agreement_score.py +35 -0
  3. app.py +69 -195
  4. app.yaml +0 -6
  5. classifier.py +46 -0
  6. requirements.txt +3 -6
  7. sentimental.py +33 -0
README.md CHANGED
@@ -1,48 +1,12 @@
1
  ---
2
- title: prsr lite
 
 
 
3
  sdk: gradio
4
- emoji: 📈
5
- colorFrom: indigo
6
- colorTo: yellow
7
- license: apache-2.0
8
  sdk_version: 6.3.0
 
 
9
  ---
10
- # 📈 PRSR Lite – Unified NLP API
11
 
12
- Этот Space предоставляет простой интерфейс для анализа текста с использованием NLP моделей:
13
-
14
- - **Agreement** – оценка согласованности двух сообщений (entailment/contradiction).
15
- - **Sentiment** – оценка тональности текста (-5 до +5).
16
- - **Multilabel Classification** – классификация текста по категориям: `politique`, `woke`, `racism`, `crime`, `police_abuse`, `corruption`, `hate_speech`, `activism`.
17
-
18
- ---
19
-
20
- ## ⚡ Как использовать
21
-
22
- 1. Перейдите на вкладку нужного сервиса:
23
- - **Agreement**: введите два сообщения → нажмите *Check Agreement*.
24
- - **Sentiment**: введите текст → нажмите *Analyze Sentiment*.
25
- - **Classification**: введите текст → нажмите *Classify*.
26
- 2. Результат появится сразу под кнопкой.
27
-
28
- ---
29
-
30
- ## 🛠 Технологии
31
-
32
- - [Gradio](https://gradio.app/) – интерфейс пользователя.
33
- - [Transformers](https://huggingface.co/transformers/) – NLP модели (BART, BERT, XLM-R).
34
- - PyTorch – для работы моделей.
35
-
36
- ---
37
-
38
- ## 📚 Модели
39
-
40
- - `facebook/bart-base-mnli` – для Agreement (MNLI).
41
- - `nlptown/bert-base-multilingual-uncased-sentiment` – для Sentiment.
42
- - `xlm-roberta-base` – для Multilabel Classification.
43
-
44
- ---
45
-
46
- ## ⚖️ Лицензия
47
-
48
- Apache-2.0
 
1
  ---
2
+ title: Classifier
3
+ emoji: 🌖
4
+ colorFrom: blue
5
+ colorTo: pink
6
  sdk: gradio
 
 
 
 
7
  sdk_version: 6.3.0
8
+ app_file: app.py
9
+ pinned: false
10
  ---
 
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agreement_score.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# === Load the model once at service startup ===
MODEL_NAME = "facebook/bart-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()

# Resolve label indices from the model config instead of hard-coding 0/2,
# so the code keeps working if a checkpoint orders its labels differently.
_LABEL2ID = {name.lower(): idx for name, idx in model.config.label2id.items()}
_ENTAILMENT_ID = _LABEL2ID.get("entailment", 2)
_CONTRADICTION_ID = _LABEL2ID.get("contradiction", 0)

# === FastAPI application ===
app = FastAPI(title="Agreement Checker API")

# === Request schema ===
class MessagePair(BaseModel):
    msg1: str  # premise message
    msg2: str  # hypothesis message

# === Core agreement logic ===
def check_agreement(msg1: str, msg2: str) -> float:
    """Score how strongly msg2 agrees with msg1.

    Returns P(entailment) - P(contradiction), a float in [-1.0, 1.0]
    rounded to 2 decimals: +1 = strong agreement, -1 = strong contradiction.
    """
    inputs = tokenizer(msg1, msg2, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    score = (probs[_ENTAILMENT_ID] - probs[_CONTRADICTION_ID]).item()
    return round(score, 2)

# === API endpoint ===
@app.post("/agreement")
def agreement(pair: MessagePair):
    """POST {"msg1": ..., "msg2": ...} -> {"agreement_score": float}."""
    score = check_agreement(pair.msg1, pair.msg2)
    return {"agreement_score": score}
app.py CHANGED
@@ -1,30 +1,19 @@
1
- import gradio as gr
 
 
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
 
 
 
4
 
5
- # =====================
6
- # DEVICE
7
- # =====================
8
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
  # =====================
11
- # Helpers
12
  # =====================
13
- def clamp(x: float, lo: float = -5.0, hi: float = 5.0) -> float:
14
- return max(lo, min(hi, x))
15
-
16
- def score01_to_minus5_plus5(p: float) -> float:
17
- """
18
- 0.0 -> -5
19
- 0.5 -> 0
20
- 1.0 -> +5
21
- """
22
- return clamp((float(p) - 0.5) * 10)
23
-
24
- # =====================
25
- # 1) Agreement (MNLI) -> [-5..+5]
26
- # =====================
27
- MNLI_MODEL = "facebook/bart-large-mnli"
28
  mnli_tokenizer = None
29
  mnli_model = None
30
 
@@ -33,26 +22,19 @@ def load_mnli():
33
  if mnli_model is None:
34
  mnli_tokenizer = AutoTokenizer.from_pretrained(MNLI_MODEL)
35
  mnli_model = AutoModelForSequenceClassification.from_pretrained(MNLI_MODEL)
36
- mnli_model.to(DEVICE)
37
  mnli_model.eval()
38
 
39
- def agreement_score_minus5_plus5(msg1: str, msg2: str) -> float:
40
- """
41
- -5 = contradiction
42
- +5 = entailment
43
- """
44
  load_mnli()
45
- inputs = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True).to(DEVICE)
46
  with torch.no_grad():
47
  logits = mnli_model(**inputs).logits
48
  probs = torch.softmax(logits, dim=-1)[0]
 
49
 
50
- # entailment - contradiction => [-1..+1]
51
- raw = (probs[2] - probs[0]).item()
52
- return round(clamp(raw * 5), 2)
53
 
54
  # =====================
55
- # 2) Sentiment -> [-5..+5]
56
  # =====================
57
  SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
58
  sent_tokenizer = None
@@ -63,194 +45,86 @@ def load_sentiment():
63
  if sent_model is None:
64
  sent_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL)
65
  sent_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL)
66
- sent_model.to(DEVICE)
67
  sent_model.eval()
68
 
69
  def analyze_sentiment(text: str) -> float:
70
- """
71
- 1..5 stars -> [-5..+5]
72
- """
73
  load_sentiment()
74
- inputs = sent_tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
75
  with torch.no_grad():
76
  logits = sent_model(**inputs).logits
77
  probs = torch.softmax(logits, dim=-1)
78
  stars = torch.argmax(probs, dim=-1).item() + 1
79
- score = (stars - 3) * 2.5
80
- return round(clamp(score), 2)
81
-
82
- # =====================
83
- # 3) Sarcasm / Irony -> [-5..+5]
84
- # =====================
85
- SARCASM_MODEL = "cardiffnlp/twitter-roberta-base-irony"
86
- sarcasm_pipe = None
87
-
88
- def load_sarcasm():
89
- global sarcasm_pipe
90
- if sarcasm_pipe is None:
91
- sarcasm_pipe = pipeline(
92
- "text-classification",
93
- model=SARCASM_MODEL,
94
- device=0 if torch.cuda.is_available() else -1,
95
- truncation=True,
96
- )
97
-
98
- def sarcasm_score(text: str) -> float:
99
- """
100
- +5 = irony
101
- -5 = non-irony
102
- """
103
- load_sarcasm()
104
- res = sarcasm_pipe(text)[0]
105
- label = res["label"].lower()
106
- conf = float(res["score"])
107
-
108
- if "irony" in label:
109
- return round(clamp(conf * 5), 2)
110
- return round(clamp(-conf * 5), 2)
111
-
112
- # =====================
113
- # 4) Neutrality -> [-5..+5]
114
- # =====================
115
- def neutrality_score(text: str) -> float:
116
- """
117
- +5 = максимально нейтрально
118
- -5 = максимально эмоционально/заряжено
119
- """
120
- sent = abs(analyze_sentiment(text)) # 0..5
121
- sarc = max(0.0, sarcasm_score(text)) # 0..5 (только если irony)
122
-
123
- neutrality = 5.0 - (sent + sarc) / 2.0
124
- return round(clamp(neutrality), 2)
125
-
126
- # =====================
127
- # 5) Agreement with irony adjustment
128
- # =====================
129
- def agreement_with_irony(msg1: str, msg2: str) -> float:
130
- base = agreement_score_minus5_plus5(msg1, msg2)
131
-
132
- s2 = max(0.0, sarcasm_score(msg2)) # 0..5
133
- sarcasm_strength = s2 / 5.0 # 0..1
134
 
135
- # чем больше сарказм, тем меньше доверяем agreement
136
- multiplier = 1.0 - 0.65 * sarcasm_strength
137
- final_score = base * multiplier
138
- return round(clamp(final_score), 2)
139
 
140
  # =====================
141
- # 6) Multilabel Zero-Shot -> [-5..+5]
142
  # =====================
143
- ZS_MODEL = "facebook/bart-large-mnli"
144
- zs_classifier = None
145
 
146
  CATEGORIES = [
147
- # базовые
148
- "politique",
149
- "woke",
150
- "racism",
151
- "crime",
152
- "police_abuse",
153
- "corruption",
154
- "hate_speech",
155
- "activism",
156
-
157
- # типичные твиттер-дискуссии
158
- "outrage / moral outrage",
159
- "cancel culture",
160
- "culture war",
161
- "polarization / us vs them",
162
- "misinformation / fake news",
163
- "conspiracy / deep state",
164
- "propaganda / spin",
165
- "whataboutism",
166
- "virtue signaling",
167
- "dogwhistle / coded language",
168
- "trolling / bait",
169
- "ragebait",
170
- "harassment / bullying",
171
- "callout / public shaming",
172
- "ratio / pile-on",
173
- "stan / fandom war",
174
- "hot take",
175
- "doomposting",
176
- "memes / shitposting",
177
- "political satire",
178
- "debunking / fact-checking",
179
- "support / solidarity",
180
  ]
181
 
182
- def load_zero_shot():
183
- global zs_classifier
184
- if zs_classifier is None:
185
- zs_classifier = pipeline(
186
- "zero-shot-classification",
187
- model=ZS_MODEL,
188
- device=0 if torch.cuda.is_available() else -1,
189
- )
190
 
191
- def classify_message(text: str) -> dict:
192
- load_zero_shot()
193
- result = zs_classifier(text, candidate_labels=CATEGORIES, multi_label=True)
 
 
 
 
 
 
194
 
195
- labels = result["labels"]
196
- scores = result["scores"]
 
 
 
 
 
 
197
 
198
- out = {}
199
- for label, score in zip(labels, scores):
200
- out[label] = round(score01_to_minus5_plus5(score), 2)
201
- return out
202
 
203
  # =====================
204
- # Gradio UI
205
  # =====================
206
- with gr.Blocks(title="Unified NLP API (-5..+5)") as demo:
207
- gr.Markdown("## 📈 Unified NLP API (all scores: -5 .. +5)")
208
- gr.Markdown(
209
- """
210
- **Шкалы:**
211
- - **Agreement**: -5 = сильное противоречие, +5 = сильное согласие
212
- - **Sentiment**: -5 = негатив, +5 = позитив
213
- - **Sarcasm**: -5 = уверенно НЕ сарказм, +5 = уверенно сарказм/ирония
214
- - **Neutrality**: +5 = максимально нейтрально, -5 = максимально “заряжено”
215
- - **Multilabel**: уверенность метки в шкале -5..+5 (0.5 → 0)
216
- """
217
- )
218
 
219
- with gr.Tab("Agreement"):
220
- msg1 = gr.Textbox(label="Message 1")
221
- msg2 = gr.Textbox(label="Message 2")
222
 
223
- btn_agree = gr.Button("Check Agreement")
224
- out_agree = gr.Number(label="Agreement Score (-5..+5)")
225
- btn_agree.click(fn=agreement_score_minus5_plus5, inputs=[msg1, msg2], outputs=out_agree)
226
 
227
- gr.Markdown("### Agreement (irony-aware)")
228
- btn_agree_irony = gr.Button("Check Agreement (with irony)")
229
- out_agree_irony = gr.Number(label="Agreement Score (irony-aware) (-5..+5)")
230
- btn_agree_irony.click(fn=agreement_with_irony, inputs=[msg1, msg2], outputs=out_agree_irony)
231
-
232
- with gr.Tab("Sentiment"):
233
- text_sent = gr.Textbox(label="Text")
234
- btn_sent = gr.Button("Analyze Sentiment")
235
- out_sent = gr.Number(label="Sentiment Score (-5..+5)")
236
- btn_sent.click(fn=analyze_sentiment, inputs=text_sent, outputs=out_sent)
237
 
238
- with gr.Tab("Sarcasm / Irony"):
239
- text_sarc = gr.Textbox(label="Text")
240
- btn_sarc = gr.Button("Analyze Sarcasm")
241
- out_sarc = gr.Number(label="Sarcasm Score (-5..+5)")
242
- btn_sarc.click(fn=sarcasm_score, inputs=text_sarc, outputs=out_sarc)
243
 
244
- with gr.Tab("Neutrality"):
245
- text_neu = gr.Textbox(label="Text")
246
- btn_neu = gr.Button("Analyze Neutrality")
247
- out_neu = gr.Number(label="Neutrality Score (-5..+5)")
248
- btn_neu.click(fn=neutrality_score, inputs=text_neu, outputs=out_neu)
249
 
250
- with gr.Tab("Multilabel Classification"):
251
- text_clf = gr.Textbox(label="Text")
252
- btn_clf = gr.Button("Classify")
253
- out_clf = gr.Label(label="Categories & Scores (-5..+5)")
254
- btn_clf.click(fn=classify_message, inputs=text_clf, outputs=out_clf)
 
 
 
 
 
 
255
 
256
- demo.launch()
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from typing import List
4
  import torch
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForSequenceClassification,
8
+ XLMRobertaForSequenceClassification,
9
+ )
10
 
11
+ app = FastAPI(title="Unified NLP API")
 
 
 
12
 
13
  # =====================
14
+ # Agreement (MNLI)
15
  # =====================
16
+ MNLI_MODEL = "facebook/bart-base-mnli"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  mnli_tokenizer = None
18
  mnli_model = None
19
 
 
22
  if mnli_model is None:
23
  mnli_tokenizer = AutoTokenizer.from_pretrained(MNLI_MODEL)
24
  mnli_model = AutoModelForSequenceClassification.from_pretrained(MNLI_MODEL)
 
25
  mnli_model.eval()
26
 
27
def check_agreement(msg1: str, msg2: str) -> float:
    """Return P(entailment) - P(contradiction) for the (msg1, msg2) pair.

    Result is in [-1.0, 1.0], rounded to 2 decimals:
    +1 = msg2 strongly agrees with msg1, -1 = strongly contradicts it.
    """
    # NOTE(review): MNLI_MODEL is set to "facebook/bart-base-mnli", which does
    # not appear to be a published checkpoint -- verify; the official one is
    # "facebook/bart-large-mnli".
    load_mnli()  # lazy one-time model load
    inputs = mnli_tokenizer(msg1, msg2, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = mnli_model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    # Look up label indices from the config rather than assuming the
    # contradiction/neutral/entailment order is 0/1/2 for every checkpoint.
    label2id = {k.lower(): v for k, v in mnli_model.config.label2id.items()}
    entailment = probs[label2id.get("entailment", 2)]
    contradiction = probs[label2id.get("contradiction", 0)]
    return round((entailment - contradiction).item(), 2)
34
 
 
 
 
35
 
36
  # =====================
37
+ # Sentiment
38
  # =====================
39
  SENTIMENT_MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"
40
  sent_tokenizer = None
 
45
  if sent_model is None:
46
  sent_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL)
47
  sent_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL)
 
48
  sent_model.eval()
49
 
50
def analyze_sentiment(text: str) -> float:
    """Map the model's 1-5 star prediction onto a [-5, +5] sentiment score.

    1 star -> -5.0, 3 stars -> 0.0, 5 stars -> +5.0.
    """
    load_sentiment()  # lazy one-time model load
    inputs = sent_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = sent_model(**inputs).logits
    # softmax is monotonic, so argmax over the raw logits selects the same
    # class as argmax over probabilities -- skip the redundant normalization.
    stars = torch.argmax(logits, dim=-1).item() + 1  # classes 0..4 -> 1..5 stars
    return round((stars - 3) * 2.5, 2)  # linear map to -5 .. +5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
 
 
 
 
59
 
60
# =====================
# Multilabel classifier
# =====================
# NOTE(review): xlm-roberta-base is a plain pretrained checkpoint; the
# classification head added in load_classifier is randomly initialized,
# so its outputs are presumably not meaningful until fine-tuned -- confirm
# the intended checkpoint.
CLASSIFIER_MODEL = "xlm-roberta-base"

# Target labels for multilabel classification (one sigmoid per label).
CATEGORIES = [
    "politique", "woke", "racism", "crime",
    "police_abuse", "corruption", "hate_speech", "activism"
]

# Lazily-initialized tokenizer/model; populated by load_classifier().
clf_tokenizer = None
clf_model = None
 
 
 
 
 
 
72
 
73
def load_classifier():
    """Lazily initialize the multilabel classifier (first call only)."""
    global clf_tokenizer, clf_model
    if clf_model is not None:
        return  # already initialized
    clf_tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
    clf_model = XLMRobertaForSequenceClassification.from_pretrained(
        CLASSIFIER_MODEL, num_labels=len(CATEGORIES)
    )
    clf_model.eval()
82
 
83
def classify_message(text: str, threshold: float = 0.5) -> List[str]:
    """Return every category whose sigmoid probability exceeds *threshold*.

    Falls back to ["neutral"] when no category clears the threshold.
    The default threshold of 0.5 preserves the original behavior.
    """
    load_classifier()  # lazy one-time model load
    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = clf_model(**inputs).logits
    # Independent per-label sigmoid (multilabel), not a softmax over classes.
    probs = torch.sigmoid(logits)[0]
    labels = [CATEGORIES[i] for i, p in enumerate(probs) if p > threshold]
    return labels or ["neutral"]
91
 
 
 
 
 
92
 
93
  # =====================
94
+ # API schemas
95
  # =====================
96
class AgreementRequest(BaseModel):
    """Request body for POST /agreement: the two messages to compare."""
    msg1: str  # premise message
    msg2: str  # hypothesis message

class TextRequest(BaseModel):
    """Request body for the single-text endpoints (/sentiment, /classify)."""
    text: str  # text to analyze
 
102
 
 
 
 
103
 
104
# =====================
# Endpoints
# =====================
@app.post("/agreement")
def agreement(req: AgreementRequest):
    """Score agreement between two messages; see check_agreement for the scale."""
    return {"agreement_score": check_agreement(req.msg1, req.msg2)}

@app.post("/sentiment")
def sentiment(req: TextRequest):
    """Sentiment score for a single text (see analyze_sentiment)."""
    return {"sentiment_score": analyze_sentiment(req.text)}

@app.post("/classify")
def classify(req: TextRequest):
    """Multilabel category tags for a single text (see classify_message)."""
    return {"categories": classify_message(req.text)}

@app.get("/")
def root():
    """Health check plus a short map of the available endpoints."""
    return {
        "status": "ok",
        "endpoints": {
            "POST /sentiment": "sentiment analysis",
            "POST /agreement": "text agreement",
            "POST /classify": "multilabel classification",
            "GET /docs": "swagger UI"
        }
    }
130
 
 
app.yaml DELETED
@@ -1,6 +0,0 @@
1
- title: prsr lite
2
- sdk: gradio
3
- emoji: 📈
4
- colorFrom: indigo
5
- colorTo: yellow
6
- license: apache-2.0
 
 
 
 
 
 
 
classifier.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import torch
from transformers import AutoTokenizer, XLMRobertaForSequenceClassification

# === Configuration ===
MODEL_NAME = "xlm-roberta-large"

# Target labels for multilabel classification (one sigmoid per label).
CATEGORIES = [
    "politique", "woke", "racism", "crime",
    "police_abuse", "corruption", "hate_speech", "activism"
]

# === Model loading (once at startup) ===
# NOTE(review): xlm-roberta-large is a plain pretrained checkpoint; adding a
# num_labels classification head here yields a randomly initialized classifier,
# so the outputs are presumably not meaningful until the head is fine-tuned --
# confirm the intended checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = XLMRobertaForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(CATEGORIES)
)
model.eval()

# === FastAPI application ===
app = FastAPI(title="Multilabel Text Classifier API")

# === Request schema ===
class TextRequest(BaseModel):
    text: str  # text to classify

# === Classification logic ===
def classify_message(message: str, threshold: float = 0.5) -> List[str]:
    """Return every category whose sigmoid probability exceeds *threshold*.

    Falls back to ["neutral"] when no category clears the threshold.
    The default threshold of 0.5 preserves the original behavior.
    """
    inputs = tokenizer(message, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    # Independent per-label sigmoid (multilabel), not a softmax over classes.
    probs = torch.sigmoid(logits)[0]
    selected = [CATEGORIES[i] for i, p in enumerate(probs) if p > threshold]
    return selected or ["neutral"]

# === Endpoint ===
@app.post("/classify")
def classify(request: TextRequest):
    """POST {"text": ...} -> {"categories": [...]}."""
    categories = classify_message(request.text)
    return {
        "categories": categories
    }
requirements.txt CHANGED
@@ -1,8 +1,5 @@
1
- gradio==6.3.0
2
  torch
3
  transformers
4
- datasets
5
- huggingface-hub>=0.30
6
- hf-transfer>=0.1.4
7
- protobuf<4
8
-
 
 
1
  torch
2
  transformers
3
+ fastapi
4
+ uvicorn
5
+ sentencepiece
 
 
sentimental.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# === Model loading (once at startup) ===
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()

# === FastAPI application ===
app = FastAPI(title="Sentiment Analysis API")

# === Request schema ===
class TextRequest(BaseModel):
    text: str  # text to analyze

# === Sentiment logic ===
def analyze_sentiment(message: str) -> float:
    """Map the model's 1-5 star prediction onto a [-5, +5] sentiment score.

    1 star -> -5.0, 3 stars -> 0.0, 5 stars -> +5.0.
    """
    inputs = tokenizer(message, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    # softmax is monotonic, so argmax over the raw logits selects the same
    # class as argmax over probabilities -- skip the redundant normalization.
    stars = torch.argmax(logits, dim=-1).item() + 1  # classes 0..4 -> 1..5 stars
    sentiment = (stars - 3) * 2.5  # linear map to -5..+5
    return round(sentiment, 2)

# === API endpoint ===
@app.post("/sentiment")
def sentiment(request: TextRequest):
    """POST {"text": ...} -> {"sentiment_score": float}."""
    score = analyze_sentiment(request.text)
    return {"sentiment_score": score}