Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import gradio as gr
|
|
| 2 |
from transformers import (
|
| 3 |
AutoTokenizer, AutoModelForTokenClassification,
|
| 4 |
AutoModelForSequenceClassification,
|
| 5 |
-
|
| 6 |
)
|
| 7 |
import torch
|
| 8 |
|
|
@@ -15,21 +15,182 @@ aspect_model = AutoModelForTokenClassification.from_pretrained("EfektMotyla/bert
|
|
| 15 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("EfektMotyla/absa-roberta")
|
| 16 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("EfektMotyla/absa-roberta").to(device)
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
|
|
|
|
|
|
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def extract_aspects(text):
|
| 35 |
inputs = aspect_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
|
|
@@ -55,7 +216,7 @@ def extract_aspects(text):
|
|
| 55 |
current_tokens = []
|
| 56 |
if current_tokens:
|
| 57 |
aspects.append(aspect_tokenizer.convert_tokens_to_string(current_tokens).strip())
|
| 58 |
-
return list(set(aspects))
|
| 59 |
|
| 60 |
def analyze(text_pl, progress=gr.Progress()):
|
| 61 |
try:
|
|
@@ -67,20 +228,35 @@ def analyze(text_pl, progress=gr.Progress()):
|
|
| 67 |
if not aspects_en:
|
| 68 |
return "Nie wykryto żadnych aspektów."
|
| 69 |
|
|
|
|
|
|
|
| 70 |
results = []
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
| 73 |
input_text = f"{text_en} [SEP] {asp}"
|
| 74 |
inputs = sentiment_tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(device)
|
|
|
|
| 75 |
with torch.no_grad():
|
| 76 |
logits = sentiment_model(**inputs).logits
|
| 77 |
predicted_class_id = int(logits.argmax().cpu())
|
| 78 |
sentiment_label = {0: "negatywny", 1: "neutralny", 2: "pozytywny", 3: "konfliktowy"}[predicted_class_id]
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
return "\n".join(results)
|
|
|
|
| 82 |
except Exception as e:
|
| 83 |
-
return f"Błąd podczas analizy: {e}"
|
| 84 |
|
| 85 |
# === Gradio UI ===
|
| 86 |
demo = gr.Interface(
|
|
@@ -88,17 +264,15 @@ demo = gr.Interface(
|
|
| 88 |
inputs=gr.Textbox(
|
| 89 |
label="Komentarz po polsku",
|
| 90 |
placeholder="Np. Pizza była pyszna, ale kelner był nieuprzejmy.",
|
| 91 |
-
lines=4,
|
| 92 |
-
max_lines=6
|
| 93 |
-
),
|
| 94 |
-
outputs=gr.Markdown(
|
| 95 |
-
label="Wyniki analizy"
|
| 96 |
),
|
|
|
|
| 97 |
title="ABSA – Analiza komentarzy restauracyjnych",
|
| 98 |
description="Wykrywa aspekty i przypisuje im sentymenty (pozytywny / negatywny / neutralny / konfliktowy).",
|
| 99 |
theme="default",
|
| 100 |
allow_flagging="never"
|
| 101 |
)
|
| 102 |
|
| 103 |
-
|
| 104 |
-
demo.launch()
|
|
|
|
| 2 |
from transformers import (
|
| 3 |
AutoTokenizer, AutoModelForTokenClassification,
|
| 4 |
AutoModelForSequenceClassification,
|
| 5 |
+
pipeline
|
| 6 |
)
|
| 7 |
import torch
|
| 8 |
|
|
|
|
| 15 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("EfektMotyla/absa-roberta")
|
| 16 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("EfektMotyla/absa-roberta").to(device)
|
| 17 |
|
| 18 |
+
# === Lightweight translation via Hugging Face pipelines ===
# Both pipelines are created once at import time. The device index is 0
# (first CUDA GPU) when torch reports CUDA as available, otherwise -1 (CPU).
pl_to_en_translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-pl-en",
    device=0 if torch.cuda.is_available() else -1,
)
en_to_pl_translator = pipeline(
    "translation",
    model="gsarti/opus-mt-tc-en-pl",
    device=0 if torch.cuda.is_available() else -1,
)
|
| 21 |
|
| 22 |
+
def translate_pl_to_en(texts):
    """Translate text from Polish to English with ``pl_to_en_translator``.

    Args:
        texts: Input passed unchanged to the translation pipeline
            (presumably a list of strings -- confirm against callers).

    Returns:
        A list holding the ``"translation_text"`` value of every result the
        pipeline produced, in the order the pipeline returned them.
    """
    translations = []
    for result in pl_to_en_translator(texts):
        translations.append(result["translation_text"])
    return translations
|
| 24 |
|
| 25 |
+
def translate_en_to_pl(texts):
    """Translate text from English to Polish with ``en_to_pl_translator``.

    Args:
        texts: Input passed unchanged to the translation pipeline
            (presumably a list of strings -- confirm against callers).

    Returns:
        A list holding the ``"translation_text"`` value of every result the
        pipeline produced, in the order the pipeline returned them.
    """
    translations = []
    for result in en_to_pl_translator(texts):
        translations.append(result["translation_text"])
    return translations
|
| 27 |
|
| 28 |
+
# === Dictionary of known aspects (EN -> PL) ===
# Maps a lower-case English aspect term to the Polish label shown to the user.
# Several English terms intentionally share one Polish label so that related
# aspects collapse into a single category in the output.
# Every key appears exactly once: a repeated key in a dict literal silently
# overwrites the earlier entry, so duplicates only hide editing mistakes.
aspect_aliases = {
    # FOOD / TASTE
    "food": "jedzenie",
    "meal": "jedzenie",
    "taste": "smak",
    "flavor": "smak",
    "dish": "danie",
    "portion": "porcja",
    "serving": "porcja",
    "ingredients": "składniki",
    "spices": "przyprawy",
    "salt": "sól",
    "fat": "tłuszcz",
    "grease": "tłuszcz",

    # SERVICE
    "service": "obsługa",
    "staff": "obsługa",
    "waiter": "obsługa",
    "waitress": "obsługa",
    "manager": "obsługa",
    "attitude": "obsługa",

    # PRICES / VALUE
    "price": "cena",
    "value": "cena",
    "cost": "cena",

    # ATMOSPHERE / DECOR
    "decor": "wystrój",
    "interior": "wystrój",
    "design": "wystrój",
    "counter": "wystrój",
    "fridge": "wystrój",
    "music": "muzyka",
    "ambience": "klimat",
    "atmosphere": "klimat",
    "vibe": "klimat",
    "climate": "klimat",

    # PLACE
    "location": "lokalizacja",
    "place": "lokalizacja",
    "entrance": "lokalizacja",
    "parking": "parking",
    "toilet": "toaleta",

    # TIME / SPEED
    "waiting time": "czas oczekiwania",
    "time": "czas oczekiwania",
    "delay": "opóźnienie",
    "speed": "czas oczekiwania",
    "service time": "czas oczekiwania",
    "slow": "czas oczekiwania",
    "fast": "czas oczekiwania",
    "immediate": "czas oczekiwania",
    "late": "opóźnienie",

    # SMELL / CLEANLINESS
    "smell": "zapach",
    "odor": "zapach",
    "cleanliness": "czystość",
    "hygiene": "czystość",

    # GENERAL
    "experience": "doświadczenie",
    "visit": "wizyta",
    "menu": "menu",
    "variety": "menu",

    # PLACE / LOCATION / SURROUNDINGS (additional terms)
    "view": "lokalizacja",
    "lake": "lokalizacja",
    "window": "lokalizacja",
    "terrace": "lokalizacja",
    "balcony": "lokalizacja",
    "outside": "lokalizacja",
    "area": "lokalizacja",
    "surroundings": "lokalizacja",
    "neighborhood": "lokalizacja",
    "river": "lokalizacja",
    "garden": "lokalizacja",

    # DRINKS
    "drink": "napoje",
    "drinks": "napoje",
    "beverage": "napoje",
    "coffee": "napoje",
    "tea": "napoje",
    "water": "napoje",
    "juice": "napoje",
    "alcohol": "napoje",
    "cocktail": "napoje",
    "wine": "napoje",

    # HYGIENE
    "dirt": "czystość",
    "dirty": "czystość",
    "mess": "czystość",
    "messy": "czystość",
    "clean": "czystość",
    "filth": "czystość",

    # KITCHEN / QUALITY
    "chef": "kuchnia",
    "kitchen": "kuchnia",
    "preparation": "kuchnia",
    "presentation": "prezentacja",
    "quality": "jakość",
    "freshness": "jakość",
    "raw": "jakość",
    "undercooked": "jakość",
    "burnt": "jakość",
    "microwaved": "jakość",

    # FURNISHINGS / COMFORT
    "seat": "komfort",
    "seating": "komfort",
    "chair": "komfort",
    "table": "komfort",
    "furniture": "komfort",
    "light": "komfort",
    "noise": "komfort",
    "temperature": "komfort",
    "air conditioning": "komfort",

    # OVERALL IMPRESSION / VALUE
    "recommendation": "ogólna ocena",
    "return": "ogólna ocena",
    "again": "ogólna ocena",
    "worth": "cena",
    "overpriced": "cena",
    "cheap": "cena",
    "affordable": "cena",

    # CHILDREN / FAMILY
    "child": "dzieci",
    "children": "dzieci",
    "kid": "dzieci",
    "kids": "dzieci",
    "child-friendly": "dzieci",
    "kids menu": "dzieci",
    "high chair": "dzieci",
    "stroller": "dzieci",
    "family": "rodzina",
    "families": "rodzina",
    "parent": "rodzina",
    "parents": "rodzina",
    "group": "rodzina",
    "big group": "rodzina",
    "baby": "dzieci",

    # ANIMALS
    "dog": "zwierzęta",
    "dogs": "zwierzęta",
    "pet": "zwierzęta",
    "pets": "zwierzęta",
    "pet-friendly": "zwierzęta",
    "dog-friendly": "zwierzęta",
    "animal": "zwierzęta",
}
|
| 194 |
|
| 195 |
def extract_aspects(text):
|
| 196 |
inputs = aspect_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
|
|
|
|
| 216 |
current_tokens = []
|
| 217 |
if current_tokens:
|
| 218 |
aspects.append(aspect_tokenizer.convert_tokens_to_string(current_tokens).strip())
|
| 219 |
+
return list(set(aspects)) # usuń duplikaty
|
| 220 |
|
| 221 |
def analyze(text_pl, progress=gr.Progress()):
|
| 222 |
try:
|
|
|
|
| 228 |
if not aspects_en:
|
| 229 |
return "Nie wykryto żadnych aspektów."
|
| 230 |
|
| 231 |
+
# ✅ Usuń duplikaty (i sortuj dla powtarzalności)
|
| 232 |
+
unique_aspects = sorted(set([asp.lower() for asp in aspects_en]))
|
| 233 |
results = []
|
| 234 |
+
seen_pl_aspects = set()
|
| 235 |
+
|
| 236 |
+
for i, asp in enumerate(unique_aspects):
|
| 237 |
+
progress(0.4 + i/len(unique_aspects)*0.6, desc=f"Analiza aspektu: {asp}")
|
| 238 |
input_text = f"{text_en} [SEP] {asp}"
|
| 239 |
inputs = sentiment_tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(device)
|
| 240 |
+
|
| 241 |
with torch.no_grad():
|
| 242 |
logits = sentiment_model(**inputs).logits
|
| 243 |
predicted_class_id = int(logits.argmax().cpu())
|
| 244 |
sentiment_label = {0: "negatywny", 1: "neutralny", 2: "pozytywny", 3: "konfliktowy"}[predicted_class_id]
|
| 245 |
+
|
| 246 |
+
# ✅ Tłumaczenie aspektu przez słownik lub model
|
| 247 |
+
if asp in aspect_aliases:
|
| 248 |
+
asp_pl = aspect_aliases[asp]
|
| 249 |
+
else:
|
| 250 |
+
asp_pl = translate_en_to_pl([asp])[0].lower()
|
| 251 |
+
|
| 252 |
+
if asp_pl not in seen_pl_aspects:
|
| 253 |
+
seen_pl_aspects.add(asp_pl)
|
| 254 |
+
results.append(f"{asp_pl.capitalize()} → **{sentiment_label}**")
|
| 255 |
+
|
| 256 |
return "\n".join(results)
|
| 257 |
+
|
| 258 |
except Exception as e:
|
| 259 |
+
return f"❌ Błąd podczas analizy: {e}"
|
| 260 |
|
| 261 |
# === Gradio UI ===
|
| 262 |
demo = gr.Interface(
|
|
|
|
| 264 |
inputs=gr.Textbox(
|
| 265 |
label="Komentarz po polsku",
|
| 266 |
placeholder="Np. Pizza była pyszna, ale kelner był nieuprzejmy.",
|
| 267 |
+
lines=4,
|
| 268 |
+
max_lines=6
|
|
|
|
|
|
|
|
|
|
| 269 |
),
|
| 270 |
+
outputs=gr.Markdown(label="Wyniki analizy"),
|
| 271 |
title="ABSA – Analiza komentarzy restauracyjnych",
|
| 272 |
description="Wykrywa aspekty i przypisuje im sentymenty (pozytywny / negatywny / neutralny / konfliktowy).",
|
| 273 |
theme="default",
|
| 274 |
allow_flagging="never"
|
| 275 |
)
|
| 276 |
|
| 277 |
+
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch the server.
if __name__ == "__main__":
    demo.launch()
|