Spaces:
Sleeping
Sleeping
File size: 4,660 Bytes
9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 7ae4f2b 5ac897e e34ebb0 9d9d143 e69a3fb 5ac897e 7ae4f2b 668f19f 7ae4f2b 5ac897e 7ae4f2b 38026de 7ae4f2b 38026de 7ae4f2b 5ac897e 7ae4f2b 94eceb2 7ae4f2b 9d9d143 94eceb2 7ae4f2b 9d9d143 7ae4f2b 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 7ae4f2b 9d9d143 7ae4f2b e69a3fb 5ac897e 7ae4f2b 5ac897e 7ae4f2b 5ac897e 7ae4f2b 5ac897e 7ae4f2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import torch
from transformers import (
AutoTokenizer,
AutoModelForTokenClassification,
AutoModelForSequenceClassification,
pipeline
)
# ────────────────────── configuration ──────────────────────
# Probe CUDA once; the original repeated torch.cuda.is_available() three times.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"

# Local models (loaded from disk only — no hub download).
# use_fast=False: the slow tokenizer is used for the NER model —
# NOTE(review): presumably required by its checkpoint; confirm.
aspect_tokenizer = AutoTokenizer.from_pretrained("bert-aspect-ner", local_files_only=True, use_fast=False)
aspect_model = AutoModelForTokenClassification.from_pretrained("bert-aspect-ner", local_files_only=True).to(device)
aspect_model.eval()

sentiment_tokenizer = AutoTokenizer.from_pretrained("absa-roberta", local_files_only=True)
sentiment_model = AutoModelForSequenceClassification.from_pretrained("absa-roberta", local_files_only=True).to(device)
sentiment_model.eval()

# Translation pipelines (`pipeline` takes a GPU index, or -1 for CPU).
pl_to_en = pipeline("translation", model="Helsinki-NLP/opus-mt-pl-en", device=0 if use_cuda else -1)
en_to_pl = pipeline("translation", model="gsarti/opus-mt-tc-en-pl", device=0 if use_cuda else -1)
# ────────────────────── alias dictionary ──────────────────────
# Maps English aspect terms (as extracted by the NER model) to canonical
# Polish labels; several English terms collapse onto one Polish label
# (e.g. service / waiter / staff -> "obsługa").
# Values restored to proper UTF-8 Polish — the previous text was mojibake.
aspect_aliases = {
    "food": "jedzenie", "service": "obsługa", "price": "cena",
    "taste": "smak", "waiter": "obsługa", "dish": "danie",
    "portion": "porcja", "staff": "obsługa", "decor": "wystrój",
    "menu": "menu", "drink": "napoje", "location": "lokalizacja",
    "time": "czas oczekiwania", "cleanliness": "czystość", "smell": "zapach",
    "value": "cena", "experience": "doświadczenie", "recommendation": "ogólna ocena",
    "children": "dzieci", "family": "rodzina", "pet": "zwierzęta"
}
# ────────────────────── Pydantic models ──────────────────────
class Comment(BaseModel):
    """Request body for POST /analyze: one review text (Polish — it is
    translated to English before analysis)."""
    # Raw comment text to analyse.
    text: str
class AspectSentiment(BaseModel):
    """A single extracted aspect paired with its predicted sentiment."""
    # Canonical Polish aspect label (from aspect_aliases or translation).
    aspect: str
    # One of the Polish sentiment class names emitted by /analyze.
    sentiment: str
class AnalysisResult(BaseModel):
    """Response body for POST /analyze: all aspect/sentiment pairs found."""
    results: List[AspectSentiment]
# ────────────────────── logic ──────────────────────
def translate_pl_to_en(texts: list[str]) -> list[str]:
    """Translate a batch of Polish texts to English via the pl->en pipeline."""
    translations = pl_to_en(texts)
    return [item["translation_text"] for item in translations]
def translate_en_to_pl(texts: list[str]) -> list[str]:
    """Translate a batch of English texts to Polish via the en->pl pipeline."""
    translations = en_to_pl(texts)
    return [item["translation_text"] for item in translations]
def extract_aspects(text_en: str) -> list[str]:
    """Extract aspect phrases from an English sentence via BIO decoding.

    Runs the token-classification model, then walks the per-token labels:
    "B-ASP" opens a new span, "I-ASP" extends the current one, anything
    else (including "O" and labels on special tokens) closes it.

    Returns unique aspect strings, deduplicated with first-occurrence
    order preserved (the original used `set`, which made the output order
    nondeterministic across runs under hash randomization).
    """
    inputs = aspect_tokenizer(text_en, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = aspect_model(**inputs)
    preds = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()
    tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    labels = [aspect_model.config.id2label[p] for p in preds]

    aspects: list[str] = []
    current_tokens: list[str] = []

    def _flush() -> None:
        # Close the currently open span, if any, and reset.
        if current_tokens:
            aspects.append(aspect_tokenizer.convert_tokens_to_string(current_tokens).strip())
            current_tokens.clear()

    for token, label in zip(tokens, labels):
        if label == "B-ASP":
            _flush()
            current_tokens.append(token)
        elif label == "I-ASP" and current_tokens:
            current_tokens.append(token)
        else:
            # "O", special-token labels, or a dangling "I-ASP" with no open span.
            _flush()
    _flush()

    # Strip any leftover WordPiece join artifacts; dedupe preserving order.
    cleaned = (a.replace(" ##", "").strip() for a in aspects)
    return list(dict.fromkeys(cleaned))
# ────────────────────── FastAPI ──────────────────────
app = FastAPI()

@app.post("/analyze", response_model=AnalysisResult)
def analyze_comment(comment: Comment):
    """Analyse one Polish review.

    Pipeline: translate PL->EN, extract aspect phrases, classify sentiment
    per aspect, map each aspect back to a canonical Polish label.
    Returns {"results": [...]} matching AnalysisResult.
    """
    text_pl = comment.text
    text_en = translate_pl_to_en([text_pl])[0]
    aspects_en = extract_aspects(text_en)

    # Index order assumed to match the sentiment model's id2label —
    # NOTE(review): verify against the absa-roberta config.
    sentiment_labels = ["negatywny", "neutralny", "pozytywny", "konfliktowy"]

    results = []
    seen = set()
    for asp in aspects_en:
        # Resolve the Polish label FIRST so duplicate aspects are skipped
        # before the expensive sentiment forward pass (the original ran
        # inference even for aspects that were then discarded as duplicates).
        asp_pl = aspect_aliases.get(asp.lower(), translate_en_to_pl([asp])[0].lower())
        if asp_pl in seen:
            continue
        seen.add(asp_pl)

        input_text = f"{text_en} [SEP] {asp}"
        inputs = sentiment_tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(device)
        with torch.no_grad():
            logits = sentiment_model(**inputs).logits
        pred = int(torch.argmax(logits, dim=1).cpu())
        results.append(AspectSentiment(aspect=asp_pl, sentiment=sentiment_labels[pred]))
    return {"results": results}
|