ABSA-REST-API / app.py
EfektMotyla's picture
Update app.py
7ae4f2b verified
raw
history blame
4.66 kB
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import torch
from transformers import (
AutoTokenizer,
AutoModelForTokenClassification,
AutoModelForSequenceClassification,
pipeline
)
# ────────────────────── konfiguracja ──────────────────────
# Torch device string used for the two locally stored models below.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Local models: both are loaded from directories on disk (local_files_only=True),
# moved to `device` and switched to evaluation mode.
# The aspect tagger explicitly requests the slow tokenizer (use_fast=False).
aspect_tokenizer = AutoTokenizer.from_pretrained(
    "bert-aspect-ner",
    local_files_only=True,
    use_fast=False,
)
aspect_model = AutoModelForTokenClassification.from_pretrained(
    "bert-aspect-ner",
    local_files_only=True,
)
aspect_model = aspect_model.to(device)
aspect_model.eval()

sentiment_tokenizer = AutoTokenizer.from_pretrained(
    "absa-roberta",
    local_files_only=True,
)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(
    "absa-roberta",
    local_files_only=True,
)
sentiment_model = sentiment_model.to(device)
sentiment_model.eval()

# Translation pipelines (Polish -> English and English -> Polish).
# The pipeline API takes a device index: 0 for the first GPU, -1 for the CPU.
pl_to_en = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-pl-en",
    device=0 if torch.cuda.is_available() else -1,
)
en_to_pl = pipeline(
    "translation",
    model="gsarti/opus-mt-tc-en-pl",
    device=0 if torch.cuda.is_available() else -1,
)
# Alias dictionary: maps a lower-cased English aspect term to the Polish label
# reported by the API. Several English terms deliberately share one Polish label.
aspect_aliases = {
    # food and drink
    "food": "jedzenie",
    "service": "obsługa",
    "price": "cena",
    "taste": "smak",
    "waiter": "obsługa",
    "dish": "danie",
    "portion": "porcja",
    "staff": "obsługa",
    # venue
    "decor": "wystrój",
    "menu": "menu",
    "drink": "napoje",
    "location": "lokalizacja",
    "time": "czas oczekiwania",
    "cleanliness": "czystość",
    "smell": "zapach",
    # overall impression
    "value": "cena",
    "experience": "doświadczenie",
    "recommendation": "ogólna ocena",
    # guests
    "children": "dzieci",
    "family": "rodzina",
    "pet": "zwierzęta",
}
# ────────────────────── Pydantic ──────────────────────
class Comment(BaseModel):
    """Request body accepted by the ``/analyze`` endpoint."""

    # Raw review text; the endpoint passes it to the Polish -> English translator.
    text: str
class AspectSentiment(BaseModel):
    """One aspect found in a comment together with its predicted sentiment."""

    # Polish aspect label (taken from the alias table or machine-translated).
    aspect: str
    # Polish sentiment label chosen by the endpoint from its fixed label list.
    sentiment: str
class AnalysisResult(BaseModel):
    """Response body of the ``/analyze`` endpoint."""

    # One entry per distinct Polish aspect label found in the comment.
    results: List[AspectSentiment]
# ────────────────────── logika ──────────────────────
def translate_pl_to_en(texts: list[str]) -> list[str]:
    """Translate a batch of Polish texts into English.

    Args:
        texts: Strings to send through the ``pl_to_en`` translation pipeline.

    Returns:
        The ``translation_text`` field of each pipeline result, in the order
        the pipeline returns them.
    """
    outputs = pl_to_en(texts)
    return [output['translation_text'] for output in outputs]
def translate_en_to_pl(texts: list[str]) -> list[str]:
    """Translate a batch of English texts into Polish.

    Args:
        texts: Strings to send through the ``en_to_pl`` translation pipeline.

    Returns:
        The ``translation_text`` field of each pipeline result, in the order
        the pipeline returns them.
    """
    outputs = en_to_pl(texts)
    return [output['translation_text'] for output in outputs]
def extract_aspects(text_en: str):
    """Return the distinct aspect terms the tagger finds in an English text.

    The text is tokenised (with truncation enabled), every token is labelled by
    ``aspect_model``, and consecutive tokens labelled ``B-ASP`` followed by
    ``I-ASP`` are merged into one aspect string.

    Args:
        text_en: English text to analyse.

    Returns:
        A list of unique, non-empty aspect strings in the order they first
        appear in the text. The order is deterministic across runs.
    """
    inputs = aspect_tokenizer(text_en, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = aspect_model(**inputs)

    # Only one sequence is passed in, so row 0 holds all per-token predictions.
    preds = torch.argmax(outputs.logits, dim=2)[0].cpu().tolist()
    tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    labels = [aspect_model.config.id2label[p] for p in preds]

    # Group tokens into spans: "B-ASP" opens a span, "I-ASP" extends the open
    # span, and any other label (or an "I-ASP" with no open span) closes it.
    spans = []
    open_span = None
    for token, label in zip(tokens, labels):
        if label == "B-ASP":
            open_span = [token]
            spans.append(open_span)
        elif label == "I-ASP" and open_span is not None:
            open_span.append(token)
        else:
            open_span = None

    aspects = []
    for span in spans:
        text = aspect_tokenizer.convert_tokens_to_string(span).strip()
        # Remove any word-piece continuation markers left after detokenising.
        text = text.replace(" ##", "").strip()
        if text:  # an empty aspect is useless to callers
            aspects.append(text)

    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # make the order depend on string hash randomisation.
    return list(dict.fromkeys(aspects))
# ────────────────────── FastAPI ──────────────────────
# FastAPI application instance on which the /analyze route is registered.
app = FastAPI()
@app.post("/analyze", response_model=AnalysisResult)
def analyze_comment(comment: Comment):
    """Run aspect-based sentiment analysis on a Polish comment.

    The comment is translated to English, aspect terms are extracted from the
    translation, each aspect is mapped to a Polish label (alias table first,
    machine translation as a fallback) and the sentiment model is run once per
    distinct Polish label.

    Args:
        comment: Request body carrying the raw comment text.

    Returns:
        A dict with a ``results`` list of ``AspectSentiment`` items, one per
        distinct Polish aspect label. When several English aspects map to the
        same Polish label, the first one extracted is kept. Blank input yields
        an empty list.
    """
    # NOTE(review): the index order is assumed to match the class ids of the
    # sentiment model head - confirm against the model's training labels.
    sentiment_labels = ("negatywny", "neutralny", "pozytywny", "konfliktowy")

    text_pl = comment.text
    if not text_pl.strip():
        # Nothing to analyse; skip the translation and model calls.
        return {"results": []}

    text_en = translate_pl_to_en([text_pl])[0]

    results = []
    seen = set()
    for asp in extract_aspects(text_en):
        # Translate only when no alias exists. Passing the translation as the
        # default of dict.get() would run the translation model on every aspect.
        asp_pl = aspect_aliases.get(asp.lower())
        if asp_pl is None:
            asp_pl = translate_en_to_pl([asp])[0].lower()

        # Skip duplicates before the comparatively expensive sentiment inference.
        if asp_pl in seen:
            continue
        seen.add(asp_pl)

        # The sentiment model receives the full sentence and the aspect term
        # joined by a literal "[SEP]" marker.
        input_text = f"{text_en} [SEP] {asp}"
        inputs = sentiment_tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(device)
        with torch.no_grad():
            logits = sentiment_model(**inputs).logits
        pred = int(torch.argmax(logits, dim=1).cpu())

        results.append(AspectSentiment(aspect=asp_pl, sentiment=sentiment_labels[pred]))

    return {"results": results}