"""FastAPI service for aspect-based sentiment analysis of Polish comments.

A comment is translated from Polish to English, aspect terms are tagged by a
token-classification model, each aspect is scored by a sequence-classification
model, and the aspect names are mapped back to Polish for the response.
"""
from typing import List

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import (
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)

# ────────────────────── configuration ──────────────────────
device = "cuda" if torch.cuda.is_available() else "cpu"
# transformers pipelines take a device index rather than a device string.
_pipeline_device = 0 if torch.cuda.is_available() else -1

# Local models (loaded from disk only, never downloaded).
aspect_tokenizer = AutoTokenizer.from_pretrained(
    "bert-aspect-ner", local_files_only=True, use_fast=False
)
aspect_model = AutoModelForTokenClassification.from_pretrained(
    "bert-aspect-ner", local_files_only=True
).to(device)
aspect_model.eval()

sentiment_tokenizer = AutoTokenizer.from_pretrained(
    "absa-roberta", local_files_only=True
)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(
    "absa-roberta", local_files_only=True
).to(device)
sentiment_model.eval()

# Translation pipelines.
pl_to_en = pipeline(
    "translation", model="Helsinki-NLP/opus-mt-pl-en", device=_pipeline_device
)
en_to_pl = pipeline(
    "translation", model="gsarti/opus-mt-tc-en-pl", device=_pipeline_device
)

# Alias dictionary: lower-cased English aspect -> canonical Polish name.
# Aspects found here bypass machine translation entirely.
aspect_aliases = {
    "food": "jedzenie",
    "service": "obsługa",
    "price": "cena",
    "taste": "smak",
    "waiter": "obsługa",
    "dish": "danie",
    "portion": "porcja",
    "staff": "obsługa",
    "decor": "wystrój",
    "menu": "menu",
    "drink": "napoje",
    "location": "lokalizacja",
    "time": "czas oczekiwania",
    "cleanliness": "czystość",
    "smell": "zapach",
    "value": "cena",
    "experience": "doświadczenie",
    "recommendation": "ogólna ocena",
    "children": "dzieci",
    "family": "rodzina",
    "pet": "zwierzęta",
}

# Polish sentiment names, indexed by the class id predicted by sentiment_model.
# NOTE(review): the order is assumed to match the model's label ids — confirm
# against the model's config.
SENTIMENT_LABELS = ("negatywny", "neutralny", "pozytywny", "konfliktowy")


# ────────────────────── Pydantic ──────────────────────
# Request body: the raw comment text.
class Comment(BaseModel):
    text: str


# One aspect name paired with its sentiment label.
class AspectSentiment(BaseModel):
    aspect: str
    sentiment: str


# Response body: every distinct aspect found in the comment.
class AnalysisResult(BaseModel):
    results: List[AspectSentiment]


# ────────────────────── logic ──────────────────────
def translate_pl_to_en(texts: list[str]) -> list[str]:
    """Translate a batch of texts with the ``pl_to_en`` pipeline."""
    if not texts:
        return []
    return [r['translation_text'] for r in pl_to_en(texts)]


def translate_en_to_pl(texts: list[str]) -> list[str]:
    """Translate a batch of texts with the ``en_to_pl`` pipeline."""
    if not texts:
        return []
    return [r['translation_text'] for r in en_to_pl(texts)]


def extract_aspects(text_en: str) -> list[str]:
    """Return the distinct aspect phrases tagged in ``text_en``.

    Tokens labelled ``B-ASP`` open a span and following ``I-ASP`` tokens
    extend it; any other label closes the span. Phrases are returned in order
    of first appearance, without duplicates or empty strings.
    """
    inputs = aspect_tokenizer(
        text_en, return_tensors="pt", truncation=True, padding=True
    ).to(device)
    with torch.no_grad():
        outputs = aspect_model(**inputs)

    label_ids = torch.argmax(outputs.logits, dim=2)[0].tolist()
    tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())

    # Structural tokens never belong to an aspect, whatever the tagger says.
    structural_tokens = {
        aspect_tokenizer.cls_token,
        aspect_tokenizer.sep_token,
        aspect_tokenizer.pad_token,
    }

    spans: list[list[str]] = []
    current: list[str] = []
    for token, label_id in zip(tokens, label_ids):
        if token in structural_tokens:
            label = "O"
        else:
            label = aspect_model.config.id2label[label_id]

        if label == "B-ASP":
            if current:
                spans.append(current)
            current = [token]
        elif label == "I-ASP" and current:
            current.append(token)
        elif current:
            # Anything else (including an orphan I-ASP) ends the open span.
            spans.append(current)
            current = []
    if current:
        spans.append(current)

    phrases = []
    for span in spans:
        phrase = aspect_tokenizer.convert_tokens_to_string(span).strip()
        # Remove any word-piece continuation markers left in the joined text.
        phrase = phrase.replace(" ##", "").strip()
        if phrase:
            phrases.append(phrase)

    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    return list(dict.fromkeys(phrases))


def _aspect_to_polish(aspect_en: str) -> str:
    """Map an English aspect to Polish, translating only when no alias exists."""
    alias = aspect_aliases.get(aspect_en.lower())
    if alias is not None:
        return alias
    return translate_en_to_pl([aspect_en])[0].lower()


def _classify_sentiment(text_en: str, aspect_en: str) -> str:
    """Return the Polish sentiment label for ``aspect_en`` within ``text_en``."""
    input_text = f"{text_en} [SEP] {aspect_en}"
    # NOTE(review): the aspect is appended at the end, so truncation of a very
    # long text may cut it off — confirm the expected maximum input length.
    inputs = sentiment_tokenizer(
        input_text, return_tensors="pt", truncation=True, padding=True
    ).to(device)
    with torch.no_grad():
        logits = sentiment_model(**inputs).logits
    pred = int(torch.argmax(logits, dim=1).item())
    return SENTIMENT_LABELS[pred]


# ────────────────────── FastAPI ──────────────────────
app = FastAPI()


@app.post("/analyze", response_model=AnalysisResult)
def analyze_comment(comment: Comment):
    """Analyze a Polish comment and return one sentiment per distinct aspect.

    The comment is translated to English, aspects are extracted, and each
    aspect is mapped to its Polish name. When several English aspects map to
    the same Polish name, only the first one is scored and reported.
    """
    text_pl = comment.text
    if not text_pl.strip():
        # Nothing to analyze; skip the models entirely.
        return {"results": []}

    text_en = translate_pl_to_en([text_pl])[0]

    results = []
    seen = set()
    for asp in extract_aspects(text_en):
        asp_pl = _aspect_to_polish(asp)
        # De-duplicate before inference so repeated aspects cost nothing.
        if asp_pl in seen:
            continue
        seen.add(asp_pl)
        sentiment = _classify_sentiment(text_en, asp)
        results.append(AspectSentiment(aspect=asp_pl, sentiment=sentiment))

    return {"results": results}