Spaces:

EfektMotyla
/

ABSA-REST-API

Sleeping

App Files Files Community

EfektMotyla committed on May 18, 2025

Commit

9d9d143

verified ·

1 Parent(s): 5a54bd3

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -30

app.py CHANGED Viewed

@@ -1,44 +1,62 @@
-from fastapi import FastAPI, Request
 from pydantic import BaseModel
 from typing import List
 from transformers import (
-    AutoTokenizer, AutoModelForTokenClassification,
-    AutoModelForSequenceClassification, pipeline
 )
-import torch
-import os
-from pathlib import Path
-ROOT = Path(__file__).parent
-aspect_path    = ROOT / "models/bert-aspect-ner"
-sentiment_path = ROOT / "models/absa-roberta"
 device = "cuda" if torch.cuda.is_available() else "cpu"
-aspect_tok  = AutoTokenizer.from_pretrained(aspect_path,  local_files_only=True)
-aspect_model= AutoModelForTokenClassification.from_pretrained(aspect_path, local_files_only=True).to(device)
-sent_tok  = AutoTokenizer.from_pretrained(sentiment_path,  local_files_only=True)
-sent_model= AutoModelForSequenceClassification.from_pretrained(sentiment_path, local_files_only=True).to(device)
 pl_to_en = pipeline(
     "translation",
     model="Helsinki-NLP/opus-mt-pl-en",
-    device=0 if device == "cuda" else -1
 )
 en_to_pl = pipeline(
     "translation",
     model="gsarti/opus-mt-tc-en-pl",
-    device=0 if device == "cuda" else -1
 )
-# === Dane wejściowe i wyjściowe ===
 class Comment(BaseModel):
     text: str
 class AspectSentiment(BaseModel):
     aspect: str
     sentiment: str
 class AnalysisResult(BaseModel):
     results: List[AspectSentiment]
@@ -51,26 +69,27 @@ aspect_aliases = {
     "time": "czas oczekiwania", "cleanliness": "czystość", "smell": "zapach",
     "value": "cena", "experience": "doświadczenie", "recommendation": "ogólna ocena",
     "children": "dzieci", "family": "rodzina", "pet": "zwierzęta"
-    # dodaj więcej jak chcesz
 }
-# === Funkcje pomocnicze ===
 def translate_pl_to_en(texts):
     return [res["translation_text"] for res in pl_to_en(texts)]
 def translate_en_to_pl(texts):
     return [res["translation_text"] for res in en_to_pl(texts)]
-def extract_aspects(text_en):
-    inputs = aspect_tokenizer(text_en, return_tensors="pt", truncation=True, padding=True).to(device)
     with torch.no_grad():
         outputs = aspect_model(**inputs)
     preds = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()
     tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     labels = [aspect_model.config.id2label[p] for p in preds]
-    aspects = []
-    current_tokens = []
     for token, label in zip(tokens, labels):
         if label == "B-ASP":
             if current_tokens:
@@ -85,27 +104,37 @@ def extract_aspects(text_en):
     if current_tokens:
         aspects.append(aspect_tokenizer.convert_tokens_to_string(current_tokens).strip())
-    return list(set([a.lower() for a in aspects]))
-# === Główna funkcja API ===
 app = FastAPI()
 @app.post("/analyze", response_model=AnalysisResult)
 def analyze_comment(comment: Comment):
     text_pl = comment.text
     text_en = translate_pl_to_en([text_pl])[0]
     aspects = extract_aspects(text_en)
-    result = []
     for asp in aspects:
         input_text = f"{text_en} [SEP] {asp}"
-        inputs = sentiment_tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(device)
         with torch.no_grad():
             logits = sentiment_model(**inputs).logits
             predicted_class_id = int(logits.argmax().cpu())
-            sentiment_label = {0: "negatywny", 1: "neutralny", 2: "pozytywny", 3: "konfliktowy"}[predicted_class_id]
         asp_pl = aspect_aliases.get(asp, translate_en_to_pl([asp])[0].lower())
-        result.append(AspectSentiment(aspect=asp_pl, sentiment=sentiment_label))
-    return {"results": result}

+from pathlib import Path
+from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import List
+import torch
 from transformers import (
+    AutoTokenizer,
+    AutoModelForTokenClassification,
+    AutoModelForSequenceClassification,
+    pipeline,
 )
+# ────────────────────── configuration ─────────────────────
 device = "cuda" if torch.cuda.is_available() else "cpu"  # fall back to CPU when CUDA is unavailable
+ROOT = Path(__file__).parent  # directory that contains this file
+MODELS_DIR = ROOT / "models"
+aspect_dir = MODELS_DIR / "bert-aspect-ner"  # checkpoint loaded below for token classification
+sentiment_dir = MODELS_DIR / "absa-roberta"  # checkpoint loaded below for sequence classification
+# ────────────────────── local models ───────────────────────
+aspect_tokenizer = AutoTokenizer.from_pretrained(
+    str(aspect_dir), local_files_only=True, use_fast=False        # ← in case tokenizer.json is missing
+)
+aspect_model = AutoModelForTokenClassification.from_pretrained(
+    str(aspect_dir), local_files_only=True  # read from the local directory only
+).to(device)
+sentiment_tokenizer = AutoTokenizer.from_pretrained(
+    str(sentiment_dir), local_files_only=True
+)
+sentiment_model = AutoModelForSequenceClassification.from_pretrained(
+    str(sentiment_dir), local_files_only=True
+).to(device)
+# ────────────────────── translation models (online) ────────
 pl_to_en = pipeline(
     "translation",
     model="Helsinki-NLP/opus-mt-pl-en",
+    device=0 if device == "cuda" else -1,  # first GPU when CUDA is available, otherwise -1 (CPU)
 )
 en_to_pl = pipeline(
     "translation",
     model="gsarti/opus-mt-tc-en-pl",
+    device=0 if device == "cuda" else -1,  # same device choice as pl_to_en
 )
+# ────────────────────── Pydantic schemas ───────────────────
 # Request body of the POST /analyze endpoint (see analyze_comment).
 class Comment(BaseModel):
     # Comment text; analyze_comment passes it through translate_pl_to_en first.
     text: str
 # One aspect/sentiment pair in the /analyze response.
 class AspectSentiment(BaseModel):
     # Aspect name returned to the client: an aspect_aliases entry or the
     # lower-cased translate_en_to_pl output (see analyze_comment).
     aspect: str
     # One of the label strings assigned in analyze_comment.
     sentiment: str
 # Response model declared on the /analyze route.
 class AnalysisResult(BaseModel):
     # One entry per aspect returned by extract_aspects.
     results: List[AspectSentiment]
     "time": "czas oczekiwania", "cleanliness": "czystość", "smell": "zapach",
     "value": "cena", "experience": "doświadczenie", "recommendation": "ogólna ocena",
     "children": "dzieci", "family": "rodzina", "pet": "zwierzęta"
 }
 def translate_pl_to_en(texts):
     """Run ``texts`` through the ``pl_to_en`` pipeline.

     Returns the ``"translation_text"`` field of every pipeline result,
     in the order the pipeline produced them.
     """
     outputs = pl_to_en(texts)
     return [item["translation_text"] for item in outputs]
 def translate_en_to_pl(texts):
     """Run ``texts`` through the ``en_to_pl`` pipeline.

     Returns the ``"translation_text"`` field of every pipeline result,
     in the order the pipeline produced them.
     """
     outputs = en_to_pl(texts)
     return [item["translation_text"] for item in outputs]
+def extract_aspects(text_en: str):
+    inputs = aspect_tokenizer(
+        text_en, return_tensors="pt", truncation=True, padding=True
+    ).to(device)
     with torch.no_grad():
         outputs = aspect_model(**inputs)
     preds = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()
     tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     labels = [aspect_model.config.id2label[p] for p in preds]
+    aspects, current_tokens = [], []
     for token, label in zip(tokens, labels):
         if label == "B-ASP":
             if current_tokens:
     if current_tokens:
         aspects.append(aspect_tokenizer.convert_tokens_to_string(current_tokens).strip())
+    # ↓ strip the " ##" sub-word markers and drop duplicate results
+    return list({tok.replace(" ##", "") for tok in aspects})
+# ────────────────────── FastAPI ────────────────────────────
 app = FastAPI()
 @app.post("/analyze", response_model=AnalysisResult)
 def analyze_comment(comment: Comment):
     """Return one sentiment label per aspect found in the comment.

     The comment text is translated with ``translate_pl_to_en``, aspects are
     taken from ``extract_aspects``, and each aspect is classified by
     ``sentiment_model`` on the input ``"<translated text> [SEP] <aspect>"``.

     Returns:
         ``{"results": [...]}`` holding one ``AspectSentiment`` per aspect.
         The aspect name is the ``aspect_aliases`` entry when one exists,
         otherwise the lower-cased ``translate_en_to_pl`` output.

     Raises:
         KeyError: if the model predicts a class id outside 0-3.
     """
     # Class id -> label string, built once instead of once per aspect.
     # NOTE(review): assumes this ordering matches the checkpoint's labels; confirm.
     sentiment_labels = {
         0: "negatywny",
         1: "neutralny",
         2: "pozytywny",
         3: "konfliktowy",
     }
     text_pl = comment.text
     text_en = translate_pl_to_en([text_pl])[0]
     aspects = extract_aspects(text_en)
     results: list[AspectSentiment] = []
     for asp in aspects:
         input_text = f"{text_en} [SEP] {asp}"
         inputs = sentiment_tokenizer(
             input_text, return_tensors="pt", truncation=True, padding=True
         ).to(device)
         with torch.no_grad():
             logits = sentiment_model(**inputs).logits
         predicted_class_id = int(logits.argmax().cpu())
         sentiment_label = sentiment_labels[predicted_class_id]
         # Translate only when no alias exists: the default argument of
         # dict.get() is evaluated eagerly, so the previous one-liner ran the
         # translation model for every aspect, aliased or not.
         if asp in aspect_aliases:
             asp_pl = aspect_aliases[asp]
         else:
             asp_pl = translate_en_to_pl([asp])[0].lower()
         results.append(AspectSentiment(aspect=asp_pl, sentiment=sentiment_label))
     return {"results": results}