Spaces:
Sleeping
Sleeping
File size: 5,420 Bytes
9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e e34ebb0 14bd362 e7bf8eb 14bd362 e34ebb0 e7bf8eb 14bd362 9d9d143 e69a3fb 5ac897e 9d9d143 5ac897e 14bd362 9d9d143 5ac897e 9d9d143 e69a3fb 9d9d143 e69a3fb 9d9d143 e69a3fb 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 e69a3fb 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 5ac897e 9d9d143 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
from pathlib import Path
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import torch
from transformers import (
AutoTokenizer,
AutoModelForTokenClassification,
AutoModelForSequenceClassification,
pipeline,
)
import os
print("π ZAWARTOΕΔ /app/bert-aspect-ner:")
print(os.listdir("/app/bert-aspect-ner") if os.path.exists("/app/bert-aspect-ner") else "β NIE ISTNIEJE")
print("π ZAWARTOΕΔ /app/absa-roberta:")
print(os.listdir("/app/absa-roberta") if os.path.exists("/app/absa-roberta") else "β NIE ISTNIEJE")
# ────────────────────── configuration ──────────────────────
# Run on GPU when torch can see one; everything below moves models onto
# this device.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Model checkpoints live next to this file.
# NOTE: there is intentionally NO models/ subdirectory.
ROOT = Path(__file__).parent
aspect_dir = ROOT / "bert-aspect-ner"
sentiment_dir = ROOT / "absa-roberta"
# ────────────────────── local models ───────────────────────
# Aspect extractor: token-classification checkpoint bundled with the app.
# use_fast=False because the checkpoint may ship without tokenizer.json.
aspect_tokenizer = AutoTokenizer.from_pretrained(
    str(aspect_dir),
    local_files_only=True,
    use_fast=False,
)
aspect_model = AutoModelForTokenClassification.from_pretrained(
    str(aspect_dir),
    local_files_only=True,
)
aspect_model = aspect_model.to(device)

# Aspect-sentiment classifier (sequence-classification head).
sentiment_tokenizer = AutoTokenizer.from_pretrained(
    str(sentiment_dir),
    local_files_only=True,
)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(
    str(sentiment_dir),
    local_files_only=True,
)
sentiment_model = sentiment_model.to(device)
# ────────────────────── translation models (fetched online) ─────────
# HF pipelines use an integer device index: 0 = first GPU, -1 = CPU.
_pipeline_device = 0 if device == "cuda" else -1

pl_to_en = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-pl-en",
    device=_pipeline_device,
)
en_to_pl = pipeline(
    "translation",
    model="gsarti/opus-mt-tc-en-pl",
    device=_pipeline_device,
)
# ────────────────────── Pydantic schemas ───────────────────
class Comment(BaseModel):
    """Request body for POST /analyze: the raw comment text (Polish)."""

    text: str
class AspectSentiment(BaseModel):
    """One extracted aspect (Polish display name) with its sentiment label."""

    aspect: str
    sentiment: str
class AnalysisResult(BaseModel):
    """Response body for POST /analyze: one entry per detected aspect."""

    results: List[AspectSentiment]
# EN→PL aspect alias dictionary (same mapping as before): maps aspect terms
# produced by the English NER model to their Polish display names. Aspects
# missing from this table fall back to machine translation at request time.
aspect_aliases = {
    "food": "jedzenie",
    "service": "obsΕuga",
    "price": "cena",
    "taste": "smak",
    "waiter": "obsΕuga",
    "dish": "danie",
    "portion": "porcja",
    "staff": "obsΕuga",
    "decor": "wystrΓ³j",
    "menu": "menu",
    "drink": "napoje",
    "location": "lokalizacja",
    "time": "czas oczekiwania",
    "cleanliness": "czystoΕΔ",
    "smell": "zapach",
    "value": "cena",
    "experience": "doΕwiadczenie",
    "recommendation": "ogΓ³lna ocena",
    "children": "dzieci",
    "family": "rodzina",
    "pet": "zwierzΔta",
}
def translate_pl_to_en(texts):
    """Translate a batch of Polish strings to English via the PL→EN pipeline."""
    english = []
    for entry in pl_to_en(texts):
        english.append(entry["translation_text"])
    return english
def translate_en_to_pl(texts):
    """Translate a batch of English strings to Polish via the EN→PL pipeline."""
    polish = []
    for entry in en_to_pl(texts):
        polish.append(entry["translation_text"])
    return polish
def extract_aspects(text_en: str):
    """Extract unique aspect phrases from English text with the NER model.

    Tokens tagged B-ASP start a new span, I-ASP extends the current one
    (an orphan I-ASP with no open span is ignored), anything else closes
    the span. Returns a deduplicated list of aspect strings.
    """
    encoded = aspect_tokenizer(
        text_en, return_tensors="pt", truncation=True, padding=True
    ).to(device)
    with torch.no_grad():
        logits = aspect_model(**encoded).logits
    tag_ids = torch.argmax(logits, dim=2)[0].cpu().numpy()
    token_strs = aspect_tokenizer.convert_ids_to_tokens(encoded["input_ids"][0])
    tag_names = [aspect_model.config.id2label[i] for i in tag_ids]

    spans = []
    buffer = []

    def _flush():
        # Close the currently open span (if any) and reset the buffer.
        if buffer:
            spans.append(aspect_tokenizer.convert_tokens_to_string(buffer).strip())
            buffer.clear()

    for tok, tag in zip(token_strs, tag_names):
        if tag == "B-ASP":
            _flush()
            buffer.append(tok)
        elif tag == "I-ASP" and buffer:
            buffer.append(tok)
        else:
            _flush()
    _flush()

    # Strip leftover wordpiece separators and drop duplicates.
    return list({span.replace(" ##", "") for span in spans})
# ────────────────────── FastAPI ────────────────────────────
# Application instance; route handlers are registered on it via decorators.
app = FastAPI()
@app.post("/analyze", response_model=AnalysisResult)
def analyze_comment(comment: Comment):
    """Analyze a Polish comment: translate to English, extract aspects,
    then classify the sentiment of each aspect and map it back to Polish.
    """
    # Class index → Polish sentiment label emitted by the classifier head.
    label_names = {
        0: "negatywny",
        1: "neutralny",
        2: "pozytywny",
        3: "konfliktowy",
    }

    english_text = translate_pl_to_en([comment.text])[0]

    found: List[AspectSentiment] = []
    for aspect in extract_aspects(english_text):
        # Pair the full sentence with the aspect, ABSA-style.
        pair = f"{english_text} [SEP] {aspect}"
        encoded = sentiment_tokenizer(
            pair, return_tensors="pt", truncation=True, padding=True
        ).to(device)
        with torch.no_grad():
            scores = sentiment_model(**encoded).logits
        sentiment = label_names[int(scores.argmax().cpu())]

        # Prefer the curated alias; otherwise machine-translate the aspect.
        polish_aspect = aspect_aliases.get(
            aspect, translate_en_to_pl([aspect])[0].lower()
        )
        found.append(AspectSentiment(aspect=polish_aspect, sentiment=sentiment))

    return {"results": found}