EfektMotyla committed on
Commit
9d9d143
·
verified ·
1 Parent(s): 5a54bd3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -30
app.py CHANGED
@@ -1,44 +1,62 @@
1
- from fastapi import FastAPI, Request
 
 
2
  from pydantic import BaseModel
3
  from typing import List
 
 
4
  from transformers import (
5
- AutoTokenizer, AutoModelForTokenClassification,
6
- AutoModelForSequenceClassification, pipeline
 
 
7
  )
8
- import torch
9
- import os
10
- from pathlib import Path
11
- ROOT = Path(__file__).parent
12
- aspect_path = ROOT / "models/bert-aspect-ner"
13
- sentiment_path = ROOT / "models/absa-roberta"
14
 
 
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
 
 
 
 
 
17
 
18
- aspect_tok = AutoTokenizer.from_pretrained(aspect_path, local_files_only=True)
19
- aspect_model= AutoModelForTokenClassification.from_pretrained(aspect_path, local_files_only=True).to(device)
 
 
 
 
 
20
 
21
- sent_tok = AutoTokenizer.from_pretrained(sentiment_path, local_files_only=True)
22
- sent_model= AutoModelForSequenceClassification.from_pretrained(sentiment_path, local_files_only=True).to(device)
 
 
 
 
 
 
23
  pl_to_en = pipeline(
24
  "translation",
25
  model="Helsinki-NLP/opus-mt-pl-en",
26
- device=0 if device == "cuda" else -1
27
  )
28
  en_to_pl = pipeline(
29
  "translation",
30
  model="gsarti/opus-mt-tc-en-pl",
31
- device=0 if device == "cuda" else -1
32
  )
33
 
34
- # === Dane wejściowe i wyjściowe ===
35
  class Comment(BaseModel):
36
  text: str
37
 
 
38
  class AspectSentiment(BaseModel):
39
  aspect: str
40
  sentiment: str
41
 
 
42
  class AnalysisResult(BaseModel):
43
  results: List[AspectSentiment]
44
 
@@ -51,26 +69,27 @@ aspect_aliases = {
51
  "time": "czas oczekiwania", "cleanliness": "czystość", "smell": "zapach",
52
  "value": "cena", "experience": "doświadczenie", "recommendation": "ogólna ocena",
53
  "children": "dzieci", "family": "rodzina", "pet": "zwierzęta"
54
- # dodaj więcej jak chcesz
55
  }
56
-
57
- # === Funkcje pomocnicze ===
58
  def translate_pl_to_en(texts):
59
  return [res["translation_text"] for res in pl_to_en(texts)]
60
 
 
61
  def translate_en_to_pl(texts):
62
  return [res["translation_text"] for res in en_to_pl(texts)]
63
 
64
- def extract_aspects(text_en):
65
- inputs = aspect_tokenizer(text_en, return_tensors="pt", truncation=True, padding=True).to(device)
 
 
 
66
  with torch.no_grad():
67
  outputs = aspect_model(**inputs)
 
68
  preds = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()
69
  tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
70
  labels = [aspect_model.config.id2label[p] for p in preds]
71
 
72
- aspects = []
73
- current_tokens = []
74
  for token, label in zip(tokens, labels):
75
  if label == "B-ASP":
76
  if current_tokens:
@@ -85,27 +104,37 @@ def extract_aspects(text_en):
85
  if current_tokens:
86
  aspects.append(aspect_tokenizer.convert_tokens_to_string(current_tokens).strip())
87
 
88
- return list(set([a.lower() for a in aspects]))
 
89
 
90
- # === Główna funkcja API ===
 
91
  app = FastAPI()
92
 
 
93
  @app.post("/analyze", response_model=AnalysisResult)
94
  def analyze_comment(comment: Comment):
95
  text_pl = comment.text
96
  text_en = translate_pl_to_en([text_pl])[0]
97
  aspects = extract_aspects(text_en)
98
 
99
- result = []
100
  for asp in aspects:
101
  input_text = f"{text_en} [SEP] {asp}"
102
- inputs = sentiment_tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(device)
 
 
103
  with torch.no_grad():
104
  logits = sentiment_model(**inputs).logits
105
  predicted_class_id = int(logits.argmax().cpu())
106
- sentiment_label = {0: "negatywny", 1: "neutralny", 2: "pozytywny", 3: "konfliktowy"}[predicted_class_id]
 
 
 
 
 
107
 
108
  asp_pl = aspect_aliases.get(asp, translate_en_to_pl([asp])[0].lower())
109
- result.append(AspectSentiment(aspect=asp_pl, sentiment=sentiment_label))
110
 
111
- return {"results": result}
 
1
+ from pathlib import Path
2
+
3
+ from fastapi import FastAPI
4
  from pydantic import BaseModel
5
  from typing import List
6
+
7
+ import torch
8
  from transformers import (
9
+ AutoTokenizer,
10
+ AutoModelForTokenClassification,
11
+ AutoModelForSequenceClassification,
12
+ pipeline,
13
  )
 
 
 
 
 
 
14
 
15
+ # ────────────────────── konfiguracja ──────────────────────
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
 
18
+ ROOT = Path(__file__).parent
19
+ MODELS_DIR = ROOT / "models"
20
+ aspect_dir = MODELS_DIR / "bert-aspect-ner"
21
+ sentiment_dir = MODELS_DIR / "absa-roberta"
22
 
23
+ # ────────────────────── modele lokalne ─────────────────────
24
+ aspect_tokenizer = AutoTokenizer.from_pretrained(
25
+ str(aspect_dir), local_files_only=True, use_fast=False # ← jeśli brak tokenizer.json
26
+ )
27
+ aspect_model = AutoModelForTokenClassification.from_pretrained(
28
+ str(aspect_dir), local_files_only=True
29
+ ).to(device)
30
 
31
+ sentiment_tokenizer = AutoTokenizer.from_pretrained(
32
+ str(sentiment_dir), local_files_only=True
33
+ )
34
+ sentiment_model = AutoModelForSequenceClassification.from_pretrained(
35
+ str(sentiment_dir), local_files_only=True
36
+ ).to(device)
37
+
38
+ # ────────────────────── modele tłumaczeń (on-line) ─────────
39
  pl_to_en = pipeline(
40
  "translation",
41
  model="Helsinki-NLP/opus-mt-pl-en",
42
+ device=0 if device == "cuda" else -1,
43
  )
44
  en_to_pl = pipeline(
45
  "translation",
46
  model="gsarti/opus-mt-tc-en-pl",
47
+ device=0 if device == "cuda" else -1,
48
  )
49
 
50
+ # ────────────────────── schemy Pydantic ────────────────────
51
  class Comment(BaseModel):
52
  text: str
53
 
54
+
55
  class AspectSentiment(BaseModel):
56
  aspect: str
57
  sentiment: str
58
 
59
+
60
  class AnalysisResult(BaseModel):
61
  results: List[AspectSentiment]
62
 
 
69
  "time": "czas oczekiwania", "cleanliness": "czystość", "smell": "zapach",
70
  "value": "cena", "experience": "doświadczenie", "recommendation": "ogólna ocena",
71
  "children": "dzieci", "family": "rodzina", "pet": "zwierzęta"
 
72
  }
 
 
73
  def translate_pl_to_en(texts):
74
  return [res["translation_text"] for res in pl_to_en(texts)]
75
 
76
+
77
  def translate_en_to_pl(texts):
78
  return [res["translation_text"] for res in en_to_pl(texts)]
79
 
80
+
81
+ def extract_aspects(text_en: str):
82
+ inputs = aspect_tokenizer(
83
+ text_en, return_tensors="pt", truncation=True, padding=True
84
+ ).to(device)
85
  with torch.no_grad():
86
  outputs = aspect_model(**inputs)
87
+
88
  preds = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()
89
  tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
90
  labels = [aspect_model.config.id2label[p] for p in preds]
91
 
92
+ aspects, current_tokens = [], []
 
93
  for token, label in zip(tokens, labels):
94
  if label == "B-ASP":
95
  if current_tokens:
 
104
  if current_tokens:
105
  aspects.append(aspect_tokenizer.convert_tokens_to_string(current_tokens).strip())
106
 
107
+ # usuń spacje z „##” i zduplikowane wyniki
108
+ return list({tok.replace(" ##", "") for tok in aspects})
109
 
110
+
111
+ # ────────────────────── FastAPI ────────────────────────────
112
  app = FastAPI()
113
 
114
+
115
  @app.post("/analyze", response_model=AnalysisResult)
116
  def analyze_comment(comment: Comment):
117
  text_pl = comment.text
118
  text_en = translate_pl_to_en([text_pl])[0]
119
  aspects = extract_aspects(text_en)
120
 
121
+ results: list[AspectSentiment] = []
122
  for asp in aspects:
123
  input_text = f"{text_en} [SEP] {asp}"
124
+ inputs = sentiment_tokenizer(
125
+ input_text, return_tensors="pt", truncation=True, padding=True
126
+ ).to(device)
127
  with torch.no_grad():
128
  logits = sentiment_model(**inputs).logits
129
  predicted_class_id = int(logits.argmax().cpu())
130
+ sentiment_label = {
131
+ 0: "negatywny",
132
+ 1: "neutralny",
133
+ 2: "pozytywny",
134
+ 3: "konfliktowy",
135
+ }[predicted_class_id]
136
 
137
  asp_pl = aspect_aliases.get(asp, translate_en_to_pl([asp])[0].lower())
138
+ results.append(AspectSentiment(aspect=asp_pl, sentiment=sentiment_label))
139
 
140
+ return {"results": results}