|
|
from typing import Dict, List, Any |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import torch |
|
|
|
|
|
|
|
|
class EndpointHandler():
    """Hugging Face Inference Endpoints handler that scores Amazon reviews.

    Loads a fine-tuned DistilBERT sequence-classification checkpoint and maps
    each review text to an integer star rating clamped to the range 1-5.
    """

    def __init__(self, path=""):
        # NOTE(review): `path` (the deployed model directory supplied by the
        # endpoint runtime) is ignored and both artifacts are pulled from the
        # Hub instead — confirm this is intentional.
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

        # NOTE(review): the tokenizer is the *base* distilbert-base-uncased one
        # while the weights come from a fine-tuned checkpoint; verify the
        # fine-tune did not alter the vocabulary or special tokens.
        self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
        self.sentiment_model = AutoModelForSequenceClassification.from_pretrained(
            "Christian2903/amazon-review-sentiment-analysis_large"
        ).to(self.device)
        # Fix: put the model in eval mode so dropout/normalization layers
        # behave deterministically at inference time.
        self.sentiment_model.eval()

    def __call__(self, data: Dict[str, List[str]]) -> Dict[str, List[int]]:
        """Score a batch of reviews.

        data args:
            reviews (:obj:`list` of :obj:`str`): review texts; a single
                string is also accepted and treated as a one-element batch.

        Return:
            A `dict` with key ``'scores'`` mapping to a list of integer star
            ratings (1-5), one per input review; will be serialized and
            returned by the endpoint runtime.
        """
        reviews = data.pop("reviews", data)
        # Robustness fix: a bare string would otherwise be sliced into
        # 32-character chunks by the batching loop below.
        if isinstance(reviews, str):
            reviews = [reviews]

        batch_size = 32
        predictions: List[float] = []
        for start in range(0, len(reviews), batch_size):
            batch = reviews[start:start + batch_size]

            inputs = self.tokenizer(
                batch,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=256,
            ).to(self.device)

            # Fix: disable autograd during inference — the original built a
            # gradient graph for every batch, wasting time and GPU memory.
            with torch.no_grad():
                outputs = self.sentiment_model(**inputs)

            logits = outputs[0]
            # Flatten to plain Python floats; the original extended with
            # size-1 numpy arrays and relied on deprecated implicit
            # array->int conversion in the rounding step below.
            # assumes a single-logit regression head (num_labels == 1) —
            # TODO confirm against the checkpoint config.
            predictions.extend(logits.cpu().numpy().reshape(-1).tolist())

        # Round each raw score to the nearest integer and clamp to the
        # valid 1..5 star range.
        predicted_scores = [max(min(int(score + 0.5), 5), 1) for score in predictions]

        response = {
            'scores': predicted_scores
        }

        return response