| | from typing import Dict, List, Any |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| | import torch |
| |
|
| |
|
class EndpointHandler():
    """Inference handler that predicts 1-5 star ratings for review texts.

    Loads a sequence-classification model once at startup; ``__call__``
    scores a list of review strings in fixed-size batches and returns
    integer ratings clamped to the valid [1, 5] range.
    """

    def __init__(self, path=""):
        # NOTE(review): `path` (the deployed model directory) is ignored and
        # fixed hub checkpoints are loaded instead — confirm this is intended.
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

        # NOTE(review): tokenizer comes from the base checkpoint while the
        # model comes from the fine-tuned repo — assumes the fine-tune kept
        # the distilbert-base-uncased vocabulary; verify against the repo.
        self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
        self.sentiment_model = AutoModelForSequenceClassification.from_pretrained(
            "Christian2903/amazon-review-sentiment-analysis"
        ).to(self.device)
        # Inference-only handler: disable dropout / switch norm layers to eval.
        self.sentiment_model.eval()

    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[int]]:
        """
        data args:
            reviews (:obj:`list` of :obj:`str`): review texts to score
        Return:
            A `dict` {'scores': [int, ...]}: one 1-5 rating per review;
            will be serialized and returned
        """
        # Falls back to the whole payload when the 'reviews' key is absent
        # (preserves the original lenient contract). Note: pop() mutates data.
        reviews = data.pop("reviews", data)
        if not reviews:
            # Empty payload: nothing to tokenize, return an empty result.
            return {'scores': []}

        batch_size = 32  # bounds peak tokenizer/model memory per forward pass
        raw_scores: List[float] = []
        # no_grad: inference only — skip building autograd graphs per batch.
        with torch.no_grad():
            for start in range(0, len(reviews), batch_size):
                batch = reviews[start:start + batch_size]
                inputs = self.tokenizer(
                    batch,
                    return_tensors="pt",
                    truncation=True,
                    padding="max_length",
                    max_length=256,
                ).to(self.device)
                logits = self.sentiment_model(**inputs)[0]
                # Assumes a single-output (regression-style) head -> (B, 1);
                # squeeze so each entry is a plain float. The previous code
                # relied on implicit size-1 ndarray -> int conversion, which
                # NumPy >= 1.25 rejects with a TypeError.
                raw_scores.extend(logits.squeeze(-1).detach().cpu().tolist())

        # Round half-up, then clamp into the valid 1-5 star range.
        predicted_scores = [max(min(int(score + 0.5), 5), 1) for score in raw_scores]

        response = {
            'scores': predicted_scores
        }

        return response