| from typing import Dict, List, Any | |
| import torch | |
| from accelerate import Accelerator | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import numpy as np | |
def softmax(x, axis=-1):
    """Return the softmax of ``x`` computed along ``axis``.

    Args:
        x: Array-like of scores (NumPy array, list, tuple, ...). It is
            converted with ``np.asarray`` so plain Python sequences work.
        axis: Axis along which the values are normalised. Defaults to the
            last axis, which for a 1-D input is the whole vector.

    Returns:
        A NumPy array with the same shape as ``x`` whose entries along
        ``axis`` are non-negative and sum to 1.

    Raises:
        ValueError: If ``x`` is empty along ``axis`` (raised by ``np.max``).
    """
    scores = np.asarray(x)
    # Subtracting the maximum does not change the result mathematically but
    # keeps every exponent <= 0, so np.exp cannot overflow on large scores.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials, axis=axis, keepdims=True)
class EndpointHandler:
    """Score a multiple-choice prompt with a causal language model.

    The handler loads a model and tokenizer once at construction time and
    looks up one token id for each of the answer letters "A", "B", "C" and
    "D". Calling the instance runs a single forward pass over the prompt and
    compares the next-token logits of those four ids.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer and resolve the answer-letter ids.

        Args:
            path: Location handed to both ``from_pretrained`` calls.
        """
        self.accelerator = Accelerator()
        self.device = self.accelerator.device
        self.model = AutoModelForCausalLM.from_pretrained(
            path, trust_remote_code=True, device_map="auto"
        )
        self.model = self.accelerator.prepare(self.model)
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # Encode without special tokens so the last id is always the letter
        # itself; with special tokens enabled, a tokenizer that appends an
        # end-of-sequence marker would yield that marker for every option.
        self.options_tokens = [
            self.tokenizer.encode(choice, add_special_tokens=False)[-1]
            for choice in ["A", "B", "C", "D"]
        ]

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Predict which of the options A-D the model prefers for a prompt.

        Args:
            data: Request payload. Its ``"prompt"`` entry is removed from the
                dict and used as the model input.

        Returns:
            A one-element list holding a dict with:
                ``"pred"``: NumPy integer index (0-3, i.e. A-D) of the option
                    with the largest next-token logit.
                ``"conf"``: NumPy array of four softmax probabilities
                    computed over the option logits only.
            Only the first sequence of the tokenized batch is scored.

        Raises:
            KeyError: If ``data`` has no ``"prompt"`` entry.
        """
        with torch.no_grad():
            prompt = data.pop("prompt")
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
            outputs = self.model(**inputs)
            # Logits at the final position score the token that would follow
            # the prompt.
            last_token_logits = outputs.logits[:, -1, :]
            # Upcast before leaving torch: Tensor.numpy() rejects bfloat16,
            # and float32 is a no-op for models already in full precision.
            options_tokens_logits = (
                last_token_logits[:, self.options_tokens]
                .detach()
                .float()
                .cpu()
                .numpy()
            )
            conf = softmax(options_tokens_logits[0])
            pred = np.argmax(options_tokens_logits[0])
            return [{"pred": pred, "conf": conf}]