| from transformers import Pipeline, AutoModelForCausalLM, AutoTokenizer |
| from transformers.pipelines import PIPELINE_REGISTRY |
| import torch |
| from torch.nn.functional import softmax |
| import numpy as np |
class DocumentQuestionAnsweringPipeline(Pipeline):
    """Chat-style document question-answering pipeline over a causal LM.

    Formats the user's question into a system/instruction chat prompt,
    generates an answer, and returns it together with a confidence score:
    the geometric mean of the probabilities the model assigned to the
    tokens it actually generated.
    """

    def _sanitize_parameters(self, **kwargs):
        """Split call-time kwargs into (preprocess, forward, postprocess) dicts.

        Only ``max_new_tokens`` is recognized (forwarded to generation);
        all other kwargs are ignored, matching the original behavior.
        """
        forward_kwargs = {}
        if "max_new_tokens" in kwargs:
            forward_kwargs["max_new_tokens"] = kwargs["max_new_tokens"]
        return {}, forward_kwargs, {}

    def preprocess(self, inputs):
        """Build the chat-QA prompt around the question and tokenize it.

        Args:
            inputs: the user's question as a plain string.

        Returns:
            Tokenized tensors (``input_ids`` etc.) moved to the model's device.
        """
        system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
        instruction = "Please give a full and complete answer for the question."
        # No retrieval is wired in yet, so the context block is empty.
        document = ""

        # Single-turn conversation: one user message, then the assistant cue.
        conversation = "User: " + instruction + " " + inputs + "\n\nAssistant:"
        formatted_input = system + "\n\n" + document + "\n\n" + conversation

        # Some tokenizers define no BOS token (bos_token is None); fall back
        # to the bare prompt instead of raising a TypeError on concatenation.
        bos = self.tokenizer.bos_token or ""
        return self.tokenizer(bos + formatted_input, return_tensors="pt").to(self.model.device)

    def _forward(self, model_inputs, max_new_tokens=128):
        """Generate the answer, keeping per-step scores for confidence.

        Args:
            model_inputs: tokenized prompt from :meth:`preprocess`.
            max_new_tokens: generation budget (default 128, as before;
                overridable via the pipeline call kwargs).
        """
        # Stop on EOS or the chat end-of-turn token; convert_tokens_to_ids
        # may return None when "<|eot_id|>" is not in the vocabulary, so
        # filter missing terminators out rather than passing None to generate.
        eot_id = self.tokenizer.convert_tokens_to_ids("<|eot_id|>")
        terminators = [t for t in (self.tokenizer.eos_token_id, eot_id) if t is not None]

        outputs = self.model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            eos_token_id=terminators,
            output_scores=True,
            return_dict_in_generate=True,
        )
        return {"output": outputs, "input": model_inputs}

    def postprocess(self, model_outputs):
        """Decode the generated answer and compute its confidence.

        Confidence is exp(mean log-probability) over the tokens the model
        actually emitted. Indexing the step scores with the generated token
        ids (rather than taking the per-step maximum) is correct for any
        decoding strategy, not just greedy search.

        Returns:
            dict with ``guess`` (decoded answer string) and ``confidence``
            (float in [0, 1]).
        """
        output = model_outputs["output"]
        model_inputs = model_outputs["input"]  # renamed: don't shadow builtin `input`

        input_length = model_inputs["input_ids"].shape[-1]
        generated_ids = output.sequences[0, input_length:]
        decoded_resp = self.tokenizer.decode(generated_ids, skip_special_tokens=True)

        # log_softmax is numerically stable, unlike log(softmax(...)).
        log_probs = []
        for step, score in enumerate(output.scores):
            step_log_probs = torch.log_softmax(score[0], dim=-1)
            log_probs.append(step_log_probs[generated_ids[step]].item())

        # Nothing generated (e.g. immediate EOS): report zero confidence
        # instead of dividing by len(log_probs) == 0.
        confidence = float(np.exp(np.mean(log_probs))) if log_probs else 0.0

        return {"guess": decoded_resp, "confidence": confidence}
|
|