# llama3_generative_qa_2 / pipeline.py
# (Hugging Face Hub page residue, commented out so the file parses:)
# kitopang's picture
# Upload DocumentQuestionAnsweringPipeline
# d0311b2 verified
from transformers import Pipeline, AutoModelForCausalLM, AutoTokenizer
from transformers.pipelines import PIPELINE_REGISTRY
import torch
from torch.nn.functional import softmax
import numpy as np
class DocumentQuestionAnsweringPipeline(Pipeline):
    """Generative document-QA pipeline.

    Wraps a causal LM behind a ChatQA-style prompt: ``preprocess`` builds the
    system + instruction + conversation prompt and tokenizes it, ``_forward``
    generates up to 128 new tokens while keeping per-step scores, and
    ``postprocess`` decodes the continuation and attaches a confidence value
    (geometric mean of the probabilities of the generated tokens).
    """

    def _sanitize_parameters(self, **kwargs):
        # No runtime-configurable parameters: preprocess, _forward and
        # postprocess each receive an empty kwargs dict.
        return {}, {}, {}

    def preprocess(self, inputs):
        """Build the prompt from the raw question string and tokenize it.

        ``inputs`` is treated as the user's question text.  NOTE(review): the
        document context is currently hard-coded to "" (see the commented-out
        ``inputs["document"]`` line) — the model is instructed to answer from
        context, but no context is ever supplied; confirm whether callers are
        expected to pass a document.

        Returns the tokenized prompt as tensors on the model's device.
        """
        messages = [{"role": "user", "content": inputs}]
        # document = inputs["document"]
        document = ""
        system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
        instruction = "Please give a full and complete answer for the question."
        # Prepend the instruction to the first user turn only.
        for item in messages:
            if item['role'] == "user":
                item['content'] = instruction + " " + item['content']
                break
        conversation = '\n\n'.join(
            ["User: " + item["content"] if item["role"] == "user" else "Assistant: " + item["content"] for item in
             messages]) + "\n\nAssistant:"
        formatted_input = system + "\n\n" + document + "\n\n" + conversation
        return self.tokenizer(self.tokenizer.bos_token + formatted_input, return_tensors="pt").to(self.model.device)

    def _forward(self, model_inputs):
        """Generate a continuation, keeping per-step scores for postprocess.

        Stops on either the tokenizer's EOS token or the Llama-3 ``<|eot_id|>``
        turn terminator.  The original ``model_inputs`` are passed through so
        postprocess can strip the prompt tokens from the output sequence.
        """
        terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
        outputs = self.model.generate(**model_inputs,
                                      max_new_tokens=128,
                                      eos_token_id=terminators,
                                      output_scores=True,
                                      return_dict_in_generate=True)
        return {"output": outputs, "input": model_inputs}

    def postprocess(self, model_outputs):
        """Decode the generated answer and compute a confidence score.

        Returns ``{'guess': str, 'confidence': float}`` where confidence is
        the geometric mean of the per-step probabilities of the tokens that
        were actually generated.
        """
        output = model_outputs['output']
        model_input = model_outputs['input']  # renamed: don't shadow builtin `input`
        sequences = output.sequences
        input_length = model_input['input_ids'].shape[-1]
        generated_sequence = sequences[0, input_length:]
        decoded_resp = self.tokenizer.decode(generated_sequence, skip_special_tokens=True)
        scores = output.scores
        # Guard: with no generated steps the original code divided by zero.
        if not scores:
            return {'guess': decoded_resp, 'confidence': 0.0}
        # Use log_softmax for numerical stability (softmax().max() followed by
        # np.log can underflow to log(0)), and score the token that was
        # actually emitted rather than the per-step argmax — identical under
        # greedy decoding, correct under sampling.
        log_probs = [
            torch.log_softmax(step_scores, dim=-1)[0, token_id].item()
            for token_id, step_scores in zip(generated_sequence, scores)
        ]
        confidence = float(np.exp(np.mean(log_probs)))
        return {'guess': decoded_resp, 'confidence': confidence}