# Hugging Face Hub upload header (was raw page text, not valid Python):
# Upload DocumentQuestionAnsweringPipeline — commit afa1e35 (verified), by kitopang
from transformers import Pipeline, AutoModelForCausalLM, AutoTokenizer
from transformers.pipelines import PIPELINE_REGISTRY
import torch
from torch.nn.functional import softmax
import numpy as np
from search import search_embeddings
class DocumentQuestionAnsweringPipeline(Pipeline):
    """Retrieval-augmented document QA pipeline.

    Given a question string, retrieves a supporting document via the
    project-local ``search_embeddings``, builds a system/context/chat
    prompt, generates an answer with a causal LM, and returns the answer
    together with a confidence score (geometric mean of the per-token
    probabilities of the generated tokens).
    """

    def _sanitize_parameters(self, **kwargs):
        # No runtime-configurable parameters: empty kwarg dicts for
        # preprocess, _forward, and postprocess respectively.
        return {}, {}, {}

    def preprocess(self, inputs):
        """Build the prompt for the question *inputs* and tokenize it.

        Returns the tokenizer's tensor dict, moved to the model device.
        """
        messages = [{"role": "user", "content": inputs}]
        # Retrieve context for the question.
        # NOTE(review): search_embeddings is project-local; presumably it
        # returns a plain-text document — confirm against its definition.
        document = search_embeddings(inputs)
        system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
        instruction = "Please give a full and complete answer for the question."
        # Prepend the instruction to the first user turn only.
        for item in messages:
            if item['role'] == "user":
                item['content'] = instruction + " " + item['content']
                break
        conversation = '\n\n'.join(
            ["User: " + item["content"] if item["role"] == "user" else "Assistant: " + item["content"] for item in
             messages]) + "\n\nAssistant:"
        formatted_input = system + "\n\n" + document + "\n\n" + conversation
        return self.tokenizer(self.tokenizer.bos_token + formatted_input, return_tensors="pt").to(self.model.device)

    def _forward(self, model_inputs):
        """Run generation; keep inputs so postprocess can strip the prompt."""
        # Stop on either the model's EOS or the Llama-3 end-of-turn token.
        terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
        outputs = self.model.generate(**model_inputs,
                                      max_new_tokens=128,
                                      eos_token_id=terminators,
                                      output_scores=True,
                                      return_dict_in_generate=True)
        return {"output": outputs, "input": model_inputs}

    def postprocess(self, model_outputs):
        """Decode the answer and compute its confidence.

        Returns ``{'guess': str, 'confidence': float}`` where confidence
        is the geometric mean of the probabilities the model assigned to
        the tokens it actually generated.
        """
        output = model_outputs['output']
        model_input = model_outputs['input']  # renamed: don't shadow builtin `input`
        sequences = output.sequences
        # Everything past the prompt length is generated text.
        input_length = model_input['input_ids'].shape[-1]
        generated_sequence = sequences[0, input_length:]
        decoded_resp = self.tokenizer.decode(generated_sequence, skip_special_tokens=True)
        scores = output.scores
        if not scores:
            # Nothing was generated: avoid dividing by zero below.
            return {'guess': decoded_resp, 'confidence': 0.0}
        # Fix: score the token that was actually emitted (not the argmax of
        # each step), using log_softmax for numerical stability. Identical
        # under greedy decoding; also correct under sampling.
        log_probs = [
            torch.log_softmax(step_scores, dim=-1)[0, token_id].item()
            for step_scores, token_id in zip(scores, generated_sequence)
        ]
        confidence = float(np.exp(np.mean(log_probs)))
        return {'guess': decoded_resp, 'confidence': confidence}