# Custom handler for Hugging Face Inference Endpoints: reranks a list of
# documents against a query by asking a causal LM for a "yes"/"no" relevance
# judgment and returning the "yes" probability for each document.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Dict, List, Any


class EndpointHandler:
    def __init__(self, path=""):
        # Left padding so that every sequence ends at the same position;
        # the relevance score is read from the logits of the last token.
        self.tokenizer = AutoTokenizer.from_pretrained(path, padding_side='left')
        self.model = AutoModelForCausalLM.from_pretrained(path, device_map="cpu", torch_dtype="auto").eval()

        # Token ids of the two admissible answers.
        self.token_false_id = self.tokenizer.convert_tokens_to_ids("no")
        self.token_true_id = self.tokenizer.convert_tokens_to_ids("yes")

        # Fixed chat-template prefix (system turn + start of user turn) and
        # suffix (end of user turn + empty assistant "think" block) that wrap
        # every query/document pair.
        prefix = "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
        suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
        self.prefix_tokens = self.tokenizer.encode(prefix, add_special_tokens=False)
        self.suffix_tokens = self.tokenizer.encode(suffix, add_special_tokens=False)

    def format_instruction(self, instruction, query, doc):
        # Compose the body of the user turn; fall back to the default
        # retrieval task description when no instruction is supplied.
        if instruction is None:
            instruction = 'Given a web search query, retrieve relevant passages that answer the query'
        output = "<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}".format(instruction=instruction, query=query, doc=doc)
        return output

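    # Illustration only (not executed): together with the prefix and suffix
    # encoded in __init__, the text the model ultimately sees for one pair has
    # this shape, and the score is taken from its next ("yes"/"no") token:
    #
    #   <|im_start|>system
    #   Judge whether the Document meets the requirements based on the Query and
    #   the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>
    #   <|im_start|>user
    #   <Instruct>: Given a web search query, retrieve relevant passages that answer the query
    #   <Query>: ...
    #   <Document>: ...<|im_end|>
    #   <|im_start|>assistant
    #   <think>
    #
    #   </think>
    #
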
    def process_inputs(self, pairs):
        # Tokenize the pair texts without padding, leaving room for the fixed
        # prefix/suffix tokens within the overall length budget.
        max_length = 8192
        inputs = self.tokenizer(
            pairs, padding=False, truncation='longest_first',
            return_attention_mask=False, max_length=max_length - len(self.prefix_tokens) - len(self.suffix_tokens)
        )
        # Wrap every sequence with the chat-template prefix and suffix.
        for i, ele in enumerate(inputs['input_ids']):
            inputs['input_ids'][i] = self.prefix_tokens + ele + self.suffix_tokens
        # Pad to a rectangular batch and move the tensors onto the model device.
        inputs = self.tokenizer.pad(inputs, padding=True, return_tensors="pt", max_length=max_length)
        for key in inputs:
            inputs[key] = inputs[key].to(self.model.device)
        return inputs

    # Inference only: no gradients are needed for scoring.
    @torch.no_grad()
    def compute_logits(self, inputs, **kwargs):
        # Logits of the last position for every sequence in the batch.
        batch_scores = self.model(**inputs, cache_implementation="static").logits[:, -1, :]
        # Keep only the "yes" and "no" answer tokens and softmax over the two,
        # so each score is the probability of "yes" given a yes/no choice.
        true_vector = batch_scores[:, self.token_true_id]
        false_vector = batch_scores[:, self.token_false_id]
        batch_scores = torch.stack([false_vector, true_vector], dim=1)
        batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
        scores = batch_scores[:, 1].exp().tolist()
        return scores

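    # Equivalently, for each pair i:
    #     score_i = exp(l_yes) / (exp(l_yes) + exp(l_no))
    # where l_yes and l_no are the last-position logits of the "yes" and "no"
    # tokens, i.e. a two-way softmax restricted to the answer tokens.
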
    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[float]]:
        # Default task description applied to every request.
        task = 'Given a web search query, retrieve relevant passages that answer the query'

        # Inference Endpoints deliver the payload under "inputs"; fall back to
        # the raw payload if that key is absent.
        inputs = data.pop("inputs", data)

        if 'query' not in inputs or 'documents' not in inputs:
            raise ValueError("query and documents are required.")

        # One (instruction, query, document) prompt per document, scored as a batch.
        pairs = [self.format_instruction(task, inputs['query'], doc) for doc in inputs['documents']]

        inputs = self.process_inputs(pairs)
        scores = self.compute_logits(inputs)

        return dict(scores=scores)
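

# Minimal local smoke test: a sketch only, not part of the Inference Endpoints
# request/response contract. The checkpoint path "./model" and the example
# payload below are assumptions for illustration.
if __name__ == "__main__":
    handler = EndpointHandler(path="./model")
    payload = {
        "inputs": {
            "query": "What is the capital of China?",
            "documents": [
                "The capital of China is Beijing.",
                "Gravity causes objects to fall toward the ground.",
            ],
        }
    }
    print(handler(payload))  # -> {"scores": [<relevance in [0, 1] per document>]}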