from typing import Any, Dict, List

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

class EndpointHandler:
    def __init__(self, path=""):
        # Read the PEFT adapter config to find out which base model the
        # adapter was trained on.
        self.peft_config = PeftConfig.from_pretrained(path)

        # Load the base model in half precision, sharding it across the
        # available devices (device_map="auto" requires accelerate).
        self.base_model = AutoModelForCausalLM.from_pretrained(
            self.peft_config.base_model_name_or_path,
            return_dict=True,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )

        # The tokenizer ships with the base model, not the adapter repo.
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.peft_config.base_model_name_or_path,
            trust_remote_code=True,
        )

        # Attach the adapter weights and switch to inference mode.
        self.model = PeftModel.from_pretrained(self.base_model, path)
        self.model.eval()
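        # Optional speed-up (a sketch, not part of the original handler):
        # PEFT's merge_and_unload() folds the adapter weights into the base
        # model, removing the LoRA indirection at inference time at the cost
        # of no longer being able to swap adapters:
        # self.model = self.model.merge_and_unload()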
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data (:obj:`Dict[str, Any]`):
                Input data payload. Expects a key 'inputs' containing the
                prompt text and an optional 'parameters' dict with generation
                settings such as 'temperature', 'max_new_tokens', and 'top_p'.
        """
        # Pull the prompt out of the payload; fall back to the raw payload if
        # no 'inputs' key is present.
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", {})

        # Generation defaults, each overridable per request.
        max_new_tokens = parameters.get("max_new_tokens", 512)
        temperature = parameters.get("temperature", 0.7)
        top_p = parameters.get("top_p", 0.9)

        # Batched requests are not supported; only the first prompt is used.
        if isinstance(inputs, list):
            inputs = inputs[0]

        # Tokenize the prompt and move it onto the model's device.
        input_ids = self.tokenizer(inputs, return_tensors="pt").input_ids.to(self.model.device)

        # Sample a completion without tracking gradients.
        with torch.no_grad():
            output_ids = self.model.generate(
                input_ids=input_ids,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )
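        # Caveat (a defensive sketch, not in the original handler): with
        # do_sample=True, transformers rejects temperature values <= 0, so a
        # client asking for greedy decoding via temperature=0 would trigger a
        # ValueError. One hedge is to toggle sampling off in that case:
        # do_sample=bool(temperature and temperature > 0)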

        # Decode only the newly generated tokens, slicing off the echoed
        # prompt tokens.
        generated_text = self.tokenizer.decode(
            output_ids[0][input_ids.shape[1]:], skip_special_tokens=True
        )

        return [{"generated_text": generated_text}]
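
# Usage sketch (illustrative): a minimal local smoke test, assuming the
# adapter repo is available at the hypothetical local path "./my-adapter".
# Hugging Face Inference Endpoints invokes the handler the same way,
# passing the deserialized request body as `data`.
if __name__ == "__main__":
    handler = EndpointHandler(path="./my-adapter")
    response = handler(
        {
            "inputs": "Write a one-sentence summary of what this handler does.",
            "parameters": {"max_new_tokens": 64, "temperature": 0.2},
        }
    )
    print(response[0]["generated_text"])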