from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModel

class EndpointHandler():
    """Hugging Face Inference Endpoints handler for ChatGLM2-6B (int4).

    Loads the quantized ChatGLM2 model once at startup and serves chat
    requests through ``__call__``.
    """

    def __init__(self, path: str = "THUDM/chatglm2-6b-int4"):
        """Load tokenizer and model.

        Args:
            path: model directory or hub repo id. The Inference Endpoints
                runtime passes the local snapshot directory here; the default
                preserves the previous hard-coded behavior for direct use.
        """
        # trust_remote_code is required because ChatGLM ships custom modeling
        # code. NOTE(review): this executes code from the repo — acceptable
        # only because the source repo is trusted.
        self.tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
        # half() + cuda(): the int4 checkpoint is intended to run in fp16 on
        # GPU; this will raise on CPU-only hosts — TODO confirm the endpoint
        # is always provisioned with a GPU.
        self.model = AutoModel.from_pretrained(path, trust_remote_code=True).half().cuda()
        self.model = self.model.eval()  # inference mode (disables dropout etc.)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run one chat turn.

        data args:
            inputs (:obj:`str`): the user prompt.
            history (:obj:`list`, optional): prior conversation turns as
                returned by a previous call; defaults to an empty history.
        Return:
            A :obj:`list` with one dict holding the model response and the
            updated history; will be serialized and returned.
        """
        inputs = data.pop("inputs", data)
        # Bug fix: history was previously hard-coded to [], so multi-turn
        # clients silently lost their context. Accept it from the payload;
        # omitting the key reproduces the old single-turn behavior.
        history = data.pop("history", [])
        response, history = self.model.chat(self.tokenizer, inputs, history=history)
        return [{'response': response, 'history': history}]