| | |
| | import torch |
| | from transformers import AutoTokenizer, AutoModelForCausalLM |
| |
|
# Identifier handed to both `from_pretrained` calls in EndpointHandler.__init__
# (tokenizer and model). It has the "<user>/<repo>" form, so it is presumably a
# Hugging Face Hub repository id rather than a local directory - confirm.
MODEL_PATH = "GilbertAkham/deepseek-R1-multitask-lora"
| |
|
class EndpointHandler:
    """Load a causal language model once and answer text-generation requests.

    An instance is callable: it takes a request mapping, runs sampling-based
    generation on the prompt and returns the decoded text.
    """

    # Sampling defaults applied to every request. A request may override these
    # keys -- and only these keys -- through its optional "parameters" mapping,
    # so arbitrary keyword arguments never reach `generate`.
    _DEFAULT_GENERATION_KWARGS = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True,
    }

    def __init__(self, path=""):
        """Load the tokenizer and model named by ``MODEL_PATH``.

        Args:
            path: Accepted for interface compatibility but not used; loading
                always goes through the module-level ``MODEL_PATH``.
        """
        print("Loading merged model...")
        # NOTE(security): trust_remote_code=True allows the loaded repository
        # to execute its own Python code; keep MODEL_PATH pointed at a trusted
        # source.
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        # Switch to evaluation mode; this handler only runs inference.
        self.model.eval()
        print("Model loaded successfully.")

    def __call__(self, data):
        """Generate text for one request.

        Args:
            data: Mapping with the prompt under ``"inputs"`` (defaults to an
                empty string) and, optionally, a ``"parameters"`` dict whose
                ``max_new_tokens``, ``temperature``, ``top_p`` and
                ``do_sample`` entries override the defaults. Other keys in
                ``"parameters"`` are ignored, as is a missing, ``None`` or
                non-dict ``"parameters"`` value.

        Returns:
            ``{"generated_text": <decoded text>}`` where the text is the first
            returned sequence decoded with special tokens skipped.
            NOTE(review): for decoder-only models ``generate`` normally
            returns prompt + continuation, so the text presumably echoes the
            prompt -- confirm that callers expect this.
        """
        prompt = data.get("inputs", "")

        overrides = data.get("parameters")
        if not isinstance(overrides, dict):
            overrides = {}
        generation_kwargs = {
            key: overrides.get(key, default)
            for key, default in self._DEFAULT_GENERATION_KWARGS.items()
        }

        # Move the encoded prompt to the device the model reports.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                **generation_kwargs,
                # The EOS token doubles as the padding token.
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )
        return {"generated_text": self.tokenizer.decode(outputs[0], skip_special_tokens=True)}
| |
|