File size: 1,927 Bytes
afce62f
c2760f9
f364f84
f00f443
ef719e5
 
afce62f
c2760f9
f364f84
 
 
 
c2760f9
 
1c71c49
ef719e5
f364f84
afce62f
03a50c0
1742c4b
 
ef719e5
 
17db25e
ef719e5
e41f35b
f364f84
 
 
 
 
 
 
 
f00f443
 
f364f84
 
 
 
 
 
f00f443
f364f84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from unsloth import FastLanguageModel
from peft import PeftModel


class EndpointHandler():
    """Inference-endpoint handler: serves an entity-extraction model built
    from a 4-bit Mistral-7B-Instruct base plus a PEFT (LoRA) adapter.
    """

    def __init__(self, path="."):
        """Load the base model/tokenizer and attach the adapter weights.

        Args:
            path: Present to satisfy the endpoint-handler contract; the model
                is loaded from the hub, so this argument is not used here.
        """
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name="mistralai/Mistral-7B-Instruct-v0.2",  # Supports Llama, Mistral - replace this!
            max_seq_length=2048,
            dtype=None,  # let unsloth auto-select the compute dtype
            load_in_4bit=True,
        )

        # Attach the fine-tuned entity-extraction LoRA adapter on top of the
        # quantized base model.
        model = PeftModel.from_pretrained(
            model, "SITG/custsvc_entityextract_mistralv0.2instruct"
        )
        self.model = model
        self.model.eval()
        self.device_map = "cuda"  # the device inputs are moved to
        self.tokenizer = tokenizer

    def __call__(self, data: dict) -> dict:
        """Generate a model response for the prompt in ``data["inputs"]``.

        Args:
            data: Request payload; the prompt is taken from ``data["inputs"]``
                (falling back to ``data`` itself when the key is absent).

        Returns:
            ``{"response": <extracted response text>}``.

        Raises:
            ValueError: If the prompt is empty.
        """
        inputs = data.pop("inputs", data)
        if not inputs:
            raise ValueError("prompt cannot be empty")
        prompt = inputs + "\n### Response:\n"
        model_input = self.tokenizer(prompt, return_tensors="pt").to(self.device_map)
        output = self.model.generate(
            input_ids=model_input["input_ids"],
            # Pass the mask explicitly so transformers does not have to infer
            # it (and warn) — matters whenever pad and eos tokens coincide.
            attention_mask=model_input["attention_mask"],
            use_cache=False,
            # NOTE(review): top_k=1 makes do_sample=True effectively greedy;
            # temperature/top_p are then inert — confirm this is intended.
            temperature=0.1, top_k=1, top_p=1.0, repetition_penalty=1.4,
            max_new_tokens=256,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=self.tokenizer.eos_token_id,
            num_beams=1,
            num_return_sequences=1,
        )
        decoded = self.tokenizer.decode(output[0])
        # Post-process: keep text before EOS, take what follows the
        # "Response:" marker (partition instead of split[1] so a missing
        # marker yields an empty response rather than an IndexError),
        # drop any trailing "###" section, and strip markdown code fences.
        before_eos = decoded.split(self.tokenizer.eos_token)[0]
        _, _, after_marker = before_eos.partition("Response:")
        result = (after_marker
                  .strip()
                  .split("###")[0]
                  .replace("```json", "")
                  .replace("```", ""))

        return {"response": result}