HHBlair committed on
Commit
eb1f00b
·
verified ·
1 Parent(s): b5d3d24

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +32 -0
handler.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # handler.py
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
class EndpointHandler:
    """Serve chat completions from a causal language model.

    The tokenizer and model found at ``path`` are loaded once at construction
    time and wrapped in a ``text-generation`` pipeline. Each call renders the
    request's chat messages with the tokenizer's chat template, generates a
    continuation, and returns it in a ``choices`` envelope.
    """

    # Fallbacks used when the request does not supply the parameter.
    _DEFAULT_MAX_NEW_TOKENS = 500
    _DEFAULT_TEMPERATURE = 0.45

    def __init__(self, path=""):
        """Load the tokenizer, the model and the generation pipeline.

        Args:
            path: Location handed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # NOTE(review): recent transformers releases prefer
        # ``quantization_config=BitsAndBytesConfig(load_in_4bit=True)`` over
        # the bare ``load_in_4bit`` keyword -- confirm against the pinned
        # transformers version before switching.
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            torch_dtype=torch.float16,
            device_map="auto",
            load_in_4bit=True,
        )
        self.pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
        )

    @staticmethod
    def _extract_messages(inputs):
        """Normalize the request's ``inputs`` value into a list of messages."""
        if isinstance(inputs, dict):
            return inputs.get("messages") or []
        if isinstance(inputs, str):
            # A bare string is treated as a single user turn.
            return [{"role": "user", "content": inputs}]
        if isinstance(inputs, list):
            # Already a list of chat messages.
            return inputs
        return []

    def __call__(self, data):
        """Generate an assistant reply for one request.

        Args:
            data: Request payload. ``data["inputs"]`` may be a dict holding a
                ``"messages"`` list, a bare list of messages, or a plain
                string. ``data["parameters"]`` may carry ``"max_tokens"``
                (or ``"max_new_tokens"``) and ``"temperature"``.

        Returns:
            ``{"choices": [{"message": {"role": "assistant",
            "content": <generated text>}}]}``.
        """
        data = data or {}
        messages = self._extract_messages(data.get("inputs"))
        # ``parameters`` may be absent or explicitly null.
        params = data.get("parameters") or {}

        max_new_tokens = params.get("max_tokens")
        if max_new_tokens is None:
            max_new_tokens = params.get("max_new_tokens")
        if max_new_tokens is None:
            max_new_tokens = self._DEFAULT_MAX_NEW_TOKENS

        temperature = params.get("temperature")
        if temperature is None:
            temperature = self._DEFAULT_TEMPERATURE

        generation_kwargs = {"max_new_tokens": max_new_tokens}
        if temperature > 0:
            generation_kwargs["temperature"] = temperature
            generation_kwargs["do_sample"] = True
        else:
            # Sampling needs a strictly positive temperature; a request for
            # temperature 0 is served with greedy decoding instead.
            generation_kwargs["do_sample"] = False

        prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        result = self.pipeline(prompt, **generation_kwargs)
        # The pipeline output starts with the prompt; keep only the new text.
        text = result[0]["generated_text"][len(prompt):]
        return {"choices": [{"message": {"role": "assistant", "content": text}}]}