wealthcoders committed on
Commit
53772d5
·
verified ·
1 Parent(s): dcbcc92

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +45 -0
handler.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import GenerationConfig, AutoProcessor, AutoTokenizer, AutoModelForImageTextToText, Qwen2_5_VLForConditionalGeneration
2
+ from qwen_vl_utils import process_vision_info
3
+
4
# Hub id of the vision-language model served by this endpoint.
model_name = "Qwen/Qwen2.5-VL-7B-Instruct"
# NOTE(review): the original comment here ("If it is an any form of ID -
# return only list of keys and values.") reads like a leftover system-prompt
# instruction for ID-document extraction, not code documentation — it has no
# effect on behavior; confirm whether it should live in the request prompt
# instead.
6
+
7
class EndpointHandler:
    """Inference Endpoints handler serving Qwen2.5-VL multimodal chat.

    Loads the Qwen2.5-VL model and its processor once at construction time,
    then answers requests whose JSON payload carries a ``messages`` list in
    the Qwen-VL chat format (text plus optional image/video entries).
    """

    def __init__(self, path: str = model_name):
        """Load model and processor.

        Parameters
        ----------
        path : str
            Model directory or hub id. Defaults to the module-level
            ``model_name`` so existing bare ``EndpointHandler()`` callers
            keep working; Hugging Face Inference Endpoints passes the local
            snapshot path here.
        """
        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            path, torch_dtype="auto", device_map="cuda"
        )
        self.processor = AutoProcessor.from_pretrained(path)

    async def __call__(self, data):
        """Generate one completion for a chat request.

        Parameters
        ----------
        data : dict
            Request payload; must contain a non-empty ``messages`` key with
            the conversation in Qwen-VL message format.

        Returns
        -------
        str
            The decoded completion with the echoed prompt tokens stripped.

        Raises
        ------
        ValueError
            If the payload has no ``messages`` entry (previously this
            surfaced as an opaque failure inside the chat-template call).
        """
        messages = data.get("messages")
        if not messages:
            raise ValueError("Request payload must include a non-empty 'messages' list.")

        gen_cfg = GenerationConfig(
            max_new_tokens=2048,
            no_repeat_ngram_size=3,
            # Bug fix: the GenerationConfig parameter is `repetition_penalty`;
            # the original `repeat_penalty` was an unknown kwarg and the
            # intended penalty was never applied.
            repetition_penalty=1.2,
            # NOTE(review): `early_stopping` only affects beam search
            # (num_beams > 1); kept from the original, but it is a no-op
            # with the default sampling/greedy setup.
            early_stopping=True,
        )

        # Render the chat template to a prompt string and gather any
        # image/video inputs referenced by the messages.
        text = self.processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        # Bug fix: the processor returns CPU tensors while the model lives on
        # CUDA (device_map="cuda" above); without this move, generate() fails
        # with a device-mismatch error.
        inputs = inputs.to(self.model.device)

        generated_ids = self.model.generate(**inputs, generation_config=gen_cfg)
        # Drop the echoed prompt tokens so only newly generated text is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return output_text[0]