wealthcoders committed on
Commit
53772d5
·
verified ·
1 Parent(s): dcbcc92

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +45 -0
handler.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import GenerationConfig, AutoProcessor, AutoTokenizer, AutoModelForImageTextToText, Qwen2_5_VLForConditionalGeneration
2
+ from qwen_vl_utils import process_vision_info
3
+
4
# Hub id of the vision-language model served by this endpoint.
model_name = "Qwen/Qwen2.5-VL-7B-Instruct"
# NOTE(review): the original comment here ("If it is an any form of ID -
# return only list of keys and values.") reads like a leftover system-prompt
# instruction for ID-document extraction, not code documentation — it has no
# effect on behavior; confirm whether it should live in the request prompt
# instead.
6
+
7
class EndpointHandler:
    """Inference Endpoints handler serving Qwen2.5-VL multimodal chat.

    Loads the Qwen2.5-VL model and its processor once at construction time,
    then answers requests whose JSON payload carries a ``messages`` list in
    the Qwen-VL chat format (text plus optional image/video entries).
    """

    def __init__(self, path: str = model_name):
        """Load model and processor.

        Parameters
        ----------
        path : str
            Model directory or hub id. Defaults to the module-level
            ``model_name`` so existing bare ``EndpointHandler()`` callers
            keep working; Hugging Face Inference Endpoints passes the local
            snapshot path here.
        """
        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            path, torch_dtype="auto", device_map="cuda"
        )
        self.processor = AutoProcessor.from_pretrained(path)

    async def __call__(self, data):
        """Generate one completion for a chat request.

        Parameters
        ----------
        data : dict
            Request payload; must contain a non-empty ``messages`` key with
            the conversation in Qwen-VL message format.

        Returns
        -------
        str
            The decoded completion with the echoed prompt tokens stripped.

        Raises
        ------
        ValueError
            If the payload has no ``messages`` entry (previously this
            surfaced as an opaque failure inside the chat-template call).
        """
        messages = data.get("messages")
        if not messages:
            raise ValueError("Request payload must include a non-empty 'messages' list.")

        gen_cfg = GenerationConfig(
            max_new_tokens=2048,
            no_repeat_ngram_size=3,
            # Bug fix: the GenerationConfig parameter is `repetition_penalty`;
            # the original `repeat_penalty` was an unknown kwarg and the
            # intended penalty was never applied.
            repetition_penalty=1.2,
            # NOTE(review): `early_stopping` only affects beam search
            # (num_beams > 1); kept from the original, but it is a no-op
            # with the default sampling/greedy setup.
            early_stopping=True,
        )

        # Render the chat template to a prompt string and gather any
        # image/video inputs referenced by the messages.
        text = self.processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        # Bug fix: the processor returns CPU tensors while the model lives on
        # CUDA (device_map="cuda" above); without this move, generate() fails
        # with a device-mismatch error.
        inputs = inputs.to(self.model.device)

        generated_ids = self.model.generate(**inputs, generation_config=gen_cfg)
        # Drop the echoed prompt tokens so only newly generated text is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return output_text[0]