wealthcoders
/

qwen3-vl

Image-Text-to-Text

Model card Files Files and versions

wealthcoders committed on Oct 22, 2025

Commit

16ceee2

·

verified ·

1 Parent(s): 248decf

Update handler.py

Files changed (1) hide show

handler.py +10 -9

handler.py CHANGED Viewed

@@ -3,26 +3,27 @@ from typing import Dict, List, Any
 import torch
 class EndpointHandler:
     # Pre-commit side of the diff: lines marked "-" are the ones this commit removed.
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         # The removed lines below load the model and processor inside __call__,
         # so they run on every call rather than once per handler instance.
-        model = Qwen3VLForConditionalGeneration.from_pretrained(
-            "Qwen/Qwen3-VL-8B-Instruct",
-            device_map="auto"  # Automatically uses available GPUs
-        )
-        processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-8B-Instruct")
         # Prepare your messages with image and text
         messages = data.get("messages")
         # Process the input and generate a response
-        inputs = processor.apply_chat_template(
             messages=messages,
             tokenize=True,
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt"
         )
-        inputs = inputs.to(model.device)
-        generated_ids = model.generate(**inputs, max_new_tokens=128)
-        output_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
         # Returns only the first decoded string, not the list the annotation declares.
         return output_text[0]

 import torch
 class EndpointHandler:
     """Request handler that answers chat messages with a Qwen3-VL model.

     The processor and model are loaded once in ``__init__`` and reused by
     every ``__call__``, so per-request work is limited to templating,
     generation and decoding.
     """

     def __init__(self, path: str = "Qwen/Qwen3-VL-8B-Instruct"):
         """Load the processor and model from *path*.

         Args:
             path: Model id or local directory handed to ``from_pretrained``.
         """
         self.processor = AutoProcessor.from_pretrained(path)
         self.model = Qwen3VLForConditionalGeneration.from_pretrained(
             path, device_map="auto"
         )
         self.model.eval()

     def __call__(self, data: Dict[str, Any]) -> str:
         """Generate a reply for the chat messages in *data*.

         Args:
             data: Request payload; the conversation is read from its
                 ``"messages"`` key.

         Returns:
             The first decoded sequence produced by the model.

         Raises:
             ValueError: If ``"messages"`` is missing or empty.
         """
         messages = data.get("messages")
         if not messages:
             raise ValueError(
                 'request payload must contain a non-empty "messages" list'
             )

         # The conversation is passed positionally: the processor's
         # apply_chat_template names this parameter ``conversation``, so a
         # ``messages=`` keyword would leave the required argument unfilled.
         inputs = self.processor.apply_chat_template(
             messages,
             tokenize=True,
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt",
         )
         inputs = inputs.to(self.model.device)
         generated_ids = self.model.generate(**inputs, max_new_tokens=128)
         # NOTE(review): the full generated sequences are decoded here, which
         # presumably includes the prompt tokens -- confirm whether the echoed
         # prompt is intended before trimming it.
         output_text = self.processor.batch_decode(
             generated_ids, skip_special_tokens=True
         )
         return output_text[0]