Qwen-VL-7B-2

@@ -7,21 +7,29 @@ from qwen_vl_utils import process_vision_info
 class EndpointHandler:
     def __init__(self, path=""):
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
         # Load model and processor
         self.model = Qwen2VLForConditionalGeneration.from_pretrained(
-            path, torch_dtype="auto", device_map="auto"
-        )
         self.processor = AutoProcessor.from_pretrained(path)
     def __call__(self, data):
         # Extract image and text from the input data
-        image_url = data.get("image", "")
-        text_prompt = data.get("text", "")
         # Download and process the image
-        image = Image.open(BytesIO(requests.get(image_url).content))
         # Prepare the input in the format expected by the model
         messages = [

 class EndpointHandler:
     def __init__(self, path=""):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Load model and processor
         self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+            path, torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32, device_map="auto"
+        ).to(self.device)
         self.processor = AutoProcessor.from_pretrained(path)
     def __call__(self, data):
         # Extract image and text from the input data
+        image_url = data.get("inputs", {}).get("image", "")
+        text_prompt = data.get("inputs", {}).get("text", "")
+        if not image_url or not text_prompt:
+            return {"error": "Both 'image' and 'text' must be provided in the input data."}
         # Download and process the image
+        try:
+            response = requests.get(image_url)
+            response.raise_for_status()
+            image = Image.open(BytesIO(response.content)).convert("RGB")
+        except Exception as e:
+            return {"error": f"Failed to load image from URL: {e}"}
         # Prepare the input in the format expected by the model
         messages = [