Update handler.py
Browse files- handler.py +8 -2
handler.py
CHANGED
|
@@ -1,18 +1,24 @@
|
|
| 1 |
from transformers import AutoModelForCausalLM, AutoProcessor
|
| 2 |
from PIL import Image
|
| 3 |
import requests
|
|
|
|
| 4 |
|
| 5 |
class EndpointHandler:
|
| 6 |
def __init__(self, model_dir):
|
|
|
|
|
|
|
|
|
|
| 7 |
# Load the model with trust_remote_code=True
|
| 8 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 9 |
model_dir,
|
| 10 |
trust_remote_code=True
|
| 11 |
-
).eval().
|
|
|
|
| 12 |
self.processor = AutoProcessor.from_pretrained(
|
| 13 |
model_dir,
|
| 14 |
trust_remote_code=True
|
| 15 |
)
|
|
|
|
| 16 |
|
| 17 |
def __call__(self, data):
|
| 18 |
# Extract inputs from the request data
|
|
@@ -27,7 +33,7 @@ class EndpointHandler:
|
|
| 27 |
text=task_prompt,
|
| 28 |
images=image,
|
| 29 |
return_tensors="pt"
|
| 30 |
-
).to(
|
| 31 |
|
| 32 |
# Generate output
|
| 33 |
generated_ids = self.model.generate(
|
|
|
|
| 1 |
from transformers import AutoModelForCausalLM, AutoProcessor
|
| 2 |
from PIL import Image
|
| 3 |
import requests
|
| 4 |
+
import torch
|
| 5 |
|
| 6 |
class EndpointHandler:
|
| 7 |
def __init__(self, model_dir):
    """Initialize the endpoint: load model and processor from *model_dir*.

    Args:
        model_dir: Path to the directory containing the pretrained
            model and processor files.

    Side effects:
        Sets ``self.model`` (in eval mode, on the selected device),
        ``self.processor``, and ``self.device``.
    """
    # Select the compute device dynamically: use the GPU when one is
    # visible to torch, otherwise fall back to the CPU.
    self.device = "cuda" if torch.cuda.is_available() else "cpu"

    # trust_remote_code=True lets transformers execute the custom model
    # code shipped inside the checkpoint directory.
    model = AutoModelForCausalLM.from_pretrained(
        model_dir,
        trust_remote_code=True,
    )
    self.model = model.eval().to(self.device)

    self.processor = AutoProcessor.from_pretrained(
        model_dir,
        trust_remote_code=True,
    )
|
| 22 |
|
| 23 |
def __call__(self, data):
|
| 24 |
# Extract inputs from the request data
|
|
|
|
| 33 |
text=task_prompt,
|
| 34 |
images=image,
|
| 35 |
return_tensors="pt"
|
| 36 |
+
).to(self.device) # Use the correct device
|
| 37 |
|
| 38 |
# Generate output
|
| 39 |
generated_ids = self.model.generate(
|