Florence2

@@ -3,18 +3,13 @@ from PIL import Image
 import requests
 import torch
 class EndpointHandler:
     def __init__(self, model_dir):
-        # Check if GPU is available, otherwise use CPU
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        # Load the Florence model and processor
         self.model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True
-        ).eval().to(device)  # Dynamically move to the correct device
         self.processor = AutoProcessor.from_pretrained(
             model_dir,
             trust_remote_code=True
@@ -23,49 +18,43 @@ class EndpointHandler:
     def __call__(self, data):
         try:
-            # Extract inputs from the request data
-            task_prompt = data.get("task_prompt", "<MORE_DETAILED_CAPTION>")
-            image_url = data.get("url")  # Match the key sent from n8n
             if not image_url or not image_url.startswith("http"):
-                raise ValueError("Invalid or missing 'url' field. Please provide a valid image URL.")
-            # Load and process the image
             image = self.load_image(image_url)
-            # Prepare inputs for the Florence model
-            inputs = self.processor(
-                text=task_prompt,
                 images=image,
                 return_tensors="pt"
             ).to(self.device)
-            # Generate detailed caption using Florence
             generated_ids = self.model.generate(
-                input_ids=inputs["input_ids"],
-                pixel_values=inputs["pixel_values"],
-                max_new_tokens=512,  # Adjust token limit for detailed captions
-                num_beams=3,         # Use beam search for better captions
-                early_stopping=True  # Stop when the best output is found
             )
-            # Decode the generated text
             generated_text = self.processor.batch_decode(
                 generated_ids,
                 skip_special_tokens=True
             )[0]
             return {"caption": generated_text}
         except Exception as e:
             return {"error": str(e)}
     def load_image(self, image_url):
         try:
-            # Load image from URL
             response = requests.get(image_url, stream=True)
-            response.raise_for_status()  # Raise an error for failed requests
-            image = Image.open(response.raw).convert("RGB")
-            return image
         except Exception as e:
-            raise ValueError(f"Failed to load image from URL: {image_url}. Error: {e}")

 import requests
 import torch
 class EndpointHandler:
     def __init__(self, model_dir):
         device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model = AutoModelForCausalLM.from_pretrained(
             model_dir,
             trust_remote_code=True
+        ).eval().to(device)
         self.processor = AutoProcessor.from_pretrained(
             model_dir,
             trust_remote_code=True
     def __call__(self, data):
         try:
+            inputs_data = data.get("inputs", {})
+            params = data.get("parameters", {})
+            image_url = inputs_data.get("url")
             if not image_url or not image_url.startswith("http"):
+                raise ValueError("Invalid or missing 'url' field")
             image = self.load_image(image_url)
+            model_inputs = self.processor(
+                text=inputs_data.get("task_prompt", "<MORE_DETAILED_CAPTION>"),
                 images=image,
                 return_tensors="pt"
             ).to(self.device)
             generated_ids = self.model.generate(
+                input_ids=model_inputs["input_ids"],
+                pixel_values=model_inputs["pixel_values"],
+                max_new_tokens=params.get("max_new_tokens", 512),
+                num_beams=params.get("num_beams", 3),
+                early_stopping=params.get("early_stopping", True),
+                do_sample=params.get("do_sample", False)
             )
             generated_text = self.processor.batch_decode(
                 generated_ids,
                 skip_special_tokens=True
             )[0]
             return {"caption": generated_text}
         except Exception as e:
             return {"error": str(e)}
     def load_image(self, image_url, timeout=10):
         """Download an image over HTTP and return it converted to RGB.

         Args:
             image_url: URL of the image to fetch.
             timeout: Seconds passed to ``requests.get`` as its ``timeout``
                 argument. Without one, a stalled server would block the
                 request indefinitely.

         Returns:
             The downloaded image as returned by
             ``Image.open(...).convert("RGB")``.

         Raises:
             ValueError: If the request fails, returns an error status, or
                 the body cannot be opened as an image. The original
                 exception is chained as ``__cause__``.
         """
         try:
             # The context manager releases the streamed connection even when
             # decoding fails part-way through.
             with requests.get(image_url, stream=True, timeout=timeout) as response:
                 response.raise_for_status()
                 # The raw stream bypasses requests' automatic gzip/deflate
                 # handling unless this flag is set.
                 response.raw.decode_content = True
                 # convert() forces the pixel data to be read before the
                 # connection is closed on leaving the `with` block.
                 return Image.open(response.raw).convert("RGB")
         except Exception as e:
             raise ValueError(f"Failed to load image: {str(e)}") from e