SebastianRuff commited on
Commit
b455b18
·
verified ·
1 Parent(s): 07aa66c

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +17 -67
handler.py CHANGED
from transformers import AutoModelForCausalLM, AutoProcessor
from PIL import Image
import io
import requests
import torch


class EndpointHandler:
    """Inference-endpoint handler: fetches an image from a URL and returns a
    caption generated by a multimodal causal LM (Florence-style task prompts).
    """

    def __init__(self, model_dir):
        """Load model and processor from *model_dir*, using GPU when available.

        Args:
            model_dir: Path to the pretrained model / processor directory.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # .eval() disables dropout etc.; torch.inference_mode() alone does not.
        self.model = (
            AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True)
            .eval()
            .to(device)
        )
        self.processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)
        self.device = device

    def __call__(self, data):
        """Handle one inference request.

        Expected payload shape::

            {"inputs": {"url": "...", "task_prompt": "<MORE_DETAILED_CAPTION>"},
             "parameters": {"max_new_tokens": 512, "num_beams": 3}}

        Only ``inputs.url`` is required; all other fields default to the
        values previously hard-coded here, so existing callers are unaffected.

        Returns:
            ``{"caption": str}`` on success, ``{"error": str}`` on failure.
        """
        try:
            inputs_data = data.get("inputs", {})
            params = data.get("parameters", {})

            url = inputs_data.get("url")
            if not url:
                return {"error": "Missing URL"}

            # BUG FIX: without stream=True, requests buffers the body and
            # response.raw is already drained, so Image.open(response.raw)
            # fails. Decode from the buffered bytes instead. Also add a
            # timeout (avoid hanging the worker) and surface HTTP errors
            # before handing the body to PIL.
            # NOTE(review): verify=False (inherited from an earlier revision
            # "for SSL issues") disables TLS certificate checking — a
            # security risk; confirm it is still required.
            response = requests.get(url, timeout=15, verify=False)
            response.raise_for_status()
            image = Image.open(io.BytesIO(response.content)).convert("RGB")

            inputs = self.processor(
                text=inputs_data.get("task_prompt", "<MORE_DETAILED_CAPTION>"),
                images=image,
                return_tensors="pt",
            ).to(self.device)

            with torch.inference_mode():
                output = self.model.generate(
                    **inputs,
                    max_new_tokens=params.get("max_new_tokens", 512),
                    num_beams=params.get("num_beams", 3),
                )

            text = self.processor.batch_decode(output, skip_special_tokens=True)[0]
            return {"caption": text}

        except Exception as e:
            # Endpoint boundary: report the failure to the caller instead of
            # crashing the serving worker.
            return {"error": str(e)}