Update handler.py
handler.py  CHANGED  (+145 -32)
@@ -6,6 +6,7 @@ from io import BytesIO
 from PIL import Image
 import requests
 from transformers import AutoModelForCausalLM, AutoProcessor
+from tokenizers import Tokenizer, pre_tokenizers  # Ensure tokenizers library is installed
 import os
 
 def install(package):
@@ -13,45 +14,60 @@ def install(package):
 
 class EndpointHandler:
     def __init__(self, path=""):
-        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow','-U transformers']
+        # Install all required packages
+        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow', 'tokenizers', '-U transformers']
         for package in required_packages:
             try:
                 install(package)
                 print(f"Successfully installed {package}")
             except Exception as e:
                 print(f"Failed to install {package}: {str(e)}")
-
+
+        # Set the device (GPU/CPU)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
-
+
+        # Load the model
         self.model_name = "arjunanand13/florence-enphaseall2-25e"
         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model_name,
-            trust_remote_code=True,
+            self.model_name,
+            trust_remote_code=True
        ).to(self.device)
-
+
+        # Load the processor
         self.processor = AutoProcessor.from_pretrained(
-            self.model_name,
-            trust_remote_code=True,
+            self.model_name,
+            trust_remote_code=True
         )
-
+
+        # Add a whitespace pre-tokenizer to prevent tokenizer issues
+        self.add_pre_tokenizer()
+
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
-    def process_image(self,image_data):
+    def add_pre_tokenizer(self):
+        """Adds a whitespace pre-tokenizer to avoid issues with missing tokenizers."""
+        try:
+            tokenizer = Tokenizer.from_pretrained(self.model_name)
+            tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
+            print("[INFO] Added Whitespace pre-tokenizer.")
+        except Exception as e:
+            print(f"[ERROR] Failed to add pre-tokenizer: {str(e)}")
+
+    def process_image(self, image_data):
+        """Processes an image from file path or base64-encoded string."""
         print("[DEBUG] Attempting to process image")
         try:
-            # Check if image_data is a file path
             if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                 with open(image_data, 'rb') as image_file:
                     print("[DEBUG] File opened successfully")
                     image = Image.open(image_file)
             else:
-                # Assume image_data is base64 encoded
                 print("[DEBUG] Decoding base64 image data")
                 image_bytes = base64.b64decode(image_data)
                 image = Image.open(BytesIO(image_bytes))
-
+
             print("[DEBUG] Image opened with PIL:", image.format, image.size, image.mode)
             return image
         except Exception as e:
@@ -59,42 +75,139 @@ class EndpointHandler:
             return None
 
     def __call__(self, data):
+        """Processes the input data and generates text output."""
         try:
-            # Extract inputs from the expected Hugging Face format
             inputs = data.pop("inputs", data)
-
-            # Check if inputs is a dict or string
+
             if isinstance(inputs, dict):
                 image_path = inputs.get("image", None)
                 text_input = inputs.get("text", "")
             else:
-                # If inputs is not a dict, assume it's the image path
                 image_path = inputs
                 text_input = "What is in this image?"
-            print("[INFO]",image_path,text_input)
-            # Process image
+
+            print("[INFO] Image path:", image_path, "| Text input:", text_input)
+
             image = self.process_image(image_path) if image_path else None
-            print("[INFO]",image)
-            # Prepare inputs for the model
+
             model_inputs = self.processor(
                 images=image if image else None,
                 text=text_input,
                 return_tensors="pt"
             )
-
-            # Move inputs to device
+
             model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
-                            for k, v in model_inputs.items()}
-
-            # Generate output
+                            for k, v in model_inputs.items()}
+
             with torch.no_grad():
                 outputs = self.model.generate(**model_inputs)
-
-            # Decode outputs
+
             decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
-            print(f"[INFO],{decoded_outputs}")
-            print(f"[INFO],{decoded_outputs[0]}")
+            print(f"[INFO] Generated text: {decoded_outputs[0]}")
             return {"generated_text": decoded_outputs[0]}
-
+
         except Exception as e:
-            return {"error": str(e)}
+            return {"error": str(e)}
+
+
+# ... (the entire previous version of handler.py — the original imports,
+# install(), and EndpointHandler with __init__, process_image, and __call__ —
+# is appended here as roughly 100 commented-out lines; omitted for brevity) ...
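A quirk carried over from the previous version: install() passes each entry of required_packages to pip as a single argv token, so the entry '-U transformers' reaches pip as one string rather than an upgrade flag plus a package name, which pip will likely reject. A minimal sketch of a more defensive install() (the shlex.split call is the only change; this is not part of the commit):

import shlex
import subprocess
import sys

def install(package):
    # Split entries such as '-U transformers' into ['-U', 'transformers'] so
    # pip sees an upgrade flag plus a package name instead of a single token.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--no-warn-script-location",
         *shlex.split(package)]
    )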
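Also worth flagging: as committed, add_pre_tokenizer() loads a standalone Tokenizer, sets its pre-tokenizer, and then discards the object, so the processor actually used for generation is unaffected. A sketch of how the fix could be wired into the processor itself, assuming the Florence-2 processor exposes a fast tokenizer as .tokenizer (standard PreTrainedTokenizerFast behavior; the backend_tokenizer route is an assumption, not the commit's code):

from tokenizers import pre_tokenizers

def add_pre_tokenizer(self):
    """Attach a Whitespace pre-tokenizer to the tokenizer the processor actually uses."""
    try:
        # backend_tokenizer is the underlying tokenizers.Tokenizer of a fast
        # tokenizer; mutating it in place (rather than a freshly loaded copy)
        # makes the change visible to self.processor.
        self.processor.tokenizer.backend_tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
        print("[INFO] Added Whitespace pre-tokenizer to the processor's tokenizer.")
    except Exception as e:
        print(f"[ERROR] Failed to add pre-tokenizer: {str(e)}")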
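For reference, a minimal local smoke test of the handler as committed (a sketch, not part of the commit; test.jpg is a hypothetical local file, and the payload mirrors the {"inputs": ...} shape Hugging Face Inference Endpoints pass to __call__):

import base64

from handler import EndpointHandler

handler = EndpointHandler()

# Base64-encode a local image; this exercises the non-file-path branch of
# process_image(), which decodes the string and opens it with PIL.
with open("test.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

result = handler({"inputs": {"image": image_b64, "text": "What is in this image?"}})
print(result)  # {"generated_text": "..."} on success, {"error": "..."} on failure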