| import subprocess | |
| import sys | |
| import torch | |
| import base64 | |
| from io import BytesIO | |
| from PIL import Image | |
| import requests | |
| from transformers import AutoModelForCausalLM, AutoProcessor | |
| from tokenizers import Tokenizer, pre_tokenizers | |
| import os | |
def install(package):
    """Install *package* with pip, using the interpreter running this module.

    Executes ``<sys.executable> -m pip install --no-warn-script-location
    <package>`` as a child process and waits for it to finish.

    Args:
        package: Requirement string handed to pip as a single argument
            (for example ``"timm"`` or ``"transformers==4.43.3"``).

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "--no-warn-script-location"]
    pip_command.append(package)
    subprocess.check_call(pip_command)
class EndpointHandler:
    """Callable handler that runs image + text prompts through a causal LM.

    On construction it installs a fixed list of pip packages, loads the model
    and processor named by ``self.model_name``, and moves the model to CUDA
    when available (CPU otherwise). Calling the instance with a request
    payload returns either ``{"generated_text": ...}`` or ``{"error": ...}``.
    """

    def __init__(self, path=""):
        """Install dependencies, then load the model, tokenizer and processor.

        Args:
            path: Accepted for interface compatibility but not used; the model
                is always loaded from ``self.model_name``.
        """
        # Install necessary packages. Failures are logged and skipped so one
        # package that cannot be built does not prevent start-up.
        # NOTE(review): `transformers` is imported at module load, before this
        # pin is installed, so the pinned version presumably only takes effect
        # in a fresh process -- confirm and prefer a requirements file.
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow', 'transformers==4.43.3']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load the model
        self.model_name = "arjunanand13/florence-enphaseall2-25e"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, trust_remote_code=True
        ).to(self.device)

        # Manually load the tokenizer with a whitespace pre-tokenizer.
        # NOTE(review): nothing else in this class reads self.tokenizer;
        # decoding goes through self.processor. It may be None on failure.
        self.tokenizer = self.load_tokenizer()

        # Initialize the processor
        self.processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_tokenizer(self):
        """Manually loads the tokenizer and adds a whitespace pre-tokenizer.

        Returns:
            The ``Tokenizer`` for ``self.model_name`` with its pre-tokenizer
            replaced by ``pre_tokenizers.Whitespace()``, or ``None`` if loading
            fails (the error is printed, not raised).
        """
        try:
            tokenizer = Tokenizer.from_pretrained(self.model_name)
            tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
            print("[INFO] Whitespace pre-tokenizer added.")
            return tokenizer
        except Exception as e:
            print(f"[ERROR] Failed to load tokenizer: {str(e)}")
            return None

    def process_image(self, image_data):
        """Processes image data from file path or base64-encoded string.

        Args:
            image_data: Either a string shorter than 256 characters naming an
                existing file, or base64-encoded image bytes.

        Returns:
            The opened PIL image, or ``None`` if the data could not be read or
            decoded (the error is printed, not raised).
        """
        print("[DEBUG] Attempting to process image")
        try:
            # The length guard keeps long base64 payloads away from
            # os.path.exists, which is only meaningful for real paths.
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # Image.open is lazy: it only parses the header. Read the
                    # pixel data now, while the file is still open, otherwise
                    # the returned image fails on first use after the `with`
                    # block has closed the underlying file.
                    image.load()
            else:
                print("[DEBUG] Decoding base64 image data")
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))
            print("[DEBUG] Image opened:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        """Processes input and generates model output.

        Args:
            data: Request payload. If it has an ``"inputs"`` key that value is
                used, otherwise the payload itself is. A dict input is read
                for ``"image"`` and ``"text"``; any other input is treated as
                the image and paired with a default question.

        Returns:
            ``{"generated_text": <first decoded sequence>}`` on success, or
            ``{"error": <message>}`` if anything fails. The payload passed in
            is not modified.
        """
        try:
            # .get rather than .pop so the caller's payload is left intact.
            inputs = data.get("inputs", data)
            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                image_path = inputs
                text_input = "What is in this image?"
            print("[INFO] Image path:", image_path, "| Text input:", text_input)

            image = None
            if image_path:
                image = self.process_image(image_path)
                if image is None:
                    # An image was supplied but could not be loaded; report
                    # that directly instead of running the model without it.
                    message = "Could not load image from the provided file path or base64 data"
                    print(f"[ERROR] {message}")
                    return {"error": message}

            model_inputs = self.processor(
                images=image,
                text=text_input,
                return_tensors="pt"
            )
            # Only tensors are moved to the device; other values pass through.
            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in model_inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(**model_inputs)

            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            print(f"[INFO] Generated text: {decoded_outputs[0]}")
            return {"generated_text": decoded_outputs[0]}
        except Exception as e:
            print(f"[ERROR] {str(e)}")
            return {"error": str(e)}
| # import subprocess | |
| # import sys | |
| # import torch | |
| # import base64 | |
| # from io import BytesIO | |
| # from PIL import Image | |
| # import requests | |
| # from transformers import AutoModelForCausalLM, AutoProcessor | |
| # import os | |
| # def install(package): | |
| # subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package]) | |
| # class EndpointHandler: | |
| # def __init__(self, path=""): | |
| # required_packages = ['timm', 'einops', 'flash-attn', 'Pillow','-U transformers'] | |
| # for package in required_packages: | |
| # try: | |
| # install(package) | |
| # print(f"Successfully installed {package}") | |
| # except Exception as e: | |
| # print(f"Failed to install {package}: {str(e)}") | |
| # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
| # print(f"Using device: {self.device}") | |
| # self.model_name = "arjunanand13/florence-enphaseall2-25e" | |
| # self.model = AutoModelForCausalLM.from_pretrained( | |
| # self.model_name, | |
| # trust_remote_code=True, | |
| # ).to(self.device) | |
| # self.processor = AutoProcessor.from_pretrained( | |
| # self.model_name, | |
| # trust_remote_code=True, | |
| # ) | |
| # if torch.cuda.is_available(): | |
| # torch.cuda.empty_cache() | |
| # def process_image(self,image_data): | |
| # print("[DEBUG] Attempting to process image") | |
| # try: | |
| # # Check if image_data is a file path | |
| # if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data): | |
| # with open(image_data, 'rb') as image_file: | |
| # print("[DEBUG] File opened successfully") | |
| # image = Image.open(image_file) | |
| # else: | |
| # # Assume image_data is base64 encoded | |
| # print("[DEBUG] Decoding base64 image data") | |
| # image_bytes = base64.b64decode(image_data) | |
| # image = Image.open(BytesIO(image_bytes)) | |
| # print("[DEBUG] Image opened with PIL:", image.format, image.size, image.mode) | |
| # return image | |
| # except Exception as e: | |
| # print(f"[ERROR] Error processing image: {str(e)}") | |
| # return None | |
| # def __call__(self, data): | |
| # try: | |
| # # Extract inputs from the expected Hugging Face format | |
| # inputs = data.pop("inputs", data) | |
| # # Check if inputs is a dict or string | |
| # if isinstance(inputs, dict): | |
| # image_path = inputs.get("image", None) | |
| # text_input = inputs.get("text", "") | |
| # else: | |
| # # If inputs is not a dict, assume it's the image path | |
| # image_path = inputs | |
| # text_input = "What is in this image?" | |
| # print("[INFO]",image_path,text_input) | |
| # # Process image | |
| # image = self.process_image(image_path) if image_path else None | |
| # print("[INFO]",image) | |
| # # Prepare inputs for the model | |
| # model_inputs = self.processor( | |
| # images=image if image else None, | |
| # text=text_input, | |
| # return_tensors="pt" | |
| # ) | |
| # # Move inputs to device | |
| # model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v | |
| # for k, v in model_inputs.items()} | |
| # # Generate output | |
| # with torch.no_grad(): | |
| # outputs = self.model.generate(**model_inputs) | |
| # # Decode outputs | |
| # decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True) | |
| # print(f"[INFO],{decoded_outputs}") | |
| # print(f"[INFO],{decoded_outputs[0]}") | |
| # return {"generated_text": decoded_outputs[0]} | |
| # except Exception as e: | |
| # return {"error": str(e)} |