File size: 8,200 Bytes
579374c 5267f10 137b217 f37920f 579374c 01287f2 73d74f3 579374c c5175dd 579374c f6a1b8c 579374c f6a1b8c 9525474 579374c 01287f2 579374c f6a1b8c 01287f2 f6a1b8c 01287f2 f6a1b8c 579374c 01287f2 f6a1b8c 01287f2 f6a1b8c 01287f2 f6a1b8c 01287f2 579374c f6a1b8c 01287f2 579374c 01287f2 579374c f6a1b8c 579374c f6a1b8c 579374c f6a1b8c 579374c f6a1b8c 01287f2 f6a1b8c 579374c f6a1b8c 579374c f6a1b8c 579374c f6a1b8c 579374c 01287f2 f6a1b8c 01287f2 f6a1b8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
import subprocess
import sys
import torch
import base64
from io import BytesIO
from PIL import Image
import requests
from transformers import AutoModelForCausalLM, AutoProcessor
from tokenizers import Tokenizer, pre_tokenizers
import os
def install(package):
    """Install *package* with pip, using the interpreter running this script.

    Args:
        package: Requirement specifier handed to ``pip install`` as a single
            argument (for example ``"timm"`` or ``"transformers==4.43.3"``).

    Raises:
        subprocess.CalledProcessError: If the pip process exits with a
            non-zero status.
    """
    # Run pip through ``sys.executable -m pip`` so the package is installed
    # for the same interpreter that is executing this module.
    pip_command = [sys.executable, "-m", "pip", "install", "--no-warn-script-location"]
    pip_command.append(package)
    subprocess.check_call(pip_command)
class EndpointHandler:
    """Callable handler that runs an image + text prompt through a model.

    On construction the handler installs a fixed list of packages, loads the
    model and processor named by ``self.model_name`` and moves the model to
    CUDA when available (CPU otherwise). Calling the instance with a request
    dict returns either ``{"generated_text": ...}`` or ``{"error": ...}``.
    """

    def __init__(self, path=""):
        """Install dependencies and load the model, tokenizer and processor.

        Args:
            path: Accepted for interface compatibility; not used by this
                implementation.
        """
        # Install necessary packages. Failures are logged and ignored so that
        # one unavailable package does not prevent the handler from starting.
        # NOTE(review): ``transformers`` is already imported at module level,
        # so installing a pinned version here cannot replace the module that
        # is already loaded in this process - confirm whether the pin should
        # live in the deployment requirements instead.
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow', 'transformers==4.43.3']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load the model
        self.model_name = "arjunanand13/florence-enphaseall2-25e"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, trust_remote_code=True
        ).to(self.device)

        # Manually load the tokenizer with a whitespace pre-tokenizer.
        # NOTE(review): no other method in this class reads ``self.tokenizer``;
        # it may be ``None`` if loading failed.
        self.tokenizer = self.load_tokenizer()

        # Initialize the processor
        self.processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_tokenizer(self):
        """Manually loads the tokenizer and adds a whitespace pre-tokenizer.

        Returns:
            The loaded ``Tokenizer`` with its pre-tokenizer replaced by
            ``pre_tokenizers.Whitespace()``, or ``None`` if loading failed
            (the error is printed, not raised).
        """
        try:
            tokenizer = Tokenizer.from_pretrained(self.model_name)
            tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
            print("[INFO] Whitespace pre-tokenizer added.")
            return tokenizer
        except Exception as e:
            print(f"[ERROR] Failed to load tokenizer: {str(e)}")
            return None

    def process_image(self, image_data):
        """Processes image data from file path or base64-encoded string.

        Args:
            image_data: Either a string shorter than 256 characters naming an
                existing file, or base64-encoded image bytes. A base64 string
                may carry a ``data:<mediatype>;base64,`` data-URI header,
                which is stripped before decoding.

        Returns:
            The opened ``PIL.Image.Image``, or ``None`` if the input could not
            be read or decoded (the error is printed, not raised).
        """
        print("[DEBUG] Attempting to process image")
        try:
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # Image.open is lazy: pixel data is only read on demand.
                    # Force the read now, while the file is still open, so the
                    # returned image stays usable after the ``with`` block.
                    image.load()
            else:
                print("[DEBUG] Decoding base64 image data")
                # Drop a data-URI header; otherwise its characters would be
                # decoded as if they were part of the payload.
                if isinstance(image_data, str) and image_data.startswith("data:") and "," in image_data:
                    image_data = image_data.split(",", 1)[1]
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))
            print("[DEBUG] Image opened:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        """Processes input and generates model output.

        Args:
            data: Request dict. If it has an ``"inputs"`` key, that value is
                used; otherwise ``data`` itself is treated as the inputs.
                Dict inputs are read for ``"image"`` and ``"text"``; any other
                inputs value is treated as the image, with a default prompt.
                The dict is not modified.

        Returns:
            ``{"generated_text": str}`` on success, or ``{"error": str}`` if
            the image could not be loaded or any step raised.
        """
        try:
            # Read without popping so the caller's request dict is left intact.
            inputs = data.get("inputs", data)
            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                image_path = inputs
                text_input = "What is in this image?"
            print("[INFO] Image path:", image_path, "| Text input:", text_input)

            image = None
            if image_path:
                image = self.process_image(image_path)
                if image is None:
                    # An image was supplied but could not be read; report that
                    # directly instead of failing later inside the processor.
                    return {"error": "Unable to load image from the provided input"}

            model_inputs = self.processor(
                images=image,
                text=text_input,
                return_tensors="pt"
            )
            # Move tensor entries to the model's device; leave others as-is.
            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in model_inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(**model_inputs)

            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            print(f"[INFO] Generated text: {decoded_outputs[0]}")
            return {"generated_text": decoded_outputs[0]}
        except Exception as e:
            print(f"[ERROR] {str(e)}")
            return {"error": str(e)}
# import subprocess
# import sys
# import torch
# import base64
# from io import BytesIO
# from PIL import Image
# import requests
# from transformers import AutoModelForCausalLM, AutoProcessor
# import os
# def install(package):
# subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package])
# class EndpointHandler:
# def __init__(self, path=""):
# required_packages = ['timm', 'einops', 'flash-attn', 'Pillow','-U transformers']
# for package in required_packages:
# try:
# install(package)
# print(f"Successfully installed {package}")
# except Exception as e:
# print(f"Failed to install {package}: {str(e)}")
# self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {self.device}")
# self.model_name = "arjunanand13/florence-enphaseall2-25e"
# self.model = AutoModelForCausalLM.from_pretrained(
# self.model_name,
# trust_remote_code=True,
# ).to(self.device)
# self.processor = AutoProcessor.from_pretrained(
# self.model_name,
# trust_remote_code=True,
# )
# if torch.cuda.is_available():
# torch.cuda.empty_cache()
# def process_image(self,image_data):
# print("[DEBUG] Attempting to process image")
# try:
# # Check if image_data is a file path
# if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
# with open(image_data, 'rb') as image_file:
# print("[DEBUG] File opened successfully")
# image = Image.open(image_file)
# else:
# # Assume image_data is base64 encoded
# print("[DEBUG] Decoding base64 image data")
# image_bytes = base64.b64decode(image_data)
# image = Image.open(BytesIO(image_bytes))
# print("[DEBUG] Image opened with PIL:", image.format, image.size, image.mode)
# return image
# except Exception as e:
# print(f"[ERROR] Error processing image: {str(e)}")
# return None
# def __call__(self, data):
# try:
# # Extract inputs from the expected Hugging Face format
# inputs = data.pop("inputs", data)
# # Check if inputs is a dict or string
# if isinstance(inputs, dict):
# image_path = inputs.get("image", None)
# text_input = inputs.get("text", "")
# else:
# # If inputs is not a dict, assume it's the image path
# image_path = inputs
# text_input = "What is in this image?"
# print("[INFO]",image_path,text_input)
# # Process image
# image = self.process_image(image_path) if image_path else None
# print("[INFO]",image)
# # Prepare inputs for the model
# model_inputs = self.processor(
# images=image if image else None,
# text=text_input,
# return_tensors="pt"
# )
# # Move inputs to device
# model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
# for k, v in model_inputs.items()}
# # Generate output
# with torch.no_grad():
# outputs = self.model.generate(**model_inputs)
# # Decode outputs
# decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
# print(f"[INFO],{decoded_outputs}")
# print(f"[INFO],{decoded_outputs[0]}")
# return {"generated_text": decoded_outputs[0]}
# except Exception as e:
# return {"error": str(e)}