File size: 8,200 Bytes
579374c
 
 
 
 
 
 
 
5267f10
137b217
 
f37920f
 
579374c
 
 
01287f2
73d74f3
579374c
 
c5175dd
579374c
 
 
f6a1b8c
579374c
 
f6a1b8c
 
9525474
579374c
01287f2
579374c
f6a1b8c
01287f2
 
f6a1b8c
01287f2
 
f6a1b8c
579374c
 
 
01287f2
 
f6a1b8c
 
 
01287f2
 
f6a1b8c
01287f2
 
f6a1b8c
 
01287f2
579374c
 
 
 
 
 
 
 
 
 
f6a1b8c
01287f2
579374c
 
 
 
 
 
01287f2
579374c
 
f6a1b8c
579374c
 
 
 
 
 
f6a1b8c
 
 
579374c
f6a1b8c
579374c
 
 
 
 
f6a1b8c
01287f2
f6a1b8c
 
579374c
 
f6a1b8c
579374c
f6a1b8c
579374c
f6a1b8c
579374c
01287f2
f6a1b8c
 
 
 
01287f2
f6a1b8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import subprocess
import sys
import torch
import base64
from io import BytesIO
from PIL import Image
import requests
from transformers import AutoModelForCausalLM, AutoProcessor
from tokenizers import Tokenizer, pre_tokenizers  
import os

def install(package):
    """Install *package* into the current interpreter's environment via pip.

    Raises subprocess.CalledProcessError if pip exits non-zero.
    """
    pip_cmd = [
        sys.executable, "-m", "pip", "install",
        "--no-warn-script-location",
        package,
    ]
    subprocess.check_call(pip_cmd)

class EndpointHandler:
    """Hugging Face Inference Endpoint handler for a Florence-based VQA model.

    Expected request payload (HF inference format)::

        {"inputs": {"image": "<path or base64 string>", "text": "<prompt>"}}

    or a bare string, which is treated as the image path with a default prompt.
    Returns ``{"generated_text": str}`` on success or ``{"error": str}``.
    """

    def __init__(self, path=""):
        # Install runtime dependencies the endpoint image may lack.
        # Best-effort: a failed install is logged, not fatal, because the
        # package may already be present in the base image.
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow',  'transformers==4.43.3']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load the model (trust_remote_code: Florence ships custom model code).
        self.model_name = "arjunanand13/florence-enphaseall2-25e"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, trust_remote_code=True
        ).to(self.device)

        # Manually load a tokenizer with a whitespace pre-tokenizer.
        # NOTE(review): this tokenizer is never used below — generation goes
        # through self.processor. Kept for interface compatibility.
        self.tokenizer = self.load_tokenizer()

        # Processor handles both image preprocessing and text tokenization.
        self.processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_tokenizer(self):
        """Manually load the tokenizer and add a whitespace pre-tokenizer.

        Returns the ``tokenizers.Tokenizer`` on success, ``None`` on failure
        (callers must tolerate ``None``).
        """
        try:
            tokenizer = Tokenizer.from_pretrained(self.model_name)
            tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
            print("[INFO] Whitespace pre-tokenizer added.")
            return tokenizer
        except Exception as e:
            print(f"[ERROR] Failed to load tokenizer: {str(e)}")
            return None

    def process_image(self, image_data):
        """Open an image from a file path or a base64-encoded string.

        The length check (< 256) is a cheap heuristic to avoid calling
        ``os.path.exists`` on a multi-kilobyte base64 payload.
        Returns a PIL Image, or ``None`` if decoding fails.
        """
        print("[DEBUG] Attempting to process image")
        try:
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # BUGFIX: PIL opens lazily; force the pixel data to load
                    # while the file is still open, otherwise the processor's
                    # later access raises "seek of closed file".
                    image.load()
            else:
                print("[DEBUG] Decoding base64 image data")
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))

            print("[DEBUG] Image opened:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        """Process one inference request and generate model output.

        Never raises: any failure is returned as ``{"error": str}``.
        """
        try:
            # Use .get (not .pop) so the caller's payload is not mutated.
            inputs = data.get("inputs", data)

            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                # A bare string is treated as the image reference.
                image_path = inputs
                text_input = "What is in this image?"

            print("[INFO] Image path:", image_path, "| Text input:", text_input)

            image = self.process_image(image_path) if image_path else None

            model_inputs = self.processor(
                images=image if image is not None else None,
                text=text_input,
                return_tensors="pt"
            )

            # Move only tensor entries to the target device.
            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in model_inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(**model_inputs)

            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            print(f"[INFO] Generated text: {decoded_outputs[0]}")
            return {"generated_text": decoded_outputs[0]}

        except Exception as e:
            print(f"[ERROR] {str(e)}")
            return {"error": str(e)}



# ---------------------------------------------------------------------------
# NOTE(review): everything below is a commented-out earlier revision of
# EndpointHandler, kept only for reference. It duplicates the live class
# above; consider deleting this dead code (version control preserves it).
# ---------------------------------------------------------------------------
# import subprocess
# import sys
# import torch
# import base64
# from io import BytesIO
# from PIL import Image
# import requests
# from transformers import AutoModelForCausalLM, AutoProcessor
# import os

# def install(package):
#     subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package])

# class EndpointHandler:
#     def __init__(self, path=""):
#         required_packages = ['timm', 'einops', 'flash-attn', 'Pillow','-U transformers']
#         for package in required_packages:
#             try:
#                 install(package)
#                 print(f"Successfully installed {package}")
#             except Exception as e:
#                 print(f"Failed to install {package}: {str(e)}")
        
#         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#         print(f"Using device: {self.device}")
        
#         self.model_name = "arjunanand13/florence-enphaseall2-25e"
#         self.model = AutoModelForCausalLM.from_pretrained(
#             self.model_name,
#             trust_remote_code=True,
#         ).to(self.device)
        
#         self.processor = AutoProcessor.from_pretrained(
#             self.model_name,
#             trust_remote_code=True,
#         )
        
#         if torch.cuda.is_available():
#             torch.cuda.empty_cache()

#     def process_image(self,image_data):
#         print("[DEBUG] Attempting to process image")
#         try:
#             # Check if image_data is a file path
#             if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
#                 with open(image_data, 'rb') as image_file:
#                     print("[DEBUG] File opened successfully")
#                     image = Image.open(image_file)
#             else:
#                 # Assume image_data is base64 encoded
#                 print("[DEBUG] Decoding base64 image data")
#                 image_bytes = base64.b64decode(image_data)
#                 image = Image.open(BytesIO(image_bytes))
            
#             print("[DEBUG] Image opened with PIL:", image.format, image.size, image.mode)
#             return image
#         except Exception as e:
#             print(f"[ERROR] Error processing image: {str(e)}")
#             return None

#     def __call__(self, data):
#         try:
#             # Extract inputs from the expected Hugging Face format
#             inputs = data.pop("inputs", data)
            
#             # Check if inputs is a dict or string
#             if isinstance(inputs, dict):
#                 image_path = inputs.get("image", None)
#                 text_input = inputs.get("text", "")
#             else:
#                 # If inputs is not a dict, assume it's the image path
#                 image_path = inputs
#                 text_input = "What is in this image?"
#             print("[INFO]",image_path,text_input)
#             # Process image
#             image = self.process_image(image_path) if image_path else None
#             print("[INFO]",image)
#             # Prepare inputs for the model
#             model_inputs = self.processor(
#                 images=image if image else None,
#                 text=text_input,
#                 return_tensors="pt"
#             )
            
#             # Move inputs to device
#             model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v 
#                            for k, v in model_inputs.items()}
            
#             # Generate output
#             with torch.no_grad():
#                 outputs = self.model.generate(**model_inputs)
            
#             # Decode outputs
#             decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
#             print(f"[INFO],{decoded_outputs}")
#             print(f"[INFO],{decoded_outputs[0]}")
#             return {"generated_text": decoded_outputs[0]}
        
#         except Exception as e:
#             return {"error": str(e)}