import base64
import os
import subprocess
import sys
from io import BytesIO

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor


def install(package):
    """Install a package at runtime; the endpoint image may not ship it."""
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package])

class EndpointHandler:
    def __init__(self, path=""):
        # Florence-2's custom modeling code depends on these packages at
        # runtime; install them up front so model loading does not fail.
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow', 'transformers']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Florence-2 ships custom modeling code, so trust_remote_code is required.
        self.model_name = "microsoft/Florence-2-base"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            trust_remote_code=True,
        ).to(self.device)

        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            trust_remote_code=True,
        )

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def process_image(self, image_data):
        print("[DEBUG] Attempting to process image")
        try:
            # Short strings that point at an existing file are treated as paths
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # Force PIL to read the pixel data now: the file handle
                    # closes when the with-block exits, and lazy loading
                    # would fail afterwards.
                    image.load()
            else:
                # Otherwise assume image_data is a base64-encoded image
                print("[DEBUG] Decoding base64 image data")
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))

            print("[DEBUG] Image opened with PIL:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        try:
            # Extract inputs from the standard Hugging Face payload format
            inputs = data.pop("inputs", data)

            # Inputs may be a dict with "image"/"text" keys, or a bare
            # image reference paired with a default prompt
            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                image_path = inputs
                text_input = "What is in this image?"
            print("[INFO]", image_path, text_input)

            # Decode the image (file path or base64 string)
            image = self.process_image(image_path) if image_path else None
            print("[INFO]", image)

            # Tokenize the prompt and preprocess the image
            model_inputs = self.processor(
                images=image,
                text=text_input,
                return_tensors="pt"
            )

            # Move tensor inputs to the model's device
            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in model_inputs.items()}

            # Generate output
            with torch.no_grad():
                outputs = self.model.generate(**model_inputs)

            # Decode generated token ids back to text
            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            print(f"[INFO] {decoded_outputs}")
            return {"generated_text": decoded_outputs[0]}

        except Exception as e:
            return {"error": str(e)}
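
# A minimal local smoke test, assuming a "test.jpg" exists in the working
# directory. "<CAPTION>" is one of Florence-2's task prompts; swap in another
# task token or a free-form question as needed.
if __name__ == "__main__":
    handler = EndpointHandler()

    # Encode the image the same way a client would before POSTing it.
    with open("test.jpg", "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")

    payload = {"inputs": {"image": encoded, "text": "<CAPTION>"}}
    print(handler(payload))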