import subprocess
import sys
import torch
import base64
from io import BytesIO
from PIL import Image
import requests
from transformers import AutoModelForCausalLM, AutoProcessor
from tokenizers import Tokenizer, pre_tokenizers
import os


def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package])


class EndpointHandler:
    def __init__(self, path=""):
        # Install packages that are not part of the base endpoint image
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow', 'transformers==4.43.3']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load the model
        self.model_name = "arjunanand13/florence-enphaseall2-25e"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            trust_remote_code=True
        ).to(self.device)

        # Manually load the tokenizer with a whitespace pre-tokenizer
        self.tokenizer = self.load_tokenizer()

        # Initialize the processor
        self.processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_tokenizer(self):
        """Manually loads the tokenizer and adds a whitespace pre-tokenizer."""
        try:
            tokenizer = Tokenizer.from_pretrained(self.model_name)
            tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
            print("[INFO] Whitespace pre-tokenizer added.")
            return tokenizer
        except Exception as e:
            print(f"[ERROR] Failed to load tokenizer: {str(e)}")
            return None

    def process_image(self, image_data):
        """Processes image data from a file path or a base64-encoded string."""
        print("[DEBUG] Attempting to process image")
        try:
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # Force a full read while the file is still open; PIL is lazy
                    # and would otherwise fail once the `with` block closes the file.
                    image.load()
            else:
                print("[DEBUG] Decoding base64 image data")
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))
            print("[DEBUG] Image opened:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        """Processes input and generates model output."""
        try:
            inputs = data.pop("inputs", data)
            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                # A bare string is treated as the image (file path or base64)
                image_path = inputs
                text_input = "What is in this image?"
print("[INFO] Image path:", image_path, "| Text input:", text_input) image = self.process_image(image_path) if image_path else None model_inputs = self.processor( images=image if image else None, text=text_input, return_tensors="pt" ) model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v for k, v in model_inputs.items()} with torch.no_grad(): outputs = self.model.generate(**model_inputs) decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True) print(f"[INFO] Generated text: {decoded_outputs[0]}") return {"generated_text": decoded_outputs[0]} except Exception as e: print(f"[ERROR] {str(e)}") return {"error": str(e)} # import subprocess # import sys # import torch # import base64 # from io import BytesIO # from PIL import Image # import requests # from transformers import AutoModelForCausalLM, AutoProcessor # import os # def install(package): # subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package]) # class EndpointHandler: # def __init__(self, path=""): # required_packages = ['timm', 'einops', 'flash-attn', 'Pillow','-U transformers'] # for package in required_packages: # try: # install(package) # print(f"Successfully installed {package}") # except Exception as e: # print(f"Failed to install {package}: {str(e)}") # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # print(f"Using device: {self.device}") # self.model_name = "arjunanand13/florence-enphaseall2-25e" # self.model = AutoModelForCausalLM.from_pretrained( # self.model_name, # trust_remote_code=True, # ).to(self.device) # self.processor = AutoProcessor.from_pretrained( # self.model_name, # trust_remote_code=True, # ) # if torch.cuda.is_available(): # torch.cuda.empty_cache() # def process_image(self,image_data): # print("[DEBUG] Attempting to process image") # try: # # Check if image_data is a file path # if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data): # with open(image_data, 'rb') as image_file: # print("[DEBUG] File opened successfully") # image = Image.open(image_file) # else: # # Assume image_data is base64 encoded # print("[DEBUG] Decoding base64 image data") # image_bytes = base64.b64decode(image_data) # image = Image.open(BytesIO(image_bytes)) # print("[DEBUG] Image opened with PIL:", image.format, image.size, image.mode) # return image # except Exception as e: # print(f"[ERROR] Error processing image: {str(e)}") # return None # def __call__(self, data): # try: # # Extract inputs from the expected Hugging Face format # inputs = data.pop("inputs", data) # # Check if inputs is a dict or string # if isinstance(inputs, dict): # image_path = inputs.get("image", None) # text_input = inputs.get("text", "") # else: # # If inputs is not a dict, assume it's the image path # image_path = inputs # text_input = "What is in this image?" 
# print("[INFO]",image_path,text_input) # # Process image # image = self.process_image(image_path) if image_path else None # print("[INFO]",image) # # Prepare inputs for the model # model_inputs = self.processor( # images=image if image else None, # text=text_input, # return_tensors="pt" # ) # # Move inputs to device # model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v # for k, v in model_inputs.items()} # # Generate output # with torch.no_grad(): # outputs = self.model.generate(**model_inputs) # # Decode outputs # decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True) # print(f"[INFO],{decoded_outputs}") # print(f"[INFO],{decoded_outputs[0]}") # return {"generated_text": decoded_outputs[0]} # except Exception as e: # return {"error": str(e)}