from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
from transformers.utils import TensorType
import numpy as np
from PIL import Image
import torch

class PrivateDetectorImageProcessor(BaseImageProcessor):
    """Resize-and-pad image preprocessor that produces ``pixel_values``.

    Every input image is converted to RGB, resized with bilinear resampling
    so that its longer side equals ``size`` (aspect ratio preserved), pasted
    centred onto a ``size x size`` gray (128) canvas, mapped through
    ``(x - 128) / 128`` and returned channels-first as a ``float32`` array of
    shape ``(3, size, size)``.
    """

    model_input_names = ["pixel_values"]

    def __init__(self, size: int = 480, **kwargs) -> None:
        """Store the edge length of the square output canvas.

        Args:
            size: Edge length, in pixels, of the square output.
            **kwargs: Forwarded unchanged to ``BaseImageProcessor.__init__``.
        """
        super().__init__(**kwargs)
        self.size = size

    @staticmethod
    def _as_pil_image(img) -> "Image.Image":
        """Convert a PIL image, NumPy array or torch tensor to a PIL image.

        Arrays may be 2-D ``(H, W)`` or 3-D. A 3-D array whose first axis has
        length 3 is treated as channels-first and transposed to channels-last.
        Floating-point arrays whose values all lie in ``[0, 1]`` (and are not
        all whole numbers) are rescaled to ``[0, 255]``; other floating-point
        arrays are clipped to ``[0, 255]`` before the ``uint8`` cast.

        Raises:
            ValueError: If ``img`` is of an unsupported type or rank.
        """
        if isinstance(img, Image.Image):
            return img

        if isinstance(img, torch.Tensor):
            # ``Tensor.numpy()`` refuses tensors that require grad or live on
            # a non-CPU device, so detach and move to host memory first.
            img = img.detach().cpu()
            if img.dtype == torch.bfloat16:
                # NumPy has no bfloat16 dtype; widen before converting.
                img = img.float()
            img = img.numpy()

        if not isinstance(img, np.ndarray):
            raise ValueError("Unsupported image type")

        if img.ndim not in (2, 3):
            raise ValueError(
                "Expected a single image with 2 or 3 dimensions, got an "
                f"array of shape {img.shape}; pass batches as a list of images"
            )

        if img.ndim == 3:
            # Only a 3-D array can be channels-first; the rank check keeps a
            # 2-D grayscale image that happens to have 3 rows from reaching
            # ``transpose`` with the wrong number of axes.
            if img.shape[0] == 3:
                img = img.transpose(1, 2, 0)
            if img.shape[2] == 1:
                # ``Image.fromarray`` rejects (H, W, 1); use the 2-D form.
                img = img[:, :, 0]

        if img.dtype.kind == "f":
            in_unit_range = (
                img.size > 0 and img.min() >= 0.0 and img.max() <= 1.0
            )
            if in_unit_range and not np.array_equal(img, np.rint(img)):
                # A [0, 1] float image would truncate to (almost) all zeros
                # under a bare uint8 cast. Round rather than truncate so that
                # values such as k / 255 map back to exactly k.
                img = np.rint(img.astype(np.float32) * 255.0)
            else:
                # Keep the cast well-defined for out-of-range values; values
                # already inside [0, 255] are truncated as before.
                img = np.clip(img, 0.0, 255.0)

        return Image.fromarray(img.astype(np.uint8))

    def _resize_and_pad(self, img: "Image.Image") -> "np.ndarray":
        """Resize ``img`` to fit the canvas, pad with gray, and normalize.

        Returns:
            A ``float32`` array of shape ``(3, size, size)``.

        Raises:
            ValueError: If the image has zero width or height.
        """
        if img.mode != "RGB":
            # Pillow always uses nearest-neighbour when resizing "1" and "P"
            # images, so convert first to make the bilinear filter apply.
            img = img.convert("RGB")

        width, height = img.size
        if width == 0 or height == 0:
            raise ValueError(
                "Cannot preprocess an image with zero width or height"
            )

        # Scale so that the longer side becomes exactly ``self.size``.
        scale = self.size / max(width, height)
        # Clamp to one pixel: for extreme aspect ratios the shorter side can
        # round down to 0, which ``Image.resize`` rejects.
        new_width = max(1, int(round(width * scale)))
        new_height = max(1, int(round(height * scale)))

        # NOTE(review): the original comment said this matches TF bilinear
        # resize; exact numerical parity is not verified here.
        resized = img.resize((new_width, new_height), Image.Resampling.BILINEAR)

        # Centre the resized image on a mid-gray square canvas.
        canvas = Image.new("RGB", (self.size, self.size), (128, 128, 128))
        left = (self.size - new_width) // 2
        top = (self.size - new_height) // 2
        canvas.paste(resized, (left, top))

        # Map uint8 [0, 255] to floats; the gray padding (128) becomes 0.0.
        arr = (np.array(canvas, dtype=np.float32) - 128.0) / 128.0

        # Channels-last (H, W, 3) -> channels-first (3, H, W).
        return np.transpose(arr, (2, 0, 1))

    def preprocess(self, images, return_tensors=None, **kwargs) -> BatchFeature:
        """Preprocess one image or a list/tuple of images.

        Args:
            images: A PIL image, NumPy array or torch tensor, or a list or
                tuple of those. Anything that is not a list or tuple is
                treated as a single image.
            return_tensors: Passed to ``BatchFeature`` as ``tensor_type``.
            **kwargs: Accepted for call compatibility and ignored.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry holds one
            ``(3, size, size)`` ``float32`` array per input image.

        Raises:
            ValueError: If an image has an unsupported type, rank or a zero
                dimension.
        """
        if not isinstance(images, (list, tuple)):
            images = [images]

        pixel_values = [
            self._resize_and_pad(self._as_pil_image(img)) for img in images
        ]
        return BatchFeature(
            data={"pixel_values": pixel_values}, tensor_type=return_tensors
        )