import json
import os

import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import ProcessorMixin


class DFineProcessor(ProcessorMixin):
    """Letterbox images to a square canvas and convert them to tensors.

    Each input image is resized so that its longer side equals ``size`` while
    keeping the aspect ratio, then pasted centred on a ``size`` x ``size`` RGB
    canvas. The scale factor and padding offsets are returned alongside the
    image tensor.
    """

    processor_class = "DFineProcessor"

    # File name used by ``save_pretrained`` / ``from_pretrained``.
    _CONFIG_NAME = "preprocessor_config.json"

    def __init__(self, size=640):
        """Store the side length (in pixels) of the square output canvas."""
        self.size = size

    def resize_with_aspect_ratio(self, image, size):
        """Resize ``image`` to fit in a ``size`` x ``size`` canvas and pad it.

        Args:
            image: A PIL image.
            size: Side length of the square output canvas.

        Returns:
            A tuple ``(new_image, ratio, pad_w, pad_h)`` where ``new_image`` is
            the padded RGB PIL image, ``ratio`` is the scale factor applied to
            the original, and ``pad_w`` / ``pad_h`` are the left / top offsets
            at which the resized image was pasted.
        """
        orig_w, orig_h = image.size
        ratio = min(size / orig_w, size / orig_h)
        # Clamp to 1 pixel: for extreme aspect ratios the shorter side would
        # otherwise truncate to 0, which PIL refuses to resize to.
        new_w = max(1, int(orig_w * ratio))
        new_h = max(1, int(orig_h * ratio))
        image = image.resize((new_w, new_h), Image.BILINEAR)
        new_image = Image.new("RGB", (size, size))
        pad_w, pad_h = (size - new_w) // 2, (size - new_h) // 2
        new_image.paste(image, (pad_w, pad_h))
        return new_image, ratio, pad_w, pad_h

    def __call__(self, images, return_tensors="pt"):
        """Preprocess one image or a batch of images.

        Args:
            images: A PIL image, a numpy array, or a list/tuple of these.
                Three-channel numpy arrays have their last axis reversed
                before conversion (presumably BGR -> RGB for OpenCV-style
                input; confirm against callers).
            return_tensors: Currently unused; torch tensors are always
                returned.

        Returns:
            A dict with keys ``"images"`` (stacked image tensors),
            ``"orig_target_sizes"`` (a single ``[size, size]`` row),
            ``"ratio"``, ``"pad_w"`` and ``"pad_h"`` (one entry per image).

        Raises:
            ValueError: If no images are given, or an input is neither a PIL
                image nor a numpy array.
        """
        if not isinstance(images, (list, tuple)):
            images = [images]
        if not images:
            raise ValueError("At least one image is required.")

        to_tensor = T.ToTensor()  # Stateless; build once rather than per image.
        processed_images = []
        ratios = []
        pad_ws = []
        pad_hs = []

        for image in images:
            if isinstance(image, np.ndarray):
                # Only flip channels on real H x W x 3 arrays; a 2-D grayscale
                # array that happens to be 3 pixels wide must not be mirrored.
                if image.ndim == 3 and image.shape[-1] == 3:
                    image = Image.fromarray(image[..., ::-1])
                else:
                    image = Image.fromarray(image)

            if not isinstance(image, Image.Image):
                raise ValueError("All inputs must be PIL images.")

            resized_img, ratio, pad_w, pad_h = self.resize_with_aspect_ratio(
                image, self.size
            )
            processed_images.append(to_tensor(resized_img))
            ratios.append(ratio)
            pad_ws.append(pad_w)
            pad_hs.append(pad_h)

        # NOTE(review): this is one row regardless of batch size; presumably
        # the consumer broadcasts it across the batch - confirm.
        orig_target_sizes = torch.tensor([[self.size, self.size]])

        return {
            "images": torch.stack(processed_images),
            "orig_target_sizes": orig_target_sizes,
            "ratio": torch.tensor(ratios),
            "pad_w": torch.tensor(pad_ws),
            "pad_h": torch.tensor(pad_hs),
        }

    def save_pretrained(self, save_directory):
        """Write ``preprocessor_config.json`` into ``save_directory``.

        The directory is created if needed. The file records the processor
        class name and the configured ``size`` so that ``from_pretrained`` can
        restore it.
        """
        os.makedirs(save_directory, exist_ok=True)
        config = {
            "processor_class": self.__class__.__name__,
            "size": self.size,
        }
        config_path = os.path.join(save_directory, self._CONFIG_NAME)
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f)

    @classmethod
    def _read_saved_size(cls, pretrained_model_name_or_path):
        """Return the ``size`` stored in a saved config, or None if unavailable."""
        try:
            config_path = os.path.join(
                os.fspath(pretrained_model_name_or_path), cls._CONFIG_NAME
            )
            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)
        except (TypeError, OSError, ValueError):
            # Not a local directory, no config file, or unreadable JSON:
            # the caller falls back to the default size.
            return None
        if not isinstance(config, dict):
            return None
        return config.get("size")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Build a processor, restoring ``size`` from a saved config if present.

        Args:
            pretrained_model_name_or_path: Directory previously written by
                ``save_pretrained``. If it holds no readable config (or no
                ``size`` entry), the default size is used.
            **kwargs: ``size`` may be passed to override the saved value;
                other keyword arguments are ignored.

        Returns:
            A new processor instance.
        """
        size = kwargs.get("size")
        if size is None:
            size = cls._read_saved_size(pretrained_model_name_or_path)
        return cls() if size is None else cls(size=size)