from abc import abstractmethod

import torch
import torch.nn as nn


class ProcessorWrapper:
    """Adapts a plain image transform to the Hugging Face image-processor
    interface (crop_size + preprocess) expected by the vision towers below."""

    def __init__(
        self,
        transform,
        height=378,
        width=378,
        image_mean=[0.48145466, 0.4578275, 0.40821073],
    ):
        self._crop_size = {
            "height": height,
            "width": width,
        }
        self._transforms = transform
        self.image_mean = image_mean

    @property
    def crop_size(self):
        return self._crop_size

    def preprocess(self, image, return_tensors="pt"):
        # image is expected to be a PIL Image; the transform handles resizing,
        # tensor conversion, and normalization. return_tensors is accepted for
        # API compatibility with Hugging Face processors but is not used.
        output = {}
        output["pixel_values"] = [self._transforms(image)]
        return output
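
# --- Usage sketch (hypothetical; _demo_preprocess is not part of the original
# file). It shows how ProcessorWrapper stands in for an HF image processor
# around a torchvision-style pipeline. The transform below is illustrative:
# the mean matches the image_mean default above, while the std values are the
# standard CLIP normalization constants and are an assumption.
def _demo_preprocess():
    from PIL import Image

    import torchvision.transforms as T

    transform = T.Compose([
        T.Resize((378, 378)),
        T.ToTensor(),
        T.Normalize(
            mean=[0.48145466, 0.4578275, 0.40821073],
            std=[0.26862954, 0.26130258, 0.27577711],
        ),
    ])
    processor = ProcessorWrapper(transform)
    image = Image.new("RGB", (512, 512))  # stand-in for a real input image
    batch = processor.preprocess(image, return_tensors="pt")
    assert batch["pixel_values"][0].shape == (3, 378, 378)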

class BaseVisionTower(nn.Module):
    def __init__(self, vision_tower_name, args, delay_load=False):
        super().__init__()

        self.is_loaded = False
        self.args = args

        self.vision_tower_name = vision_tower_name
        self.select_layer = args.mm_vision_select_layer
        self.select_feature = getattr(args, "mm_vision_select_feature", "patch")
        self.unfreeze_mm_vision_tower = getattr(args, "unfreeze_mm_vision_tower", False)
        self.delay_load = delay_load

    @abstractmethod
    def load_model(self, device_map=None):
        raise NotImplementedError("Subclasses must implement load_model")

    @abstractmethod
    def _forward(self, images):
        raise NotImplementedError("Subclasses must implement _forward")

    def forward(self, images):
        # A list is treated as a batch of single images: each one gets a batch
        # dimension before being passed to the subclass _forward.
        if isinstance(images, list):
            image_features = [self._forward(image.unsqueeze(0)) for image in images]
        else:
            image_features = self._forward(images)

        return image_features
    @property
    def dummy_feature(self):
        return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype)

    @property
    def dtype(self):
        # Dynamically infer the dtype from the wrapped tower, if not
        # explicitly specified, falling back to its first parameter.
        if hasattr(self.vision_tower, "dtype"):
            return self.vision_tower.dtype
        params = list(self.vision_tower.parameters())
        # Default to torch.float32 if the tower has no parameters.
        return params[0].dtype if len(params) > 0 else torch.float32

    @property
    def device(self):
        # Dynamically infer the device from the wrapped tower, if not
        # explicitly specified, falling back to its first parameter.
        if hasattr(self.vision_tower, "device"):
            return self.vision_tower.device
        params = list(self.vision_tower.parameters())
        # Default to CPU if the tower has no parameters.
        return params[0].device if len(params) > 0 else torch.device("cpu")

    @property
    def config(self):
        if self.is_loaded:
            return self.vision_tower.config
        return self.cfg_only

    # The underscore-prefixed fallbacks below (_hidden_size, _image_size,
    # _patch_size, _interp_size, ...) are expected to be set by subclasses.

    @property
    def hidden_size(self):
        try:
            return self.config.hidden_size
        except AttributeError:
            return self._hidden_size

    @property
    def image_size(self):  # resolution
        try:
            return self.config.image_size
        except AttributeError:
            return self._image_size

    @property
    def patch_size(self):
        try:
            return self.config.patch_size
        except AttributeError:
            return self._patch_size

    @property
    def num_patches_per_side(self):
        if self._interp_size is not None:
            return int(self._interp_size**0.5)
        try:
            return self.image_size // self.patch_size
        except AttributeError:
            return self._num_patches_per_side

    @property
    def num_patches(self):
        if self._interp_size is not None:
            return self._interp_size
        try:
            return self.num_patches_per_side**2
        except AttributeError:
            return self._num_patches
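
# --- Minimal subclass sketch (hypothetical; ToyVisionTower is not part of the
# original file). It illustrates the contract the base class expects:
# load_model() populates self.vision_tower plus the underscore fallbacks, and
# _forward() maps a (B, C, H, W) batch to (B, num_patches, hidden) tokens.
class ToyVisionTower(BaseVisionTower):
    def load_model(self, device_map=None):
        # Stand-in "tower": one stride-14 conv whose outputs mimic ViT patch
        # tokens; a real subclass would load a pretrained vision encoder here.
        self.vision_tower = nn.Conv2d(3, 64, kernel_size=14, stride=14)
        self._hidden_size = 64
        self._image_size = 378
        self._patch_size = 14
        self._interp_size = None
        self.is_loaded = True

    def _forward(self, images):
        feats = self.vision_tower(images)        # (B, 64, 27, 27) at 378 px
        return feats.flatten(2).transpose(1, 2)  # (B, 729, 64)


if __name__ == "__main__":
    from types import SimpleNamespace

    # mm_vision_select_layer is the only args field the base class requires;
    # -2 is a common choice but arbitrary for this toy.
    tower = ToyVisionTower("toy", SimpleNamespace(mm_vision_select_layer=-2))
    tower.load_model()
    out = tower(torch.randn(2, 3, 378, 378))
    print(out.shape)  # torch.Size([2, 729, 64])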