| from transformers import LlavaNextConfig | |
class Granite4VisionConfig(LlavaNextConfig):
    """Configuration class registered under the ``granite4_vision`` model type.

    Extends ``LlavaNextConfig`` with five extra fields that are stored on the
    instance exactly as passed. Every other keyword argument is forwarded
    unchanged to ``LlavaNextConfig.__init__``.

    Args:
        pretrained_vision_tower: Stored as-is; defaults to the empty string.
            NOTE(review): presumably a name/path of a pretrained vision tower
            checkpoint -- confirm against the model code.
        pretrained_language_model: Stored as-is; defaults to the empty string.
            NOTE(review): presumably a name/path of a pretrained language
            model checkpoint -- confirm against the model code.
        downsample_rate: Stored as-is; defaults to ``None``. The expected
            type is not visible in this file -- TODO confirm with the
            consumer of this field.
        downsample_method: Stored as-is; defaults to ``"interpolate"``.
        use_image_newline_parameter: Stored as-is; defaults to ``True``.
        **kwargs: Passed through to the parent configuration constructor.
    """

    model_type = "granite4_vision"

    def __init__(
        self,
        pretrained_vision_tower: str = "",
        pretrained_language_model: str = "",
        downsample_rate=None,
        downsample_method: str = "interpolate",
        use_image_newline_parameter: bool = True,
        **kwargs,
    ) -> None:
        # Record the Granite-specific fields first; the parent constructor is
        # invoked afterwards, so it runs with these attributes already set.
        self.pretrained_vision_tower = pretrained_vision_tower
        self.pretrained_language_model = pretrained_language_model

        self.downsample_method = downsample_method
        self.downsample_rate = downsample_rate

        self.use_image_newline_parameter = use_image_newline_parameter

        # Everything not consumed above belongs to the LLaVA-NeXT base config.
        super().__init__(**kwargs)
class Granite4VisionConfigNaflex(Granite4VisionConfig):
    """Variant of ``Granite4VisionConfig`` with its own model type.

    The only difference from the parent class defined here is the
    ``model_type`` identifier, ``"granite4_vision_naflex"``; the constructor
    and all fields are inherited unchanged.
    """

    model_type = "granite4_vision_naflex"