capri / configuration_capri.py
Ligul's picture
Upload folder using huggingface_hub
fd6509b verified
from transformers import PretrainedConfig
class CapriConfig(PretrainedConfig):
    """Configuration for the ``capri`` model type.

    The constructor stores every argument below as an attribute of the same
    name; any remaining keyword arguments are forwarded unchanged to
    ``PretrainedConfig.__init__``.

    Args:
        text_model_name_or_path: Identifier or path of the text model.
        vision_model_name_or_path: Identifier or path of the vision model.
        adapter_subdir: Name of the sub-directory associated with the text
            adapter (how it is resolved is decided by the code that consumes
            this config).
        projector_type: Name of the projector variant.
        projector_in_dim: Projector input width (presumably the vision
            encoder hidden size -- confirm against the model code).
        projector_hidden_dim: Projector hidden width.
        projector_out_dim: Projector output width (presumably the text model
            hidden size -- confirm against the model code).
        image_token: Placeholder string standing in for an image.
        image_token_id: Vocabulary id paired with ``image_token``.
        prompt_prefix: Default prompt text.
        max_length: Maximum length setting stored on the config.
        load_vision_tower_by_default: Flag stored for the model code to
            decide whether the vision tower is loaded eagerly.
        processor_class: Name of the processor class tied to this config.
        **kwargs: Extra options handled by ``PretrainedConfig``.
    """

    model_type = "capri"

    def __init__(
        self,
        text_model_name_or_path: str = "Qwen/Qwen2.5-0.5B",
        vision_model_name_or_path: str = "google/siglip2-base-patch16-224",
        adapter_subdir: str = "text_adapter",
        projector_type: str = "mlp",
        projector_in_dim: int = 768,
        projector_hidden_dim: int = 3072,
        projector_out_dim: int = 896,
        image_token: str = "<image>",
        image_token_id: int = 151665,
        prompt_prefix: str = "<image> Caption:",
        max_length: int = 64,
        load_vision_tower_by_default: bool = False,
        processor_class: str = "CapriProcessor",
        **kwargs,
    ) -> None:
        # Run the base initializer FIRST. In transformers releases where
        # PretrainedConfig still manages legacy generation settings, its
        # __init__ does `self.max_length = kwargs.pop("max_length", 20)`.
        # Because `max_length` is a named parameter here it is never present
        # in `kwargs`, so assigning it before this call would be silently
        # overwritten with the base default.
        super().__init__(**kwargs)

        # Backbone / checkpoint locations.
        self.text_model_name_or_path = text_model_name_or_path
        self.vision_model_name_or_path = vision_model_name_or_path
        self.adapter_subdir = adapter_subdir

        # Projector settings.
        self.projector_type = projector_type
        self.projector_in_dim = projector_in_dim
        self.projector_hidden_dim = projector_hidden_dim
        self.projector_out_dim = projector_out_dim

        # Image placeholder token and prompting defaults.
        self.image_token = image_token
        self.image_token_id = image_token_id
        self.prompt_prefix = prompt_prefix
        self.max_length = max_length

        # Loading behaviour and associated processor.
        self.load_vision_tower_by_default = load_vision_tower_by_default
        self.processor_class = processor_class