MultiEmbedTR / configuration_multimodal.py
utkubascakir's picture
Upload folder using huggingface_hub
ff7fe7e verified
from transformers import PretrainedConfig
class MultimodalConfig(PretrainedConfig):
    """Configuration object for the ``multimodal_embedder`` model type.

    The constructor performs no validation or derivation: after forwarding
    any extra keyword arguments to ``PretrainedConfig.__init__``, it stores
    each named argument on the instance under the same name.

    Args:
        text_model_name: Name of the text model. Presumably a Hugging Face
            Hub identifier consumed by the model class -- confirm there.
        vision_model_name: Name of the vision model. Presumably a Hub
            identifier as well -- confirm against the model class.
        text_dim: Dimension associated with the text side (likely the text
            backbone's hidden size -- TODO confirm).
        image_dim: Dimension associated with the image side (likely the
            vision backbone's hidden size -- TODO confirm).
        embed_dim: Embedding dimension (presumably the size of the shared
            text/image space -- TODO confirm).
        temperature_init: Initial temperature-related value; the default is
            ``1 / 0.07``. NOTE(review): this looks like an inverse
            temperature (logit scale) -- verify where it is consumed.
        use_mean_pooling_for_text: Flag stored as given; by its name it
            selects mean pooling for text features -- confirm in the model.
        **kwargs: Passed through unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "multimodal_embedder"

    def __init__(
        self,
        text_model_name: str = "newmindai/modernbert-base-tr-uncased-allnli-stsb",
        vision_model_name: str = "facebook/dinov2-base",
        text_dim: int = 768,
        image_dim: int = 768,
        embed_dim: int = 384,
        temperature_init: float = 1 / 0.07,
        use_mean_pooling_for_text: bool = True,
        **kwargs,
    ):
        # Let the base class consume the generic configuration kwargs first,
        # exactly as the original ordering did.
        super().__init__(**kwargs)

        # Model names for the two modalities.
        self.vision_model_name = vision_model_name
        self.text_model_name = text_model_name

        # Dimension settings.
        self.embed_dim = embed_dim
        self.image_dim = image_dim
        self.text_dim = text_dim

        # Remaining scalar options, stored verbatim.
        self.use_mean_pooling_for_text = use_mean_pooling_for_text
        self.temperature_init = temperature_init