# SLIP / configuration_slip.py
# Fix mixed-dtype handling in configuration_slip.py (commit cd6a9b8)
"""SLIP model configuration for HuggingFace Hub."""
from transformers import PretrainedConfig
class SLIPConfig(PretrainedConfig):
    """Configuration class for the SLIP model.

    Stores the hyperparameters needed to instantiate a SLIP model:
    the backbone LLM, multimodal attention settings, loss weights,
    fine-tuning options, and the sensor-encoder sub-configuration.

    Args:
        llm_model_name (str): Hub identifier of the backbone language model.
        max_llm_len (int): Maximum token length fed to the LLM.
        num_img_queries (int): Number of learned image query tokens.
        num_heads (int): Number of attention heads (for the cross-attention
            module — presumably; confirm against the model code).
        caption_loss_weight (float): Weight of the captioning loss term.
        contrastive_loss_weight (float): Weight of the contrastive loss term.
        use_lora (bool): Whether to apply LoRA adapters instead of full
            fine-tuning.
        unlocked_layers (int): Number of LLM layers left trainable.
        split_layer (int): Layer index at which the LLM is split
            (NOTE(review): exact semantics not visible here — verify).
        common_dim (int): Dimension of the shared embedding space.
        post_train (bool): Post-training mode flag.
        sensor_encoder (dict | None): Sensor-encoder sub-config. If None,
            a default ViT-style configuration is used. An explicitly passed
            dict — including an empty one — is kept as-is.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "slip"

    def __init__(
        self,
        llm_model_name="google/gemma-3-270m",
        max_llm_len=768,
        num_img_queries=64,
        num_heads=5,
        caption_loss_weight=1.0,
        contrastive_loss_weight=1.0,
        use_lora=False,
        unlocked_layers=4,
        split_layer=12,
        common_dim=640,
        post_train=True,
        sensor_encoder=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.llm_model_name = llm_model_name
        self.max_llm_len = max_llm_len
        self.num_img_queries = num_img_queries
        self.num_heads = num_heads
        self.caption_loss_weight = caption_loss_weight
        self.contrastive_loss_weight = contrastive_loss_weight
        self.use_lora = use_lora
        self.unlocked_layers = unlocked_layers
        self.split_layer = split_layer
        self.common_dim = common_dim
        self.post_train = post_train
        # BUG FIX: the original used `sensor_encoder or {...}`, which would
        # silently replace an explicitly passed falsy value (e.g. an empty
        # dict {}) with the full default. Use an explicit `is None` check so
        # only a truly absent argument falls back to the default.
        if sensor_encoder is None:
            sensor_encoder = {
                "embed_dim": 768,
                "num_heads": 12,
                "mlp_ratio": 4,
                "depth": 12,
                "dropout_rate": 0.1,
                "learnable_pos_emb": False,
                "max_position_embeddings": 4880,
                "patch_size": None,
                "channel_attn_type": "all_attn",
            }
        self.sensor_encoder = sensor_encoder