from typing import Optional

from transformers import PretrainedConfig


class AuroraConfig(PretrainedConfig):
    """Configuration object registered under ``model_type = "aurora"``.

    Every named constructor argument is stored unchanged on the instance
    under an attribute of the same name. Any extra keyword arguments are
    forwarded to ``PretrainedConfig.__init__``.

    NOTE(review): the semantics of the individual hyperparameters are not
    visible in this file; consult the modeling code that consumes this
    config before relying on what a name appears to mean.

    Args:
        threshold_ratio: Stored as ``self.threshold_ratio``. When omitted
            (or ``None``), a new list ``[0.2, 0.3, 0.4, 0.5]`` is created
            for this instance, so in-place edits on one config never leak
            into other configs or into the default.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "aurora"

    def __init__(
        self,
        token_len: int = 48,
        hidden_size: int = 512,
        intermediate_size: int = 1024,
        num_enc_layers: int = 12,
        num_dec_layers: int = 12,
        num_attention_heads: int = 8,
        hidden_act: str = "silu",
        rope_theta: int = 10000,
        dropout_rate: float = 0.2,
        max_position_embeddings: int = 10000,
        num_sampling_steps: int = 50,
        flow_loss_depth: int = 3,
        diffusion_batch_mul: int = 4,
        threshold_ratio: Optional[list[float]] = None,
        mask_ratio: float = 0.5,
        norm_mode: str = 'batch',
        num_prototypes: int = 1024,
        num_retriever_enc_layers: int = 1,
        num_retriever_dec_layers: int = 1,
        num_text_cross_layers: int = 1,
        num_vision_cross_layers: int = 1,
        num_text_connect_layers: int = 1,
        num_vision_connect_layers: int = 1,
        num_distill: int = 10,
        **kwargs,
    ):
        # A list literal in the signature would be created once and shared by
        # every instance that relies on the default; build it per call instead.
        if threshold_ratio is None:
            threshold_ratio = [0.2, 0.3, 0.4, 0.5]

        self.token_len = token_len
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_enc_layers = num_enc_layers
        self.num_dec_layers = num_dec_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.rope_theta = rope_theta
        self.dropout_rate = dropout_rate
        self.max_position_embeddings = max_position_embeddings
        self.num_sampling_steps = num_sampling_steps
        self.flow_loss_depth = flow_loss_depth
        self.diffusion_batch_mul = diffusion_batch_mul
        self.threshold_ratio = threshold_ratio
        self.mask_ratio = mask_ratio
        self.norm_mode = norm_mode
        self.num_prototypes = num_prototypes
        self.num_retriever_enc_layers = num_retriever_enc_layers
        self.num_retriever_dec_layers = num_retriever_dec_layers
        self.num_text_cross_layers = num_text_cross_layers
        self.num_vision_cross_layers = num_vision_cross_layers
        self.num_text_connect_layers = num_text_connect_layers
        self.num_vision_connect_layers = num_vision_connect_layers
        self.num_distill = num_distill

        # Base-class initialisation runs last, after the model-specific
        # attributes above are set (same order as the original code).
        super().__init__(
            **kwargs,
        )