""" GSLM Model Configuration """ import json import os from typing import Optional from transformers.configuration_utils import PretrainedConfig from transformers.utils import logging logger = logging.get_logger(__name__) class GSLMConfig(PretrainedConfig): """ Configuration class for GSLM (Generative Spoken Language Model). This configuration class stores all parameters needed to initialize a GSLMModel. """ model_type = "gslm" def __init__( self, vocab_size: int = 204, d_model: int = 1024, nhead: int = 16, num_layers: int = 12, dim_feedforward: int = 4096, dropout: float = 0.1, attention_dropout: float = 0.1, max_seq_length: int = 3072, pad_idx: int = 204, share_input_output_embed: bool = True, activation: str = "relu", architecture: str = "transformer_lm_big", **kwargs ): """ Initialize GSLM configuration. Args: vocab_size: Size of the vocabulary d_model: Dimensionality of the embeddings and hidden states nhead: Number of attention heads num_layers: Number of transformer layers dim_feedforward: Dimensionality of the feedforward network dropout: Dropout probability attention_dropout: Dropout probability for attention weights max_seq_length: Maximum sequence length pad_idx: Padding token index share_input_output_embed: Whether to share input and output embeddings activation: Activation function ("relu" or "gelu") architecture: Model architecture name """ self.vocab_size = vocab_size self.d_model = d_model self.nhead = nhead self.num_layers = num_layers self.dim_feedforward = dim_feedforward self.dropout = dropout self.attention_dropout = attention_dropout self.max_seq_length = max_seq_length self.pad_idx = pad_idx self.share_input_output_embed = share_input_output_embed self.activation = activation self.architecture = architecture super().__init__(**kwargs)