"""
GSLM Model Configuration
"""

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)


class GSLMConfig(PretrainedConfig):
"""
Configuration class for GSLM (Generative Spoken Language Model).
This configuration class stores all parameters needed to initialize a GSLMModel.
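
    Example (a minimal sketch using the defaults defined below):

        >>> config = GSLMConfig()
        >>> config.d_model
        1024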
"""
    model_type = "gslm"

    def __init__(
        self,
        vocab_size: int = 204,
        d_model: int = 1024,
        nhead: int = 16,
        num_layers: int = 12,
        dim_feedforward: int = 4096,
        dropout: float = 0.1,
        attention_dropout: float = 0.1,
        max_seq_length: int = 3072,
        pad_idx: int = 204,
        share_input_output_embed: bool = True,
        activation: str = "relu",
        architecture: str = "transformer_lm_big",
        **kwargs,
    ):
"""
Initialize GSLM configuration.
Args:
vocab_size: Size of the vocabulary
d_model: Dimensionality of the embeddings and hidden states
nhead: Number of attention heads
num_layers: Number of transformer layers
dim_feedforward: Dimensionality of the feedforward network
dropout: Dropout probability
attention_dropout: Dropout probability for attention weights
max_seq_length: Maximum sequence length
pad_idx: Padding token index
share_input_output_embed: Whether to share input and output embeddings
activation: Activation function ("relu" or "gelu")
architecture: Model architecture name
"""
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.nhead = nhead
        self.num_layers = num_layers
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.max_seq_length = max_seq_length
        self.pad_idx = pad_idx
        self.share_input_output_embed = share_input_output_embed
        self.activation = activation
        self.architecture = architecture
        super().__init__(**kwargs)
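

# A minimal usage sketch, assuming the save/load helpers inherited from
# PretrainedConfig; the "./gslm-config-demo" path is illustrative only.
# It shows that the custom fields round-trip through config.json.
if __name__ == "__main__":
    cfg = GSLMConfig(num_layers=24)  # override one default for illustration
    cfg.save_pretrained("./gslm-config-demo")  # writes config.json
    reloaded = GSLMConfig.from_pretrained("./gslm-config-demo")
    assert reloaded.num_layers == 24
    assert reloaded.model_type == "gslm"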