| """ | |
| GSLM Model Configuration | |
| """ | |
| import json | |
| import os | |
| from typing import Optional | |
| from transformers.configuration_utils import PretrainedConfig | |
| from transformers.utils import logging | |
| logger = logging.get_logger(__name__) | |
| class GSLMConfig(PretrainedConfig): | |
| """ | |
| Configuration class for GSLM (Generative Spoken Language Model). | |
| This configuration class stores all parameters needed to initialize a GSLMModel. | |
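    Example (a minimal sketch; the import path is hypothetical, since only the
    configuration class is defined in this file):

    ```python
    >>> from gslm.configuration_gslm import GSLMConfig  # hypothetical module path

    >>> # Defaults follow the `transformer_lm_big` architecture.
    >>> configuration = GSLMConfig()
    >>> configuration.d_model
    1024

    >>> # Any field can be overridden at construction time.
    >>> small = GSLMConfig(num_layers=6, d_model=512, dim_feedforward=2048)
    ```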
| """ | |
| model_type = "gslm" | |
| def __init__( | |
        self,
        vocab_size: int = 204,
        d_model: int = 1024,
        nhead: int = 16,
        num_layers: int = 12,
        dim_feedforward: int = 4096,
        dropout: float = 0.1,
        attention_dropout: float = 0.1,
        max_seq_length: int = 3072,
        pad_idx: int = 204,
        share_input_output_embed: bool = True,
        activation: str = "relu",
        architecture: str = "transformer_lm_big",
        **kwargs,
    ):
| """ | |
| Initialize GSLM configuration. | |
| Args: | |
| vocab_size: Size of the vocabulary | |
| d_model: Dimensionality of the embeddings and hidden states | |
| nhead: Number of attention heads | |
| num_layers: Number of transformer layers | |
| dim_feedforward: Dimensionality of the feedforward network | |
| dropout: Dropout probability | |
| attention_dropout: Dropout probability for attention weights | |
| max_seq_length: Maximum sequence length | |
| pad_idx: Padding token index | |
| share_input_output_embed: Whether to share input and output embeddings | |
| activation: Activation function ("relu" or "gelu") | |
| architecture: Model architecture name | |
| """ | |
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.nhead = nhead
        self.num_layers = num_layers
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.max_seq_length = max_seq_length
        self.pad_idx = pad_idx
        self.share_input_output_embed = share_input_output_embed
        self.activation = activation
        self.architecture = architecture
        super().__init__(**kwargs)
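

if __name__ == "__main__":
    # Minimal serialization sketch. It uses only the standard `PretrainedConfig`
    # API inherited above: `save_pretrained` writes `config.json` to a directory
    # and `from_pretrained` rebuilds the config from it. The temporary directory
    # is illustrative.
    import tempfile

    config = GSLMConfig(max_seq_length=2048)
    with tempfile.TemporaryDirectory() as tmp_dir:
        config.save_pretrained(tmp_dir)
        reloaded = GSLMConfig.from_pretrained(tmp_dir)

    # Both overridden and default fields survive the round trip.
    assert reloaded.max_seq_length == 2048
    assert reloaded.d_model == 1024
    print(reloaded.to_json_string())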