# fst_353M / config.py
# williamconvertino's picture
# Upload folder using huggingface_hub
# 439046a verified
from transformers import PretrainedConfig
class FSTConfig(PretrainedConfig):
    """Configuration container for models registered under the ``"fst"`` type.

    Every constructor argument other than the special-token ids is stored
    unchanged as an instance attribute of the same name. The special-token
    ids and any additional keyword arguments are handed to
    ``PretrainedConfig.__init__``.

    Args:
        vocab_size: Core setting, stored as ``self.vocab_size``.
        hidden_size: Core setting, stored as ``self.hidden_size``.
        num_hidden_layers: Core setting, stored as
            ``self.num_hidden_layers``.
        num_attention_heads: Core setting, stored as
            ``self.num_attention_heads``.
        intermediate_size: Core setting, stored as
            ``self.intermediate_size``.
        max_position_embeddings: Core setting, stored as
            ``self.max_position_embeddings``.
        use_causal_attention: Attention flag, stored as
            ``self.use_causal_attention``.
        use_cache: Attention flag, stored as ``self.use_cache``; intended
            to be disabled during training.
        initializer_range: Initialization setting, stored as
            ``self.initializer_range``.
        bos_token_id: Forwarded to the base-class constructor.
        eos_token_id: Forwarded to the base-class constructor.
        pad_token_id: Forwarded to the base-class constructor.
        **kwargs: Forwarded untouched to the base-class constructor.
    """

    model_type = "fst"

    def __init__(
        self,
        # Core
        vocab_size: int = 50257,
        hidden_size: int = 2048,
        num_hidden_layers: int = 24,
        num_attention_heads: int = 32,
        intermediate_size: int = 8192,
        max_position_embeddings: int = 2048,
        # Attention
        use_causal_attention: bool = True,
        use_cache: bool = True,  # Disable during training
        # Initialization and Normalization
        initializer_range: float = 0.02,
        # Tokenizer
        bos_token_id: int | None = None,
        eos_token_id: int | None = None,
        pad_token_id: int | None = None,
        **kwargs,
    ):
        # The base class receives the tokenizer ids plus any extra keyword
        # arguments; it runs before the model-specific fields are assigned.
        special_token_ids = {
            "bos_token_id": bos_token_id,
            "eos_token_id": eos_token_id,
            "pad_token_id": pad_token_id,
        }
        super().__init__(**special_token_ids, **kwargs)

        # Model-specific fields, listed in assignment order.
        stored_fields = {
            # Core
            "vocab_size": vocab_size,
            "hidden_size": hidden_size,
            "num_hidden_layers": num_hidden_layers,
            "num_attention_heads": num_attention_heads,
            "intermediate_size": intermediate_size,
            "max_position_embeddings": max_position_embeddings,
            # Attention
            "use_causal_attention": use_causal_attention,
            "use_cache": use_cache,
            # Initialization and Normalization
            "initializer_range": initializer_range,
        }
        for field_name, field_value in stored_fields.items():
            setattr(self, field_name, field_value)