# DFM-1.3B / configuration_dfm.py
# Uploaded by kl1 via huggingface_hub (commit 7b6c98e, verified)
from transformers import PretrainedConfig
class DFMConfig(PretrainedConfig):
    """Configuration for the DFM language model.

    Holds every hyperparameter needed to build the model: transformer
    dimensions, flow-matching schedule/loss settings, sampling options,
    and the special-token ids of the tokenizer it pairs with (defaults
    match GPT-2's vocabulary).

    NOTE(review): `mask_token_id` defaults to 50257, one past the GPT-2
    vocab of 50257 ids (0..50256) — presumably an extra token appended
    for the "mask" source distribution; confirm against the model code.
    """

    model_type = "dfm"

    def __init__(
        self,
        vocab_size=50257,
        hidden_size=2048,
        cond_dim=256,
        n_blocks=21,
        n_heads=32,
        dropout=0.1,
        sequence_length=1024,
        source_distribution="mask",
        flow_scheduler_type="polynomial",
        flow_exponent=1.0,
        flow_loss_function="generalized_kl",
        sampling_steps=1024,
        bos_token_id=50256,
        eos_token_id=50256,
        mask_token_id=50257,
        tokenizer_name="gpt2",
        dtype="bfloat16",
        **kwargs,
    ):
        # BOS/EOS ids are forwarded to the base class, which owns them;
        # all remaining hyperparameters are stored on this config directly.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )

        # --- Transformer architecture ---
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.cond_dim = cond_dim
        self.n_blocks = n_blocks
        self.n_heads = n_heads
        self.dropout = dropout
        self.sequence_length = sequence_length

        # --- Flow-matching schedule, loss, and sampling ---
        self.source_distribution = source_distribution
        self.flow_scheduler_type = flow_scheduler_type
        self.flow_exponent = flow_exponent
        self.flow_loss_function = flow_loss_function
        self.sampling_steps = sampling_steps

        # --- Tokenization and numerics ---
        self.mask_token_id = mask_token_id
        self.tokenizer_name = tokenizer_name
        self.dtype = dtype