from transformers import PretrainedConfig


class DFMConfig(PretrainedConfig):
    """Configuration for a DFM model (from the attribute names, presumably a
    discrete-flow-matching language model — confirm against the model code).

    Stores the transformer-backbone hyperparameters together with the flow
    training/sampling settings, and plugs into the Hugging Face
    ``PretrainedConfig`` save/load machinery via ``model_type = "dfm"``.

    Args:
        vocab_size: Token vocabulary size (default 50257, the GPT-2 vocab).
        hidden_size: Transformer hidden dimension.
        cond_dim: Conditioning-embedding dimension (presumably the flow-time
            conditioning — TODO confirm against the model code).
        n_blocks: Number of transformer blocks.
        n_heads: Number of attention heads per block.
        dropout: Dropout probability.
        sequence_length: Maximum sequence length.
        source_distribution: Source (noise) distribution of the flow
            (default ``"mask"``).
        flow_scheduler_type: Flow path scheduler type
            (default ``"polynomial"``).
        flow_exponent: Exponent parameter for the flow scheduler.
        flow_loss_function: Training loss identifier
            (default ``"generalized_kl"``).
        sampling_steps: Number of discrete steps used at sampling time.
        bos_token_id: Beginning-of-sequence token id, forwarded to
            ``PretrainedConfig``.
        eos_token_id: End-of-sequence token id, forwarded to
            ``PretrainedConfig``.
        mask_token_id: Id of the mask token. NOTE(review): the default
            (50257) equals ``vocab_size``, i.e. it lies outside
            ``[0, vocab_size)`` — presumably the embedding table is sized
            ``vocab_size + 1`` to accommodate it; verify in the model code.
        tokenizer_name: Name of the associated tokenizer (default "gpt2").
        dtype: Preferred compute dtype as a string (default "bfloat16").
        **kwargs: Remaining arguments forwarded to ``PretrainedConfig``.
    """

    model_type = "dfm"

    def __init__(
        self,
        vocab_size=50257,
        hidden_size=2048,
        cond_dim=256,
        n_blocks=21,
        n_heads=32,
        dropout=0.1,
        sequence_length=1024,
        source_distribution="mask",
        flow_scheduler_type="polynomial",
        flow_exponent=1.0,
        flow_loss_function="generalized_kl",
        sampling_steps=1024,
        bos_token_id=50256,
        eos_token_id=50256,
        mask_token_id=50257,
        tokenizer_name="gpt2",
        dtype="bfloat16",
        **kwargs,
    ):
        # Special-token ids are handled by the base class so that generation
        # utilities and serialization see them in the standard locations.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )
        # Backbone hyperparameters.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.cond_dim = cond_dim
        self.n_blocks = n_blocks
        self.n_heads = n_heads
        self.dropout = dropout
        self.sequence_length = sequence_length
        # Flow training / sampling settings.
        self.source_distribution = source_distribution
        self.flow_scheduler_type = flow_scheduler_type
        self.flow_exponent = flow_exponent
        self.flow_loss_function = flow_loss_function
        self.sampling_steps = sampling_steps
        # Tokenization / runtime settings.
        self.mask_token_id = mask_token_id
        self.tokenizer_name = tokenizer_name
        self.dtype = dtype