{
  "model_type": "dfm",
  "architectures": [
    "DFMModel"
  ],
  "auto_map": {
    "AutoConfig": "configuration_dfm.DFMConfig",
    "AutoModel": "modeling_dfm.DFMModel"
  },
  "vocab_size": 50257,
  "hidden_size": 2048,
  "cond_dim": 256,
  "num_hidden_layers": 21,
  "n_blocks": 21,
  "num_attention_heads": 32,
  "n_heads": 32,
  "max_position_embeddings": 1024,
  "sequence_length": 1024,
  "dropout": 0.1,
  "rotary_dim": 64,
  "source_distribution": "mask",
  "flow_scheduler_type": "polynomial",
  "flow_exponent": 1.0,
  "flow_loss_function": "generalized_kl",
  "sampling_steps": 1024,
  "bos_token_id": 50256,
  "eos_token_id": 50256,
  "mask_token_id": 50257,
  "tokenizer_name": "gpt2",
  "dtype": "bfloat16"
}