{
  "model_type": "dfm",
  "architectures": [
    "DFMModel"
  ],
  "auto_map": {
    "AutoConfig": "configuration_dfm.DFMConfig",
    "AutoModel": "modeling_dfm.DFMModel"
  },
  "vocab_size": 50257,
  "hidden_size": 2048,
  "cond_dim": 256,
  "num_hidden_layers": 21,
  "n_blocks": 21,
  "num_attention_heads": 32,
  "n_heads": 32,
  "max_position_embeddings": 1024,
  "sequence_length": 1024,
  "dropout": 0.1,
  "rotary_dim": 64,
  "source_distribution": "mask",
  "flow_scheduler_type": "polynomial",
  "flow_exponent": 1.0,
  "flow_loss_function": "generalized_kl",
  "sampling_steps": 1024,
  "bos_token_id": 50256,
  "eos_token_id": 50256,
  "mask_token_id": 50257,
  "tokenizer_name": "gpt2",
  "dtype": "bfloat16"
}