{
  "model_type": "smoldlm",
  "architecture": "block_diffusion_lm",
  "n_layer": 30,
  "n_embd": 576,
  "n_head": 9,
  "n_kv_head": 3,
  "mlp_hidden": 1536,
  "vocab_size": 49152,
  "seq_len": 2048,
  "block_size": 32,
  "num_blocks": 64,
  "tied_embeddings": true,
  "attention": "gated_query_attention",
  "noise_schedule": "linear",
  "optimizer": "muonclip_adamw",
  "total_params": "144.47M",
  "tokenizer": "smoldlm",
  "mask_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 2
}