{
  "embed_dim": 768,
  "num_layers": 8,
  "num_heads": 4,
  "num_masks": 16,
  "num_waves_per_mask": 8,
  "topk_masks": 8,
  "attn_alpha": 3.0,
  "content_rank": 8,
  "content_mix": 0.15,
  "learned_content": true,
  "use_sin_waves": false,
  "ffn_mult": 4,
  "vocab_size": 50257,
  "seq_len": 256
}