{
  "vocab_size": 10000,
  "dim": 1536,
  "num_layers": 6,
  "num_heads": 8,
  "max_recursion": 4,
  "num_experts": 4,
  "ffn_expansion": 4,
  "max_position_embeddings": 2048,
  "model_type": "MoR",
  "architecture": "MixtureOfRecursions",
  "hidden_act": "gelu"
}
{
  "vocab_size": 10000,
  "dim": 1536,
  "num_layers": 6,
  "num_heads": 8,
  "max_recursion": 4,
  "num_experts": 4,
  "ffn_expansion": 4,
  "max_position_embeddings": 2048,
  "model_type": "MoR",
  "architecture": "MixtureOfRecursions",
  "hidden_act": "gelu"
}