{
  "vocab_size": 1183855,
  "seq_len": 4096,
  "hidden_size": 32,
  "num_heads": 4,
  "expansion": 4,
  "H_layers": 8,
  "L_layers": 2,
  "H_cycles": 1,
  "L_cycles": 1,
  "halt_max_steps": 4,
  "halt_epsilon": 0.01,
  "dropout": 0.1
}
{
  "vocab_size": 1183855,
  "seq_len": 4096,
  "hidden_size": 32,
  "num_heads": 4,
  "expansion": 4,
  "H_layers": 8,
  "L_layers": 2,
  "H_cycles": 1,
  "L_cycles": 1,
  "halt_max_steps": 4,
  "halt_epsilon": 0.01,
  "dropout": 0.1
}