{
  "vocab_size": 50000,
  "dim": 512,
  "n_layers": 8,
  "n_heads": 8,
  "max_seq_len": 384,
  "channel_top_k": 128,
  "token_top_k": 48,
  "ffn_mult": 4,
  "mem_dim": 128,
  "mem_size": 15000,
  "mem_k": 8,
  "mem_threshold": 0.4,
  "n_neuron_types": 7,
  "stem_plasticity": 0.02,
  "reversion_rate": 0.01,
  "min_stem_ratio": 0.1,
  "inhibition_strength": 0.08,
  "modulation_strength": 0.1,
  "excitation_strength": 0.3,
  "dropout": 0.1,
  "batch_size": 10,
  "lr": 0.00025,
  "max_steps": 5000,
  "warmup": 1000,
  "log_every": 100,
  "eval_every": 1000,
  "pad_id": 0,
  "eos_id": 2
}