{
  "vocab_size": 50000,
  "dim": 512,
  "n_layers": 8,
  "n_heads": 8,
  "max_seq_len": 512,
  "channel_top_k": 128,
  "token_top_k": 64,
  "ffn_mult": 4,
  "mem_dim": 128,
  "mem_size": 20000,
  "mem_k": 12,
  "mem_threshold": 0.4,
  "dropout": 0.1,
  "batch_size": 8,
  "lr": 0.0002,
  "max_steps": 30000,
  "warmup": 1500,
  "log_every": 100,
  "eval_every": 1000,
  "pad_id": 0,
  "eos_id": 2
}