{
  "arch": "tension",
  "vocab_size": 32768,
  "dim": 256,
  "num_layers": 6,
  "num_heads": 4,
  "window": 32,
  "ffn_mult": 3,
  "max_seq_len": 256,
  "dropout": 0.1,
  "use_grad_checkpoint": false,
  "use_oscillation": true,
  "use_rope": false,
  "use_triton": false
}
{
  "arch": "tension",
  "vocab_size": 32768,
  "dim": 256,
  "num_layers": 6,
  "num_heads": 4,
  "window": 32,
  "ffn_mult": 3,
  "max_seq_len": 256,
  "dropout": 0.1,
  "use_grad_checkpoint": false,
  "use_oscillation": true,
  "use_rope": false,
  "use_triton": false
}