File size: 734 Bytes
{
  "model_config": {
    "n_layer": 4,
    "d_model": 512,
    "n_ctx": 512,
    "d_head": 16,
    "d_mlp": 2048,
    "vocab_size": 4096,
    "use_rms_norm": true,
    "tie_embeddings": false,
    "use_positional_embeddings": false,
    "use_bigram_table": false,
    "use_attention_sinks": true,
    "activation": "gelu",
    "dropout": 0.0,
    "use_bias": true
  },
  "sparsity_config": {
    "enable_weight_sparsity": false,
    "target_l0_fraction": 1,
    "enable_activation_sparsity": false,
    "activation_topk_fraction": 0.25
  },
  "training_config": {
    "total_tokens": 2000000000,
    "batch_size": 128,
    "dataset_name": "SimpleStories/SimpleStories",
    "tokenizer_name": "SimpleStories/SimpleStories-1.25M"
  }
}