ss_dense / config.json
jacobcd52's picture
Upload config.json with huggingface_hub
768506b verified
{
"model_config": {
"n_layer": 4,
"d_model": 512,
"n_ctx": 512,
"d_head": 16,
"d_mlp": 2048,
"vocab_size": 4096,
"use_rms_norm": true,
"tie_embeddings": false,
"use_positional_embeddings": false,
"use_bigram_table": false,
"use_attention_sinks": true,
"activation": "gelu",
"dropout": 0.0,
"use_bias": true
},
"sparsity_config": {
"enable_weight_sparsity": false,
"target_l0_fraction": 1,
"enable_activation_sparsity": false,
"activation_topk_fraction": 0.25
},
"training_config": {
"total_tokens": 2000000000,
"batch_size": 128,
"dataset_name": "SimpleStories/SimpleStories",
"tokenizer_name": "SimpleStories/SimpleStories-1.25M"
}
}