{
  "model_config": {
    "n_layer": 4,
    "d_model": 512,
    "n_ctx": 512,
    "d_head": 16,
    "d_mlp": 2048,
    "vocab_size": 4096,
    "use_rms_norm": true,
    "tie_embeddings": false,
    "use_positional_embeddings": false,
    "use_bigram_table": false,
    "use_attention_sinks": true,
    "activation": "gelu",
    "dropout": 0.0,
    "use_bias": true
  },
  "sparsity_config": {
    "enable_weight_sparsity": false,
    "target_l0_fraction": 1,
    "enable_activation_sparsity": false,
    "activation_topk_fraction": 0.25
  },
  "training_config": {
    "total_tokens": 2000000000,
    "batch_size": 128,
    "dataset_name": "SimpleStories/SimpleStories",
    "tokenizer_name": "SimpleStories/SimpleStories-1.25M"
  }
}