{
  "backbone_freeze_steps": 0,
  "clip_grad_per_group": false,
  "combine_mhc": true,
  "compressed_vocab_size": 21091,
  "conv_dilation": 3,
  "conv_kernel_size": 4,
  "conv_zero_init": true,
  "embedding_dim": 512,
  "enable_telemetry": false,
  "enable_tokenizer_compression": true,
  "engram_dtype": null,
  "engram_version": "1.2.2",
  "engram_vocab_size_per_ngram": [
    128000,
    128000
  ],
  "entropy_loss_weight": 0.0,
  "gating_zero_init": true,
  "hc_mult": 4,
  "hidden_size": 2048,
  "layer_container_path": "model.layers",
  "learning_rate_multiplier": 5.0,
  "max_ngram_size": 3,
  "model_type": "engram",
  "n_head_per_ngram": 8,
  "ngram_sizes": [
    2,
    3
  ],
  "pad_id": 2,
  "seed": 0,
  "target_layers": [
    2
  ],
  "target_modules": null,
  "tokenizer_name_or_path": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
  "train_mode": null,
  "transformers_version": null,
  "use_cache": false,
  "use_sparse_embeddings": true,
  "weight_decay": 0.0,
  "wrap_peft": false
}