{
  "n_layers": 8,
  "d_model": 1024,
  "n_ctx": 1024,
  "d_head": 64,
  "model_name": "custom",
  "n_heads": 16,
  "d_mlp": 4096,
  "act_fn": "solu_ln",
  "d_vocab": 50278,
  "eps": 1e-05,
  "use_attn_result": false,
  "use_attn_scale": true,
  "use_local_attn": false,
  "model_family": null,
  "checkpoint": null,
  "tokenizer_name": "EleutherAI/gpt-neox-20b",
  "window_size": null,
  "attn_types": null,
  "init_mode": "gpt2",
  "normalization_type": "LNPre",
  "device": "cuda",
  "attention_dir": "causal",
  "attn_only": false,
  "seed": 42,
  "initializer_range": 0.025,
  "init_weights": true,
  "scale_attn_by_inverse_layer_idx": false,
  "positional_embedding_type": "standard",
  "final_rms": false,
  "d_vocab_out": 50278,
  "parallel_attn_mlp": false
}
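These field names match the configuration dataclass used by TransformerLens (`HookedTransformerConfig`), so the sketch below shows one plausible way to instantiate a custom model from this config. This is an assumption about the intended consumer, not a statement of the author's setup; in particular, the `model_family` and `checkpoint` keys look like an older config schema and are dropped here.

```python
from transformer_lens import HookedTransformer, HookedTransformerConfig

# Minimal sketch: build an 8-layer SoLU model from the config above.
# Keys "model_family" and "checkpoint" are omitted (assumed legacy fields
# not accepted by the current HookedTransformerConfig dataclass).
cfg = HookedTransformerConfig(
    n_layers=8,
    d_model=1024,
    n_ctx=1024,
    d_head=64,
    n_heads=16,
    d_mlp=4096,
    act_fn="solu_ln",              # SoLU activation followed by LayerNorm
    d_vocab=50278,
    d_vocab_out=50278,
    eps=1e-05,
    tokenizer_name="EleutherAI/gpt-neox-20b",
    normalization_type="LNPre",    # pre-block LayerNorm, folded style
    attention_dir="causal",
    positional_embedding_type="standard",
    init_mode="gpt2",
    initializer_range=0.025,
    seed=42,
    device="cuda",
)

# Randomly initialized weights ("init_weights": true); load a state dict
# afterwards if resuming from a trained checkpoint.
model = HookedTransformer(cfg)
```

Fields left at their defaults above (`use_attn_result`, `use_attn_scale`, `use_local_attn`, `window_size`, `attn_types`, `attn_only`, `scale_attn_by_inverse_layer_idx`, `final_rms`, `parallel_attn_mlp`) already agree with the values in the JSON, so passing them explicitly is optional.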