{
  "n_layers": 8,
  "d_model": 512,
  "n_ctx": 59,
  "d_head": 64,
  "model_name": "Othello-GPT-Transformer-Lens",
  "n_heads": 8,
  "d_mlp": 2048,
  "act_fn": "gelu",
  "d_vocab": 61,
  "eps": 1e-05,
  "use_attn_result": false,
  "use_attn_scale": true,
  "attn_scale": 8.0,
  "use_split_qkv_input": false,
  "use_hook_mlp_in": false,
  "use_attn_in": false,
  "use_local_attn": false,
  "ungroup_grouped_query_attention": false,
  "original_architecture": "mingpt",
  "from_checkpoint": false,
  "checkpoint_index": null,
  "checkpoint_label_type": null,
  "checkpoint_value": null,
  "tokenizer_name": null,
  "window_size": null,
  "attn_types": null,
  "init_mode": "gpt2",
  "normalization_type": "LN",
  "device": "cpu",
  "n_devices": 1,
  "attention_dir": "causal",
  "attn_only": false,
  "seed": null,
  "initializer_range": 0.035355339059327376,
  "init_weights": false,
  "scale_attn_by_inverse_layer_idx": false,
  "positional_embedding_type": "standard",
  "final_rms": false,
  "d_vocab_out": 61,
  "parallel_attn_mlp": false,
  "rotary_dim": null,
  "n_params": 25165824,
  "use_hook_tokens": false,
  "gated_mlp": false,
  "default_prepend_bos": true,
  "dtype": "float32",
  "tokenizer_prepends_bos": null,
  "n_key_value_heads": null,
  "post_embedding_ln": false,
  "rotary_base": 10000,
  "trust_remote_code": false,
  "rotary_adjacent_pairs": false,
  "load_in_4bit": false,
  "num_experts": null,
  "experts_per_token": null,
  "relative_attention_max_distance": null,
  "relative_attention_num_buckets": null,
  "decoder_start_token_id": null,
  "tie_word_embeddings": false,
  "use_normalization_before_and_after": false,
  "attn_scores_soft_cap": -1.0,
  "output_logits_soft_cap": -1.0,
  "use_NTK_by_parts_rope": false,
  "NTK_by_parts_low_freq_factor": 1.0,
  "NTK_by_parts_high_freq_factor": 4.0,
  "NTK_by_parts_factor": 8.0
}