mntss
/

tl-model-upload

Model card | Files and versions

xet

Community

mntss committed on Nov 13, 2024

Commit

4bd4aca

verified ·

1 Parent(s): 2640d76

Uploaded model at 0.00 seconds

Browse files

Files changed (1) hide show

tl_config.json +68 -0

tl_config.json ADDED Viewed

	@@ -0,0 +1,68 @@

+{
+  "n_layers": 8,
+  "d_model": 512,
+  "n_ctx": 59,
+  "d_head": 64,
+  "model_name": "Othello-GPT-Transformer-Lens",
+  "n_heads": 8,
+  "d_mlp": 2048,
+  "act_fn": "gelu",
+  "d_vocab": 61,
+  "eps": 1e-05,
+  "use_attn_result": false,
+  "use_attn_scale": true,
+  "attn_scale": 8.0,
+  "use_split_qkv_input": false,
+  "use_hook_mlp_in": false,
+  "use_attn_in": false,
+  "use_local_attn": false,
+  "ungroup_grouped_query_attention": false,
+  "original_architecture": "mingpt",
+  "from_checkpoint": false,
+  "checkpoint_index": null,
+  "checkpoint_label_type": null,
+  "checkpoint_value": null,
+  "tokenizer_name": null,
+  "window_size": null,
+  "attn_types": null,
+  "init_mode": "gpt2",
+  "normalization_type": "LN",
+  "device": "cpu",
+  "n_devices": 1,
+  "attention_dir": "causal",
+  "attn_only": false,
+  "seed": null,
+  "initializer_range": 0.035355339059327376,
+  "init_weights": false,
+  "scale_attn_by_inverse_layer_idx": false,
+  "positional_embedding_type": "standard",
+  "final_rms": false,
+  "d_vocab_out": 61,
+  "parallel_attn_mlp": false,
+  "rotary_dim": null,
+  "n_params": 25165824,
+  "use_hook_tokens": false,
+  "gated_mlp": false,
+  "default_prepend_bos": true,
+  "dtype": "float32",
+  "tokenizer_prepends_bos": null,
+  "n_key_value_heads": null,
+  "post_embedding_ln": false,
+  "rotary_base": 10000,
+  "trust_remote_code": false,
+  "rotary_adjacent_pairs": false,
+  "load_in_4bit": false,
+  "num_experts": null,
+  "experts_per_token": null,
+  "relative_attention_max_distance": null,
+  "relative_attention_num_buckets": null,
+  "decoder_start_token_id": null,
+  "tie_word_embeddings": false,
+  "use_normalization_before_and_after": false,
+  "attn_scores_soft_cap": -1.0,
+  "output_logits_soft_cap": -1.0,
+  "use_NTK_by_parts_rope": false,
+  "NTK_by_parts_low_freq_factor": 1.0,
+  "NTK_by_parts_high_freq_factor": 4.0,
+  "NTK_by_parts_factor": 8.0
+}