Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README.md +9 -0
config.json +63 -0
pytorch_model.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+# HookedTransformer Model
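+This is a HookedTransformer model from the `transformer_lens` library: a 2-layer, attention-only transformer (`d_model` 256, 8 heads, 5000-token vocabulary) that uses the `georgeyw/TinyStories-tokenizer-5k` tokenizer.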
+To use this model, install `transformer_lens` (for example, `pip install transformer_lens`) and load it as follows:
+```python
+from transformer_lens import HookedTransformer
+model = HookedTransformer.from_pretrained('D0TheMath/saved_model')
+```

config.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "n_layers": 2,
+  "d_model": 256,
+  "n_ctx": 1024,
+  "d_head": 32,
+  "model_name": "custom",
+  "n_heads": 8,
+  "d_mlp": null,
+  "act_fn": null,
+  "d_vocab": 5000,
+  "eps": 1e-05,
+  "use_attn_result": false,
+  "use_attn_scale": true,
+  "attn_scale": 5.656854249492381,
+  "use_split_qkv_input": false,
+  "use_hook_mlp_in": false,
+  "use_attn_in": false,
+  "use_local_attn": false,
+  "original_architecture": null,
+  "from_checkpoint": false,
+  "checkpoint_index": null,
+  "checkpoint_label_type": null,
+  "checkpoint_value": null,
+  "tokenizer_name": "georgeyw/TinyStories-tokenizer-5k",
+  "window_size": null,
+  "attn_types": null,
+  "init_mode": "gpt2",
+  "normalization_type": "LN",
+  "device": "cuda",
+  "n_devices": 1,
+  "attention_dir": "causal",
+  "attn_only": true,
+  "seed": 1,
+  "initializer_range": 0.05,
+  "init_weights": true,
+  "scale_attn_by_inverse_layer_idx": false,
+  "positional_embedding_type": "shortformer",
+  "final_rms": false,
+  "d_vocab_out": 5000,
+  "parallel_attn_mlp": false,
+  "rotary_dim": null,
+  "n_params": 524288,
+  "use_hook_tokens": false,
+  "gated_mlp": false,
+  "default_prepend_bos": true,
+  "dtype": "torch.float32",
+  "tokenizer_prepends_bos": false,
+  "n_key_value_heads": null,
+  "post_embedding_ln": false,
+  "rotary_base": 10000,
+  "trust_remote_code": false,
+  "rotary_adjacent_pairs": false,
+  "load_in_4bit": false,
+  "num_experts": null,
+  "experts_per_token": null,
+  "relative_attention_max_distance": null,
+  "relative_attention_num_buckets": null,
+  "decoder_start_token_id": null,
+  "tie_word_embeddings": false,
+  "use_normalization_before_and_after": false,
+  "attn_scores_soft_cap": -1.0,
+  "output_logits_soft_cap": -1.0
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53093601bf47482bc3265d20651990b55eff290340bdc1c58ec319ab520fa0d7
+size 15529160