guidelabs/steerling-8b

Text Generation

causal-diffusion

interpretability

concept-steering

masked-diffusion

Model card | Files and versions

Update config.json

#2

by AyaGL - opened Mar 17

base: refs/heads/main ← from: refs/pr/2

Discussion Files changed

Files changed (1) hide show

config.json +34 -28

config.json CHANGED Viewed

@@ -1,11 +1,19 @@
 {
-  "model_type": "causal_diffusion",
   "interpretable": true,
   "n_layers": 32,
   "n_head": 32,
   "n_embd": 4096,
-  "block_size": 4096,
   "n_kv_heads": 4,
   "diff_block_size": 64,
   "use_rms_norm": true,
   "norm_eps": 1e-05,
@@ -14,39 +22,37 @@
   "use_rope": true,
   "rope_base": 500000.0,
   "rope_full_precision": true,
   "mlp_type": "swiglu",
   "activation": "gelu",
   "mlp_ratio": 4,
   "intermediate_size": null,
   "use_bias": false,
-  "clip_qkv": 10.0,
   "weight_sharing": true,
   "pad_token_id": 100277,
   "bos_token_id": 100278,
   "eos_token_id": 100257,
-  "endofchunk_token_id": 100279,
   "mask_token_id": 100280,
-  "vocab_size": 100281,
-  "concept": {
-    "n_concepts": 33732,
-    "n_unknown_concepts": 101196,
-    "max_concepts": 16,
-    "concept_dim": 4096,
-    "use_attention_known": false,
-    "use_attention_unknown": false,
-    "topk_known": 16,
-    "topk_known_features": 32,
-    "unknown_topk": 128,
-    "use_unknown": true,
-    "apply_topk_to_unknown": true,
-    "topk_on_logits": false,
-    "factorize_unknown": true,
-    "factorize_rank": 256,
-    "use_epsilon_correction": true,
-    "block_size": 4096,
-    "pad_multiple": 16,
-    "store_unknown_weights": false,
-    "inject_layer": 16,
-    "inject_alpha": 1.0
-  }
-}

 {
+  "model_type": "steerling",
+  "auto_map": {
+    "AutoConfig": "configuration_steerling.SteerlingConfig",
+    "AutoModel": "modeling_steerling.SteerlingForCausalLM",
+    "AutoModelForCausalLM": "modeling_steerling.SteerlingForCausalLM",
+    "AutoTokenizer": ["tokenization_steerling.SteerlingTokenizer", null]
+  },
+  "architectures": ["SteerlingForCausalLM"],
   "interpretable": true,
+  "vocab_size": 100281,
   "n_layers": 32,
   "n_head": 32,
   "n_embd": 4096,
   "n_kv_heads": 4,
+  "block_size": 4096,
   "diff_block_size": 64,
   "use_rms_norm": true,
   "norm_eps": 1e-05,
   "use_rope": true,
   "rope_base": 500000.0,
   "rope_full_precision": true,
+  "clip_qkv": 10.0,
   "mlp_type": "swiglu",
   "activation": "gelu",
   "mlp_ratio": 4,
   "intermediate_size": null,
   "use_bias": false,
   "weight_sharing": true,
   "pad_token_id": 100277,
   "bos_token_id": 100278,
   "eos_token_id": 100257,
   "mask_token_id": 100280,
+  "endofchunk_token_id": 100279,
+  "n_concepts": 33732,
+  "n_unknown_concepts": 101196,
+  "concept_dim": 4096,
+  "use_attention_known": false,
+  "use_attention_unknown": false,
+  "topk_known": 16,
+  "topk_known_features": 32,
+  "unknown_topk": 128,
+  "use_unknown": true,
+  "apply_topk_to_unknown": true,
+  "topk_on_logits": false,
+  "factorize_unknown": true,
+  "factorize_rank": 256,
+  "use_epsilon_correction": true,
+  "concept_block_size": 4096,
+  "pad_multiple": 16,
+  "store_unknown_weights": false,
+  "inject_layer": 16,
+  "inject_alpha": 1.0,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.48.0"
+}