jacobcd52
/

scae

Model card Files Files and versions

xet

Community

jacobcd52 committed on Dec 3, 2024

Commit

27a9de9

verified ·

1 Parent(s): 820aecd

Upload config.json with huggingface_hub

Browse files

Files changed (1) hide show

config.json +49 -48

config.json CHANGED Viewed

@@ -26,56 +26,56 @@
         "attn_11": 768
     },
     "dict_sizes": {
-        "mlp_0": 6144,
-        "attn_0": 6144,
-        "mlp_1": 6144,
-        "attn_1": 6144,
-        "mlp_2": 6144,
-        "attn_2": 6144,
-        "mlp_3": 6144,
-        "attn_3": 6144,
-        "mlp_4": 6144,
-        "attn_4": 6144,
-        "mlp_5": 6144,
-        "attn_5": 6144,
-        "mlp_6": 6144,
-        "attn_6": 6144,
-        "mlp_7": 6144,
-        "attn_7": 6144,
-        "mlp_8": 6144,
-        "attn_8": 6144,
-        "mlp_9": 6144,
-        "attn_9": 6144,
-        "mlp_10": 6144,
-        "attn_10": 6144,
-        "mlp_11": 6144,
-        "attn_11": 6144
     },
     "ks": {
-        "mlp_0": 64,
-        "attn_0": 64,
-        "mlp_1": 64,
-        "attn_1": 64,
-        "mlp_2": 64,
-        "attn_2": 64,
-        "mlp_3": 64,
-        "attn_3": 64,
-        "mlp_4": 64,
-        "attn_4": 64,
-        "mlp_5": 64,
-        "attn_5": 64,
-        "mlp_6": 64,
-        "attn_6": 64,
-        "mlp_7": 64,
-        "attn_7": 64,
-        "mlp_8": 64,
-        "attn_8": 64,
-        "mlp_9": 64,
-        "attn_9": 64,
-        "mlp_10": 64,
-        "attn_10": 64,
-        "mlp_11": 64,
-        "attn_11": 64
     },
     "layers": [],
     "lm_name": "",
@@ -107,6 +107,7 @@
     ],
     "connection_sparsity_coeff": 0.01,
     "use_sparse_connections": false,
     "buffer_config": {
         "ctx_len": 128,
         "refresh_batch_size": 256,

         "attn_11": 768
     },
     "dict_sizes": {
+        "mlp_0": 12288,
+        "attn_0": 12288,
+        "mlp_1": 12288,
+        "attn_1": 12288,
+        "mlp_2": 12288,
+        "attn_2": 12288,
+        "mlp_3": 12288,
+        "attn_3": 12288,
+        "mlp_4": 12288,
+        "attn_4": 12288,
+        "mlp_5": 12288,
+        "attn_5": 12288,
+        "mlp_6": 12288,
+        "attn_6": 12288,
+        "mlp_7": 12288,
+        "attn_7": 12288,
+        "mlp_8": 12288,
+        "attn_8": 12288,
+        "mlp_9": 12288,
+        "attn_9": 12288,
+        "mlp_10": 12288,
+        "attn_10": 12288,
+        "mlp_11": 12288,
+        "attn_11": 12288
     },
     "ks": {
+        "mlp_0": 128,
+        "attn_0": 128,
+        "mlp_1": 128,
+        "attn_1": 128,
+        "mlp_2": 128,
+        "attn_2": 128,
+        "mlp_3": 128,
+        "attn_3": 128,
+        "mlp_4": 128,
+        "attn_4": 128,
+        "mlp_5": 128,
+        "attn_5": 128,
+        "mlp_6": 128,
+        "attn_6": 128,
+        "mlp_7": 128,
+        "attn_7": 128,
+        "mlp_8": 128,
+        "attn_8": 128,
+        "mlp_9": 128,
+        "attn_9": 128,
+        "mlp_10": 128,
+        "attn_10": 128,
+        "mlp_11": 128,
+        "attn_11": 128
     },
     "layers": [],
     "lm_name": "",
     ],
     "connection_sparsity_coeff": 0.01,
     "use_sparse_connections": false,
+    "dtype": "torch.float32",
     "buffer_config": {
         "ctx_len": 128,
         "refresh_batch_size": 256,