Upload config.json with huggingface_hub
Browse files- config.json +49 -48
config.json
CHANGED
|
@@ -26,56 +26,56 @@
|
|
| 26 |
"attn_11": 768
|
| 27 |
},
|
| 28 |
"dict_sizes": {
|
| 29 |
-
"mlp_0":
|
| 30 |
-
"attn_0":
|
| 31 |
-
"mlp_1":
|
| 32 |
-
"attn_1":
|
| 33 |
-
"mlp_2":
|
| 34 |
-
"attn_2":
|
| 35 |
-
"mlp_3":
|
| 36 |
-
"attn_3":
|
| 37 |
-
"mlp_4":
|
| 38 |
-
"attn_4":
|
| 39 |
-
"mlp_5":
|
| 40 |
-
"attn_5":
|
| 41 |
-
"mlp_6":
|
| 42 |
-
"attn_6":
|
| 43 |
-
"mlp_7":
|
| 44 |
-
"attn_7":
|
| 45 |
-
"mlp_8":
|
| 46 |
-
"attn_8":
|
| 47 |
-
"mlp_9":
|
| 48 |
-
"attn_9":
|
| 49 |
-
"mlp_10":
|
| 50 |
-
"attn_10":
|
| 51 |
-
"mlp_11":
|
| 52 |
-
"attn_11":
|
| 53 |
},
|
| 54 |
"ks": {
|
| 55 |
-
"mlp_0":
|
| 56 |
-
"attn_0":
|
| 57 |
-
"mlp_1":
|
| 58 |
-
"attn_1":
|
| 59 |
-
"mlp_2":
|
| 60 |
-
"attn_2":
|
| 61 |
-
"mlp_3":
|
| 62 |
-
"attn_3":
|
| 63 |
-
"mlp_4":
|
| 64 |
-
"attn_4":
|
| 65 |
-
"mlp_5":
|
| 66 |
-
"attn_5":
|
| 67 |
-
"mlp_6":
|
| 68 |
-
"attn_6":
|
| 69 |
-
"mlp_7":
|
| 70 |
-
"attn_7":
|
| 71 |
-
"mlp_8":
|
| 72 |
-
"attn_8":
|
| 73 |
-
"mlp_9":
|
| 74 |
-
"attn_9":
|
| 75 |
-
"mlp_10":
|
| 76 |
-
"attn_10":
|
| 77 |
-
"mlp_11":
|
| 78 |
-
"attn_11":
|
| 79 |
},
|
| 80 |
"layers": [],
|
| 81 |
"lm_name": "",
|
|
@@ -107,6 +107,7 @@
|
|
| 107 |
],
|
| 108 |
"connection_sparsity_coeff": 0.01,
|
| 109 |
"use_sparse_connections": false,
|
|
|
|
| 110 |
"buffer_config": {
|
| 111 |
"ctx_len": 128,
|
| 112 |
"refresh_batch_size": 256,
|
|
|
|
| 26 |
"attn_11": 768
|
| 27 |
},
|
| 28 |
"dict_sizes": {
|
| 29 |
+
"mlp_0": 12288,
|
| 30 |
+
"attn_0": 12288,
|
| 31 |
+
"mlp_1": 12288,
|
| 32 |
+
"attn_1": 12288,
|
| 33 |
+
"mlp_2": 12288,
|
| 34 |
+
"attn_2": 12288,
|
| 35 |
+
"mlp_3": 12288,
|
| 36 |
+
"attn_3": 12288,
|
| 37 |
+
"mlp_4": 12288,
|
| 38 |
+
"attn_4": 12288,
|
| 39 |
+
"mlp_5": 12288,
|
| 40 |
+
"attn_5": 12288,
|
| 41 |
+
"mlp_6": 12288,
|
| 42 |
+
"attn_6": 12288,
|
| 43 |
+
"mlp_7": 12288,
|
| 44 |
+
"attn_7": 12288,
|
| 45 |
+
"mlp_8": 12288,
|
| 46 |
+
"attn_8": 12288,
|
| 47 |
+
"mlp_9": 12288,
|
| 48 |
+
"attn_9": 12288,
|
| 49 |
+
"mlp_10": 12288,
|
| 50 |
+
"attn_10": 12288,
|
| 51 |
+
"mlp_11": 12288,
|
| 52 |
+
"attn_11": 12288
|
| 53 |
},
|
| 54 |
"ks": {
|
| 55 |
+
"mlp_0": 128,
|
| 56 |
+
"attn_0": 128,
|
| 57 |
+
"mlp_1": 128,
|
| 58 |
+
"attn_1": 128,
|
| 59 |
+
"mlp_2": 128,
|
| 60 |
+
"attn_2": 128,
|
| 61 |
+
"mlp_3": 128,
|
| 62 |
+
"attn_3": 128,
|
| 63 |
+
"mlp_4": 128,
|
| 64 |
+
"attn_4": 128,
|
| 65 |
+
"mlp_5": 128,
|
| 66 |
+
"attn_5": 128,
|
| 67 |
+
"mlp_6": 128,
|
| 68 |
+
"attn_6": 128,
|
| 69 |
+
"mlp_7": 128,
|
| 70 |
+
"attn_7": 128,
|
| 71 |
+
"mlp_8": 128,
|
| 72 |
+
"attn_8": 128,
|
| 73 |
+
"mlp_9": 128,
|
| 74 |
+
"attn_9": 128,
|
| 75 |
+
"mlp_10": 128,
|
| 76 |
+
"attn_10": 128,
|
| 77 |
+
"mlp_11": 128,
|
| 78 |
+
"attn_11": 128
|
| 79 |
},
|
| 80 |
"layers": [],
|
| 81 |
"lm_name": "",
|
|
|
|
| 107 |
],
|
| 108 |
"connection_sparsity_coeff": 0.01,
|
| 109 |
"use_sparse_connections": false,
|
| 110 |
+
"dtype": "torch.float32",
|
| 111 |
"buffer_config": {
|
| 112 |
"ctx_len": 128,
|
| 113 |
"refresh_batch_size": 256,
|