Upload 26 files
Browse files
- 0-mlp-in-x4-k30/config.json +27 -0
- 0-mlp-in-x4-k30/model.safetensors +3 -0
- 0-mlp-out-x4-k30/config.json +27 -0
- 0-mlp-out-x4-k30/model.safetensors +3 -0
- 1-mlp-in-x4-k30/config.json +27 -0
- 1-mlp-in-x4-k30/model.safetensors +3 -0
- 1-mlp-out-x4-k30/config.json +27 -0
- 1-mlp-out-x4-k30/model.safetensors +3 -0
- 1-resid-mid-x4-k30/config.json +27 -0
- 1-resid-mid-x4-k30/model.safetensors +3 -0
- 2-mlp-in-x4-k30/config.json +27 -0
- 2-mlp-in-x4-k30/model.safetensors +3 -0
- 2-mlp-out-x4-k30/config.json +27 -0
- 2-mlp-out-x4-k30/model.safetensors +3 -0
- 3-mlp-in-x4-k30/config.json +27 -0
- 3-mlp-in-x4-k30/model.safetensors +3 -0
- 3-mlp-out-x4-k30/config.json +27 -0
- 3-mlp-out-x4-k30/model.safetensors +3 -0
- 4-mlp-in-x4-k30/config.json +27 -0
- 4-mlp-in-x4-k30/model.safetensors +3 -0
- 4-mlp-out-x4-k30/config.json +27 -0
- 4-mlp-out-x4-k30/model.safetensors +3 -0
- 5-mlp-in-x4-k30/config.json +27 -0
- 5-mlp-in-x4-k30/model.safetensors +3 -0
- 5-mlp-out-x4-k30/config.json +27 -0
- 5-mlp-out-x4-k30/model.safetensors +3 -0
0-mlp-in-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-in",
|
| 4 |
+
0
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 500,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
0-mlp-in-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4d6e1da824ed07bcc68bc81e7ad567899ac81b05b6d209df683e493671c6038
|
| 3 |
+
size 8390896
|
0-mlp-out-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-out",
|
| 4 |
+
0
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 500,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
0-mlp-out-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87adf583a590b45a89c91df1f967a1c4ded6650c5ecda51a19d760bd0a263c84
|
| 3 |
+
size 8390896
|
1-mlp-in-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-in",
|
| 4 |
+
1
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 500,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
1-mlp-in-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b02b4ece25d6a0d0052577ffeb1a4d6581a02ba3234ee7081757bca013e2b9b
|
| 3 |
+
size 8390896
|
1-mlp-out-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-out",
|
| 4 |
+
1
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 500,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
1-mlp-out-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7eb3dd78691aec74affc76ca4123f9077a94df0be2e84290f2486212761e4515
|
| 3 |
+
size 8390896
|
1-resid-mid-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"resid-mid",
|
| 4 |
+
1
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 500,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
1-resid-mid-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ed092c899d479df2781220b79668f9e65ddde1655504400e745adccde76e259
|
| 3 |
+
size 8390896
|
2-mlp-in-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-in",
|
| 4 |
+
2
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 500,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
2-mlp-in-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7e7685c3a5fc8ad31cfb812a8b32e61001799bd49061fcbc37990f67fd040f7
|
| 3 |
+
size 8390896
|
2-mlp-out-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-out",
|
| 4 |
+
2
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 500,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
2-mlp-out-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ccbe1bc5980e502b3559f26a55ac5793a083c863f00f8fa24fa02b512cc4a85
|
| 3 |
+
size 8390896
|
3-mlp-in-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-in",
|
| 4 |
+
3
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 100,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
3-mlp-in-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d454b66d175e17be88d443728804482935b9658a4526f8b0c18eda8d57a6207e
|
| 3 |
+
size 8390896
|
3-mlp-out-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-out",
|
| 4 |
+
3
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 100,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
3-mlp-out-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03872f3c4c79bb7f8a1612fd5a860a18958d9fc5a53323b2072a5f69d1122cb9
|
| 3 |
+
size 8390896
|
4-mlp-in-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-in",
|
| 4 |
+
4
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 100,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
4-mlp-in-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b20b25592eed19b0412df73bebcffedebd06f7ee87129015744d1a01de4e260d
|
| 3 |
+
size 8390896
|
4-mlp-out-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-out",
|
| 4 |
+
4
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 100,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
4-mlp-out-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:219b9ff4d64cc6123f7378a303fbb7f95c624ffe9129524ef21f5989ce5bf017
|
| 3 |
+
size 8390896
|
5-mlp-in-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-in",
|
| 4 |
+
5
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 100,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
5-mlp-in-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb8948caf1abea71979638467cd8d665ca7250d99cd911d7ff748e47e6036f87
|
| 3 |
+
size 8390896
|
5-mlp-out-x4-k30/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"point": [
|
| 3 |
+
"mlp-out",
|
| 4 |
+
5
|
| 5 |
+
],
|
| 6 |
+
"target": null,
|
| 7 |
+
"expansion": 4,
|
| 8 |
+
"lr": 1e-4,
|
| 9 |
+
"in_batch": 32,
|
| 10 |
+
"out_batch": 4096,
|
| 11 |
+
"n_buffers": 75,
|
| 12 |
+
"n_batches": 256,
|
| 13 |
+
"d_model": 512,
|
| 14 |
+
"n_ctx": 256,
|
| 15 |
+
"k": 30,
|
| 16 |
+
"val_steps": 100,
|
| 17 |
+
"dead_thresh": 2,
|
| 18 |
+
"normalize": null,
|
| 19 |
+
"init_scale": 1.0,
|
| 20 |
+
"bilinear_encoder": false,
|
| 21 |
+
"encoder_bias": false,
|
| 22 |
+
"token_lookup": false,
|
| 23 |
+
"decoder_decay": 0.0,
|
| 24 |
+
"normalize_decoder": true,
|
| 25 |
+
"tag": null,
|
| 26 |
+
"kwargs": {}
|
| 27 |
+
}
|
5-mlp-out-x4-k30/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab4b43e016fcf46ffd0b3fa97711b13ee94a5832fa22c5a2a139655e02d36f75
|
| 3 |
+
size 8390896
|