Commit ·
42f840f
1
Parent(s): 54a6afb
feat: add models from normalized training
Browse files- README.md +4 -4
- pytorch/anitag2vec_1a770feaa2bd9094_c7359727bcee4f8b_i128_e10_s60203_b300_p3841472.pth +0 -3
- pytorch/anitag2vec_1a770feaa2bd9094_c7359727bcee4f8b_i128_e15_s60203_b300_p3841472.pth +0 -3
- pytorch/anitag2vec_1a770feaa2bd9094_c7359727bcee4f8b_i128_e5_s60203_b300_p3841472.pth +0 -3
- pytorch/{anitag2vec_a75bfbdfe75fe264_c7359727bcee4f8b_i128_e10_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e12_s157043_b256_p1871744.pth} +1 -1
- pytorch/{anitag2vec_a75bfbdfe75fe264_c7359727bcee4f8b_i128_e15_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e14_s157043_b256_p1871744.pth} +1 -1
- pytorch/{anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e15_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e3_s157043_b256_p1871744.pth} +1 -1
- pytorch/{anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e20_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e6_s157043_b256_p1871744.pth} +1 -1
- pytorch/{anitag2vec_a75bfbdfe75fe264_c7359727bcee4f8b_i128_e5_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e9_s157043_b256_p1871744.pth} +1 -1
- pytorch/anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e10_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e5_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e25_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e30_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e35_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e40_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e45_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e50_s60203_b256_p1871744.pth +0 -3
- pytorch/anitag2vec_e2ef15bc7697d18c_c7359727bcee4f8b_i128_e10_s60203_b100_p1871744.pth +0 -3
- pytorch/anitag2vec_e2ef15bc7697d18c_c7359727bcee4f8b_i128_e15_s60203_b100_p1871744.pth +0 -3
- pytorch/anitag2vec_e2ef15bc7697d18c_c7359727bcee4f8b_i128_e5_s60203_b100_p1871744.pth +0 -3
- pytorch/{setup_params_e2ef15bc7697d18c_c7359727bcee4f8b.json → config_63fc21b89723d1ce_b0d065e705028cb3.json} +0 -5
- pytorch/errors_1a770feaa2bd9094_c7359727bcee4f8b.json +0 -1
- pytorch/errors_63fc21b89723d1ce_b0d065e705028cb3_896b40b1cf682c44.json +125 -0
- pytorch/errors_8ea07c7d34b64b69_c7359727bcee4f8b.json +0 -1
- pytorch/errors_a75bfbdfe75fe264_c7359727bcee4f8b.json +0 -1
- pytorch/errors_dec65b57a17b7033_c7359727bcee4f8b.json +0 -1
- pytorch/errors_e2ef15bc7697d18c_c7359727bcee4f8b.json +0 -1
- pytorch/setup_params_1a770feaa2bd9094_c7359727bcee4f8b.json +0 -14
- pytorch/setup_params_8ea07c7d34b64b69_c7359727bcee4f8b.json +0 -14
- pytorch/setup_params_a75bfbdfe75fe264_c7359727bcee4f8b.json +0 -14
- pytorch/setup_params_dec65b57a17b7033_c7359727bcee4f8b.json +0 -14
- pytorch/{token_dataset_c7359727bcee4f8b_vocab_size_5000_freq_3.json → token_dataset_b0d065e705028cb3_vocab_size_5000_freq_3.json} +0 -0
README.md
CHANGED
|
@@ -11,12 +11,12 @@ Training and inference examples are all available on [my github](https://github.
|
|
| 11 |
Implementation is detailed in [this blog post](https://blog.afmichael.dev/posts/2026/set-embeddings-and-anitag2vec/).
|
| 12 |
|
| 13 |
```python
|
| 14 |
-
TOKENIZER_PATH = "./checkpoints/
|
| 15 |
-
CONFIG_PATH = "./checkpoints/
|
| 16 |
-
MODEL_PATH = "./checkpoints/
|
| 17 |
|
| 18 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 19 |
-
cfg =
|
| 20 |
print(cfg)
|
| 21 |
tagtok = TagBPETokenizer(vocab_size=cfg.HYPERP_TAGTOK_VOCAB_SIZE, min_frequency=cfg.HYPERP_TAGTOK_MIN_FREQ)
|
| 22 |
tagtok.load(TOKENIZER_PATH)
|
|
|
|
| 11 |
Implementation is detailed in [this blog post](https://blog.afmichael.dev/posts/2026/set-embeddings-and-anitag2vec/).
|
| 12 |
|
| 13 |
```python
|
| 14 |
+
TOKENIZER_PATH = "./checkpoints/token_dataset_b0d065e705028cb3_vocab_size_5000_freq_3.json"
|
| 15 |
+
CONFIG_PATH = "./checkpoints/config_63fc21b89723d1ce_b0d065e705028cb3.json"
|
| 16 |
+
MODEL_PATH = "./checkpoints/anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e14_s157043_b256_p1871744.pth"
|
| 17 |
|
| 18 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 19 |
+
cfg = ModelConfig.load_from_file(CONFIG_PATH)
|
| 20 |
print(cfg)
|
| 21 |
tagtok = TagBPETokenizer(vocab_size=cfg.HYPERP_TAGTOK_VOCAB_SIZE, min_frequency=cfg.HYPERP_TAGTOK_MIN_FREQ)
|
| 22 |
tagtok.load(TOKENIZER_PATH)
|
pytorch/anitag2vec_1a770feaa2bd9094_c7359727bcee4f8b_i128_e10_s60203_b300_p3841472.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:815e3f012aa2286131a015474c14607b8979654f73abe1eac7d5d4a69c71478d
|
| 3 |
-
size 15386061
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_1a770feaa2bd9094_c7359727bcee4f8b_i128_e15_s60203_b300_p3841472.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a456120b0678a8410c96e72a13abd3c85ed9a7f22b8a9575090af851ed9eb2e5
|
| 3 |
-
size 15386061
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_1a770feaa2bd9094_c7359727bcee4f8b_i128_e5_s60203_b300_p3841472.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:cb4f4c402d8d8c42c793a3c5b6a9c0c6fd70c01f266899682c8f0f697755fe3f
|
| 3 |
-
size 15386018
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/{anitag2vec_a75bfbdfe75fe264_c7359727bcee4f8b_i128_e10_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e12_s157043_b256_p1871744.pth}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7501057
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:993d1ca84e551dcd0029cab4f2d4ffde380bc89995dd2e60ab6b9ef3e3bf0f37
|
| 3 |
size 7501057
|
pytorch/{anitag2vec_a75bfbdfe75fe264_c7359727bcee4f8b_i128_e15_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e14_s157043_b256_p1871744.pth}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7501057
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30922b7f670051d4e8c98ce32e3b1633eca2e1cdbdda8267d42badf0a64ac237
|
| 3 |
size 7501057
|
pytorch/{anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e15_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e3_s157043_b256_p1871744.pth}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7501026
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2ce7307132dd61a09ec9b5e7facf071c867a91dcb49d8ce4b57c16e558b541b
|
| 3 |
size 7501026
|
pytorch/{anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e20_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e6_s157043_b256_p1871744.pth}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7501026
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad60d037823ec3baaf9c5bc7078cd808c07883952d3be92b7221e1c74ef39e65
|
| 3 |
size 7501026
|
pytorch/{anitag2vec_a75bfbdfe75fe264_c7359727bcee4f8b_i128_e5_s60203_b256_p1871744.pth → anitag2vec_63fc21b89723d1ce_b0d065e705028cb3_i128_e9_s157043_b256_p1871744.pth}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7501026
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed28bc6bb065e2535d79814a01bf9be4e226ba841489b0e718c9c28cd2d3c6c5
|
| 3 |
size 7501026
|
pytorch/anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e10_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:cc0f3fc6c5c3bbac0b13832680bb361ab5c5ab1b2b260e627451488dcd26a680
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e5_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:9f1a3a89251213a2e295d3ca5620b2c6c89816b85c84d39a0820340d00ab7025
|
| 3 |
-
size 7500995
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e25_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:2f213b76590e8b8ba2791e716373ea0bd95d23a24baf3c49d353931a9b41ff33
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e30_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7c5ebe5483db89bbbf4300fbca49241bb6e5de89756fa46a637f0b2594eb8e29
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e35_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:cb8b43a3d4984b4a2a25134b79e5337d88138cc858f2846fdaec9fa5bae4cd91
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e40_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c56502ef75796927c61ec44b51d8b9742a688046366eec144d5a7e62c4d87e6f
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e45_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:8aebbb04ea35c7e1e41087a53c1079c5fcf0a4490ecbe98e4238c6800e6a5dd2
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_dec65b57a17b7033_c7359727bcee4f8b_i128_e50_s60203_b256_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:5a56371fbb6de784dfb6be3283ca26b8f0cbc5f65f6e1dc3da549e8c51762640
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_e2ef15bc7697d18c_c7359727bcee4f8b_i128_e10_s60203_b100_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:530247608e8529c387f8f2efe09b46d84f87c96611480214482d2d3c41fbd9c9
|
| 3 |
-
size 7501057
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_e2ef15bc7697d18c_c7359727bcee4f8b_i128_e15_s60203_b100_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:9edee4e6b36012d65d3ac3c12789fee5b15b6ce24fca3d30bfe0aef23aa29529
|
| 3 |
-
size 7501057
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/anitag2vec_e2ef15bc7697d18c_c7359727bcee4f8b_i128_e5_s60203_b100_p1871744.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c8541749b6ed0ed88e029050f1559762fe398582db3f977a762a8cfada46092a
|
| 3 |
-
size 7501026
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/{setup_params_e2ef15bc7697d18c_c7359727bcee4f8b.json → config_63fc21b89723d1ce_b0d065e705028cb3.json}
RENAMED
|
@@ -1,9 +1,4 @@
|
|
| 1 |
{
|
| 2 |
-
"TRAINING_TAKE_EXAMPLES": 70000,
|
| 3 |
-
"TRAINING_BATCH_SIZE": 100,
|
| 4 |
-
"TRAINING_PERM_LIMIT": 8,
|
| 5 |
-
"TRAINING_SUBARRAY_COUNT": 5,
|
| 6 |
-
"TRAINING_EPOCHS": 15,
|
| 7 |
"HYPERP_TAGTOK_MAX_TOKEN_CLAMP": 128,
|
| 8 |
"HYPERP_TAGTOK_VOCAB_SIZE": 5000,
|
| 9 |
"HYPERP_TAGTOK_MIN_FREQ": 3,
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"HYPERP_TAGTOK_MAX_TOKEN_CLAMP": 128,
|
| 3 |
"HYPERP_TAGTOK_VOCAB_SIZE": 5000,
|
| 4 |
"HYPERP_TAGTOK_MIN_FREQ": 3,
|
pytorch/errors_1a770feaa2bd9094_c7359727bcee4f8b.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
[1.5465073392758915, 1.191551031164862, 1.141250568539349, 1.114217244570528, 1.0815079971332455, 1.0874991256799271, 1.0734269378790215, 1.0518524273117977, 1.0536742379416282, 1.0285864217364373, 1.0393868693664892, 1.0260481107887345, 1.026582830580906, 1.0139693416173186, 1.012866616545625]
|
|
|
|
|
|
pytorch/errors_63fc21b89723d1ce_b0d065e705028cb3_896b40b1cf682c44.json
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"training_epoch_losses": [
|
| 3 |
+
0.42451960702672453,
|
| 4 |
+
0.21573004836416787,
|
| 5 |
+
0.19177069690940046,
|
| 6 |
+
0.17504892407246056,
|
| 7 |
+
0.16358874733362602,
|
| 8 |
+
0.1564829849337716,
|
| 9 |
+
0.1508709951410076,
|
| 10 |
+
0.14437850462181753,
|
| 11 |
+
0.14374356180526535,
|
| 12 |
+
0.13792503227143024,
|
| 13 |
+
0.13572524861108015,
|
| 14 |
+
0.13273334489825883,
|
| 15 |
+
0.13153954560137338,
|
| 16 |
+
0.1280266696811693,
|
| 17 |
+
0.12730544583162093
|
| 18 |
+
],
|
| 19 |
+
"eval_epoch_losses": [
|
| 20 |
+
0.2445748666466414,
|
| 21 |
+
0.2009395990945116,
|
| 22 |
+
0.18108840770076348,
|
| 23 |
+
0.1758084242008155,
|
| 24 |
+
0.16031231463568496,
|
| 25 |
+
0.14987653319405603,
|
| 26 |
+
0.14861636272855575,
|
| 27 |
+
0.1410926070205773,
|
| 28 |
+
0.13936658880284317,
|
| 29 |
+
0.13503981845842414,
|
| 30 |
+
0.13185396185734227,
|
| 31 |
+
0.13749252805557055,
|
| 32 |
+
0.12948728526671288,
|
| 33 |
+
0.126551025016587,
|
| 34 |
+
0.12883004533197684
|
| 35 |
+
],
|
| 36 |
+
"test_losses": [
|
| 37 |
+
0.09175209701061249,
|
| 38 |
+
0.10383385419845581,
|
| 39 |
+
0.10482125729322433,
|
| 40 |
+
0.11379732936620712,
|
| 41 |
+
0.12626798450946808,
|
| 42 |
+
0.10898862034082413,
|
| 43 |
+
0.17151252925395966,
|
| 44 |
+
0.1757516860961914,
|
| 45 |
+
0.06563282012939453,
|
| 46 |
+
0.12461118400096893,
|
| 47 |
+
0.055585816502571106,
|
| 48 |
+
0.15210691094398499,
|
| 49 |
+
0.1360796093940735,
|
| 50 |
+
0.21055088937282562,
|
| 51 |
+
0.13093489408493042,
|
| 52 |
+
0.14872413873672485,
|
| 53 |
+
0.10653124749660492,
|
| 54 |
+
0.15394757688045502,
|
| 55 |
+
0.10651152580976486,
|
| 56 |
+
0.12105809152126312,
|
| 57 |
+
0.16416239738464355,
|
| 58 |
+
0.08760879188776016,
|
| 59 |
+
0.12546351552009583,
|
| 60 |
+
0.14228001236915588,
|
| 61 |
+
0.1927514374256134,
|
| 62 |
+
0.1498272716999054,
|
| 63 |
+
0.21303273737430573,
|
| 64 |
+
0.11621835082769394,
|
| 65 |
+
0.09819044172763824,
|
| 66 |
+
0.09028632938861847,
|
| 67 |
+
0.0884910523891449,
|
| 68 |
+
0.13156066834926605,
|
| 69 |
+
0.23093491792678833,
|
| 70 |
+
0.19826459884643555,
|
| 71 |
+
0.10931743681430817,
|
| 72 |
+
0.15826274454593658,
|
| 73 |
+
0.12335523962974548,
|
| 74 |
+
0.24228322505950928,
|
| 75 |
+
0.14932997524738312,
|
| 76 |
+
0.0679045170545578,
|
| 77 |
+
0.10480809211730957,
|
| 78 |
+
0.23186515271663666,
|
| 79 |
+
0.11789989471435547,
|
| 80 |
+
0.118819959461689,
|
| 81 |
+
0.1612658053636551,
|
| 82 |
+
0.18907234072685242,
|
| 83 |
+
0.07295972108840942,
|
| 84 |
+
0.06288766860961914,
|
| 85 |
+
0.1774468868970871,
|
| 86 |
+
0.095305897295475,
|
| 87 |
+
0.1588001698255539,
|
| 88 |
+
0.10207927227020264,
|
| 89 |
+
0.17765334248542786,
|
| 90 |
+
0.13650205731391907,
|
| 91 |
+
0.08141625672578812,
|
| 92 |
+
0.09815841168165207,
|
| 93 |
+
0.11967574059963226,
|
| 94 |
+
0.08025066554546356,
|
| 95 |
+
0.14091147482395172,
|
| 96 |
+
0.16877298057079315,
|
| 97 |
+
0.1309853494167328,
|
| 98 |
+
0.18363714218139648,
|
| 99 |
+
0.14098019897937775,
|
| 100 |
+
0.13023510575294495,
|
| 101 |
+
0.09921633452177048,
|
| 102 |
+
0.15699028968811035,
|
| 103 |
+
0.07227090746164322,
|
| 104 |
+
0.056603025645017624,
|
| 105 |
+
0.16534572839736938,
|
| 106 |
+
0.1160995289683342,
|
| 107 |
+
0.12309978902339935,
|
| 108 |
+
0.1590557098388672,
|
| 109 |
+
0.11739999055862427,
|
| 110 |
+
0.09106240421533585,
|
| 111 |
+
0.03778073191642761
|
| 112 |
+
],
|
| 113 |
+
"training_config": {
|
| 114 |
+
"TRAINING_EVAL_SPLIT": 20000,
|
| 115 |
+
"TRAINING_TEST_SPLIT": 19000,
|
| 116 |
+
"TRAINING_BATCH_SIZE": 256,
|
| 117 |
+
"TRAINING_PERM_LIMIT": 8,
|
| 118 |
+
"TRAINING_SUBARRAY_COUNT": 7,
|
| 119 |
+
"TRAINING_SHUFFLE_SEED": 44276,
|
| 120 |
+
"TRAINING_EPOCHS": 15,
|
| 121 |
+
"TRAINING_LOGITS_TEMPERATURE": 0.07,
|
| 122 |
+
"TRAINING_AUG_DROP_PROB": 0.3,
|
| 123 |
+
"TRAINING_LEARNING_RATE": 0.0001
|
| 124 |
+
}
|
| 125 |
+
}
|
pytorch/errors_8ea07c7d34b64b69_c7359727bcee4f8b.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
[1.5338175347295857, 1.22657845560777, 1.143834860521858, 1.0836338509442442, 1.0620510950684547, 1.0459884273803841, 1.024043610666768, 1.0206486447635343, 1.0067960839655439, 1.016337098711628, 0.9958153492060758, 0.9780209674421003, 0.9733868704761489, 0.9744852811603223, 0.9751925069396779, 0.9733117154594195, 0.9643709912138471, 0.9583197345925589, 0.9579972770001929, 0.9580096569606813]
|
|
|
|
|
|
pytorch/errors_a75bfbdfe75fe264_c7359727bcee4f8b.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
[1.5629867716866024, 1.2090288374131009, 1.1101135576175432, 1.0536913489133626, 1.0466712425320834, 1.0251403781822173, 1.0098970648595842, 1.0102424606428309, 0.9888654357548488, 0.9933246557237738, 0.9835211908665754, 0.9751109461915695, 0.9704712323718152, 0.9701493646381265, 0.9687612371171935]
|
|
|
|
|
|
pytorch/errors_dec65b57a17b7033_c7359727bcee4f8b.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
[1.5776104553271149, 1.217878203649642, 1.122273737343691, 1.0736174890297954, 1.0503659271082635, 1.0280952292604972, 1.023885629439758, 1.0176116310186305, 1.0110528254407947, 0.988821699647075, 0.9956680362002325, 0.9879678449893402, 0.9823854678010536, 0.967902198433876, 0.9723736886250771, 0.9659656348117327, 0.9554944373035835, 0.9522047054211972, 0.9606035141495325, 0.9525656859248371, 0.947982734542782, 0.9380048908672091, 0.9451856041106127, 0.945508426150023, 0.9375066331635087, 0.9257406412418616, 0.9255363149410587, 0.9310430130463535, 0.9432411346647699, 0.9272872375608501, 0.9227262659598205, 0.9262345289274797, 0.9259119953139353, 0.9168865921259937, 0.9316303334119966, 0.9201462741120386, 0.9204749166965485, 0.913366913037785, 0.9123165252602706, 0.9159364377037954, 0.9116012292393183, 0.9128472015766774, 0.9050465597439621, 0.9195494040594263, 0.9129077554759333, 0.9134560997708369, 0.8982684943130461, 0.916633656090599, 0.901568399900097, 0.9055851963743314]
|
|
|
|
|
|
pytorch/errors_e2ef15bc7697d18c_c7359727bcee4f8b.json
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
[0.9241503570645825, 0.734504614527504, 0.6832865893438839, 0.6563414675744494, 0.6379766185772003, 0.6300161842127048, 0.6187568047279098, 0.6050779993306447, 0.6162416961177863, 0.5955178568771559, 0.5995774066960707, 0.5992496137232728, 0.5938771347100315, 0.5892666344180283, 0.5769423848360925]
|
|
|
|
|
|
pytorch/setup_params_1a770feaa2bd9094_c7359727bcee4f8b.json
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"TRAINING_TAKE_EXAMPLES": 70000,
|
| 3 |
-
"TRAINING_BATCH_SIZE": 300,
|
| 4 |
-
"TRAINING_PERM_LIMIT": 8,
|
| 5 |
-
"TRAINING_SUBARRAY_COUNT": 5,
|
| 6 |
-
"TRAINING_EPOCHS": 15,
|
| 7 |
-
"HYPERP_TAGTOK_MAX_TOKEN_CLAMP": 128,
|
| 8 |
-
"HYPERP_TAGTOK_VOCAB_SIZE": 5000,
|
| 9 |
-
"HYPERP_TAGTOK_MIN_FREQ": 3,
|
| 10 |
-
"HYPERP_TRANSFORMER_D_MODEL": 192,
|
| 11 |
-
"HYPERP_TRANSFORMER_N_HEADS": 6,
|
| 12 |
-
"HYPERP_TRANSFORMER_N_LAYERS": 3,
|
| 13 |
-
"HYPERP_OUTPUT_EMB": 128
|
| 14 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/setup_params_8ea07c7d34b64b69_c7359727bcee4f8b.json
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"TRAINING_TAKE_EXAMPLES": 70000,
|
| 3 |
-
"TRAINING_BATCH_SIZE": 256,
|
| 4 |
-
"TRAINING_PERM_LIMIT": 8,
|
| 5 |
-
"TRAINING_SUBARRAY_COUNT": 7,
|
| 6 |
-
"TRAINING_EPOCHS": 20,
|
| 7 |
-
"HYPERP_TAGTOK_MAX_TOKEN_CLAMP": 128,
|
| 8 |
-
"HYPERP_TAGTOK_VOCAB_SIZE": 5000,
|
| 9 |
-
"HYPERP_TAGTOK_MIN_FREQ": 3,
|
| 10 |
-
"HYPERP_TRANSFORMER_D_MODEL": 128,
|
| 11 |
-
"HYPERP_TRANSFORMER_N_HEADS": 8,
|
| 12 |
-
"HYPERP_TRANSFORMER_N_LAYERS": 2,
|
| 13 |
-
"HYPERP_OUTPUT_EMB": 128
|
| 14 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/setup_params_a75bfbdfe75fe264_c7359727bcee4f8b.json
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"TRAINING_TAKE_EXAMPLES": 70000,
|
| 3 |
-
"TRAINING_BATCH_SIZE": 256,
|
| 4 |
-
"TRAINING_PERM_LIMIT": 8,
|
| 5 |
-
"TRAINING_SUBARRAY_COUNT": 5,
|
| 6 |
-
"TRAINING_EPOCHS": 15,
|
| 7 |
-
"HYPERP_TAGTOK_MAX_TOKEN_CLAMP": 128,
|
| 8 |
-
"HYPERP_TAGTOK_VOCAB_SIZE": 5000,
|
| 9 |
-
"HYPERP_TAGTOK_MIN_FREQ": 3,
|
| 10 |
-
"HYPERP_TRANSFORMER_D_MODEL": 128,
|
| 11 |
-
"HYPERP_TRANSFORMER_N_HEADS": 8,
|
| 12 |
-
"HYPERP_TRANSFORMER_N_LAYERS": 2,
|
| 13 |
-
"HYPERP_OUTPUT_EMB": 128
|
| 14 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/setup_params_dec65b57a17b7033_c7359727bcee4f8b.json
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"TRAINING_TAKE_EXAMPLES": 70000,
|
| 3 |
-
"TRAINING_BATCH_SIZE": 256,
|
| 4 |
-
"TRAINING_PERM_LIMIT": 8,
|
| 5 |
-
"TRAINING_SUBARRAY_COUNT": 7,
|
| 6 |
-
"TRAINING_EPOCHS": 50,
|
| 7 |
-
"HYPERP_TAGTOK_MAX_TOKEN_CLAMP": 128,
|
| 8 |
-
"HYPERP_TAGTOK_VOCAB_SIZE": 5000,
|
| 9 |
-
"HYPERP_TAGTOK_MIN_FREQ": 3,
|
| 10 |
-
"HYPERP_TRANSFORMER_D_MODEL": 128,
|
| 11 |
-
"HYPERP_TRANSFORMER_N_HEADS": 8,
|
| 12 |
-
"HYPERP_TRANSFORMER_N_LAYERS": 2,
|
| 13 |
-
"HYPERP_OUTPUT_EMB": 128
|
| 14 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytorch/{token_dataset_c7359727bcee4f8b_vocab_size_5000_freq_3.json → token_dataset_b0d065e705028cb3_vocab_size_5000_freq_3.json}
RENAMED
|
The diff for this file is too large to render.
See raw diff
|
|
|