Upload via push_to_hf.py
Browse files- base/d20/meta_021400.json +6 -6
- base/d20/model_021400.pt +1 -1
- base/d20/optim_021400_rank0.pt +1 -1
- base/d20/optim_021400_rank1.pt +3 -0
- base/d20/optim_021400_rank2.pt +3 -0
- base/d20/optim_021400_rank3.pt +3 -0
- base/d20/optim_021400_rank4.pt +3 -0
- base/d20/optim_021400_rank5.pt +3 -0
- base/d20/optim_021400_rank6.pt +3 -0
- base/d20/optim_021400_rank7.pt +3 -0
base/d20/meta_021400.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"step": 21400,
|
| 3 |
-
"val_bpb": 0.
|
| 4 |
"model_config": {
|
| 5 |
"sequence_len": 2048,
|
| 6 |
"vocab_size": 65536,
|
|
@@ -10,7 +10,7 @@
|
|
| 10 |
"n_embd": 1280
|
| 11 |
},
|
| 12 |
"user_config": {
|
| 13 |
-
"run": "
|
| 14 |
"device_type": "",
|
| 15 |
"depth": 20,
|
| 16 |
"max_seq_len": 2048,
|
|
@@ -40,11 +40,11 @@
|
|
| 40 |
"max_seq_len": 2048,
|
| 41 |
"dataloader_state_dict": {
|
| 42 |
"pq_idx": 198,
|
| 43 |
-
"rg_idx":
|
| 44 |
},
|
| 45 |
"loop_state": {
|
| 46 |
-
"min_val_bpb": 0.
|
| 47 |
-
"smooth_train_loss": 2.
|
| 48 |
-
"total_training_time":
|
| 49 |
}
|
| 50 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"step": 21400,
|
| 3 |
+
"val_bpb": 0.8120380958080629,
|
| 4 |
"model_config": {
|
| 5 |
"sequence_len": 2048,
|
| 6 |
"vocab_size": 65536,
|
|
|
|
| 10 |
"n_embd": 1280
|
| 11 |
},
|
| 12 |
"user_config": {
|
| 13 |
+
"run": "d20",
|
| 14 |
"device_type": "",
|
| 15 |
"depth": 20,
|
| 16 |
"max_seq_len": 2048,
|
|
|
|
| 40 |
"max_seq_len": 2048,
|
| 41 |
"dataloader_state_dict": {
|
| 42 |
"pq_idx": 198,
|
| 43 |
+
"rg_idx": 40
|
| 44 |
},
|
| 45 |
"loop_state": {
|
| 46 |
+
"min_val_bpb": 0.8120380958080629,
|
| 47 |
+
"smooth_train_loss": 2.7197287798216516,
|
| 48 |
+
"total_training_time": 11182.78338599205
|
| 49 |
}
|
| 50 |
}
|
base/d20/model_021400.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2076230219
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e270e9375a47b4308ed43d81bc5e70c52a6096266b646ccc7700ab5106f9f8d8
|
| 3 |
size 2076230219
|
base/d20/optim_021400_rank0.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 407643053
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d05457f5c3c465a1c1268a1c3b99518b4ceabc49eac14be0aee6567caf67460c
|
| 3 |
size 407643053
|
base/d20/optim_021400_rank1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29854629d95497379ef281da3aad3b58952bac487ef23c6b913c2b0fa719e53f
|
| 3 |
+
size 407643053
|
base/d20/optim_021400_rank2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50945075e54e19ad8ea7cc108ed0a59a6c065a2aa965fd52885a0fbcd6f1bcf0
|
| 3 |
+
size 407643053
|
base/d20/optim_021400_rank3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:117ce7782c1d791077a3a5f90035a56c412c5a444da146909309fa1834af0fe2
|
| 3 |
+
size 407643053
|
base/d20/optim_021400_rank4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81afbe5f3ef65938a889d1d49ff4915139e82019d3d4506f699bb3138d24b0af
|
| 3 |
+
size 355213661
|
base/d20/optim_021400_rank5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1d1465066638548aae1dba0437227438d1fc40250fee4b12a45dd0be3366d29
|
| 3 |
+
size 355213661
|
base/d20/optim_021400_rank6.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a31a0ad35825dc7fc9a8e55d8cb9dd146389aa961f6bdc9177a4e48e9623bb6
|
| 3 |
+
size 355213661
|
base/d20/optim_021400_rank7.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6851ad37ce41d5cec3dc64d26d684c55d43faedeec730bf5b0a52e8e0294e31f
|
| 3 |
+
size 355213661
|