Checkpoint at step 60
Browse files- checkpoints/{checkpoint-20 β checkpoint-40}/small_experts_and_gates.bin +1 -1
- checkpoints/{checkpoint-20 β checkpoint-60}/config.json +0 -0
- checkpoints/{checkpoint-20 β checkpoint-60}/generation_config.json +0 -0
- checkpoints/{checkpoint-20 β checkpoint-60}/model-00001-of-00003.safetensors +1 -1
- checkpoints/{checkpoint-20 β checkpoint-60}/model-00002-of-00003.safetensors +1 -1
- checkpoints/{checkpoint-20 β checkpoint-60}/model-00003-of-00003.safetensors +1 -1
- checkpoints/{checkpoint-20 β checkpoint-60}/model.safetensors.index.json +0 -0
- checkpoints/{checkpoint-20 β checkpoint-60}/optimizer.pt +2 -2
- checkpoints/{checkpoint-20 β checkpoint-60}/rng_state.pth +0 -0
- checkpoints/{checkpoint-20 β checkpoint-60}/scheduler.pt +1 -1
- checkpoints/{checkpoint-20 β checkpoint-60}/trainer_state.json +31 -3
- checkpoints/{checkpoint-20 β checkpoint-60}/training_args.bin +0 -0
- logs/events.out.tfevents.1756831118.5624918391e3.1490.0 +2 -2
checkpoints/{checkpoint-20 β checkpoint-40}/small_experts_and_gates.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 206711911
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcad30c967aa7b4708df7b4d5a68f0b6d8c6123a399b6c35b9e0a4e29e67abb8
|
| 3 |
size 206711911
|
checkpoints/{checkpoint-20 β checkpoint-60}/config.json
RENAMED
|
File without changes
|
checkpoints/{checkpoint-20 β checkpoint-60}/generation_config.json
RENAMED
|
File without changes
|
checkpoints/{checkpoint-20 β checkpoint-60}/model-00001-of-00003.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998915096
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48b53e83b3b9e51270e2b63ef55313427723ff57c75e4cf4c8269e04de45b290
|
| 3 |
size 4998915096
|
checkpoints/{checkpoint-20 β checkpoint-60}/model-00002-of-00003.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998953920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d944c12069808ce0f621767a0eec41762042e9c6daad49b55d161adf7b52702
|
| 3 |
size 4998953920
|
checkpoints/{checkpoint-20 β checkpoint-60}/model-00003-of-00003.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4046769808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b183dccf9c40fe45cbbaea5eed282b461eb701195ae659c5c2f3a6f73d10c88
|
| 3 |
size 4046769808
|
checkpoints/{checkpoint-20 β checkpoint-60}/model.safetensors.index.json
RENAMED
|
File without changes
|
checkpoints/{checkpoint-20 β checkpoint-60}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f466719a427bface1fffa8fbb6d8e31641cd12d2939657c34d508b7693e82a30
|
| 3 |
+
size 393467848
|
checkpoints/{checkpoint-20 β checkpoint-60}/rng_state.pth
RENAMED
|
File without changes
|
checkpoints/{checkpoint-20 β checkpoint-60}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e1be529198179cd559ddcb4c59a9f665944a456be4a70f4f5dcf79350fe0534
|
| 3 |
size 1465
|
checkpoints/{checkpoint-20 β checkpoint-60}/trainer_state.json
RENAMED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -22,6 +22,34 @@
|
|
| 22 |
"learning_rate": 3.106605624591236e-07,
|
| 23 |
"loss": 106.5866,
|
| 24 |
"step": 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
}
|
| 26 |
],
|
| 27 |
"logging_steps": 10,
|
|
@@ -41,7 +69,7 @@
|
|
| 41 |
"attributes": {}
|
| 42 |
}
|
| 43 |
},
|
| 44 |
-
"total_flos":
|
| 45 |
"train_batch_size": 2,
|
| 46 |
"trial_name": null,
|
| 47 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.0029433948380213027,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 60,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 22 |
"learning_rate": 3.106605624591236e-07,
|
| 23 |
"loss": 106.5866,
|
| 24 |
"step": 20
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.0014716974190106514,
|
| 28 |
+
"grad_norm": 52.75,
|
| 29 |
+
"learning_rate": 4.7416612164813603e-07,
|
| 30 |
+
"loss": 107.8943,
|
| 31 |
+
"step": 30
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.001962263225347535,
|
| 35 |
+
"grad_norm": 68.5,
|
| 36 |
+
"learning_rate": 6.376716808371485e-07,
|
| 37 |
+
"loss": 109.0438,
|
| 38 |
+
"step": 40
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.002452829031684419,
|
| 42 |
+
"grad_norm": 53.75,
|
| 43 |
+
"learning_rate": 8.011772400261609e-07,
|
| 44 |
+
"loss": 106.0855,
|
| 45 |
+
"step": 50
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.0029433948380213027,
|
| 49 |
+
"grad_norm": 63.25,
|
| 50 |
+
"learning_rate": 9.646827992151733e-07,
|
| 51 |
+
"loss": 106.0407,
|
| 52 |
+
"step": 60
|
| 53 |
}
|
| 54 |
],
|
| 55 |
"logging_steps": 10,
|
|
|
|
| 69 |
"attributes": {}
|
| 70 |
}
|
| 71 |
},
|
| 72 |
+
"total_flos": 1.6323732414922752e+17,
|
| 73 |
"train_batch_size": 2,
|
| 74 |
"trial_name": null,
|
| 75 |
"trial_params": null
|
checkpoints/{checkpoint-20 β checkpoint-60}/training_args.bin
RENAMED
|
File without changes
|
logs/events.out.tfevents.1756831118.5624918391e3.1490.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a16f40df0bc3e351cb0924387161cfe2a0c9e6a01de958593980e5f0db6e1bb
|
| 3 |
+
size 6486
|