Training in progress, step 730000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c26fc7d85d9f02e6a64dd6a80217974cd29a1b0b54ea54e9b185baad200d5cbf
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4d2617dcaf424c89a4b688e6f4da6209c2c4f8b6273e0866be4b1e433d0bac8
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b88df4274470d41979d3cdbe4a25129230446986c8181439f49998ce0a51f2de
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be607f7560436df027ef62ca453f5afb8d7770ec356ed4a4ec23eedf6a0db7f4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29def8111a742b9412e55fb093f94afe63bb7e770d7ed5d28292a3cdbf42c223
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb6393653f317c7043e9a8b3debb3d85fe1b374103dd2a08137b8029e1480248
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95cdcd8f948ae7991b3b8cef0a5275b9e2e19dc1c57b631487377d234a0e9f31
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5334,11 +5334,85 @@
|
|
| 5334 |
"eval_samples_per_second": 1289.466,
|
| 5335 |
"eval_steps_per_second": 20.631,
|
| 5336 |
"step": 720000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5337 |
}
|
| 5338 |
],
|
| 5339 |
"max_steps": 1000000,
|
| 5340 |
"num_train_epochs": 16,
|
| 5341 |
-
"total_flos": 5.
|
| 5342 |
"trial_name": null,
|
| 5343 |
"trial_params": null
|
| 5344 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.147250599355598,
|
| 5 |
+
"global_step": 730000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5334 |
"eval_samples_per_second": 1289.466,
|
| 5335 |
"eval_steps_per_second": 20.631,
|
| 5336 |
"step": 720000
|
| 5337 |
+
},
|
| 5338 |
+
{
|
| 5339 |
+
"epoch": 11.01,
|
| 5340 |
+
"learning_rate": 3.7739549441414945e-05,
|
| 5341 |
+
"loss": 0.2427,
|
| 5342 |
+
"step": 721000
|
| 5343 |
+
},
|
| 5344 |
+
{
|
| 5345 |
+
"epoch": 11.03,
|
| 5346 |
+
"learning_rate": 3.755524134615825e-05,
|
| 5347 |
+
"loss": 0.2429,
|
| 5348 |
+
"step": 722000
|
| 5349 |
+
},
|
| 5350 |
+
{
|
| 5351 |
+
"epoch": 11.04,
|
| 5352 |
+
"learning_rate": 3.7371397419981925e-05,
|
| 5353 |
+
"loss": 0.2428,
|
| 5354 |
+
"step": 723000
|
| 5355 |
+
},
|
| 5356 |
+
{
|
| 5357 |
+
"epoch": 11.06,
|
| 5358 |
+
"learning_rate": 3.7188019673373706e-05,
|
| 5359 |
+
"loss": 0.2431,
|
| 5360 |
+
"step": 724000
|
| 5361 |
+
},
|
| 5362 |
+
{
|
| 5363 |
+
"epoch": 11.07,
|
| 5364 |
+
"learning_rate": 3.700511011172325e-05,
|
| 5365 |
+
"loss": 0.2436,
|
| 5366 |
+
"step": 725000
|
| 5367 |
+
},
|
| 5368 |
+
{
|
| 5369 |
+
"epoch": 11.07,
|
| 5370 |
+
"eval_runtime": 0.7297,
|
| 5371 |
+
"eval_samples_per_second": 1370.472,
|
| 5372 |
+
"eval_steps_per_second": 21.928,
|
| 5373 |
+
"step": 725000
|
| 5374 |
+
},
|
| 5375 |
+
{
|
| 5376 |
+
"epoch": 11.09,
|
| 5377 |
+
"learning_rate": 3.682267073530023e-05,
|
| 5378 |
+
"loss": 0.243,
|
| 5379 |
+
"step": 726000
|
| 5380 |
+
},
|
| 5381 |
+
{
|
| 5382 |
+
"epoch": 11.1,
|
| 5383 |
+
"learning_rate": 3.664070353923245e-05,
|
| 5384 |
+
"loss": 0.2424,
|
| 5385 |
+
"step": 727000
|
| 5386 |
+
},
|
| 5387 |
+
{
|
| 5388 |
+
"epoch": 11.12,
|
| 5389 |
+
"learning_rate": 3.645921051348396e-05,
|
| 5390 |
+
"loss": 0.2423,
|
| 5391 |
+
"step": 728000
|
| 5392 |
+
},
|
| 5393 |
+
{
|
| 5394 |
+
"epoch": 11.13,
|
| 5395 |
+
"learning_rate": 3.627819364283345e-05,
|
| 5396 |
+
"loss": 0.2456,
|
| 5397 |
+
"step": 729000
|
| 5398 |
+
},
|
| 5399 |
+
{
|
| 5400 |
+
"epoch": 11.15,
|
| 5401 |
+
"learning_rate": 3.6097654906852405e-05,
|
| 5402 |
+
"loss": 0.2431,
|
| 5403 |
+
"step": 730000
|
| 5404 |
+
},
|
| 5405 |
+
{
|
| 5406 |
+
"epoch": 11.15,
|
| 5407 |
+
"eval_runtime": 0.7906,
|
| 5408 |
+
"eval_samples_per_second": 1264.795,
|
| 5409 |
+
"eval_steps_per_second": 20.237,
|
| 5410 |
+
"step": 730000
|
| 5411 |
}
|
| 5412 |
],
|
| 5413 |
"max_steps": 1000000,
|
| 5414 |
"num_train_epochs": 16,
|
| 5415 |
+
"total_flos": 5.117311827465705e+22,
|
| 5416 |
"trial_name": null,
|
| 5417 |
"trial_params": null
|
| 5418 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4d2617dcaf424c89a4b688e6f4da6209c2c4f8b6273e0866be4b1e433d0bac8
|
| 3 |
size 449471589
|