Training in progress, step 740000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0b6142827a79f5d4b326c8ef95883604994380c09564cc48d7c1a0eecc0ae0c
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23b5410b91904505f995b54f8dbaf35031127f48559f667a312bfe26edbdc46c
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d16bcb150fb723167f32e28532d305d9b788035b9c1c04eefc4171601b8a86cf
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f68eb57082d644a52febc0af19784501f7d94576defdbcf673807dd01942a834
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93e011812af607ddca2f8883a544e1029094dd30ea8f434fd3b69dea782324c8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ac4fa903420068a2c49e97bb4ec5079f04b5e57b740f4f724d3e2eb49716420
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d9f0f128872a454b4739c6d1bc039a16d454358b055fa818e1343bc269f4881
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5408,11 +5408,85 @@
|
|
| 5408 |
"eval_samples_per_second": 1264.795,
|
| 5409 |
"eval_steps_per_second": 20.237,
|
| 5410 |
"step": 730000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5411 |
}
|
| 5412 |
],
|
| 5413 |
"max_steps": 1000000,
|
| 5414 |
"num_train_epochs": 16,
|
| 5415 |
-
"total_flos": 5.
|
| 5416 |
"trial_name": null,
|
| 5417 |
"trial_params": null
|
| 5418 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.299952662360468,
|
| 5 |
+
"global_step": 740000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5408 |
"eval_samples_per_second": 1264.795,
|
| 5409 |
"eval_steps_per_second": 20.237,
|
| 5410 |
"step": 730000
|
| 5411 |
+
},
|
| 5412 |
+
{
|
| 5413 |
+
"epoch": 11.16,
|
| 5414 |
+
"learning_rate": 3.591759627988353e-05,
|
| 5415 |
+
"loss": 0.242,
|
| 5416 |
+
"step": 731000
|
| 5417 |
+
},
|
| 5418 |
+
{
|
| 5419 |
+
"epoch": 11.18,
|
| 5420 |
+
"learning_rate": 3.573801973101913e-05,
|
| 5421 |
+
"loss": 0.2418,
|
| 5422 |
+
"step": 732000
|
| 5423 |
+
},
|
| 5424 |
+
{
|
| 5425 |
+
"epoch": 11.19,
|
| 5426 |
+
"learning_rate": 3.5558927224079534e-05,
|
| 5427 |
+
"loss": 0.2418,
|
| 5428 |
+
"step": 733000
|
| 5429 |
+
},
|
| 5430 |
+
{
|
| 5431 |
+
"epoch": 11.21,
|
| 5432 |
+
"learning_rate": 3.5380320717591716e-05,
|
| 5433 |
+
"loss": 0.2419,
|
| 5434 |
+
"step": 734000
|
| 5435 |
+
},
|
| 5436 |
+
{
|
| 5437 |
+
"epoch": 11.22,
|
| 5438 |
+
"learning_rate": 3.5202202164767836e-05,
|
| 5439 |
+
"loss": 0.2418,
|
| 5440 |
+
"step": 735000
|
| 5441 |
+
},
|
| 5442 |
+
{
|
| 5443 |
+
"epoch": 11.22,
|
| 5444 |
+
"eval_runtime": 0.8971,
|
| 5445 |
+
"eval_samples_per_second": 1114.723,
|
| 5446 |
+
"eval_steps_per_second": 17.836,
|
| 5447 |
+
"step": 735000
|
| 5448 |
+
},
|
| 5449 |
+
{
|
| 5450 |
+
"epoch": 11.24,
|
| 5451 |
+
"learning_rate": 3.5024573513483864e-05,
|
| 5452 |
+
"loss": 0.2415,
|
| 5453 |
+
"step": 736000
|
| 5454 |
+
},
|
| 5455 |
+
{
|
| 5456 |
+
"epoch": 11.25,
|
| 5457 |
+
"learning_rate": 3.484743670625822e-05,
|
| 5458 |
+
"loss": 0.2414,
|
| 5459 |
+
"step": 737000
|
| 5460 |
+
},
|
| 5461 |
+
{
|
| 5462 |
+
"epoch": 11.27,
|
| 5463 |
+
"learning_rate": 3.467079368023068e-05,
|
| 5464 |
+
"loss": 0.2413,
|
| 5465 |
+
"step": 738000
|
| 5466 |
+
},
|
| 5467 |
+
{
|
| 5468 |
+
"epoch": 11.28,
|
| 5469 |
+
"learning_rate": 3.449464636714107e-05,
|
| 5470 |
+
"loss": 0.2415,
|
| 5471 |
+
"step": 739000
|
| 5472 |
+
},
|
| 5473 |
+
{
|
| 5474 |
+
"epoch": 11.3,
|
| 5475 |
+
"learning_rate": 3.431899669330819e-05,
|
| 5476 |
+
"loss": 0.2414,
|
| 5477 |
+
"step": 740000
|
| 5478 |
+
},
|
| 5479 |
+
{
|
| 5480 |
+
"epoch": 11.3,
|
| 5481 |
+
"eval_runtime": 0.7754,
|
| 5482 |
+
"eval_samples_per_second": 1289.598,
|
| 5483 |
+
"eval_steps_per_second": 20.634,
|
| 5484 |
+
"step": 740000
|
| 5485 |
}
|
| 5486 |
],
|
| 5487 |
"max_steps": 1000000,
|
| 5488 |
"num_train_epochs": 16,
|
| 5489 |
+
"total_flos": 5.187412088496052e+22,
|
| 5490 |
"trial_name": null,
|
| 5491 |
"trial_params": null
|
| 5492 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23b5410b91904505f995b54f8dbaf35031127f48559f667a312bfe26edbdc46c
|
| 3 |
size 449471589
|