Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +46 -4
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -533,6 +533,48 @@
|
|
| 533 |
"learning_rate": 9.163991939592081e-07,
|
| 534 |
"loss": 0.4315,
|
| 535 |
"step": 750
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
}
|
| 537 |
],
|
| 538 |
"logging_steps": 10,
|
|
@@ -547,12 +589,12 @@
|
|
| 547 |
"should_evaluate": false,
|
| 548 |
"should_log": false,
|
| 549 |
"should_save": true,
|
| 550 |
-
"should_training_stop":
|
| 551 |
},
|
| 552 |
"attributes": {}
|
| 553 |
}
|
| 554 |
},
|
| 555 |
-
"total_flos": 2.
|
| 556 |
"train_batch_size": 1,
|
| 557 |
"trial_name": null,
|
| 558 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.9924190213645763,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 816,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 533 |
"learning_rate": 9.163991939592081e-07,
|
| 534 |
"loss": 0.4315,
|
| 535 |
"step": 750
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 2.7865839650815527,
|
| 539 |
+
"grad_norm": 0.39825597405433655,
|
| 540 |
+
"learning_rate": 6.643898936001369e-07,
|
| 541 |
+
"loss": 0.4214,
|
| 542 |
+
"step": 760
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 2.823340225132093,
|
| 546 |
+
"grad_norm": 0.38193032145500183,
|
| 547 |
+
"learning_rate": 4.5236393674580324e-07,
|
| 548 |
+
"loss": 0.4221,
|
| 549 |
+
"step": 770
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 2.8600964851826327,
|
| 553 |
+
"grad_norm": 0.4201795160770416,
|
| 554 |
+
"learning_rate": 2.8066968148658945e-07,
|
| 555 |
+
"loss": 0.428,
|
| 556 |
+
"step": 780
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 2.8968527452331725,
|
| 560 |
+
"grad_norm": 0.3711008131504059,
|
| 561 |
+
"learning_rate": 1.4958922103305574e-07,
|
| 562 |
+
"loss": 0.4211,
|
| 563 |
+
"step": 790
|
| 564 |
+
},
|
| 565 |
+
{
|
| 566 |
+
"epoch": 2.9336090052837123,
|
| 567 |
+
"grad_norm": 0.39392825961112976,
|
| 568 |
+
"learning_rate": 5.9337920237534375e-08,
|
| 569 |
+
"loss": 0.4295,
|
| 570 |
+
"step": 800
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 2.9703652653342525,
|
| 574 |
+
"grad_norm": 0.3807792067527771,
|
| 575 |
+
"learning_rate": 1.0064061750253429e-08,
|
| 576 |
+
"loss": 0.4133,
|
| 577 |
+
"step": 810
|
| 578 |
}
|
| 579 |
],
|
| 580 |
"logging_steps": 10,
|
|
|
|
| 589 |
"should_evaluate": false,
|
| 590 |
"should_log": false,
|
| 591 |
"should_save": true,
|
| 592 |
+
"should_training_stop": true
|
| 593 |
},
|
| 594 |
"attributes": {}
|
| 595 |
}
|
| 596 |
},
|
| 597 |
+
"total_flos": 2.716624724636467e+16,
|
| 598 |
"train_batch_size": 1,
|
| 599 |
"trial_name": null,
|
| 600 |
"trial_params": null
|