Fanucci
committed on
Training in progress, step 900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4995335576
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a213889093690708333a72ba85b3e173a26f185c33fba33332e53260dbd1c4aa
|
| 3 |
size 4995335576
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1857639032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:972947c98f4763776af4efd2f002616917f749771c80dff97f83bfb67f4acd2d
|
| 3 |
size 1857639032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13706103974
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e127ed7b7fc1b3994f8c17585153440be7f03c26e5b46dbf6c360aa0ff0978e4
|
| 3 |
size 13706103974
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82be7d57b17f6620866913ecd4ff8b7c828f1f4bbbb44f8426b657efc6ab9fa7
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9eac997032339e8d5b43afb42a0ae765d18d35f79086924503e8ce32f56fdeb
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1,
|
| 3 |
"best_metric": 1.4945952892303467,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -747,6 +747,49 @@
|
|
| 747 |
"eval_samples_per_second": 15.239,
|
| 748 |
"eval_steps_per_second": 15.239,
|
| 749 |
"step": 850
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
}
|
| 751 |
],
|
| 752 |
"logging_steps": 10,
|
|
@@ -761,7 +804,7 @@
|
|
| 761 |
"early_stopping_threshold": 0.0
|
| 762 |
},
|
| 763 |
"attributes": {
|
| 764 |
-
"early_stopping_patience_counter":
|
| 765 |
}
|
| 766 |
},
|
| 767 |
"TrainerControl": {
|
|
@@ -775,7 +818,7 @@
|
|
| 775 |
"attributes": {}
|
| 776 |
}
|
| 777 |
},
|
| 778 |
-
"total_flos": 3.
|
| 779 |
"train_batch_size": 1,
|
| 780 |
"trial_name": null,
|
| 781 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 1,
|
| 3 |
"best_metric": 1.4945952892303467,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.056214865708931916,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 900,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 747 |
"eval_samples_per_second": 15.239,
|
| 748 |
"eval_steps_per_second": 15.239,
|
| 749 |
"step": 850
|
| 750 |
+
},
|
| 751 |
+
{
|
| 752 |
+
"epoch": 0.05371642723297939,
|
| 753 |
+
"grad_norm": 482.0,
|
| 754 |
+
"learning_rate": 0.002461047124653279,
|
| 755 |
+
"loss": 8.2881,
|
| 756 |
+
"step": 860
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"epoch": 0.05434103685196752,
|
| 760 |
+
"grad_norm": 1864.0,
|
| 761 |
+
"learning_rate": 0.0021292109072704956,
|
| 762 |
+
"loss": 7.7393,
|
| 763 |
+
"step": 870
|
| 764 |
+
},
|
| 765 |
+
{
|
| 766 |
+
"epoch": 0.05496564647095565,
|
| 767 |
+
"grad_norm": 181.0,
|
| 768 |
+
"learning_rate": 0.0018204036358303172,
|
| 769 |
+
"loss": 7.7977,
|
| 770 |
+
"step": 880
|
| 771 |
+
},
|
| 772 |
+
{
|
| 773 |
+
"epoch": 0.055590256089943786,
|
| 774 |
+
"grad_norm": 70.0,
|
| 775 |
+
"learning_rate": 0.001534936253078606,
|
| 776 |
+
"loss": 7.4679,
|
| 777 |
+
"step": 890
|
| 778 |
+
},
|
| 779 |
+
{
|
| 780 |
+
"epoch": 0.056214865708931916,
|
| 781 |
+
"grad_norm": 756.0,
|
| 782 |
+
"learning_rate": 0.0012730962004717683,
|
| 783 |
+
"loss": 6.7936,
|
| 784 |
+
"step": 900
|
| 785 |
+
},
|
| 786 |
+
{
|
| 787 |
+
"epoch": 0.056214865708931916,
|
| 788 |
+
"eval_loss": 7.608245372772217,
|
| 789 |
+
"eval_runtime": 53.9447,
|
| 790 |
+
"eval_samples_per_second": 15.627,
|
| 791 |
+
"eval_steps_per_second": 15.627,
|
| 792 |
+
"step": 900
|
| 793 |
}
|
| 794 |
],
|
| 795 |
"logging_steps": 10,
|
|
|
|
| 804 |
"early_stopping_threshold": 0.0
|
| 805 |
},
|
| 806 |
"attributes": {
|
| 807 |
+
"early_stopping_patience_counter": 18
|
| 808 |
}
|
| 809 |
},
|
| 810 |
"TrainerControl": {
|
|
|
|
| 818 |
"attributes": {}
|
| 819 |
}
|
| 820 |
},
|
| 821 |
+
"total_flos": 3.676159475712e+16,
|
| 822 |
"train_batch_size": 1,
|
| 823 |
"trial_name": null,
|
| 824 |
"trial_params": null
|