Fanucci
committed on
Training in progress, step 950, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4995335576
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4725f7683f2b8dc2c37be3613593f25b041ac5cfee2a86d851142562e03f31e2
|
| 3 |
size 4995335576
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1857639032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe054c5b7dbc4504491e8399e4c3dd2f601a8021e18628ca1a900249c2072e44
|
| 3 |
size 1857639032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13706103974
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4483c56b3458c6f88b40364283839fd954d028d7abdcc22bb698f9239fe76b79
|
| 3 |
size 13706103974
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:645b71843375baf4cc10bc75a2f0f04e91b8e3f8e8929f518a87022898b3bc20
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a288ddd8ddfd7759ce5e03ca2b9f4240d1938c54de141af3734f632c4e8a713b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1,
|
| 3 |
"best_metric": 1.4945952892303467,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -790,6 +790,49 @@
|
|
| 790 |
"eval_samples_per_second": 15.627,
|
| 791 |
"eval_steps_per_second": 15.627,
|
| 792 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
}
|
| 794 |
],
|
| 795 |
"logging_steps": 10,
|
|
@@ -804,7 +847,7 @@
|
|
| 804 |
"early_stopping_threshold": 0.0
|
| 805 |
},
|
| 806 |
"attributes": {
|
| 807 |
-
"early_stopping_patience_counter":
|
| 808 |
}
|
| 809 |
},
|
| 810 |
"TrainerControl": {
|
|
@@ -818,7 +861,7 @@
|
|
| 818 |
"attributes": {}
|
| 819 |
}
|
| 820 |
},
|
| 821 |
-
"total_flos": 3.
|
| 822 |
"train_batch_size": 1,
|
| 823 |
"trial_name": null,
|
| 824 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 1,
|
| 3 |
"best_metric": 1.4945952892303467,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.05933791380387258,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 950,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 790 |
"eval_samples_per_second": 15.627,
|
| 791 |
"eval_steps_per_second": 15.627,
|
| 792 |
"step": 900
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"epoch": 0.05683947532792005,
|
| 796 |
+
"grad_norm": 63.5,
|
| 797 |
+
"learning_rate": 0.0010351471287475406,
|
| 798 |
+
"loss": 7.814,
|
| 799 |
+
"step": 910
|
| 800 |
+
},
|
| 801 |
+
{
|
| 802 |
+
"epoch": 0.05746408494690818,
|
| 803 |
+
"grad_norm": 1088.0,
|
| 804 |
+
"learning_rate": 0.0008213286324510738,
|
| 805 |
+
"loss": 7.1516,
|
| 806 |
+
"step": 920
|
| 807 |
+
},
|
| 808 |
+
{
|
| 809 |
+
"epoch": 0.05808869456589631,
|
| 810 |
+
"grad_norm": 268.0,
|
| 811 |
+
"learning_rate": 0.000631856008683518,
|
| 812 |
+
"loss": 7.2965,
|
| 813 |
+
"step": 930
|
| 814 |
+
},
|
| 815 |
+
{
|
| 816 |
+
"epoch": 0.05871330418488445,
|
| 817 |
+
"grad_norm": 600.0,
|
| 818 |
+
"learning_rate": 0.00046692004031609894,
|
| 819 |
+
"loss": 7.5007,
|
| 820 |
+
"step": 940
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"epoch": 0.05933791380387258,
|
| 824 |
+
"grad_norm": 148.0,
|
| 825 |
+
"learning_rate": 0.0003266868038879434,
|
| 826 |
+
"loss": 7.4526,
|
| 827 |
+
"step": 950
|
| 828 |
+
},
|
| 829 |
+
{
|
| 830 |
+
"epoch": 0.05933791380387258,
|
| 831 |
+
"eval_loss": 7.6293864250183105,
|
| 832 |
+
"eval_runtime": 52.7226,
|
| 833 |
+
"eval_samples_per_second": 15.989,
|
| 834 |
+
"eval_steps_per_second": 15.989,
|
| 835 |
+
"step": 950
|
| 836 |
}
|
| 837 |
],
|
| 838 |
"logging_steps": 10,
|
|
|
|
| 847 |
"early_stopping_threshold": 0.0
|
| 848 |
},
|
| 849 |
"attributes": {
|
| 850 |
+
"early_stopping_patience_counter": 19
|
| 851 |
}
|
| 852 |
},
|
| 853 |
"TrainerControl": {
|
|
|
|
| 861 |
"attributes": {}
|
| 862 |
}
|
| 863 |
},
|
| 864 |
+
"total_flos": 3.880390557696e+16,
|
| 865 |
"train_batch_size": 1,
|
| 866 |
"trial_name": null,
|
| 867 |
"trial_params": null
|