Training in progress, step 5500, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2682482800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:843ffea3e47027a7327b46056614528e573a8eb208925c13ef01de733d872085
|
| 3 |
size 2682482800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5365108834
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:020e3cbb17c3204164f195677f3b07302a30bdd875a5e4274d98f682a414c00e
|
| 3 |
size 5365108834
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9972da412683217d3e7b5c8b7b27bb7cb54e37fcb06d0959653aa9cad5d36fc8
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e64edb59ac4e53d4505685902ba836e67456c610161bcc738cae4fc6ba12a85d
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e05485df9c0772c57db6278171bd1d12be10e5f20dbf942e364c40f5fbd3287d
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdab421c47fae8409d29d61cb7a02864fe4a42719ec643482d144bf7b2ce3282
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c4950c64cff23a8cf10836c8406c5d9f7e6c7ef15fb647d3bd7f359bce3314c
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -707,6 +707,76 @@
|
|
| 707 |
"learning_rate": 6.162112373349709e-05,
|
| 708 |
"loss": 0.92,
|
| 709 |
"step": 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
}
|
| 711 |
],
|
| 712 |
"logging_steps": 50,
|
|
@@ -726,7 +796,7 @@
|
|
| 726 |
"attributes": {}
|
| 727 |
}
|
| 728 |
},
|
| 729 |
-
"total_flos":
|
| 730 |
"train_batch_size": 2,
|
| 731 |
"trial_name": null,
|
| 732 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.4221377680185069,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 5500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 707 |
"learning_rate": 6.162112373349709e-05,
|
| 708 |
"loss": 0.92,
|
| 709 |
"step": 5000
|
| 710 |
+
},
|
| 711 |
+
{
|
| 712 |
+
"epoch": 0.38759922336244723,
|
| 713 |
+
"grad_norm": 0.6962669491767883,
|
| 714 |
+
"learning_rate": 6.123733497083206e-05,
|
| 715 |
+
"loss": 0.9186,
|
| 716 |
+
"step": 5050
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"epoch": 0.39143683943534274,
|
| 720 |
+
"grad_norm": 0.6156628131866455,
|
| 721 |
+
"learning_rate": 6.085354620816702e-05,
|
| 722 |
+
"loss": 0.9139,
|
| 723 |
+
"step": 5100
|
| 724 |
+
},
|
| 725 |
+
{
|
| 726 |
+
"epoch": 0.3952744555082383,
|
| 727 |
+
"grad_norm": 0.4484277069568634,
|
| 728 |
+
"learning_rate": 6.0469757445502e-05,
|
| 729 |
+
"loss": 0.914,
|
| 730 |
+
"step": 5150
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"epoch": 0.3991120715811338,
|
| 734 |
+
"grad_norm": 0.6082286834716797,
|
| 735 |
+
"learning_rate": 6.0085968682836965e-05,
|
| 736 |
+
"loss": 0.9148,
|
| 737 |
+
"step": 5200
|
| 738 |
+
},
|
| 739 |
+
{
|
| 740 |
+
"epoch": 0.40294968765402933,
|
| 741 |
+
"grad_norm": 0.6756613850593567,
|
| 742 |
+
"learning_rate": 5.970217992017194e-05,
|
| 743 |
+
"loss": 0.9137,
|
| 744 |
+
"step": 5250
|
| 745 |
+
},
|
| 746 |
+
{
|
| 747 |
+
"epoch": 0.40678730372692484,
|
| 748 |
+
"grad_norm": 0.6353741884231567,
|
| 749 |
+
"learning_rate": 5.9318391157506915e-05,
|
| 750 |
+
"loss": 0.9094,
|
| 751 |
+
"step": 5300
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"epoch": 0.41062491979982035,
|
| 755 |
+
"grad_norm": 0.6543828845024109,
|
| 756 |
+
"learning_rate": 5.893460239484189e-05,
|
| 757 |
+
"loss": 0.9089,
|
| 758 |
+
"step": 5350
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 0.41446253587271586,
|
| 762 |
+
"grad_norm": 0.6633620262145996,
|
| 763 |
+
"learning_rate": 5.855081363217685e-05,
|
| 764 |
+
"loss": 0.9105,
|
| 765 |
+
"step": 5400
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"epoch": 0.4183001519456114,
|
| 769 |
+
"grad_norm": 0.6769128441810608,
|
| 770 |
+
"learning_rate": 5.816702486951182e-05,
|
| 771 |
+
"loss": 0.9095,
|
| 772 |
+
"step": 5450
|
| 773 |
+
},
|
| 774 |
+
{
|
| 775 |
+
"epoch": 0.4221377680185069,
|
| 776 |
+
"grad_norm": 0.6803929209709167,
|
| 777 |
+
"learning_rate": 5.7783236106846794e-05,
|
| 778 |
+
"loss": 0.9085,
|
| 779 |
+
"step": 5500
|
| 780 |
}
|
| 781 |
],
|
| 782 |
"logging_steps": 50,
|
|
|
|
| 796 |
"attributes": {}
|
| 797 |
}
|
| 798 |
},
|
| 799 |
+
"total_flos": 9.426997587409371e+18,
|
| 800 |
"train_batch_size": 2,
|
| 801 |
"trial_name": null,
|
| 802 |
"trial_params": null
|