Training in progress, step 13748, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +521 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 737582948
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9429983e59f652175f71152fba6eaf3af3a03dcccaed4b1c0446ada02b2b54e6
|
| 3 |
size 737582948
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1475256250
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e8f963dd44ad4b5a4ff6a887f814dc448e58639e52eefe4e323265991e6b18d
|
| 3 |
size 1475256250
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75615b5e6cc125bb94988b3c50b73a5f8c3305643e30a3d5b2f3189a2032ba16
|
| 3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2e8e6885d573427d2de37a77bf587fa112946ff22d3ea4df32210439a557a5b
|
| 3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cc6d8d5bb2a96a1ebfb5cf92fac012f69410a414ce89ccd7c5ae11f14e596fa
|
| 3 |
+
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b1ad1f84976b61e4cfaae51278742d669e0df2692aced4131064ecd61c1edf2
|
| 3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35fafd6395e4cb387bb75fb28a0482502f9e17f6c3b0e3e256daf180373b3f0b
|
| 3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48c89bb33c92eb59bfef32b9537aa0cfa50296c7262cfdb9eb91256dc5b5e9f3
|
| 3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32dfdc872866fda5b64b7229bac1e43cf4fe2356a4c82d10a2502643547790ec
|
| 3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a48787eaef9585df14b508d1097c445291248a545d320eeaf26f46b061d496a
|
| 3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2813db161368db76429d904a036e1161875e895320a4ce21cc6fa1fdd51aa271
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 6.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -9107,6 +9107,524 @@
|
|
| 9107 |
"learning_rate": 4.981531808618395e-05,
|
| 9108 |
"loss": 0.6818,
|
| 9109 |
"step": 13000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9110 |
}
|
| 9111 |
],
|
| 9112 |
"logging_steps": 10,
|
|
@@ -9121,7 +9639,7 @@
|
|
| 9121 |
"should_evaluate": false,
|
| 9122 |
"should_log": false,
|
| 9123 |
"should_save": true,
|
| 9124 |
-
"should_training_stop":
|
| 9125 |
},
|
| 9126 |
"attributes": {}
|
| 9127 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.99692914763958,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 13748,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 9107 |
"learning_rate": 4.981531808618395e-05,
|
| 9108 |
"loss": 0.6818,
|
| 9109 |
"step": 13000
|
| 9110 |
+
},
|
| 9111 |
+
{
|
| 9112 |
+
"epoch": 6.621171379019555,
|
| 9113 |
+
"grad_norm": 0.13289377093315125,
|
| 9114 |
+
"learning_rate": 4.981517602317332e-05,
|
| 9115 |
+
"loss": 0.6811,
|
| 9116 |
+
"step": 13010
|
| 9117 |
+
},
|
| 9118 |
+
{
|
| 9119 |
+
"epoch": 6.626262947700043,
|
| 9120 |
+
"grad_norm": 0.18308168649673462,
|
| 9121 |
+
"learning_rate": 4.9815033960162695e-05,
|
| 9122 |
+
"loss": 0.678,
|
| 9123 |
+
"step": 13020
|
| 9124 |
+
},
|
| 9125 |
+
{
|
| 9126 |
+
"epoch": 6.631354516380531,
|
| 9127 |
+
"grad_norm": 0.12425180524587631,
|
| 9128 |
+
"learning_rate": 4.981489189715207e-05,
|
| 9129 |
+
"loss": 0.6816,
|
| 9130 |
+
"step": 13030
|
| 9131 |
+
},
|
| 9132 |
+
{
|
| 9133 |
+
"epoch": 6.636446085061019,
|
| 9134 |
+
"grad_norm": 0.13754673302173615,
|
| 9135 |
+
"learning_rate": 4.981474983414144e-05,
|
| 9136 |
+
"loss": 0.6773,
|
| 9137 |
+
"step": 13040
|
| 9138 |
+
},
|
| 9139 |
+
{
|
| 9140 |
+
"epoch": 6.641537653741508,
|
| 9141 |
+
"grad_norm": 0.15316608548164368,
|
| 9142 |
+
"learning_rate": 4.9814607771130814e-05,
|
| 9143 |
+
"loss": 0.6765,
|
| 9144 |
+
"step": 13050
|
| 9145 |
+
},
|
| 9146 |
+
{
|
| 9147 |
+
"epoch": 6.646629222421995,
|
| 9148 |
+
"grad_norm": 0.136078342795372,
|
| 9149 |
+
"learning_rate": 4.981446570812018e-05,
|
| 9150 |
+
"loss": 0.6767,
|
| 9151 |
+
"step": 13060
|
| 9152 |
+
},
|
| 9153 |
+
{
|
| 9154 |
+
"epoch": 6.651720791102484,
|
| 9155 |
+
"grad_norm": 0.12898576259613037,
|
| 9156 |
+
"learning_rate": 4.9814323645109554e-05,
|
| 9157 |
+
"loss": 0.6786,
|
| 9158 |
+
"step": 13070
|
| 9159 |
+
},
|
| 9160 |
+
{
|
| 9161 |
+
"epoch": 6.656812359782972,
|
| 9162 |
+
"grad_norm": 0.11854422837495804,
|
| 9163 |
+
"learning_rate": 4.981418158209893e-05,
|
| 9164 |
+
"loss": 0.6806,
|
| 9165 |
+
"step": 13080
|
| 9166 |
+
},
|
| 9167 |
+
{
|
| 9168 |
+
"epoch": 6.66190392846346,
|
| 9169 |
+
"grad_norm": 0.1517888456583023,
|
| 9170 |
+
"learning_rate": 4.98140395190883e-05,
|
| 9171 |
+
"loss": 0.6829,
|
| 9172 |
+
"step": 13090
|
| 9173 |
+
},
|
| 9174 |
+
{
|
| 9175 |
+
"epoch": 6.666995497143948,
|
| 9176 |
+
"grad_norm": 0.1091533899307251,
|
| 9177 |
+
"learning_rate": 4.9813897456077666e-05,
|
| 9178 |
+
"loss": 0.6774,
|
| 9179 |
+
"step": 13100
|
| 9180 |
+
},
|
| 9181 |
+
{
|
| 9182 |
+
"epoch": 6.672087065824436,
|
| 9183 |
+
"grad_norm": 0.13526228070259094,
|
| 9184 |
+
"learning_rate": 4.981375539306704e-05,
|
| 9185 |
+
"loss": 0.6747,
|
| 9186 |
+
"step": 13110
|
| 9187 |
+
},
|
| 9188 |
+
{
|
| 9189 |
+
"epoch": 6.677178634504925,
|
| 9190 |
+
"grad_norm": 0.144491046667099,
|
| 9191 |
+
"learning_rate": 4.981361333005641e-05,
|
| 9192 |
+
"loss": 0.6787,
|
| 9193 |
+
"step": 13120
|
| 9194 |
+
},
|
| 9195 |
+
{
|
| 9196 |
+
"epoch": 6.682270203185412,
|
| 9197 |
+
"grad_norm": 0.16958777606487274,
|
| 9198 |
+
"learning_rate": 4.9813471267045786e-05,
|
| 9199 |
+
"loss": 0.6744,
|
| 9200 |
+
"step": 13130
|
| 9201 |
+
},
|
| 9202 |
+
{
|
| 9203 |
+
"epoch": 6.687361771865901,
|
| 9204 |
+
"grad_norm": 0.14115367829799652,
|
| 9205 |
+
"learning_rate": 4.981332920403516e-05,
|
| 9206 |
+
"loss": 0.6791,
|
| 9207 |
+
"step": 13140
|
| 9208 |
+
},
|
| 9209 |
+
{
|
| 9210 |
+
"epoch": 6.692453340546389,
|
| 9211 |
+
"grad_norm": 0.11081673204898834,
|
| 9212 |
+
"learning_rate": 4.981318714102453e-05,
|
| 9213 |
+
"loss": 0.6795,
|
| 9214 |
+
"step": 13150
|
| 9215 |
+
},
|
| 9216 |
+
{
|
| 9217 |
+
"epoch": 6.6975449092268775,
|
| 9218 |
+
"grad_norm": 0.14843027293682098,
|
| 9219 |
+
"learning_rate": 4.9813045078013905e-05,
|
| 9220 |
+
"loss": 0.6807,
|
| 9221 |
+
"step": 13160
|
| 9222 |
+
},
|
| 9223 |
+
{
|
| 9224 |
+
"epoch": 6.702636477907365,
|
| 9225 |
+
"grad_norm": 0.12543180584907532,
|
| 9226 |
+
"learning_rate": 4.981290301500328e-05,
|
| 9227 |
+
"loss": 0.6778,
|
| 9228 |
+
"step": 13170
|
| 9229 |
+
},
|
| 9230 |
+
{
|
| 9231 |
+
"epoch": 6.707728046587853,
|
| 9232 |
+
"grad_norm": 0.13169404864311218,
|
| 9233 |
+
"learning_rate": 4.981276095199265e-05,
|
| 9234 |
+
"loss": 0.675,
|
| 9235 |
+
"step": 13180
|
| 9236 |
+
},
|
| 9237 |
+
{
|
| 9238 |
+
"epoch": 6.712819615268342,
|
| 9239 |
+
"grad_norm": 0.15343239903450012,
|
| 9240 |
+
"learning_rate": 4.9812618888982024e-05,
|
| 9241 |
+
"loss": 0.6819,
|
| 9242 |
+
"step": 13190
|
| 9243 |
+
},
|
| 9244 |
+
{
|
| 9245 |
+
"epoch": 6.7179111839488295,
|
| 9246 |
+
"grad_norm": 0.13029424846172333,
|
| 9247 |
+
"learning_rate": 4.981247682597139e-05,
|
| 9248 |
+
"loss": 0.6778,
|
| 9249 |
+
"step": 13200
|
| 9250 |
+
},
|
| 9251 |
+
{
|
| 9252 |
+
"epoch": 6.723002752629318,
|
| 9253 |
+
"grad_norm": 0.11084284633398056,
|
| 9254 |
+
"learning_rate": 4.9812334762960764e-05,
|
| 9255 |
+
"loss": 0.6824,
|
| 9256 |
+
"step": 13210
|
| 9257 |
+
},
|
| 9258 |
+
{
|
| 9259 |
+
"epoch": 6.728094321309806,
|
| 9260 |
+
"grad_norm": 0.11253423988819122,
|
| 9261 |
+
"learning_rate": 4.981219269995014e-05,
|
| 9262 |
+
"loss": 0.6798,
|
| 9263 |
+
"step": 13220
|
| 9264 |
+
},
|
| 9265 |
+
{
|
| 9266 |
+
"epoch": 6.7331858899902945,
|
| 9267 |
+
"grad_norm": 0.1311793029308319,
|
| 9268 |
+
"learning_rate": 4.981205063693951e-05,
|
| 9269 |
+
"loss": 0.6814,
|
| 9270 |
+
"step": 13230
|
| 9271 |
+
},
|
| 9272 |
+
{
|
| 9273 |
+
"epoch": 6.738277458670782,
|
| 9274 |
+
"grad_norm": 0.12919209897518158,
|
| 9275 |
+
"learning_rate": 4.981190857392888e-05,
|
| 9276 |
+
"loss": 0.6768,
|
| 9277 |
+
"step": 13240
|
| 9278 |
+
},
|
| 9279 |
+
{
|
| 9280 |
+
"epoch": 6.743369027351271,
|
| 9281 |
+
"grad_norm": 0.12355062365531921,
|
| 9282 |
+
"learning_rate": 4.9811766510918256e-05,
|
| 9283 |
+
"loss": 0.6799,
|
| 9284 |
+
"step": 13250
|
| 9285 |
+
},
|
| 9286 |
+
{
|
| 9287 |
+
"epoch": 6.748460596031759,
|
| 9288 |
+
"grad_norm": 0.1338970810174942,
|
| 9289 |
+
"learning_rate": 4.981162444790763e-05,
|
| 9290 |
+
"loss": 0.6771,
|
| 9291 |
+
"step": 13260
|
| 9292 |
+
},
|
| 9293 |
+
{
|
| 9294 |
+
"epoch": 6.7535521647122465,
|
| 9295 |
+
"grad_norm": 0.14117179811000824,
|
| 9296 |
+
"learning_rate": 4.9811482384897e-05,
|
| 9297 |
+
"loss": 0.6799,
|
| 9298 |
+
"step": 13270
|
| 9299 |
+
},
|
| 9300 |
+
{
|
| 9301 |
+
"epoch": 6.758643733392735,
|
| 9302 |
+
"grad_norm": 0.1848529875278473,
|
| 9303 |
+
"learning_rate": 4.9811340321886375e-05,
|
| 9304 |
+
"loss": 0.6755,
|
| 9305 |
+
"step": 13280
|
| 9306 |
+
},
|
| 9307 |
+
{
|
| 9308 |
+
"epoch": 6.763735302073223,
|
| 9309 |
+
"grad_norm": 0.1720336526632309,
|
| 9310 |
+
"learning_rate": 4.981119825887575e-05,
|
| 9311 |
+
"loss": 0.67,
|
| 9312 |
+
"step": 13290
|
| 9313 |
+
},
|
| 9314 |
+
{
|
| 9315 |
+
"epoch": 6.768826870753712,
|
| 9316 |
+
"grad_norm": 0.1607787162065506,
|
| 9317 |
+
"learning_rate": 4.981105619586512e-05,
|
| 9318 |
+
"loss": 0.6827,
|
| 9319 |
+
"step": 13300
|
| 9320 |
+
},
|
| 9321 |
+
{
|
| 9322 |
+
"epoch": 6.773918439434199,
|
| 9323 |
+
"grad_norm": 0.14998158812522888,
|
| 9324 |
+
"learning_rate": 4.981091413285449e-05,
|
| 9325 |
+
"loss": 0.6759,
|
| 9326 |
+
"step": 13310
|
| 9327 |
+
},
|
| 9328 |
+
{
|
| 9329 |
+
"epoch": 6.779010008114687,
|
| 9330 |
+
"grad_norm": 0.11763730645179749,
|
| 9331 |
+
"learning_rate": 4.981077206984386e-05,
|
| 9332 |
+
"loss": 0.6747,
|
| 9333 |
+
"step": 13320
|
| 9334 |
+
},
|
| 9335 |
+
{
|
| 9336 |
+
"epoch": 6.784101576795176,
|
| 9337 |
+
"grad_norm": 0.12859204411506653,
|
| 9338 |
+
"learning_rate": 4.9810630006833234e-05,
|
| 9339 |
+
"loss": 0.6785,
|
| 9340 |
+
"step": 13330
|
| 9341 |
+
},
|
| 9342 |
+
{
|
| 9343 |
+
"epoch": 6.7891931454756635,
|
| 9344 |
+
"grad_norm": 0.12227821350097656,
|
| 9345 |
+
"learning_rate": 4.98104879438226e-05,
|
| 9346 |
+
"loss": 0.6794,
|
| 9347 |
+
"step": 13340
|
| 9348 |
+
},
|
| 9349 |
+
{
|
| 9350 |
+
"epoch": 6.794284714156152,
|
| 9351 |
+
"grad_norm": 0.11308576911687851,
|
| 9352 |
+
"learning_rate": 4.9810345880811974e-05,
|
| 9353 |
+
"loss": 0.6777,
|
| 9354 |
+
"step": 13350
|
| 9355 |
+
},
|
| 9356 |
+
{
|
| 9357 |
+
"epoch": 6.79937628283664,
|
| 9358 |
+
"grad_norm": 0.12252433598041534,
|
| 9359 |
+
"learning_rate": 4.981020381780135e-05,
|
| 9360 |
+
"loss": 0.6778,
|
| 9361 |
+
"step": 13360
|
| 9362 |
+
},
|
| 9363 |
+
{
|
| 9364 |
+
"epoch": 6.804467851517129,
|
| 9365 |
+
"grad_norm": 0.11951456218957901,
|
| 9366 |
+
"learning_rate": 4.981006175479072e-05,
|
| 9367 |
+
"loss": 0.6778,
|
| 9368 |
+
"step": 13370
|
| 9369 |
+
},
|
| 9370 |
+
{
|
| 9371 |
+
"epoch": 6.809559420197616,
|
| 9372 |
+
"grad_norm": 0.13758736848831177,
|
| 9373 |
+
"learning_rate": 4.980991969178009e-05,
|
| 9374 |
+
"loss": 0.6757,
|
| 9375 |
+
"step": 13380
|
| 9376 |
+
},
|
| 9377 |
+
{
|
| 9378 |
+
"epoch": 6.814650988878105,
|
| 9379 |
+
"grad_norm": 0.15930655598640442,
|
| 9380 |
+
"learning_rate": 4.9809777628769466e-05,
|
| 9381 |
+
"loss": 0.675,
|
| 9382 |
+
"step": 13390
|
| 9383 |
+
},
|
| 9384 |
+
{
|
| 9385 |
+
"epoch": 6.819742557558593,
|
| 9386 |
+
"grad_norm": 0.16790159046649933,
|
| 9387 |
+
"learning_rate": 4.980963556575884e-05,
|
| 9388 |
+
"loss": 0.6685,
|
| 9389 |
+
"step": 13400
|
| 9390 |
+
},
|
| 9391 |
+
{
|
| 9392 |
+
"epoch": 6.824834126239081,
|
| 9393 |
+
"grad_norm": 0.1681044101715088,
|
| 9394 |
+
"learning_rate": 4.980949350274821e-05,
|
| 9395 |
+
"loss": 0.683,
|
| 9396 |
+
"step": 13410
|
| 9397 |
+
},
|
| 9398 |
+
{
|
| 9399 |
+
"epoch": 6.829925694919569,
|
| 9400 |
+
"grad_norm": 0.1336173415184021,
|
| 9401 |
+
"learning_rate": 4.9809351439737585e-05,
|
| 9402 |
+
"loss": 0.6746,
|
| 9403 |
+
"step": 13420
|
| 9404 |
+
},
|
| 9405 |
+
{
|
| 9406 |
+
"epoch": 6.835017263600057,
|
| 9407 |
+
"grad_norm": 0.11793011426925659,
|
| 9408 |
+
"learning_rate": 4.980920937672696e-05,
|
| 9409 |
+
"loss": 0.6789,
|
| 9410 |
+
"step": 13430
|
| 9411 |
+
},
|
| 9412 |
+
{
|
| 9413 |
+
"epoch": 6.840108832280546,
|
| 9414 |
+
"grad_norm": 0.14056985080242157,
|
| 9415 |
+
"learning_rate": 4.980906731371633e-05,
|
| 9416 |
+
"loss": 0.6797,
|
| 9417 |
+
"step": 13440
|
| 9418 |
+
},
|
| 9419 |
+
{
|
| 9420 |
+
"epoch": 6.845200400961033,
|
| 9421 |
+
"grad_norm": 0.11312086880207062,
|
| 9422 |
+
"learning_rate": 4.9808925250705705e-05,
|
| 9423 |
+
"loss": 0.6777,
|
| 9424 |
+
"step": 13450
|
| 9425 |
+
},
|
| 9426 |
+
{
|
| 9427 |
+
"epoch": 6.850291969641522,
|
| 9428 |
+
"grad_norm": 0.14550986886024475,
|
| 9429 |
+
"learning_rate": 4.980878318769507e-05,
|
| 9430 |
+
"loss": 0.6792,
|
| 9431 |
+
"step": 13460
|
| 9432 |
+
},
|
| 9433 |
+
{
|
| 9434 |
+
"epoch": 6.85538353832201,
|
| 9435 |
+
"grad_norm": 0.13276565074920654,
|
| 9436 |
+
"learning_rate": 4.9808641124684444e-05,
|
| 9437 |
+
"loss": 0.6797,
|
| 9438 |
+
"step": 13470
|
| 9439 |
+
},
|
| 9440 |
+
{
|
| 9441 |
+
"epoch": 6.8604751070024985,
|
| 9442 |
+
"grad_norm": 0.1404767632484436,
|
| 9443 |
+
"learning_rate": 4.980849906167382e-05,
|
| 9444 |
+
"loss": 0.6767,
|
| 9445 |
+
"step": 13480
|
| 9446 |
+
},
|
| 9447 |
+
{
|
| 9448 |
+
"epoch": 6.865566675682986,
|
| 9449 |
+
"grad_norm": 0.11344119906425476,
|
| 9450 |
+
"learning_rate": 4.980835699866319e-05,
|
| 9451 |
+
"loss": 0.6779,
|
| 9452 |
+
"step": 13490
|
| 9453 |
+
},
|
| 9454 |
+
{
|
| 9455 |
+
"epoch": 6.870658244363474,
|
| 9456 |
+
"grad_norm": 0.18248707056045532,
|
| 9457 |
+
"learning_rate": 4.9808214935652563e-05,
|
| 9458 |
+
"loss": 0.6819,
|
| 9459 |
+
"step": 13500
|
| 9460 |
+
},
|
| 9461 |
+
{
|
| 9462 |
+
"epoch": 6.875749813043963,
|
| 9463 |
+
"grad_norm": 0.13696008920669556,
|
| 9464 |
+
"learning_rate": 4.9808072872641937e-05,
|
| 9465 |
+
"loss": 0.6789,
|
| 9466 |
+
"step": 13510
|
| 9467 |
+
},
|
| 9468 |
+
{
|
| 9469 |
+
"epoch": 6.8808413817244505,
|
| 9470 |
+
"grad_norm": 0.1089053824543953,
|
| 9471 |
+
"learning_rate": 4.98079308096313e-05,
|
| 9472 |
+
"loss": 0.6833,
|
| 9473 |
+
"step": 13520
|
| 9474 |
+
},
|
| 9475 |
+
{
|
| 9476 |
+
"epoch": 6.885932950404939,
|
| 9477 |
+
"grad_norm": 0.13730046153068542,
|
| 9478 |
+
"learning_rate": 4.9807788746620676e-05,
|
| 9479 |
+
"loss": 0.685,
|
| 9480 |
+
"step": 13530
|
| 9481 |
+
},
|
| 9482 |
+
{
|
| 9483 |
+
"epoch": 6.891024519085427,
|
| 9484 |
+
"grad_norm": 0.11708593368530273,
|
| 9485 |
+
"learning_rate": 4.980764668361005e-05,
|
| 9486 |
+
"loss": 0.6797,
|
| 9487 |
+
"step": 13540
|
| 9488 |
+
},
|
| 9489 |
+
{
|
| 9490 |
+
"epoch": 6.896116087765915,
|
| 9491 |
+
"grad_norm": 0.14479976892471313,
|
| 9492 |
+
"learning_rate": 4.980750462059942e-05,
|
| 9493 |
+
"loss": 0.6779,
|
| 9494 |
+
"step": 13550
|
| 9495 |
+
},
|
| 9496 |
+
{
|
| 9497 |
+
"epoch": 6.901207656446403,
|
| 9498 |
+
"grad_norm": 0.13402192294597626,
|
| 9499 |
+
"learning_rate": 4.9807362557588795e-05,
|
| 9500 |
+
"loss": 0.6775,
|
| 9501 |
+
"step": 13560
|
| 9502 |
+
},
|
| 9503 |
+
{
|
| 9504 |
+
"epoch": 6.906299225126891,
|
| 9505 |
+
"grad_norm": 0.1378648430109024,
|
| 9506 |
+
"learning_rate": 4.980722049457817e-05,
|
| 9507 |
+
"loss": 0.6799,
|
| 9508 |
+
"step": 13570
|
| 9509 |
+
},
|
| 9510 |
+
{
|
| 9511 |
+
"epoch": 6.91139079380738,
|
| 9512 |
+
"grad_norm": 0.1424325555562973,
|
| 9513 |
+
"learning_rate": 4.980707843156754e-05,
|
| 9514 |
+
"loss": 0.6777,
|
| 9515 |
+
"step": 13580
|
| 9516 |
+
},
|
| 9517 |
+
{
|
| 9518 |
+
"epoch": 6.9164823624878675,
|
| 9519 |
+
"grad_norm": 0.12795968353748322,
|
| 9520 |
+
"learning_rate": 4.9806936368556915e-05,
|
| 9521 |
+
"loss": 0.6756,
|
| 9522 |
+
"step": 13590
|
| 9523 |
+
},
|
| 9524 |
+
{
|
| 9525 |
+
"epoch": 6.921573931168356,
|
| 9526 |
+
"grad_norm": 0.16961532831192017,
|
| 9527 |
+
"learning_rate": 4.980679430554628e-05,
|
| 9528 |
+
"loss": 0.6762,
|
| 9529 |
+
"step": 13600
|
| 9530 |
+
},
|
| 9531 |
+
{
|
| 9532 |
+
"epoch": 6.926665499848844,
|
| 9533 |
+
"grad_norm": 0.16084560751914978,
|
| 9534 |
+
"learning_rate": 4.9806652242535654e-05,
|
| 9535 |
+
"loss": 0.6783,
|
| 9536 |
+
"step": 13610
|
| 9537 |
+
},
|
| 9538 |
+
{
|
| 9539 |
+
"epoch": 6.931757068529333,
|
| 9540 |
+
"grad_norm": 0.1510113775730133,
|
| 9541 |
+
"learning_rate": 4.980651017952503e-05,
|
| 9542 |
+
"loss": 0.676,
|
| 9543 |
+
"step": 13620
|
| 9544 |
+
},
|
| 9545 |
+
{
|
| 9546 |
+
"epoch": 6.93684863720982,
|
| 9547 |
+
"grad_norm": 0.1436864286661148,
|
| 9548 |
+
"learning_rate": 4.98063681165144e-05,
|
| 9549 |
+
"loss": 0.6769,
|
| 9550 |
+
"step": 13630
|
| 9551 |
+
},
|
| 9552 |
+
{
|
| 9553 |
+
"epoch": 6.941940205890308,
|
| 9554 |
+
"grad_norm": 0.14651361107826233,
|
| 9555 |
+
"learning_rate": 4.980622605350377e-05,
|
| 9556 |
+
"loss": 0.6786,
|
| 9557 |
+
"step": 13640
|
| 9558 |
+
},
|
| 9559 |
+
{
|
| 9560 |
+
"epoch": 6.947031774570797,
|
| 9561 |
+
"grad_norm": 0.12080514430999756,
|
| 9562 |
+
"learning_rate": 4.9806083990493146e-05,
|
| 9563 |
+
"loss": 0.6719,
|
| 9564 |
+
"step": 13650
|
| 9565 |
+
},
|
| 9566 |
+
{
|
| 9567 |
+
"epoch": 6.952123343251285,
|
| 9568 |
+
"grad_norm": 0.18036852777004242,
|
| 9569 |
+
"learning_rate": 4.980594192748252e-05,
|
| 9570 |
+
"loss": 0.6776,
|
| 9571 |
+
"step": 13660
|
| 9572 |
+
},
|
| 9573 |
+
{
|
| 9574 |
+
"epoch": 6.957214911931773,
|
| 9575 |
+
"grad_norm": 0.15538708865642548,
|
| 9576 |
+
"learning_rate": 4.980579986447189e-05,
|
| 9577 |
+
"loss": 0.677,
|
| 9578 |
+
"step": 13670
|
| 9579 |
+
},
|
| 9580 |
+
{
|
| 9581 |
+
"epoch": 6.962306480612261,
|
| 9582 |
+
"grad_norm": 0.14524763822555542,
|
| 9583 |
+
"learning_rate": 4.9805657801461266e-05,
|
| 9584 |
+
"loss": 0.6725,
|
| 9585 |
+
"step": 13680
|
| 9586 |
+
},
|
| 9587 |
+
{
|
| 9588 |
+
"epoch": 6.96739804929275,
|
| 9589 |
+
"grad_norm": 0.13171471655368805,
|
| 9590 |
+
"learning_rate": 4.980551573845064e-05,
|
| 9591 |
+
"loss": 0.6814,
|
| 9592 |
+
"step": 13690
|
| 9593 |
+
},
|
| 9594 |
+
{
|
| 9595 |
+
"epoch": 6.972489617973237,
|
| 9596 |
+
"grad_norm": 0.14730645716190338,
|
| 9597 |
+
"learning_rate": 4.980537367544001e-05,
|
| 9598 |
+
"loss": 0.6828,
|
| 9599 |
+
"step": 13700
|
| 9600 |
+
},
|
| 9601 |
+
{
|
| 9602 |
+
"epoch": 6.977581186653726,
|
| 9603 |
+
"grad_norm": 0.1142466589808464,
|
| 9604 |
+
"learning_rate": 4.980523161242938e-05,
|
| 9605 |
+
"loss": 0.677,
|
| 9606 |
+
"step": 13710
|
| 9607 |
+
},
|
| 9608 |
+
{
|
| 9609 |
+
"epoch": 6.982672755334214,
|
| 9610 |
+
"grad_norm": 0.11980883777141571,
|
| 9611 |
+
"learning_rate": 4.980508954941875e-05,
|
| 9612 |
+
"loss": 0.6847,
|
| 9613 |
+
"step": 13720
|
| 9614 |
+
},
|
| 9615 |
+
{
|
| 9616 |
+
"epoch": 6.987764324014702,
|
| 9617 |
+
"grad_norm": 0.10882198065519333,
|
| 9618 |
+
"learning_rate": 4.9804947486408125e-05,
|
| 9619 |
+
"loss": 0.6749,
|
| 9620 |
+
"step": 13730
|
| 9621 |
+
},
|
| 9622 |
+
{
|
| 9623 |
+
"epoch": 6.99285589269519,
|
| 9624 |
+
"grad_norm": 0.1418180912733078,
|
| 9625 |
+
"learning_rate": 4.980480542339749e-05,
|
| 9626 |
+
"loss": 0.675,
|
| 9627 |
+
"step": 13740
|
| 9628 |
}
|
| 9629 |
],
|
| 9630 |
"logging_steps": 10,
|
|
|
|
| 9639 |
"should_evaluate": false,
|
| 9640 |
"should_log": false,
|
| 9641 |
"should_save": true,
|
| 9642 |
+
"should_training_stop": true
|
| 9643 |
},
|
| 9644 |
"attributes": {}
|
| 9645 |
}
|