Fanucci
commited on
Training in progress, step 5760, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 389074464
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9235f4607cf8e84409de4ce4b2847c74a696066b57d3cdb69cbc2e01b28806d0
|
| 3 |
size 389074464
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 198011700
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5ff9daa50a0a5c480018483e7ad671a0dce75147275c76f66a36458c77efbee
|
| 3 |
size 198011700
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4570cde6877f7ecdff7adaa553bffdfd408783d3b318129cb1d03c18f4128e5c
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b35e714094c6b7ca02b4593d2c4332f2835d3b2ebcdf357e02eb76c44a693fd
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.9804788827896118,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-5700",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -40219,6 +40219,426 @@
|
|
| 40219 |
"eval_samples_per_second": 11.75,
|
| 40220 |
"eval_steps_per_second": 5.875,
|
| 40221 |
"step": 5700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40222 |
}
|
| 40223 |
],
|
| 40224 |
"logging_steps": 1,
|
|
@@ -40242,12 +40662,12 @@
|
|
| 40242 |
"should_evaluate": false,
|
| 40243 |
"should_log": false,
|
| 40244 |
"should_save": true,
|
| 40245 |
-
"should_training_stop":
|
| 40246 |
},
|
| 40247 |
"attributes": {}
|
| 40248 |
}
|
| 40249 |
},
|
| 40250 |
-
"total_flos": 1.
|
| 40251 |
"train_batch_size": 2,
|
| 40252 |
"trial_name": null,
|
| 40253 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.9804788827896118,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-5700",
|
| 4 |
+
"epoch": 0.07748444593912897,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 5760,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 40219 |
"eval_samples_per_second": 11.75,
|
| 40220 |
"eval_steps_per_second": 5.875,
|
| 40221 |
"step": 5700
|
| 40222 |
+
},
|
| 40223 |
+
{
|
| 40224 |
+
"epoch": 0.07669076845468303,
|
| 40225 |
+
"grad_norm": 1.586574673652649,
|
| 40226 |
+
"learning_rate": 5.195178525271294e-08,
|
| 40227 |
+
"loss": 1.0676,
|
| 40228 |
+
"step": 5701
|
| 40229 |
+
},
|
| 40230 |
+
{
|
| 40231 |
+
"epoch": 0.07670422061543636,
|
| 40232 |
+
"grad_norm": 1.4937348365783691,
|
| 40233 |
+
"learning_rate": 5.0205778298384464e-08,
|
| 40234 |
+
"loss": 0.9485,
|
| 40235 |
+
"step": 5702
|
| 40236 |
+
},
|
| 40237 |
+
{
|
| 40238 |
+
"epoch": 0.07671767277618967,
|
| 40239 |
+
"grad_norm": 1.4224642515182495,
|
| 40240 |
+
"learning_rate": 4.848960771166588e-08,
|
| 40241 |
+
"loss": 0.9619,
|
| 40242 |
+
"step": 5703
|
| 40243 |
+
},
|
| 40244 |
+
{
|
| 40245 |
+
"epoch": 0.076731124936943,
|
| 40246 |
+
"grad_norm": 1.5152407884597778,
|
| 40247 |
+
"learning_rate": 4.680327400486961e-08,
|
| 40248 |
+
"loss": 1.1684,
|
| 40249 |
+
"step": 5704
|
| 40250 |
+
},
|
| 40251 |
+
{
|
| 40252 |
+
"epoch": 0.07674457709769632,
|
| 40253 |
+
"grad_norm": 1.5055732727050781,
|
| 40254 |
+
"learning_rate": 4.514677768138187e-08,
|
| 40255 |
+
"loss": 1.0995,
|
| 40256 |
+
"step": 5705
|
| 40257 |
+
},
|
| 40258 |
+
{
|
| 40259 |
+
"epoch": 0.07675802925844964,
|
| 40260 |
+
"grad_norm": 1.5480650663375854,
|
| 40261 |
+
"learning_rate": 4.352011923569599e-08,
|
| 40262 |
+
"loss": 1.0017,
|
| 40263 |
+
"step": 5706
|
| 40264 |
+
},
|
| 40265 |
+
{
|
| 40266 |
+
"epoch": 0.07677148141920295,
|
| 40267 |
+
"grad_norm": 1.6415358781814575,
|
| 40268 |
+
"learning_rate": 4.192329915339022e-08,
|
| 40269 |
+
"loss": 0.8832,
|
| 40270 |
+
"step": 5707
|
| 40271 |
+
},
|
| 40272 |
+
{
|
| 40273 |
+
"epoch": 0.07678493357995628,
|
| 40274 |
+
"grad_norm": 1.4596832990646362,
|
| 40275 |
+
"learning_rate": 4.035631791113881e-08,
|
| 40276 |
+
"loss": 1.0574,
|
| 40277 |
+
"step": 5708
|
| 40278 |
+
},
|
| 40279 |
+
{
|
| 40280 |
+
"epoch": 0.0767983857407096,
|
| 40281 |
+
"grad_norm": 1.5040479898452759,
|
| 40282 |
+
"learning_rate": 3.881917597670093e-08,
|
| 40283 |
+
"loss": 0.9738,
|
| 40284 |
+
"step": 5709
|
| 40285 |
+
},
|
| 40286 |
+
{
|
| 40287 |
+
"epoch": 0.07681183790146293,
|
| 40288 |
+
"grad_norm": 1.401673674583435,
|
| 40289 |
+
"learning_rate": 3.731187380893175e-08,
|
| 40290 |
+
"loss": 0.8504,
|
| 40291 |
+
"step": 5710
|
| 40292 |
+
},
|
| 40293 |
+
{
|
| 40294 |
+
"epoch": 0.07682529006221624,
|
| 40295 |
+
"grad_norm": 1.6924898624420166,
|
| 40296 |
+
"learning_rate": 3.583441185779357e-08,
|
| 40297 |
+
"loss": 1.1012,
|
| 40298 |
+
"step": 5711
|
| 40299 |
+
},
|
| 40300 |
+
{
|
| 40301 |
+
"epoch": 0.07683874222296956,
|
| 40302 |
+
"grad_norm": 1.5380204916000366,
|
| 40303 |
+
"learning_rate": 3.438679056432248e-08,
|
| 40304 |
+
"loss": 0.9772,
|
| 40305 |
+
"step": 5712
|
| 40306 |
+
},
|
| 40307 |
+
{
|
| 40308 |
+
"epoch": 0.07685219438372289,
|
| 40309 |
+
"grad_norm": 1.3671621084213257,
|
| 40310 |
+
"learning_rate": 3.296901036065059e-08,
|
| 40311 |
+
"loss": 0.7888,
|
| 40312 |
+
"step": 5713
|
| 40313 |
+
},
|
| 40314 |
+
{
|
| 40315 |
+
"epoch": 0.07686564654447621,
|
| 40316 |
+
"grad_norm": 1.4451839923858643,
|
| 40317 |
+
"learning_rate": 3.1581071670006015e-08,
|
| 40318 |
+
"loss": 0.8835,
|
| 40319 |
+
"step": 5714
|
| 40320 |
+
},
|
| 40321 |
+
{
|
| 40322 |
+
"epoch": 0.07687909870522953,
|
| 40323 |
+
"grad_norm": 1.5461901426315308,
|
| 40324 |
+
"learning_rate": 3.022297490670178e-08,
|
| 40325 |
+
"loss": 1.161,
|
| 40326 |
+
"step": 5715
|
| 40327 |
+
},
|
| 40328 |
+
{
|
| 40329 |
+
"epoch": 0.07689255086598284,
|
| 40330 |
+
"grad_norm": 1.5095206499099731,
|
| 40331 |
+
"learning_rate": 2.8894720476158044e-08,
|
| 40332 |
+
"loss": 1.0474,
|
| 40333 |
+
"step": 5716
|
| 40334 |
+
},
|
| 40335 |
+
{
|
| 40336 |
+
"epoch": 0.07690600302673617,
|
| 40337 |
+
"grad_norm": 1.3444650173187256,
|
| 40338 |
+
"learning_rate": 2.7596308774879842e-08,
|
| 40339 |
+
"loss": 0.9731,
|
| 40340 |
+
"step": 5717
|
| 40341 |
+
},
|
| 40342 |
+
{
|
| 40343 |
+
"epoch": 0.0769194551874895,
|
| 40344 |
+
"grad_norm": 1.8739641904830933,
|
| 40345 |
+
"learning_rate": 2.6327740190446037e-08,
|
| 40346 |
+
"loss": 1.1698,
|
| 40347 |
+
"step": 5718
|
| 40348 |
+
},
|
| 40349 |
+
{
|
| 40350 |
+
"epoch": 0.07693290734824282,
|
| 40351 |
+
"grad_norm": 1.4870262145996094,
|
| 40352 |
+
"learning_rate": 2.50890151015426e-08,
|
| 40353 |
+
"loss": 0.902,
|
| 40354 |
+
"step": 5719
|
| 40355 |
+
},
|
| 40356 |
+
{
|
| 40357 |
+
"epoch": 0.07694635950899613,
|
| 40358 |
+
"grad_norm": 1.540949821472168,
|
| 40359 |
+
"learning_rate": 2.388013387796262e-08,
|
| 40360 |
+
"loss": 1.1144,
|
| 40361 |
+
"step": 5720
|
| 40362 |
+
},
|
| 40363 |
+
{
|
| 40364 |
+
"epoch": 0.07695981166974945,
|
| 40365 |
+
"grad_norm": 1.6155050992965698,
|
| 40366 |
+
"learning_rate": 2.270109688056188e-08,
|
| 40367 |
+
"loss": 0.9566,
|
| 40368 |
+
"step": 5721
|
| 40369 |
+
},
|
| 40370 |
+
{
|
| 40371 |
+
"epoch": 0.07697326383050278,
|
| 40372 |
+
"grad_norm": 1.5978723764419556,
|
| 40373 |
+
"learning_rate": 2.1551904461292184e-08,
|
| 40374 |
+
"loss": 1.0488,
|
| 40375 |
+
"step": 5722
|
| 40376 |
+
},
|
| 40377 |
+
{
|
| 40378 |
+
"epoch": 0.0769867159912561,
|
| 40379 |
+
"grad_norm": 1.4267683029174805,
|
| 40380 |
+
"learning_rate": 2.043255696322355e-08,
|
| 40381 |
+
"loss": 0.9771,
|
| 40382 |
+
"step": 5723
|
| 40383 |
+
},
|
| 40384 |
+
{
|
| 40385 |
+
"epoch": 0.07700016815200941,
|
| 40386 |
+
"grad_norm": 1.5521541833877563,
|
| 40387 |
+
"learning_rate": 1.9343054720466492e-08,
|
| 40388 |
+
"loss": 1.1641,
|
| 40389 |
+
"step": 5724
|
| 40390 |
+
},
|
| 40391 |
+
{
|
| 40392 |
+
"epoch": 0.07701362031276274,
|
| 40393 |
+
"grad_norm": 1.5656774044036865,
|
| 40394 |
+
"learning_rate": 1.8283398058283053e-08,
|
| 40395 |
+
"loss": 1.0749,
|
| 40396 |
+
"step": 5725
|
| 40397 |
+
},
|
| 40398 |
+
{
|
| 40399 |
+
"epoch": 0.07702707247351606,
|
| 40400 |
+
"grad_norm": 1.6160752773284912,
|
| 40401 |
+
"learning_rate": 1.725358729297577e-08,
|
| 40402 |
+
"loss": 1.0366,
|
| 40403 |
+
"step": 5726
|
| 40404 |
+
},
|
| 40405 |
+
{
|
| 40406 |
+
"epoch": 0.07704052463426939,
|
| 40407 |
+
"grad_norm": 1.6153086423873901,
|
| 40408 |
+
"learning_rate": 1.62536227319654e-08,
|
| 40409 |
+
"loss": 0.9984,
|
| 40410 |
+
"step": 5727
|
| 40411 |
+
},
|
| 40412 |
+
{
|
| 40413 |
+
"epoch": 0.0770539767950227,
|
| 40414 |
+
"grad_norm": 1.3504514694213867,
|
| 40415 |
+
"learning_rate": 1.5283504673757608e-08,
|
| 40416 |
+
"loss": 0.8402,
|
| 40417 |
+
"step": 5728
|
| 40418 |
+
},
|
| 40419 |
+
{
|
| 40420 |
+
"epoch": 0.07706742895577602,
|
| 40421 |
+
"grad_norm": 1.4485338926315308,
|
| 40422 |
+
"learning_rate": 1.4343233407931866e-08,
|
| 40423 |
+
"loss": 0.9682,
|
| 40424 |
+
"step": 5729
|
| 40425 |
+
},
|
| 40426 |
+
{
|
| 40427 |
+
"epoch": 0.07708088111652935,
|
| 40428 |
+
"grad_norm": 1.7915130853652954,
|
| 40429 |
+
"learning_rate": 1.343280921518586e-08,
|
| 40430 |
+
"loss": 1.0206,
|
| 40431 |
+
"step": 5730
|
| 40432 |
+
},
|
| 40433 |
+
{
|
| 40434 |
+
"epoch": 0.07709433327728267,
|
| 40435 |
+
"grad_norm": 1.4139072895050049,
|
| 40436 |
+
"learning_rate": 1.2552232367279981e-08,
|
| 40437 |
+
"loss": 1.0062,
|
| 40438 |
+
"step": 5731
|
| 40439 |
+
},
|
| 40440 |
+
{
|
| 40441 |
+
"epoch": 0.07710778543803598,
|
| 40442 |
+
"grad_norm": 1.5041614770889282,
|
| 40443 |
+
"learning_rate": 1.1701503127092838e-08,
|
| 40444 |
+
"loss": 0.9514,
|
| 40445 |
+
"step": 5732
|
| 40446 |
+
},
|
| 40447 |
+
{
|
| 40448 |
+
"epoch": 0.0771212375987893,
|
| 40449 |
+
"grad_norm": 1.5072709321975708,
|
| 40450 |
+
"learning_rate": 1.0880621748576847e-08,
|
| 40451 |
+
"loss": 0.8834,
|
| 40452 |
+
"step": 5733
|
| 40453 |
+
},
|
| 40454 |
+
{
|
| 40455 |
+
"epoch": 0.07713468975954263,
|
| 40456 |
+
"grad_norm": 1.4389986991882324,
|
| 40457 |
+
"learning_rate": 1.0089588476758227e-08,
|
| 40458 |
+
"loss": 0.9029,
|
| 40459 |
+
"step": 5734
|
| 40460 |
+
},
|
| 40461 |
+
{
|
| 40462 |
+
"epoch": 0.07714814192029595,
|
| 40463 |
+
"grad_norm": 1.4401514530181885,
|
| 40464 |
+
"learning_rate": 9.32840354779252e-09,
|
| 40465 |
+
"loss": 1.0095,
|
| 40466 |
+
"step": 5735
|
| 40467 |
+
},
|
| 40468 |
+
{
|
| 40469 |
+
"epoch": 0.07716159408104926,
|
| 40470 |
+
"grad_norm": 1.7930731773376465,
|
| 40471 |
+
"learning_rate": 8.597067188897966e-09,
|
| 40472 |
+
"loss": 1.2101,
|
| 40473 |
+
"step": 5736
|
| 40474 |
+
},
|
| 40475 |
+
{
|
| 40476 |
+
"epoch": 0.07717504624180259,
|
| 40477 |
+
"grad_norm": 1.6275607347488403,
|
| 40478 |
+
"learning_rate": 7.895579618388827e-09,
|
| 40479 |
+
"loss": 1.0339,
|
| 40480 |
+
"step": 5737
|
| 40481 |
+
},
|
| 40482 |
+
{
|
| 40483 |
+
"epoch": 0.07718849840255591,
|
| 40484 |
+
"grad_norm": 1.538468837738037,
|
| 40485 |
+
"learning_rate": 7.223941045664262e-09,
|
| 40486 |
+
"loss": 1.0778,
|
| 40487 |
+
"step": 5738
|
| 40488 |
+
},
|
| 40489 |
+
{
|
| 40490 |
+
"epoch": 0.07720195056330924,
|
| 40491 |
+
"grad_norm": 1.4641597270965576,
|
| 40492 |
+
"learning_rate": 6.582151671208348e-09,
|
| 40493 |
+
"loss": 0.9457,
|
| 40494 |
+
"step": 5739
|
| 40495 |
+
},
|
| 40496 |
+
{
|
| 40497 |
+
"epoch": 0.07721540272406255,
|
| 40498 |
+
"grad_norm": 1.4278123378753662,
|
| 40499 |
+
"learning_rate": 5.970211686623372e-09,
|
| 40500 |
+
"loss": 0.9369,
|
| 40501 |
+
"step": 5740
|
| 40502 |
+
},
|
| 40503 |
+
{
|
| 40504 |
+
"epoch": 0.07722885488481587,
|
| 40505 |
+
"grad_norm": 1.355145812034607,
|
| 40506 |
+
"learning_rate": 5.388121274574331e-09,
|
| 40507 |
+
"loss": 0.8486,
|
| 40508 |
+
"step": 5741
|
| 40509 |
+
},
|
| 40510 |
+
{
|
| 40511 |
+
"epoch": 0.0772423070455692,
|
| 40512 |
+
"grad_norm": 1.5918469429016113,
|
| 40513 |
+
"learning_rate": 4.83588060882223e-09,
|
| 40514 |
+
"loss": 0.9758,
|
| 40515 |
+
"step": 5742
|
| 40516 |
+
},
|
| 40517 |
+
{
|
| 40518 |
+
"epoch": 0.07725575920632252,
|
| 40519 |
+
"grad_norm": 1.686596155166626,
|
| 40520 |
+
"learning_rate": 4.313489854212982e-09,
|
| 40521 |
+
"loss": 1.0544,
|
| 40522 |
+
"step": 5743
|
| 40523 |
+
},
|
| 40524 |
+
{
|
| 40525 |
+
"epoch": 0.07726921136707583,
|
| 40526 |
+
"grad_norm": 1.381110429763794,
|
| 40527 |
+
"learning_rate": 3.820949166699616e-09,
|
| 40528 |
+
"loss": 0.8962,
|
| 40529 |
+
"step": 5744
|
| 40530 |
+
},
|
| 40531 |
+
{
|
| 40532 |
+
"epoch": 0.07728266352782916,
|
| 40533 |
+
"grad_norm": 1.5031489133834839,
|
| 40534 |
+
"learning_rate": 3.3582586932978666e-09,
|
| 40535 |
+
"loss": 1.0251,
|
| 40536 |
+
"step": 5745
|
| 40537 |
+
},
|
| 40538 |
+
{
|
| 40539 |
+
"epoch": 0.07729611568858248,
|
| 40540 |
+
"grad_norm": 1.4953575134277344,
|
| 40541 |
+
"learning_rate": 2.9254185721416803e-09,
|
| 40542 |
+
"loss": 1.0684,
|
| 40543 |
+
"step": 5746
|
| 40544 |
+
},
|
| 40545 |
+
{
|
| 40546 |
+
"epoch": 0.0773095678493358,
|
| 40547 |
+
"grad_norm": 1.4256117343902588,
|
| 40548 |
+
"learning_rate": 2.5224289324277118e-09,
|
| 40549 |
+
"loss": 0.9782,
|
| 40550 |
+
"step": 5747
|
| 40551 |
+
},
|
| 40552 |
+
{
|
| 40553 |
+
"epoch": 0.07732302001008912,
|
| 40554 |
+
"grad_norm": 1.7791322469711304,
|
| 40555 |
+
"learning_rate": 2.149289894459727e-09,
|
| 40556 |
+
"loss": 1.2141,
|
| 40557 |
+
"step": 5748
|
| 40558 |
+
},
|
| 40559 |
+
{
|
| 40560 |
+
"epoch": 0.07733647217084244,
|
| 40561 |
+
"grad_norm": 1.8340305089950562,
|
| 40562 |
+
"learning_rate": 1.8060015696264032e-09,
|
| 40563 |
+
"loss": 1.059,
|
| 40564 |
+
"step": 5749
|
| 40565 |
+
},
|
| 40566 |
+
{
|
| 40567 |
+
"epoch": 0.07734992433159577,
|
| 40568 |
+
"grad_norm": 1.5660282373428345,
|
| 40569 |
+
"learning_rate": 1.4925640603902224e-09,
|
| 40570 |
+
"loss": 0.9299,
|
| 40571 |
+
"step": 5750
|
| 40572 |
+
},
|
| 40573 |
+
{
|
| 40574 |
+
"epoch": 0.07736337649234909,
|
| 40575 |
+
"grad_norm": 1.551103949546814,
|
| 40576 |
+
"learning_rate": 1.208977460342986e-09,
|
| 40577 |
+
"loss": 1.1289,
|
| 40578 |
+
"step": 5751
|
| 40579 |
+
},
|
| 40580 |
+
{
|
| 40581 |
+
"epoch": 0.0773768286531024,
|
| 40582 |
+
"grad_norm": 1.676748514175415,
|
| 40583 |
+
"learning_rate": 9.552418541058928e-10,
|
| 40584 |
+
"loss": 0.9262,
|
| 40585 |
+
"step": 5752
|
| 40586 |
+
},
|
| 40587 |
+
{
|
| 40588 |
+
"epoch": 0.07739028081385572,
|
| 40589 |
+
"grad_norm": 1.4658743143081665,
|
| 40590 |
+
"learning_rate": 7.313573174516642e-10,
|
| 40591 |
+
"loss": 0.8708,
|
| 40592 |
+
"step": 5753
|
| 40593 |
+
},
|
| 40594 |
+
{
|
| 40595 |
+
"epoch": 0.07740373297460905,
|
| 40596 |
+
"grad_norm": 1.6699283123016357,
|
| 40597 |
+
"learning_rate": 5.37323917204624e-10,
|
| 40598 |
+
"loss": 1.083,
|
| 40599 |
+
"step": 5754
|
| 40600 |
+
},
|
| 40601 |
+
{
|
| 40602 |
+
"epoch": 0.07741718513536237,
|
| 40603 |
+
"grad_norm": 1.4694359302520752,
|
| 40604 |
+
"learning_rate": 3.731417112740054e-10,
|
| 40605 |
+
"loss": 0.8933,
|
| 40606 |
+
"step": 5755
|
| 40607 |
+
},
|
| 40608 |
+
{
|
| 40609 |
+
"epoch": 0.07743063729611568,
|
| 40610 |
+
"grad_norm": 1.7645187377929688,
|
| 40611 |
+
"learning_rate": 2.388107486761548e-10,
|
| 40612 |
+
"loss": 1.1274,
|
| 40613 |
+
"step": 5756
|
| 40614 |
+
},
|
| 40615 |
+
{
|
| 40616 |
+
"epoch": 0.07744408945686901,
|
| 40617 |
+
"grad_norm": 1.5873627662658691,
|
| 40618 |
+
"learning_rate": 1.343310695234301e-10,
|
| 40619 |
+
"loss": 1.0934,
|
| 40620 |
+
"step": 5757
|
| 40621 |
+
},
|
| 40622 |
+
{
|
| 40623 |
+
"epoch": 0.07745754161762233,
|
| 40624 |
+
"grad_norm": 1.486417293548584,
|
| 40625 |
+
"learning_rate": 5.97027049908938e-11,
|
| 40626 |
+
"loss": 1.0576,
|
| 40627 |
+
"step": 5758
|
| 40628 |
+
},
|
| 40629 |
+
{
|
| 40630 |
+
"epoch": 0.07747099377837566,
|
| 40631 |
+
"grad_norm": 1.636743187904358,
|
| 40632 |
+
"learning_rate": 1.4925677360722036e-11,
|
| 40633 |
+
"loss": 1.1294,
|
| 40634 |
+
"step": 5759
|
| 40635 |
+
},
|
| 40636 |
+
{
|
| 40637 |
+
"epoch": 0.07748444593912897,
|
| 40638 |
+
"grad_norm": 1.4540817737579346,
|
| 40639 |
+
"learning_rate": 0.0,
|
| 40640 |
+
"loss": 0.8497,
|
| 40641 |
+
"step": 5760
|
| 40642 |
}
|
| 40643 |
],
|
| 40644 |
"logging_steps": 1,
|
|
|
|
| 40662 |
"should_evaluate": false,
|
| 40663 |
"should_log": false,
|
| 40664 |
"should_save": true,
|
| 40665 |
+
"should_training_stop": true
|
| 40666 |
},
|
| 40667 |
"attributes": {}
|
| 40668 |
}
|
| 40669 |
},
|
| 40670 |
+
"total_flos": 1.6511313071466086e+18,
|
| 40671 |
"train_batch_size": 2,
|
| 40672 |
"trial_name": null,
|
| 40673 |
"trial_params": null
|