| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 785, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 13.448105258759517, | |
| "learning_rate": 2.0253164556962026e-06, | |
| "loss": 1.2075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31800004839897156, | |
| "step": 5, | |
| "valid_targets_mean": 748.6, | |
| "valid_targets_min": 403 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 9.982424737572133, | |
| "learning_rate": 4.556962025316456e-06, | |
| "loss": 1.1769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3006700873374939, | |
| "step": 10, | |
| "valid_targets_mean": 714.3, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 3.8464182382921748, | |
| "learning_rate": 7.08860759493671e-06, | |
| "loss": 1.0706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2761875092983246, | |
| "step": 15, | |
| "valid_targets_mean": 833.6, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 2.1620562822697003, | |
| "learning_rate": 9.620253164556963e-06, | |
| "loss": 1.0013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25137242674827576, | |
| "step": 20, | |
| "valid_targets_mean": 814.9, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.6337626383709007, | |
| "learning_rate": 1.2151898734177216e-05, | |
| "loss": 0.9265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2275439351797104, | |
| "step": 25, | |
| "valid_targets_mean": 728.6, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 1.9026845295535284, | |
| "learning_rate": 1.468354430379747e-05, | |
| "loss": 0.8832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20272783935070038, | |
| "step": 30, | |
| "valid_targets_mean": 751.1, | |
| "valid_targets_min": 439 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 1.073091888557403, | |
| "learning_rate": 1.7215189873417723e-05, | |
| "loss": 0.8224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21370205283164978, | |
| "step": 35, | |
| "valid_targets_mean": 786.2, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.8617650331409201, | |
| "learning_rate": 1.974683544303798e-05, | |
| "loss": 0.8024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1904214322566986, | |
| "step": 40, | |
| "valid_targets_mean": 843.4, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.759980239581234, | |
| "learning_rate": 2.2278481012658228e-05, | |
| "loss": 0.7507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15183305740356445, | |
| "step": 45, | |
| "valid_targets_mean": 724.6, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.7463412043901649, | |
| "learning_rate": 2.481012658227848e-05, | |
| "loss": 0.736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1823093295097351, | |
| "step": 50, | |
| "valid_targets_mean": 826.6, | |
| "valid_targets_min": 562 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.6886141045868305, | |
| "learning_rate": 2.7341772151898737e-05, | |
| "loss": 0.7099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17409449815750122, | |
| "step": 55, | |
| "valid_targets_mean": 738.2, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.6723991947723206, | |
| "learning_rate": 2.987341772151899e-05, | |
| "loss": 0.6817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1746065616607666, | |
| "step": 60, | |
| "valid_targets_mean": 817.4, | |
| "valid_targets_min": 503 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.7219762987855783, | |
| "learning_rate": 3.240506329113924e-05, | |
| "loss": 0.7042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15542849898338318, | |
| "step": 65, | |
| "valid_targets_mean": 631.1, | |
| "valid_targets_min": 418 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.711945449946034, | |
| "learning_rate": 3.49367088607595e-05, | |
| "loss": 0.6683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1588052213191986, | |
| "step": 70, | |
| "valid_targets_mean": 717.1, | |
| "valid_targets_min": 483 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.7723131517405141, | |
| "learning_rate": 3.746835443037975e-05, | |
| "loss": 0.6602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16386288404464722, | |
| "step": 75, | |
| "valid_targets_mean": 850.6, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.7455251121043946, | |
| "learning_rate": 4e-05, | |
| "loss": 0.6492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16289247572422028, | |
| "step": 80, | |
| "valid_targets_mean": 734.2, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.6806696789899241, | |
| "learning_rate": 3.999504991751045e-05, | |
| "loss": 0.6491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14095750451087952, | |
| "step": 85, | |
| "valid_targets_mean": 764.7, | |
| "valid_targets_min": 487 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.6997408572409954, | |
| "learning_rate": 3.9980202120373464e-05, | |
| "loss": 0.6471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15331107378005981, | |
| "step": 90, | |
| "valid_targets_mean": 785.2, | |
| "valid_targets_min": 492 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.7180343376382498, | |
| "learning_rate": 3.995546395837111e-05, | |
| "loss": 0.6481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17766906321048737, | |
| "step": 95, | |
| "valid_targets_mean": 781.8, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.7022835185401193, | |
| "learning_rate": 3.992084767709763e-05, | |
| "loss": 0.6207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17354968190193176, | |
| "step": 100, | |
| "valid_targets_mean": 836.8, | |
| "valid_targets_min": 418 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.7367281503709805, | |
| "learning_rate": 3.987637041189781e-05, | |
| "loss": 0.6272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.162840336561203, | |
| "step": 105, | |
| "valid_targets_mean": 837.1, | |
| "valid_targets_min": 434 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.6596850958225633, | |
| "learning_rate": 3.982205417938482e-05, | |
| "loss": 0.6176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14747610688209534, | |
| "step": 110, | |
| "valid_targets_mean": 715.6, | |
| "valid_targets_min": 485 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.6740339776230146, | |
| "learning_rate": 3.975792586654179e-05, | |
| "loss": 0.6183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15198436379432678, | |
| "step": 115, | |
| "valid_targets_mean": 681.2, | |
| "valid_targets_min": 400 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.6279891719181242, | |
| "learning_rate": 3.968401721741259e-05, | |
| "loss": 0.6138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1494234800338745, | |
| "step": 120, | |
| "valid_targets_mean": 762.9, | |
| "valid_targets_min": 313 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.6762014593224054, | |
| "learning_rate": 3.960036481738819e-05, | |
| "loss": 0.6152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14409850537776947, | |
| "step": 125, | |
| "valid_targets_mean": 826.2, | |
| "valid_targets_min": 432 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.6951704276499515, | |
| "learning_rate": 3.950701007509667e-05, | |
| "loss": 0.6211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1555303931236267, | |
| "step": 130, | |
| "valid_targets_mean": 755.7, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.6296778796453734, | |
| "learning_rate": 3.940399920190552e-05, | |
| "loss": 0.6129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1367054283618927, | |
| "step": 135, | |
| "valid_targets_mean": 662.6, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.6783518650883749, | |
| "learning_rate": 3.92913831890467e-05, | |
| "loss": 0.5886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15633609890937805, | |
| "step": 140, | |
| "valid_targets_mean": 727.8, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.708146207975277, | |
| "learning_rate": 3.916921778237556e-05, | |
| "loss": 0.5948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16752688586711884, | |
| "step": 145, | |
| "valid_targets_mean": 762.7, | |
| "valid_targets_min": 541 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.6593931091646447, | |
| "learning_rate": 3.903756345477612e-05, | |
| "loss": 0.5933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.164844810962677, | |
| "step": 150, | |
| "valid_targets_mean": 851.9, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.6510250678307847, | |
| "learning_rate": 3.889648537622657e-05, | |
| "loss": 0.5897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1499878615140915, | |
| "step": 155, | |
| "valid_targets_mean": 824.8, | |
| "valid_targets_min": 424 | |
| }, | |
| { | |
| "epoch": 1.0192, | |
| "grad_norm": 0.6781043016492474, | |
| "learning_rate": 3.874605338153952e-05, | |
| "loss": 0.5918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1404564082622528, | |
| "step": 160, | |
| "valid_targets_mean": 745.8, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 1.0512, | |
| "grad_norm": 0.6508626538105656, | |
| "learning_rate": 3.8586341935793265e-05, | |
| "loss": 0.5805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14573101699352264, | |
| "step": 165, | |
| "valid_targets_mean": 690.4, | |
| "valid_targets_min": 426 | |
| }, | |
| { | |
| "epoch": 1.0832, | |
| "grad_norm": 0.6313686061473457, | |
| "learning_rate": 3.841743009747089e-05, | |
| "loss": 0.5801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13861840963363647, | |
| "step": 170, | |
| "valid_targets_mean": 763.2, | |
| "valid_targets_min": 457 | |
| }, | |
| { | |
| "epoch": 1.1152, | |
| "grad_norm": 0.6718637932023319, | |
| "learning_rate": 3.8239401479325714e-05, | |
| "loss": 0.5727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13479197025299072, | |
| "step": 175, | |
| "valid_targets_mean": 740.8, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 1.1472, | |
| "grad_norm": 0.6534534284351228, | |
| "learning_rate": 3.8052344206992276e-05, | |
| "loss": 0.5688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11501454561948776, | |
| "step": 180, | |
| "valid_targets_mean": 669.1, | |
| "valid_targets_min": 361 | |
| }, | |
| { | |
| "epoch": 1.1792, | |
| "grad_norm": 0.6298087553175363, | |
| "learning_rate": 3.7856350875363396e-05, | |
| "loss": 0.5533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12853585183620453, | |
| "step": 185, | |
| "valid_targets_mean": 759.6, | |
| "valid_targets_min": 433 | |
| }, | |
| { | |
| "epoch": 1.2112, | |
| "grad_norm": 0.6629648321914637, | |
| "learning_rate": 3.765151850275497e-05, | |
| "loss": 0.5718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13259121775627136, | |
| "step": 190, | |
| "valid_targets_mean": 673.8, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 1.2432, | |
| "grad_norm": 0.6446910197966146, | |
| "learning_rate": 3.7437948482881104e-05, | |
| "loss": 0.5721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1383952796459198, | |
| "step": 195, | |
| "valid_targets_mean": 777.4, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 1.2752, | |
| "grad_norm": 0.6715593999996728, | |
| "learning_rate": 3.721574653466336e-05, | |
| "loss": 0.5767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13751399517059326, | |
| "step": 200, | |
| "valid_targets_mean": 690.3, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 1.3072, | |
| "grad_norm": 0.6634970200747077, | |
| "learning_rate": 3.698502264989903e-05, | |
| "loss": 0.5749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14345307648181915, | |
| "step": 205, | |
| "valid_targets_mean": 795.1, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 1.3392, | |
| "grad_norm": 0.6852785997622871, | |
| "learning_rate": 3.674589103881432e-05, | |
| "loss": 0.5757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16555346548557281, | |
| "step": 210, | |
| "valid_targets_mean": 953.1, | |
| "valid_targets_min": 495 | |
| }, | |
| { | |
| "epoch": 1.3712, | |
| "grad_norm": 0.7182921119480419, | |
| "learning_rate": 3.64984700735293e-05, | |
| "loss": 0.5662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13560381531715393, | |
| "step": 215, | |
| "valid_targets_mean": 657.9, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 1.4032, | |
| "grad_norm": 0.6383915774320499, | |
| "learning_rate": 3.624288222946273e-05, | |
| "loss": 0.5598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1385164111852646, | |
| "step": 220, | |
| "valid_targets_mean": 883.5, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 1.4352, | |
| "grad_norm": 0.6197700397838696, | |
| "learning_rate": 3.597925402470578e-05, | |
| "loss": 0.5645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12843455374240875, | |
| "step": 225, | |
| "valid_targets_mean": 750.9, | |
| "valid_targets_min": 434 | |
| }, | |
| { | |
| "epoch": 1.4672, | |
| "grad_norm": 0.5711420210151498, | |
| "learning_rate": 3.570771595739445e-05, | |
| "loss": 0.5598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14530006051063538, | |
| "step": 230, | |
| "valid_targets_mean": 942.8, | |
| "valid_targets_min": 528 | |
| }, | |
| { | |
| "epoch": 1.4992, | |
| "grad_norm": 0.6210204975257504, | |
| "learning_rate": 3.5428402441111964e-05, | |
| "loss": 0.5593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1555243879556656, | |
| "step": 235, | |
| "valid_targets_mean": 887.2, | |
| "valid_targets_min": 537 | |
| }, | |
| { | |
| "epoch": 1.5312000000000001, | |
| "grad_norm": 0.6647791326711432, | |
| "learning_rate": 3.5141451738352936e-05, | |
| "loss": 0.5609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13146094977855682, | |
| "step": 240, | |
| "valid_targets_mean": 744.5, | |
| "valid_targets_min": 461 | |
| }, | |
| { | |
| "epoch": 1.5632000000000001, | |
| "grad_norm": 0.6693133474516781, | |
| "learning_rate": 3.4847005892082266e-05, | |
| "loss": 0.577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1364261358976364, | |
| "step": 245, | |
| "valid_targets_mean": 682.8, | |
| "valid_targets_min": 444 | |
| }, | |
| { | |
| "epoch": 1.5952, | |
| "grad_norm": 0.7006371063676372, | |
| "learning_rate": 3.454521065542273e-05, | |
| "loss": 0.5673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13232733309268951, | |
| "step": 250, | |
| "valid_targets_mean": 724.2, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 1.6272, | |
| "grad_norm": 0.655773431685838, | |
| "learning_rate": 3.423621541950597e-05, | |
| "loss": 0.561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13282510638237, | |
| "step": 255, | |
| "valid_targets_mean": 766.9, | |
| "valid_targets_min": 402 | |
| }, | |
| { | |
| "epoch": 1.6592, | |
| "grad_norm": 0.6172192241649032, | |
| "learning_rate": 3.3920173139522664e-05, | |
| "loss": 0.5643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13580721616744995, | |
| "step": 260, | |
| "valid_targets_mean": 764.2, | |
| "valid_targets_min": 400 | |
| }, | |
| { | |
| "epoch": 1.6912, | |
| "grad_norm": 0.6628955705415062, | |
| "learning_rate": 3.35972402590084e-05, | |
| "loss": 0.5581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1439439356327057, | |
| "step": 265, | |
| "valid_targets_mean": 715.3, | |
| "valid_targets_min": 309 | |
| }, | |
| { | |
| "epoch": 1.7231999999999998, | |
| "grad_norm": 0.641312579448422, | |
| "learning_rate": 3.326757663240291e-05, | |
| "loss": 0.5624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1477782130241394, | |
| "step": 270, | |
| "valid_targets_mean": 739.2, | |
| "valid_targets_min": 472 | |
| }, | |
| { | |
| "epoch": 1.7551999999999999, | |
| "grad_norm": 0.6833918688567646, | |
| "learning_rate": 3.293134544592073e-05, | |
| "loss": 0.5578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15322580933570862, | |
| "step": 275, | |
| "valid_targets_mean": 752.0, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 1.7872, | |
| "grad_norm": 0.640918224676768, | |
| "learning_rate": 3.258871313677274e-05, | |
| "loss": 0.5599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13823790848255157, | |
| "step": 280, | |
| "valid_targets_mean": 789.4, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 1.8192, | |
| "grad_norm": 0.6654266115883691, | |
| "learning_rate": 3.2239849310778316e-05, | |
| "loss": 0.5485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14035166800022125, | |
| "step": 285, | |
| "valid_targets_mean": 807.4, | |
| "valid_targets_min": 448 | |
| }, | |
| { | |
| "epoch": 1.8512, | |
| "grad_norm": 0.6123014675225724, | |
| "learning_rate": 3.188492665840909e-05, | |
| "loss": 0.5557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12820711731910706, | |
| "step": 290, | |
| "valid_targets_mean": 850.4, | |
| "valid_targets_min": 424 | |
| }, | |
| { | |
| "epoch": 1.8832, | |
| "grad_norm": 0.6754595695415974, | |
| "learning_rate": 3.1524120869305726e-05, | |
| "loss": 0.5627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15113355219364166, | |
| "step": 295, | |
| "valid_targets_mean": 696.0, | |
| "valid_targets_min": 472 | |
| }, | |
| { | |
| "epoch": 1.9152, | |
| "grad_norm": 0.6578705510880959, | |
| "learning_rate": 3.11576105453101e-05, | |
| "loss": 0.5519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15135137736797333, | |
| "step": 300, | |
| "valid_targets_mean": 767.6, | |
| "valid_targets_min": 517 | |
| }, | |
| { | |
| "epoch": 1.9472, | |
| "grad_norm": 0.6449455299555445, | |
| "learning_rate": 3.0785577112055916e-05, | |
| "loss": 0.548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14119261503219604, | |
| "step": 305, | |
| "valid_targets_mean": 762.3, | |
| "valid_targets_min": 434 | |
| }, | |
| { | |
| "epoch": 1.9792, | |
| "grad_norm": 0.6408490242266223, | |
| "learning_rate": 3.040820472916153e-05, | |
| "loss": 0.5456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12257601320743561, | |
| "step": 310, | |
| "valid_targets_mean": 696.9, | |
| "valid_targets_min": 300 | |
| }, | |
| { | |
| "epoch": 2.0064, | |
| "grad_norm": 0.7541041516650534, | |
| "learning_rate": 3.002568019906939e-05, | |
| "loss": 0.5427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15626591444015503, | |
| "step": 315, | |
| "valid_targets_mean": 719.5, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 2.0384, | |
| "grad_norm": 0.665090463756649, | |
| "learning_rate": 2.963819287457733e-05, | |
| "loss": 0.5352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13162535429000854, | |
| "step": 320, | |
| "valid_targets_mean": 670.2, | |
| "valid_targets_min": 440 | |
| }, | |
| { | |
| "epoch": 2.0704, | |
| "grad_norm": 0.6366823920807368, | |
| "learning_rate": 2.924593456510733e-05, | |
| "loss": 0.5321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13383950293064117, | |
| "step": 325, | |
| "valid_targets_mean": 810.5, | |
| "valid_targets_min": 394 | |
| }, | |
| { | |
| "epoch": 2.1024, | |
| "grad_norm": 0.6213548819315227, | |
| "learning_rate": 2.8849099441758306e-05, | |
| "loss": 0.5265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10977214574813843, | |
| "step": 330, | |
| "valid_targets_mean": 728.4, | |
| "valid_targets_min": 429 | |
| }, | |
| { | |
| "epoch": 2.1344, | |
| "grad_norm": 0.6547366858654076, | |
| "learning_rate": 2.844788394118979e-05, | |
| "loss": 0.5254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1424219012260437, | |
| "step": 335, | |
| "valid_targets_mean": 800.0, | |
| "valid_targets_min": 452 | |
| }, | |
| { | |
| "epoch": 2.1664, | |
| "grad_norm": 0.6619785188617626, | |
| "learning_rate": 2.8042486668384164e-05, | |
| "loss": 0.5306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12859517335891724, | |
| "step": 340, | |
| "valid_targets_mean": 695.9, | |
| "valid_targets_min": 435 | |
| }, | |
| { | |
| "epoch": 2.1984, | |
| "grad_norm": 0.687101029751781, | |
| "learning_rate": 2.7633108298335582e-05, | |
| "loss": 0.5184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12573498487472534, | |
| "step": 345, | |
| "valid_targets_mean": 777.2, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 2.2304, | |
| "grad_norm": 0.645409223244431, | |
| "learning_rate": 2.721995147671416e-05, | |
| "loss": 0.5215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.118968665599823, | |
| "step": 350, | |
| "valid_targets_mean": 712.1, | |
| "valid_targets_min": 426 | |
| }, | |
| { | |
| "epoch": 2.2624, | |
| "grad_norm": 0.671387281197927, | |
| "learning_rate": 2.68032207195547e-05, | |
| "loss": 0.5203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12927383184432983, | |
| "step": 355, | |
| "valid_targets_mean": 742.2, | |
| "valid_targets_min": 464 | |
| }, | |
| { | |
| "epoch": 2.2944, | |
| "grad_norm": 0.7041426331177664, | |
| "learning_rate": 2.6383122312019604e-05, | |
| "loss": 0.5165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13757221400737762, | |
| "step": 360, | |
| "valid_targets_mean": 762.4, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 2.3264, | |
| "grad_norm": 0.5959539114433047, | |
| "learning_rate": 2.595986420628597e-05, | |
| "loss": 0.5197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1234401986002922, | |
| "step": 365, | |
| "valid_targets_mean": 834.3, | |
| "valid_targets_min": 459 | |
| }, | |
| { | |
| "epoch": 2.3584, | |
| "grad_norm": 0.6531185712082775, | |
| "learning_rate": 2.5533655918607573e-05, | |
| "loss": 0.5098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1260676383972168, | |
| "step": 370, | |
| "valid_targets_mean": 682.6, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 2.3904, | |
| "grad_norm": 0.6634208938956202, | |
| "learning_rate": 2.510470842560259e-05, | |
| "loss": 0.533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1401265263557434, | |
| "step": 375, | |
| "valid_targets_mean": 758.4, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 2.4224, | |
| "grad_norm": 0.6727520164532284, | |
| "learning_rate": 2.467323405981841e-05, | |
| "loss": 0.5271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13311098515987396, | |
| "step": 380, | |
| "valid_targets_mean": 742.8, | |
| "valid_targets_min": 454 | |
| }, | |
| { | |
| "epoch": 2.4544, | |
| "grad_norm": 0.6427726949014289, | |
| "learning_rate": 2.423944640462533e-05, | |
| "loss": 0.5202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13644330203533173, | |
| "step": 385, | |
| "valid_targets_mean": 789.9, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 2.4864, | |
| "grad_norm": 0.6840033417316511, | |
| "learning_rate": 2.3803560188490968e-05, | |
| "loss": 0.5308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12673068046569824, | |
| "step": 390, | |
| "valid_targets_mean": 702.9, | |
| "valid_targets_min": 420 | |
| }, | |
| { | |
| "epoch": 2.5183999999999997, | |
| "grad_norm": 0.6563061162401826, | |
| "learning_rate": 2.336579117868789e-05, | |
| "loss": 0.5297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1355002373456955, | |
| "step": 395, | |
| "valid_targets_mean": 729.9, | |
| "valid_targets_min": 526 | |
| }, | |
| { | |
| "epoch": 2.5504, | |
| "grad_norm": 0.641354939892573, | |
| "learning_rate": 2.292635607448711e-05, | |
| "loss": 0.5177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12141138315200806, | |
| "step": 400, | |
| "valid_targets_mean": 733.4, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 2.5824, | |
| "grad_norm": 0.8906863552430406, | |
| "learning_rate": 2.248547239989008e-05, | |
| "loss": 0.5165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12363258004188538, | |
| "step": 405, | |
| "valid_targets_mean": 859.4, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 2.6144, | |
| "grad_norm": 0.676141962708069, | |
| "learning_rate": 2.204335839595255e-05, | |
| "loss": 0.5358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14006322622299194, | |
| "step": 410, | |
| "valid_targets_mean": 766.3, | |
| "valid_targets_min": 300 | |
| }, | |
| { | |
| "epoch": 2.6464, | |
| "grad_norm": 0.615596066669323, | |
| "learning_rate": 2.1600232912753452e-05, | |
| "loss": 0.5215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12042121589183807, | |
| "step": 415, | |
| "valid_targets_mean": 843.1, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 2.6784, | |
| "grad_norm": 0.615943796645789, | |
| "learning_rate": 2.1156315301062293e-05, | |
| "loss": 0.5144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1382143646478653, | |
| "step": 420, | |
| "valid_targets_mean": 864.3, | |
| "valid_targets_min": 464 | |
| }, | |
| { | |
| "epoch": 2.7104, | |
| "grad_norm": 1.1772547291108486, | |
| "learning_rate": 2.0711825303758712e-05, | |
| "loss": 0.535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1458306610584259, | |
| "step": 425, | |
| "valid_targets_mean": 898.8, | |
| "valid_targets_min": 554 | |
| }, | |
| { | |
| "epoch": 2.7424, | |
| "grad_norm": 0.6393166323484019, | |
| "learning_rate": 2.0266982947057962e-05, | |
| "loss": 0.5235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13645440340042114, | |
| "step": 430, | |
| "valid_targets_mean": 740.2, | |
| "valid_targets_min": 391 | |
| }, | |
| { | |
| "epoch": 2.7744, | |
| "grad_norm": 0.6786636785060219, | |
| "learning_rate": 1.9822008431596083e-05, | |
| "loss": 0.5337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14586326479911804, | |
| "step": 435, | |
| "valid_targets_mean": 734.4, | |
| "valid_targets_min": 313 | |
| }, | |
| { | |
| "epoch": 2.8064, | |
| "grad_norm": 0.6138448927187343, | |
| "learning_rate": 1.937712202342881e-05, | |
| "loss": 0.5094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14957866072654724, | |
| "step": 440, | |
| "valid_targets_mean": 983.4, | |
| "valid_targets_min": 476 | |
| }, | |
| { | |
| "epoch": 2.8384, | |
| "grad_norm": 0.6115317247558727, | |
| "learning_rate": 1.8932543944998037e-05, | |
| "loss": 0.5215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13156554102897644, | |
| "step": 445, | |
| "valid_targets_mean": 756.7, | |
| "valid_targets_min": 509 | |
| }, | |
| { | |
| "epoch": 2.8704, | |
| "grad_norm": 0.6480026309160363, | |
| "learning_rate": 1.8488494266119877e-05, | |
| "loss": 0.5164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13125823438167572, | |
| "step": 450, | |
| "valid_targets_mean": 749.1, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 2.9024, | |
| "grad_norm": 0.6221700405662001, | |
| "learning_rate": 1.804519279504834e-05, | |
| "loss": 0.5289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1366758495569229, | |
| "step": 455, | |
| "valid_targets_mean": 716.8, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 2.9344, | |
| "grad_norm": 0.6835135386375543, | |
| "learning_rate": 1.7602858969668365e-05, | |
| "loss": 0.5423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1429317593574524, | |
| "step": 460, | |
| "valid_targets_mean": 738.8, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 2.9664, | |
| "grad_norm": 0.6476761584366846, | |
| "learning_rate": 1.716171174887231e-05, | |
| "loss": 0.5177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12608763575553894, | |
| "step": 465, | |
| "valid_targets_mean": 771.1, | |
| "valid_targets_min": 577 | |
| }, | |
| { | |
| "epoch": 2.9984, | |
| "grad_norm": 0.6705239133879689, | |
| "learning_rate": 1.6721969504173484e-05, | |
| "loss": 0.5304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.156068354845047, | |
| "step": 470, | |
| "valid_targets_mean": 891.4, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 3.0256, | |
| "grad_norm": 0.6254648310969626, | |
| "learning_rate": 1.628384991161041e-05, | |
| "loss": 0.4947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12908436357975006, | |
| "step": 475, | |
| "valid_targets_mean": 856.1, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 3.0576, | |
| "grad_norm": 0.6835751653097379, | |
| "learning_rate": 1.5847569843995452e-05, | |
| "loss": 0.5142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14210891723632812, | |
| "step": 480, | |
| "valid_targets_mean": 753.6, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 3.0896, | |
| "grad_norm": 0.6619131317365188, | |
| "learning_rate": 1.5413345263560922e-05, | |
| "loss": 0.5013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12965424358844757, | |
| "step": 485, | |
| "valid_targets_mean": 726.4, | |
| "valid_targets_min": 444 | |
| }, | |
| { | |
| "epoch": 3.1216, | |
| "grad_norm": 0.6591313448070079, | |
| "learning_rate": 1.4981391115056032e-05, | |
| "loss": 0.5063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1259090006351471, | |
| "step": 490, | |
| "valid_targets_mean": 749.0, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 3.1536, | |
| "grad_norm": 0.6569116597796409, | |
| "learning_rate": 1.455192121934748e-05, | |
| "loss": 0.5012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1285029649734497, | |
| "step": 495, | |
| "valid_targets_mean": 837.4, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 3.1856, | |
| "grad_norm": 0.6316910437995757, | |
| "learning_rate": 1.4125148167576303e-05, | |
| "loss": 0.5031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11511541903018951, | |
| "step": 500, | |
| "valid_targets_mean": 672.5, | |
| "valid_targets_min": 430 | |
| }, | |
| { | |
| "epoch": 3.2176, | |
| "grad_norm": 0.6389505729458541, | |
| "learning_rate": 1.3701283215923563e-05, | |
| "loss": 0.5044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12613186240196228, | |
| "step": 505, | |
| "valid_targets_mean": 796.8, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 3.2496, | |
| "grad_norm": 0.6629145801717702, | |
| "learning_rate": 1.328053618103677e-05, | |
| "loss": 0.495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11542559415102005, | |
| "step": 510, | |
| "valid_targets_mean": 727.7, | |
| "valid_targets_min": 439 | |
| }, | |
| { | |
| "epoch": 3.2816, | |
| "grad_norm": 0.634372579989713, | |
| "learning_rate": 1.2863115336168916e-05, | |
| "loss": 0.5014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12187671661376953, | |
| "step": 515, | |
| "valid_targets_mean": 753.3, | |
| "valid_targets_min": 482 | |
| }, | |
| { | |
| "epoch": 3.3136, | |
| "grad_norm": 0.6384070283786387, | |
| "learning_rate": 1.2449227308081509e-05, | |
| "loss": 0.5008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1421659290790558, | |
| "step": 520, | |
| "valid_targets_mean": 845.1, | |
| "valid_targets_min": 573 | |
| }, | |
| { | |
| "epoch": 3.3456, | |
| "grad_norm": 0.6332388154728119, | |
| "learning_rate": 1.2039076974762587e-05, | |
| "loss": 0.4928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11383464932441711, | |
| "step": 525, | |
| "valid_targets_mean": 718.1, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 3.3776, | |
| "grad_norm": 0.6093799616673348, | |
| "learning_rate": 1.163286736401044e-05, | |
| "loss": 0.4926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12254788726568222, | |
| "step": 530, | |
| "valid_targets_mean": 798.4, | |
| "valid_targets_min": 413 | |
| }, | |
| { | |
| "epoch": 3.4096, | |
| "grad_norm": 0.6455046460858423, | |
| "learning_rate": 1.123079955293322e-05, | |
| "loss": 0.5112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13581742346286774, | |
| "step": 535, | |
| "valid_targets_mean": 846.3, | |
| "valid_targets_min": 520 | |
| }, | |
| { | |
| "epoch": 3.4416, | |
| "grad_norm": 0.631427790063437, | |
| "learning_rate": 1.0833072568414037e-05, | |
| "loss": 0.5119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1291504204273224, | |
| "step": 540, | |
| "valid_targets_mean": 806.6, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 3.4736000000000002, | |
| "grad_norm": 0.6322674040782007, | |
| "learning_rate": 1.0439883288591057e-05, | |
| "loss": 0.4964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11973114311695099, | |
| "step": 545, | |
| "valid_targets_mean": 736.1, | |
| "valid_targets_min": 400 | |
| }, | |
| { | |
| "epoch": 3.5056000000000003, | |
| "grad_norm": 0.6441453581221164, | |
| "learning_rate": 1.0051426345401202e-05, | |
| "loss": 0.5132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13486211001873016, | |
| "step": 550, | |
| "valid_targets_mean": 804.2, | |
| "valid_targets_min": 478 | |
| }, | |
| { | |
| "epoch": 3.5376, | |
| "grad_norm": 0.6425950808621582, | |
| "learning_rate": 9.667894028235704e-06, | |
| "loss": 0.5077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12831318378448486, | |
| "step": 555, | |
| "valid_targets_mean": 724.8, | |
| "valid_targets_min": 512 | |
| }, | |
| { | |
| "epoch": 3.5696, | |
| "grad_norm": 0.6124823375637398, | |
| "learning_rate": 9.289476188755315e-06, | |
| "loss": 0.5038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12071146070957184, | |
| "step": 560, | |
| "valid_targets_mean": 741.8, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 3.6016, | |
| "grad_norm": 0.6205786442665068, | |
| "learning_rate": 8.916360146912122e-06, | |
| "loss": 0.5037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10591184347867966, | |
| "step": 565, | |
| "valid_targets_mean": 709.2, | |
| "valid_targets_min": 480 | |
| }, | |
| { | |
| "epoch": 3.6336, | |
| "grad_norm": 0.6636478895992294, | |
| "learning_rate": 8.548730598224646e-06, | |
| "loss": 0.4993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12058466672897339, | |
| "step": 570, | |
| "valid_targets_mean": 727.6, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 3.6656, | |
| "grad_norm": 0.5971242481772463, | |
| "learning_rate": 8.186769522352053e-06, | |
| "loss": 0.4952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13874907791614532, | |
| "step": 575, | |
| "valid_targets_mean": 860.1, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 3.6976, | |
| "grad_norm": 0.615075544398069, | |
| "learning_rate": 7.830656093012714e-06, | |
| "loss": 0.5037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11050853878259659, | |
| "step": 580, | |
| "valid_targets_mean": 743.3, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 3.7296, | |
| "grad_norm": 0.6470427339088323, | |
| "learning_rate": 7.480566589291696e-06, | |
| "loss": 0.5064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13953588902950287, | |
| "step": 585, | |
| "valid_targets_mean": 822.9, | |
| "valid_targets_min": 493 | |
| }, | |
| { | |
| "epoch": 3.7616, | |
| "grad_norm": 0.597610350393088, | |
| "learning_rate": 7.1366743083812285e-06, | |
| "loss": 0.4967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11674021184444427, | |
| "step": 590, | |
| "valid_targets_mean": 738.9, | |
| "valid_targets_min": 415 | |
| }, | |
| { | |
| "epoch": 3.7936, | |
| "grad_norm": 0.6338991358688321, | |
| "learning_rate": 6.799149479797101e-06, | |
| "loss": 0.5006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12019048631191254, | |
| "step": 595, | |
| "valid_targets_mean": 707.8, | |
| "valid_targets_min": 442 | |
| }, | |
| { | |
| "epoch": 3.8256, | |
| "grad_norm": 0.6202165939689909, | |
| "learning_rate": 6.4681591811137e-06, | |
| "loss": 0.4968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12805257737636566, | |
| "step": 600, | |
| "valid_targets_mean": 873.7, | |
| "valid_targets_min": 529 | |
| }, | |
| { | |
| "epoch": 3.8576, | |
| "grad_norm": 0.6714289302846277, | |
| "learning_rate": 6.143867255259197e-06, | |
| "loss": 0.5012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12346205115318298, | |
| "step": 605, | |
| "valid_targets_mean": 609.8, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 3.8895999999999997, | |
| "grad_norm": 0.6242136384257845, | |
| "learning_rate": 5.8264342294119504e-06, | |
| "loss": 0.501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11968934535980225, | |
| "step": 610, | |
| "valid_targets_mean": 689.3, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 3.9215999999999998, | |
| "grad_norm": 0.6463661477202667, | |
| "learning_rate": 5.516017235538258e-06, | |
| "loss": 0.5053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1359320878982544, | |
| "step": 615, | |
| "valid_targets_mean": 786.9, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 3.9536, | |
| "grad_norm": 0.6491908587833649, | |
| "learning_rate": 5.212769932610695e-06, | |
| "loss": 0.4973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14212754368782043, | |
| "step": 620, | |
| "valid_targets_mean": 861.1, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 3.9856, | |
| "grad_norm": 0.6281434470761624, | |
| "learning_rate": 4.916842430545681e-06, | |
| "loss": 0.4975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11817573010921478, | |
| "step": 625, | |
| "valid_targets_mean": 755.6, | |
| "valid_targets_min": 445 | |
| }, | |
| { | |
| "epoch": 4.0128, | |
| "grad_norm": 0.6490532684460447, | |
| "learning_rate": 4.628381215897837e-06, | |
| "loss": 0.4819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11554375290870667, | |
| "step": 630, | |
| "valid_targets_mean": 724.1, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 4.0448, | |
| "grad_norm": 0.6327445790214326, | |
| "learning_rate": 4.347529079347914e-06, | |
| "loss": 0.4894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12214243412017822, | |
| "step": 635, | |
| "valid_targets_mean": 676.2, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 4.0768, | |
| "grad_norm": 0.6158265147921352, | |
| "learning_rate": 4.074425045020247e-06, | |
| "loss": 0.4868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13750189542770386, | |
| "step": 640, | |
| "valid_targets_mean": 834.1, | |
| "valid_targets_min": 555 | |
| }, | |
| { | |
| "epoch": 4.1088, | |
| "grad_norm": 0.627070881496122, | |
| "learning_rate": 3.8092043016646487e-06, | |
| "loss": 0.4929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11655047535896301, | |
| "step": 645, | |
| "valid_targets_mean": 674.8, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 4.1408, | |
| "grad_norm": 0.6221973742999799, | |
| "learning_rate": 3.551998135736867e-06, | |
| "loss": 0.489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12250182032585144, | |
| "step": 650, | |
| "valid_targets_mean": 665.8, | |
| "valid_targets_min": 394 | |
| }, | |
| { | |
| "epoch": 4.1728, | |
| "grad_norm": 0.6626884060775692, | |
| "learning_rate": 3.3029338664107267e-06, | |
| "loss": 0.4741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12655746936798096, | |
| "step": 655, | |
| "valid_targets_mean": 705.9, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 4.2048, | |
| "grad_norm": 0.6224489306449362, | |
| "learning_rate": 3.0621347825540625e-06, | |
| "loss": 0.4841, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12335214763879776, | |
| "step": 660, | |
| "valid_targets_mean": 784.9, | |
| "valid_targets_min": 525 | |
| }, | |
| { | |
| "epoch": 4.2368, | |
| "grad_norm": 0.6520842934256829, | |
| "learning_rate": 2.8297200816997183e-06, | |
| "loss": 0.4911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12841469049453735, | |
| "step": 665, | |
| "valid_targets_mean": 765.9, | |
| "valid_targets_min": 481 | |
| }, | |
| { | |
| "epoch": 4.2688, | |
| "grad_norm": 0.6423816689951226, | |
| "learning_rate": 2.605804811041803e-06, | |
| "loss": 0.4819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10488501191139221, | |
| "step": 670, | |
| "valid_targets_mean": 732.0, | |
| "valid_targets_min": 450 | |
| }, | |
| { | |
| "epoch": 4.3008, | |
| "grad_norm": 0.6181615169830669, | |
| "learning_rate": 2.390499810486351e-06, | |
| "loss": 0.481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12980340421199799, | |
| "step": 675, | |
| "valid_targets_mean": 780.8, | |
| "valid_targets_min": 466 | |
| }, | |
| { | |
| "epoch": 4.3328, | |
| "grad_norm": 0.6322677157647695, | |
| "learning_rate": 2.183911657784685e-06, | |
| "loss": 0.4964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12320327758789062, | |
| "step": 680, | |
| "valid_targets_mean": 868.0, | |
| "valid_targets_min": 429 | |
| }, | |
| { | |
| "epoch": 4.3648, | |
| "grad_norm": 0.6043179861396448, | |
| "learning_rate": 1.986142615776532e-06, | |
| "loss": 0.4919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1195220947265625, | |
| "step": 685, | |
| "valid_targets_mean": 728.2, | |
| "valid_targets_min": 471 | |
| }, | |
| { | |
| "epoch": 4.3968, | |
| "grad_norm": 0.640478901066271, | |
| "learning_rate": 1.7972905817690644e-06, | |
| "loss": 0.4888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12453019618988037, | |
| "step": 690, | |
| "valid_targets_mean": 716.9, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 4.4288, | |
| "grad_norm": 0.6112657891179784, | |
| "learning_rate": 1.617449039076955e-06, | |
| "loss": 0.4972, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12277944386005402, | |
| "step": 695, | |
| "valid_targets_mean": 826.2, | |
| "valid_targets_min": 418 | |
| }, | |
| { | |
| "epoch": 4.4608, | |
| "grad_norm": 0.680661377919841, | |
| "learning_rate": 1.4467070107473413e-06, | |
| "loss": 0.4926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11709259450435638, | |
| "step": 700, | |
| "valid_targets_mean": 670.3, | |
| "valid_targets_min": 446 | |
| }, | |
| { | |
| "epoch": 4.4928, | |
| "grad_norm": 0.6067946877348196, | |
| "learning_rate": 1.2851490154926816e-06, | |
| "loss": 0.4844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11835283041000366, | |
| "step": 705, | |
| "valid_targets_mean": 833.8, | |
| "valid_targets_min": 535 | |
| }, | |
| { | |
| "epoch": 4.5248, | |
| "grad_norm": 0.6214656228794159, | |
| "learning_rate": 1.1328550258533211e-06, | |
| "loss": 0.4955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1317848563194275, | |
| "step": 710, | |
| "valid_targets_mean": 780.4, | |
| "valid_targets_min": 540 | |
| }, | |
| { | |
| "epoch": 4.5568, | |
| "grad_norm": 0.6101425589821468, | |
| "learning_rate": 9.899004286103953e-07, | |
| "loss": 0.4892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1342540979385376, | |
| "step": 715, | |
| "valid_targets_mean": 813.8, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 4.5888, | |
| "grad_norm": 0.6431480087423993, | |
| "learning_rate": 8.5635598746876e-07, | |
| "loss": 0.4892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12438573688268661, | |
| "step": 720, | |
| "valid_targets_mean": 771.0, | |
| "valid_targets_min": 505 | |
| }, | |
| { | |
| "epoch": 4.6208, | |
| "grad_norm": 0.6403336841979095, | |
| "learning_rate": 7.32287808028389e-07, | |
| "loss": 0.502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14776507019996643, | |
| "step": 725, | |
| "valid_targets_mean": 895.8, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 4.6528, | |
| "grad_norm": 0.6038822017761685, | |
| "learning_rate": 6.177573050615327e-07, | |
| "loss": 0.4909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11538829654455185, | |
| "step": 730, | |
| "valid_targets_mean": 747.9, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 4.6848, | |
| "grad_norm": 0.6324119113515146, | |
| "learning_rate": 5.128211721119213e-07, | |
| "loss": 0.4979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13889926671981812, | |
| "step": 735, | |
| "valid_targets_mean": 792.2, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 4.7168, | |
| "grad_norm": 0.6324936257203374, | |
| "learning_rate": 4.175313534309755e-07, | |
| "loss": 0.487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1103043407201767, | |
| "step": 740, | |
| "valid_targets_mean": 703.1, | |
| "valid_targets_min": 435 | |
| }, | |
| { | |
| "epoch": 4.7488, | |
| "grad_norm": 0.629729044732728, | |
| "learning_rate": 3.319350182649861e-07, | |
| "loss": 0.4892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12301481515169144, | |
| "step": 745, | |
| "valid_targets_mean": 739.1, | |
| "valid_targets_min": 491 | |
| }, | |
| { | |
| "epoch": 4.7808, | |
| "grad_norm": 0.6274760958790774, | |
| "learning_rate": 2.560745375059392e-07, | |
| "loss": 0.4877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11202463507652283, | |
| "step": 750, | |
| "valid_targets_mean": 644.5, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 4.8128, | |
| "grad_norm": 0.6073341940792345, | |
| "learning_rate": 1.8998746271758016e-07, | |
| "loss": 0.4851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12768879532814026, | |
| "step": 755, | |
| "valid_targets_mean": 871.2, | |
| "valid_targets_min": 439 | |
| }, | |
| { | |
| "epoch": 4.8448, | |
| "grad_norm": 0.6300616469590965, | |
| "learning_rate": 1.337065075470778e-07, | |
| "loss": 0.4962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10540274530649185, | |
| "step": 760, | |
| "valid_targets_mean": 732.8, | |
| "valid_targets_min": 458 | |
| }, | |
| { | |
| "epoch": 4.8768, | |
| "grad_norm": 0.607465565387994, | |
| "learning_rate": 8.725953153150279e-08, | |
| "loss": 0.4911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.132490336894989, | |
| "step": 765, | |
| "valid_targets_mean": 854.8, | |
| "valid_targets_min": 435 | |
| }, | |
| { | |
| "epoch": 4.9088, | |
| "grad_norm": 0.5917313437808399, | |
| "learning_rate": 5.066952630711886e-08, | |
| "loss": 0.4864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10872993618249893, | |
| "step": 770, | |
| "valid_targets_mean": 783.2, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 4.9408, | |
| "grad_norm": 0.7403546016233938, | |
| "learning_rate": 2.3954604228342283e-08, | |
| "loss": 0.4979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12833866477012634, | |
| "step": 775, | |
| "valid_targets_mean": 677.8, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 4.9728, | |
| "grad_norm": 0.6407918253430359, | |
| "learning_rate": 7.12798940197601e-09, | |
| "loss": 0.498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12702913582324982, | |
| "step": 780, | |
| "valid_targets_mean": 762.4, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.2926809123761072, | |
| "learning_rate": 1.9801114115480802e-10, | |
| "loss": 0.4833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4620312452316284, | |
| "step": 785, | |
| "valid_targets_mean": 702.6, | |
| "valid_targets_min": 459 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4620312452316284, | |
| "step": 785, | |
| "total_flos": 2.3852276139845222e+17, | |
| "train_loss": 0.5620896111628052, | |
| "train_runtime": 6728.981, | |
| "train_samples_per_second": 7.428, | |
| "train_steps_per_second": 0.117, | |
| "valid_targets_mean": 702.6, | |
| "valid_targets_min": 459 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 785, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3852276139845222e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |