| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 785, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.032, |
| "grad_norm": 13.266562800371773, |
| "learning_rate": 2.0253164556962026e-06, |
| "loss": 0.9687, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.29333972930908203, |
| "step": 5, |
| "valid_targets_mean": 997.2, |
| "valid_targets_min": 343 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 8.489508619950364, |
| "learning_rate": 4.556962025316456e-06, |
| "loss": 0.9291, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.19505365192890167, |
| "step": 10, |
| "valid_targets_mean": 701.6, |
| "valid_targets_min": 428 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 3.7845870692684294, |
| "learning_rate": 7.08860759493671e-06, |
| "loss": 0.8215, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.2177591770887375, |
| "step": 15, |
| "valid_targets_mean": 873.1, |
| "valid_targets_min": 416 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 1.9414984171383554, |
| "learning_rate": 9.620253164556963e-06, |
| "loss": 0.7485, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.15389665961265564, |
| "step": 20, |
| "valid_targets_mean": 859.7, |
| "valid_targets_min": 408 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.4709191649573787, |
| "learning_rate": 1.2151898734177216e-05, |
| "loss": 0.7104, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.17423586547374725, |
| "step": 25, |
| "valid_targets_mean": 877.4, |
| "valid_targets_min": 370 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.1650092738181974, |
| "learning_rate": 1.468354430379747e-05, |
| "loss": 0.6583, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13358977437019348, |
| "step": 30, |
| "valid_targets_mean": 705.3, |
| "valid_targets_min": 362 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.9252713386370359, |
| "learning_rate": 1.7215189873417723e-05, |
| "loss": 0.5997, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12491467595100403, |
| "step": 35, |
| "valid_targets_mean": 662.6, |
| "valid_targets_min": 366 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.7978370195902237, |
| "learning_rate": 1.974683544303798e-05, |
| "loss": 0.5912, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12133462727069855, |
| "step": 40, |
| "valid_targets_mean": 708.8, |
| "valid_targets_min": 304 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.6443855909829759, |
| "learning_rate": 2.2278481012658228e-05, |
| "loss": 0.5653, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1608869433403015, |
| "step": 45, |
| "valid_targets_mean": 1044.8, |
| "valid_targets_min": 363 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.6275594218254698, |
| "learning_rate": 2.481012658227848e-05, |
| "loss": 0.5612, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12222383916378021, |
| "step": 50, |
| "valid_targets_mean": 880.1, |
| "valid_targets_min": 394 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.5428436498161326, |
| "learning_rate": 2.7341772151898737e-05, |
| "loss": 0.5246, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12391236424446106, |
| "step": 55, |
| "valid_targets_mean": 865.2, |
| "valid_targets_min": 407 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.6335065649840791, |
| "learning_rate": 2.987341772151899e-05, |
| "loss": 0.5621, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13935324549674988, |
| "step": 60, |
| "valid_targets_mean": 877.2, |
| "valid_targets_min": 331 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.7837301485260351, |
| "learning_rate": 3.240506329113924e-05, |
| "loss": 0.5275, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12021635472774506, |
| "step": 65, |
| "valid_targets_mean": 592.8, |
| "valid_targets_min": 373 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.5784603563747543, |
| "learning_rate": 3.49367088607595e-05, |
| "loss": 0.5115, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09628315269947052, |
| "step": 70, |
| "valid_targets_mean": 794.9, |
| "valid_targets_min": 348 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5661489350396759, |
| "learning_rate": 3.746835443037975e-05, |
| "loss": 0.5105, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.15161114931106567, |
| "step": 75, |
| "valid_targets_mean": 1041.4, |
| "valid_targets_min": 396 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.6020958811116237, |
| "learning_rate": 4e-05, |
| "loss": 0.5101, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1513483226299286, |
| "step": 80, |
| "valid_targets_mean": 1158.1, |
| "valid_targets_min": 419 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.7041879443596871, |
| "learning_rate": 3.999504991751045e-05, |
| "loss": 0.4906, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11582835763692856, |
| "step": 85, |
| "valid_targets_mean": 546.4, |
| "valid_targets_min": 348 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.6350096833420561, |
| "learning_rate": 3.9980202120373464e-05, |
| "loss": 0.4942, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11067851632833481, |
| "step": 90, |
| "valid_targets_mean": 770.5, |
| "valid_targets_min": 365 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.5823516591675933, |
| "learning_rate": 3.995546395837111e-05, |
| "loss": 0.4833, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11685052514076233, |
| "step": 95, |
| "valid_targets_mean": 844.8, |
| "valid_targets_min": 358 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.6959370180161534, |
| "learning_rate": 3.992084767709763e-05, |
| "loss": 0.492, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11015971750020981, |
| "step": 100, |
| "valid_targets_mean": 649.9, |
| "valid_targets_min": 346 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.7368399273439582, |
| "learning_rate": 3.987637041189781e-05, |
| "loss": 0.4814, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12616831064224243, |
| "step": 105, |
| "valid_targets_mean": 813.2, |
| "valid_targets_min": 369 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.6272126363527385, |
| "learning_rate": 3.982205417938482e-05, |
| "loss": 0.4864, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09629425406455994, |
| "step": 110, |
| "valid_targets_mean": 738.4, |
| "valid_targets_min": 346 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.6408625248348573, |
| "learning_rate": 3.975792586654179e-05, |
| "loss": 0.487, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10672964155673981, |
| "step": 115, |
| "valid_targets_mean": 582.9, |
| "valid_targets_min": 394 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.6804203420373807, |
| "learning_rate": 3.968401721741259e-05, |
| "loss": 0.487, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11347218602895737, |
| "step": 120, |
| "valid_targets_mean": 730.2, |
| "valid_targets_min": 451 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.5723844040912542, |
| "learning_rate": 3.960036481738819e-05, |
| "loss": 0.4942, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.15321877598762512, |
| "step": 125, |
| "valid_targets_mean": 1487.5, |
| "valid_targets_min": 316 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.6956004153841853, |
| "learning_rate": 3.950701007509667e-05, |
| "loss": 0.479, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11619652807712555, |
| "step": 130, |
| "valid_targets_mean": 745.4, |
| "valid_targets_min": 386 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.6296253097471561, |
| "learning_rate": 3.940399920190552e-05, |
| "loss": 0.4654, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09865935146808624, |
| "step": 135, |
| "valid_targets_mean": 767.4, |
| "valid_targets_min": 397 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.6474475974143318, |
| "learning_rate": 3.92913831890467e-05, |
| "loss": 0.494, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1201113611459732, |
| "step": 140, |
| "valid_targets_mean": 823.1, |
| "valid_targets_min": 334 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.5614028338623152, |
| "learning_rate": 3.916921778237556e-05, |
| "loss": 0.4736, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11008624732494354, |
| "step": 145, |
| "valid_targets_mean": 724.4, |
| "valid_targets_min": 310 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.6417707676376314, |
| "learning_rate": 3.903756345477612e-05, |
| "loss": 0.4894, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13074582815170288, |
| "step": 150, |
| "valid_targets_mean": 799.7, |
| "valid_targets_min": 366 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.5556077488737904, |
| "learning_rate": 3.889648537622657e-05, |
| "loss": 0.4647, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09520229697227478, |
| "step": 155, |
| "valid_targets_mean": 759.1, |
| "valid_targets_min": 379 |
| }, |
| { |
| "epoch": 1.0192, |
| "grad_norm": 0.518917132469663, |
| "learning_rate": 3.874605338153952e-05, |
| "loss": 0.4877, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1457328051328659, |
| "step": 160, |
| "valid_targets_mean": 1207.9, |
| "valid_targets_min": 401 |
| }, |
| { |
| "epoch": 1.0512, |
| "grad_norm": 0.6519204578424899, |
| "learning_rate": 3.8586341935793265e-05, |
| "loss": 0.4549, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09436912834644318, |
| "step": 165, |
| "valid_targets_mean": 591.2, |
| "valid_targets_min": 358 |
| }, |
| { |
| "epoch": 1.0832, |
| "grad_norm": 0.7191821046729394, |
| "learning_rate": 3.841743009747089e-05, |
| "loss": 0.4724, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09062425047159195, |
| "step": 170, |
| "valid_targets_mean": 553.2, |
| "valid_targets_min": 413 |
| }, |
| { |
| "epoch": 1.1152, |
| "grad_norm": 0.5941223668586252, |
| "learning_rate": 3.8239401479325714e-05, |
| "loss": 0.4416, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11501722037792206, |
| "step": 175, |
| "valid_targets_mean": 767.8, |
| "valid_targets_min": 385 |
| }, |
| { |
| "epoch": 1.1472, |
| "grad_norm": 0.5716063684906239, |
| "learning_rate": 3.8052344206992276e-05, |
| "loss": 0.4658, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.15316523611545563, |
| "step": 180, |
| "valid_targets_mean": 1364.5, |
| "valid_targets_min": 520 |
| }, |
| { |
| "epoch": 1.1792, |
| "grad_norm": 0.5527348861402952, |
| "learning_rate": 3.7856350875363396e-05, |
| "loss": 0.4557, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1328214555978775, |
| "step": 185, |
| "valid_targets_mean": 979.0, |
| "valid_targets_min": 416 |
| }, |
| { |
| "epoch": 1.2112, |
| "grad_norm": 0.5274840156315275, |
| "learning_rate": 3.765151850275497e-05, |
| "loss": 0.4457, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09974588453769684, |
| "step": 190, |
| "valid_targets_mean": 1234.8, |
| "valid_targets_min": 294 |
| }, |
| { |
| "epoch": 1.2432, |
| "grad_norm": 0.5987542988521145, |
| "learning_rate": 3.7437948482881104e-05, |
| "loss": 0.4495, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1128249317407608, |
| "step": 195, |
| "valid_targets_mean": 824.9, |
| "valid_targets_min": 397 |
| }, |
| { |
| "epoch": 1.2752, |
| "grad_norm": 0.5732559336354344, |
| "learning_rate": 3.721574653466336e-05, |
| "loss": 0.4424, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13284024596214294, |
| "step": 200, |
| "valid_targets_mean": 1010.7, |
| "valid_targets_min": 367 |
| }, |
| { |
| "epoch": 1.3072, |
| "grad_norm": 0.6697760935036775, |
| "learning_rate": 3.698502264989903e-05, |
| "loss": 0.4334, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13111239671707153, |
| "step": 205, |
| "valid_targets_mean": 944.9, |
| "valid_targets_min": 423 |
| }, |
| { |
| "epoch": 1.3392, |
| "grad_norm": 0.6167253454576184, |
| "learning_rate": 3.674589103881432e-05, |
| "loss": 0.4611, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13232994079589844, |
| "step": 210, |
| "valid_targets_mean": 811.1, |
| "valid_targets_min": 372 |
| }, |
| { |
| "epoch": 1.3712, |
| "grad_norm": 0.5210643419125344, |
| "learning_rate": 3.64984700735293e-05, |
| "loss": 0.4459, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06730905920267105, |
| "step": 215, |
| "valid_targets_mean": 581.2, |
| "valid_targets_min": 303 |
| }, |
| { |
| "epoch": 1.4032, |
| "grad_norm": 0.6506348468397748, |
| "learning_rate": 3.624288222946273e-05, |
| "loss": 0.4643, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11295449733734131, |
| "step": 220, |
| "valid_targets_mean": 779.0, |
| "valid_targets_min": 291 |
| }, |
| { |
| "epoch": 1.4352, |
| "grad_norm": 0.6198424313674729, |
| "learning_rate": 3.597925402470578e-05, |
| "loss": 0.4502, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09262027591466904, |
| "step": 225, |
| "valid_targets_mean": 662.2, |
| "valid_targets_min": 356 |
| }, |
| { |
| "epoch": 1.4672, |
| "grad_norm": 0.5636413590661971, |
| "learning_rate": 3.570771595739445e-05, |
| "loss": 0.4613, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09992943704128265, |
| "step": 230, |
| "valid_targets_mean": 901.2, |
| "valid_targets_min": 363 |
| }, |
| { |
| "epoch": 1.4992, |
| "grad_norm": 0.5770044697343024, |
| "learning_rate": 3.5428402441111964e-05, |
| "loss": 0.4609, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09955944120883942, |
| "step": 235, |
| "valid_targets_mean": 701.8, |
| "valid_targets_min": 471 |
| }, |
| { |
| "epoch": 1.5312000000000001, |
| "grad_norm": 0.5816016693103543, |
| "learning_rate": 3.5141451738352936e-05, |
| "loss": 0.4354, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09309546649456024, |
| "step": 240, |
| "valid_targets_mean": 610.4, |
| "valid_targets_min": 358 |
| }, |
| { |
| "epoch": 1.5632000000000001, |
| "grad_norm": 0.5432246001353459, |
| "learning_rate": 3.4847005892082266e-05, |
| "loss": 0.4421, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08772096782922745, |
| "step": 245, |
| "valid_targets_mean": 664.1, |
| "valid_targets_min": 359 |
| }, |
| { |
| "epoch": 1.5952, |
| "grad_norm": 0.5245659004342419, |
| "learning_rate": 3.454521065542273e-05, |
| "loss": 0.432, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09933168441057205, |
| "step": 250, |
| "valid_targets_mean": 855.1, |
| "valid_targets_min": 383 |
| }, |
| { |
| "epoch": 1.6272, |
| "grad_norm": 0.5071814981485833, |
| "learning_rate": 3.423621541950597e-05, |
| "loss": 0.4395, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08219840377569199, |
| "step": 255, |
| "valid_targets_mean": 697.7, |
| "valid_targets_min": 378 |
| }, |
| { |
| "epoch": 1.6592, |
| "grad_norm": 0.6421027216281103, |
| "learning_rate": 3.3920173139522664e-05, |
| "loss": 0.4522, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09957920759916306, |
| "step": 260, |
| "valid_targets_mean": 635.2, |
| "valid_targets_min": 329 |
| }, |
| { |
| "epoch": 1.6912, |
| "grad_norm": 0.8918683611862951, |
| "learning_rate": 3.35972402590084e-05, |
| "loss": 0.4452, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09719720482826233, |
| "step": 265, |
| "valid_targets_mean": 654.4, |
| "valid_targets_min": 368 |
| }, |
| { |
| "epoch": 1.7231999999999998, |
| "grad_norm": 0.5937318332037429, |
| "learning_rate": 3.326757663240291e-05, |
| "loss": 0.4355, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.129220113158226, |
| "step": 270, |
| "valid_targets_mean": 767.7, |
| "valid_targets_min": 334 |
| }, |
| { |
| "epoch": 1.7551999999999999, |
| "grad_norm": 0.615788760605738, |
| "learning_rate": 3.293134544592073e-05, |
| "loss": 0.4556, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.17965131998062134, |
| "step": 275, |
| "valid_targets_mean": 1080.9, |
| "valid_targets_min": 451 |
| }, |
| { |
| "epoch": 1.7872, |
| "grad_norm": 0.575288638742944, |
| "learning_rate": 3.258871313677274e-05, |
| "loss": 0.4428, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09257206320762634, |
| "step": 280, |
| "valid_targets_mean": 597.2, |
| "valid_targets_min": 320 |
| }, |
| { |
| "epoch": 1.8192, |
| "grad_norm": 0.6226156825477193, |
| "learning_rate": 3.2239849310778316e-05, |
| "loss": 0.4417, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11135537922382355, |
| "step": 285, |
| "valid_targets_mean": 747.9, |
| "valid_targets_min": 376 |
| }, |
| { |
| "epoch": 1.8512, |
| "grad_norm": 0.5474766803313764, |
| "learning_rate": 3.188492665840909e-05, |
| "loss": 0.4316, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1179531142115593, |
| "step": 290, |
| "valid_targets_mean": 1048.9, |
| "valid_targets_min": 321 |
| }, |
| { |
| "epoch": 1.8832, |
| "grad_norm": 0.555344017183991, |
| "learning_rate": 3.1524120869305726e-05, |
| "loss": 0.448, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09857015311717987, |
| "step": 295, |
| "valid_targets_mean": 785.6, |
| "valid_targets_min": 302 |
| }, |
| { |
| "epoch": 1.9152, |
| "grad_norm": 0.5088681384764417, |
| "learning_rate": 3.11576105453101e-05, |
| "loss": 0.4296, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09216314554214478, |
| "step": 300, |
| "valid_targets_mean": 784.4, |
| "valid_targets_min": 380 |
| }, |
| { |
| "epoch": 1.9472, |
| "grad_norm": 0.6660264590729095, |
| "learning_rate": 3.0785577112055916e-05, |
| "loss": 0.4423, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07816915959119797, |
| "step": 305, |
| "valid_targets_mean": 565.1, |
| "valid_targets_min": 320 |
| }, |
| { |
| "epoch": 1.9792, |
| "grad_norm": 0.5429833393761442, |
| "learning_rate": 3.040820472916153e-05, |
| "loss": 0.4335, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11158039420843124, |
| "step": 310, |
| "valid_targets_mean": 977.5, |
| "valid_targets_min": 344 |
| }, |
| { |
| "epoch": 2.0064, |
| "grad_norm": 0.5775581645620036, |
| "learning_rate": 3.002568019906939e-05, |
| "loss": 0.4487, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07440705597400665, |
| "step": 315, |
| "valid_targets_mean": 643.6, |
| "valid_targets_min": 404 |
| }, |
| { |
| "epoch": 2.0384, |
| "grad_norm": 0.5129583670120458, |
| "learning_rate": 2.963819287457733e-05, |
| "loss": 0.4121, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07239961624145508, |
| "step": 320, |
| "valid_targets_mean": 864.1, |
| "valid_targets_min": 403 |
| }, |
| { |
| "epoch": 2.0704, |
| "grad_norm": 0.6077115293347921, |
| "learning_rate": 2.924593456510733e-05, |
| "loss": 0.4031, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12308503687381744, |
| "step": 325, |
| "valid_targets_mean": 956.9, |
| "valid_targets_min": 391 |
| }, |
| { |
| "epoch": 2.1024, |
| "grad_norm": 0.6031214474986006, |
| "learning_rate": 2.8849099441758306e-05, |
| "loss": 0.4127, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0778675377368927, |
| "step": 330, |
| "valid_targets_mean": 551.2, |
| "valid_targets_min": 404 |
| }, |
| { |
| "epoch": 2.1344, |
| "grad_norm": 0.49034633652229753, |
| "learning_rate": 2.844788394118979e-05, |
| "loss": 0.4212, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1424838900566101, |
| "step": 335, |
| "valid_targets_mean": 1388.6, |
| "valid_targets_min": 412 |
| }, |
| { |
| "epoch": 2.1664, |
| "grad_norm": 0.6002144395513832, |
| "learning_rate": 2.8042486668384164e-05, |
| "loss": 0.4195, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09713360667228699, |
| "step": 340, |
| "valid_targets_mean": 841.2, |
| "valid_targets_min": 338 |
| }, |
| { |
| "epoch": 2.1984, |
| "grad_norm": 0.5953750346604982, |
| "learning_rate": 2.7633108298335582e-05, |
| "loss": 0.4139, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12253813445568085, |
| "step": 345, |
| "valid_targets_mean": 1029.0, |
| "valid_targets_min": 398 |
| }, |
| { |
| "epoch": 2.2304, |
| "grad_norm": 0.5664035810551488, |
| "learning_rate": 2.721995147671416e-05, |
| "loss": 0.4173, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.14460813999176025, |
| "step": 350, |
| "valid_targets_mean": 1296.0, |
| "valid_targets_min": 433 |
| }, |
| { |
| "epoch": 2.2624, |
| "grad_norm": 0.5575494465624244, |
| "learning_rate": 2.68032207195547e-05, |
| "loss": 0.4159, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11319831013679504, |
| "step": 355, |
| "valid_targets_mean": 998.4, |
| "valid_targets_min": 405 |
| }, |
| { |
| "epoch": 2.2944, |
| "grad_norm": 0.5722654942074888, |
| "learning_rate": 2.6383122312019604e-05, |
| "loss": 0.4136, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11292511224746704, |
| "step": 360, |
| "valid_targets_mean": 791.4, |
| "valid_targets_min": 357 |
| }, |
| { |
| "epoch": 2.3264, |
| "grad_norm": 0.6156699181787635, |
| "learning_rate": 2.595986420628597e-05, |
| "loss": 0.4323, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0936831682920456, |
| "step": 365, |
| "valid_targets_mean": 645.4, |
| "valid_targets_min": 413 |
| }, |
| { |
| "epoch": 2.3584, |
| "grad_norm": 0.5799332606546033, |
| "learning_rate": 2.5533655918607573e-05, |
| "loss": 0.42, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0842270478606224, |
| "step": 370, |
| "valid_targets_mean": 631.9, |
| "valid_targets_min": 319 |
| }, |
| { |
| "epoch": 2.3904, |
| "grad_norm": 0.622585761390544, |
| "learning_rate": 2.510470842560259e-05, |
| "loss": 0.4189, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12394921481609344, |
| "step": 375, |
| "valid_targets_mean": 862.4, |
| "valid_targets_min": 464 |
| }, |
| { |
| "epoch": 2.4224, |
| "grad_norm": 0.5722309355051565, |
| "learning_rate": 2.467323405981841e-05, |
| "loss": 0.4051, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0961218997836113, |
| "step": 380, |
| "valid_targets_mean": 768.4, |
| "valid_targets_min": 348 |
| }, |
| { |
| "epoch": 2.4544, |
| "grad_norm": 0.5321841680118645, |
| "learning_rate": 2.423944640462533e-05, |
| "loss": 0.4146, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0827847570180893, |
| "step": 385, |
| "valid_targets_mean": 868.4, |
| "valid_targets_min": 390 |
| }, |
| { |
| "epoch": 2.4864, |
| "grad_norm": 0.6700888637238239, |
| "learning_rate": 2.3803560188490968e-05, |
| "loss": 0.431, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09786651283502579, |
| "step": 390, |
| "valid_targets_mean": 746.6, |
| "valid_targets_min": 315 |
| }, |
| { |
| "epoch": 2.5183999999999997, |
| "grad_norm": 0.6323804353095867, |
| "learning_rate": 2.336579117868789e-05, |
| "loss": 0.4229, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10056309401988983, |
| "step": 395, |
| "valid_targets_mean": 806.9, |
| "valid_targets_min": 394 |
| }, |
| { |
| "epoch": 2.5504, |
| "grad_norm": 0.6550381034455577, |
| "learning_rate": 2.292635607448711e-05, |
| "loss": 0.4229, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13149245083332062, |
| "step": 400, |
| "valid_targets_mean": 998.6, |
| "valid_targets_min": 403 |
| }, |
| { |
| "epoch": 2.5824, |
| "grad_norm": 0.6645834209618975, |
| "learning_rate": 2.248547239989008e-05, |
| "loss": 0.4154, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10545995831489563, |
| "step": 405, |
| "valid_targets_mean": 689.8, |
| "valid_targets_min": 440 |
| }, |
| { |
| "epoch": 2.6144, |
| "grad_norm": 0.5941577749283523, |
| "learning_rate": 2.204335839595255e-05, |
| "loss": 0.4216, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1064852923154831, |
| "step": 410, |
| "valid_targets_mean": 724.1, |
| "valid_targets_min": 376 |
| }, |
| { |
| "epoch": 2.6464, |
| "grad_norm": 0.5056780394022312, |
| "learning_rate": 2.1600232912753452e-05, |
| "loss": 0.4019, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11357980221509933, |
| "step": 415, |
| "valid_targets_mean": 1238.4, |
| "valid_targets_min": 323 |
| }, |
| { |
| "epoch": 2.6784, |
| "grad_norm": 0.610168786701435, |
| "learning_rate": 2.1156315301062293e-05, |
| "loss": 0.4064, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09324711561203003, |
| "step": 420, |
| "valid_targets_mean": 613.2, |
| "valid_targets_min": 359 |
| }, |
| { |
| "epoch": 2.7104, |
| "grad_norm": 0.5504726610083012, |
| "learning_rate": 2.0711825303758712e-05, |
| "loss": 0.4232, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1077696830034256, |
| "step": 425, |
| "valid_targets_mean": 818.8, |
| "valid_targets_min": 390 |
| }, |
| { |
| "epoch": 2.7424, |
| "grad_norm": 0.6641939339447608, |
| "learning_rate": 2.0266982947057962e-05, |
| "loss": 0.422, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0970541387796402, |
| "step": 430, |
| "valid_targets_mean": 685.2, |
| "valid_targets_min": 321 |
| }, |
| { |
| "epoch": 2.7744, |
| "grad_norm": 0.653531813006077, |
| "learning_rate": 1.9822008431596083e-05, |
| "loss": 0.4177, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10846106708049774, |
| "step": 435, |
| "valid_targets_mean": 723.3, |
| "valid_targets_min": 404 |
| }, |
| { |
| "epoch": 2.8064, |
| "grad_norm": 0.6681671556144005, |
| "learning_rate": 1.937712202342881e-05, |
| "loss": 0.4196, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09692089259624481, |
| "step": 440, |
| "valid_targets_mean": 633.7, |
| "valid_targets_min": 427 |
| }, |
| { |
| "epoch": 2.8384, |
| "grad_norm": 0.6380439037814938, |
| "learning_rate": 1.8932543944998037e-05, |
| "loss": 0.4183, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11875765770673752, |
| "step": 445, |
| "valid_targets_mean": 746.5, |
| "valid_targets_min": 423 |
| }, |
| { |
| "epoch": 2.8704, |
| "grad_norm": 0.5219284155044907, |
| "learning_rate": 1.8488494266119877e-05, |
| "loss": 0.4058, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09141748398542404, |
| "step": 450, |
| "valid_targets_mean": 716.4, |
| "valid_targets_min": 366 |
| }, |
| { |
| "epoch": 2.9024, |
| "grad_norm": 0.5566392528555462, |
| "learning_rate": 1.804519279504834e-05, |
| "loss": 0.4113, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09680511057376862, |
| "step": 455, |
| "valid_targets_mean": 934.8, |
| "valid_targets_min": 432 |
| }, |
| { |
| "epoch": 2.9344, |
| "grad_norm": 0.6626446716495282, |
| "learning_rate": 1.7602858969668365e-05, |
| "loss": 0.409, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08213680982589722, |
| "step": 460, |
| "valid_targets_mean": 650.0, |
| "valid_targets_min": 334 |
| }, |
| { |
| "epoch": 2.9664, |
| "grad_norm": 0.593266768884052, |
| "learning_rate": 1.716171174887231e-05, |
| "loss": 0.4113, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10700133442878723, |
| "step": 465, |
| "valid_targets_mean": 696.6, |
| "valid_targets_min": 320 |
| }, |
| { |
| "epoch": 2.9984, |
| "grad_norm": 0.5752624662339391, |
| "learning_rate": 1.6721969504173484e-05, |
| "loss": 0.42, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08804523199796677, |
| "step": 470, |
| "valid_targets_mean": 729.1, |
| "valid_targets_min": 418 |
| }, |
| { |
| "epoch": 3.0256, |
| "grad_norm": 0.5610382190573227, |
| "learning_rate": 1.628384991161041e-05, |
| "loss": 0.4063, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08425762504339218, |
| "step": 475, |
| "valid_targets_mean": 1020.8, |
| "valid_targets_min": 399 |
| }, |
| { |
| "epoch": 3.0576, |
| "grad_norm": 0.6370374136408906, |
| "learning_rate": 1.5847569843995452e-05, |
| "loss": 0.4027, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10353687405586243, |
| "step": 480, |
| "valid_targets_mean": 837.4, |
| "valid_targets_min": 361 |
| }, |
| { |
| "epoch": 3.0896, |
| "grad_norm": 0.5873572112472938, |
| "learning_rate": 1.5413345263560922e-05, |
| "loss": 0.4117, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09108766168355942, |
| "step": 485, |
| "valid_targets_mean": 887.4, |
| "valid_targets_min": 416 |
| }, |
| { |
| "epoch": 3.1216, |
| "grad_norm": 0.59032555685135, |
| "learning_rate": 1.4981391115056032e-05, |
| "loss": 0.3966, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12735995650291443, |
| "step": 490, |
| "valid_targets_mean": 928.2, |
| "valid_targets_min": 397 |
| }, |
| { |
| "epoch": 3.1536, |
| "grad_norm": 0.5517995378600631, |
| "learning_rate": 1.455192121934748e-05, |
| "loss": 0.3876, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09549345076084137, |
| "step": 495, |
| "valid_targets_mean": 842.0, |
| "valid_targets_min": 326 |
| }, |
| { |
| "epoch": 3.1856, |
| "grad_norm": 0.5767640192840071, |
| "learning_rate": 1.4125148167576303e-05, |
| "loss": 0.3875, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09618216753005981, |
| "step": 500, |
| "valid_targets_mean": 1051.3, |
| "valid_targets_min": 357 |
| }, |
| { |
| "epoch": 3.2176, |
| "grad_norm": 0.614531690871058, |
| "learning_rate": 1.3701283215923563e-05, |
| "loss": 0.3918, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11241084337234497, |
| "step": 505, |
| "valid_targets_mean": 977.4, |
| "valid_targets_min": 430 |
| }, |
| { |
| "epoch": 3.2496, |
| "grad_norm": 0.585115408442605, |
| "learning_rate": 1.328053618103677e-05, |
| "loss": 0.389, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07872745394706726, |
| "step": 510, |
| "valid_targets_mean": 600.2, |
| "valid_targets_min": 401 |
| }, |
| { |
| "epoch": 3.2816, |
| "grad_norm": 0.5155757805806573, |
| "learning_rate": 1.2863115336168916e-05, |
| "loss": 0.3959, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07585766911506653, |
| "step": 515, |
| "valid_targets_mean": 734.4, |
| "valid_targets_min": 416 |
| }, |
| { |
| "epoch": 3.3136, |
| "grad_norm": 0.5335000876085558, |
| "learning_rate": 1.2449227308081509e-05, |
| "loss": 0.3871, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0835937038064003, |
| "step": 520, |
| "valid_targets_mean": 825.9, |
| "valid_targets_min": 368 |
| }, |
| { |
| "epoch": 3.3456, |
| "grad_norm": 0.5841210865260744, |
| "learning_rate": 1.2039076974762587e-05, |
| "loss": 0.3844, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07614777237176895, |
| "step": 525, |
| "valid_targets_mean": 763.9, |
| "valid_targets_min": 373 |
| }, |
| { |
| "epoch": 3.3776, |
| "grad_norm": 0.6483216131482832, |
| "learning_rate": 1.163286736401044e-05, |
| "loss": 0.4018, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10778698325157166, |
| "step": 530, |
| "valid_targets_mean": 766.4, |
| "valid_targets_min": 309 |
| }, |
| { |
| "epoch": 3.4096, |
| "grad_norm": 0.5326152721218573, |
| "learning_rate": 1.123079955293322e-05, |
| "loss": 0.3901, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06915357708930969, |
| "step": 535, |
| "valid_targets_mean": 703.1, |
| "valid_targets_min": 329 |
| }, |
| { |
| "epoch": 3.4416, |
| "grad_norm": 0.600429095738304, |
| "learning_rate": 1.0833072568414037e-05, |
| "loss": 0.4061, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07404685020446777, |
| "step": 540, |
| "valid_targets_mean": 642.3, |
| "valid_targets_min": 342 |
| }, |
| { |
| "epoch": 3.4736000000000002, |
| "grad_norm": 0.6338259376396709, |
| "learning_rate": 1.0439883288591057e-05, |
| "loss": 0.4, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1003207415342331, |
| "step": 545, |
| "valid_targets_mean": 815.3, |
| "valid_targets_min": 408 |
| }, |
| { |
| "epoch": 3.5056000000000003, |
| "grad_norm": 0.5714090152959421, |
| "learning_rate": 1.0051426345401202e-05, |
| "loss": 0.3834, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10465012490749359, |
| "step": 550, |
| "valid_targets_mean": 1010.7, |
| "valid_targets_min": 389 |
| }, |
| { |
| "epoch": 3.5376, |
| "grad_norm": 0.5888843808215544, |
| "learning_rate": 9.667894028235704e-06, |
| "loss": 0.4093, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13038086891174316, |
| "step": 555, |
| "valid_targets_mean": 1027.1, |
| "valid_targets_min": 358 |
| }, |
| { |
| "epoch": 3.5696, |
| "grad_norm": 0.6041611052917156, |
| "learning_rate": 9.289476188755315e-06, |
| "loss": 0.4042, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08387233316898346, |
| "step": 560, |
| "valid_targets_mean": 657.9, |
| "valid_targets_min": 401 |
| }, |
| { |
| "epoch": 3.6016, |
| "grad_norm": 0.6424612269453751, |
| "learning_rate": 8.916360146912122e-06, |
| "loss": 0.3946, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07798133790493011, |
| "step": 565, |
| "valid_targets_mean": 693.3, |
| "valid_targets_min": 364 |
| }, |
| { |
| "epoch": 3.6336, |
| "grad_norm": 0.6656167537622161, |
| "learning_rate": 8.548730598224646e-06, |
| "loss": 0.3991, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07147403806447983, |
| "step": 570, |
| "valid_targets_mean": 540.2, |
| "valid_targets_min": 334 |
| }, |
| { |
| "epoch": 3.6656, |
| "grad_norm": 0.6024773862090805, |
| "learning_rate": 8.186769522352053e-06, |
| "loss": 0.4047, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08182119578123093, |
| "step": 575, |
| "valid_targets_mean": 724.2, |
| "valid_targets_min": 268 |
| }, |
| { |
| "epoch": 3.6976, |
| "grad_norm": 0.5099213228173635, |
| "learning_rate": 7.830656093012714e-06, |
| "loss": 0.3909, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06938575953245163, |
| "step": 580, |
| "valid_targets_mean": 856.3, |
| "valid_targets_min": 385 |
| }, |
| { |
| "epoch": 3.7296, |
| "grad_norm": 0.5878150081667711, |
| "learning_rate": 7.480566589291696e-06, |
| "loss": 0.3963, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09519918262958527, |
| "step": 585, |
| "valid_targets_mean": 777.5, |
| "valid_targets_min": 378 |
| }, |
| { |
| "epoch": 3.7616, |
| "grad_norm": 0.542873280912327, |
| "learning_rate": 7.1366743083812285e-06, |
| "loss": 0.39, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06709694862365723, |
| "step": 590, |
| "valid_targets_mean": 632.2, |
| "valid_targets_min": 385 |
| }, |
| { |
| "epoch": 3.7936, |
| "grad_norm": 2.31349167232845, |
| "learning_rate": 6.799149479797101e-06, |
| "loss": 0.3844, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0990547239780426, |
| "step": 595, |
| "valid_targets_mean": 654.9, |
| "valid_targets_min": 302 |
| }, |
| { |
| "epoch": 3.8256, |
| "grad_norm": 0.590653745798261, |
| "learning_rate": 6.4681591811137e-06, |
| "loss": 0.3759, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07994721084833145, |
| "step": 600, |
| "valid_targets_mean": 684.1, |
| "valid_targets_min": 435 |
| }, |
| { |
| "epoch": 3.8576, |
| "grad_norm": 0.6127118274714611, |
| "learning_rate": 6.143867255259197e-06, |
| "loss": 0.4117, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09342561662197113, |
| "step": 605, |
| "valid_targets_mean": 816.8, |
| "valid_targets_min": 284 |
| }, |
| { |
| "epoch": 3.8895999999999997, |
| "grad_norm": 0.6391548156109519, |
| "learning_rate": 5.8264342294119504e-06, |
| "loss": 0.3895, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08812019228935242, |
| "step": 610, |
| "valid_targets_mean": 632.6, |
| "valid_targets_min": 393 |
| }, |
| { |
| "epoch": 3.9215999999999998, |
| "grad_norm": 0.6060676769815595, |
| "learning_rate": 5.516017235538258e-06, |
| "loss": 0.3969, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09914878010749817, |
| "step": 615, |
| "valid_targets_mean": 743.7, |
| "valid_targets_min": 441 |
| }, |
| { |
| "epoch": 3.9536, |
| "grad_norm": 0.5982168918074733, |
| "learning_rate": 5.212769932610695e-06, |
| "loss": 0.3819, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.058783818036317825, |
| "step": 620, |
| "valid_targets_mean": 507.6, |
| "valid_targets_min": 400 |
| }, |
| { |
| "epoch": 3.9856, |
| "grad_norm": 0.5972327172351439, |
| "learning_rate": 4.916842430545681e-06, |
| "loss": 0.4038, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08714640885591507, |
| "step": 625, |
| "valid_targets_mean": 827.5, |
| "valid_targets_min": 397 |
| }, |
| { |
| "epoch": 4.0128, |
| "grad_norm": 0.4847513858068835, |
| "learning_rate": 4.628381215897837e-06, |
| "loss": 0.3936, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0801130086183548, |
| "step": 630, |
| "valid_targets_mean": 1127.3, |
| "valid_targets_min": 368 |
| }, |
| { |
| "epoch": 4.0448, |
| "grad_norm": 0.5611576007645827, |
| "learning_rate": 4.347529079347914e-06, |
| "loss": 0.3824, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08794663846492767, |
| "step": 635, |
| "valid_targets_mean": 680.2, |
| "valid_targets_min": 379 |
| }, |
| { |
| "epoch": 4.0768, |
| "grad_norm": 0.5528385055241838, |
| "learning_rate": 4.074425045020247e-06, |
| "loss": 0.3745, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08833988755941391, |
| "step": 640, |
| "valid_targets_mean": 1072.6, |
| "valid_targets_min": 362 |
| }, |
| { |
| "epoch": 4.1088, |
| "grad_norm": 0.6360019127829928, |
| "learning_rate": 3.8092043016646487e-06, |
| "loss": 0.3931, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07920222729444504, |
| "step": 645, |
| "valid_targets_mean": 576.0, |
| "valid_targets_min": 346 |
| }, |
| { |
| "epoch": 4.1408, |
| "grad_norm": 0.5695054148744619, |
| "learning_rate": 3.551998135736867e-06, |
| "loss": 0.3741, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10163244605064392, |
| "step": 650, |
| "valid_targets_mean": 795.4, |
| "valid_targets_min": 389 |
| }, |
| { |
| "epoch": 4.1728, |
| "grad_norm": 0.5823717131910814, |
| "learning_rate": 3.3029338664107267e-06, |
| "loss": 0.3852, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08717401325702667, |
| "step": 655, |
| "valid_targets_mean": 757.6, |
| "valid_targets_min": 450 |
| }, |
| { |
| "epoch": 4.2048, |
| "grad_norm": 0.5431835475455258, |
| "learning_rate": 3.0621347825540625e-06, |
| "loss": 0.3931, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11405445635318756, |
| "step": 660, |
| "valid_targets_mean": 1233.8, |
| "valid_targets_min": 477 |
| }, |
| { |
| "epoch": 4.2368, |
| "grad_norm": 0.6284908129512062, |
| "learning_rate": 2.8297200816997183e-06, |
| "loss": 0.3871, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08217090368270874, |
| "step": 665, |
| "valid_targets_mean": 670.6, |
| "valid_targets_min": 381 |
| }, |
| { |
| "epoch": 4.2688, |
| "grad_norm": 0.4950334898816112, |
| "learning_rate": 2.605804811041803e-06, |
| "loss": 0.3754, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06383413821458817, |
| "step": 670, |
| "valid_targets_mean": 776.5, |
| "valid_targets_min": 410 |
| }, |
| { |
| "epoch": 4.3008, |
| "grad_norm": 0.591098970902121, |
| "learning_rate": 2.390499810486351e-06, |
| "loss": 0.3935, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10034292936325073, |
| "step": 675, |
| "valid_targets_mean": 826.2, |
| "valid_targets_min": 383 |
| }, |
| { |
| "epoch": 4.3328, |
| "grad_norm": 0.5651496158158206, |
| "learning_rate": 2.183911657784685e-06, |
| "loss": 0.3813, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07695989310741425, |
| "step": 680, |
| "valid_targets_mean": 672.1, |
| "valid_targets_min": 385 |
| }, |
| { |
| "epoch": 4.3648, |
| "grad_norm": 0.5917327908907087, |
| "learning_rate": 1.986142615776532e-06, |
| "loss": 0.38, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07897144556045532, |
| "step": 685, |
| "valid_targets_mean": 684.1, |
| "valid_targets_min": 387 |
| }, |
| { |
| "epoch": 4.3968, |
| "grad_norm": 0.5323303388546669, |
| "learning_rate": 1.7972905817690644e-06, |
| "loss": 0.3905, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.092294842004776, |
| "step": 690, |
| "valid_targets_mean": 973.1, |
| "valid_targets_min": 401 |
| }, |
| { |
| "epoch": 4.4288, |
| "grad_norm": 0.5460713542113188, |
| "learning_rate": 1.617449039076955e-06, |
| "loss": 0.3892, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09456771612167358, |
| "step": 695, |
| "valid_targets_mean": 962.5, |
| "valid_targets_min": 438 |
| }, |
| { |
| "epoch": 4.4608, |
| "grad_norm": 0.6082925587977943, |
| "learning_rate": 1.4467070107473413e-06, |
| "loss": 0.3853, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1136481761932373, |
| "step": 700, |
| "valid_targets_mean": 849.4, |
| "valid_targets_min": 412 |
| }, |
| { |
| "epoch": 4.4928, |
| "grad_norm": 0.6200633164112519, |
| "learning_rate": 1.2851490154926816e-06, |
| "loss": 0.3893, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10344002395868301, |
| "step": 705, |
| "valid_targets_mean": 861.0, |
| "valid_targets_min": 459 |
| }, |
| { |
| "epoch": 4.5248, |
| "grad_norm": 0.5980979220481121, |
| "learning_rate": 1.1328550258533211e-06, |
| "loss": 0.387, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12262655794620514, |
| "step": 710, |
| "valid_targets_mean": 1103.6, |
| "valid_targets_min": 302 |
| }, |
| { |
| "epoch": 4.5568, |
| "grad_norm": 1.0985283290768537, |
| "learning_rate": 9.899004286103953e-07, |
| "loss": 0.3909, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10326635837554932, |
| "step": 715, |
| "valid_targets_mean": 771.8, |
| "valid_targets_min": 303 |
| }, |
| { |
| "epoch": 4.5888, |
| "grad_norm": 0.5349623349471714, |
| "learning_rate": 8.5635598746876e-07, |
| "loss": 0.3827, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10169753432273865, |
| "step": 720, |
| "valid_targets_mean": 993.0, |
| "valid_targets_min": 347 |
| }, |
| { |
| "epoch": 4.6208, |
| "grad_norm": 0.5976359862832646, |
| "learning_rate": 7.32287808028389e-07, |
| "loss": 0.3848, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10116741061210632, |
| "step": 725, |
| "valid_targets_mean": 912.4, |
| "valid_targets_min": 383 |
| }, |
| { |
| "epoch": 4.6528, |
| "grad_norm": 0.6339445206464914, |
| "learning_rate": 6.177573050615327e-07, |
| "loss": 0.3946, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08657615631818771, |
| "step": 730, |
| "valid_targets_mean": 686.4, |
| "valid_targets_min": 384 |
| }, |
| { |
| "epoch": 4.6848, |
| "grad_norm": 0.5154419341958988, |
| "learning_rate": 5.128211721119213e-07, |
| "loss": 0.3741, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08375303447246552, |
| "step": 735, |
| "valid_targets_mean": 967.8, |
| "valid_targets_min": 383 |
| }, |
| { |
| "epoch": 4.7168, |
| "grad_norm": 0.6321852313540388, |
| "learning_rate": 4.175313534309755e-07, |
| "loss": 0.3956, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13004527986049652, |
| "step": 740, |
| "valid_targets_mean": 931.1, |
| "valid_targets_min": 399 |
| }, |
| { |
| "epoch": 4.7488, |
| "grad_norm": 0.6067056281796016, |
| "learning_rate": 3.319350182649861e-07, |
| "loss": 0.3782, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1023097038269043, |
| "step": 745, |
| "valid_targets_mean": 758.2, |
| "valid_targets_min": 303 |
| }, |
| { |
| "epoch": 4.7808, |
| "grad_norm": 0.6919237537475112, |
| "learning_rate": 2.560745375059392e-07, |
| "loss": 0.3991, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08634231984615326, |
| "step": 750, |
| "valid_targets_mean": 746.2, |
| "valid_targets_min": 390 |
| }, |
| { |
| "epoch": 4.8128, |
| "grad_norm": 0.5637550282681978, |
| "learning_rate": 1.8998746271758016e-07, |
| "loss": 0.383, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10297013819217682, |
| "step": 755, |
| "valid_targets_mean": 907.4, |
| "valid_targets_min": 393 |
| }, |
| { |
| "epoch": 4.8448, |
| "grad_norm": 0.5702544895441023, |
| "learning_rate": 1.337065075470778e-07, |
| "loss": 0.3915, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.15079092979431152, |
| "step": 760, |
| "valid_targets_mean": 1253.9, |
| "valid_targets_min": 356 |
| }, |
| { |
| "epoch": 4.8768, |
| "grad_norm": 0.5882537057091088, |
| "learning_rate": 8.725953153150279e-08, |
| "loss": 0.3964, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07823218405246735, |
| "step": 765, |
| "valid_targets_mean": 649.6, |
| "valid_targets_min": 448 |
| }, |
| { |
| "epoch": 4.9088, |
| "grad_norm": 0.6073688670028073, |
| "learning_rate": 5.066952630711886e-08, |
| "loss": 0.3864, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07631102204322815, |
| "step": 770, |
| "valid_targets_mean": 616.8, |
| "valid_targets_min": 379 |
| }, |
| { |
| "epoch": 4.9408, |
| "grad_norm": 0.5206072014992669, |
| "learning_rate": 2.3954604228342283e-08, |
| "loss": 0.3705, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09787902235984802, |
| "step": 775, |
| "valid_targets_mean": 1027.2, |
| "valid_targets_min": 365 |
| }, |
| { |
| "epoch": 4.9728, |
| "grad_norm": 0.5742498548264887, |
| "learning_rate": 7.12798940197601e-09, |
| "loss": 0.3679, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10835174471139908, |
| "step": 780, |
| "valid_targets_mean": 1055.2, |
| "valid_targets_min": 405 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.1933161078427301, |
| "learning_rate": 1.9801114115480802e-10, |
| "loss": 0.3872, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.391443133354187, |
| "step": 785, |
| "valid_targets_mean": 719.2, |
| "valid_targets_min": 319 |
| }, |
| { |
| "epoch": 5.0, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.391443133354187, |
| "step": 785, |
| "total_flos": 2.9988290575938355e+17, |
| "train_loss": 0.4420624729934012, |
| "train_runtime": 7853.4449, |
| "train_samples_per_second": 6.363, |
| "train_steps_per_second": 0.1, |
| "valid_targets_mean": 719.2, |
| "valid_targets_min": 319 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 785, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.9988290575938355e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|