| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 785, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 5.123559399121411, | |
| "learning_rate": 2.0253164556962026e-06, | |
| "loss": 0.7118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19307199120521545, | |
| "step": 5, | |
| "valid_targets_mean": 3871.6, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 3.763931962921917, | |
| "learning_rate": 4.556962025316456e-06, | |
| "loss": 0.6943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22809526324272156, | |
| "step": 10, | |
| "valid_targets_mean": 5613.6, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 2.0183154742430185, | |
| "learning_rate": 7.08860759493671e-06, | |
| "loss": 0.6395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09760536253452301, | |
| "step": 15, | |
| "valid_targets_mean": 2523.2, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.9352948580258807, | |
| "learning_rate": 9.620253164556963e-06, | |
| "loss": 0.6191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12820899486541748, | |
| "step": 20, | |
| "valid_targets_mean": 3427.8, | |
| "valid_targets_min": 475 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.7628090891296735, | |
| "learning_rate": 1.2151898734177216e-05, | |
| "loss": 0.566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16758130490779877, | |
| "step": 25, | |
| "valid_targets_mean": 4205.4, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.5686185647713958, | |
| "learning_rate": 1.468354430379747e-05, | |
| "loss": 0.5366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11331957578659058, | |
| "step": 30, | |
| "valid_targets_mean": 3240.8, | |
| "valid_targets_min": 853 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.45416630652182394, | |
| "learning_rate": 1.7215189873417723e-05, | |
| "loss": 0.5315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10806378722190857, | |
| "step": 35, | |
| "valid_targets_mean": 3366.1, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.39087482012009667, | |
| "learning_rate": 1.974683544303798e-05, | |
| "loss": 0.5105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09493663161993027, | |
| "step": 40, | |
| "valid_targets_mean": 3527.8, | |
| "valid_targets_min": 984 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.34531644205256307, | |
| "learning_rate": 2.2278481012658228e-05, | |
| "loss": 0.4909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1321936547756195, | |
| "step": 45, | |
| "valid_targets_mean": 4917.8, | |
| "valid_targets_min": 958 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.3097489395021709, | |
| "learning_rate": 2.481012658227848e-05, | |
| "loss": 0.4556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1298643797636032, | |
| "step": 50, | |
| "valid_targets_mean": 5106.7, | |
| "valid_targets_min": 499 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.3283831471489093, | |
| "learning_rate": 2.7341772151898737e-05, | |
| "loss": 0.4844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1309063732624054, | |
| "step": 55, | |
| "valid_targets_mean": 3499.2, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.2516293027308092, | |
| "learning_rate": 2.987341772151899e-05, | |
| "loss": 0.4396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1051831990480423, | |
| "step": 60, | |
| "valid_targets_mean": 5455.0, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.2854946560412455, | |
| "learning_rate": 3.240506329113924e-05, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12581893801689148, | |
| "step": 65, | |
| "valid_targets_mean": 4816.4, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.2543739735682122, | |
| "learning_rate": 3.49367088607595e-05, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12147209048271179, | |
| "step": 70, | |
| "valid_targets_mean": 5330.7, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.26226792045732755, | |
| "learning_rate": 3.746835443037975e-05, | |
| "loss": 0.4063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11655369400978088, | |
| "step": 75, | |
| "valid_targets_mean": 5211.1, | |
| "valid_targets_min": 563 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.28395063906098045, | |
| "learning_rate": 4e-05, | |
| "loss": 0.4313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05843083932995796, | |
| "step": 80, | |
| "valid_targets_mean": 1917.4, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.28239078877411444, | |
| "learning_rate": 3.999504991751045e-05, | |
| "loss": 0.415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0934157520532608, | |
| "step": 85, | |
| "valid_targets_mean": 3886.2, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.30999355830338726, | |
| "learning_rate": 3.9980202120373464e-05, | |
| "loss": 0.4395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08777200430631638, | |
| "step": 90, | |
| "valid_targets_mean": 3050.1, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.2621011383012756, | |
| "learning_rate": 3.995546395837111e-05, | |
| "loss": 0.426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09876669198274612, | |
| "step": 95, | |
| "valid_targets_mean": 4814.9, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.3154358811654234, | |
| "learning_rate": 3.992084767709763e-05, | |
| "loss": 0.433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13018369674682617, | |
| "step": 100, | |
| "valid_targets_mean": 3646.2, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.25742017828747193, | |
| "learning_rate": 3.987637041189781e-05, | |
| "loss": 0.4266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08610528707504272, | |
| "step": 105, | |
| "valid_targets_mean": 4598.8, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.2800910342040014, | |
| "learning_rate": 3.982205417938482e-05, | |
| "loss": 0.4195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0918370857834816, | |
| "step": 110, | |
| "valid_targets_mean": 3425.6, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.28543479156246787, | |
| "learning_rate": 3.975792586654179e-05, | |
| "loss": 0.4024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08587724715471268, | |
| "step": 115, | |
| "valid_targets_mean": 3006.7, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.2828159199589699, | |
| "learning_rate": 3.968401721741259e-05, | |
| "loss": 0.3933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10949550569057465, | |
| "step": 120, | |
| "valid_targets_mean": 4528.8, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.37485843692930904, | |
| "learning_rate": 3.960036481738819e-05, | |
| "loss": 0.4241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12021397054195404, | |
| "step": 125, | |
| "valid_targets_mean": 4176.4, | |
| "valid_targets_min": 803 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.2883979451864893, | |
| "learning_rate": 3.950701007509667e-05, | |
| "loss": 0.4257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13099262118339539, | |
| "step": 130, | |
| "valid_targets_mean": 4978.4, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.27738107252967953, | |
| "learning_rate": 3.940399920190552e-05, | |
| "loss": 0.3934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09928615391254425, | |
| "step": 135, | |
| "valid_targets_mean": 4291.4, | |
| "valid_targets_min": 837 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.2819931507018627, | |
| "learning_rate": 3.92913831890467e-05, | |
| "loss": 0.407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08053112030029297, | |
| "step": 140, | |
| "valid_targets_mean": 2928.4, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.3009961981778164, | |
| "learning_rate": 3.916921778237556e-05, | |
| "loss": 0.4049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10841743648052216, | |
| "step": 145, | |
| "valid_targets_mean": 4792.4, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.305413981825015, | |
| "learning_rate": 3.903756345477612e-05, | |
| "loss": 0.4048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10222049057483673, | |
| "step": 150, | |
| "valid_targets_mean": 4030.8, | |
| "valid_targets_min": 1160 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.2739272317952161, | |
| "learning_rate": 3.889648537622657e-05, | |
| "loss": 0.4085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.065219447016716, | |
| "step": 155, | |
| "valid_targets_mean": 3643.8, | |
| "valid_targets_min": 545 | |
| }, | |
| { | |
| "epoch": 1.0192, | |
| "grad_norm": 0.2983476898860103, | |
| "learning_rate": 3.874605338153952e-05, | |
| "loss": 0.3996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08826182782649994, | |
| "step": 160, | |
| "valid_targets_mean": 2892.6, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 1.0512, | |
| "grad_norm": 0.2610768559227238, | |
| "learning_rate": 3.8586341935793265e-05, | |
| "loss": 0.4012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09568485617637634, | |
| "step": 165, | |
| "valid_targets_mean": 4814.1, | |
| "valid_targets_min": 528 | |
| }, | |
| { | |
| "epoch": 1.0832, | |
| "grad_norm": 0.304482614131595, | |
| "learning_rate": 3.841743009747089e-05, | |
| "loss": 0.3739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09390249848365784, | |
| "step": 170, | |
| "valid_targets_mean": 3810.7, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 1.1152, | |
| "grad_norm": 0.3091772796967434, | |
| "learning_rate": 3.8239401479325714e-05, | |
| "loss": 0.389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07383556663990021, | |
| "step": 175, | |
| "valid_targets_mean": 2320.4, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 1.1472, | |
| "grad_norm": 0.2721877197502448, | |
| "learning_rate": 3.8052344206992276e-05, | |
| "loss": 0.4076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10070022195577621, | |
| "step": 180, | |
| "valid_targets_mean": 4155.7, | |
| "valid_targets_min": 1030 | |
| }, | |
| { | |
| "epoch": 1.1792, | |
| "grad_norm": 0.2821130253246887, | |
| "learning_rate": 3.7856350875363396e-05, | |
| "loss": 0.3951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0644916445016861, | |
| "step": 185, | |
| "valid_targets_mean": 2258.2, | |
| "valid_targets_min": 715 | |
| }, | |
| { | |
| "epoch": 1.2112, | |
| "grad_norm": 0.25408511996506994, | |
| "learning_rate": 3.765151850275497e-05, | |
| "loss": 0.3812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09500850737094879, | |
| "step": 190, | |
| "valid_targets_mean": 4719.8, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 1.2432, | |
| "grad_norm": 0.2535382532842216, | |
| "learning_rate": 3.7437948482881104e-05, | |
| "loss": 0.3843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08362163603305817, | |
| "step": 195, | |
| "valid_targets_mean": 4427.1, | |
| "valid_targets_min": 1226 | |
| }, | |
| { | |
| "epoch": 1.2752, | |
| "grad_norm": 0.30222803134860965, | |
| "learning_rate": 3.721574653466336e-05, | |
| "loss": 0.3899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09327980130910873, | |
| "step": 200, | |
| "valid_targets_mean": 3980.8, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 1.3072, | |
| "grad_norm": 0.28826618143514643, | |
| "learning_rate": 3.698502264989903e-05, | |
| "loss": 0.399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09443897753953934, | |
| "step": 205, | |
| "valid_targets_mean": 4265.8, | |
| "valid_targets_min": 831 | |
| }, | |
| { | |
| "epoch": 1.3392, | |
| "grad_norm": 0.26669404523258555, | |
| "learning_rate": 3.674589103881432e-05, | |
| "loss": 0.3754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08040675520896912, | |
| "step": 210, | |
| "valid_targets_mean": 3924.1, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 1.3712, | |
| "grad_norm": 0.25881484017371537, | |
| "learning_rate": 3.64984700735293e-05, | |
| "loss": 0.3914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07955779135227203, | |
| "step": 215, | |
| "valid_targets_mean": 3554.8, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 1.4032, | |
| "grad_norm": 0.244343214005911, | |
| "learning_rate": 3.624288222946273e-05, | |
| "loss": 0.3745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10236145555973053, | |
| "step": 220, | |
| "valid_targets_mean": 5353.9, | |
| "valid_targets_min": 1234 | |
| }, | |
| { | |
| "epoch": 1.4352, | |
| "grad_norm": 0.2951484456032743, | |
| "learning_rate": 3.597925402470578e-05, | |
| "loss": 0.3779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11778046935796738, | |
| "step": 225, | |
| "valid_targets_mean": 4378.1, | |
| "valid_targets_min": 504 | |
| }, | |
| { | |
| "epoch": 1.4672, | |
| "grad_norm": 0.2872888179186468, | |
| "learning_rate": 3.570771595739445e-05, | |
| "loss": 0.3849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1087140440940857, | |
| "step": 230, | |
| "valid_targets_mean": 3771.1, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 1.4992, | |
| "grad_norm": 0.2664791631990777, | |
| "learning_rate": 3.5428402441111964e-05, | |
| "loss": 0.389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08366061747074127, | |
| "step": 235, | |
| "valid_targets_mean": 3675.7, | |
| "valid_targets_min": 558 | |
| }, | |
| { | |
| "epoch": 1.5312000000000001, | |
| "grad_norm": 0.32231879775320366, | |
| "learning_rate": 3.5141451738352936e-05, | |
| "loss": 0.372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07299835979938507, | |
| "step": 240, | |
| "valid_targets_mean": 2671.1, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 1.5632000000000001, | |
| "grad_norm": 0.3126277244374383, | |
| "learning_rate": 3.4847005892082266e-05, | |
| "loss": 0.3885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08901453018188477, | |
| "step": 245, | |
| "valid_targets_mean": 3037.4, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 1.5952, | |
| "grad_norm": 0.3125192265093272, | |
| "learning_rate": 3.454521065542273e-05, | |
| "loss": 0.3952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08856867253780365, | |
| "step": 250, | |
| "valid_targets_mean": 2858.2, | |
| "valid_targets_min": 551 | |
| }, | |
| { | |
| "epoch": 1.6272, | |
| "grad_norm": 0.28195550340542574, | |
| "learning_rate": 3.423621541950597e-05, | |
| "loss": 0.3977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09304862469434738, | |
| "step": 255, | |
| "valid_targets_mean": 3211.8, | |
| "valid_targets_min": 927 | |
| }, | |
| { | |
| "epoch": 1.6592, | |
| "grad_norm": 0.26938548707427185, | |
| "learning_rate": 3.3920173139522664e-05, | |
| "loss": 0.3949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09955386817455292, | |
| "step": 260, | |
| "valid_targets_mean": 3969.5, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 1.6912, | |
| "grad_norm": 0.2483726756742141, | |
| "learning_rate": 3.35972402590084e-05, | |
| "loss": 0.396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10629540681838989, | |
| "step": 265, | |
| "valid_targets_mean": 4389.2, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 1.7231999999999998, | |
| "grad_norm": 0.29123410626612384, | |
| "learning_rate": 3.326757663240291e-05, | |
| "loss": 0.3988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10789086669683456, | |
| "step": 270, | |
| "valid_targets_mean": 3529.2, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 1.7551999999999999, | |
| "grad_norm": 0.30280861630651545, | |
| "learning_rate": 3.293134544592073e-05, | |
| "loss": 0.3823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11701425909996033, | |
| "step": 275, | |
| "valid_targets_mean": 4547.2, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 1.7872, | |
| "grad_norm": 0.2985462451149467, | |
| "learning_rate": 3.258871313677274e-05, | |
| "loss": 0.3847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11407148838043213, | |
| "step": 280, | |
| "valid_targets_mean": 3978.5, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 1.8192, | |
| "grad_norm": 0.25837594581231305, | |
| "learning_rate": 3.2239849310778316e-05, | |
| "loss": 0.377, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09650738537311554, | |
| "step": 285, | |
| "valid_targets_mean": 4518.4, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 1.8512, | |
| "grad_norm": 0.2939888765907172, | |
| "learning_rate": 3.188492665840909e-05, | |
| "loss": 0.3796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10243727266788483, | |
| "step": 290, | |
| "valid_targets_mean": 4090.3, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 1.8832, | |
| "grad_norm": 0.3101251423188906, | |
| "learning_rate": 3.1524120869305726e-05, | |
| "loss": 0.3832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11230114102363586, | |
| "step": 295, | |
| "valid_targets_mean": 4298.2, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 1.9152, | |
| "grad_norm": 0.28589603481487275, | |
| "learning_rate": 3.11576105453101e-05, | |
| "loss": 0.3749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09055155515670776, | |
| "step": 300, | |
| "valid_targets_mean": 3955.1, | |
| "valid_targets_min": 718 | |
| }, | |
| { | |
| "epoch": 1.9472, | |
| "grad_norm": 0.2984962488323581, | |
| "learning_rate": 3.0785577112055916e-05, | |
| "loss": 0.3687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1139441579580307, | |
| "step": 305, | |
| "valid_targets_mean": 3979.9, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 1.9792, | |
| "grad_norm": 0.28388503224497175, | |
| "learning_rate": 3.040820472916153e-05, | |
| "loss": 0.3912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09116608649492264, | |
| "step": 310, | |
| "valid_targets_mean": 3222.1, | |
| "valid_targets_min": 920 | |
| }, | |
| { | |
| "epoch": 2.0064, | |
| "grad_norm": 0.30953531700586717, | |
| "learning_rate": 3.002568019906939e-05, | |
| "loss": 0.4097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11616051197052002, | |
| "step": 315, | |
| "valid_targets_mean": 3531.1, | |
| "valid_targets_min": 716 | |
| }, | |
| { | |
| "epoch": 2.0384, | |
| "grad_norm": 0.30916414883354776, | |
| "learning_rate": 2.963819287457733e-05, | |
| "loss": 0.3745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10336826741695404, | |
| "step": 320, | |
| "valid_targets_mean": 5022.9, | |
| "valid_targets_min": 606 | |
| }, | |
| { | |
| "epoch": 2.0704, | |
| "grad_norm": 0.29887399892454847, | |
| "learning_rate": 2.924593456510733e-05, | |
| "loss": 0.3592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07415148615837097, | |
| "step": 325, | |
| "valid_targets_mean": 2968.1, | |
| "valid_targets_min": 705 | |
| }, | |
| { | |
| "epoch": 2.1024, | |
| "grad_norm": 0.24171216751728572, | |
| "learning_rate": 2.8849099441758306e-05, | |
| "loss": 0.3647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08233106136322021, | |
| "step": 330, | |
| "valid_targets_mean": 3958.2, | |
| "valid_targets_min": 1003 | |
| }, | |
| { | |
| "epoch": 2.1344, | |
| "grad_norm": 0.2632145892192255, | |
| "learning_rate": 2.844788394118979e-05, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11405801773071289, | |
| "step": 335, | |
| "valid_targets_mean": 5185.2, | |
| "valid_targets_min": 1419 | |
| }, | |
| { | |
| "epoch": 2.1664, | |
| "grad_norm": 0.23978443823562096, | |
| "learning_rate": 2.8042486668384164e-05, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09842608869075775, | |
| "step": 340, | |
| "valid_targets_mean": 6315.8, | |
| "valid_targets_min": 560 | |
| }, | |
| { | |
| "epoch": 2.1984, | |
| "grad_norm": 0.3275638835480335, | |
| "learning_rate": 2.7633108298335582e-05, | |
| "loss": 0.3811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13050734996795654, | |
| "step": 345, | |
| "valid_targets_mean": 3756.8, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 2.2304, | |
| "grad_norm": 0.2558159790158855, | |
| "learning_rate": 2.721995147671416e-05, | |
| "loss": 0.3548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08216506987810135, | |
| "step": 350, | |
| "valid_targets_mean": 3335.6, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 2.2624, | |
| "grad_norm": 0.26499037503286305, | |
| "learning_rate": 2.68032207195547e-05, | |
| "loss": 0.3603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08695670962333679, | |
| "step": 355, | |
| "valid_targets_mean": 4812.5, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 2.2944, | |
| "grad_norm": 0.2703002516233749, | |
| "learning_rate": 2.6383122312019604e-05, | |
| "loss": 0.3642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09461858868598938, | |
| "step": 360, | |
| "valid_targets_mean": 3527.5, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 2.3264, | |
| "grad_norm": 0.2946386586063812, | |
| "learning_rate": 2.595986420628597e-05, | |
| "loss": 0.3542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09603875875473022, | |
| "step": 365, | |
| "valid_targets_mean": 2959.8, | |
| "valid_targets_min": 643 | |
| }, | |
| { | |
| "epoch": 2.3584, | |
| "grad_norm": 0.27633532112311515, | |
| "learning_rate": 2.5533655918607573e-05, | |
| "loss": 0.3731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07474471628665924, | |
| "step": 370, | |
| "valid_targets_mean": 2618.7, | |
| "valid_targets_min": 504 | |
| }, | |
| { | |
| "epoch": 2.3904, | |
| "grad_norm": 0.28908010965611014, | |
| "learning_rate": 2.510470842560259e-05, | |
| "loss": 0.3574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07259393483400345, | |
| "step": 375, | |
| "valid_targets_mean": 2407.9, | |
| "valid_targets_min": 724 | |
| }, | |
| { | |
| "epoch": 2.4224, | |
| "grad_norm": 0.27717188895417405, | |
| "learning_rate": 2.467323405981841e-05, | |
| "loss": 0.3766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0742412656545639, | |
| "step": 380, | |
| "valid_targets_mean": 3299.6, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 2.4544, | |
| "grad_norm": 0.270994998144437, | |
| "learning_rate": 2.423944640462533e-05, | |
| "loss": 0.38, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11589524894952774, | |
| "step": 385, | |
| "valid_targets_mean": 5340.8, | |
| "valid_targets_min": 547 | |
| }, | |
| { | |
| "epoch": 2.4864, | |
| "grad_norm": 0.28124609118517746, | |
| "learning_rate": 2.3803560188490968e-05, | |
| "loss": 0.3725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10828963667154312, | |
| "step": 390, | |
| "valid_targets_mean": 4697.1, | |
| "valid_targets_min": 1664 | |
| }, | |
| { | |
| "epoch": 2.5183999999999997, | |
| "grad_norm": 0.26180127716696405, | |
| "learning_rate": 2.336579117868789e-05, | |
| "loss": 0.3742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10938633978366852, | |
| "step": 395, | |
| "valid_targets_mean": 5047.6, | |
| "valid_targets_min": 558 | |
| }, | |
| { | |
| "epoch": 2.5504, | |
| "grad_norm": 0.24760814106417292, | |
| "learning_rate": 2.292635607448711e-05, | |
| "loss": 0.3703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09030160307884216, | |
| "step": 400, | |
| "valid_targets_mean": 4877.5, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 2.5824, | |
| "grad_norm": 0.26402964956326563, | |
| "learning_rate": 2.248547239989008e-05, | |
| "loss": 0.3699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08305498957633972, | |
| "step": 405, | |
| "valid_targets_mean": 3571.9, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 2.6144, | |
| "grad_norm": 0.3345187595598977, | |
| "learning_rate": 2.204335839595255e-05, | |
| "loss": 0.3696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08089946210384369, | |
| "step": 410, | |
| "valid_targets_mean": 3421.5, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 2.6464, | |
| "grad_norm": 0.28701488129335584, | |
| "learning_rate": 2.1600232912753452e-05, | |
| "loss": 0.3564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0921049565076828, | |
| "step": 415, | |
| "valid_targets_mean": 4721.9, | |
| "valid_targets_min": 1078 | |
| }, | |
| { | |
| "epoch": 2.6784, | |
| "grad_norm": 0.26999818817996407, | |
| "learning_rate": 2.1156315301062293e-05, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08706622570753098, | |
| "step": 420, | |
| "valid_targets_mean": 3336.2, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 2.7104, | |
| "grad_norm": 0.2755453839282535, | |
| "learning_rate": 2.0711825303758712e-05, | |
| "loss": 0.3555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07806932181119919, | |
| "step": 425, | |
| "valid_targets_mean": 3672.5, | |
| "valid_targets_min": 964 | |
| }, | |
| { | |
| "epoch": 2.7424, | |
| "grad_norm": 0.2522995637255699, | |
| "learning_rate": 2.0266982947057962e-05, | |
| "loss": 0.371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06017307564616203, | |
| "step": 430, | |
| "valid_targets_mean": 2447.6, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 2.7744, | |
| "grad_norm": 0.2818674090065295, | |
| "learning_rate": 1.9822008431596083e-05, | |
| "loss": 0.3709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08891990780830383, | |
| "step": 435, | |
| "valid_targets_mean": 2737.7, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 2.8064, | |
| "grad_norm": 0.26578812021591164, | |
| "learning_rate": 1.937712202342881e-05, | |
| "loss": 0.3814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08054114878177643, | |
| "step": 440, | |
| "valid_targets_mean": 4190.1, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 2.8384, | |
| "grad_norm": 0.2531360427737702, | |
| "learning_rate": 1.8932543944998037e-05, | |
| "loss": 0.3686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10457911342382431, | |
| "step": 445, | |
| "valid_targets_mean": 4728.6, | |
| "valid_targets_min": 1129 | |
| }, | |
| { | |
| "epoch": 2.8704, | |
| "grad_norm": 0.27440382401044455, | |
| "learning_rate": 1.8488494266119877e-05, | |
| "loss": 0.3532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10408136248588562, | |
| "step": 450, | |
| "valid_targets_mean": 4394.5, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 2.9024, | |
| "grad_norm": 0.25081918128174724, | |
| "learning_rate": 1.804519279504834e-05, | |
| "loss": 0.3554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11893998831510544, | |
| "step": 455, | |
| "valid_targets_mean": 5693.7, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 2.9344, | |
| "grad_norm": 0.2834077338782865, | |
| "learning_rate": 1.7602858969668365e-05, | |
| "loss": 0.3743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09525357186794281, | |
| "step": 460, | |
| "valid_targets_mean": 3857.6, | |
| "valid_targets_min": 511 | |
| }, | |
| { | |
| "epoch": 2.9664, | |
| "grad_norm": 0.25065749565011525, | |
| "learning_rate": 1.716171174887231e-05, | |
| "loss": 0.3589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07195942103862762, | |
| "step": 465, | |
| "valid_targets_mean": 3614.1, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 2.9984, | |
| "grad_norm": 0.25261737291599506, | |
| "learning_rate": 1.6721969504173484e-05, | |
| "loss": 0.3804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10894008725881577, | |
| "step": 470, | |
| "valid_targets_mean": 4318.6, | |
| "valid_targets_min": 1447 | |
| }, | |
| { | |
| "epoch": 3.0256, | |
| "grad_norm": 0.29585483113925043, | |
| "learning_rate": 1.628384991161041e-05, | |
| "loss": 0.3534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07916560769081116, | |
| "step": 475, | |
| "valid_targets_mean": 3204.2, | |
| "valid_targets_min": 489 | |
| }, | |
| { | |
| "epoch": 3.0576, | |
| "grad_norm": 0.27284535141909216, | |
| "learning_rate": 1.5847569843995452e-05, | |
| "loss": 0.3442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1198822408914566, | |
| "step": 480, | |
| "valid_targets_mean": 5402.2, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 3.0896, | |
| "grad_norm": 0.23685736105382676, | |
| "learning_rate": 1.5413345263560922e-05, | |
| "loss": 0.3549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0932244211435318, | |
| "step": 485, | |
| "valid_targets_mean": 5391.9, | |
| "valid_targets_min": 847 | |
| }, | |
| { | |
| "epoch": 3.1216, | |
| "grad_norm": 0.24219250569033773, | |
| "learning_rate": 1.4981391115056032e-05, | |
| "loss": 0.3404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07279252260923386, | |
| "step": 490, | |
| "valid_targets_mean": 4604.3, | |
| "valid_targets_min": 936 | |
| }, | |
| { | |
| "epoch": 3.1536, | |
| "grad_norm": 0.2952260986090949, | |
| "learning_rate": 1.455192121934748e-05, | |
| "loss": 0.3653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08438520133495331, | |
| "step": 495, | |
| "valid_targets_mean": 3512.8, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 3.1856, | |
| "grad_norm": 0.2592994691035553, | |
| "learning_rate": 1.4125148167576303e-05, | |
| "loss": 0.3671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11322016268968582, | |
| "step": 500, | |
| "valid_targets_mean": 6657.6, | |
| "valid_targets_min": 569 | |
| }, | |
| { | |
| "epoch": 3.2176, | |
| "grad_norm": 0.24434615644800672, | |
| "learning_rate": 1.3701283215923563e-05, | |
| "loss": 0.3533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08228069543838501, | |
| "step": 505, | |
| "valid_targets_mean": 4968.6, | |
| "valid_targets_min": 861 | |
| }, | |
| { | |
| "epoch": 3.2496, | |
| "grad_norm": 0.2658882802024102, | |
| "learning_rate": 1.328053618103677e-05, | |
| "loss": 0.3457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0911003053188324, | |
| "step": 510, | |
| "valid_targets_mean": 4631.8, | |
| "valid_targets_min": 535 | |
| }, | |
| { | |
| "epoch": 3.2816, | |
| "grad_norm": 0.24570090059120336, | |
| "learning_rate": 1.2863115336168916e-05, | |
| "loss": 0.353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11129696667194366, | |
| "step": 515, | |
| "valid_targets_mean": 5717.9, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 3.3136, | |
| "grad_norm": 0.23399943576216928, | |
| "learning_rate": 1.2449227308081509e-05, | |
| "loss": 0.3734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10479730367660522, | |
| "step": 520, | |
| "valid_targets_mean": 5044.2, | |
| "valid_targets_min": 558 | |
| }, | |
| { | |
| "epoch": 3.3456, | |
| "grad_norm": 0.25081899345833053, | |
| "learning_rate": 1.2039076974762587e-05, | |
| "loss": 0.3646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07487428188323975, | |
| "step": 525, | |
| "valid_targets_mean": 3598.0, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 3.3776, | |
| "grad_norm": 0.26689722263092003, | |
| "learning_rate": 1.163286736401044e-05, | |
| "loss": 0.3657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0844791829586029, | |
| "step": 530, | |
| "valid_targets_mean": 3690.6, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 3.4096, | |
| "grad_norm": 0.25794663356657005, | |
| "learning_rate": 1.123079955293322e-05, | |
| "loss": 0.3648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06158680468797684, | |
| "step": 535, | |
| "valid_targets_mean": 2746.2, | |
| "valid_targets_min": 734 | |
| }, | |
| { | |
| "epoch": 3.4416, | |
| "grad_norm": 0.28503864447699606, | |
| "learning_rate": 1.0833072568414037e-05, | |
| "loss": 0.3717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11139211058616638, | |
| "step": 540, | |
| "valid_targets_mean": 4353.8, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 3.4736000000000002, | |
| "grad_norm": 0.279755912862288, | |
| "learning_rate": 1.0439883288591057e-05, | |
| "loss": 0.3635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11007075011730194, | |
| "step": 545, | |
| "valid_targets_mean": 3903.1, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 3.5056000000000003, | |
| "grad_norm": 0.30400557108715487, | |
| "learning_rate": 1.0051426345401202e-05, | |
| "loss": 0.3549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08389125019311905, | |
| "step": 550, | |
| "valid_targets_mean": 3952.9, | |
| "valid_targets_min": 543 | |
| }, | |
| { | |
| "epoch": 3.5376, | |
| "grad_norm": 0.28687310975076874, | |
| "learning_rate": 9.667894028235704e-06, | |
| "loss": 0.3508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05745293200016022, | |
| "step": 555, | |
| "valid_targets_mean": 2451.6, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 3.5696, | |
| "grad_norm": 0.2730673931036646, | |
| "learning_rate": 9.289476188755315e-06, | |
| "loss": 0.3652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09018082916736603, | |
| "step": 560, | |
| "valid_targets_mean": 3772.7, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 3.6016, | |
| "grad_norm": 0.26040469462819876, | |
| "learning_rate": 8.916360146912122e-06, | |
| "loss": 0.3618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09662330150604248, | |
| "step": 565, | |
| "valid_targets_mean": 4347.9, | |
| "valid_targets_min": 649 | |
| }, | |
| { | |
| "epoch": 3.6336, | |
| "grad_norm": 0.3422549700068142, | |
| "learning_rate": 8.548730598224646e-06, | |
| "loss": 0.3576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09894652664661407, | |
| "step": 570, | |
| "valid_targets_mean": 4176.6, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 3.6656, | |
| "grad_norm": 0.2721675552437772, | |
| "learning_rate": 8.186769522352053e-06, | |
| "loss": 0.3636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09673941880464554, | |
| "step": 575, | |
| "valid_targets_mean": 4116.6, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 3.6976, | |
| "grad_norm": 0.27410889142640954, | |
| "learning_rate": 7.830656093012714e-06, | |
| "loss": 0.3569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1134338453412056, | |
| "step": 580, | |
| "valid_targets_mean": 5329.2, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 3.7296, | |
| "grad_norm": 0.27093217629363314, | |
| "learning_rate": 7.480566589291696e-06, | |
| "loss": 0.3473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10096120834350586, | |
| "step": 585, | |
| "valid_targets_mean": 4416.1, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 3.7616, | |
| "grad_norm": 0.4071819039904009, | |
| "learning_rate": 7.1366743083812285e-06, | |
| "loss": 0.3505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08029647171497345, | |
| "step": 590, | |
| "valid_targets_mean": 2834.7, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 3.7936, | |
| "grad_norm": 0.2543091297081118, | |
| "learning_rate": 6.799149479797101e-06, | |
| "loss": 0.3513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09890288859605789, | |
| "step": 595, | |
| "valid_targets_mean": 4795.7, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 3.8256, | |
| "grad_norm": 0.27488756170401624, | |
| "learning_rate": 6.4681591811137e-06, | |
| "loss": 0.3499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08060499280691147, | |
| "step": 600, | |
| "valid_targets_mean": 3579.9, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 3.8576, | |
| "grad_norm": 0.3073688917404464, | |
| "learning_rate": 6.143867255259197e-06, | |
| "loss": 0.364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06048120558261871, | |
| "step": 605, | |
| "valid_targets_mean": 2302.8, | |
| "valid_targets_min": 673 | |
| }, | |
| { | |
| "epoch": 3.8895999999999997, | |
| "grad_norm": 0.27290806728995437, | |
| "learning_rate": 5.8264342294119504e-06, | |
| "loss": 0.3594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11828367412090302, | |
| "step": 610, | |
| "valid_targets_mean": 6998.1, | |
| "valid_targets_min": 1277 | |
| }, | |
| { | |
| "epoch": 3.9215999999999998, | |
| "grad_norm": 0.2919267461992761, | |
| "learning_rate": 5.516017235538258e-06, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1104055643081665, | |
| "step": 615, | |
| "valid_targets_mean": 4389.9, | |
| "valid_targets_min": 275 | |
| }, | |
| { | |
| "epoch": 3.9536, | |
| "grad_norm": 0.2473642063842536, | |
| "learning_rate": 5.212769932610695e-06, | |
| "loss": 0.3502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09933553636074066, | |
| "step": 620, | |
| "valid_targets_mean": 4802.8, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 3.9856, | |
| "grad_norm": 0.2412069973453617, | |
| "learning_rate": 4.916842430545681e-06, | |
| "loss": 0.3547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11441973596811295, | |
| "step": 625, | |
| "valid_targets_mean": 5423.8, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 4.0128, | |
| "grad_norm": 0.2831300435180799, | |
| "learning_rate": 4.628381215897837e-06, | |
| "loss": 0.3525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11363305151462555, | |
| "step": 630, | |
| "valid_targets_mean": 6128.9, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 4.0448, | |
| "grad_norm": 0.2512146545727681, | |
| "learning_rate": 4.347529079347914e-06, | |
| "loss": 0.3463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10569345206022263, | |
| "step": 635, | |
| "valid_targets_mean": 5054.3, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 4.0768, | |
| "grad_norm": 0.24033810416108486, | |
| "learning_rate": 4.074425045020247e-06, | |
| "loss": 0.3584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04762096703052521, | |
| "step": 640, | |
| "valid_targets_mean": 2127.2, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 4.1088, | |
| "grad_norm": 0.26224618720797604, | |
| "learning_rate": 3.8092043016646487e-06, | |
| "loss": 0.3567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10576556622982025, | |
| "step": 645, | |
| "valid_targets_mean": 4538.6, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 4.1408, | |
| "grad_norm": 0.2817999108462764, | |
| "learning_rate": 3.551998135736867e-06, | |
| "loss": 0.3611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07951150834560394, | |
| "step": 650, | |
| "valid_targets_mean": 3178.2, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 4.1728, | |
| "grad_norm": 0.31718463039344674, | |
| "learning_rate": 3.3029338664107267e-06, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1132584810256958, | |
| "step": 655, | |
| "valid_targets_mean": 3367.9, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 4.2048, | |
| "grad_norm": 0.2537483567842618, | |
| "learning_rate": 3.0621347825540625e-06, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06239619106054306, | |
| "step": 660, | |
| "valid_targets_mean": 3163.3, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 4.2368, | |
| "grad_norm": 0.25617713747366333, | |
| "learning_rate": 2.8297200816997183e-06, | |
| "loss": 0.3714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08043758571147919, | |
| "step": 665, | |
| "valid_targets_mean": 4096.4, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 4.2688, | |
| "grad_norm": 0.27890687706472184, | |
| "learning_rate": 2.605804811041803e-06, | |
| "loss": 0.3726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09695044159889221, | |
| "step": 670, | |
| "valid_targets_mean": 4406.8, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 4.3008, | |
| "grad_norm": 0.3105208198917944, | |
| "learning_rate": 2.390499810486351e-06, | |
| "loss": 0.3337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06947635859251022, | |
| "step": 675, | |
| "valid_targets_mean": 2855.2, | |
| "valid_targets_min": 763 | |
| }, | |
| { | |
| "epoch": 4.3328, | |
| "grad_norm": 0.2685314010918853, | |
| "learning_rate": 2.183911657784685e-06, | |
| "loss": 0.3515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11535772681236267, | |
| "step": 680, | |
| "valid_targets_mean": 4551.8, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 4.3648, | |
| "grad_norm": 0.29534774277282866, | |
| "learning_rate": 1.986142615776532e-06, | |
| "loss": 0.3453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08106249570846558, | |
| "step": 685, | |
| "valid_targets_mean": 2982.1, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 4.3968, | |
| "grad_norm": 0.2765677244413694, | |
| "learning_rate": 1.7972905817690644e-06, | |
| "loss": 0.3614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11062294244766235, | |
| "step": 690, | |
| "valid_targets_mean": 3802.2, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 4.4288, | |
| "grad_norm": 0.3202545975694623, | |
| "learning_rate": 1.617449039076955e-06, | |
| "loss": 0.3527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08397354185581207, | |
| "step": 695, | |
| "valid_targets_mean": 3328.4, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 4.4608, | |
| "grad_norm": 0.2834553173729245, | |
| "learning_rate": 1.4467070107473413e-06, | |
| "loss": 0.3586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07980514317750931, | |
| "step": 700, | |
| "valid_targets_mean": 3018.1, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 4.4928, | |
| "grad_norm": 0.2326209093506353, | |
| "learning_rate": 1.2851490154926816e-06, | |
| "loss": 0.3341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08851353079080582, | |
| "step": 705, | |
| "valid_targets_mean": 5364.4, | |
| "valid_targets_min": 808 | |
| }, | |
| { | |
| "epoch": 4.5248, | |
| "grad_norm": 0.27250326687612625, | |
| "learning_rate": 1.1328550258533211e-06, | |
| "loss": 0.3575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07718469202518463, | |
| "step": 710, | |
| "valid_targets_mean": 4557.2, | |
| "valid_targets_min": 933 | |
| }, | |
| { | |
| "epoch": 4.5568, | |
| "grad_norm": 0.26348768556638374, | |
| "learning_rate": 9.899004286103953e-07, | |
| "loss": 0.3615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11061061918735504, | |
| "step": 715, | |
| "valid_targets_mean": 4996.2, | |
| "valid_targets_min": 1069 | |
| }, | |
| { | |
| "epoch": 4.5888, | |
| "grad_norm": 0.2573371370876499, | |
| "learning_rate": 8.5635598746876e-07, | |
| "loss": 0.3462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07320088148117065, | |
| "step": 720, | |
| "valid_targets_mean": 3575.3, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 4.6208, | |
| "grad_norm": 0.25525024596196577, | |
| "learning_rate": 7.32287808028389e-07, | |
| "loss": 0.3368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07971251755952835, | |
| "step": 725, | |
| "valid_targets_mean": 3960.3, | |
| "valid_targets_min": 660 | |
| }, | |
| { | |
| "epoch": 4.6528, | |
| "grad_norm": 0.301705155211909, | |
| "learning_rate": 6.177573050615327e-07, | |
| "loss": 0.3612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09736718237400055, | |
| "step": 730, | |
| "valid_targets_mean": 3549.9, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 4.6848, | |
| "grad_norm": 0.30781397575318803, | |
| "learning_rate": 5.128211721119213e-07, | |
| "loss": 0.3521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11787542700767517, | |
| "step": 735, | |
| "valid_targets_mean": 3794.4, | |
| "valid_targets_min": 405 | |
| }, | |
| { | |
| "epoch": 4.7168, | |
| "grad_norm": 0.2985296116067968, | |
| "learning_rate": 4.175313534309755e-07, | |
| "loss": 0.3591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08046736568212509, | |
| "step": 740, | |
| "valid_targets_mean": 2845.8, | |
| "valid_targets_min": 708 | |
| }, | |
| { | |
| "epoch": 4.7488, | |
| "grad_norm": 0.291570948505226, | |
| "learning_rate": 3.319350182649861e-07, | |
| "loss": 0.3374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07322987169027328, | |
| "step": 745, | |
| "valid_targets_mean": 4419.3, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 4.7808, | |
| "grad_norm": 0.2369028363101534, | |
| "learning_rate": 2.560745375059392e-07, | |
| "loss": 0.3651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07131391763687134, | |
| "step": 750, | |
| "valid_targets_mean": 4349.6, | |
| "valid_targets_min": 778 | |
| }, | |
| { | |
| "epoch": 4.8128, | |
| "grad_norm": 0.28433777966757406, | |
| "learning_rate": 1.8998746271758016e-07, | |
| "loss": 0.355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09417184442281723, | |
| "step": 755, | |
| "valid_targets_mean": 3460.1, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 4.8448, | |
| "grad_norm": 0.2305339793527443, | |
| "learning_rate": 1.337065075470778e-07, | |
| "loss": 0.3477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05339543893933296, | |
| "step": 760, | |
| "valid_targets_mean": 3036.2, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 4.8768, | |
| "grad_norm": 0.29097125706132515, | |
| "learning_rate": 8.725953153150279e-08, | |
| "loss": 0.3401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08501491695642471, | |
| "step": 765, | |
| "valid_targets_mean": 3384.9, | |
| "valid_targets_min": 573 | |
| }, | |
| { | |
| "epoch": 4.9088, | |
| "grad_norm": 0.2818422936792934, | |
| "learning_rate": 5.066952630711886e-08, | |
| "loss": 0.3509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07093001902103424, | |
| "step": 770, | |
| "valid_targets_mean": 3446.8, | |
| "valid_targets_min": 499 | |
| }, | |
| { | |
| "epoch": 4.9408, | |
| "grad_norm": 0.27100953727905064, | |
| "learning_rate": 2.3954604228342283e-08, | |
| "loss": 0.3513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08591412007808685, | |
| "step": 775, | |
| "valid_targets_mean": 3519.9, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 4.9728, | |
| "grad_norm": 0.2886406832816968, | |
| "learning_rate": 7.12798940197601e-09, | |
| "loss": 0.3561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12035445868968964, | |
| "step": 780, | |
| "valid_targets_mean": 4119.5, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5774330853712365, | |
| "learning_rate": 1.9801114115480802e-10, | |
| "loss": 0.356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36682021617889404, | |
| "step": 785, | |
| "valid_targets_mean": 3412.7, | |
| "valid_targets_min": 1028 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36682021617889404, | |
| "step": 785, | |
| "total_flos": 1.091054629938004e+18, | |
| "train_loss": 0.38741187927829235, | |
| "train_runtime": 17417.9225, | |
| "train_samples_per_second": 2.871, | |
| "train_steps_per_second": 0.045, | |
| "valid_targets_mean": 3412.7, | |
| "valid_targets_min": 1028 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 785, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.091054629938004e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |