| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 775, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03231017770597738, |
| "grad_norm": 6.731328532589346, |
| "learning_rate": 2.0512820512820513e-06, |
| "loss": 0.7811, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.18013827502727509, |
| "step": 5, |
| "valid_targets_mean": 3120.6, |
| "valid_targets_min": 739 |
| }, |
| { |
| "epoch": 0.06462035541195477, |
| "grad_norm": 4.158689615969652, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 0.7582, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.17159777879714966, |
| "step": 10, |
| "valid_targets_mean": 3504.9, |
| "valid_targets_min": 487 |
| }, |
| { |
| "epoch": 0.09693053311793215, |
| "grad_norm": 2.4589720035548734, |
| "learning_rate": 7.17948717948718e-06, |
| "loss": 0.701, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.14925020933151245, |
| "step": 15, |
| "valid_targets_mean": 3731.6, |
| "valid_targets_min": 1279 |
| }, |
| { |
| "epoch": 0.12924071082390953, |
| "grad_norm": 0.9663503382808891, |
| "learning_rate": 9.743589743589744e-06, |
| "loss": 0.6601, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.17283087968826294, |
| "step": 20, |
| "valid_targets_mean": 4027.5, |
| "valid_targets_min": 501 |
| }, |
| { |
| "epoch": 0.16155088852988692, |
| "grad_norm": 0.8054644194990555, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 0.6369, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.17246821522712708, |
| "step": 25, |
| "valid_targets_mean": 3996.4, |
| "valid_targets_min": 1977 |
| }, |
| { |
| "epoch": 0.1938610662358643, |
| "grad_norm": 0.5216073107230456, |
| "learning_rate": 1.4871794871794874e-05, |
| "loss": 0.6002, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.14975571632385254, |
| "step": 30, |
| "valid_targets_mean": 4030.6, |
| "valid_targets_min": 399 |
| }, |
| { |
| "epoch": 0.22617124394184168, |
| "grad_norm": 0.5196065116767887, |
| "learning_rate": 1.7435897435897438e-05, |
| "loss": 0.5885, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.14672940969467163, |
| "step": 35, |
| "valid_targets_mean": 4130.4, |
| "valid_targets_min": 984 |
| }, |
| { |
| "epoch": 0.25848142164781907, |
| "grad_norm": 0.38054025010847065, |
| "learning_rate": 2e-05, |
| "loss": 0.5665, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1372590959072113, |
| "step": 40, |
| "valid_targets_mean": 4145.8, |
| "valid_targets_min": 1033 |
| }, |
| { |
| "epoch": 0.29079159935379645, |
| "grad_norm": 0.3929948606013641, |
| "learning_rate": 2.2564102564102566e-05, |
| "loss": 0.563, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13531070947647095, |
| "step": 45, |
| "valid_targets_mean": 3306.5, |
| "valid_targets_min": 1157 |
| }, |
| { |
| "epoch": 0.32310177705977383, |
| "grad_norm": 0.32153200327415404, |
| "learning_rate": 2.512820512820513e-05, |
| "loss": 0.5405, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.14371386170387268, |
| "step": 50, |
| "valid_targets_mean": 4432.1, |
| "valid_targets_min": 571 |
| }, |
| { |
| "epoch": 0.3554119547657512, |
| "grad_norm": 0.30016200515564767, |
| "learning_rate": 2.7692307692307694e-05, |
| "loss": 0.5318, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12094619125127792, |
| "step": 55, |
| "valid_targets_mean": 3439.8, |
| "valid_targets_min": 1091 |
| }, |
| { |
| "epoch": 0.3877221324717286, |
| "grad_norm": 0.32812658031573905, |
| "learning_rate": 3.0256410256410257e-05, |
| "loss": 0.5183, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1525593101978302, |
| "step": 60, |
| "valid_targets_mean": 4788.1, |
| "valid_targets_min": 508 |
| }, |
| { |
| "epoch": 0.420032310177706, |
| "grad_norm": 0.31480454051703594, |
| "learning_rate": 3.282051282051282e-05, |
| "loss": 0.5237, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10941419005393982, |
| "step": 65, |
| "valid_targets_mean": 2808.4, |
| "valid_targets_min": 573 |
| }, |
| { |
| "epoch": 0.45234248788368336, |
| "grad_norm": 0.2696632862359051, |
| "learning_rate": 3.538461538461539e-05, |
| "loss": 0.5084, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13120058178901672, |
| "step": 70, |
| "valid_targets_mean": 4135.1, |
| "valid_targets_min": 937 |
| }, |
| { |
| "epoch": 0.48465266558966075, |
| "grad_norm": 0.6024620941620338, |
| "learning_rate": 3.794871794871795e-05, |
| "loss": 0.5068, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1219104528427124, |
| "step": 75, |
| "valid_targets_mean": 3757.5, |
| "valid_targets_min": 1028 |
| }, |
| { |
| "epoch": 0.5169628432956381, |
| "grad_norm": 0.28664290005490634, |
| "learning_rate": 3.999979684222212e-05, |
| "loss": 0.5038, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13252684473991394, |
| "step": 80, |
| "valid_targets_mean": 4096.0, |
| "valid_targets_min": 729 |
| }, |
| { |
| "epoch": 0.5492730210016155, |
| "grad_norm": 0.29814119507104336, |
| "learning_rate": 3.999268675335385e-05, |
| "loss": 0.4891, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11344544589519501, |
| "step": 85, |
| "valid_targets_mean": 3552.1, |
| "valid_targets_min": 814 |
| }, |
| { |
| "epoch": 0.5815831987075929, |
| "grad_norm": 0.3159183051465075, |
| "learning_rate": 3.997542290252236e-05, |
| "loss": 0.4843, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12366665154695511, |
| "step": 90, |
| "valid_targets_mean": 3477.1, |
| "valid_targets_min": 979 |
| }, |
| { |
| "epoch": 0.6138933764135702, |
| "grad_norm": 0.266069868151099, |
| "learning_rate": 3.9948014057585294e-05, |
| "loss": 0.4803, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11242301762104034, |
| "step": 95, |
| "valid_targets_mean": 3989.3, |
| "valid_targets_min": 590 |
| }, |
| { |
| "epoch": 0.6462035541195477, |
| "grad_norm": 0.28400332082322377, |
| "learning_rate": 3.991047413877713e-05, |
| "loss": 0.4804, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13703756034374237, |
| "step": 100, |
| "valid_targets_mean": 3988.6, |
| "valid_targets_min": 1313 |
| }, |
| { |
| "epoch": 0.678513731825525, |
| "grad_norm": 0.2959731557583916, |
| "learning_rate": 3.98628222116394e-05, |
| "loss": 0.4897, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10094247758388519, |
| "step": 105, |
| "valid_targets_mean": 3035.1, |
| "valid_targets_min": 593 |
| }, |
| { |
| "epoch": 0.7108239095315024, |
| "grad_norm": 0.3021715068251041, |
| "learning_rate": 3.9805082477337815e-05, |
| "loss": 0.4628, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12388584017753601, |
| "step": 110, |
| "valid_targets_mean": 3724.1, |
| "valid_targets_min": 1076 |
| }, |
| { |
| "epoch": 0.7431340872374798, |
| "grad_norm": 0.2856991041044821, |
| "learning_rate": 3.9737284260371144e-05, |
| "loss": 0.471, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11789651215076447, |
| "step": 115, |
| "valid_targets_mean": 3853.4, |
| "valid_targets_min": 857 |
| }, |
| { |
| "epoch": 0.7754442649434572, |
| "grad_norm": 0.28392806599052883, |
| "learning_rate": 3.965946199367804e-05, |
| "loss": 0.4804, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11591988056898117, |
| "step": 120, |
| "valid_targets_mean": 3771.6, |
| "valid_targets_min": 730 |
| }, |
| { |
| "epoch": 0.8077544426494345, |
| "grad_norm": 0.2919653111659463, |
| "learning_rate": 3.957165520114948e-05, |
| "loss": 0.4682, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13558943569660187, |
| "step": 125, |
| "valid_targets_mean": 3917.8, |
| "valid_targets_min": 1219 |
| }, |
| { |
| "epoch": 0.840064620355412, |
| "grad_norm": 0.3238305897868469, |
| "learning_rate": 3.947390847755559e-05, |
| "loss": 0.4732, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11949899047613144, |
| "step": 130, |
| "valid_targets_mean": 3899.7, |
| "valid_targets_min": 770 |
| }, |
| { |
| "epoch": 0.8723747980613893, |
| "grad_norm": 0.30925326761097366, |
| "learning_rate": 3.936627146589715e-05, |
| "loss": 0.4776, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12182562053203583, |
| "step": 135, |
| "valid_targets_mean": 3646.8, |
| "valid_targets_min": 516 |
| }, |
| { |
| "epoch": 0.9046849757673667, |
| "grad_norm": 0.27131763808428006, |
| "learning_rate": 3.92487988321932e-05, |
| "loss": 0.4582, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1010097786784172, |
| "step": 140, |
| "valid_targets_mean": 3460.2, |
| "valid_targets_min": 905 |
| }, |
| { |
| "epoch": 0.9369951534733441, |
| "grad_norm": 0.2787680956479995, |
| "learning_rate": 3.912155023771762e-05, |
| "loss": 0.4679, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12193842232227325, |
| "step": 145, |
| "valid_targets_mean": 4362.3, |
| "valid_targets_min": 1958 |
| }, |
| { |
| "epoch": 0.9693053311793215, |
| "grad_norm": 0.2993760143538429, |
| "learning_rate": 3.89845903086987e-05, |
| "loss": 0.4523, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10849137604236603, |
| "step": 150, |
| "valid_targets_mean": 3652.4, |
| "valid_targets_min": 1075 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3292167259914328, |
| "learning_rate": 3.883798860349722e-05, |
| "loss": 0.4587, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1710975468158722, |
| "step": 155, |
| "valid_targets_mean": 4421.6, |
| "valid_targets_min": 1262 |
| }, |
| { |
| "epoch": 1.0323101777059773, |
| "grad_norm": 0.2695765904096439, |
| "learning_rate": 3.8681819577279515e-05, |
| "loss": 0.4468, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11836530268192291, |
| "step": 160, |
| "valid_targets_mean": 4131.2, |
| "valid_targets_min": 770 |
| }, |
| { |
| "epoch": 1.0646203554119547, |
| "grad_norm": 0.283031599096208, |
| "learning_rate": 3.85161625442037e-05, |
| "loss": 0.4548, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0947444885969162, |
| "step": 165, |
| "valid_targets_mean": 3082.1, |
| "valid_targets_min": 980 |
| }, |
| { |
| "epoch": 1.0969305331179322, |
| "grad_norm": 0.2757114518295554, |
| "learning_rate": 3.834110163713806e-05, |
| "loss": 0.4502, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11827362328767776, |
| "step": 170, |
| "valid_targets_mean": 4066.2, |
| "valid_targets_min": 1746 |
| }, |
| { |
| "epoch": 1.1292407108239095, |
| "grad_norm": 0.2861879786831059, |
| "learning_rate": 3.81567257649322e-05, |
| "loss": 0.4629, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1151590347290039, |
| "step": 175, |
| "valid_targets_mean": 3779.0, |
| "valid_targets_min": 760 |
| }, |
| { |
| "epoch": 1.1615508885298869, |
| "grad_norm": 0.28247290465940766, |
| "learning_rate": 3.796312856726252e-05, |
| "loss": 0.4695, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1255260705947876, |
| "step": 180, |
| "valid_targets_mean": 3885.4, |
| "valid_targets_min": 1325 |
| }, |
| { |
| "epoch": 1.1938610662358644, |
| "grad_norm": 0.2767026202339847, |
| "learning_rate": 3.77604083670751e-05, |
| "loss": 0.4472, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11101329326629639, |
| "step": 185, |
| "valid_targets_mean": 3925.6, |
| "valid_targets_min": 1324 |
| }, |
| { |
| "epoch": 1.2261712439418417, |
| "grad_norm": 0.29143791671926056, |
| "learning_rate": 3.754866812065008e-05, |
| "loss": 0.4471, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10287977755069733, |
| "step": 190, |
| "valid_targets_mean": 3699.9, |
| "valid_targets_min": 573 |
| }, |
| { |
| "epoch": 1.258481421647819, |
| "grad_norm": 0.26901543603219996, |
| "learning_rate": 3.7328015365312815e-05, |
| "loss": 0.4383, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10629215836524963, |
| "step": 195, |
| "valid_targets_mean": 3979.9, |
| "valid_targets_min": 1310 |
| }, |
| { |
| "epoch": 1.2907915993537964, |
| "grad_norm": 0.26386364331283385, |
| "learning_rate": 3.709856216481852e-05, |
| "loss": 0.4461, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1253024935722351, |
| "step": 200, |
| "valid_targets_mean": 4954.8, |
| "valid_targets_min": 1384 |
| }, |
| { |
| "epoch": 1.3231017770597737, |
| "grad_norm": 0.28205279834558566, |
| "learning_rate": 3.6860425052437986e-05, |
| "loss": 0.4581, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1366412192583084, |
| "step": 205, |
| "valid_targets_mean": 4692.7, |
| "valid_targets_min": 1437 |
| }, |
| { |
| "epoch": 1.3554119547657513, |
| "grad_norm": 0.27397295572622304, |
| "learning_rate": 3.6613724971773426e-05, |
| "loss": 0.4475, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10592503845691681, |
| "step": 210, |
| "valid_targets_mean": 3428.0, |
| "valid_targets_min": 1092 |
| }, |
| { |
| "epoch": 1.3877221324717286, |
| "grad_norm": 0.26346316493352695, |
| "learning_rate": 3.6358587215334355e-05, |
| "loss": 0.4466, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11440782994031906, |
| "step": 215, |
| "valid_targets_mean": 4438.2, |
| "valid_targets_min": 1592 |
| }, |
| { |
| "epoch": 1.420032310177706, |
| "grad_norm": 0.28710754924699733, |
| "learning_rate": 3.609514136090483e-05, |
| "loss": 0.4448, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10379820317029953, |
| "step": 220, |
| "valid_targets_mean": 3354.9, |
| "valid_targets_min": 714 |
| }, |
| { |
| "epoch": 1.4523424878836835, |
| "grad_norm": 0.2673010194641308, |
| "learning_rate": 3.582352120573427e-05, |
| "loss": 0.4367, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12144172936677933, |
| "step": 225, |
| "valid_targets_mean": 4791.8, |
| "valid_targets_min": 1801 |
| }, |
| { |
| "epoch": 1.4846526655896608, |
| "grad_norm": 0.29713460613466613, |
| "learning_rate": 3.554386469858534e-05, |
| "loss": 0.4462, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12215234339237213, |
| "step": 230, |
| "valid_targets_mean": 4136.6, |
| "valid_targets_min": 2541 |
| }, |
| { |
| "epoch": 1.5169628432956381, |
| "grad_norm": 0.27978320939134727, |
| "learning_rate": 3.5256313869673385e-05, |
| "loss": 0.4383, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10995539277791977, |
| "step": 235, |
| "valid_targets_mean": 3900.1, |
| "valid_targets_min": 1037 |
| }, |
| { |
| "epoch": 1.5492730210016155, |
| "grad_norm": 0.2722574626172616, |
| "learning_rate": 3.4961014758533025e-05, |
| "loss": 0.441, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12816287577152252, |
| "step": 240, |
| "valid_targets_mean": 5076.9, |
| "valid_targets_min": 2138 |
| }, |
| { |
| "epoch": 1.5815831987075928, |
| "grad_norm": 0.2673145334904031, |
| "learning_rate": 3.4658117339848476e-05, |
| "loss": 0.439, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11988267302513123, |
| "step": 245, |
| "valid_targets_mean": 4658.4, |
| "valid_targets_min": 892 |
| }, |
| { |
| "epoch": 1.6138933764135701, |
| "grad_norm": 0.2918663090693661, |
| "learning_rate": 3.434777544728535e-05, |
| "loss": 0.4526, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12342672049999237, |
| "step": 250, |
| "valid_targets_mean": 4099.1, |
| "valid_targets_min": 1213 |
| }, |
| { |
| "epoch": 1.6462035541195477, |
| "grad_norm": 0.2736131631419664, |
| "learning_rate": 3.403014669536254e-05, |
| "loss": 0.4381, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1107107549905777, |
| "step": 255, |
| "valid_targets_mean": 4110.0, |
| "valid_targets_min": 900 |
| }, |
| { |
| "epoch": 1.678513731825525, |
| "grad_norm": 0.2856001690229933, |
| "learning_rate": 3.370539239940398e-05, |
| "loss": 0.4339, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10994768142700195, |
| "step": 260, |
| "valid_targets_mean": 4428.8, |
| "valid_targets_min": 885 |
| }, |
| { |
| "epoch": 1.7108239095315025, |
| "grad_norm": 0.27636624735054516, |
| "learning_rate": 3.337367749361079e-05, |
| "loss": 0.4359, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09362054616212845, |
| "step": 265, |
| "valid_targets_mean": 3526.1, |
| "valid_targets_min": 405 |
| }, |
| { |
| "epoch": 1.7431340872374799, |
| "grad_norm": 0.2892324920954573, |
| "learning_rate": 3.3035170447295477e-05, |
| "loss": 0.4489, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11963844299316406, |
| "step": 270, |
| "valid_targets_mean": 3514.6, |
| "valid_targets_min": 480 |
| }, |
| { |
| "epoch": 1.7754442649434572, |
| "grad_norm": 0.29559967298100737, |
| "learning_rate": 3.269004317932088e-05, |
| "loss": 0.4376, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10168647766113281, |
| "step": 275, |
| "valid_targets_mean": 3955.1, |
| "valid_targets_min": 843 |
| }, |
| { |
| "epoch": 1.8077544426494345, |
| "grad_norm": 0.29301743788955603, |
| "learning_rate": 3.2338470970786975e-05, |
| "loss": 0.4316, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09594365209341049, |
| "step": 280, |
| "valid_targets_mean": 4001.1, |
| "valid_targets_min": 739 |
| }, |
| { |
| "epoch": 1.8400646203554119, |
| "grad_norm": 0.2714210889052756, |
| "learning_rate": 3.198063237601028e-05, |
| "loss": 0.4358, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09736962616443634, |
| "step": 285, |
| "valid_targets_mean": 3362.9, |
| "valid_targets_min": 601 |
| }, |
| { |
| "epoch": 1.8723747980613892, |
| "grad_norm": 0.2938292684222412, |
| "learning_rate": 3.161670913184075e-05, |
| "loss": 0.4383, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10187675803899765, |
| "step": 290, |
| "valid_targets_mean": 3258.2, |
| "valid_targets_min": 961 |
| }, |
| { |
| "epoch": 1.9046849757673667, |
| "grad_norm": 0.2893151712006615, |
| "learning_rate": 3.1246886065362384e-05, |
| "loss": 0.4356, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12088151276111603, |
| "step": 295, |
| "valid_targets_mean": 3892.3, |
| "valid_targets_min": 1317 |
| }, |
| { |
| "epoch": 1.936995153473344, |
| "grad_norm": 0.29656932195947505, |
| "learning_rate": 3.0871351000024425e-05, |
| "loss": 0.4405, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11113157868385315, |
| "step": 300, |
| "valid_targets_mean": 4205.4, |
| "valid_targets_min": 633 |
| }, |
| { |
| "epoch": 1.9693053311793216, |
| "grad_norm": 0.291185824152099, |
| "learning_rate": 3.049029466025073e-05, |
| "loss": 0.4505, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12442485243082047, |
| "step": 305, |
| "valid_targets_mean": 4337.8, |
| "valid_targets_min": 1336 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.3230631171230839, |
| "learning_rate": 3.010391057457582e-05, |
| "loss": 0.4491, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.14164745807647705, |
| "step": 310, |
| "valid_targets_mean": 3661.9, |
| "valid_targets_min": 635 |
| }, |
| { |
| "epoch": 2.0323101777059773, |
| "grad_norm": 0.28933083551433375, |
| "learning_rate": 2.9712394977356824e-05, |
| "loss": 0.4268, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1224806010723114, |
| "step": 315, |
| "valid_targets_mean": 4159.8, |
| "valid_targets_min": 1026 |
| }, |
| { |
| "epoch": 2.0646203554119547, |
| "grad_norm": 0.2667030484032612, |
| "learning_rate": 2.931594670911119e-05, |
| "loss": 0.4272, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11036545783281326, |
| "step": 320, |
| "valid_targets_mean": 4104.9, |
| "valid_targets_min": 770 |
| }, |
| { |
| "epoch": 2.096930533117932, |
| "grad_norm": 0.2832908426862824, |
| "learning_rate": 2.891476711553077e-05, |
| "loss": 0.4245, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10099711269140244, |
| "step": 325, |
| "valid_targets_mean": 3807.2, |
| "valid_targets_min": 1082 |
| }, |
| { |
| "epoch": 2.1292407108239093, |
| "grad_norm": 0.29187045263894107, |
| "learning_rate": 2.850905994522364e-05, |
| "loss": 0.4346, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0898057371377945, |
| "step": 330, |
| "valid_targets_mean": 3095.2, |
| "valid_targets_min": 448 |
| }, |
| { |
| "epoch": 2.161550888529887, |
| "grad_norm": 0.2770526132588516, |
| "learning_rate": 2.8099031246235518e-05, |
| "loss": 0.4316, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09981365501880646, |
| "step": 335, |
| "valid_targets_mean": 3444.5, |
| "valid_targets_min": 927 |
| }, |
| { |
| "epoch": 2.1938610662358644, |
| "grad_norm": 0.2703022811543922, |
| "learning_rate": 2.768488926140336e-05, |
| "loss": 0.4302, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10014323890209198, |
| "step": 340, |
| "valid_targets_mean": 4162.8, |
| "valid_targets_min": 794 |
| }, |
| { |
| "epoch": 2.2261712439418417, |
| "grad_norm": 0.2802946227690351, |
| "learning_rate": 2.7266844322594228e-05, |
| "loss": 0.419, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10989852249622345, |
| "step": 345, |
| "valid_targets_mean": 4701.1, |
| "valid_targets_min": 682 |
| }, |
| { |
| "epoch": 2.258481421647819, |
| "grad_norm": 0.3051346896874913, |
| "learning_rate": 2.684510874388333e-05, |
| "loss": 0.4268, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1363740861415863, |
| "step": 350, |
| "valid_targets_mean": 4684.3, |
| "valid_targets_min": 640 |
| }, |
| { |
| "epoch": 2.2907915993537964, |
| "grad_norm": 0.27648399125184, |
| "learning_rate": 2.6419896713725194e-05, |
| "loss": 0.4197, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10181058943271637, |
| "step": 355, |
| "valid_targets_mean": 4190.1, |
| "valid_targets_min": 1038 |
| }, |
| { |
| "epoch": 2.3231017770597737, |
| "grad_norm": 0.30499154547298046, |
| "learning_rate": 2.599142418617299e-05, |
| "loss": 0.4294, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10151579231023788, |
| "step": 360, |
| "valid_targets_mean": 3688.4, |
| "valid_targets_min": 726 |
| }, |
| { |
| "epoch": 2.355411954765751, |
| "grad_norm": 0.2957071744459196, |
| "learning_rate": 2.555990877120111e-05, |
| "loss": 0.4211, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09378227591514587, |
| "step": 365, |
| "valid_targets_mean": 2939.3, |
| "valid_targets_min": 819 |
| }, |
| { |
| "epoch": 2.387722132471729, |
| "grad_norm": 0.2761336471089066, |
| "learning_rate": 2.5125569624186782e-05, |
| "loss": 0.4153, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10543694347143173, |
| "step": 370, |
| "valid_targets_mean": 4215.9, |
| "valid_targets_min": 733 |
| }, |
| { |
| "epoch": 2.420032310177706, |
| "grad_norm": 0.2899428111740831, |
| "learning_rate": 2.4688627334606773e-05, |
| "loss": 0.4186, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08878014236688614, |
| "step": 375, |
| "valid_targets_mean": 3717.1, |
| "valid_targets_min": 521 |
| }, |
| { |
| "epoch": 2.4523424878836835, |
| "grad_norm": 0.2979664309978484, |
| "learning_rate": 2.4249303814005787e-05, |
| "loss": 0.4315, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11504903435707092, |
| "step": 380, |
| "valid_targets_mean": 3742.7, |
| "valid_targets_min": 722 |
| }, |
| { |
| "epoch": 2.484652665589661, |
| "grad_norm": 0.2994522528219869, |
| "learning_rate": 2.380782218329337e-05, |
| "loss": 0.4343, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10162772238254547, |
| "step": 385, |
| "valid_targets_mean": 3453.4, |
| "valid_targets_min": 576 |
| }, |
| { |
| "epoch": 2.516962843295638, |
| "grad_norm": 0.27098939969509317, |
| "learning_rate": 2.33644066594267e-05, |
| "loss": 0.4257, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10443132370710373, |
| "step": 390, |
| "valid_targets_mean": 3782.1, |
| "valid_targets_min": 953 |
| }, |
| { |
| "epoch": 2.5492730210016155, |
| "grad_norm": 0.27358095003105976, |
| "learning_rate": 2.2919282441536622e-05, |
| "loss": 0.4252, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12023317813873291, |
| "step": 395, |
| "valid_targets_mean": 3923.7, |
| "valid_targets_min": 666 |
| }, |
| { |
| "epoch": 2.581583198707593, |
| "grad_norm": 0.27561709551863967, |
| "learning_rate": 2.247267559655492e-05, |
| "loss": 0.425, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10897140204906464, |
| "step": 400, |
| "valid_targets_mean": 4004.1, |
| "valid_targets_min": 831 |
| }, |
| { |
| "epoch": 2.61389337641357, |
| "grad_norm": 0.2911055404776475, |
| "learning_rate": 2.202481294440086e-05, |
| "loss": 0.4272, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1356189250946045, |
| "step": 405, |
| "valid_targets_mean": 4587.9, |
| "valid_targets_min": 1835 |
| }, |
| { |
| "epoch": 2.6462035541195474, |
| "grad_norm": 0.27638152367327684, |
| "learning_rate": 2.1575921942785247e-05, |
| "loss": 0.4135, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11528658121824265, |
| "step": 410, |
| "valid_targets_mean": 4123.7, |
| "valid_targets_min": 1322 |
| }, |
| { |
| "epoch": 2.678513731825525, |
| "grad_norm": 0.2724723503201682, |
| "learning_rate": 2.1126230571690688e-05, |
| "loss": 0.4247, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10935419052839279, |
| "step": 415, |
| "valid_targets_mean": 3782.7, |
| "valid_targets_min": 696 |
| }, |
| { |
| "epoch": 2.7108239095315025, |
| "grad_norm": 0.28022431584327206, |
| "learning_rate": 2.0675967217586453e-05, |
| "loss": 0.425, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11043274402618408, |
| "step": 420, |
| "valid_targets_mean": 4288.0, |
| "valid_targets_min": 1929 |
| }, |
| { |
| "epoch": 2.74313408723748, |
| "grad_norm": 0.26570243268701954, |
| "learning_rate": 2.022536055743702e-05, |
| "loss": 0.4336, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09703566879034042, |
| "step": 425, |
| "valid_targets_mean": 3890.9, |
| "valid_targets_min": 1285 |
| }, |
| { |
| "epoch": 2.775444264943457, |
| "grad_norm": 0.2803669278086659, |
| "learning_rate": 1.9774639442562994e-05, |
| "loss": 0.4206, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11521075665950775, |
| "step": 430, |
| "valid_targets_mean": 4614.6, |
| "valid_targets_min": 611 |
| }, |
| { |
| "epoch": 2.8077544426494345, |
| "grad_norm": 0.27372724895007716, |
| "learning_rate": 1.932403278241355e-05, |
| "loss": 0.4171, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10905209183692932, |
| "step": 435, |
| "valid_targets_mean": 4459.7, |
| "valid_targets_min": 1502 |
| }, |
| { |
| "epoch": 2.840064620355412, |
| "grad_norm": 0.2560272749894161, |
| "learning_rate": 1.8873769428309315e-05, |
| "loss": 0.4225, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10236693173646927, |
| "step": 440, |
| "valid_targets_mean": 4706.2, |
| "valid_targets_min": 607 |
| }, |
| { |
| "epoch": 2.872374798061389, |
| "grad_norm": 0.2697096585907479, |
| "learning_rate": 1.8424078057214753e-05, |
| "loss": 0.4207, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10952171683311462, |
| "step": 445, |
| "valid_targets_mean": 4067.4, |
| "valid_targets_min": 550 |
| }, |
| { |
| "epoch": 2.904684975767367, |
| "grad_norm": 0.29101709596296904, |
| "learning_rate": 1.7975187055599153e-05, |
| "loss": 0.4259, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12544356286525726, |
| "step": 450, |
| "valid_targets_mean": 3887.2, |
| "valid_targets_min": 730 |
| }, |
| { |
| "epoch": 2.936995153473344, |
| "grad_norm": 0.28158413880193023, |
| "learning_rate": 1.7527324403445086e-05, |
| "loss": 0.4164, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10754776746034622, |
| "step": 455, |
| "valid_targets_mean": 3837.2, |
| "valid_targets_min": 1232 |
| }, |
| { |
| "epoch": 2.9693053311793216, |
| "grad_norm": 0.2846122123476895, |
| "learning_rate": 1.708071755846338e-05, |
| "loss": 0.4265, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10734738409519196, |
| "step": 460, |
| "valid_targets_mean": 3481.8, |
| "valid_targets_min": 1306 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.30539912011689785, |
| "learning_rate": 1.66355933405733e-05, |
| "loss": 0.4203, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1297743320465088, |
| "step": 465, |
| "valid_targets_mean": 3604.1, |
| "valid_targets_min": 608 |
| }, |
| { |
| "epoch": 3.0323101777059773, |
| "grad_norm": 0.2906283787124391, |
| "learning_rate": 1.619217781670663e-05, |
| "loss": 0.4222, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10506054759025574, |
| "step": 470, |
| "valid_targets_mean": 3606.9, |
| "valid_targets_min": 1303 |
| }, |
| { |
| "epoch": 3.0646203554119547, |
| "grad_norm": 0.28401688637835226, |
| "learning_rate": 1.5750696185994226e-05, |
| "loss": 0.4238, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.092110276222229, |
| "step": 475, |
| "valid_targets_mean": 3426.1, |
| "valid_targets_min": 603 |
| }, |
| { |
| "epoch": 3.096930533117932, |
| "grad_norm": 0.261548774285929, |
| "learning_rate": 1.531137266539323e-05, |
| "loss": 0.4019, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10355602204799652, |
| "step": 480, |
| "valid_targets_mean": 4255.1, |
| "valid_targets_min": 873 |
| }, |
| { |
| "epoch": 3.1292407108239093, |
| "grad_norm": 0.3198608674027354, |
| "learning_rate": 1.4874430375813223e-05, |
| "loss": 0.4153, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10992392897605896, |
| "step": 485, |
| "valid_targets_mean": 3584.8, |
| "valid_targets_min": 534 |
| }, |
| { |
| "epoch": 3.161550888529887, |
| "grad_norm": 0.28922630514389835, |
| "learning_rate": 1.4440091228798896e-05, |
| "loss": 0.4203, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12728840112686157, |
| "step": 490, |
| "valid_targets_mean": 4236.7, |
| "valid_targets_min": 788 |
| }, |
| { |
| "epoch": 3.1938610662358644, |
| "grad_norm": 0.2779446885189012, |
| "learning_rate": 1.4008575813827023e-05, |
| "loss": 0.4154, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09563896059989929, |
| "step": 495, |
| "valid_targets_mean": 3544.6, |
| "valid_targets_min": 601 |
| }, |
| { |
| "epoch": 3.2261712439418417, |
| "grad_norm": 0.2700572606821521, |
| "learning_rate": 1.3580103286274816e-05, |
| "loss": 0.4162, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11817362904548645, |
| "step": 500, |
| "valid_targets_mean": 4341.7, |
| "valid_targets_min": 1233 |
| }, |
| { |
| "epoch": 3.258481421647819, |
| "grad_norm": 0.28169645716296227, |
| "learning_rate": 1.3154891256116677e-05, |
| "loss": 0.4118, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1011514663696289, |
| "step": 505, |
| "valid_targets_mean": 3950.1, |
| "valid_targets_min": 1079 |
| }, |
| { |
| "epoch": 3.2907915993537964, |
| "grad_norm": 0.2521242878848628, |
| "learning_rate": 1.2733155677405776e-05, |
| "loss": 0.4205, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10162997990846634, |
| "step": 510, |
| "valid_targets_mean": 4916.3, |
| "valid_targets_min": 1809 |
| }, |
| { |
| "epoch": 3.3231017770597737, |
| "grad_norm": 0.26803173620107257, |
| "learning_rate": 1.2315110738596654e-05, |
| "loss": 0.4121, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09578994661569595, |
| "step": 515, |
| "valid_targets_mean": 3822.8, |
| "valid_targets_min": 1606 |
| }, |
| { |
| "epoch": 3.355411954765751, |
| "grad_norm": 0.27660293170452704, |
| "learning_rate": 1.1900968753764483e-05, |
| "loss": 0.4066, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1010737344622612, |
| "step": 520, |
| "valid_targets_mean": 3893.4, |
| "valid_targets_min": 909 |
| }, |
| { |
| "epoch": 3.387722132471729, |
| "grad_norm": 0.26313591447221535, |
| "learning_rate": 1.1490940054776365e-05, |
| "loss": 0.4181, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10495524108409882, |
| "step": 525, |
| "valid_targets_mean": 4405.3, |
| "valid_targets_min": 808 |
| }, |
| { |
| "epoch": 3.420032310177706, |
| "grad_norm": 0.27385516656009323, |
| "learning_rate": 1.1085232884469236e-05, |
| "loss": 0.4103, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09986208379268646, |
| "step": 530, |
| "valid_targets_mean": 3620.4, |
| "valid_targets_min": 748 |
| }, |
| { |
| "epoch": 3.4523424878836835, |
| "grad_norm": 0.27628551895346487, |
| "learning_rate": 1.0684053290888824e-05, |
| "loss": 0.4018, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11552257835865021, |
| "step": 535, |
| "valid_targets_mean": 4266.4, |
| "valid_targets_min": 640 |
| }, |
| { |
| "epoch": 3.484652665589661, |
| "grad_norm": 0.2718878735955647, |
| "learning_rate": 1.0287605022643183e-05, |
| "loss": 0.4201, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09710375964641571, |
| "step": 540, |
| "valid_targets_mean": 3415.8, |
| "valid_targets_min": 371 |
| }, |
| { |
| "epoch": 3.516962843295638, |
| "grad_norm": 0.2865387366012006, |
| "learning_rate": 9.896089425424188e-06, |
| "loss": 0.4285, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07953400164842606, |
| "step": 545, |
| "valid_targets_mean": 2944.9, |
| "valid_targets_min": 712 |
| }, |
| { |
| "epoch": 3.5492730210016155, |
| "grad_norm": 0.25966498720410713, |
| "learning_rate": 9.509705339749277e-06, |
| "loss": 0.4076, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09222520887851715, |
| "step": 550, |
| "valid_targets_mean": 3501.2, |
| "valid_targets_min": 667 |
| }, |
| { |
| "epoch": 3.581583198707593, |
| "grad_norm": 0.2713083547984801, |
| "learning_rate": 9.12864899997558e-06, |
| "loss": 0.4175, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1106656938791275, |
| "step": 555, |
| "valid_targets_mean": 3612.1, |
| "valid_targets_min": 1550 |
| }, |
| { |
| "epoch": 3.61389337641357, |
| "grad_norm": 0.2737405302240275, |
| "learning_rate": 8.753113934637621e-06, |
| "loss": 0.4122, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09648142755031586, |
| "step": 560, |
| "valid_targets_mean": 3494.7, |
| "valid_targets_min": 1065 |
| }, |
| { |
| "epoch": 3.6462035541195474, |
| "grad_norm": 0.28551738045137637, |
| "learning_rate": 8.383290868159256e-06, |
| "loss": 0.4106, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09706847369670868, |
| "step": 565, |
| "valid_targets_mean": 3369.0, |
| "valid_targets_min": 570 |
| }, |
| { |
| "epoch": 3.678513731825525, |
| "grad_norm": 0.2704546745998757, |
| "learning_rate": 8.01936762398972e-06, |
| "loss": 0.4089, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09082476794719696, |
| "step": 570, |
| "valid_targets_mean": 3314.4, |
| "valid_targets_min": 1093 |
| }, |
| { |
| "epoch": 3.7108239095315025, |
| "grad_norm": 0.2883589575944319, |
| "learning_rate": 7.661529029213023e-06, |
| "loss": 0.404, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11036745458841324, |
| "step": 575, |
| "valid_targets_mean": 4221.6, |
| "valid_targets_min": 892 |
| }, |
| { |
| "epoch": 3.74313408723748, |
| "grad_norm": 0.2719955608173771, |
| "learning_rate": 7.3099568206791315e-06, |
| "loss": 0.41, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10909552872180939, |
| "step": 580, |
| "valid_targets_mean": 3840.0, |
| "valid_targets_min": 463 |
| }, |
| { |
| "epoch": 3.775444264943457, |
| "grad_norm": 0.27643080510215096, |
| "learning_rate": 6.964829552704526e-06, |
| "loss": 0.4139, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10257598757743835, |
| "step": 585, |
| "valid_targets_mean": 3169.1, |
| "valid_targets_min": 682 |
| }, |
| { |
| "epoch": 3.8077544426494345, |
| "grad_norm": 0.2689937521699833, |
| "learning_rate": 6.62632250638922e-06, |
| "loss": 0.4156, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09786760061979294, |
| "step": 590, |
| "valid_targets_mean": 3638.8, |
| "valid_targets_min": 1010 |
| }, |
| { |
| "epoch": 3.840064620355412, |
| "grad_norm": 0.34681376444735507, |
| "learning_rate": 6.2946076005960184e-06, |
| "loss": 0.4102, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10728378593921661, |
| "step": 595, |
| "valid_targets_mean": 4384.9, |
| "valid_targets_min": 521 |
| }, |
| { |
| "epoch": 3.872374798061389, |
| "grad_norm": 0.2749441764442808, |
| "learning_rate": 5.969853304637467e-06, |
| "loss": 0.4144, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09306375682353973, |
| "step": 600, |
| "valid_targets_mean": 3161.8, |
| "valid_targets_min": 914 |
| }, |
| { |
| "epoch": 3.904684975767367, |
| "grad_norm": 0.27306877129599383, |
| "learning_rate": 5.65222455271466e-06, |
| "loss": 0.4205, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08627478778362274, |
| "step": 605, |
| "valid_targets_mean": 3452.9, |
| "valid_targets_min": 326 |
| }, |
| { |
| "epoch": 3.936995153473344, |
| "grad_norm": 0.2786598032003557, |
| "learning_rate": 5.341882660151527e-06, |
| "loss": 0.4107, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1056986153125763, |
| "step": 610, |
| "valid_targets_mean": 3757.9, |
| "valid_targets_min": 600 |
| }, |
| { |
| "epoch": 3.9693053311793216, |
| "grad_norm": 0.24556167869153683, |
| "learning_rate": 5.038985241466978e-06, |
| "loss": 0.4056, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09949535876512527, |
| "step": 615, |
| "valid_targets_mean": 3879.6, |
| "valid_targets_min": 442 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.4202349721331711, |
| "learning_rate": 4.7436861303266255e-06, |
| "loss": 0.4157, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12905532121658325, |
| "step": 620, |
| "valid_targets_mean": 3440.1, |
| "valid_targets_min": 697 |
| }, |
| { |
| "epoch": 4.032310177705978, |
| "grad_norm": 0.26293159983995606, |
| "learning_rate": 4.456135301414672e-06, |
| "loss": 0.4067, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11424065381288528, |
| "step": 625, |
| "valid_targets_mean": 3840.8, |
| "valid_targets_min": 681 |
| }, |
| { |
| "epoch": 4.064620355411955, |
| "grad_norm": 0.43342150888282877, |
| "learning_rate": 4.176478794265737e-06, |
| "loss": 0.4057, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10436274111270905, |
| "step": 630, |
| "valid_targets_mean": 3659.8, |
| "valid_targets_min": 1872 |
| }, |
| { |
| "epoch": 4.096930533117932, |
| "grad_norm": 0.3801825882709417, |
| "learning_rate": 3.904858639095174e-06, |
| "loss": 0.4204, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11651758849620819, |
| "step": 635, |
| "valid_targets_mean": 4127.8, |
| "valid_targets_min": 1477 |
| }, |
| { |
| "epoch": 4.129240710823909, |
| "grad_norm": 0.25828199655425915, |
| "learning_rate": 3.641412784665648e-06, |
| "loss": 0.4128, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10827606171369553, |
| "step": 640, |
| "valid_targets_mean": 4205.9, |
| "valid_targets_min": 903 |
| }, |
| { |
| "epoch": 4.161550888529887, |
| "grad_norm": 0.26223751778594456, |
| "learning_rate": 3.3862750282265798e-06, |
| "loss": 0.4133, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1035182774066925, |
| "step": 645, |
| "valid_targets_mean": 3863.5, |
| "valid_targets_min": 1102 |
| }, |
| { |
| "epoch": 4.193861066235864, |
| "grad_norm": 0.26844737353091586, |
| "learning_rate": 3.1395749475620185e-06, |
| "loss": 0.4163, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11465262621641159, |
| "step": 650, |
| "valid_targets_mean": 4107.2, |
| "valid_targets_min": 2044 |
| }, |
| { |
| "epoch": 4.226171243941842, |
| "grad_norm": 0.2778999716523805, |
| "learning_rate": 2.9014378351814866e-06, |
| "loss": 0.4076, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09468638151884079, |
| "step": 655, |
| "valid_targets_mean": 3168.5, |
| "valid_targets_min": 1039 |
| }, |
| { |
| "epoch": 4.258481421647819, |
| "grad_norm": 0.26830655466160835, |
| "learning_rate": 2.671984634687186e-06, |
| "loss": 0.3974, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0945512056350708, |
| "step": 660, |
| "valid_targets_mean": 3421.4, |
| "valid_targets_min": 840 |
| }, |
| { |
| "epoch": 4.290791599353796, |
| "grad_norm": 0.2873158323777035, |
| "learning_rate": 2.4513318793499274e-06, |
| "loss": 0.4098, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09555218368768692, |
| "step": 665, |
| "valid_targets_mean": 3014.9, |
| "valid_targets_min": 472 |
| }, |
| { |
| "epoch": 4.323101777059774, |
| "grad_norm": 0.24585209774411093, |
| "learning_rate": 2.239591632924907e-06, |
| "loss": 0.4041, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09621794521808624, |
| "step": 670, |
| "valid_targets_mean": 3395.1, |
| "valid_targets_min": 468 |
| }, |
| { |
| "epoch": 4.355411954765751, |
| "grad_norm": 0.26594780949791114, |
| "learning_rate": 2.0368714327374905e-06, |
| "loss": 0.4119, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09523327648639679, |
| "step": 675, |
| "valid_targets_mean": 3977.6, |
| "valid_targets_min": 976 |
| }, |
| { |
| "epoch": 4.387722132471729, |
| "grad_norm": 0.2616672301186846, |
| "learning_rate": 1.8432742350678023e-06, |
| "loss": 0.4168, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10347115993499756, |
| "step": 680, |
| "valid_targets_mean": 3634.4, |
| "valid_targets_min": 437 |
| }, |
| { |
| "epoch": 4.420032310177706, |
| "grad_norm": 0.25286821109840774, |
| "learning_rate": 1.6588983628619404e-06, |
| "loss": 0.4082, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09281186759471893, |
| "step": 685, |
| "valid_targets_mean": 4083.2, |
| "valid_targets_min": 1570 |
| }, |
| { |
| "epoch": 4.4523424878836835, |
| "grad_norm": 0.27245429614361333, |
| "learning_rate": 1.4838374557963064e-06, |
| "loss": 0.4077, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10341990739107132, |
| "step": 690, |
| "valid_targets_mean": 3520.6, |
| "valid_targets_min": 1209 |
| }, |
| { |
| "epoch": 4.48465266558966, |
| "grad_norm": 0.2571028998172337, |
| "learning_rate": 1.3181804227204897e-06, |
| "loss": 0.4151, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10382469743490219, |
| "step": 695, |
| "valid_targets_mean": 4107.4, |
| "valid_targets_min": 990 |
| }, |
| { |
| "epoch": 4.516962843295638, |
| "grad_norm": 0.25853771008397497, |
| "learning_rate": 1.1620113965027823e-06, |
| "loss": 0.4088, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11473739147186279, |
| "step": 700, |
| "valid_targets_mean": 4600.4, |
| "valid_targets_min": 1167 |
| }, |
| { |
| "epoch": 4.549273021001616, |
| "grad_norm": 0.25555348361237107, |
| "learning_rate": 1.0154096913012989e-06, |
| "loss": 0.4119, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09417924284934998, |
| "step": 705, |
| "valid_targets_mean": 3880.7, |
| "valid_targets_min": 714 |
| }, |
| { |
| "epoch": 4.581583198707593, |
| "grad_norm": 0.25034785262389336, |
| "learning_rate": 8.784497622823874e-07, |
| "loss": 0.4082, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09192933887243271, |
| "step": 710, |
| "valid_targets_mean": 3490.3, |
| "valid_targets_min": 1247 |
| }, |
| { |
| "epoch": 4.613893376413571, |
| "grad_norm": 0.2639146483604065, |
| "learning_rate": 7.512011678068077e-07, |
| "loss": 0.4027, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11304928362369537, |
| "step": 715, |
| "valid_targets_mean": 4564.2, |
| "valid_targets_min": 2029 |
| }, |
| { |
| "epoch": 4.646203554119547, |
| "grad_norm": 0.25644367484928005, |
| "learning_rate": 6.33728534102862e-07, |
| "loss": 0.4029, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09347259998321533, |
| "step": 720, |
| "valid_targets_mean": 3595.4, |
| "valid_targets_min": 937 |
| }, |
| { |
| "epoch": 4.678513731825525, |
| "grad_norm": 0.26466164823269805, |
| "learning_rate": 5.260915224444207e-07, |
| "loss": 0.406, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0985584706068039, |
| "step": 725, |
| "valid_targets_mean": 3558.4, |
| "valid_targets_min": 1108 |
| }, |
| { |
| "epoch": 4.710823909531502, |
| "grad_norm": 0.26762760315162576, |
| "learning_rate": 4.2834479885052846e-07, |
| "loss": 0.4151, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09770895540714264, |
| "step": 730, |
| "valid_targets_mean": 3214.7, |
| "valid_targets_min": 691 |
| }, |
| { |
| "epoch": 4.74313408723748, |
| "grad_norm": 0.27795743352938646, |
| "learning_rate": 3.4053800632196434e-07, |
| "loss": 0.4173, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12234581261873245, |
| "step": 735, |
| "valid_targets_mean": 4275.8, |
| "valid_targets_min": 856 |
| }, |
| { |
| "epoch": 4.775444264943458, |
| "grad_norm": 0.25529297793215555, |
| "learning_rate": 2.627157396288604e-07, |
| "loss": 0.3984, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07973651587963104, |
| "step": 740, |
| "valid_targets_mean": 3078.0, |
| "valid_targets_min": 766 |
| }, |
| { |
| "epoch": 4.8077544426494345, |
| "grad_norm": 0.2742977151583616, |
| "learning_rate": 1.94917522662188e-07, |
| "loss": 0.399, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10854361206293106, |
| "step": 745, |
| "valid_targets_mean": 4476.7, |
| "valid_targets_min": 982 |
| }, |
| { |
| "epoch": 4.840064620355412, |
| "grad_norm": 0.25263871685885614, |
| "learning_rate": 1.3717778836060735e-07, |
| "loss": 0.4039, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09801102429628372, |
| "step": 750, |
| "valid_targets_mean": 4007.4, |
| "valid_targets_min": 1477 |
| }, |
| { |
| "epoch": 4.872374798061389, |
| "grad_norm": 0.26280785954762587, |
| "learning_rate": 8.952586122287443e-08, |
| "loss": 0.4127, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11048153042793274, |
| "step": 755, |
| "valid_targets_mean": 3341.8, |
| "valid_targets_min": 811 |
| }, |
| { |
| "epoch": 4.904684975767367, |
| "grad_norm": 0.27488205591473774, |
| "learning_rate": 5.1985942414709556e-08, |
| "loss": 0.4107, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09189851582050323, |
| "step": 760, |
| "valid_targets_mean": 3471.2, |
| "valid_targets_min": 889 |
| }, |
| { |
| "epoch": 4.936995153473344, |
| "grad_norm": 0.25480665657723, |
| "learning_rate": 2.4577097477647137e-08, |
| "loss": 0.4012, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09942759573459625, |
| "step": 765, |
| "valid_targets_mean": 3963.9, |
| "valid_targets_min": 1524 |
| }, |
| { |
| "epoch": 4.969305331179322, |
| "grad_norm": 0.39531924920960926, |
| "learning_rate": 7.313246646150074e-09, |
| "loss": 0.409, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0939750149846077, |
| "step": 770, |
| "valid_targets_mean": 4071.9, |
| "valid_targets_min": 725 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.31807527946415975, |
| "learning_rate": 2.0315777789159387e-10, |
| "loss": 0.4055, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1633167266845703, |
| "step": 775, |
| "valid_targets_mean": 3996.9, |
| "valid_targets_min": 611 |
| }, |
| { |
| "epoch": 5.0, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1633167266845703, |
| "step": 775, |
| "total_flos": 1.2782147307999068e+18, |
| "train_loss": 0.4453052529981059, |
| "train_runtime": 15886.2519, |
| "train_samples_per_second": 3.115, |
| "train_steps_per_second": 0.049, |
| "valid_targets_mean": 3996.9, |
| "valid_targets_min": 611 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 775, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2782147307999068e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|