| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1010, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0049504950495049506, | |
| "grad_norm": 2.7106738805250417, | |
| "learning_rate": 7.920792079207921e-07, | |
| "loss": 0.6976, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.009900990099009901, | |
| "grad_norm": 2.696401601038833, | |
| "learning_rate": 1.5841584158415842e-06, | |
| "loss": 0.6904, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01485148514851485, | |
| "grad_norm": 2.692059520203227, | |
| "learning_rate": 2.3762376237623762e-06, | |
| "loss": 0.6909, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.019801980198019802, | |
| "grad_norm": 2.49976515730273, | |
| "learning_rate": 3.1683168316831685e-06, | |
| "loss": 0.6768, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.024752475247524754, | |
| "grad_norm": 1.967697623740443, | |
| "learning_rate": 3.960396039603961e-06, | |
| "loss": 0.6623, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0297029702970297, | |
| "grad_norm": 1.3506412968967432, | |
| "learning_rate": 4.7524752475247525e-06, | |
| "loss": 0.6329, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.034653465346534656, | |
| "grad_norm": 1.2792125314994267, | |
| "learning_rate": 5.544554455445545e-06, | |
| "loss": 0.6231, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.039603960396039604, | |
| "grad_norm": 1.0567537067272226, | |
| "learning_rate": 6.336633663366337e-06, | |
| "loss": 0.5908, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04455445544554455, | |
| "grad_norm": 1.0503985206378559, | |
| "learning_rate": 7.128712871287129e-06, | |
| "loss": 0.5854, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.04950495049504951, | |
| "grad_norm": 0.9386269831580432, | |
| "learning_rate": 7.920792079207921e-06, | |
| "loss": 0.5732, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.054455445544554455, | |
| "grad_norm": 1.1064367369100885, | |
| "learning_rate": 8.712871287128714e-06, | |
| "loss": 0.537, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0594059405940594, | |
| "grad_norm": 1.0457856811185964, | |
| "learning_rate": 9.504950495049505e-06, | |
| "loss": 0.5313, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.06435643564356436, | |
| "grad_norm": 0.681887596760736, | |
| "learning_rate": 1.0297029702970298e-05, | |
| "loss": 0.515, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.06930693069306931, | |
| "grad_norm": 0.5774254406822782, | |
| "learning_rate": 1.108910891089109e-05, | |
| "loss": 0.5054, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.07425742574257425, | |
| "grad_norm": 1.0925049544322916, | |
| "learning_rate": 1.1881188118811881e-05, | |
| "loss": 0.4899, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.07920792079207921, | |
| "grad_norm": 0.861721437180681, | |
| "learning_rate": 1.2673267326732674e-05, | |
| "loss": 0.4833, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08415841584158416, | |
| "grad_norm": 0.5864729079716412, | |
| "learning_rate": 1.3465346534653467e-05, | |
| "loss": 0.4782, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0891089108910891, | |
| "grad_norm": 0.5865390324576937, | |
| "learning_rate": 1.4257425742574257e-05, | |
| "loss": 0.4695, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.09405940594059406, | |
| "grad_norm": 0.6130092306626196, | |
| "learning_rate": 1.504950495049505e-05, | |
| "loss": 0.4674, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.09900990099009901, | |
| "grad_norm": 0.5021736430582571, | |
| "learning_rate": 1.5841584158415843e-05, | |
| "loss": 0.4613, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.10396039603960396, | |
| "grad_norm": 0.38770857771222433, | |
| "learning_rate": 1.6633663366336635e-05, | |
| "loss": 0.4518, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.10891089108910891, | |
| "grad_norm": 0.3801872343316169, | |
| "learning_rate": 1.7425742574257428e-05, | |
| "loss": 0.4461, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.11386138613861387, | |
| "grad_norm": 0.36919542588411713, | |
| "learning_rate": 1.821782178217822e-05, | |
| "loss": 0.4408, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.1188118811881188, | |
| "grad_norm": 0.37639723207731884, | |
| "learning_rate": 1.900990099009901e-05, | |
| "loss": 0.4449, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.12376237623762376, | |
| "grad_norm": 0.344848768853846, | |
| "learning_rate": 1.9801980198019803e-05, | |
| "loss": 0.4419, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.12871287128712872, | |
| "grad_norm": 0.28355527610389414, | |
| "learning_rate": 2.0594059405940595e-05, | |
| "loss": 0.4343, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.13366336633663367, | |
| "grad_norm": 0.2754529294047224, | |
| "learning_rate": 2.1386138613861388e-05, | |
| "loss": 0.4237, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.13861386138613863, | |
| "grad_norm": 0.2865711175520124, | |
| "learning_rate": 2.217821782178218e-05, | |
| "loss": 0.4263, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.14356435643564355, | |
| "grad_norm": 0.2508438147810079, | |
| "learning_rate": 2.297029702970297e-05, | |
| "loss": 0.4248, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.1485148514851485, | |
| "grad_norm": 0.24448250670982463, | |
| "learning_rate": 2.3762376237623762e-05, | |
| "loss": 0.4227, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.15346534653465346, | |
| "grad_norm": 0.2294048162138678, | |
| "learning_rate": 2.4554455445544555e-05, | |
| "loss": 0.4256, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.15841584158415842, | |
| "grad_norm": 0.209271866937176, | |
| "learning_rate": 2.5346534653465348e-05, | |
| "loss": 0.4151, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.16336633663366337, | |
| "grad_norm": 0.24176526780414578, | |
| "learning_rate": 2.613861386138614e-05, | |
| "loss": 0.4161, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.16831683168316833, | |
| "grad_norm": 0.21049518828920885, | |
| "learning_rate": 2.6930693069306933e-05, | |
| "loss": 0.406, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.17326732673267325, | |
| "grad_norm": 0.19342535956001514, | |
| "learning_rate": 2.7722772277227722e-05, | |
| "loss": 0.4126, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1782178217821782, | |
| "grad_norm": 0.19033425678980173, | |
| "learning_rate": 2.8514851485148515e-05, | |
| "loss": 0.4126, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.18316831683168316, | |
| "grad_norm": 0.18234331276088547, | |
| "learning_rate": 2.9306930693069308e-05, | |
| "loss": 0.4105, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.18811881188118812, | |
| "grad_norm": 0.18097741327633646, | |
| "learning_rate": 3.00990099009901e-05, | |
| "loss": 0.4072, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.19306930693069307, | |
| "grad_norm": 0.17660769986639668, | |
| "learning_rate": 3.0891089108910896e-05, | |
| "loss": 0.4082, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.19801980198019803, | |
| "grad_norm": 0.16490613450315042, | |
| "learning_rate": 3.1683168316831686e-05, | |
| "loss": 0.4004, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.20297029702970298, | |
| "grad_norm": 0.17948074742599796, | |
| "learning_rate": 3.247524752475248e-05, | |
| "loss": 0.3991, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.2079207920792079, | |
| "grad_norm": 0.1694173422938168, | |
| "learning_rate": 3.326732673267327e-05, | |
| "loss": 0.39, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.21287128712871287, | |
| "grad_norm": 0.16195673711111672, | |
| "learning_rate": 3.405940594059406e-05, | |
| "loss": 0.3906, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.21782178217821782, | |
| "grad_norm": 0.18011327698717658, | |
| "learning_rate": 3.4851485148514856e-05, | |
| "loss": 0.3989, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.22277227722772278, | |
| "grad_norm": 0.16228918096461187, | |
| "learning_rate": 3.5643564356435645e-05, | |
| "loss": 0.3975, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.22772277227722773, | |
| "grad_norm": 0.1770949848045279, | |
| "learning_rate": 3.643564356435644e-05, | |
| "loss": 0.3926, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.23267326732673269, | |
| "grad_norm": 0.165950052864865, | |
| "learning_rate": 3.722772277227723e-05, | |
| "loss": 0.3836, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.2376237623762376, | |
| "grad_norm": 0.1687315416703815, | |
| "learning_rate": 3.801980198019802e-05, | |
| "loss": 0.3844, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.24257425742574257, | |
| "grad_norm": 0.16567211187560885, | |
| "learning_rate": 3.8811881188118816e-05, | |
| "loss": 0.3799, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.24752475247524752, | |
| "grad_norm": 0.16916743953716526, | |
| "learning_rate": 3.9603960396039605e-05, | |
| "loss": 0.387, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2524752475247525, | |
| "grad_norm": 0.17623879763389697, | |
| "learning_rate": 4.03960396039604e-05, | |
| "loss": 0.3858, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.25742574257425743, | |
| "grad_norm": 0.17467961939214205, | |
| "learning_rate": 4.118811881188119e-05, | |
| "loss": 0.3807, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2623762376237624, | |
| "grad_norm": 0.2944378690201917, | |
| "learning_rate": 4.1980198019801987e-05, | |
| "loss": 0.385, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.26732673267326734, | |
| "grad_norm": 0.5178473274848127, | |
| "learning_rate": 4.2772277227722776e-05, | |
| "loss": 0.381, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2722772277227723, | |
| "grad_norm": 0.9941728281188755, | |
| "learning_rate": 4.356435643564357e-05, | |
| "loss": 0.3953, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.27722772277227725, | |
| "grad_norm": 0.9303019204930113, | |
| "learning_rate": 4.435643564356436e-05, | |
| "loss": 0.3966, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.28217821782178215, | |
| "grad_norm": 0.6881180716715236, | |
| "learning_rate": 4.514851485148515e-05, | |
| "loss": 0.3932, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.2871287128712871, | |
| "grad_norm": 0.764197790459852, | |
| "learning_rate": 4.594059405940594e-05, | |
| "loss": 0.3941, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.29207920792079206, | |
| "grad_norm": 1.647386456322686, | |
| "learning_rate": 4.6732673267326736e-05, | |
| "loss": 0.4021, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.297029702970297, | |
| "grad_norm": 0.7491760288830157, | |
| "learning_rate": 4.7524752475247525e-05, | |
| "loss": 0.3949, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.30198019801980197, | |
| "grad_norm": 0.8931216713440562, | |
| "learning_rate": 4.831683168316832e-05, | |
| "loss": 0.3942, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.3069306930693069, | |
| "grad_norm": 0.6557824032290299, | |
| "learning_rate": 4.910891089108911e-05, | |
| "loss": 0.3903, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.3118811881188119, | |
| "grad_norm": 0.613421803633619, | |
| "learning_rate": 4.9900990099009906e-05, | |
| "loss": 0.3816, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.31683168316831684, | |
| "grad_norm": 0.5547034347595892, | |
| "learning_rate": 5.0693069306930696e-05, | |
| "loss": 0.3867, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.3217821782178218, | |
| "grad_norm": 0.6397811968860269, | |
| "learning_rate": 5.148514851485149e-05, | |
| "loss": 0.3835, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.32673267326732675, | |
| "grad_norm": 0.48090924579294886, | |
| "learning_rate": 5.227722772277228e-05, | |
| "loss": 0.3814, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.3316831683168317, | |
| "grad_norm": 0.5627837815105704, | |
| "learning_rate": 5.306930693069308e-05, | |
| "loss": 0.382, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.33663366336633666, | |
| "grad_norm": 0.4296757500964271, | |
| "learning_rate": 5.3861386138613866e-05, | |
| "loss": 0.3771, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3415841584158416, | |
| "grad_norm": 0.5124763237012445, | |
| "learning_rate": 5.465346534653466e-05, | |
| "loss": 0.3686, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3465346534653465, | |
| "grad_norm": 0.44620454004036086, | |
| "learning_rate": 5.5445544554455445e-05, | |
| "loss": 0.3768, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.35148514851485146, | |
| "grad_norm": 0.40097174912710437, | |
| "learning_rate": 5.623762376237624e-05, | |
| "loss": 0.3732, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3564356435643564, | |
| "grad_norm": 0.3605645786344511, | |
| "learning_rate": 5.702970297029703e-05, | |
| "loss": 0.3705, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3613861386138614, | |
| "grad_norm": 0.41932072631744316, | |
| "learning_rate": 5.7821782178217826e-05, | |
| "loss": 0.3693, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.36633663366336633, | |
| "grad_norm": 0.4006777830339425, | |
| "learning_rate": 5.8613861386138615e-05, | |
| "loss": 0.379, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3712871287128713, | |
| "grad_norm": 0.4465969738529599, | |
| "learning_rate": 5.940594059405941e-05, | |
| "loss": 0.3709, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.37623762376237624, | |
| "grad_norm": 0.4223804979204032, | |
| "learning_rate": 6.01980198019802e-05, | |
| "loss": 0.3675, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3811881188118812, | |
| "grad_norm": 0.37123640032095456, | |
| "learning_rate": 6.0990099009900997e-05, | |
| "loss": 0.366, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.38613861386138615, | |
| "grad_norm": 0.3392928493351884, | |
| "learning_rate": 6.178217821782179e-05, | |
| "loss": 0.3716, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3910891089108911, | |
| "grad_norm": 0.2889058251453323, | |
| "learning_rate": 6.257425742574258e-05, | |
| "loss": 0.3642, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.39603960396039606, | |
| "grad_norm": 0.3459814472315841, | |
| "learning_rate": 6.336633663366337e-05, | |
| "loss": 0.3691, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.400990099009901, | |
| "grad_norm": 0.38845940118983235, | |
| "learning_rate": 6.415841584158417e-05, | |
| "loss": 0.3711, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.40594059405940597, | |
| "grad_norm": 0.42532185159046343, | |
| "learning_rate": 6.495049504950496e-05, | |
| "loss": 0.3675, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.41089108910891087, | |
| "grad_norm": 0.5455105953636796, | |
| "learning_rate": 6.574257425742575e-05, | |
| "loss": 0.3633, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.4158415841584158, | |
| "grad_norm": 0.5990744796491794, | |
| "learning_rate": 6.653465346534654e-05, | |
| "loss": 0.3583, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.4207920792079208, | |
| "grad_norm": 0.49648400280044397, | |
| "learning_rate": 6.732673267326732e-05, | |
| "loss": 0.3664, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.42574257425742573, | |
| "grad_norm": 0.4307985110055904, | |
| "learning_rate": 6.811881188118812e-05, | |
| "loss": 0.3673, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4306930693069307, | |
| "grad_norm": 0.5272138230588959, | |
| "learning_rate": 6.891089108910892e-05, | |
| "loss": 0.3653, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.43564356435643564, | |
| "grad_norm": 0.6931632267314781, | |
| "learning_rate": 6.970297029702971e-05, | |
| "loss": 0.3725, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.4405940594059406, | |
| "grad_norm": 0.9339192352616005, | |
| "learning_rate": 7.04950495049505e-05, | |
| "loss": 0.3718, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.44554455445544555, | |
| "grad_norm": 1.0174313097655168, | |
| "learning_rate": 7.128712871287129e-05, | |
| "loss": 0.3835, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4504950495049505, | |
| "grad_norm": 0.6671231145210254, | |
| "learning_rate": 7.207920792079209e-05, | |
| "loss": 0.367, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.45544554455445546, | |
| "grad_norm": 0.6175710394910587, | |
| "learning_rate": 7.287128712871288e-05, | |
| "loss": 0.3705, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4603960396039604, | |
| "grad_norm": 0.6273903843989881, | |
| "learning_rate": 7.366336633663368e-05, | |
| "loss": 0.3701, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.46534653465346537, | |
| "grad_norm": 0.5030734661096935, | |
| "learning_rate": 7.445544554455446e-05, | |
| "loss": 0.372, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.47029702970297027, | |
| "grad_norm": 0.610175464714336, | |
| "learning_rate": 7.524752475247524e-05, | |
| "loss": 0.3702, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.4752475247524752, | |
| "grad_norm": 0.45894009874038927, | |
| "learning_rate": 7.603960396039604e-05, | |
| "loss": 0.3695, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.4801980198019802, | |
| "grad_norm": 0.5986232687060531, | |
| "learning_rate": 7.683168316831684e-05, | |
| "loss": 0.3657, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.48514851485148514, | |
| "grad_norm": 0.46624796933237705, | |
| "learning_rate": 7.762376237623763e-05, | |
| "loss": 0.3614, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.4900990099009901, | |
| "grad_norm": 0.43351289175270075, | |
| "learning_rate": 7.841584158415841e-05, | |
| "loss": 0.3683, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.49504950495049505, | |
| "grad_norm": 0.4940464058502036, | |
| "learning_rate": 7.920792079207921e-05, | |
| "loss": 0.3621, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.4386737116693806, | |
| "learning_rate": 8e-05, | |
| "loss": 0.3611, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.504950495049505, | |
| "grad_norm": 0.33108309935071073, | |
| "learning_rate": 7.999976110803523e-05, | |
| "loss": 0.3571, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5099009900990099, | |
| "grad_norm": 0.43229575461499764, | |
| "learning_rate": 7.99990444349944e-05, | |
| "loss": 0.3588, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5148514851485149, | |
| "grad_norm": 0.39892418329866514, | |
| "learning_rate": 7.999784998943787e-05, | |
| "loss": 0.3621, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5198019801980198, | |
| "grad_norm": 0.39765033553103313, | |
| "learning_rate": 7.999617778563281e-05, | |
| "loss": 0.36, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5247524752475248, | |
| "grad_norm": 0.47174546802256195, | |
| "learning_rate": 7.999402784355303e-05, | |
| "loss": 0.3679, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5297029702970297, | |
| "grad_norm": 0.5434295873242668, | |
| "learning_rate": 7.999140018887873e-05, | |
| "loss": 0.365, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5346534653465347, | |
| "grad_norm": 0.46618471813920354, | |
| "learning_rate": 7.998829485299617e-05, | |
| "loss": 0.362, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5396039603960396, | |
| "grad_norm": 0.32274696751184606, | |
| "learning_rate": 7.998471187299734e-05, | |
| "loss": 0.3573, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5445544554455446, | |
| "grad_norm": 0.2980275103082691, | |
| "learning_rate": 7.998065129167953e-05, | |
| "loss": 0.3604, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5495049504950495, | |
| "grad_norm": 0.3313425678437383, | |
| "learning_rate": 7.997611315754472e-05, | |
| "loss": 0.3559, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.5544554455445545, | |
| "grad_norm": 0.34876342801843374, | |
| "learning_rate": 7.997109752479912e-05, | |
| "loss": 0.3605, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5594059405940595, | |
| "grad_norm": 0.27070943874947645, | |
| "learning_rate": 7.996560445335241e-05, | |
| "loss": 0.3578, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.5643564356435643, | |
| "grad_norm": 0.30784470099177474, | |
| "learning_rate": 7.995963400881718e-05, | |
| "loss": 0.3525, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.5693069306930693, | |
| "grad_norm": 0.3534221286392907, | |
| "learning_rate": 7.995318626250795e-05, | |
| "loss": 0.359, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.5742574257425742, | |
| "grad_norm": 0.3474350284931066, | |
| "learning_rate": 7.994626129144047e-05, | |
| "loss": 0.354, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5792079207920792, | |
| "grad_norm": 0.3100297920829696, | |
| "learning_rate": 7.993885917833073e-05, | |
| "loss": 0.3505, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.5841584158415841, | |
| "grad_norm": 0.34227574104701863, | |
| "learning_rate": 7.9930980011594e-05, | |
| "loss": 0.357, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5891089108910891, | |
| "grad_norm": 0.33712051818395816, | |
| "learning_rate": 7.992262388534378e-05, | |
| "loss": 0.3527, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.594059405940594, | |
| "grad_norm": 0.3626887525897252, | |
| "learning_rate": 7.991379089939062e-05, | |
| "loss": 0.3553, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.599009900990099, | |
| "grad_norm": 0.3293657653843729, | |
| "learning_rate": 7.990448115924099e-05, | |
| "loss": 0.3579, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6039603960396039, | |
| "grad_norm": 0.4513819250936864, | |
| "learning_rate": 7.989469477609601e-05, | |
| "loss": 0.3536, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6089108910891089, | |
| "grad_norm": 0.5867550752785323, | |
| "learning_rate": 7.988443186685007e-05, | |
| "loss": 0.3598, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6138613861386139, | |
| "grad_norm": 0.6848907478025485, | |
| "learning_rate": 7.987369255408953e-05, | |
| "loss": 0.3557, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6188118811881188, | |
| "grad_norm": 0.5804172340911018, | |
| "learning_rate": 7.986247696609112e-05, | |
| "loss": 0.3579, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6237623762376238, | |
| "grad_norm": 0.35057273505497694, | |
| "learning_rate": 7.985078523682058e-05, | |
| "loss": 0.3476, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6287128712871287, | |
| "grad_norm": 0.3497443854677124, | |
| "learning_rate": 7.983861750593091e-05, | |
| "loss": 0.3524, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6336633663366337, | |
| "grad_norm": 0.4250139056185807, | |
| "learning_rate": 7.982597391876076e-05, | |
| "loss": 0.357, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6386138613861386, | |
| "grad_norm": 0.31755749351038337, | |
| "learning_rate": 7.981285462633268e-05, | |
| "loss": 0.3513, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6435643564356436, | |
| "grad_norm": 0.26676351402850523, | |
| "learning_rate": 7.979925978535137e-05, | |
| "loss": 0.3566, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6485148514851485, | |
| "grad_norm": 0.32517374670467525, | |
| "learning_rate": 7.978518955820173e-05, | |
| "loss": 0.3548, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.6534653465346535, | |
| "grad_norm": 0.2890288825364607, | |
| "learning_rate": 7.977064411294698e-05, | |
| "loss": 0.3472, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.6584158415841584, | |
| "grad_norm": 0.22366770202582242, | |
| "learning_rate": 7.975562362332663e-05, | |
| "loss": 0.3516, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.6633663366336634, | |
| "grad_norm": 0.31844198115660743, | |
| "learning_rate": 7.974012826875436e-05, | |
| "loss": 0.3515, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.6683168316831684, | |
| "grad_norm": 0.3529525464109541, | |
| "learning_rate": 7.972415823431599e-05, | |
| "loss": 0.3525, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.6732673267326733, | |
| "grad_norm": 0.255066234755533, | |
| "learning_rate": 7.970771371076715e-05, | |
| "loss": 0.3498, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.6782178217821783, | |
| "grad_norm": 0.2593778740218104, | |
| "learning_rate": 7.969079489453107e-05, | |
| "loss": 0.3506, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.6831683168316832, | |
| "grad_norm": 0.35100330007534536, | |
| "learning_rate": 7.96734019876962e-05, | |
| "loss": 0.3507, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.6881188118811881, | |
| "grad_norm": 0.4006287005322337, | |
| "learning_rate": 7.965553519801385e-05, | |
| "loss": 0.3525, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.693069306930693, | |
| "grad_norm": 0.4271373026120573, | |
| "learning_rate": 7.963719473889562e-05, | |
| "loss": 0.3514, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.698019801980198, | |
| "grad_norm": 0.5081112805036884, | |
| "learning_rate": 7.961838082941094e-05, | |
| "loss": 0.3604, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7029702970297029, | |
| "grad_norm": 0.5842521863062967, | |
| "learning_rate": 7.959909369428441e-05, | |
| "loss": 0.3515, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7079207920792079, | |
| "grad_norm": 0.5503571352775998, | |
| "learning_rate": 7.957933356389306e-05, | |
| "loss": 0.3524, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7128712871287128, | |
| "grad_norm": 0.42737622709783635, | |
| "learning_rate": 7.955910067426377e-05, | |
| "loss": 0.3497, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7178217821782178, | |
| "grad_norm": 0.4811283193865698, | |
| "learning_rate": 7.953839526707025e-05, | |
| "loss": 0.3519, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7227722772277227, | |
| "grad_norm": 0.5237842031287816, | |
| "learning_rate": 7.951721758963028e-05, | |
| "loss": 0.3543, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.7277227722772277, | |
| "grad_norm": 0.33835426109252503, | |
| "learning_rate": 7.949556789490269e-05, | |
| "loss": 0.3495, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7326732673267327, | |
| "grad_norm": 0.4072933799188343, | |
| "learning_rate": 7.94734464414844e-05, | |
| "loss": 0.3525, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.7376237623762376, | |
| "grad_norm": 0.4420494788957065, | |
| "learning_rate": 7.945085349360728e-05, | |
| "loss": 0.3515, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7425742574257426, | |
| "grad_norm": 0.3047954686653965, | |
| "learning_rate": 7.942778932113501e-05, | |
| "loss": 0.3526, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.7475247524752475, | |
| "grad_norm": 0.36720786547284384, | |
| "learning_rate": 7.940425419955988e-05, | |
| "loss": 0.3511, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.7524752475247525, | |
| "grad_norm": 0.2709662965586093, | |
| "learning_rate": 7.938024840999944e-05, | |
| "loss": 0.3464, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.7574257425742574, | |
| "grad_norm": 0.3067260723147892, | |
| "learning_rate": 7.935577223919322e-05, | |
| "loss": 0.3496, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.7623762376237624, | |
| "grad_norm": 0.33922244765994963, | |
| "learning_rate": 7.933082597949925e-05, | |
| "loss": 0.3444, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.7673267326732673, | |
| "grad_norm": 0.24825212387361578, | |
| "learning_rate": 7.930540992889056e-05, | |
| "loss": 0.3462, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.7722772277227723, | |
| "grad_norm": 0.28742334966555666, | |
| "learning_rate": 7.927952439095167e-05, | |
| "loss": 0.3415, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.7772277227722773, | |
| "grad_norm": 0.28105530967442244, | |
| "learning_rate": 7.925316967487493e-05, | |
| "loss": 0.3489, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.7821782178217822, | |
| "grad_norm": 0.2269815480722697, | |
| "learning_rate": 7.922634609545685e-05, | |
| "loss": 0.35, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.7871287128712872, | |
| "grad_norm": 0.3460207605487597, | |
| "learning_rate": 7.919905397309429e-05, | |
| "loss": 0.3454, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.7920792079207921, | |
| "grad_norm": 0.34695798696000907, | |
| "learning_rate": 7.917129363378069e-05, | |
| "loss": 0.3512, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7970297029702971, | |
| "grad_norm": 0.3417847973040481, | |
| "learning_rate": 7.914306540910216e-05, | |
| "loss": 0.3491, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.801980198019802, | |
| "grad_norm": 0.42447506809816166, | |
| "learning_rate": 7.91143696362335e-05, | |
| "loss": 0.3458, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.806930693069307, | |
| "grad_norm": 0.4652667317777917, | |
| "learning_rate": 7.908520665793419e-05, | |
| "loss": 0.3471, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8118811881188119, | |
| "grad_norm": 0.45670453442321735, | |
| "learning_rate": 7.905557682254429e-05, | |
| "loss": 0.35, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.8168316831683168, | |
| "grad_norm": 0.39419625102522626, | |
| "learning_rate": 7.902548048398028e-05, | |
| "loss": 0.3483, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.8217821782178217, | |
| "grad_norm": 0.24325629441917615, | |
| "learning_rate": 7.89949180017308e-05, | |
| "loss": 0.3405, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.8267326732673267, | |
| "grad_norm": 0.3145115138550842, | |
| "learning_rate": 7.896388974085246e-05, | |
| "loss": 0.3467, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.8316831683168316, | |
| "grad_norm": 0.41909642085754, | |
| "learning_rate": 7.893239607196537e-05, | |
| "loss": 0.3497, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8366336633663366, | |
| "grad_norm": 0.33326007547308273, | |
| "learning_rate": 7.890043737124872e-05, | |
| "loss": 0.3468, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8415841584158416, | |
| "grad_norm": 0.21373097133178287, | |
| "learning_rate": 7.886801402043639e-05, | |
| "loss": 0.347, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8465346534653465, | |
| "grad_norm": 0.28669930148909606, | |
| "learning_rate": 7.883512640681226e-05, | |
| "loss": 0.3497, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.8514851485148515, | |
| "grad_norm": 0.34281409481153846, | |
| "learning_rate": 7.880177492320565e-05, | |
| "loss": 0.3476, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.8564356435643564, | |
| "grad_norm": 0.32878705062110175, | |
| "learning_rate": 7.876795996798665e-05, | |
| "loss": 0.3443, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.8613861386138614, | |
| "grad_norm": 0.2560890954076474, | |
| "learning_rate": 7.873368194506131e-05, | |
| "loss": 0.3449, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.8663366336633663, | |
| "grad_norm": 0.2659800616971379, | |
| "learning_rate": 7.869894126386684e-05, | |
| "loss": 0.3494, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.8712871287128713, | |
| "grad_norm": 0.2957706400184562, | |
| "learning_rate": 7.866373833936673e-05, | |
| "loss": 0.3427, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.8762376237623762, | |
| "grad_norm": 0.2877326856661544, | |
| "learning_rate": 7.862807359204574e-05, | |
| "loss": 0.3404, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.8811881188118812, | |
| "grad_norm": 0.2492809343857738, | |
| "learning_rate": 7.859194744790498e-05, | |
| "loss": 0.3423, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.8861386138613861, | |
| "grad_norm": 0.20823037683334517, | |
| "learning_rate": 7.855536033845673e-05, | |
| "loss": 0.3417, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.8910891089108911, | |
| "grad_norm": 0.2615486876434859, | |
| "learning_rate": 7.851831270071929e-05, | |
| "loss": 0.3447, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.8960396039603961, | |
| "grad_norm": 0.28681482554906357, | |
| "learning_rate": 7.848080497721181e-05, | |
| "loss": 0.3423, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.900990099009901, | |
| "grad_norm": 0.2979002962137349, | |
| "learning_rate": 7.844283761594899e-05, | |
| "loss": 0.3369, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.905940594059406, | |
| "grad_norm": 0.33231625569300743, | |
| "learning_rate": 7.84044110704357e-05, | |
| "loss": 0.3467, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.9108910891089109, | |
| "grad_norm": 0.3331695358581735, | |
| "learning_rate": 7.83655257996616e-05, | |
| "loss": 0.3417, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.9158415841584159, | |
| "grad_norm": 0.3411061202071129, | |
| "learning_rate": 7.83261822680956e-05, | |
| "loss": 0.3487, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.9207920792079208, | |
| "grad_norm": 0.30749202524041036, | |
| "learning_rate": 7.828638094568041e-05, | |
| "loss": 0.3406, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.9257425742574258, | |
| "grad_norm": 0.30511452956851215, | |
| "learning_rate": 7.824612230782681e-05, | |
| "loss": 0.3403, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.9306930693069307, | |
| "grad_norm": 0.33218486067673636, | |
| "learning_rate": 7.820540683540808e-05, | |
| "loss": 0.3388, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.9356435643564357, | |
| "grad_norm": 0.3213728709603462, | |
| "learning_rate": 7.816423501475415e-05, | |
| "loss": 0.3457, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.9405940594059405, | |
| "grad_norm": 0.3095238063829038, | |
| "learning_rate": 7.812260733764591e-05, | |
| "loss": 0.348, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.9455445544554455, | |
| "grad_norm": 0.35118241238757736, | |
| "learning_rate": 7.80805243013092e-05, | |
| "loss": 0.3467, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.9504950495049505, | |
| "grad_norm": 0.370467699158138, | |
| "learning_rate": 7.803798640840901e-05, | |
| "loss": 0.3441, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.9554455445544554, | |
| "grad_norm": 0.34511631244982893, | |
| "learning_rate": 7.799499416704338e-05, | |
| "loss": 0.3457, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.9603960396039604, | |
| "grad_norm": 0.3235511774722652, | |
| "learning_rate": 7.795154809073735e-05, | |
| "loss": 0.3408, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.9653465346534653, | |
| "grad_norm": 0.3539469729464918, | |
| "learning_rate": 7.790764869843684e-05, | |
| "loss": 0.3426, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.9702970297029703, | |
| "grad_norm": 0.35801210970598224, | |
| "learning_rate": 7.786329651450248e-05, | |
| "loss": 0.3462, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.9752475247524752, | |
| "grad_norm": 0.30433710890475135, | |
| "learning_rate": 7.781849206870325e-05, | |
| "loss": 0.3475, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.9801980198019802, | |
| "grad_norm": 0.34885762683733873, | |
| "learning_rate": 7.77732358962103e-05, | |
| "loss": 0.3408, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.9851485148514851, | |
| "grad_norm": 0.43270154125711485, | |
| "learning_rate": 7.772752853759039e-05, | |
| "loss": 0.3412, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.9900990099009901, | |
| "grad_norm": 0.5193353797335327, | |
| "learning_rate": 7.768137053879957e-05, | |
| "loss": 0.345, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.995049504950495, | |
| "grad_norm": 0.5848338133959268, | |
| "learning_rate": 7.763476245117659e-05, | |
| "loss": 0.3402, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.49189050142360197, | |
| "learning_rate": 7.758770483143634e-05, | |
| "loss": 0.3418, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.004950495049505, | |
| "grad_norm": 0.3965620050129241, | |
| "learning_rate": 7.754019824166318e-05, | |
| "loss": 0.3272, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.00990099009901, | |
| "grad_norm": 0.38729303869225656, | |
| "learning_rate": 7.749224324930421e-05, | |
| "loss": 0.3265, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.0148514851485149, | |
| "grad_norm": 0.46382028945219433, | |
| "learning_rate": 7.744384042716258e-05, | |
| "loss": 0.3259, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.0198019801980198, | |
| "grad_norm": 0.4961244468745795, | |
| "learning_rate": 7.739499035339055e-05, | |
| "loss": 0.3265, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.0247524752475248, | |
| "grad_norm": 0.39391833603810456, | |
| "learning_rate": 7.734569361148262e-05, | |
| "loss": 0.3243, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.0297029702970297, | |
| "grad_norm": 0.31996146294099276, | |
| "learning_rate": 7.729595079026856e-05, | |
| "loss": 0.3251, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.0346534653465347, | |
| "grad_norm": 0.26647643410303384, | |
| "learning_rate": 7.724576248390639e-05, | |
| "loss": 0.3223, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.0396039603960396, | |
| "grad_norm": 0.25657678139008755, | |
| "learning_rate": 7.719512929187527e-05, | |
| "loss": 0.3189, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.0445544554455446, | |
| "grad_norm": 0.3153458918560271, | |
| "learning_rate": 7.714405181896831e-05, | |
| "loss": 0.325, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.0495049504950495, | |
| "grad_norm": 0.3310266745371117, | |
| "learning_rate": 7.709253067528545e-05, | |
| "loss": 0.3258, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.0544554455445545, | |
| "grad_norm": 0.2595609505361843, | |
| "learning_rate": 7.704056647622603e-05, | |
| "loss": 0.3176, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.0594059405940595, | |
| "grad_norm": 0.22851822976210578, | |
| "learning_rate": 7.698815984248152e-05, | |
| "loss": 0.3171, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.0643564356435644, | |
| "grad_norm": 0.2628390466311596, | |
| "learning_rate": 7.693531140002811e-05, | |
| "loss": 0.3208, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.0693069306930694, | |
| "grad_norm": 0.22084804612132355, | |
| "learning_rate": 7.688202178011921e-05, | |
| "loss": 0.3246, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.0742574257425743, | |
| "grad_norm": 0.20572617251073919, | |
| "learning_rate": 7.682829161927794e-05, | |
| "loss": 0.3265, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.0792079207920793, | |
| "grad_norm": 0.26152370679155457, | |
| "learning_rate": 7.677412155928946e-05, | |
| "loss": 0.3244, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.0841584158415842, | |
| "grad_norm": 0.23198561773413004, | |
| "learning_rate": 7.671951224719339e-05, | |
| "loss": 0.3221, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.0891089108910892, | |
| "grad_norm": 0.23885371632512684, | |
| "learning_rate": 7.666446433527601e-05, | |
| "loss": 0.3228, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.0940594059405941, | |
| "grad_norm": 0.281941194601826, | |
| "learning_rate": 7.660897848106251e-05, | |
| "loss": 0.3183, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.099009900990099, | |
| "grad_norm": 0.35643488194797623, | |
| "learning_rate": 7.655305534730916e-05, | |
| "loss": 0.3223, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.103960396039604, | |
| "grad_norm": 0.4175750224727906, | |
| "learning_rate": 7.649669560199528e-05, | |
| "loss": 0.3226, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.108910891089109, | |
| "grad_norm": 0.4804436533424334, | |
| "learning_rate": 7.643989991831541e-05, | |
| "loss": 0.3261, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.113861386138614, | |
| "grad_norm": 0.5026289432175224, | |
| "learning_rate": 7.638266897467117e-05, | |
| "loss": 0.3239, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.118811881188119, | |
| "grad_norm": 0.45160715992891726, | |
| "learning_rate": 7.632500345466318e-05, | |
| "loss": 0.3255, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.1237623762376239, | |
| "grad_norm": 0.323894832081213, | |
| "learning_rate": 7.62669040470829e-05, | |
| "loss": 0.3235, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.1287128712871288, | |
| "grad_norm": 0.23111833292364858, | |
| "learning_rate": 7.620837144590444e-05, | |
| "loss": 0.3261, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.1336633663366338, | |
| "grad_norm": 0.35672796574835897, | |
| "learning_rate": 7.61494063502762e-05, | |
| "loss": 0.3244, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.1386138613861387, | |
| "grad_norm": 0.38953557445459835, | |
| "learning_rate": 7.609000946451255e-05, | |
| "loss": 0.3275, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.1435643564356435, | |
| "grad_norm": 0.2456196439272548, | |
| "learning_rate": 7.603018149808542e-05, | |
| "loss": 0.3242, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.1485148514851484, | |
| "grad_norm": 0.2627252229709477, | |
| "learning_rate": 7.596992316561583e-05, | |
| "loss": 0.3263, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.1534653465346534, | |
| "grad_norm": 0.3880693283953946, | |
| "learning_rate": 7.590923518686537e-05, | |
| "loss": 0.3227, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.1584158415841583, | |
| "grad_norm": 0.36122965194857654, | |
| "learning_rate": 7.584811828672755e-05, | |
| "loss": 0.324, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.1633663366336633, | |
| "grad_norm": 0.24771560907735035, | |
| "learning_rate": 7.578657319521918e-05, | |
| "loss": 0.3272, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.1683168316831682, | |
| "grad_norm": 0.3549181872881683, | |
| "learning_rate": 7.572460064747167e-05, | |
| "loss": 0.3252, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.1732673267326732, | |
| "grad_norm": 0.3701278541218383, | |
| "learning_rate": 7.56622013837222e-05, | |
| "loss": 0.322, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.1782178217821782, | |
| "grad_norm": 0.2291300819304423, | |
| "learning_rate": 7.55993761493049e-05, | |
| "loss": 0.3268, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.183168316831683, | |
| "grad_norm": 0.30325809144675336, | |
| "learning_rate": 7.553612569464197e-05, | |
| "loss": 0.3239, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.188118811881188, | |
| "grad_norm": 0.34766919596131945, | |
| "learning_rate": 7.547245077523466e-05, | |
| "loss": 0.3269, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.193069306930693, | |
| "grad_norm": 0.2802166057218898, | |
| "learning_rate": 7.540835215165431e-05, | |
| "loss": 0.3237, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.198019801980198, | |
| "grad_norm": 0.20761909947515408, | |
| "learning_rate": 7.534383058953321e-05, | |
| "loss": 0.3233, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.202970297029703, | |
| "grad_norm": 0.21713786543933997, | |
| "learning_rate": 7.527888685955551e-05, | |
| "loss": 0.3266, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.2079207920792079, | |
| "grad_norm": 0.28332778351110616, | |
| "learning_rate": 7.5213521737448e-05, | |
| "loss": 0.3234, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.2128712871287128, | |
| "grad_norm": 0.25218609771205475, | |
| "learning_rate": 7.514773600397076e-05, | |
| "loss": 0.3225, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.2178217821782178, | |
| "grad_norm": 0.23493196928481838, | |
| "learning_rate": 7.508153044490796e-05, | |
| "loss": 0.3244, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.2227722772277227, | |
| "grad_norm": 0.24313872207402484, | |
| "learning_rate": 7.50149058510584e-05, | |
| "loss": 0.322, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.2277227722772277, | |
| "grad_norm": 0.26071446304328083, | |
| "learning_rate": 7.494786301822611e-05, | |
| "loss": 0.325, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.2326732673267327, | |
| "grad_norm": 0.2713861363334041, | |
| "learning_rate": 7.488040274721077e-05, | |
| "loss": 0.3229, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.2376237623762376, | |
| "grad_norm": 0.2815273546029921, | |
| "learning_rate": 7.481252584379822e-05, | |
| "loss": 0.3229, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.2425742574257426, | |
| "grad_norm": 0.31187646080931386, | |
| "learning_rate": 7.47442331187508e-05, | |
| "loss": 0.3207, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.2475247524752475, | |
| "grad_norm": 0.2650133379890827, | |
| "learning_rate": 7.467552538779768e-05, | |
| "loss": 0.32, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.2524752475247525, | |
| "grad_norm": 0.17403472770593334, | |
| "learning_rate": 7.460640347162508e-05, | |
| "loss": 0.3238, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.2574257425742574, | |
| "grad_norm": 0.20063032190918698, | |
| "learning_rate": 7.453686819586655e-05, | |
| "loss": 0.329, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.2623762376237624, | |
| "grad_norm": 0.27733239216940847, | |
| "learning_rate": 7.4466920391093e-05, | |
| "loss": 0.3224, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.2673267326732673, | |
| "grad_norm": 0.30391912965378015, | |
| "learning_rate": 7.439656089280286e-05, | |
| "loss": 0.3187, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.2722772277227723, | |
| "grad_norm": 0.258142425865035, | |
| "learning_rate": 7.432579054141208e-05, | |
| "loss": 0.3213, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.2772277227722773, | |
| "grad_norm": 0.22151301347497324, | |
| "learning_rate": 7.425461018224406e-05, | |
| "loss": 0.3201, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.2821782178217822, | |
| "grad_norm": 0.23419027691681132, | |
| "learning_rate": 7.418302066551959e-05, | |
| "loss": 0.3267, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.2871287128712872, | |
| "grad_norm": 0.26013678205953394, | |
| "learning_rate": 7.411102284634672e-05, | |
| "loss": 0.3259, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.2920792079207921, | |
| "grad_norm": 0.2842311667544899, | |
| "learning_rate": 7.403861758471043e-05, | |
| "loss": 0.3187, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.297029702970297, | |
| "grad_norm": 0.31228351173236724, | |
| "learning_rate": 7.396580574546251e-05, | |
| "loss": 0.3222, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.301980198019802, | |
| "grad_norm": 0.3486572694201658, | |
| "learning_rate": 7.38925881983111e-05, | |
| "loss": 0.3275, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.306930693069307, | |
| "grad_norm": 0.3674794148030631, | |
| "learning_rate": 7.381896581781042e-05, | |
| "loss": 0.3215, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.311881188118812, | |
| "grad_norm": 0.3160950184823215, | |
| "learning_rate": 7.37449394833502e-05, | |
| "loss": 0.3235, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.316831683168317, | |
| "grad_norm": 0.22994487888930124, | |
| "learning_rate": 7.367051007914527e-05, | |
| "loss": 0.3222, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.3217821782178218, | |
| "grad_norm": 0.1938850721739046, | |
| "learning_rate": 7.359567849422496e-05, | |
| "loss": 0.324, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.3267326732673268, | |
| "grad_norm": 0.19676881488742676, | |
| "learning_rate": 7.352044562242248e-05, | |
| "loss": 0.3259, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.3316831683168318, | |
| "grad_norm": 0.2783673393968265, | |
| "learning_rate": 7.344481236236428e-05, | |
| "loss": 0.3201, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.3366336633663367, | |
| "grad_norm": 0.33404846697755264, | |
| "learning_rate": 7.336877961745926e-05, | |
| "loss": 0.3172, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.3415841584158417, | |
| "grad_norm": 0.32850987411997973, | |
| "learning_rate": 7.329234829588798e-05, | |
| "loss": 0.3201, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.3465346534653464, | |
| "grad_norm": 0.2580864877181822, | |
| "learning_rate": 7.321551931059191e-05, | |
| "loss": 0.3257, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.3514851485148514, | |
| "grad_norm": 0.15332399404847796, | |
| "learning_rate": 7.313829357926238e-05, | |
| "loss": 0.3261, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.3564356435643563, | |
| "grad_norm": 0.20248337274096065, | |
| "learning_rate": 7.306067202432976e-05, | |
| "loss": 0.3224, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.3613861386138613, | |
| "grad_norm": 0.30304088082346603, | |
| "learning_rate": 7.29826555729523e-05, | |
| "loss": 0.3255, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.3663366336633662, | |
| "grad_norm": 0.2931366381999886, | |
| "learning_rate": 7.290424515700519e-05, | |
| "loss": 0.323, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.3712871287128712, | |
| "grad_norm": 0.22992458525253304, | |
| "learning_rate": 7.282544171306933e-05, | |
| "loss": 0.3267, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.3762376237623761, | |
| "grad_norm": 0.18041230312676543, | |
| "learning_rate": 7.274624618242022e-05, | |
| "loss": 0.3227, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.381188118811881, | |
| "grad_norm": 0.204075595457074, | |
| "learning_rate": 7.266665951101664e-05, | |
| "loss": 0.3241, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.386138613861386, | |
| "grad_norm": 0.2695584195519876, | |
| "learning_rate": 7.258668264948941e-05, | |
| "loss": 0.3197, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.391089108910891, | |
| "grad_norm": 0.2587498125470864, | |
| "learning_rate": 7.250631655313001e-05, | |
| "loss": 0.3229, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.396039603960396, | |
| "grad_norm": 0.2498126566179802, | |
| "learning_rate": 7.242556218187919e-05, | |
| "loss": 0.3235, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.400990099009901, | |
| "grad_norm": 0.2769036801401161, | |
| "learning_rate": 7.234442050031543e-05, | |
| "loss": 0.3222, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.4059405940594059, | |
| "grad_norm": 0.2951697321170327, | |
| "learning_rate": 7.226289247764354e-05, | |
| "loss": 0.3193, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.4108910891089108, | |
| "grad_norm": 0.3140073279063781, | |
| "learning_rate": 7.2180979087683e-05, | |
| "loss": 0.3231, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.4158415841584158, | |
| "grad_norm": 0.3294640016367039, | |
| "learning_rate": 7.209868130885634e-05, | |
| "loss": 0.3214, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.4207920792079207, | |
| "grad_norm": 0.36565594630893544, | |
| "learning_rate": 7.201600012417745e-05, | |
| "loss": 0.3271, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.4257425742574257, | |
| "grad_norm": 0.35379832804519007, | |
| "learning_rate": 7.193293652123989e-05, | |
| "loss": 0.3205, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.4306930693069306, | |
| "grad_norm": 0.3223921579634103, | |
| "learning_rate": 7.1849491492205e-05, | |
| "loss": 0.3211, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.4356435643564356, | |
| "grad_norm": 0.35467670180093575, | |
| "learning_rate": 7.176566603379015e-05, | |
| "loss": 0.3221, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.4405940594059405, | |
| "grad_norm": 0.37798578537913297, | |
| "learning_rate": 7.168146114725673e-05, | |
| "loss": 0.3198, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.4455445544554455, | |
| "grad_norm": 0.34966699936779133, | |
| "learning_rate": 7.159687783839832e-05, | |
| "loss": 0.3227, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.4504950495049505, | |
| "grad_norm": 0.24726341182024242, | |
| "learning_rate": 7.151191711752854e-05, | |
| "loss": 0.3189, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.4554455445544554, | |
| "grad_norm": 0.25949588464207435, | |
| "learning_rate": 7.142657999946906e-05, | |
| "loss": 0.3222, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.4603960396039604, | |
| "grad_norm": 0.2855037596259817, | |
| "learning_rate": 7.134086750353747e-05, | |
| "loss": 0.3217, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.4653465346534653, | |
| "grad_norm": 0.22186831176071517, | |
| "learning_rate": 7.125478065353512e-05, | |
| "loss": 0.3193, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.4702970297029703, | |
| "grad_norm": 0.24569476568558268, | |
| "learning_rate": 7.116832047773484e-05, | |
| "loss": 0.3233, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.4752475247524752, | |
| "grad_norm": 0.24947664055115984, | |
| "learning_rate": 7.108148800886869e-05, | |
| "loss": 0.321, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.4801980198019802, | |
| "grad_norm": 0.21593927166838858, | |
| "learning_rate": 7.09942842841156e-05, | |
| "loss": 0.3177, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.4851485148514851, | |
| "grad_norm": 0.19917076539743275, | |
| "learning_rate": 7.090671034508905e-05, | |
| "loss": 0.3201, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.49009900990099, | |
| "grad_norm": 0.20214350623028918, | |
| "learning_rate": 7.081876723782457e-05, | |
| "loss": 0.3222, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.495049504950495, | |
| "grad_norm": 0.2385491320035371, | |
| "learning_rate": 7.073045601276723e-05, | |
| "loss": 0.3192, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.23154350378634414, | |
| "learning_rate": 7.064177772475912e-05, | |
| "loss": 0.3196, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.504950495049505, | |
| "grad_norm": 0.22628705886217929, | |
| "learning_rate": 7.05527334330268e-05, | |
| "loss": 0.3225, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.50990099009901, | |
| "grad_norm": 0.19427424546791436, | |
| "learning_rate": 7.046332420116852e-05, | |
| "loss": 0.3181, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.5148514851485149, | |
| "grad_norm": 0.18634215600114334, | |
| "learning_rate": 7.037355109714165e-05, | |
| "loss": 0.3184, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.5198019801980198, | |
| "grad_norm": 0.19920642103648958, | |
| "learning_rate": 7.028341519324985e-05, | |
| "loss": 0.317, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.5247524752475248, | |
| "grad_norm": 0.2684657108142712, | |
| "learning_rate": 7.019291756613029e-05, | |
| "loss": 0.3296, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.5297029702970297, | |
| "grad_norm": 0.2849696623219465, | |
| "learning_rate": 7.010205929674075e-05, | |
| "loss": 0.3202, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.5346534653465347, | |
| "grad_norm": 0.21705204588552374, | |
| "learning_rate": 7.001084147034676e-05, | |
| "loss": 0.319, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.5396039603960396, | |
| "grad_norm": 0.22560008026003084, | |
| "learning_rate": 6.99192651765086e-05, | |
| "loss": 0.3249, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.5445544554455446, | |
| "grad_norm": 0.26614078312578027, | |
| "learning_rate": 6.982733150906833e-05, | |
| "loss": 0.3212, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.5495049504950495, | |
| "grad_norm": 0.28899787142239786, | |
| "learning_rate": 6.973504156613666e-05, | |
| "loss": 0.3176, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.5544554455445545, | |
| "grad_norm": 0.2800623429447802, | |
| "learning_rate": 6.964239645007989e-05, | |
| "loss": 0.3197, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.5594059405940595, | |
| "grad_norm": 0.249656934863319, | |
| "learning_rate": 6.954939726750667e-05, | |
| "loss": 0.3214, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.5643564356435644, | |
| "grad_norm": 0.23045070686947017, | |
| "learning_rate": 6.945604512925493e-05, | |
| "loss": 0.3217, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.5693069306930694, | |
| "grad_norm": 0.22566199504874904, | |
| "learning_rate": 6.936234115037842e-05, | |
| "loss": 0.3239, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.5742574257425743, | |
| "grad_norm": 0.2262227158844052, | |
| "learning_rate": 6.926828645013353e-05, | |
| "loss": 0.3198, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.5792079207920793, | |
| "grad_norm": 0.1960066759664643, | |
| "learning_rate": 6.917388215196585e-05, | |
| "loss": 0.3222, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.5841584158415842, | |
| "grad_norm": 0.22269151083586428, | |
| "learning_rate": 6.907912938349682e-05, | |
| "loss": 0.3157, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.5891089108910892, | |
| "grad_norm": 0.21113188276021733, | |
| "learning_rate": 6.898402927651019e-05, | |
| "loss": 0.3175, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.5940594059405941, | |
| "grad_norm": 0.2349933871989314, | |
| "learning_rate": 6.88885829669385e-05, | |
| "loss": 0.3175, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.599009900990099, | |
| "grad_norm": 0.24334828788369725, | |
| "learning_rate": 6.879279159484961e-05, | |
| "loss": 0.3207, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.603960396039604, | |
| "grad_norm": 0.2375605067454906, | |
| "learning_rate": 6.869665630443295e-05, | |
| "loss": 0.3231, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.608910891089109, | |
| "grad_norm": 0.23722656434346312, | |
| "learning_rate": 6.860017824398595e-05, | |
| "loss": 0.3192, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.613861386138614, | |
| "grad_norm": 0.23914897802445803, | |
| "learning_rate": 6.85033585659003e-05, | |
| "loss": 0.3165, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.618811881188119, | |
| "grad_norm": 0.25680399156979883, | |
| "learning_rate": 6.84061984266481e-05, | |
| "loss": 0.3233, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.6237623762376239, | |
| "grad_norm": 0.29013994357664635, | |
| "learning_rate": 6.830869898676822e-05, | |
| "loss": 0.3184, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.6287128712871288, | |
| "grad_norm": 0.2596053886368674, | |
| "learning_rate": 6.82108614108523e-05, | |
| "loss": 0.315, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.6336633663366338, | |
| "grad_norm": 0.24593548087470338, | |
| "learning_rate": 6.811268686753086e-05, | |
| "loss": 0.3188, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.6386138613861387, | |
| "grad_norm": 0.2333605325483033, | |
| "learning_rate": 6.801417652945939e-05, | |
| "loss": 0.3233, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.6435643564356437, | |
| "grad_norm": 0.22975229979338618, | |
| "learning_rate": 6.79153315733043e-05, | |
| "loss": 0.3193, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.6485148514851486, | |
| "grad_norm": 0.22562916248834147, | |
| "learning_rate": 6.781615317972886e-05, | |
| "loss": 0.3195, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.6534653465346536, | |
| "grad_norm": 0.1881999956764054, | |
| "learning_rate": 6.771664253337916e-05, | |
| "loss": 0.3161, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.6584158415841586, | |
| "grad_norm": 0.17957554796661543, | |
| "learning_rate": 6.761680082286988e-05, | |
| "loss": 0.3146, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.6633663366336635, | |
| "grad_norm": 0.24244282127286865, | |
| "learning_rate": 6.751662924077015e-05, | |
| "loss": 0.3185, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.6683168316831685, | |
| "grad_norm": 0.27176749547357265, | |
| "learning_rate": 6.741612898358924e-05, | |
| "loss": 0.325, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.6732673267326734, | |
| "grad_norm": 0.23705062747162786, | |
| "learning_rate": 6.731530125176237e-05, | |
| "loss": 0.3172, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.6782178217821784, | |
| "grad_norm": 0.18068260445114523, | |
| "learning_rate": 6.721414724963631e-05, | |
| "loss": 0.317, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.6831683168316833, | |
| "grad_norm": 0.18686064135931604, | |
| "learning_rate": 6.711266818545494e-05, | |
| "loss": 0.323, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.688118811881188, | |
| "grad_norm": 0.2433556257913613, | |
| "learning_rate": 6.701086527134491e-05, | |
| "loss": 0.3197, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.693069306930693, | |
| "grad_norm": 0.237609603882351, | |
| "learning_rate": 6.690873972330116e-05, | |
| "loss": 0.3207, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.698019801980198, | |
| "grad_norm": 0.19748333286805253, | |
| "learning_rate": 6.68062927611723e-05, | |
| "loss": 0.316, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.702970297029703, | |
| "grad_norm": 0.19866434487926096, | |
| "learning_rate": 6.670352560864615e-05, | |
| "loss": 0.3186, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.7079207920792079, | |
| "grad_norm": 0.22486559077228344, | |
| "learning_rate": 6.660043949323505e-05, | |
| "loss": 0.3204, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.7128712871287128, | |
| "grad_norm": 0.2416905102052713, | |
| "learning_rate": 6.649703564626125e-05, | |
| "loss": 0.3164, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.7178217821782178, | |
| "grad_norm": 0.20746633246421747, | |
| "learning_rate": 6.639331530284214e-05, | |
| "loss": 0.324, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.7227722772277227, | |
| "grad_norm": 0.18552595723695436, | |
| "learning_rate": 6.628927970187557e-05, | |
| "loss": 0.3227, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.7277227722772277, | |
| "grad_norm": 0.19862972904635046, | |
| "learning_rate": 6.618493008602496e-05, | |
| "loss": 0.3176, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.7326732673267327, | |
| "grad_norm": 0.22644562653013095, | |
| "learning_rate": 6.608026770170459e-05, | |
| "loss": 0.3127, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.7376237623762376, | |
| "grad_norm": 0.23956197789130662, | |
| "learning_rate": 6.597529379906455e-05, | |
| "loss": 0.3195, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.7425742574257426, | |
| "grad_norm": 0.20822303615309365, | |
| "learning_rate": 6.587000963197598e-05, | |
| "loss": 0.3161, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.7475247524752475, | |
| "grad_norm": 0.1968015427618515, | |
| "learning_rate": 6.576441645801592e-05, | |
| "loss": 0.3198, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.7524752475247525, | |
| "grad_norm": 0.21268509183684337, | |
| "learning_rate": 6.565851553845242e-05, | |
| "loss": 0.3187, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.7574257425742574, | |
| "grad_norm": 0.23160018089210926, | |
| "learning_rate": 6.555230813822942e-05, | |
| "loss": 0.3174, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.7623762376237624, | |
| "grad_norm": 0.22635391545597686, | |
| "learning_rate": 6.544579552595165e-05, | |
| "loss": 0.3182, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.7673267326732673, | |
| "grad_norm": 0.19581408322193755, | |
| "learning_rate": 6.533897897386946e-05, | |
| "loss": 0.319, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.7722772277227723, | |
| "grad_norm": 0.1863068382565854, | |
| "learning_rate": 6.523185975786366e-05, | |
| "loss": 0.3206, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.7772277227722773, | |
| "grad_norm": 0.20289821307826553, | |
| "learning_rate": 6.512443915743024e-05, | |
| "loss": 0.322, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.7821782178217822, | |
| "grad_norm": 0.22406897792167674, | |
| "learning_rate": 6.501671845566512e-05, | |
| "loss": 0.3251, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.7871287128712872, | |
| "grad_norm": 0.22046628764087864, | |
| "learning_rate": 6.49086989392488e-05, | |
| "loss": 0.3204, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.7920792079207921, | |
| "grad_norm": 0.21486546344708518, | |
| "learning_rate": 6.480038189843101e-05, | |
| "loss": 0.3227, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.797029702970297, | |
| "grad_norm": 0.20057169420220247, | |
| "learning_rate": 6.469176862701529e-05, | |
| "loss": 0.3181, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.801980198019802, | |
| "grad_norm": 0.1983948377595345, | |
| "learning_rate": 6.458286042234352e-05, | |
| "loss": 0.3177, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.806930693069307, | |
| "grad_norm": 0.1825724140696818, | |
| "learning_rate": 6.447365858528046e-05, | |
| "loss": 0.3144, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.811881188118812, | |
| "grad_norm": 0.2045402810504852, | |
| "learning_rate": 6.436416442019817e-05, | |
| "loss": 0.3183, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.8168316831683167, | |
| "grad_norm": 0.23015666963702616, | |
| "learning_rate": 6.425437923496045e-05, | |
| "loss": 0.3195, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.8217821782178216, | |
| "grad_norm": 0.216666025851071, | |
| "learning_rate": 6.414430434090725e-05, | |
| "loss": 0.3115, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.8267326732673266, | |
| "grad_norm": 0.15099483842056746, | |
| "learning_rate": 6.403394105283897e-05, | |
| "loss": 0.3123, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.8316831683168315, | |
| "grad_norm": 0.16186595047832233, | |
| "learning_rate": 6.392329068900072e-05, | |
| "loss": 0.3182, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.8366336633663365, | |
| "grad_norm": 0.17482024609831948, | |
| "learning_rate": 6.381235457106664e-05, | |
| "loss": 0.3185, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.8415841584158414, | |
| "grad_norm": 0.14165035612092844, | |
| "learning_rate": 6.370113402412412e-05, | |
| "loss": 0.3145, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.8465346534653464, | |
| "grad_norm": 0.1539766406897144, | |
| "learning_rate": 6.358963037665787e-05, | |
| "loss": 0.3175, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.8514851485148514, | |
| "grad_norm": 0.1430702648589554, | |
| "learning_rate": 6.347784496053416e-05, | |
| "loss": 0.3159, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.8564356435643563, | |
| "grad_norm": 0.1728534526806878, | |
| "learning_rate": 6.336577911098493e-05, | |
| "loss": 0.3138, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.8613861386138613, | |
| "grad_norm": 0.1631825571245601, | |
| "learning_rate": 6.325343416659166e-05, | |
| "loss": 0.3185, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.8663366336633662, | |
| "grad_norm": 0.16660564436193692, | |
| "learning_rate": 6.314081146926964e-05, | |
| "loss": 0.3164, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.8712871287128712, | |
| "grad_norm": 0.15419071429371625, | |
| "learning_rate": 6.302791236425169e-05, | |
| "loss": 0.3139, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.8762376237623761, | |
| "grad_norm": 0.14922406198716454, | |
| "learning_rate": 6.291473820007227e-05, | |
| "loss": 0.3185, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.881188118811881, | |
| "grad_norm": 0.13257966307723432, | |
| "learning_rate": 6.280129032855132e-05, | |
| "loss": 0.3236, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.886138613861386, | |
| "grad_norm": 0.16559043781736188, | |
| "learning_rate": 6.268757010477806e-05, | |
| "loss": 0.3174, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.891089108910891, | |
| "grad_norm": 0.1901919649105337, | |
| "learning_rate": 6.257357888709492e-05, | |
| "loss": 0.3175, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.896039603960396, | |
| "grad_norm": 0.20040247164651906, | |
| "learning_rate": 6.245931803708116e-05, | |
| "loss": 0.3177, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.900990099009901, | |
| "grad_norm": 0.20936214961639668, | |
| "learning_rate": 6.234478891953674e-05, | |
| "loss": 0.324, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.9059405940594059, | |
| "grad_norm": 0.2524923054319829, | |
| "learning_rate": 6.222999290246595e-05, | |
| "loss": 0.3164, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.9108910891089108, | |
| "grad_norm": 0.29228623984013463, | |
| "learning_rate": 6.211493135706109e-05, | |
| "loss": 0.3158, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.9158415841584158, | |
| "grad_norm": 0.34235524644727683, | |
| "learning_rate": 6.199960565768611e-05, | |
| "loss": 0.3126, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.9207920792079207, | |
| "grad_norm": 0.3411973043712662, | |
| "learning_rate": 6.188401718186013e-05, | |
| "loss": 0.3207, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.9257425742574257, | |
| "grad_norm": 0.25638163404336106, | |
| "learning_rate": 6.17681673102411e-05, | |
| "loss": 0.3207, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.9306930693069306, | |
| "grad_norm": 0.21633928164084248, | |
| "learning_rate": 6.165205742660915e-05, | |
| "loss": 0.3151, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.9356435643564356, | |
| "grad_norm": 0.17868716190088593, | |
| "learning_rate": 6.15356889178502e-05, | |
| "loss": 0.3162, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.9405940594059405, | |
| "grad_norm": 0.23795517223714394, | |
| "learning_rate": 6.141906317393934e-05, | |
| "loss": 0.318, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.9455445544554455, | |
| "grad_norm": 0.2584866147734799, | |
| "learning_rate": 6.130218158792421e-05, | |
| "loss": 0.3176, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.9504950495049505, | |
| "grad_norm": 0.24993881460825326, | |
| "learning_rate": 6.118504555590843e-05, | |
| "loss": 0.3183, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.9554455445544554, | |
| "grad_norm": 0.233885680487996, | |
| "learning_rate": 6.10676564770348e-05, | |
| "loss": 0.3168, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.9603960396039604, | |
| "grad_norm": 0.22126298891282137, | |
| "learning_rate": 6.0950015753468745e-05, | |
| "loss": 0.316, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.9653465346534653, | |
| "grad_norm": 0.15727132928115792, | |
| "learning_rate": 6.083212479038143e-05, | |
| "loss": 0.3162, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.9702970297029703, | |
| "grad_norm": 0.1936288808268254, | |
| "learning_rate": 6.0713984995933016e-05, | |
| "loss": 0.3171, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.9752475247524752, | |
| "grad_norm": 0.21781666303367697, | |
| "learning_rate": 6.059559778125593e-05, | |
| "loss": 0.3147, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.9801980198019802, | |
| "grad_norm": 0.1893055744915759, | |
| "learning_rate": 6.0476964560437864e-05, | |
| "loss": 0.3154, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.9851485148514851, | |
| "grad_norm": 0.16631443387482395, | |
| "learning_rate": 6.035808675050497e-05, | |
| "loss": 0.3182, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.99009900990099, | |
| "grad_norm": 0.1861546633666588, | |
| "learning_rate": 6.023896577140496e-05, | |
| "loss": 0.3171, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.995049504950495, | |
| "grad_norm": 0.173542149801703, | |
| "learning_rate": 6.011960304599003e-05, | |
| "loss": 0.3128, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.20561215287249168, | |
| "learning_rate": 6.000000000000001e-05, | |
| "loss": 0.3137, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.004950495049505, | |
| "grad_norm": 0.21980457246155255, | |
| "learning_rate": 5.988015806204521e-05, | |
| "loss": 0.2936, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.00990099009901, | |
| "grad_norm": 0.28062396978410536, | |
| "learning_rate": 5.9760078663589454e-05, | |
| "loss": 0.2911, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.014851485148515, | |
| "grad_norm": 0.34344935249211755, | |
| "learning_rate": 5.9639763238932893e-05, | |
| "loss": 0.298, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.01980198019802, | |
| "grad_norm": 0.3775207626291412, | |
| "learning_rate": 5.9519213225194944e-05, | |
| "loss": 0.2892, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.0247524752475248, | |
| "grad_norm": 0.4129173027605364, | |
| "learning_rate": 5.9398430062297104e-05, | |
| "loss": 0.2978, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.0297029702970297, | |
| "grad_norm": 0.48484659194676527, | |
| "learning_rate": 5.9277415192945707e-05, | |
| "loss": 0.2936, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.0346534653465347, | |
| "grad_norm": 0.5647215424942426, | |
| "learning_rate": 5.915617006261475e-05, | |
| "loss": 0.2984, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.0396039603960396, | |
| "grad_norm": 0.46988958631525757, | |
| "learning_rate": 5.903469611952861e-05, | |
| "loss": 0.2926, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.0445544554455446, | |
| "grad_norm": 0.2844786650225628, | |
| "learning_rate": 5.891299481464473e-05, | |
| "loss": 0.2949, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.0495049504950495, | |
| "grad_norm": 0.39589356406529824, | |
| "learning_rate": 5.8791067601636305e-05, | |
| "loss": 0.2935, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.0544554455445545, | |
| "grad_norm": 0.41475513226060795, | |
| "learning_rate": 5.866891593687492e-05, | |
| "loss": 0.2935, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.0594059405940595, | |
| "grad_norm": 0.2878905866874, | |
| "learning_rate": 5.8546541279413094e-05, | |
| "loss": 0.2875, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.0643564356435644, | |
| "grad_norm": 0.32027862809257346, | |
| "learning_rate": 5.842394509096699e-05, | |
| "loss": 0.2914, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.0693069306930694, | |
| "grad_norm": 0.3536068047740315, | |
| "learning_rate": 5.8301128835898814e-05, | |
| "loss": 0.2968, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.0742574257425743, | |
| "grad_norm": 0.2864872599359047, | |
| "learning_rate": 5.817809398119937e-05, | |
| "loss": 0.2928, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.0792079207920793, | |
| "grad_norm": 0.3534226329728549, | |
| "learning_rate": 5.805484199647059e-05, | |
| "loss": 0.2954, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.0841584158415842, | |
| "grad_norm": 0.27157082417435113, | |
| "learning_rate": 5.7931374353907904e-05, | |
| "loss": 0.2915, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.089108910891089, | |
| "grad_norm": 0.2785354421375662, | |
| "learning_rate": 5.780769252828268e-05, | |
| "loss": 0.2938, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.094059405940594, | |
| "grad_norm": 0.35357055000386345, | |
| "learning_rate": 5.768379799692469e-05, | |
| "loss": 0.2949, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.099009900990099, | |
| "grad_norm": 0.2581527386389988, | |
| "learning_rate": 5.7559692239704255e-05, | |
| "loss": 0.291, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.103960396039604, | |
| "grad_norm": 0.25995964778388375, | |
| "learning_rate": 5.743537673901485e-05, | |
| "loss": 0.2856, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.108910891089109, | |
| "grad_norm": 0.2488711176684702, | |
| "learning_rate": 5.731085297975516e-05, | |
| "loss": 0.2912, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.113861386138614, | |
| "grad_norm": 0.2554760977266841, | |
| "learning_rate": 5.718612244931146e-05, | |
| "loss": 0.2907, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.118811881188119, | |
| "grad_norm": 0.22671175719855702, | |
| "learning_rate": 5.706118663753982e-05, | |
| "loss": 0.2941, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.123762376237624, | |
| "grad_norm": 0.23769071563907318, | |
| "learning_rate": 5.6936047036748335e-05, | |
| "loss": 0.2894, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.128712871287129, | |
| "grad_norm": 0.2836621764459792, | |
| "learning_rate": 5.6810705141679246e-05, | |
| "loss": 0.2907, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.133663366336634, | |
| "grad_norm": 0.1858854875190047, | |
| "learning_rate": 5.6685162449491125e-05, | |
| "loss": 0.2919, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.1386138613861387, | |
| "grad_norm": 0.199748393569554, | |
| "learning_rate": 5.655942045974101e-05, | |
| "loss": 0.2892, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.1435643564356437, | |
| "grad_norm": 0.24570429878298897, | |
| "learning_rate": 5.643348067436644e-05, | |
| "loss": 0.2928, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.1485148514851486, | |
| "grad_norm": 0.1710956167931347, | |
| "learning_rate": 5.6307344597667555e-05, | |
| "loss": 0.2888, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.1534653465346536, | |
| "grad_norm": 0.22400624967389368, | |
| "learning_rate": 5.6181013736289114e-05, | |
| "loss": 0.2933, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.1584158415841586, | |
| "grad_norm": 0.18635235837084865, | |
| "learning_rate": 5.605448959920251e-05, | |
| "loss": 0.2891, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.1633663366336635, | |
| "grad_norm": 0.17591809964429744, | |
| "learning_rate": 5.5927773697687726e-05, | |
| "loss": 0.2891, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.1683168316831685, | |
| "grad_norm": 0.21247736779834164, | |
| "learning_rate": 5.580086754531527e-05, | |
| "loss": 0.2928, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.1732673267326734, | |
| "grad_norm": 0.14355206393142206, | |
| "learning_rate": 5.567377265792819e-05, | |
| "loss": 0.2906, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.1782178217821784, | |
| "grad_norm": 0.21295542237568282, | |
| "learning_rate": 5.554649055362381e-05, | |
| "loss": 0.2911, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.1831683168316833, | |
| "grad_norm": 0.17126899898270218, | |
| "learning_rate": 5.5419022752735764e-05, | |
| "loss": 0.289, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.1881188118811883, | |
| "grad_norm": 0.15620418915639625, | |
| "learning_rate": 5.5291370777815693e-05, | |
| "loss": 0.2912, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.1930693069306932, | |
| "grad_norm": 0.18366948761566249, | |
| "learning_rate": 5.5163536153615185e-05, | |
| "loss": 0.289, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.198019801980198, | |
| "grad_norm": 0.148505159881694, | |
| "learning_rate": 5.503552040706744e-05, | |
| "loss": 0.2885, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.202970297029703, | |
| "grad_norm": 0.18651095779714405, | |
| "learning_rate": 5.490732506726911e-05, | |
| "loss": 0.2904, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.207920792079208, | |
| "grad_norm": 0.16447675158548666, | |
| "learning_rate": 5.477895166546207e-05, | |
| "loss": 0.291, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.212871287128713, | |
| "grad_norm": 0.1453281761545619, | |
| "learning_rate": 5.4650401735014985e-05, | |
| "loss": 0.2943, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.217821782178218, | |
| "grad_norm": 0.17954456405595917, | |
| "learning_rate": 5.452167681140515e-05, | |
| "loss": 0.292, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.222772277227723, | |
| "grad_norm": 0.1334253529596552, | |
| "learning_rate": 5.4392778432200044e-05, | |
| "loss": 0.2925, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.227722772277228, | |
| "grad_norm": 0.1480862531642023, | |
| "learning_rate": 5.426370813703903e-05, | |
| "loss": 0.2893, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.232673267326733, | |
| "grad_norm": 0.14228035944887227, | |
| "learning_rate": 5.4134467467614945e-05, | |
| "loss": 0.296, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.237623762376238, | |
| "grad_norm": 0.1398882481618865, | |
| "learning_rate": 5.4005057967655634e-05, | |
| "loss": 0.2899, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.2425742574257423, | |
| "grad_norm": 0.150249228933869, | |
| "learning_rate": 5.3875481182905595e-05, | |
| "loss": 0.2875, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.2475247524752477, | |
| "grad_norm": 0.12606016232940834, | |
| "learning_rate": 5.374573866110746e-05, | |
| "loss": 0.2984, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.2524752475247523, | |
| "grad_norm": 0.15069385649777214, | |
| "learning_rate": 5.3615831951983535e-05, | |
| "loss": 0.2916, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.2574257425742577, | |
| "grad_norm": 0.14028873576812315, | |
| "learning_rate": 5.348576260721725e-05, | |
| "loss": 0.2855, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.262376237623762, | |
| "grad_norm": 0.1434736708641502, | |
| "learning_rate": 5.3355532180434696e-05, | |
| "loss": 0.2866, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.2673267326732676, | |
| "grad_norm": 0.15021662510139536, | |
| "learning_rate": 5.3225142227185974e-05, | |
| "loss": 0.2861, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.272277227722772, | |
| "grad_norm": 0.14159743878292066, | |
| "learning_rate": 5.309459430492672e-05, | |
| "loss": 0.2893, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.2772277227722775, | |
| "grad_norm": 0.15035314890314877, | |
| "learning_rate": 5.2963889972999384e-05, | |
| "loss": 0.294, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.282178217821782, | |
| "grad_norm": 0.1429351778691825, | |
| "learning_rate": 5.283303079261471e-05, | |
| "loss": 0.2877, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.287128712871287, | |
| "grad_norm": 0.15695781588769755, | |
| "learning_rate": 5.2702018326833044e-05, | |
| "loss": 0.2909, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.292079207920792, | |
| "grad_norm": 0.16086443131888203, | |
| "learning_rate": 5.257085414054565e-05, | |
| "loss": 0.2881, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.297029702970297, | |
| "grad_norm": 0.11603808269970421, | |
| "learning_rate": 5.243953980045603e-05, | |
| "loss": 0.2939, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.301980198019802, | |
| "grad_norm": 0.11831438387847333, | |
| "learning_rate": 5.230807687506122e-05, | |
| "loss": 0.2946, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.3069306930693068, | |
| "grad_norm": 0.151830685765451, | |
| "learning_rate": 5.2176466934633045e-05, | |
| "loss": 0.2916, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.3118811881188117, | |
| "grad_norm": 0.12678954851295432, | |
| "learning_rate": 5.204471155119938e-05, | |
| "loss": 0.2965, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.3168316831683167, | |
| "grad_norm": 0.13602397297885316, | |
| "learning_rate": 5.191281229852534e-05, | |
| "loss": 0.2958, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.3217821782178216, | |
| "grad_norm": 0.12563578179567897, | |
| "learning_rate": 5.17807707520945e-05, | |
| "loss": 0.2905, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.3267326732673266, | |
| "grad_norm": 0.10565510601351141, | |
| "learning_rate": 5.164858848909009e-05, | |
| "loss": 0.2937, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.3316831683168315, | |
| "grad_norm": 0.13170476219629715, | |
| "learning_rate": 5.151626708837612e-05, | |
| "loss": 0.2971, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.3366336633663365, | |
| "grad_norm": 0.11937369149884527, | |
| "learning_rate": 5.1383808130478605e-05, | |
| "loss": 0.2885, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.3415841584158414, | |
| "grad_norm": 0.11453800690630266, | |
| "learning_rate": 5.1251213197566515e-05, | |
| "loss": 0.2854, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.3465346534653464, | |
| "grad_norm": 0.13062929038283053, | |
| "learning_rate": 5.11184838734331e-05, | |
| "loss": 0.2924, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.3514851485148514, | |
| "grad_norm": 0.12107898486408612, | |
| "learning_rate": 5.098562174347679e-05, | |
| "loss": 0.293, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.3564356435643563, | |
| "grad_norm": 0.09901605670345262, | |
| "learning_rate": 5.085262839468236e-05, | |
| "loss": 0.2913, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.3613861386138613, | |
| "grad_norm": 0.13783522952102104, | |
| "learning_rate": 5.071950541560193e-05, | |
| "loss": 0.2895, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.366336633663366, | |
| "grad_norm": 0.12148311907430435, | |
| "learning_rate": 5.058625439633599e-05, | |
| "loss": 0.2877, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.371287128712871, | |
| "grad_norm": 0.1028377899483092, | |
| "learning_rate": 5.0452876928514434e-05, | |
| "loss": 0.2881, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.376237623762376, | |
| "grad_norm": 0.12962503408861803, | |
| "learning_rate": 5.031937460527753e-05, | |
| "loss": 0.2974, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.381188118811881, | |
| "grad_norm": 0.1273079806450915, | |
| "learning_rate": 5.018574902125689e-05, | |
| "loss": 0.2882, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.386138613861386, | |
| "grad_norm": 0.10935427368540332, | |
| "learning_rate": 5.005200177255645e-05, | |
| "loss": 0.2905, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.391089108910891, | |
| "grad_norm": 0.13181865971025042, | |
| "learning_rate": 4.991813445673334e-05, | |
| "loss": 0.2941, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.396039603960396, | |
| "grad_norm": 0.14828254882763164, | |
| "learning_rate": 4.9784148672778864e-05, | |
| "loss": 0.2936, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.400990099009901, | |
| "grad_norm": 0.10590676383587967, | |
| "learning_rate": 4.965004602109938e-05, | |
| "loss": 0.2869, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.405940594059406, | |
| "grad_norm": 0.10897986558224348, | |
| "learning_rate": 4.95158281034972e-05, | |
| "loss": 0.2965, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.410891089108911, | |
| "grad_norm": 0.1526575863023953, | |
| "learning_rate": 4.938149652315142e-05, | |
| "loss": 0.2904, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.4158415841584158, | |
| "grad_norm": 0.13976101013770628, | |
| "learning_rate": 4.92470528845988e-05, | |
| "loss": 0.2907, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.4207920792079207, | |
| "grad_norm": 0.11275068997162369, | |
| "learning_rate": 4.911249879371457e-05, | |
| "loss": 0.2939, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.4257425742574257, | |
| "grad_norm": 0.13409375961453313, | |
| "learning_rate": 4.897783585769331e-05, | |
| "loss": 0.2896, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.4306930693069306, | |
| "grad_norm": 0.13184720935289135, | |
| "learning_rate": 4.884306568502968e-05, | |
| "loss": 0.2905, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.4356435643564356, | |
| "grad_norm": 0.09381659993624109, | |
| "learning_rate": 4.870818988549923e-05, | |
| "loss": 0.2881, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.4405940594059405, | |
| "grad_norm": 0.10819786847426302, | |
| "learning_rate": 4.857321007013924e-05, | |
| "loss": 0.2874, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.4455445544554455, | |
| "grad_norm": 0.11840707921735176, | |
| "learning_rate": 4.843812785122933e-05, | |
| "loss": 0.2914, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.4504950495049505, | |
| "grad_norm": 0.11186031988136662, | |
| "learning_rate": 4.830294484227236e-05, | |
| "loss": 0.2902, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.4554455445544554, | |
| "grad_norm": 0.10078610321782347, | |
| "learning_rate": 4.816766265797505e-05, | |
| "loss": 0.2875, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.4603960396039604, | |
| "grad_norm": 0.12008233777261267, | |
| "learning_rate": 4.8032282914228743e-05, | |
| "loss": 0.293, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.4653465346534653, | |
| "grad_norm": 0.12200388770178253, | |
| "learning_rate": 4.78968072280901e-05, | |
| "loss": 0.2868, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.4702970297029703, | |
| "grad_norm": 0.136406459834568, | |
| "learning_rate": 4.7761237217761736e-05, | |
| "loss": 0.2903, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.4752475247524752, | |
| "grad_norm": 0.11393281739573007, | |
| "learning_rate": 4.7625574502572975e-05, | |
| "loss": 0.2892, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.48019801980198, | |
| "grad_norm": 0.14322465120458702, | |
| "learning_rate": 4.7489820702960444e-05, | |
| "loss": 0.2913, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.485148514851485, | |
| "grad_norm": 0.1330468571388596, | |
| "learning_rate": 4.735397744044874e-05, | |
| "loss": 0.29, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.49009900990099, | |
| "grad_norm": 0.1914714574897793, | |
| "learning_rate": 4.721804633763105e-05, | |
| "loss": 0.2904, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.495049504950495, | |
| "grad_norm": 0.17353960077506989, | |
| "learning_rate": 4.7082029018149816e-05, | |
| "loss": 0.2914, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.10568552222443248, | |
| "learning_rate": 4.694592710667723e-05, | |
| "loss": 0.2879, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.504950495049505, | |
| "grad_norm": 0.14992802837908273, | |
| "learning_rate": 4.680974222889595e-05, | |
| "loss": 0.2884, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.50990099009901, | |
| "grad_norm": 0.16137694662439006, | |
| "learning_rate": 4.667347601147965e-05, | |
| "loss": 0.2897, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.514851485148515, | |
| "grad_norm": 0.12112544582018925, | |
| "learning_rate": 4.653713008207353e-05, | |
| "loss": 0.291, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.51980198019802, | |
| "grad_norm": 0.11917815529367859, | |
| "learning_rate": 4.640070606927497e-05, | |
| "loss": 0.2919, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.5247524752475248, | |
| "grad_norm": 0.15623317002467732, | |
| "learning_rate": 4.6264205602613944e-05, | |
| "loss": 0.2899, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.5297029702970297, | |
| "grad_norm": 0.1381688374311921, | |
| "learning_rate": 4.612763031253372e-05, | |
| "loss": 0.2933, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.5346534653465347, | |
| "grad_norm": 0.10724423849127208, | |
| "learning_rate": 4.599098183037127e-05, | |
| "loss": 0.2919, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.5396039603960396, | |
| "grad_norm": 0.1247464275436635, | |
| "learning_rate": 4.5854261788337785e-05, | |
| "loss": 0.2913, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.5445544554455446, | |
| "grad_norm": 0.13249787487710485, | |
| "learning_rate": 4.571747181949928e-05, | |
| "loss": 0.2895, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.5495049504950495, | |
| "grad_norm": 0.10321336217751037, | |
| "learning_rate": 4.558061355775693e-05, | |
| "loss": 0.2938, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.5544554455445545, | |
| "grad_norm": 0.10307416532977476, | |
| "learning_rate": 4.5443688637827716e-05, | |
| "loss": 0.2923, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.5594059405940595, | |
| "grad_norm": 0.12137127847342442, | |
| "learning_rate": 4.530669869522478e-05, | |
| "loss": 0.2938, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.5643564356435644, | |
| "grad_norm": 0.10696843702534209, | |
| "learning_rate": 4.516964536623796e-05, | |
| "loss": 0.2917, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.5693069306930694, | |
| "grad_norm": 0.10464426473950372, | |
| "learning_rate": 4.503253028791422e-05, | |
| "loss": 0.2871, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.5742574257425743, | |
| "grad_norm": 0.11642336829523302, | |
| "learning_rate": 4.489535509803806e-05, | |
| "loss": 0.2926, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.5792079207920793, | |
| "grad_norm": 0.10644012339280991, | |
| "learning_rate": 4.475812143511202e-05, | |
| "loss": 0.2903, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.5841584158415842, | |
| "grad_norm": 0.10999866291513487, | |
| "learning_rate": 4.4620830938337055e-05, | |
| "loss": 0.2883, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.589108910891089, | |
| "grad_norm": 0.13755068955133282, | |
| "learning_rate": 4.448348524759302e-05, | |
| "loss": 0.2907, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.594059405940594, | |
| "grad_norm": 0.11304002693406412, | |
| "learning_rate": 4.4346086003418985e-05, | |
| "loss": 0.2924, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.599009900990099, | |
| "grad_norm": 0.10875367629369516, | |
| "learning_rate": 4.420863484699374e-05, | |
| "loss": 0.2895, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.603960396039604, | |
| "grad_norm": 0.12194581656327487, | |
| "learning_rate": 4.4071133420116106e-05, | |
| "loss": 0.2922, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.608910891089109, | |
| "grad_norm": 0.10928580726928758, | |
| "learning_rate": 4.3933583365185396e-05, | |
| "loss": 0.2956, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.613861386138614, | |
| "grad_norm": 0.10472106905680585, | |
| "learning_rate": 4.379598632518175e-05, | |
| "loss": 0.2901, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.618811881188119, | |
| "grad_norm": 0.13630901537032983, | |
| "learning_rate": 4.365834394364653e-05, | |
| "loss": 0.2945, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.623762376237624, | |
| "grad_norm": 0.12559855808593584, | |
| "learning_rate": 4.35206578646627e-05, | |
| "loss": 0.2897, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.628712871287129, | |
| "grad_norm": 0.10534753317516414, | |
| "learning_rate": 4.338292973283512e-05, | |
| "loss": 0.2896, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.633663366336634, | |
| "grad_norm": 0.11993140772526223, | |
| "learning_rate": 4.324516119327102e-05, | |
| "loss": 0.2894, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.6386138613861387, | |
| "grad_norm": 0.11261616944808854, | |
| "learning_rate": 4.310735389156026e-05, | |
| "loss": 0.292, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.6435643564356437, | |
| "grad_norm": 0.09250685300963525, | |
| "learning_rate": 4.296950947375566e-05, | |
| "loss": 0.2912, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.6485148514851486, | |
| "grad_norm": 0.09584975853113382, | |
| "learning_rate": 4.2831629586353446e-05, | |
| "loss": 0.2882, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.6534653465346536, | |
| "grad_norm": 0.10430183276684997, | |
| "learning_rate": 4.269371587627346e-05, | |
| "loss": 0.2918, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.6584158415841586, | |
| "grad_norm": 0.09633743259405408, | |
| "learning_rate": 4.255576999083956e-05, | |
| "loss": 0.2912, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.6633663366336635, | |
| "grad_norm": 0.10315192122664113, | |
| "learning_rate": 4.241779357775993e-05, | |
| "loss": 0.2901, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.6683168316831685, | |
| "grad_norm": 0.11126301619791243, | |
| "learning_rate": 4.227978828510739e-05, | |
| "loss": 0.2907, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.6732673267326734, | |
| "grad_norm": 0.12626969810049277, | |
| "learning_rate": 4.214175576129972e-05, | |
| "loss": 0.2843, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.6782178217821784, | |
| "grad_norm": 0.10637402816416124, | |
| "learning_rate": 4.200369765507995e-05, | |
| "loss": 0.291, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.6831683168316833, | |
| "grad_norm": 0.11609951156690725, | |
| "learning_rate": 4.18656156154967e-05, | |
| "loss": 0.289, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.6881188118811883, | |
| "grad_norm": 0.10635855005794152, | |
| "learning_rate": 4.172751129188447e-05, | |
| "loss": 0.2878, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.693069306930693, | |
| "grad_norm": 0.09969116484857603, | |
| "learning_rate": 4.158938633384389e-05, | |
| "loss": 0.2911, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.698019801980198, | |
| "grad_norm": 0.10338679032150914, | |
| "learning_rate": 4.1451242391222105e-05, | |
| "loss": 0.29, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.7029702970297027, | |
| "grad_norm": 0.09747866123350818, | |
| "learning_rate": 4.1313081114093025e-05, | |
| "loss": 0.2878, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.707920792079208, | |
| "grad_norm": 0.09745084051835436, | |
| "learning_rate": 4.117490415273757e-05, | |
| "loss": 0.2893, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.7128712871287126, | |
| "grad_norm": 0.09225392778972681, | |
| "learning_rate": 4.1036713157624045e-05, | |
| "loss": 0.2903, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.717821782178218, | |
| "grad_norm": 0.09437236963665839, | |
| "learning_rate": 4.089850977938836e-05, | |
| "loss": 0.2881, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.7227722772277225, | |
| "grad_norm": 0.09436910160998535, | |
| "learning_rate": 4.076029566881436e-05, | |
| "loss": 0.289, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.727722772277228, | |
| "grad_norm": 0.0980607482524191, | |
| "learning_rate": 4.0622072476814045e-05, | |
| "loss": 0.2872, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.7326732673267324, | |
| "grad_norm": 0.10439483236771886, | |
| "learning_rate": 4.0483841854407906e-05, | |
| "loss": 0.2934, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.737623762376238, | |
| "grad_norm": 0.10513256333702312, | |
| "learning_rate": 4.0345605452705225e-05, | |
| "loss": 0.2933, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.7425742574257423, | |
| "grad_norm": 0.09879708977573251, | |
| "learning_rate": 4.020736492288426e-05, | |
| "loss": 0.2892, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.7475247524752477, | |
| "grad_norm": 0.10870427744339516, | |
| "learning_rate": 4.006912191617259e-05, | |
| "loss": 0.2885, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.7524752475247523, | |
| "grad_norm": 0.10462833839112169, | |
| "learning_rate": 3.993087808382742e-05, | |
| "loss": 0.2908, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.7574257425742577, | |
| "grad_norm": 0.11008718087986996, | |
| "learning_rate": 3.9792635077115755e-05, | |
| "loss": 0.2915, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.762376237623762, | |
| "grad_norm": 0.11165269895235802, | |
| "learning_rate": 3.9654394547294775e-05, | |
| "loss": 0.2949, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.7673267326732676, | |
| "grad_norm": 0.11157871130143804, | |
| "learning_rate": 3.9516158145592093e-05, | |
| "loss": 0.2902, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.772277227722772, | |
| "grad_norm": 0.10798653329594901, | |
| "learning_rate": 3.937792752318597e-05, | |
| "loss": 0.29, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.7772277227722775, | |
| "grad_norm": 0.08830297670358335, | |
| "learning_rate": 3.923970433118566e-05, | |
| "loss": 0.2911, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.782178217821782, | |
| "grad_norm": 0.10692200078028577, | |
| "learning_rate": 3.9101490220611646e-05, | |
| "loss": 0.2888, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.7871287128712874, | |
| "grad_norm": 0.09073477619941334, | |
| "learning_rate": 3.8963286842375955e-05, | |
| "loss": 0.2884, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.792079207920792, | |
| "grad_norm": 0.10758407723631432, | |
| "learning_rate": 3.882509584726244e-05, | |
| "loss": 0.2884, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.7970297029702973, | |
| "grad_norm": 0.08703114989053835, | |
| "learning_rate": 3.868691888590699e-05, | |
| "loss": 0.2905, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.801980198019802, | |
| "grad_norm": 0.09815325795728913, | |
| "learning_rate": 3.854875760877791e-05, | |
| "loss": 0.2891, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.806930693069307, | |
| "grad_norm": 0.0963193079710705, | |
| "learning_rate": 3.8410613666156126e-05, | |
| "loss": 0.2932, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.8118811881188117, | |
| "grad_norm": 0.0934892455160611, | |
| "learning_rate": 3.8272488708115536e-05, | |
| "loss": 0.2877, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.8168316831683167, | |
| "grad_norm": 0.11193820279984304, | |
| "learning_rate": 3.81343843845033e-05, | |
| "loss": 0.289, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.8217821782178216, | |
| "grad_norm": 0.11073267502904961, | |
| "learning_rate": 3.7996302344920056e-05, | |
| "loss": 0.2881, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.8267326732673266, | |
| "grad_norm": 0.10447219952745992, | |
| "learning_rate": 3.785824423870029e-05, | |
| "loss": 0.2932, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.8316831683168315, | |
| "grad_norm": 0.09161213745057079, | |
| "learning_rate": 3.772021171489261e-05, | |
| "loss": 0.2888, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.8366336633663365, | |
| "grad_norm": 0.10089529395624805, | |
| "learning_rate": 3.7582206422240073e-05, | |
| "loss": 0.2923, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.8415841584158414, | |
| "grad_norm": 0.08617518269899792, | |
| "learning_rate": 3.744423000916045e-05, | |
| "loss": 0.2872, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.8465346534653464, | |
| "grad_norm": 0.10144855439914764, | |
| "learning_rate": 3.7306284123726545e-05, | |
| "loss": 0.2901, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.8514851485148514, | |
| "grad_norm": 0.10158399468636482, | |
| "learning_rate": 3.716837041364657e-05, | |
| "loss": 0.2924, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.8564356435643563, | |
| "grad_norm": 0.10116294192144563, | |
| "learning_rate": 3.703049052624434e-05, | |
| "loss": 0.2844, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.8613861386138613, | |
| "grad_norm": 0.11957236661139066, | |
| "learning_rate": 3.689264610843975e-05, | |
| "loss": 0.2897, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.866336633663366, | |
| "grad_norm": 0.11284051298160382, | |
| "learning_rate": 3.6754838806728985e-05, | |
| "loss": 0.2867, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.871287128712871, | |
| "grad_norm": 0.0924325415706199, | |
| "learning_rate": 3.6617070267164895e-05, | |
| "loss": 0.289, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.876237623762376, | |
| "grad_norm": 0.1105426281095416, | |
| "learning_rate": 3.647934213533733e-05, | |
| "loss": 0.2875, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.881188118811881, | |
| "grad_norm": 0.09174690942839933, | |
| "learning_rate": 3.634165605635347e-05, | |
| "loss": 0.292, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.886138613861386, | |
| "grad_norm": 0.10220347134653099, | |
| "learning_rate": 3.6204013674818264e-05, | |
| "loss": 0.2857, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.891089108910891, | |
| "grad_norm": 0.0981680983492792, | |
| "learning_rate": 3.606641663481462e-05, | |
| "loss": 0.2919, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.896039603960396, | |
| "grad_norm": 0.09807625793972466, | |
| "learning_rate": 3.5928866579883914e-05, | |
| "loss": 0.2902, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.900990099009901, | |
| "grad_norm": 0.09723694201470973, | |
| "learning_rate": 3.579136515300627e-05, | |
| "loss": 0.2904, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.905940594059406, | |
| "grad_norm": 0.09282029713355545, | |
| "learning_rate": 3.565391399658102e-05, | |
| "loss": 0.2858, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.910891089108911, | |
| "grad_norm": 0.09078676323851734, | |
| "learning_rate": 3.5516514752406996e-05, | |
| "loss": 0.2877, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.9158415841584158, | |
| "grad_norm": 0.1035730885124026, | |
| "learning_rate": 3.537916906166295e-05, | |
| "loss": 0.2887, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.9207920792079207, | |
| "grad_norm": 0.10072443988749984, | |
| "learning_rate": 3.5241878564888006e-05, | |
| "loss": 0.2857, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.9257425742574257, | |
| "grad_norm": 0.10201801759402962, | |
| "learning_rate": 3.510464490196195e-05, | |
| "loss": 0.2878, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.9306930693069306, | |
| "grad_norm": 0.09498823456658204, | |
| "learning_rate": 3.496746971208579e-05, | |
| "loss": 0.2903, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.9356435643564356, | |
| "grad_norm": 0.09439742302971477, | |
| "learning_rate": 3.4830354633762044e-05, | |
| "loss": 0.2885, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.9405940594059405, | |
| "grad_norm": 0.10085915056545246, | |
| "learning_rate": 3.4693301304775226e-05, | |
| "loss": 0.2912, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.9455445544554455, | |
| "grad_norm": 0.08842640947738424, | |
| "learning_rate": 3.455631136217231e-05, | |
| "loss": 0.2867, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.9504950495049505, | |
| "grad_norm": 0.1004373056709111, | |
| "learning_rate": 3.4419386442243084e-05, | |
| "loss": 0.2921, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.9554455445544554, | |
| "grad_norm": 0.09684450083741175, | |
| "learning_rate": 3.428252818050074e-05, | |
| "loss": 0.2916, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.9603960396039604, | |
| "grad_norm": 0.09286612877802164, | |
| "learning_rate": 3.414573821166222e-05, | |
| "loss": 0.2905, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.9653465346534653, | |
| "grad_norm": 0.09408550294226069, | |
| "learning_rate": 3.4009018169628744e-05, | |
| "loss": 0.2871, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.9702970297029703, | |
| "grad_norm": 0.09366561753918336, | |
| "learning_rate": 3.38723696874663e-05, | |
| "loss": 0.2906, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.9752475247524752, | |
| "grad_norm": 0.10124650316242359, | |
| "learning_rate": 3.373579439738606e-05, | |
| "loss": 0.2885, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.98019801980198, | |
| "grad_norm": 0.08127495837017719, | |
| "learning_rate": 3.359929393072505e-05, | |
| "loss": 0.2922, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.985148514851485, | |
| "grad_norm": 0.10185253182926989, | |
| "learning_rate": 3.346286991792648e-05, | |
| "loss": 0.2894, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.99009900990099, | |
| "grad_norm": 0.09763480465367606, | |
| "learning_rate": 3.3326523988520365e-05, | |
| "loss": 0.288, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.995049504950495, | |
| "grad_norm": 0.08124295963133833, | |
| "learning_rate": 3.3190257771104055e-05, | |
| "loss": 0.2865, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.10088677360749207, | |
| "learning_rate": 3.305407289332279e-05, | |
| "loss": 0.2818, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.004950495049505, | |
| "grad_norm": 0.13873473329596994, | |
| "learning_rate": 3.2917970981850205e-05, | |
| "loss": 0.2602, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.00990099009901, | |
| "grad_norm": 0.12976038560967385, | |
| "learning_rate": 3.2781953662368954e-05, | |
| "loss": 0.2606, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.014851485148515, | |
| "grad_norm": 0.15951817148087163, | |
| "learning_rate": 3.264602255955127e-05, | |
| "loss": 0.2577, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.01980198019802, | |
| "grad_norm": 0.16279714502214718, | |
| "learning_rate": 3.251017929703956e-05, | |
| "loss": 0.2649, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.0247524752475248, | |
| "grad_norm": 0.1492586830551721, | |
| "learning_rate": 3.237442549742704e-05, | |
| "loss": 0.2612, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.0297029702970297, | |
| "grad_norm": 0.1491998033398401, | |
| "learning_rate": 3.223876278223828e-05, | |
| "loss": 0.2601, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.0346534653465347, | |
| "grad_norm": 0.13488498997370776, | |
| "learning_rate": 3.2103192771909927e-05, | |
| "loss": 0.2625, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.0396039603960396, | |
| "grad_norm": 0.1342441983854818, | |
| "learning_rate": 3.196771708577127e-05, | |
| "loss": 0.2597, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.0445544554455446, | |
| "grad_norm": 0.12716853452234733, | |
| "learning_rate": 3.1832337342024956e-05, | |
| "loss": 0.2618, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.0495049504950495, | |
| "grad_norm": 0.11793012695462536, | |
| "learning_rate": 3.1697055157727654e-05, | |
| "loss": 0.2612, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.0544554455445545, | |
| "grad_norm": 0.11621690642718228, | |
| "learning_rate": 3.156187214877068e-05, | |
| "loss": 0.2627, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.0594059405940595, | |
| "grad_norm": 0.10824520198545912, | |
| "learning_rate": 3.142678992986078e-05, | |
| "loss": 0.2588, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.0643564356435644, | |
| "grad_norm": 0.11763046761959932, | |
| "learning_rate": 3.129181011450077e-05, | |
| "loss": 0.2624, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.0693069306930694, | |
| "grad_norm": 0.13250782552006196, | |
| "learning_rate": 3.115693431497033e-05, | |
| "loss": 0.259, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.0742574257425743, | |
| "grad_norm": 0.10745625886139168, | |
| "learning_rate": 3.102216414230671e-05, | |
| "loss": 0.2634, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.0792079207920793, | |
| "grad_norm": 0.13028975884190627, | |
| "learning_rate": 3.0887501206285436e-05, | |
| "loss": 0.2645, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.0841584158415842, | |
| "grad_norm": 0.11204426938100358, | |
| "learning_rate": 3.075294711540123e-05, | |
| "loss": 0.2568, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.089108910891089, | |
| "grad_norm": 0.0938552199733989, | |
| "learning_rate": 3.061850347684859e-05, | |
| "loss": 0.2602, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.094059405940594, | |
| "grad_norm": 0.1127679213593348, | |
| "learning_rate": 3.0484171896502805e-05, | |
| "loss": 0.2607, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.099009900990099, | |
| "grad_norm": 0.10200434233865428, | |
| "learning_rate": 3.034995397890063e-05, | |
| "loss": 0.2616, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.103960396039604, | |
| "grad_norm": 0.09991375780190867, | |
| "learning_rate": 3.0215851327221163e-05, | |
| "loss": 0.2623, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.108910891089109, | |
| "grad_norm": 0.09232562826749552, | |
| "learning_rate": 3.0081865543266687e-05, | |
| "loss": 0.2614, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.113861386138614, | |
| "grad_norm": 0.1065573407714681, | |
| "learning_rate": 2.994799822744356e-05, | |
| "loss": 0.2586, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.118811881188119, | |
| "grad_norm": 0.09147796522034173, | |
| "learning_rate": 2.9814250978743115e-05, | |
| "loss": 0.2592, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.123762376237624, | |
| "grad_norm": 0.09934546358889536, | |
| "learning_rate": 2.9680625394722483e-05, | |
| "loss": 0.265, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.128712871287129, | |
| "grad_norm": 0.09335841251464885, | |
| "learning_rate": 2.9547123071485586e-05, | |
| "loss": 0.2591, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.133663366336634, | |
| "grad_norm": 0.08506717200868281, | |
| "learning_rate": 2.9413745603664023e-05, | |
| "loss": 0.2611, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.1386138613861387, | |
| "grad_norm": 0.09638638396370519, | |
| "learning_rate": 2.928049458439808e-05, | |
| "loss": 0.2627, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.1435643564356437, | |
| "grad_norm": 0.08636142462750247, | |
| "learning_rate": 2.914737160531765e-05, | |
| "loss": 0.2648, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.1485148514851486, | |
| "grad_norm": 0.09268694847893381, | |
| "learning_rate": 2.9014378256523218e-05, | |
| "loss": 0.2605, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.1534653465346536, | |
| "grad_norm": 0.09876107078789798, | |
| "learning_rate": 2.888151612656692e-05, | |
| "loss": 0.2583, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.1584158415841586, | |
| "grad_norm": 0.08411855624881796, | |
| "learning_rate": 2.874878680243349e-05, | |
| "loss": 0.263, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.1633663366336635, | |
| "grad_norm": 0.10415546979119643, | |
| "learning_rate": 2.8616191869521412e-05, | |
| "loss": 0.2604, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.1683168316831685, | |
| "grad_norm": 0.09152241248584239, | |
| "learning_rate": 2.8483732911623882e-05, | |
| "loss": 0.2617, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.1732673267326734, | |
| "grad_norm": 0.08574306643093083, | |
| "learning_rate": 2.8351411510909926e-05, | |
| "loss": 0.2551, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.1782178217821784, | |
| "grad_norm": 0.11525551347781693, | |
| "learning_rate": 2.821922924790552e-05, | |
| "loss": 0.2627, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.1831683168316833, | |
| "grad_norm": 0.07971057087238882, | |
| "learning_rate": 2.8087187701474667e-05, | |
| "loss": 0.2593, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.1881188118811883, | |
| "grad_norm": 0.09070420344687578, | |
| "learning_rate": 2.7955288448800628e-05, | |
| "loss": 0.2647, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.1930693069306932, | |
| "grad_norm": 0.08840670408396233, | |
| "learning_rate": 2.7823533065366965e-05, | |
| "loss": 0.2606, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.198019801980198, | |
| "grad_norm": 0.08190382673177843, | |
| "learning_rate": 2.7691923124938794e-05, | |
| "loss": 0.2592, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.202970297029703, | |
| "grad_norm": 0.08956962744513197, | |
| "learning_rate": 2.756046019954398e-05, | |
| "loss": 0.2617, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.207920792079208, | |
| "grad_norm": 0.08875947136387043, | |
| "learning_rate": 2.742914585945436e-05, | |
| "loss": 0.2601, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.212871287128713, | |
| "grad_norm": 0.07905536192742312, | |
| "learning_rate": 2.7297981673166963e-05, | |
| "loss": 0.2624, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.217821782178218, | |
| "grad_norm": 0.08409581506993473, | |
| "learning_rate": 2.71669692073853e-05, | |
| "loss": 0.2607, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.222772277227723, | |
| "grad_norm": 0.07852436746970415, | |
| "learning_rate": 2.7036110027000636e-05, | |
| "loss": 0.2614, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 3.227722772277228, | |
| "grad_norm": 0.10198706585003715, | |
| "learning_rate": 2.690540569507329e-05, | |
| "loss": 0.2603, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 3.232673267326733, | |
| "grad_norm": 0.08152560978569826, | |
| "learning_rate": 2.677485777281403e-05, | |
| "loss": 0.263, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 3.237623762376238, | |
| "grad_norm": 0.1010991457038921, | |
| "learning_rate": 2.6644467819565317e-05, | |
| "loss": 0.2604, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 3.2425742574257423, | |
| "grad_norm": 0.07891142552443962, | |
| "learning_rate": 2.651423739278276e-05, | |
| "loss": 0.2651, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 3.2475247524752477, | |
| "grad_norm": 0.11260260262443215, | |
| "learning_rate": 2.638416804801648e-05, | |
| "loss": 0.2635, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 3.2524752475247523, | |
| "grad_norm": 0.07603687187619462, | |
| "learning_rate": 2.6254261338892536e-05, | |
| "loss": 0.2579, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 3.2574257425742577, | |
| "grad_norm": 0.09383699286259775, | |
| "learning_rate": 2.6124518817094418e-05, | |
| "loss": 0.2624, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 3.262376237623762, | |
| "grad_norm": 0.08743649333602849, | |
| "learning_rate": 2.5994942032344376e-05, | |
| "loss": 0.2586, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 3.2673267326732676, | |
| "grad_norm": 0.08026177278250374, | |
| "learning_rate": 2.5865532532385072e-05, | |
| "loss": 0.2614, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.272277227722772, | |
| "grad_norm": 0.07564032476267157, | |
| "learning_rate": 2.573629186296097e-05, | |
| "loss": 0.2586, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 3.2772277227722775, | |
| "grad_norm": 0.07578420912298534, | |
| "learning_rate": 2.560722156779996e-05, | |
| "loss": 0.2579, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 3.282178217821782, | |
| "grad_norm": 0.08086724414241181, | |
| "learning_rate": 2.547832318859487e-05, | |
| "loss": 0.2579, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 3.287128712871287, | |
| "grad_norm": 0.0749779149015514, | |
| "learning_rate": 2.5349598264985028e-05, | |
| "loss": 0.2632, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 3.292079207920792, | |
| "grad_norm": 0.06928132458586024, | |
| "learning_rate": 2.5221048334537952e-05, | |
| "loss": 0.2621, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.297029702970297, | |
| "grad_norm": 0.07518881651499724, | |
| "learning_rate": 2.5092674932730886e-05, | |
| "loss": 0.2593, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 3.301980198019802, | |
| "grad_norm": 0.09171673550931758, | |
| "learning_rate": 2.4964479592932574e-05, | |
| "loss": 0.2601, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.3069306930693068, | |
| "grad_norm": 0.08658887819981398, | |
| "learning_rate": 2.4836463846384832e-05, | |
| "loss": 0.2652, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 3.3118811881188117, | |
| "grad_norm": 0.07581169369002726, | |
| "learning_rate": 2.470862922218431e-05, | |
| "loss": 0.2601, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 3.3168316831683167, | |
| "grad_norm": 0.08405531192143934, | |
| "learning_rate": 2.4580977247264253e-05, | |
| "loss": 0.2617, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.3217821782178216, | |
| "grad_norm": 0.08264190542819465, | |
| "learning_rate": 2.4453509446376192e-05, | |
| "loss": 0.2645, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 3.3267326732673266, | |
| "grad_norm": 0.0875835392802001, | |
| "learning_rate": 2.432622734207182e-05, | |
| "loss": 0.2606, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.3316831683168315, | |
| "grad_norm": 0.0777091868248858, | |
| "learning_rate": 2.4199132454684736e-05, | |
| "loss": 0.2635, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 3.3366336633663365, | |
| "grad_norm": 0.09147076046625585, | |
| "learning_rate": 2.40722263023123e-05, | |
| "loss": 0.2547, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 3.3415841584158414, | |
| "grad_norm": 0.0814714175342403, | |
| "learning_rate": 2.3945510400797485e-05, | |
| "loss": 0.2604, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.3465346534653464, | |
| "grad_norm": 0.08004074246147477, | |
| "learning_rate": 2.3818986263710886e-05, | |
| "loss": 0.263, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.3514851485148514, | |
| "grad_norm": 0.07584161785288035, | |
| "learning_rate": 2.3692655402332455e-05, | |
| "loss": 0.2594, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.3564356435643563, | |
| "grad_norm": 0.08695533626808738, | |
| "learning_rate": 2.3566519325633567e-05, | |
| "loss": 0.2601, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.3613861386138613, | |
| "grad_norm": 0.0791972098065313, | |
| "learning_rate": 2.3440579540259006e-05, | |
| "loss": 0.2615, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.366336633663366, | |
| "grad_norm": 0.08818611699036372, | |
| "learning_rate": 2.3314837550508875e-05, | |
| "loss": 0.2602, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.371287128712871, | |
| "grad_norm": 0.07757375545655479, | |
| "learning_rate": 2.3189294858320768e-05, | |
| "loss": 0.2609, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.376237623762376, | |
| "grad_norm": 0.08162773290629517, | |
| "learning_rate": 2.3063952963251682e-05, | |
| "loss": 0.259, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.381188118811881, | |
| "grad_norm": 0.0756857422551136, | |
| "learning_rate": 2.2938813362460198e-05, | |
| "loss": 0.2558, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.386138613861386, | |
| "grad_norm": 0.08379742666430028, | |
| "learning_rate": 2.2813877550688553e-05, | |
| "loss": 0.2643, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.391089108910891, | |
| "grad_norm": 0.08340597193070581, | |
| "learning_rate": 2.2689147020244848e-05, | |
| "loss": 0.2608, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.396039603960396, | |
| "grad_norm": 0.07863270852195665, | |
| "learning_rate": 2.256462326098516e-05, | |
| "loss": 0.2624, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.400990099009901, | |
| "grad_norm": 0.08196381945609525, | |
| "learning_rate": 2.2440307760295755e-05, | |
| "loss": 0.2616, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.405940594059406, | |
| "grad_norm": 0.08378698824706224, | |
| "learning_rate": 2.2316202003075347e-05, | |
| "loss": 0.262, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.410891089108911, | |
| "grad_norm": 0.07195893904281865, | |
| "learning_rate": 2.2192307471717324e-05, | |
| "loss": 0.2593, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.4158415841584158, | |
| "grad_norm": 0.08537050560423447, | |
| "learning_rate": 2.2068625646092103e-05, | |
| "loss": 0.2652, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.4207920792079207, | |
| "grad_norm": 0.07000150332485644, | |
| "learning_rate": 2.194515800352942e-05, | |
| "loss": 0.2598, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.4257425742574257, | |
| "grad_norm": 0.08681466720555077, | |
| "learning_rate": 2.1821906018800643e-05, | |
| "loss": 0.26, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.4306930693069306, | |
| "grad_norm": 0.07771534110147085, | |
| "learning_rate": 2.169887116410121e-05, | |
| "loss": 0.2632, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.4356435643564356, | |
| "grad_norm": 0.07591468547904767, | |
| "learning_rate": 2.1576054909033014e-05, | |
| "loss": 0.264, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.4405940594059405, | |
| "grad_norm": 0.08082768131651101, | |
| "learning_rate": 2.1453458720586902e-05, | |
| "loss": 0.2648, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.4455445544554455, | |
| "grad_norm": 0.08123463224039203, | |
| "learning_rate": 2.13310840631251e-05, | |
| "loss": 0.2616, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.4504950495049505, | |
| "grad_norm": 0.08379480947484824, | |
| "learning_rate": 2.1208932398363712e-05, | |
| "loss": 0.2604, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.4554455445544554, | |
| "grad_norm": 0.07509673804422137, | |
| "learning_rate": 2.1087005185355292e-05, | |
| "loss": 0.2623, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.4603960396039604, | |
| "grad_norm": 0.0858048782262789, | |
| "learning_rate": 2.0965303880471405e-05, | |
| "loss": 0.267, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.4653465346534653, | |
| "grad_norm": 0.07547640130167284, | |
| "learning_rate": 2.0843829937385255e-05, | |
| "loss": 0.2626, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.4702970297029703, | |
| "grad_norm": 0.07350883425204456, | |
| "learning_rate": 2.072258480705431e-05, | |
| "loss": 0.261, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.4752475247524752, | |
| "grad_norm": 0.07191893655858747, | |
| "learning_rate": 2.0601569937702913e-05, | |
| "loss": 0.2622, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.48019801980198, | |
| "grad_norm": 0.07616081022762551, | |
| "learning_rate": 2.048078677480507e-05, | |
| "loss": 0.2606, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.485148514851485, | |
| "grad_norm": 0.06984625689042373, | |
| "learning_rate": 2.0360236761067117e-05, | |
| "loss": 0.2587, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.49009900990099, | |
| "grad_norm": 0.08028605456370366, | |
| "learning_rate": 2.023992133641055e-05, | |
| "loss": 0.2651, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.495049504950495, | |
| "grad_norm": 0.07388138739910662, | |
| "learning_rate": 2.0119841937954794e-05, | |
| "loss": 0.2657, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.07223380158067759, | |
| "learning_rate": 2.0000000000000012e-05, | |
| "loss": 0.2629, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.504950495049505, | |
| "grad_norm": 0.07459203599924807, | |
| "learning_rate": 1.9880396954009976e-05, | |
| "loss": 0.2663, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.50990099009901, | |
| "grad_norm": 0.07581551013145865, | |
| "learning_rate": 1.976103422859506e-05, | |
| "loss": 0.2629, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.514851485148515, | |
| "grad_norm": 0.06686751894402097, | |
| "learning_rate": 1.9641913249495026e-05, | |
| "loss": 0.2597, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.51980198019802, | |
| "grad_norm": 0.0736426132082484, | |
| "learning_rate": 1.9523035439562146e-05, | |
| "loss": 0.2588, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.5247524752475248, | |
| "grad_norm": 0.06871398394666604, | |
| "learning_rate": 1.9404402218744086e-05, | |
| "loss": 0.2618, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.5297029702970297, | |
| "grad_norm": 0.0763872712325709, | |
| "learning_rate": 1.9286015004066984e-05, | |
| "loss": 0.2635, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.5346534653465347, | |
| "grad_norm": 0.06539343146687637, | |
| "learning_rate": 1.9167875209618592e-05, | |
| "loss": 0.2603, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.5396039603960396, | |
| "grad_norm": 0.07878474772100631, | |
| "learning_rate": 1.9049984246531255e-05, | |
| "loss": 0.2637, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.5445544554455446, | |
| "grad_norm": 0.06871464146056458, | |
| "learning_rate": 1.8932343522965205e-05, | |
| "loss": 0.2611, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.5495049504950495, | |
| "grad_norm": 0.07766195448090221, | |
| "learning_rate": 1.8814954444091595e-05, | |
| "loss": 0.2629, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.5544554455445545, | |
| "grad_norm": 0.0684321915163134, | |
| "learning_rate": 1.8697818412075794e-05, | |
| "loss": 0.2602, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.5594059405940595, | |
| "grad_norm": 0.07986816367419101, | |
| "learning_rate": 1.8580936826060685e-05, | |
| "loss": 0.2622, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.5643564356435644, | |
| "grad_norm": 0.06371681601869503, | |
| "learning_rate": 1.846431108214981e-05, | |
| "loss": 0.2616, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.5693069306930694, | |
| "grad_norm": 0.0777995648043413, | |
| "learning_rate": 1.8347942573390865e-05, | |
| "loss": 0.2593, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.5742574257425743, | |
| "grad_norm": 0.06952441494079514, | |
| "learning_rate": 1.8231832689758903e-05, | |
| "loss": 0.2664, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.5792079207920793, | |
| "grad_norm": 0.0778247922778437, | |
| "learning_rate": 1.8115982818139862e-05, | |
| "loss": 0.263, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.5841584158415842, | |
| "grad_norm": 0.06754531987523885, | |
| "learning_rate": 1.80003943423139e-05, | |
| "loss": 0.2652, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.589108910891089, | |
| "grad_norm": 0.06763991069949353, | |
| "learning_rate": 1.7885068642938924e-05, | |
| "loss": 0.2647, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.594059405940594, | |
| "grad_norm": 0.06988041056890192, | |
| "learning_rate": 1.7770007097534062e-05, | |
| "loss": 0.2617, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.599009900990099, | |
| "grad_norm": 0.0675214500152565, | |
| "learning_rate": 1.7655211080463265e-05, | |
| "loss": 0.2601, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.603960396039604, | |
| "grad_norm": 0.0726640790012201, | |
| "learning_rate": 1.754068196291885e-05, | |
| "loss": 0.2624, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.608910891089109, | |
| "grad_norm": 0.06635841200881153, | |
| "learning_rate": 1.7426421112905095e-05, | |
| "loss": 0.2642, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.613861386138614, | |
| "grad_norm": 0.06321818867608649, | |
| "learning_rate": 1.731242989522195e-05, | |
| "loss": 0.2615, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.618811881188119, | |
| "grad_norm": 0.07039027608005301, | |
| "learning_rate": 1.7198709671448696e-05, | |
| "loss": 0.26, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.623762376237624, | |
| "grad_norm": 0.06583730825694595, | |
| "learning_rate": 1.7085261799927738e-05, | |
| "loss": 0.2626, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.628712871287129, | |
| "grad_norm": 0.06315117502780393, | |
| "learning_rate": 1.697208763574833e-05, | |
| "loss": 0.2604, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.633663366336634, | |
| "grad_norm": 0.0661718529079368, | |
| "learning_rate": 1.6859188530730387e-05, | |
| "loss": 0.2585, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.6386138613861387, | |
| "grad_norm": 0.06191307233046204, | |
| "learning_rate": 1.6746565833408352e-05, | |
| "loss": 0.2611, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.6435643564356437, | |
| "grad_norm": 0.06569352083114534, | |
| "learning_rate": 1.6634220889015087e-05, | |
| "loss": 0.2578, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.6485148514851486, | |
| "grad_norm": 0.06524584677846361, | |
| "learning_rate": 1.652215503946583e-05, | |
| "loss": 0.2591, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.6534653465346536, | |
| "grad_norm": 0.06882261641904054, | |
| "learning_rate": 1.6410369623342144e-05, | |
| "loss": 0.2621, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.6584158415841586, | |
| "grad_norm": 0.06599644920193304, | |
| "learning_rate": 1.6298865975875903e-05, | |
| "loss": 0.2621, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.6633663366336635, | |
| "grad_norm": 0.06494537997236631, | |
| "learning_rate": 1.6187645428933372e-05, | |
| "loss": 0.2576, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.6683168316831685, | |
| "grad_norm": 0.07191765354832381, | |
| "learning_rate": 1.607670931099929e-05, | |
| "loss": 0.2627, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.6732673267326734, | |
| "grad_norm": 0.06163702109997666, | |
| "learning_rate": 1.5966058947161035e-05, | |
| "loss": 0.2604, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.6782178217821784, | |
| "grad_norm": 0.06896827740527596, | |
| "learning_rate": 1.5855695659092746e-05, | |
| "loss": 0.2627, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.6831683168316833, | |
| "grad_norm": 0.07022993469113507, | |
| "learning_rate": 1.5745620765039564e-05, | |
| "loss": 0.2627, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.6881188118811883, | |
| "grad_norm": 0.06385564587911713, | |
| "learning_rate": 1.563583557980186e-05, | |
| "loss": 0.2571, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.693069306930693, | |
| "grad_norm": 0.06788789737493876, | |
| "learning_rate": 1.5526341414719565e-05, | |
| "loss": 0.2597, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.698019801980198, | |
| "grad_norm": 0.06355678554123764, | |
| "learning_rate": 1.541713957765649e-05, | |
| "loss": 0.2584, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.7029702970297027, | |
| "grad_norm": 0.06746069765280531, | |
| "learning_rate": 1.5308231372984723e-05, | |
| "loss": 0.2564, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.707920792079208, | |
| "grad_norm": 0.06602515183476947, | |
| "learning_rate": 1.5199618101569003e-05, | |
| "loss": 0.2618, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.7128712871287126, | |
| "grad_norm": 0.06261540013075592, | |
| "learning_rate": 1.5091301060751207e-05, | |
| "loss": 0.261, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.717821782178218, | |
| "grad_norm": 0.06738446233701077, | |
| "learning_rate": 1.4983281544334896e-05, | |
| "loss": 0.2615, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.7227722772277225, | |
| "grad_norm": 0.06387167102806292, | |
| "learning_rate": 1.4875560842569767e-05, | |
| "loss": 0.2628, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.727722772277228, | |
| "grad_norm": 0.06873548015112758, | |
| "learning_rate": 1.4768140242136353e-05, | |
| "loss": 0.2634, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.7326732673267324, | |
| "grad_norm": 0.07004440295456092, | |
| "learning_rate": 1.4661021026130553e-05, | |
| "loss": 0.2625, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.737623762376238, | |
| "grad_norm": 0.06913752523862327, | |
| "learning_rate": 1.4554204474048357e-05, | |
| "loss": 0.2603, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.7425742574257423, | |
| "grad_norm": 0.0648189635402994, | |
| "learning_rate": 1.4447691861770591e-05, | |
| "loss": 0.2598, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.7475247524752477, | |
| "grad_norm": 0.06865893360130693, | |
| "learning_rate": 1.4341484461547585e-05, | |
| "loss": 0.2621, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.7524752475247523, | |
| "grad_norm": 0.062626615368903, | |
| "learning_rate": 1.4235583541984092e-05, | |
| "loss": 0.2601, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.7574257425742577, | |
| "grad_norm": 0.06552809258055384, | |
| "learning_rate": 1.412999036802404e-05, | |
| "loss": 0.263, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.762376237623762, | |
| "grad_norm": 0.06568546114262737, | |
| "learning_rate": 1.4024706200935452e-05, | |
| "loss": 0.2612, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.7673267326732676, | |
| "grad_norm": 0.0669186021015185, | |
| "learning_rate": 1.3919732298295431e-05, | |
| "loss": 0.2596, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.772277227722772, | |
| "grad_norm": 0.0670319250412692, | |
| "learning_rate": 1.3815069913975045e-05, | |
| "loss": 0.2636, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.7772277227722775, | |
| "grad_norm": 0.06654340061509974, | |
| "learning_rate": 1.3710720298124454e-05, | |
| "loss": 0.256, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.782178217821782, | |
| "grad_norm": 0.06341792514467096, | |
| "learning_rate": 1.3606684697157876e-05, | |
| "loss": 0.2611, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.7871287128712874, | |
| "grad_norm": 0.06468155266586884, | |
| "learning_rate": 1.350296435373876e-05, | |
| "loss": 0.2614, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.792079207920792, | |
| "grad_norm": 0.06163612657643488, | |
| "learning_rate": 1.3399560506764959e-05, | |
| "loss": 0.2629, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.7970297029702973, | |
| "grad_norm": 0.06406891206439465, | |
| "learning_rate": 1.3296474391353854e-05, | |
| "loss": 0.2576, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.801980198019802, | |
| "grad_norm": 0.056663417800829424, | |
| "learning_rate": 1.3193707238827714e-05, | |
| "loss": 0.2562, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.806930693069307, | |
| "grad_norm": 0.06181580783140687, | |
| "learning_rate": 1.3091260276698847e-05, | |
| "loss": 0.2601, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.8118811881188117, | |
| "grad_norm": 0.05861519134108039, | |
| "learning_rate": 1.2989134728655097e-05, | |
| "loss": 0.261, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.8168316831683167, | |
| "grad_norm": 0.06567706503132943, | |
| "learning_rate": 1.288733181454508e-05, | |
| "loss": 0.2632, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.8217821782178216, | |
| "grad_norm": 0.06312183468335963, | |
| "learning_rate": 1.2785852750363716e-05, | |
| "loss": 0.2604, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.8267326732673266, | |
| "grad_norm": 0.06579516315731176, | |
| "learning_rate": 1.2684698748237633e-05, | |
| "loss": 0.2615, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.8316831683168315, | |
| "grad_norm": 0.05991919806623056, | |
| "learning_rate": 1.2583871016410764e-05, | |
| "loss": 0.2593, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.8366336633663365, | |
| "grad_norm": 0.06334787351057825, | |
| "learning_rate": 1.2483370759229874e-05, | |
| "loss": 0.2577, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.8415841584158414, | |
| "grad_norm": 0.07090959132648503, | |
| "learning_rate": 1.2383199177130135e-05, | |
| "loss": 0.2623, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.8465346534653464, | |
| "grad_norm": 0.0603300648619507, | |
| "learning_rate": 1.228335746662086e-05, | |
| "loss": 0.2642, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.8514851485148514, | |
| "grad_norm": 0.06486787854340496, | |
| "learning_rate": 1.2183846820271147e-05, | |
| "loss": 0.2649, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.8564356435643563, | |
| "grad_norm": 0.06511664568299079, | |
| "learning_rate": 1.2084668426695712e-05, | |
| "loss": 0.261, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.8613861386138613, | |
| "grad_norm": 0.06182611155071836, | |
| "learning_rate": 1.198582347054062e-05, | |
| "loss": 0.2649, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.866336633663366, | |
| "grad_norm": 0.06327929862771448, | |
| "learning_rate": 1.1887313132469154e-05, | |
| "loss": 0.265, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.871287128712871, | |
| "grad_norm": 0.06059354451174003, | |
| "learning_rate": 1.178913858914772e-05, | |
| "loss": 0.2585, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.876237623762376, | |
| "grad_norm": 0.062002888382052625, | |
| "learning_rate": 1.1691301013231788e-05, | |
| "loss": 0.2618, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.881188118811881, | |
| "grad_norm": 0.05662432666476744, | |
| "learning_rate": 1.1593801573351908e-05, | |
| "loss": 0.2624, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.886138613861386, | |
| "grad_norm": 0.05580277635060235, | |
| "learning_rate": 1.1496641434099725e-05, | |
| "loss": 0.2628, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.891089108910891, | |
| "grad_norm": 0.057711008207046964, | |
| "learning_rate": 1.1399821756014058e-05, | |
| "loss": 0.2605, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.896039603960396, | |
| "grad_norm": 0.05611620119762601, | |
| "learning_rate": 1.1303343695567066e-05, | |
| "loss": 0.2619, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.900990099009901, | |
| "grad_norm": 0.06680065656601918, | |
| "learning_rate": 1.1207208405150397e-05, | |
| "loss": 0.2639, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.905940594059406, | |
| "grad_norm": 0.060466239287352584, | |
| "learning_rate": 1.1111417033061498e-05, | |
| "loss": 0.2637, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.910891089108911, | |
| "grad_norm": 0.053378514763086894, | |
| "learning_rate": 1.1015970723489828e-05, | |
| "loss": 0.2565, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.9158415841584158, | |
| "grad_norm": 0.05813131762071988, | |
| "learning_rate": 1.0920870616503194e-05, | |
| "loss": 0.2595, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.9207920792079207, | |
| "grad_norm": 0.06355098231546671, | |
| "learning_rate": 1.082611784803417e-05, | |
| "loss": 0.2651, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.9257425742574257, | |
| "grad_norm": 0.05915544530618073, | |
| "learning_rate": 1.0731713549866494e-05, | |
| "loss": 0.2616, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.9306930693069306, | |
| "grad_norm": 0.05511489007583624, | |
| "learning_rate": 1.0637658849621593e-05, | |
| "loss": 0.2549, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.9356435643564356, | |
| "grad_norm": 0.056384857237651013, | |
| "learning_rate": 1.0543954870745088e-05, | |
| "loss": 0.2625, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.9405940594059405, | |
| "grad_norm": 0.05635325945291314, | |
| "learning_rate": 1.0450602732493337e-05, | |
| "loss": 0.2608, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.9455445544554455, | |
| "grad_norm": 0.057238822301882146, | |
| "learning_rate": 1.0357603549920129e-05, | |
| "loss": 0.2564, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.9504950495049505, | |
| "grad_norm": 0.06049015880310532, | |
| "learning_rate": 1.0264958433863353e-05, | |
| "loss": 0.2626, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.9554455445544554, | |
| "grad_norm": 0.05650257691466007, | |
| "learning_rate": 1.0172668490931673e-05, | |
| "loss": 0.2576, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.9603960396039604, | |
| "grad_norm": 0.057402085639922246, | |
| "learning_rate": 1.0080734823491402e-05, | |
| "loss": 0.2608, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.9653465346534653, | |
| "grad_norm": 0.057615806962218595, | |
| "learning_rate": 9.989158529653257e-06, | |
| "loss": 0.2621, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.9702970297029703, | |
| "grad_norm": 0.06058933676488647, | |
| "learning_rate": 9.897940703259264e-06, | |
| "loss": 0.2658, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.9752475247524752, | |
| "grad_norm": 0.05448946637229122, | |
| "learning_rate": 9.807082433869727e-06, | |
| "loss": 0.263, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.98019801980198, | |
| "grad_norm": 0.055961400533007126, | |
| "learning_rate": 9.716584806750151e-06, | |
| "loss": 0.26, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.985148514851485, | |
| "grad_norm": 0.06328918241686664, | |
| "learning_rate": 9.626448902858359e-06, | |
| "loss": 0.2596, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.99009900990099, | |
| "grad_norm": 0.05626164158686504, | |
| "learning_rate": 9.536675798831499e-06, | |
| "loss": 0.2605, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.995049504950495, | |
| "grad_norm": 0.06311974353501029, | |
| "learning_rate": 9.447266566973211e-06, | |
| "loss": 0.26, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.0657005829617962, | |
| "learning_rate": 9.358222275240884e-06, | |
| "loss": 0.2563, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.0049504950495045, | |
| "grad_norm": 0.14833670477704655, | |
| "learning_rate": 9.26954398723278e-06, | |
| "loss": 0.2415, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 4.00990099009901, | |
| "grad_norm": 0.09738865646581973, | |
| "learning_rate": 9.181232762175435e-06, | |
| "loss": 0.2363, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 4.014851485148514, | |
| "grad_norm": 0.09300244076891609, | |
| "learning_rate": 9.093289654910946e-06, | |
| "loss": 0.2367, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 4.01980198019802, | |
| "grad_norm": 0.13994293803359162, | |
| "learning_rate": 9.005715715884409e-06, | |
| "loss": 0.2366, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 4.024752475247524, | |
| "grad_norm": 0.11618540992533626, | |
| "learning_rate": 8.918511991131335e-06, | |
| "loss": 0.2371, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 4.02970297029703, | |
| "grad_norm": 0.11676112687203334, | |
| "learning_rate": 8.831679522265167e-06, | |
| "loss": 0.2373, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 4.034653465346534, | |
| "grad_norm": 0.11368974223103682, | |
| "learning_rate": 8.745219346464884e-06, | |
| "loss": 0.2398, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 4.03960396039604, | |
| "grad_norm": 0.10609372237214311, | |
| "learning_rate": 8.659132496462521e-06, | |
| "loss": 0.2389, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 4.044554455445544, | |
| "grad_norm": 0.10199378246411407, | |
| "learning_rate": 8.57342000053095e-06, | |
| "loss": 0.2369, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 4.0495049504950495, | |
| "grad_norm": 0.10641728987433756, | |
| "learning_rate": 8.488082882471476e-06, | |
| "loss": 0.2376, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 4.054455445544554, | |
| "grad_norm": 0.0948066652739402, | |
| "learning_rate": 8.403122161601699e-06, | |
| "loss": 0.2382, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 4.0594059405940595, | |
| "grad_norm": 0.09444767450020332, | |
| "learning_rate": 8.318538852743275e-06, | |
| "loss": 0.2413, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 4.064356435643564, | |
| "grad_norm": 0.09281522330484215, | |
| "learning_rate": 8.23433396620986e-06, | |
| "loss": 0.2357, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 4.069306930693069, | |
| "grad_norm": 0.08359912220801992, | |
| "learning_rate": 8.150508507795005e-06, | |
| "loss": 0.2397, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 4.074257425742574, | |
| "grad_norm": 0.0826517841789896, | |
| "learning_rate": 8.067063478760127e-06, | |
| "loss": 0.2394, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 4.079207920792079, | |
| "grad_norm": 0.07579285661758814, | |
| "learning_rate": 7.983999875822563e-06, | |
| "loss": 0.2351, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 4.084158415841584, | |
| "grad_norm": 0.07850432213332705, | |
| "learning_rate": 7.901318691143678e-06, | |
| "loss": 0.2403, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.089108910891089, | |
| "grad_norm": 0.07909754150897867, | |
| "learning_rate": 7.819020912317011e-06, | |
| "loss": 0.2387, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 4.094059405940594, | |
| "grad_norm": 0.06821060953413947, | |
| "learning_rate": 7.73710752235647e-06, | |
| "loss": 0.2372, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 4.099009900990099, | |
| "grad_norm": 0.07023999741301488, | |
| "learning_rate": 7.65557949968459e-06, | |
| "loss": 0.2402, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 4.103960396039604, | |
| "grad_norm": 0.07435090331777651, | |
| "learning_rate": 7.574437818120839e-06, | |
| "loss": 0.2338, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 4.108910891089109, | |
| "grad_norm": 0.07224904309089736, | |
| "learning_rate": 7.4936834468699945e-06, | |
| "loss": 0.2387, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 4.1138613861386135, | |
| "grad_norm": 0.06369678683068347, | |
| "learning_rate": 7.413317350510589e-06, | |
| "loss": 0.2367, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 4.118811881188119, | |
| "grad_norm": 0.06699315639457563, | |
| "learning_rate": 7.333340488983363e-06, | |
| "loss": 0.2375, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 4.123762376237623, | |
| "grad_norm": 0.06690201833514393, | |
| "learning_rate": 7.253753817579792e-06, | |
| "loss": 0.2369, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 4.128712871287129, | |
| "grad_norm": 0.06523178059708698, | |
| "learning_rate": 7.174558286930682e-06, | |
| "loss": 0.2353, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 4.133663366336633, | |
| "grad_norm": 0.06928338209332009, | |
| "learning_rate": 7.095754842994824e-06, | |
| "loss": 0.241, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 4.138613861386139, | |
| "grad_norm": 0.05826199727329262, | |
| "learning_rate": 7.0173444270477075e-06, | |
| "loss": 0.237, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 4.143564356435643, | |
| "grad_norm": 0.0650338593796366, | |
| "learning_rate": 6.939327975670256e-06, | |
| "loss": 0.2389, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 4.148514851485149, | |
| "grad_norm": 0.062373371937288424, | |
| "learning_rate": 6.861706420737628e-06, | |
| "loss": 0.235, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 4.153465346534653, | |
| "grad_norm": 0.059345606049629886, | |
| "learning_rate": 6.784480689408099e-06, | |
| "loss": 0.2374, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 4.158415841584159, | |
| "grad_norm": 0.05938411361311235, | |
| "learning_rate": 6.707651704112028e-06, | |
| "loss": 0.2394, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.163366336633663, | |
| "grad_norm": 0.05507804095810438, | |
| "learning_rate": 6.631220382540755e-06, | |
| "loss": 0.2379, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 4.1683168316831685, | |
| "grad_norm": 0.05676343569911684, | |
| "learning_rate": 6.555187637635727e-06, | |
| "loss": 0.2387, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 4.173267326732673, | |
| "grad_norm": 0.05738778495833525, | |
| "learning_rate": 6.479554377577528e-06, | |
| "loss": 0.2379, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 4.178217821782178, | |
| "grad_norm": 0.05800503879065267, | |
| "learning_rate": 6.404321505775053e-06, | |
| "loss": 0.2367, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 4.183168316831683, | |
| "grad_norm": 0.055483651786811077, | |
| "learning_rate": 6.329489920854745e-06, | |
| "loss": 0.2385, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 4.188118811881188, | |
| "grad_norm": 0.05477230373977264, | |
| "learning_rate": 6.255060516649809e-06, | |
| "loss": 0.239, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 4.193069306930693, | |
| "grad_norm": 0.051715088356370745, | |
| "learning_rate": 6.181034182189592e-06, | |
| "loss": 0.2429, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 4.198019801980198, | |
| "grad_norm": 0.05515414502968798, | |
| "learning_rate": 6.107411801688905e-06, | |
| "loss": 0.2379, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 4.202970297029703, | |
| "grad_norm": 0.05507096068990231, | |
| "learning_rate": 6.034194254537502e-06, | |
| "loss": 0.2355, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 4.207920792079208, | |
| "grad_norm": 0.053287860897235355, | |
| "learning_rate": 5.9613824152895765e-06, | |
| "loss": 0.2396, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.212871287128713, | |
| "grad_norm": 0.0560101802810022, | |
| "learning_rate": 5.8889771536532855e-06, | |
| "loss": 0.2368, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 4.217821782178218, | |
| "grad_norm": 0.051457826913242195, | |
| "learning_rate": 5.8169793344804085e-06, | |
| "loss": 0.2408, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 4.2227722772277225, | |
| "grad_norm": 0.05428425450968013, | |
| "learning_rate": 5.7453898177559505e-06, | |
| "loss": 0.2355, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 4.227722772277228, | |
| "grad_norm": 0.05221957619974725, | |
| "learning_rate": 5.674209458587929e-06, | |
| "loss": 0.2369, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 4.232673267326732, | |
| "grad_norm": 0.05124594198330728, | |
| "learning_rate": 5.603439107197149e-06, | |
| "loss": 0.2399, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 4.237623762376238, | |
| "grad_norm": 0.051046117213285905, | |
| "learning_rate": 5.5330796089070064e-06, | |
| "loss": 0.2391, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 4.242574257425742, | |
| "grad_norm": 0.052304073144293355, | |
| "learning_rate": 5.463131804133461e-06, | |
| "loss": 0.2374, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 4.247524752475248, | |
| "grad_norm": 0.05229718260879773, | |
| "learning_rate": 5.393596528374923e-06, | |
| "loss": 0.2377, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 4.252475247524752, | |
| "grad_norm": 0.051704981010214965, | |
| "learning_rate": 5.324474612202335e-06, | |
| "loss": 0.2386, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 4.257425742574258, | |
| "grad_norm": 0.04980569031859926, | |
| "learning_rate": 5.255766881249212e-06, | |
| "loss": 0.2382, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.262376237623762, | |
| "grad_norm": 0.056633565485021506, | |
| "learning_rate": 5.187474156201786e-06, | |
| "loss": 0.2358, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 4.267326732673268, | |
| "grad_norm": 0.05196973815132134, | |
| "learning_rate": 5.119597252789237e-06, | |
| "loss": 0.2353, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 4.272277227722772, | |
| "grad_norm": 0.054561533142327555, | |
| "learning_rate": 5.052136981773892e-06, | |
| "loss": 0.2379, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 4.2772277227722775, | |
| "grad_norm": 0.049673991948889856, | |
| "learning_rate": 4.9850941489415985e-06, | |
| "loss": 0.2404, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 4.282178217821782, | |
| "grad_norm": 0.052903140339615774, | |
| "learning_rate": 4.918469555092049e-06, | |
| "loss": 0.2383, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 4.287128712871287, | |
| "grad_norm": 0.05190724610448354, | |
| "learning_rate": 4.852263996029259e-06, | |
| "loss": 0.2357, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 4.292079207920792, | |
| "grad_norm": 0.04823404745690664, | |
| "learning_rate": 4.786478262552012e-06, | |
| "loss": 0.2347, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 4.297029702970297, | |
| "grad_norm": 0.05001330463963647, | |
| "learning_rate": 4.7211131404444825e-06, | |
| "loss": 0.2364, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 4.301980198019802, | |
| "grad_norm": 0.053915613343368925, | |
| "learning_rate": 4.656169410466795e-06, | |
| "loss": 0.2395, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 4.306930693069307, | |
| "grad_norm": 0.0521840158401366, | |
| "learning_rate": 4.591647848345711e-06, | |
| "loss": 0.2398, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 4.311881188118812, | |
| "grad_norm": 0.05092405950712264, | |
| "learning_rate": 4.527549224765362e-06, | |
| "loss": 0.2363, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 4.316831683168317, | |
| "grad_norm": 0.04856918141084432, | |
| "learning_rate": 4.463874305358045e-06, | |
| "loss": 0.2398, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 4.321782178217822, | |
| "grad_norm": 0.05172447677120079, | |
| "learning_rate": 4.400623850695103e-06, | |
| "loss": 0.2396, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 4.326732673267327, | |
| "grad_norm": 0.05355917732905732, | |
| "learning_rate": 4.337798616277806e-06, | |
| "loss": 0.2385, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 4.3316831683168315, | |
| "grad_norm": 0.051686520791098325, | |
| "learning_rate": 4.275399352528342e-06, | |
| "loss": 0.2394, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.336633663366337, | |
| "grad_norm": 0.05171279734424735, | |
| "learning_rate": 4.213426804780838e-06, | |
| "loss": 0.237, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 4.341584158415841, | |
| "grad_norm": 0.05150346337682817, | |
| "learning_rate": 4.151881713272472e-06, | |
| "loss": 0.239, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 4.346534653465347, | |
| "grad_norm": 0.050892722418566426, | |
| "learning_rate": 4.090764813134644e-06, | |
| "loss": 0.2416, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 4.351485148514851, | |
| "grad_norm": 0.049350733100512516, | |
| "learning_rate": 4.0300768343841805e-06, | |
| "loss": 0.2382, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 4.356435643564357, | |
| "grad_norm": 0.04931095750384826, | |
| "learning_rate": 3.969818501914597e-06, | |
| "loss": 0.2366, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.361386138613861, | |
| "grad_norm": 0.050924430998417654, | |
| "learning_rate": 3.909990535487472e-06, | |
| "loss": 0.237, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 4.366336633663367, | |
| "grad_norm": 0.049863362448873925, | |
| "learning_rate": 3.850593649723804e-06, | |
| "loss": 0.2398, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 4.371287128712871, | |
| "grad_norm": 0.04927463375224769, | |
| "learning_rate": 3.7916285540955566e-06, | |
| "loss": 0.2418, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 4.376237623762377, | |
| "grad_norm": 0.0482570661660394, | |
| "learning_rate": 3.733095952917101e-06, | |
| "loss": 0.2372, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 4.381188118811881, | |
| "grad_norm": 0.049192512272021933, | |
| "learning_rate": 3.6749965453368375e-06, | |
| "loss": 0.2364, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.3861386138613865, | |
| "grad_norm": 0.04845689086339911, | |
| "learning_rate": 3.617331025328845e-06, | |
| "loss": 0.2361, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 4.391089108910891, | |
| "grad_norm": 0.0502670225409252, | |
| "learning_rate": 3.5601000816846053e-06, | |
| "loss": 0.2372, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 4.396039603960396, | |
| "grad_norm": 0.04808200816842586, | |
| "learning_rate": 3.50330439800473e-06, | |
| "loss": 0.2384, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 4.400990099009901, | |
| "grad_norm": 0.05042628809150087, | |
| "learning_rate": 3.4469446526908555e-06, | |
| "loss": 0.2402, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 4.405940594059406, | |
| "grad_norm": 0.046037281684570205, | |
| "learning_rate": 3.3910215189374916e-06, | |
| "loss": 0.2404, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.410891089108911, | |
| "grad_norm": 0.0486971701984787, | |
| "learning_rate": 3.3355356647239987e-06, | |
| "loss": 0.2414, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 4.415841584158416, | |
| "grad_norm": 0.04880396827143136, | |
| "learning_rate": 3.2804877528066225e-06, | |
| "loss": 0.2383, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 4.420792079207921, | |
| "grad_norm": 0.04893364723038564, | |
| "learning_rate": 3.225878440710544e-06, | |
| "loss": 0.2408, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 4.425742574257426, | |
| "grad_norm": 0.047864235721432204, | |
| "learning_rate": 3.171708380722072e-06, | |
| "loss": 0.2375, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 4.430693069306931, | |
| "grad_norm": 0.050004565893982035, | |
| "learning_rate": 3.1179782198807973e-06, | |
| "loss": 0.2355, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.435643564356436, | |
| "grad_norm": 0.05153700056471179, | |
| "learning_rate": 3.064688599971901e-06, | |
| "loss": 0.2377, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 4.4405940594059405, | |
| "grad_norm": 0.0488630526140388, | |
| "learning_rate": 3.011840157518493e-06, | |
| "loss": 0.2376, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 4.445544554455446, | |
| "grad_norm": 0.04793270329958219, | |
| "learning_rate": 2.9594335237739778e-06, | |
| "loss": 0.24, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 4.4504950495049505, | |
| "grad_norm": 0.045205252044246934, | |
| "learning_rate": 2.9074693247145513e-06, | |
| "loss": 0.2369, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 4.455445544554456, | |
| "grad_norm": 0.046629357363959435, | |
| "learning_rate": 2.85594818103168e-06, | |
| "loss": 0.2345, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.46039603960396, | |
| "grad_norm": 0.04689830172030681, | |
| "learning_rate": 2.804870708124745e-06, | |
| "loss": 0.2366, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 4.465346534653466, | |
| "grad_norm": 0.05083578293715084, | |
| "learning_rate": 2.754237516093623e-06, | |
| "loss": 0.2375, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 4.47029702970297, | |
| "grad_norm": 0.04857771410366898, | |
| "learning_rate": 2.7040492097314498e-06, | |
| "loss": 0.2405, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 4.475247524752476, | |
| "grad_norm": 0.04634397869065822, | |
| "learning_rate": 2.6543063885173936e-06, | |
| "loss": 0.2374, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 4.48019801980198, | |
| "grad_norm": 0.047891106793236694, | |
| "learning_rate": 2.605009646609453e-06, | |
| "loss": 0.2387, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 4.485148514851485, | |
| "grad_norm": 0.048117090467841135, | |
| "learning_rate": 2.556159572837422e-06, | |
| "loss": 0.2415, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 4.49009900990099, | |
| "grad_norm": 0.04865265129692607, | |
| "learning_rate": 2.5077567506957977e-06, | |
| "loss": 0.2362, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 4.4950495049504955, | |
| "grad_norm": 0.04599413264021086, | |
| "learning_rate": 2.459801758336835e-06, | |
| "loss": 0.2372, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.048222084438524646, | |
| "learning_rate": 2.4122951685636674e-06, | |
| "loss": 0.2405, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 4.5049504950495045, | |
| "grad_norm": 0.04704432338353155, | |
| "learning_rate": 2.3652375488234114e-06, | |
| "loss": 0.2391, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.50990099009901, | |
| "grad_norm": 0.04745475969682847, | |
| "learning_rate": 2.3186294612004365e-06, | |
| "loss": 0.2395, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 4.514851485148515, | |
| "grad_norm": 0.04862261011896364, | |
| "learning_rate": 2.272471462409622e-06, | |
| "loss": 0.2409, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 4.51980198019802, | |
| "grad_norm": 0.046907173224887085, | |
| "learning_rate": 2.226764103789716e-06, | |
| "loss": 0.2389, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 4.524752475247524, | |
| "grad_norm": 0.04625726359398786, | |
| "learning_rate": 2.181507931296749e-06, | |
| "loss": 0.2409, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 4.52970297029703, | |
| "grad_norm": 0.048230191171381366, | |
| "learning_rate": 2.136703485497531e-06, | |
| "loss": 0.2376, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 4.534653465346535, | |
| "grad_norm": 0.0473264881125143, | |
| "learning_rate": 2.0923513015631646e-06, | |
| "loss": 0.2351, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 4.53960396039604, | |
| "grad_norm": 0.04590922745948315, | |
| "learning_rate": 2.0484519092626652e-06, | |
| "loss": 0.2395, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 4.544554455445544, | |
| "grad_norm": 0.048130979836544845, | |
| "learning_rate": 2.0050058329566367e-06, | |
| "loss": 0.2419, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 4.5495049504950495, | |
| "grad_norm": 0.04562632269161316, | |
| "learning_rate": 1.9620135915909968e-06, | |
| "loss": 0.2364, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 4.554455445544555, | |
| "grad_norm": 0.0476088198793673, | |
| "learning_rate": 1.9194756986908025e-06, | |
| "loss": 0.2391, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.5594059405940595, | |
| "grad_norm": 0.04716082029677215, | |
| "learning_rate": 1.8773926623541028e-06, | |
| "loss": 0.2374, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 4.564356435643564, | |
| "grad_norm": 0.04742904413065106, | |
| "learning_rate": 1.835764985245856e-06, | |
| "loss": 0.2394, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 4.569306930693069, | |
| "grad_norm": 0.04569300920088693, | |
| "learning_rate": 1.7945931645919358e-06, | |
| "loss": 0.2358, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 4.574257425742574, | |
| "grad_norm": 0.04518374268437982, | |
| "learning_rate": 1.7538776921731937e-06, | |
| "loss": 0.2413, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 4.579207920792079, | |
| "grad_norm": 0.04609139030601693, | |
| "learning_rate": 1.713619054319593e-06, | |
| "loss": 0.2392, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 4.584158415841584, | |
| "grad_norm": 0.04817915866130937, | |
| "learning_rate": 1.6738177319044036e-06, | |
| "loss": 0.2375, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.589108910891089, | |
| "grad_norm": 0.04459736481607962, | |
| "learning_rate": 1.6344742003384161e-06, | |
| "loss": 0.2362, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 4.594059405940594, | |
| "grad_norm": 0.0449026355617313, | |
| "learning_rate": 1.5955889295643111e-06, | |
| "loss": 0.2377, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.599009900990099, | |
| "grad_norm": 0.04744519306239131, | |
| "learning_rate": 1.5571623840510185e-06, | |
| "loss": 0.2391, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 4.603960396039604, | |
| "grad_norm": 0.0451351470929595, | |
| "learning_rate": 1.519195022788198e-06, | |
| "loss": 0.2408, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.608910891089109, | |
| "grad_norm": 0.04561138995843172, | |
| "learning_rate": 1.481687299280723e-06, | |
| "loss": 0.242, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 4.6138613861386135, | |
| "grad_norm": 0.04424775857738886, | |
| "learning_rate": 1.4446396615432855e-06, | |
| "loss": 0.2384, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 4.618811881188119, | |
| "grad_norm": 0.04631851429293384, | |
| "learning_rate": 1.4080525520950184e-06, | |
| "loss": 0.2442, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 4.623762376237623, | |
| "grad_norm": 0.04514568203810665, | |
| "learning_rate": 1.3719264079542628e-06, | |
| "loss": 0.2369, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 4.628712871287129, | |
| "grad_norm": 0.04575984145611956, | |
| "learning_rate": 1.33626166063328e-06, | |
| "loss": 0.2381, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 4.633663366336633, | |
| "grad_norm": 0.04515268797675634, | |
| "learning_rate": 1.3010587361331673e-06, | |
| "loss": 0.242, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 4.638613861386139, | |
| "grad_norm": 0.045073799413081025, | |
| "learning_rate": 1.2663180549387e-06, | |
| "loss": 0.2375, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 4.643564356435643, | |
| "grad_norm": 0.04670702500193507, | |
| "learning_rate": 1.2320400320133551e-06, | |
| "loss": 0.239, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 4.648514851485149, | |
| "grad_norm": 0.04525298838401546, | |
| "learning_rate": 1.1982250767943593e-06, | |
| "loss": 0.2374, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 4.653465346534653, | |
| "grad_norm": 0.04414989478160124, | |
| "learning_rate": 1.1648735931877543e-06, | |
| "loss": 0.2399, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.658415841584159, | |
| "grad_norm": 0.04539179349644283, | |
| "learning_rate": 1.131985979563619e-06, | |
| "loss": 0.238, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 4.663366336633663, | |
| "grad_norm": 0.045565671775401856, | |
| "learning_rate": 1.0995626287512828e-06, | |
| "loss": 0.2382, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 4.6683168316831685, | |
| "grad_norm": 0.04662849759325202, | |
| "learning_rate": 1.0676039280346439e-06, | |
| "loss": 0.243, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 4.673267326732673, | |
| "grad_norm": 0.04464773083623712, | |
| "learning_rate": 1.036110259147547e-06, | |
| "loss": 0.2407, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 4.678217821782178, | |
| "grad_norm": 0.044465821345200274, | |
| "learning_rate": 1.0050819982692083e-06, | |
| "loss": 0.2388, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 4.683168316831683, | |
| "grad_norm": 0.0449944805033351, | |
| "learning_rate": 9.745195160197452e-07, | |
| "loss": 0.2373, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 4.688118811881188, | |
| "grad_norm": 0.04801372663982143, | |
| "learning_rate": 9.444231774557199e-07, | |
| "loss": 0.2396, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 4.693069306930693, | |
| "grad_norm": 0.04438811479086852, | |
| "learning_rate": 9.147933420658117e-07, | |
| "loss": 0.2389, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 4.698019801980198, | |
| "grad_norm": 0.044288975040482834, | |
| "learning_rate": 8.856303637664987e-07, | |
| "loss": 0.2369, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 4.702970297029703, | |
| "grad_norm": 0.046158842268618265, | |
| "learning_rate": 8.569345908978355e-07, | |
| "loss": 0.2387, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.707920792079208, | |
| "grad_norm": 0.045244039053761356, | |
| "learning_rate": 8.287063662193095e-07, | |
| "loss": 0.2426, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 4.712871287128713, | |
| "grad_norm": 0.04481365441294429, | |
| "learning_rate": 8.009460269057156e-07, | |
| "loss": 0.2386, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 4.717821782178218, | |
| "grad_norm": 0.04577841830144016, | |
| "learning_rate": 7.736539045431634e-07, | |
| "loss": 0.2415, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 4.7227722772277225, | |
| "grad_norm": 0.04408699692259636, | |
| "learning_rate": 7.468303251250764e-07, | |
| "loss": 0.2409, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 4.727722772277228, | |
| "grad_norm": 0.044862655648373294, | |
| "learning_rate": 7.204756090483411e-07, | |
| "loss": 0.2396, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 4.732673267326732, | |
| "grad_norm": 0.044900920206465036, | |
| "learning_rate": 6.945900711094534e-07, | |
| "loss": 0.2366, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 4.737623762376238, | |
| "grad_norm": 0.04644906035668882, | |
| "learning_rate": 6.691740205007602e-07, | |
| "loss": 0.2402, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 4.742574257425742, | |
| "grad_norm": 0.04499665814919747, | |
| "learning_rate": 6.442277608067838e-07, | |
| "loss": 0.2375, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 4.747524752475248, | |
| "grad_norm": 0.04333842509925558, | |
| "learning_rate": 6.197515900005613e-07, | |
| "loss": 0.238, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 4.752475247524752, | |
| "grad_norm": 0.04382161690247031, | |
| "learning_rate": 5.957458004401328e-07, | |
| "loss": 0.2401, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.757425742574258, | |
| "grad_norm": 0.044485838351754986, | |
| "learning_rate": 5.722106788649928e-07, | |
| "loss": 0.2372, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 4.762376237623762, | |
| "grad_norm": 0.0459814339270677, | |
| "learning_rate": 5.491465063927282e-07, | |
| "loss": 0.2384, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 4.767326732673268, | |
| "grad_norm": 0.04407284345840262, | |
| "learning_rate": 5.265535585156079e-07, | |
| "loss": 0.2397, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 4.772277227722772, | |
| "grad_norm": 0.04268369087826167, | |
| "learning_rate": 5.044321050973189e-07, | |
| "loss": 0.2428, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 4.7772277227722775, | |
| "grad_norm": 0.042380738168656576, | |
| "learning_rate": 4.827824103697332e-07, | |
| "loss": 0.2372, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 4.782178217821782, | |
| "grad_norm": 0.043861037556938605, | |
| "learning_rate": 4.616047329297546e-07, | |
| "loss": 0.241, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 4.787128712871287, | |
| "grad_norm": 0.04309315460955193, | |
| "learning_rate": 4.408993257362282e-07, | |
| "loss": 0.2367, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 4.792079207920792, | |
| "grad_norm": 0.043850264542277494, | |
| "learning_rate": 4.206664361069379e-07, | |
| "loss": 0.2406, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 4.797029702970297, | |
| "grad_norm": 0.042518962522091946, | |
| "learning_rate": 4.0090630571560927e-07, | |
| "loss": 0.2381, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 4.801980198019802, | |
| "grad_norm": 0.04280591327583044, | |
| "learning_rate": 3.8161917058906706e-07, | |
| "loss": 0.2362, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 4.806930693069307, | |
| "grad_norm": 0.04428828190691451, | |
| "learning_rate": 3.628052611043842e-07, | |
| "loss": 0.2388, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 4.811881188118812, | |
| "grad_norm": 0.04297748218450373, | |
| "learning_rate": 3.444648019861552e-07, | |
| "loss": 0.2356, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 4.816831683168317, | |
| "grad_norm": 0.04385791587886883, | |
| "learning_rate": 3.265980123038004e-07, | |
| "loss": 0.2386, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 4.821782178217822, | |
| "grad_norm": 0.041261863255062134, | |
| "learning_rate": 3.0920510546894156e-07, | |
| "loss": 0.2371, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 4.826732673267327, | |
| "grad_norm": 0.045027627430168714, | |
| "learning_rate": 2.9228628923285705e-07, | |
| "loss": 0.2413, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 4.8316831683168315, | |
| "grad_norm": 0.04461871544630878, | |
| "learning_rate": 2.7584176568401734e-07, | |
| "loss": 0.2362, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 4.836633663366337, | |
| "grad_norm": 0.04452805342206743, | |
| "learning_rate": 2.5987173124564224e-07, | |
| "loss": 0.2412, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 4.841584158415841, | |
| "grad_norm": 0.043976192764155944, | |
| "learning_rate": 2.4437637667338754e-07, | |
| "loss": 0.2374, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 4.846534653465347, | |
| "grad_norm": 0.04618394921045984, | |
| "learning_rate": 2.2935588705302658e-07, | |
| "loss": 0.2384, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 4.851485148514851, | |
| "grad_norm": 0.042765827362432736, | |
| "learning_rate": 2.148104417982788e-07, | |
| "loss": 0.2369, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.856435643564357, | |
| "grad_norm": 0.04141942912952928, | |
| "learning_rate": 2.0074021464864702e-07, | |
| "loss": 0.2368, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 4.861386138613861, | |
| "grad_norm": 0.04381001009904221, | |
| "learning_rate": 1.871453736673301e-07, | |
| "loss": 0.239, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 4.866336633663367, | |
| "grad_norm": 0.04481962497369779, | |
| "learning_rate": 1.740260812392558e-07, | |
| "loss": 0.241, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 4.871287128712871, | |
| "grad_norm": 0.04339245184057915, | |
| "learning_rate": 1.6138249406909558e-07, | |
| "loss": 0.2387, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 4.876237623762377, | |
| "grad_norm": 0.04468071895656673, | |
| "learning_rate": 1.4921476317941719e-07, | |
| "loss": 0.2393, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 4.881188118811881, | |
| "grad_norm": 0.043243224263089276, | |
| "learning_rate": 1.3752303390887733e-07, | |
| "loss": 0.2405, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 4.8861386138613865, | |
| "grad_norm": 0.04411565568410913, | |
| "learning_rate": 1.2630744591048516e-07, | |
| "loss": 0.2388, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 4.891089108910891, | |
| "grad_norm": 0.0439686894180931, | |
| "learning_rate": 1.1556813314993698e-07, | |
| "loss": 0.2387, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 4.896039603960396, | |
| "grad_norm": 0.04258496686208244, | |
| "learning_rate": 1.0530522390400422e-07, | |
| "loss": 0.2382, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 4.900990099009901, | |
| "grad_norm": 0.043027628871607715, | |
| "learning_rate": 9.551884075901463e-08, | |
| "loss": 0.2366, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.905940594059406, | |
| "grad_norm": 0.04240544691281539, | |
| "learning_rate": 8.620910060938681e-08, | |
| "loss": 0.2377, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 4.910891089108911, | |
| "grad_norm": 0.0429291141669323, | |
| "learning_rate": 7.737611465622686e-08, | |
| "loss": 0.2391, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 4.915841584158416, | |
| "grad_norm": 0.04405082368021681, | |
| "learning_rate": 6.901998840600055e-08, | |
| "loss": 0.2388, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 4.920792079207921, | |
| "grad_norm": 0.04233502027894486, | |
| "learning_rate": 6.11408216692766e-08, | |
| "loss": 0.2373, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 4.925742574257426, | |
| "grad_norm": 0.04248456835844998, | |
| "learning_rate": 5.373870855954089e-08, | |
| "loss": 0.2395, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 4.930693069306931, | |
| "grad_norm": 0.04337704667173435, | |
| "learning_rate": 4.681373749205964e-08, | |
| "loss": 0.2392, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 4.935643564356436, | |
| "grad_norm": 0.04293673533953289, | |
| "learning_rate": 4.036599118282691e-08, | |
| "loss": 0.2398, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 4.9405940594059405, | |
| "grad_norm": 0.04264178722028713, | |
| "learning_rate": 3.439554664758316e-08, | |
| "loss": 0.2372, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 4.945544554455445, | |
| "grad_norm": 0.04733981864127338, | |
| "learning_rate": 2.890247520089151e-08, | |
| "loss": 0.2389, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 4.9504950495049505, | |
| "grad_norm": 0.043007524302967316, | |
| "learning_rate": 2.3886842455285166e-08, | |
| "loss": 0.235, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.955445544554456, | |
| "grad_norm": 0.042085073458264566, | |
| "learning_rate": 1.934870832047686e-08, | |
| "loss": 0.2364, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 4.96039603960396, | |
| "grad_norm": 0.04333569651636603, | |
| "learning_rate": 1.528812700266169e-08, | |
| "loss": 0.2362, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.965346534653465, | |
| "grad_norm": 0.04536711780330465, | |
| "learning_rate": 1.1705147003842065e-08, | |
| "loss": 0.2382, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 4.97029702970297, | |
| "grad_norm": 0.04235293811505524, | |
| "learning_rate": 8.59981112128594e-09, | |
| "loss": 0.2367, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.975247524752476, | |
| "grad_norm": 0.042102375850486706, | |
| "learning_rate": 5.972156446980571e-09, | |
| "loss": 0.2407, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 4.98019801980198, | |
| "grad_norm": 0.04302505768029455, | |
| "learning_rate": 3.822214367197319e-09, | |
| "loss": 0.2388, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 4.985148514851485, | |
| "grad_norm": 0.044323506621706, | |
| "learning_rate": 2.150010562140814e-09, | |
| "loss": 0.2391, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 4.99009900990099, | |
| "grad_norm": 0.04355933275117794, | |
| "learning_rate": 9.555650056070065e-10, | |
| "loss": 0.2353, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 4.9950495049504955, | |
| "grad_norm": 0.0446808654882407, | |
| "learning_rate": 2.3889196477000497e-10, | |
| "loss": 0.2401, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.04593523633444536, | |
| "learning_rate": 0.0, | |
| "loss": 0.2343, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 1010, | |
| "total_flos": 5.689611896487936e+16, | |
| "train_loss": 0.22220699680913794, | |
| "train_runtime": 41938.1616, | |
| "train_samples_per_second": 12.314, | |
| "train_steps_per_second": 0.024 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1010, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.689611896487936e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |