| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9982300884955753, |
| "eval_steps": 500, |
| "global_step": 282, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0035398230088495575, |
| "grad_norm": 0.014968921455505688, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 2.1572, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.007079646017699115, |
| "grad_norm": 0.011437004760788206, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 2.107, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.010619469026548672, |
| "grad_norm": 0.037465559679398804, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 2.2589, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01415929203539823, |
| "grad_norm": 0.05454537578994139, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 2.034, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.017699115044247787, |
| "grad_norm": 0.1571730702304739, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 2.2764, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.021238938053097345, |
| "grad_norm": 0.23702733026323008, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 2.2573, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.024778761061946902, |
| "grad_norm": 0.4231145053916906, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 2.218, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02831858407079646, |
| "grad_norm": 0.61941148194414, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 2.1493, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03185840707964602, |
| "grad_norm": 0.8161556182954559, |
| "learning_rate": 5e-05, |
| "loss": 2.0541, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.035398230088495575, |
| "grad_norm": 1.104704245155024, |
| "learning_rate": 4.9998344688731027e-05, |
| "loss": 2.0539, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03893805309734513, |
| "grad_norm": 0.9892480165180763, |
| "learning_rate": 4.999337897412852e-05, |
| "loss": 2.0266, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.04247787610619469, |
| "grad_norm": 0.48679604721665365, |
| "learning_rate": 4.9985103513776764e-05, |
| "loss": 1.7714, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04601769911504425, |
| "grad_norm": 0.6641453909307959, |
| "learning_rate": 4.997351940355277e-05, |
| "loss": 1.8569, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.049557522123893805, |
| "grad_norm": 0.4596566997606365, |
| "learning_rate": 4.9958628177481195e-05, |
| "loss": 1.7574, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.05309734513274336, |
| "grad_norm": 0.3912601442043214, |
| "learning_rate": 4.99404318075312e-05, |
| "loss": 1.6832, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05663716814159292, |
| "grad_norm": 0.4400958018245371, |
| "learning_rate": 4.9918932703355256e-05, |
| "loss": 1.6784, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06017699115044248, |
| "grad_norm": 0.35169505721314703, |
| "learning_rate": 4.989413371197013e-05, |
| "loss": 1.5881, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.06371681415929203, |
| "grad_norm": 0.3506392072576239, |
| "learning_rate": 4.9866038117379824e-05, |
| "loss": 1.6368, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06725663716814159, |
| "grad_norm": 0.29289238684825075, |
| "learning_rate": 4.9834649640140664e-05, |
| "loss": 1.5951, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.07079646017699115, |
| "grad_norm": 0.2811111005250868, |
| "learning_rate": 4.979997243686868e-05, |
| "loss": 1.5501, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0743362831858407, |
| "grad_norm": 0.22287349650796306, |
| "learning_rate": 4.976201109968908e-05, |
| "loss": 1.5089, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.07787610619469026, |
| "grad_norm": 0.2411382508079467, |
| "learning_rate": 4.972077065562821e-05, |
| "loss": 1.4887, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.08141592920353982, |
| "grad_norm": 0.23405299969321355, |
| "learning_rate": 4.967625656594782e-05, |
| "loss": 1.5078, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.08495575221238938, |
| "grad_norm": 0.24898400278289592, |
| "learning_rate": 4.962847472542185e-05, |
| "loss": 1.5222, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08849557522123894, |
| "grad_norm": 0.24700246382161703, |
| "learning_rate": 4.957743146155581e-05, |
| "loss": 1.5576, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0920353982300885, |
| "grad_norm": 0.2587819540119162, |
| "learning_rate": 4.952313353374891e-05, |
| "loss": 1.524, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.09557522123893805, |
| "grad_norm": 0.24867732021555267, |
| "learning_rate": 4.946558813239888e-05, |
| "loss": 1.4653, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.09911504424778761, |
| "grad_norm": 0.3081887492909938, |
| "learning_rate": 4.9404802877949843e-05, |
| "loss": 1.5227, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.10265486725663717, |
| "grad_norm": 0.38210912317420137, |
| "learning_rate": 4.934078581988311e-05, |
| "loss": 1.5163, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.10619469026548672, |
| "grad_norm": 0.7013464407809188, |
| "learning_rate": 4.92735454356513e-05, |
| "loss": 1.5138, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10973451327433628, |
| "grad_norm": 0.47112796801369383, |
| "learning_rate": 4.920309062955568e-05, |
| "loss": 1.5108, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.11327433628318584, |
| "grad_norm": 0.28457319565968886, |
| "learning_rate": 4.912943073156701e-05, |
| "loss": 1.4629, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1168141592920354, |
| "grad_norm": 0.47049870670775934, |
| "learning_rate": 4.9052575496090016e-05, |
| "loss": 1.4611, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.12035398230088495, |
| "grad_norm": 0.33000778146504633, |
| "learning_rate": 4.897253510067169e-05, |
| "loss": 1.4329, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.12389380530973451, |
| "grad_norm": 0.4386960770505876, |
| "learning_rate": 4.888932014465352e-05, |
| "loss": 1.4591, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.12743362831858407, |
| "grad_norm": 0.3628284067349331, |
| "learning_rate": 4.8802941647767856e-05, |
| "loss": 1.4484, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.13097345132743363, |
| "grad_norm": 0.46785673204717787, |
| "learning_rate": 4.8713411048678635e-05, |
| "loss": 1.4166, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.13451327433628318, |
| "grad_norm": 0.3555068973478974, |
| "learning_rate": 4.862074020346664e-05, |
| "loss": 1.3474, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.13805309734513274, |
| "grad_norm": 0.32448258276292713, |
| "learning_rate": 4.8524941384059415e-05, |
| "loss": 1.4098, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1415929203539823, |
| "grad_norm": 0.22422182292818518, |
| "learning_rate": 4.842602727660618e-05, |
| "loss": 1.4523, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.14513274336283186, |
| "grad_norm": 0.26143288039726004, |
| "learning_rate": 4.8324010979797875e-05, |
| "loss": 1.4037, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1486725663716814, |
| "grad_norm": 0.2930995625884043, |
| "learning_rate": 4.8218906003132555e-05, |
| "loss": 1.4117, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.15221238938053097, |
| "grad_norm": 0.44012678716478415, |
| "learning_rate": 4.811072626512642e-05, |
| "loss": 1.4183, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.15575221238938053, |
| "grad_norm": 0.40282521531923454, |
| "learning_rate": 4.799948609147061e-05, |
| "loss": 1.4343, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1592920353982301, |
| "grad_norm": 0.3548537305066757, |
| "learning_rate": 4.7885200213134164e-05, |
| "loss": 1.4123, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.16283185840707964, |
| "grad_norm": 0.18896366313769444, |
| "learning_rate": 4.7767883764413266e-05, |
| "loss": 1.4341, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1663716814159292, |
| "grad_norm": 0.18341171851007929, |
| "learning_rate": 4.7647552280927086e-05, |
| "loss": 1.4068, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.16991150442477876, |
| "grad_norm": 0.21351492088740873, |
| "learning_rate": 4.752422169756048e-05, |
| "loss": 1.454, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.17345132743362832, |
| "grad_norm": 0.29096575232736366, |
| "learning_rate": 4.7397908346353796e-05, |
| "loss": 1.4382, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.17699115044247787, |
| "grad_norm": 0.3475137808261636, |
| "learning_rate": 4.7268628954340136e-05, |
| "loss": 1.4064, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18053097345132743, |
| "grad_norm": 0.35130460927670765, |
| "learning_rate": 4.713640064133025e-05, |
| "loss": 1.4386, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.184070796460177, |
| "grad_norm": 0.3056244311350184, |
| "learning_rate": 4.7001240917645465e-05, |
| "loss": 1.3835, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.18761061946902655, |
| "grad_norm": 0.21680753741736908, |
| "learning_rate": 4.686316768179889e-05, |
| "loss": 1.4115, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1911504424778761, |
| "grad_norm": 0.24305442966955867, |
| "learning_rate": 4.672219921812517e-05, |
| "loss": 1.3966, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.19469026548672566, |
| "grad_norm": 0.16091322568175684, |
| "learning_rate": 4.6578354194359227e-05, |
| "loss": 1.3585, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.19823008849557522, |
| "grad_norm": 0.17723290216839938, |
| "learning_rate": 4.6431651659164174e-05, |
| "loss": 1.4289, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.20176991150442478, |
| "grad_norm": 0.26160374576544143, |
| "learning_rate": 4.6282111039608784e-05, |
| "loss": 1.3914, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.20530973451327433, |
| "grad_norm": 0.22226248632808485, |
| "learning_rate": 4.6129752138594874e-05, |
| "loss": 1.3962, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2088495575221239, |
| "grad_norm": 0.20009113160493155, |
| "learning_rate": 4.59745951322349e-05, |
| "loss": 1.4087, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.21238938053097345, |
| "grad_norm": 0.19780714745729258, |
| "learning_rate": 4.581666056718016e-05, |
| "loss": 1.3653, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.215929203539823, |
| "grad_norm": 0.14022953988134976, |
| "learning_rate": 4.5655969357899874e-05, |
| "loss": 1.3999, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.21946902654867256, |
| "grad_norm": 0.164745362208849, |
| "learning_rate": 4.54925427839116e-05, |
| "loss": 1.328, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.22300884955752212, |
| "grad_norm": 0.11773879110072756, |
| "learning_rate": 4.532640248696331e-05, |
| "loss": 1.3576, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.22654867256637168, |
| "grad_norm": 0.14713521584311004, |
| "learning_rate": 4.5157570468167464e-05, |
| "loss": 1.3734, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.23008849557522124, |
| "grad_norm": 0.12150757743176828, |
| "learning_rate": 4.498606908508754e-05, |
| "loss": 1.3901, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2336283185840708, |
| "grad_norm": 0.17335782705846536, |
| "learning_rate": 4.481192104877726e-05, |
| "loss": 1.3379, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.23716814159292035, |
| "grad_norm": 0.15751237594808964, |
| "learning_rate": 4.463514942077323e-05, |
| "loss": 1.3856, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2407079646017699, |
| "grad_norm": 0.3214390865300867, |
| "learning_rate": 4.4455777610040846e-05, |
| "loss": 1.3402, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.24424778761061947, |
| "grad_norm": 0.1505432033259721, |
| "learning_rate": 4.427382936987449e-05, |
| "loss": 1.4107, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.24778761061946902, |
| "grad_norm": 0.1246996380600049, |
| "learning_rate": 4.4089328794751954e-05, |
| "loss": 1.3601, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2513274336283186, |
| "grad_norm": 0.11727561562909336, |
| "learning_rate": 4.3902300317143726e-05, |
| "loss": 1.4374, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.25486725663716814, |
| "grad_norm": 0.12074273017325926, |
| "learning_rate": 4.371276870427753e-05, |
| "loss": 1.3879, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2584070796460177, |
| "grad_norm": 0.19796910383444757, |
| "learning_rate": 4.352075905485854e-05, |
| "loss": 1.3909, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.26194690265486725, |
| "grad_norm": 0.17913857322680019, |
| "learning_rate": 4.332629679574566e-05, |
| "loss": 1.3591, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.26548672566371684, |
| "grad_norm": 0.21612189066738186, |
| "learning_rate": 4.312940767858441e-05, |
| "loss": 1.3981, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.26902654867256637, |
| "grad_norm": 0.15021608593853492, |
| "learning_rate": 4.293011777639675e-05, |
| "loss": 1.4217, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.27256637168141595, |
| "grad_norm": 0.13863221146764798, |
| "learning_rate": 4.272845348012833e-05, |
| "loss": 1.3392, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2761061946902655, |
| "grad_norm": 0.15019927813117484, |
| "learning_rate": 4.252444149515374e-05, |
| "loss": 1.3865, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.27964601769911507, |
| "grad_norm": 0.12191330805249051, |
| "learning_rate": 4.231810883773999e-05, |
| "loss": 1.3545, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2831858407079646, |
| "grad_norm": 0.2446045853389079, |
| "learning_rate": 4.210948283146892e-05, |
| "loss": 1.3849, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2867256637168142, |
| "grad_norm": 0.15363154813349278, |
| "learning_rate": 4.189859110361886e-05, |
| "loss": 1.3954, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2902654867256637, |
| "grad_norm": 0.1395549815462078, |
| "learning_rate": 4.1685461581506115e-05, |
| "loss": 1.3324, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2938053097345133, |
| "grad_norm": 0.17551930372267743, |
| "learning_rate": 4.1470122488786645e-05, |
| "loss": 1.4272, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2973451327433628, |
| "grad_norm": 0.13713537229983466, |
| "learning_rate": 4.125260234171861e-05, |
| "loss": 1.3876, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3008849557522124, |
| "grad_norm": 0.1295928704936419, |
| "learning_rate": 4.103292994538605e-05, |
| "loss": 1.4066, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.30442477876106194, |
| "grad_norm": 0.1202033500035325, |
| "learning_rate": 4.0811134389884433e-05, |
| "loss": 1.4204, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.30796460176991153, |
| "grad_norm": 0.1713978920098714, |
| "learning_rate": 4.058724504646834e-05, |
| "loss": 1.3673, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.31150442477876106, |
| "grad_norm": 0.1631957644367236, |
| "learning_rate": 4.036129156366203e-05, |
| "loss": 1.3936, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.31504424778761064, |
| "grad_norm": 0.1269834739898777, |
| "learning_rate": 4.013330386333321e-05, |
| "loss": 1.3609, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.3185840707964602, |
| "grad_norm": 0.29003377036786065, |
| "learning_rate": 3.9903312136730634e-05, |
| "loss": 1.4405, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.32212389380530976, |
| "grad_norm": 0.1550597273050285, |
| "learning_rate": 3.967134684048607e-05, |
| "loss": 1.4192, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.3256637168141593, |
| "grad_norm": 0.14307429007534203, |
| "learning_rate": 3.9437438692581e-05, |
| "loss": 1.3483, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3292035398230089, |
| "grad_norm": 0.11390632629452387, |
| "learning_rate": 3.920161866827889e-05, |
| "loss": 1.3705, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3327433628318584, |
| "grad_norm": 0.28577123310852803, |
| "learning_rate": 3.8963917996023245e-05, |
| "loss": 1.3899, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.336283185840708, |
| "grad_norm": 0.24148914707858088, |
| "learning_rate": 3.8724368153302166e-05, |
| "loss": 1.3827, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3398230088495575, |
| "grad_norm": 0.12888492035814383, |
| "learning_rate": 3.8483000862479986e-05, |
| "loss": 1.3774, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3433628318584071, |
| "grad_norm": 0.27468578419570394, |
| "learning_rate": 3.823984808659641e-05, |
| "loss": 1.3675, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.34690265486725663, |
| "grad_norm": 0.2522995801673515, |
| "learning_rate": 3.799494202513386e-05, |
| "loss": 1.3994, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3504424778761062, |
| "grad_norm": 0.19369450862361592, |
| "learning_rate": 3.77483151097534e-05, |
| "loss": 1.3676, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.35398230088495575, |
| "grad_norm": 0.1364981691071651, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 1.351, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.35752212389380533, |
| "grad_norm": 0.19824556305622026, |
| "learning_rate": 3.7250029578977625e-05, |
| "loss": 1.3983, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.36106194690265486, |
| "grad_norm": 0.258166693738195, |
| "learning_rate": 3.699843694899467e-05, |
| "loss": 1.3097, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.36460176991150445, |
| "grad_norm": 0.15288998693054637, |
| "learning_rate": 3.674525542718035e-05, |
| "loss": 1.3191, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.368141592920354, |
| "grad_norm": 0.1356079709541582, |
| "learning_rate": 3.64905185410728e-05, |
| "loss": 1.3446, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.37168141592920356, |
| "grad_norm": 0.14784692543277705, |
| "learning_rate": 3.6234260024179033e-05, |
| "loss": 1.3885, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3752212389380531, |
| "grad_norm": 0.659660827895679, |
| "learning_rate": 3.597651381150795e-05, |
| "loss": 1.3731, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3787610619469027, |
| "grad_norm": 0.1340122038319879, |
| "learning_rate": 3.5717314035076355e-05, |
| "loss": 1.37, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3823008849557522, |
| "grad_norm": 0.17555880073923438, |
| "learning_rate": 3.545669501938913e-05, |
| "loss": 1.3771, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3858407079646018, |
| "grad_norm": 0.2749432492658443, |
| "learning_rate": 3.5194691276893755e-05, |
| "loss": 1.4188, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.3893805309734513, |
| "grad_norm": 0.16422845586243, |
| "learning_rate": 3.4931337503410034e-05, |
| "loss": 1.3907, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3929203539823009, |
| "grad_norm": 0.29765257271852646, |
| "learning_rate": 3.466666857353547e-05, |
| "loss": 1.3313, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.39646017699115044, |
| "grad_norm": 0.14967484015655694, |
| "learning_rate": 3.4400719536027056e-05, |
| "loss": 1.3716, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.19336396632327799, |
| "learning_rate": 3.413352560915988e-05, |
| "loss": 1.3418, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.40353982300884955, |
| "grad_norm": 0.14974407429618758, |
| "learning_rate": 3.386512217606339e-05, |
| "loss": 1.3987, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.40707964601769914, |
| "grad_norm": 0.13138145553916636, |
| "learning_rate": 3.359554478003579e-05, |
| "loss": 1.372, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.41061946902654867, |
| "grad_norm": 0.15087834260387423, |
| "learning_rate": 3.332482911983721e-05, |
| "loss": 1.3418, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.41415929203539825, |
| "grad_norm": 0.15396500859534987, |
| "learning_rate": 3.305301104496227e-05, |
| "loss": 1.339, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.4176991150442478, |
| "grad_norm": 0.21912037148617267, |
| "learning_rate": 3.278012655089277e-05, |
| "loss": 1.3877, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.42123893805309737, |
| "grad_norm": 0.16910612610739376, |
| "learning_rate": 3.250621177433097e-05, |
| "loss": 1.4116, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.4247787610619469, |
| "grad_norm": 0.41976499689222374, |
| "learning_rate": 3.2231302988414194e-05, |
| "loss": 1.4068, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4283185840707965, |
| "grad_norm": 0.16685047210835632, |
| "learning_rate": 3.195543659791132e-05, |
| "loss": 1.3695, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.431858407079646, |
| "grad_norm": 0.12533128029061685, |
| "learning_rate": 3.167864913440195e-05, |
| "loss": 1.3561, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.4353982300884956, |
| "grad_norm": 0.12335769952527838, |
| "learning_rate": 3.140097725143868e-05, |
| "loss": 1.3248, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.4389380530973451, |
| "grad_norm": 0.16480478639945306, |
| "learning_rate": 3.112245771969327e-05, |
| "loss": 1.3889, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4424778761061947, |
| "grad_norm": 0.12236451730713493, |
| "learning_rate": 3.084312742208728e-05, |
| "loss": 1.3578, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.44601769911504424, |
| "grad_norm": 0.17612171732594184, |
| "learning_rate": 3.056302334890786e-05, |
| "loss": 1.4233, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4495575221238938, |
| "grad_norm": 0.1857045286019139, |
| "learning_rate": 3.028218259290932e-05, |
| "loss": 1.3877, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.45309734513274336, |
| "grad_norm": 0.23845826617765498, |
| "learning_rate": 3.0000642344401113e-05, |
| "loss": 1.3951, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.45663716814159294, |
| "grad_norm": 0.20335694786532152, |
| "learning_rate": 2.971843988632292e-05, |
| "loss": 1.3895, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.46017699115044247, |
| "grad_norm": 0.16324247793893487, |
| "learning_rate": 2.9435612589307458e-05, |
| "loss": 1.3704, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.46371681415929206, |
| "grad_norm": 0.22068197102315823, |
| "learning_rate": 2.9152197906731687e-05, |
| "loss": 1.3763, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4672566371681416, |
| "grad_norm": 0.13328424989654036, |
| "learning_rate": 2.886823336975703e-05, |
| "loss": 1.4059, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.47079646017699117, |
| "grad_norm": 0.1853269220432702, |
| "learning_rate": 2.8583756582359338e-05, |
| "loss": 1.3553, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4743362831858407, |
| "grad_norm": 0.17477870642537813, |
| "learning_rate": 2.8298805216349167e-05, |
| "loss": 1.3538, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4778761061946903, |
| "grad_norm": 0.22830537461041098, |
| "learning_rate": 2.8013417006383076e-05, |
| "loss": 1.3057, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4814159292035398, |
| "grad_norm": 0.19882386738471552, |
| "learning_rate": 2.7727629744966695e-05, |
| "loss": 1.369, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.4849557522123894, |
| "grad_norm": 0.18011895559282673, |
| "learning_rate": 2.7441481277449954e-05, |
| "loss": 1.394, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.48849557522123893, |
| "grad_norm": 0.2320794287860154, |
| "learning_rate": 2.715500949701549e-05, |
| "loss": 1.3963, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4920353982300885, |
| "grad_norm": 0.2136574838846041, |
| "learning_rate": 2.686825233966061e-05, |
| "loss": 1.3882, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.49557522123893805, |
| "grad_norm": 0.14317691765167115, |
| "learning_rate": 2.6581247779173635e-05, |
| "loss": 1.3702, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.49911504424778763, |
| "grad_norm": 0.3110139016551138, |
| "learning_rate": 2.629403382210524e-05, |
| "loss": 1.362, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.5026548672566372, |
| "grad_norm": 0.2905968311582129, |
| "learning_rate": 2.600664850273538e-05, |
| "loss": 1.377, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.5061946902654867, |
| "grad_norm": 0.23781505531603292, |
| "learning_rate": 2.5719129878036686e-05, |
| "loss": 1.3922, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.5097345132743363, |
| "grad_norm": 0.17830734890592495, |
| "learning_rate": 2.5431516022634715e-05, |
| "loss": 1.3797, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.5132743362831859, |
| "grad_norm": 0.1830712952884457, |
| "learning_rate": 2.5143845023765943e-05, |
| "loss": 1.3441, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5168141592920354, |
| "grad_norm": 0.2816874187150034, |
| "learning_rate": 2.4856154976234063e-05, |
| "loss": 1.3733, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5203539823008849, |
| "grad_norm": 0.17322438937284276, |
| "learning_rate": 2.456848397736529e-05, |
| "loss": 1.4381, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.5238938053097345, |
| "grad_norm": 0.19984978304386464, |
| "learning_rate": 2.4280870121963323e-05, |
| "loss": 1.385, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.5274336283185841, |
| "grad_norm": 0.24003529120628253, |
| "learning_rate": 2.399335149726463e-05, |
| "loss": 1.3378, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.5309734513274337, |
| "grad_norm": 0.1863666969548861, |
| "learning_rate": 2.370596617789476e-05, |
| "loss": 1.3711, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5345132743362832, |
| "grad_norm": 0.15186994434245493, |
| "learning_rate": 2.3418752220826364e-05, |
| "loss": 1.3917, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.5380530973451327, |
| "grad_norm": 0.18488533185567366, |
| "learning_rate": 2.3131747660339394e-05, |
| "loss": 1.3352, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5415929203539823, |
| "grad_norm": 0.14709637692023, |
| "learning_rate": 2.2844990502984513e-05, |
| "loss": 1.3606, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5451327433628319, |
| "grad_norm": 0.1929509569633954, |
| "learning_rate": 2.2558518722550048e-05, |
| "loss": 1.3675, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5486725663716814, |
| "grad_norm": 0.21437689825380304, |
| "learning_rate": 2.2272370255033314e-05, |
| "loss": 1.3784, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.552212389380531, |
| "grad_norm": 0.23426391248511405, |
| "learning_rate": 2.1986582993616926e-05, |
| "loss": 1.393, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.5557522123893806, |
| "grad_norm": 0.15338867979069432, |
| "learning_rate": 2.1701194783650846e-05, |
| "loss": 1.4128, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5592920353982301, |
| "grad_norm": 0.19084022015663935, |
| "learning_rate": 2.1416243417640668e-05, |
| "loss": 1.349, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5628318584070796, |
| "grad_norm": 0.18723374415049818, |
| "learning_rate": 2.1131766630242966e-05, |
| "loss": 1.3617, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5663716814159292, |
| "grad_norm": 0.1843740823402695, |
| "learning_rate": 2.084780209326831e-05, |
| "loss": 1.3863, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5699115044247788, |
| "grad_norm": 0.13776831606484746, |
| "learning_rate": 2.0564387410692544e-05, |
| "loss": 1.3505, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5734513274336284, |
| "grad_norm": 0.2185855169811869, |
| "learning_rate": 2.0281560113677086e-05, |
| "loss": 1.357, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5769911504424778, |
| "grad_norm": 0.1479496112029382, |
| "learning_rate": 1.9999357655598893e-05, |
| "loss": 1.3996, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5805309734513274, |
| "grad_norm": 0.14230152435027588, |
| "learning_rate": 1.971781740709068e-05, |
| "loss": 1.4579, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.584070796460177, |
| "grad_norm": 0.24415411312146668, |
| "learning_rate": 1.9436976651092144e-05, |
| "loss": 1.3951, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5876106194690266, |
| "grad_norm": 0.1531379017839431, |
| "learning_rate": 1.915687257791273e-05, |
| "loss": 1.4066, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5911504424778761, |
| "grad_norm": 0.12572602144186015, |
| "learning_rate": 1.8877542280306728e-05, |
| "loss": 1.4049, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5946902654867257, |
| "grad_norm": 0.28651665368702545, |
| "learning_rate": 1.8599022748561325e-05, |
| "loss": 1.3875, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5982300884955752, |
| "grad_norm": 0.2853818788671804, |
| "learning_rate": 1.8321350865598057e-05, |
| "loss": 1.3547, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.6017699115044248, |
| "grad_norm": 0.20397260857195473, |
| "learning_rate": 1.8044563402088684e-05, |
| "loss": 1.362, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6053097345132743, |
| "grad_norm": 0.22013691384673548, |
| "learning_rate": 1.776869701158581e-05, |
| "loss": 1.3619, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.6088495575221239, |
| "grad_norm": 0.208809460722862, |
| "learning_rate": 1.7493788225669027e-05, |
| "loss": 1.3896, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.6123893805309735, |
| "grad_norm": 0.4200222837373971, |
| "learning_rate": 1.7219873449107233e-05, |
| "loss": 1.3647, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.6159292035398231, |
| "grad_norm": 0.25501118855440547, |
| "learning_rate": 1.694698895503774e-05, |
| "loss": 1.4591, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.6194690265486725, |
| "grad_norm": 0.1600911540410192, |
| "learning_rate": 1.66751708801628e-05, |
| "loss": 1.3436, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6230088495575221, |
| "grad_norm": 0.16733007446645096, |
| "learning_rate": 1.6404455219964203e-05, |
| "loss": 1.3455, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.6265486725663717, |
| "grad_norm": 0.1899434164816737, |
| "learning_rate": 1.613487782393661e-05, |
| "loss": 1.3823, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.6300884955752213, |
| "grad_norm": 0.20941491888387903, |
| "learning_rate": 1.5866474390840125e-05, |
| "loss": 1.3896, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.6336283185840708, |
| "grad_norm": 0.14977102391347238, |
| "learning_rate": 1.5599280463972953e-05, |
| "loss": 1.3498, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.6371681415929203, |
| "grad_norm": 0.14005156844353428, |
| "learning_rate": 1.533333142646453e-05, |
| "loss": 1.3439, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6407079646017699, |
| "grad_norm": 0.24243476683329468, |
| "learning_rate": 1.5068662496589975e-05, |
| "loss": 1.4248, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.6442477876106195, |
| "grad_norm": 0.13257433719785397, |
| "learning_rate": 1.4805308723106248e-05, |
| "loss": 1.3372, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.647787610619469, |
| "grad_norm": 0.1360543958451605, |
| "learning_rate": 1.4543304980610878e-05, |
| "loss": 1.3502, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.6513274336283186, |
| "grad_norm": 0.171896129628506, |
| "learning_rate": 1.4282685964923642e-05, |
| "loss": 1.3754, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.6548672566371682, |
| "grad_norm": 0.14433841954600432, |
| "learning_rate": 1.4023486188492052e-05, |
| "loss": 1.3855, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6584070796460177, |
| "grad_norm": 0.27499627341386756, |
| "learning_rate": 1.3765739975820962e-05, |
| "loss": 1.3843, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.6619469026548672, |
| "grad_norm": 0.1832518338825052, |
| "learning_rate": 1.3509481458927209e-05, |
| "loss": 1.4067, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6654867256637168, |
| "grad_norm": 0.17627269284963698, |
| "learning_rate": 1.3254744572819658e-05, |
| "loss": 1.358, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6690265486725664, |
| "grad_norm": 0.12972103232452611, |
| "learning_rate": 1.3001563051005347e-05, |
| "loss": 1.3372, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.672566371681416, |
| "grad_norm": 0.17778610418067095, |
| "learning_rate": 1.2749970421022381e-05, |
| "loss": 1.4093, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6761061946902654, |
| "grad_norm": 0.17366530190012722, |
| "learning_rate": 1.2500000000000006e-05, |
| "loss": 1.4115, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.679646017699115, |
| "grad_norm": 0.2338226961364743, |
| "learning_rate": 1.225168489024661e-05, |
| "loss": 1.3919, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.6831858407079646, |
| "grad_norm": 0.12473946880268197, |
| "learning_rate": 1.2005057974866135e-05, |
| "loss": 1.3683, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6867256637168142, |
| "grad_norm": 0.29788214242861744, |
| "learning_rate": 1.1760151913403583e-05, |
| "loss": 1.3742, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.6902654867256637, |
| "grad_norm": 0.25161388399332285, |
| "learning_rate": 1.1516999137520023e-05, |
| "loss": 1.3436, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6938053097345133, |
| "grad_norm": 0.20275718493470796, |
| "learning_rate": 1.127563184669784e-05, |
| "loss": 1.3785, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6973451327433628, |
| "grad_norm": 0.2369309742864934, |
| "learning_rate": 1.1036082003976759e-05, |
| "loss": 1.413, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.7008849557522124, |
| "grad_norm": 0.1403641760235718, |
| "learning_rate": 1.0798381331721109e-05, |
| "loss": 1.406, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.7044247787610619, |
| "grad_norm": 0.27046989828036216, |
| "learning_rate": 1.0562561307419005e-05, |
| "loss": 1.3781, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.7079646017699115, |
| "grad_norm": 0.16492162334449362, |
| "learning_rate": 1.032865315951394e-05, |
| "loss": 1.3735, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7115044247787611, |
| "grad_norm": 0.2832750784253182, |
| "learning_rate": 1.0096687863269368e-05, |
| "loss": 1.4337, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.7150442477876107, |
| "grad_norm": 0.17170795646186768, |
| "learning_rate": 9.866696136666798e-06, |
| "loss": 1.4072, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.7185840707964601, |
| "grad_norm": 0.13847113237321784, |
| "learning_rate": 9.638708436337976e-06, |
| "loss": 1.4178, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.7221238938053097, |
| "grad_norm": 0.17526884575066398, |
| "learning_rate": 9.412754953531663e-06, |
| "loss": 1.3727, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.7256637168141593, |
| "grad_norm": 0.11417158087114691, |
| "learning_rate": 9.18886561011557e-06, |
| "loss": 1.3738, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7292035398230089, |
| "grad_norm": 0.16272670657433552, |
| "learning_rate": 8.967070054613949e-06, |
| "loss": 1.3713, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.7327433628318584, |
| "grad_norm": 0.3092931883528065, |
| "learning_rate": 8.747397658281395e-06, |
| "loss": 1.3996, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.736283185840708, |
| "grad_norm": 0.1935926337883175, |
| "learning_rate": 8.529877511213357e-06, |
| "loss": 1.3924, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.7398230088495575, |
| "grad_norm": 0.2888163198022463, |
| "learning_rate": 8.314538418493892e-06, |
| "loss": 1.3962, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.7433628318584071, |
| "grad_norm": 0.2037500223476182, |
| "learning_rate": 8.101408896381141e-06, |
| "loss": 1.4393, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7469026548672566, |
| "grad_norm": 0.20365280760954532, |
| "learning_rate": 7.890517168531086e-06, |
| "loss": 1.432, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.7504424778761062, |
| "grad_norm": 0.245973715435359, |
| "learning_rate": 7.681891162260015e-06, |
| "loss": 1.4125, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.7539823008849558, |
| "grad_norm": 0.18396574884320455, |
| "learning_rate": 7.475558504846264e-06, |
| "loss": 1.393, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7575221238938054, |
| "grad_norm": 0.14343988163912846, |
| "learning_rate": 7.271546519871672e-06, |
| "loss": 1.3735, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7610619469026548, |
| "grad_norm": 0.19050518039826148, |
| "learning_rate": 7.0698822236032554e-06, |
| "loss": 1.3552, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7646017699115044, |
| "grad_norm": 0.16749967589709347, |
| "learning_rate": 6.8705923214155945e-06, |
| "loss": 1.3955, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.768141592920354, |
| "grad_norm": 0.20340220156851388, |
| "learning_rate": 6.673703204254347e-06, |
| "loss": 1.4241, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7716814159292036, |
| "grad_norm": 0.2774385020407223, |
| "learning_rate": 6.4792409451414735e-06, |
| "loss": 1.3569, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.7752212389380531, |
| "grad_norm": 0.16257195802961893, |
| "learning_rate": 6.28723129572247e-06, |
| "loss": 1.4044, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.7787610619469026, |
| "grad_norm": 0.19178231846523627, |
| "learning_rate": 6.097699682856275e-06, |
| "loss": 1.4115, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7823008849557522, |
| "grad_norm": 0.31772086840796093, |
| "learning_rate": 5.910671205248045e-06, |
| "loss": 1.3743, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.7858407079646018, |
| "grad_norm": 0.3344955593081661, |
| "learning_rate": 5.72617063012551e-06, |
| "loss": 1.4144, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7893805309734513, |
| "grad_norm": 0.41331884709239125, |
| "learning_rate": 5.544222389959164e-06, |
| "loss": 1.4096, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.7929203539823009, |
| "grad_norm": 0.31601068127978066, |
| "learning_rate": 5.3648505792267825e-06, |
| "loss": 1.3797, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.7964601769911505, |
| "grad_norm": 0.12615271990952526, |
| "learning_rate": 5.188078951222744e-06, |
| "loss": 1.4181, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.23681972723294573, |
| "learning_rate": 5.013930914912476e-06, |
| "loss": 1.3862, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.8035398230088495, |
| "grad_norm": 0.23506910056480054, |
| "learning_rate": 4.842429531832529e-06, |
| "loss": 1.406, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.8070796460176991, |
| "grad_norm": 0.1603225379263149, |
| "learning_rate": 4.673597513036684e-06, |
| "loss": 1.3943, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.8106194690265487, |
| "grad_norm": 0.1750016849687585, |
| "learning_rate": 4.507457216088396e-06, |
| "loss": 1.3752, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.8141592920353983, |
| "grad_norm": 0.33015856465351145, |
| "learning_rate": 4.344030642100133e-06, |
| "loss": 1.4204, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8176991150442477, |
| "grad_norm": 0.17198765892059484, |
| "learning_rate": 4.183339432819844e-06, |
| "loss": 1.3832, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.8212389380530973, |
| "grad_norm": 0.2494399367301955, |
| "learning_rate": 4.025404867765103e-06, |
| "loss": 1.3654, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.8247787610619469, |
| "grad_norm": 0.15522718151820233, |
| "learning_rate": 3.8702478614051355e-06, |
| "loss": 1.3921, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.8283185840707965, |
| "grad_norm": 0.13980066687088336, |
| "learning_rate": 3.717888960391222e-06, |
| "loss": 1.3558, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.831858407079646, |
| "grad_norm": 0.15487363597370576, |
| "learning_rate": 3.5683483408358307e-06, |
| "loss": 1.4594, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8353982300884956, |
| "grad_norm": 0.1602659742055085, |
| "learning_rate": 3.4216458056407775e-06, |
| "loss": 1.4266, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.8389380530973451, |
| "grad_norm": 0.19010269637116808, |
| "learning_rate": 3.2778007818748376e-06, |
| "loss": 1.4116, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.8424778761061947, |
| "grad_norm": 0.13278314739658872, |
| "learning_rate": 3.136832318201119e-06, |
| "loss": 1.4198, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.8460176991150442, |
| "grad_norm": 0.175606952118794, |
| "learning_rate": 2.998759082354538e-06, |
| "loss": 1.3803, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.8495575221238938, |
| "grad_norm": 0.2481514536808128, |
| "learning_rate": 2.8635993586697553e-06, |
| "loss": 1.4044, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8530973451327434, |
| "grad_norm": 0.282431156272049, |
| "learning_rate": 2.7313710456598667e-06, |
| "loss": 1.4294, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.856637168141593, |
| "grad_norm": 0.2698384131452036, |
| "learning_rate": 2.602091653646205e-06, |
| "loss": 1.4682, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.8601769911504424, |
| "grad_norm": 0.177268179533375, |
| "learning_rate": 2.475778302439524e-06, |
| "loss": 1.4015, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.863716814159292, |
| "grad_norm": 0.14802302054659544, |
| "learning_rate": 2.3524477190729144e-06, |
| "loss": 1.4131, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8672566371681416, |
| "grad_norm": 0.2718087485947451, |
| "learning_rate": 2.232116235586737e-06, |
| "loss": 1.3715, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8707964601769912, |
| "grad_norm": 0.28440166381934995, |
| "learning_rate": 2.1147997868658425e-06, |
| "loss": 1.4285, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.8743362831858407, |
| "grad_norm": 0.16098873943678169, |
| "learning_rate": 2.0005139085293945e-06, |
| "loss": 1.3959, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.8778761061946903, |
| "grad_norm": 0.14588882919671864, |
| "learning_rate": 1.8892737348735812e-06, |
| "loss": 1.4136, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.8814159292035398, |
| "grad_norm": 0.16837051864441668, |
| "learning_rate": 1.7810939968674418e-06, |
| "loss": 1.3834, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.8849557522123894, |
| "grad_norm": 0.13634066486185534, |
| "learning_rate": 1.6759890202021289e-06, |
| "loss": 1.4102, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8884955752212389, |
| "grad_norm": 0.19189176615372466, |
| "learning_rate": 1.5739727233938239e-06, |
| "loss": 1.3727, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.8920353982300885, |
| "grad_norm": 0.2130365343341551, |
| "learning_rate": 1.4750586159405915e-06, |
| "loss": 1.3547, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.8955752212389381, |
| "grad_norm": 0.14014293619438611, |
| "learning_rate": 1.3792597965333581e-06, |
| "loss": 1.4127, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8991150442477877, |
| "grad_norm": 0.19706946980035872, |
| "learning_rate": 1.286588951321363e-06, |
| "loss": 1.4366, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.9026548672566371, |
| "grad_norm": 0.16141347931533398, |
| "learning_rate": 1.1970583522321472e-06, |
| "loss": 1.4402, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.9061946902654867, |
| "grad_norm": 0.16886776303202308, |
| "learning_rate": 1.1106798553464804e-06, |
| "loss": 1.4466, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.9097345132743363, |
| "grad_norm": 0.1289021524060537, |
| "learning_rate": 1.0274648993283093e-06, |
| "loss": 1.4022, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.9132743362831859, |
| "grad_norm": 0.2166369005456705, |
| "learning_rate": 9.474245039099882e-07, |
| "loss": 1.4273, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.9168141592920354, |
| "grad_norm": 0.2055233570255993, |
| "learning_rate": 8.705692684329969e-07, |
| "loss": 1.4447, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.9203539823008849, |
| "grad_norm": 0.24003258637066974, |
| "learning_rate": 7.969093704443209e-07, |
| "loss": 1.395, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9238938053097345, |
| "grad_norm": 0.24643248474581458, |
| "learning_rate": 7.264545643486997e-07, |
| "loss": 1.4292, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.9274336283185841, |
| "grad_norm": 0.14588590756720407, |
| "learning_rate": 6.592141801168933e-07, |
| "loss": 1.4206, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.9309734513274336, |
| "grad_norm": 0.2098810341079189, |
| "learning_rate": 5.951971220501645e-07, |
| "loss": 1.4003, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.9345132743362832, |
| "grad_norm": 0.2134185708217152, |
| "learning_rate": 5.344118676011172e-07, |
| "loss": 1.4529, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.9380530973451328, |
| "grad_norm": 0.1901396806615015, |
| "learning_rate": 4.768664662510941e-07, |
| "loss": 1.4181, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9415929203539823, |
| "grad_norm": 0.1740201436765075, |
| "learning_rate": 4.225685384441902e-07, |
| "loss": 1.4086, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.9451327433628318, |
| "grad_norm": 0.14602401119980518, |
| "learning_rate": 3.71525274578155e-07, |
| "loss": 1.4262, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.9486725663716814, |
| "grad_norm": 0.2295789342688302, |
| "learning_rate": 3.237434340521789e-07, |
| "loss": 1.4068, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.952212389380531, |
| "grad_norm": 0.14106911620373858, |
| "learning_rate": 2.7922934437178695e-07, |
| "loss": 1.447, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.9557522123893806, |
| "grad_norm": 0.1862366502924384, |
| "learning_rate": 2.3798890031092037e-07, |
| "loss": 1.4125, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.95929203539823, |
| "grad_norm": 0.17289316045885814, |
| "learning_rate": 2.0002756313132475e-07, |
| "loss": 1.4104, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.9628318584070796, |
| "grad_norm": 0.17621766011633694, |
| "learning_rate": 1.65350359859337e-07, |
| "loss": 1.3574, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.9663716814159292, |
| "grad_norm": 0.14324064947353912, |
| "learning_rate": 1.3396188262018438e-07, |
| "loss": 1.4327, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.9699115044247788, |
| "grad_norm": 0.21936984291113176, |
| "learning_rate": 1.0586628802987108e-07, |
| "loss": 1.4428, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.9734513274336283, |
| "grad_norm": 0.17815388829010623, |
| "learning_rate": 8.106729664475176e-08, |
| "loss": 1.43, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.9769911504424779, |
| "grad_norm": 0.19414185063188497, |
| "learning_rate": 5.956819246881185e-08, |
| "loss": 1.4066, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.9805309734513274, |
| "grad_norm": 0.20422480602927082, |
| "learning_rate": 4.1371822518804224e-08, |
| "loss": 1.4236, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.984070796460177, |
| "grad_norm": 0.1757997100349613, |
| "learning_rate": 2.648059644723144e-08, |
| "loss": 1.448, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.9876106194690265, |
| "grad_norm": 0.22895459311601085, |
| "learning_rate": 1.4896486223239802e-08, |
| "loss": 1.4495, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.9911504424778761, |
| "grad_norm": 0.40857682028655, |
| "learning_rate": 6.621025871481057e-09, |
| "loss": 1.4162, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9946902654867257, |
| "grad_norm": 0.1575487199309586, |
| "learning_rate": 1.6553112689776662e-09, |
| "loss": 1.4067, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.9982300884955753, |
| "grad_norm": 0.1926481633107631, |
| "learning_rate": 0.0, |
| "loss": 1.4527, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9982300884955753, |
| "step": 282, |
| "total_flos": 1677476333879296.0, |
| "train_loss": 0.0, |
| "train_runtime": 0.0106, |
| "train_samples_per_second": 13689138.49, |
| "train_steps_per_second": 26706.264 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 282, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1677476333879296.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|