| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.10375237765865468, |
| "eval_steps": 500, |
| "global_step": 150, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006916825177243646, |
| "grad_norm": 0.2954893708229065, |
| "learning_rate": 0.0, |
| "loss": 2.3812, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0013833650354487291, |
| "grad_norm": 0.3372870683670044, |
| "learning_rate": 1.3698630136986302e-06, |
| "loss": 2.6562, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0020750475531730937, |
| "grad_norm": 0.41708794236183167, |
| "learning_rate": 2.7397260273972604e-06, |
| "loss": 2.779, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0027667300708974583, |
| "grad_norm": 0.48284393548965454, |
| "learning_rate": 4.10958904109589e-06, |
| "loss": 2.83, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0034584125886218224, |
| "grad_norm": 0.328521728515625, |
| "learning_rate": 5.479452054794521e-06, |
| "loss": 2.3607, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.004150095106346187, |
| "grad_norm": 0.3413754403591156, |
| "learning_rate": 6.849315068493151e-06, |
| "loss": 1.9744, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0048417776240705515, |
| "grad_norm": 0.43425995111465454, |
| "learning_rate": 8.21917808219178e-06, |
| "loss": 1.915, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0055334601417949165, |
| "grad_norm": 0.45184195041656494, |
| "learning_rate": 9.589041095890411e-06, |
| "loss": 2.0002, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.006225142659519281, |
| "grad_norm": 0.3451146185398102, |
| "learning_rate": 1.0958904109589042e-05, |
| "loss": 2.5739, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.006916825177243645, |
| "grad_norm": 0.39400503039360046, |
| "learning_rate": 1.2328767123287671e-05, |
| "loss": 2.6139, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00760850769496801, |
| "grad_norm": 0.4555888772010803, |
| "learning_rate": 1.3698630136986302e-05, |
| "loss": 2.4346, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.008300190212692375, |
| "grad_norm": 0.5475619435310364, |
| "learning_rate": 1.5068493150684931e-05, |
| "loss": 1.485, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.008991872730416739, |
| "grad_norm": 1.219076156616211, |
| "learning_rate": 1.643835616438356e-05, |
| "loss": 2.0555, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.009683555248141103, |
| "grad_norm": 0.3271433711051941, |
| "learning_rate": 1.780821917808219e-05, |
| "loss": 2.8418, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.010375237765865467, |
| "grad_norm": 0.3261394500732422, |
| "learning_rate": 1.9178082191780822e-05, |
| "loss": 2.287, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.011066920283589833, |
| "grad_norm": 0.39713796973228455, |
| "learning_rate": 2.0547945205479453e-05, |
| "loss": 2.7331, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.011758602801314197, |
| "grad_norm": 0.3226444125175476, |
| "learning_rate": 2.1917808219178083e-05, |
| "loss": 2.7307, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.012450285319038561, |
| "grad_norm": 0.4153122007846832, |
| "learning_rate": 2.328767123287671e-05, |
| "loss": 2.3229, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.013141967836762926, |
| "grad_norm": 0.29302462935447693, |
| "learning_rate": 2.4657534246575342e-05, |
| "loss": 2.2093, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01383365035448729, |
| "grad_norm": 0.42535701394081116, |
| "learning_rate": 2.6027397260273973e-05, |
| "loss": 2.3698, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.014525332872211656, |
| "grad_norm": 0.33567023277282715, |
| "learning_rate": 2.7397260273972603e-05, |
| "loss": 2.6456, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01521701538993602, |
| "grad_norm": 0.37375620007514954, |
| "learning_rate": 2.8767123287671234e-05, |
| "loss": 2.4148, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.015908697907660384, |
| "grad_norm": 0.2704203128814697, |
| "learning_rate": 3.0136986301369862e-05, |
| "loss": 1.6722, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01660038042538475, |
| "grad_norm": 0.3946782946586609, |
| "learning_rate": 3.1506849315068496e-05, |
| "loss": 2.6759, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.017292062943109112, |
| "grad_norm": 0.368335098028183, |
| "learning_rate": 3.287671232876712e-05, |
| "loss": 2.6882, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.017983745460833478, |
| "grad_norm": 0.38029783964157104, |
| "learning_rate": 3.424657534246575e-05, |
| "loss": 2.6915, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01867542797855784, |
| "grad_norm": 0.36253222823143005, |
| "learning_rate": 3.561643835616438e-05, |
| "loss": 2.1426, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.019367110496282206, |
| "grad_norm": 0.35769322514533997, |
| "learning_rate": 3.698630136986301e-05, |
| "loss": 2.6634, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.020058793014006572, |
| "grad_norm": 0.44577229022979736, |
| "learning_rate": 3.8356164383561644e-05, |
| "loss": 2.5996, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.020750475531730934, |
| "grad_norm": 0.5225628614425659, |
| "learning_rate": 3.9726027397260274e-05, |
| "loss": 2.5993, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0214421580494553, |
| "grad_norm": 0.45649707317352295, |
| "learning_rate": 4.1095890410958905e-05, |
| "loss": 2.3344, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.022133840567179666, |
| "grad_norm": 0.31408172845840454, |
| "learning_rate": 4.2465753424657536e-05, |
| "loss": 2.343, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02282552308490403, |
| "grad_norm": 0.3498693108558655, |
| "learning_rate": 4.383561643835617e-05, |
| "loss": 1.7196, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.023517205602628394, |
| "grad_norm": 0.5255292654037476, |
| "learning_rate": 4.520547945205479e-05, |
| "loss": 1.5354, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.024208888120352757, |
| "grad_norm": 0.3460487425327301, |
| "learning_rate": 4.657534246575342e-05, |
| "loss": 2.1435, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.024900570638077123, |
| "grad_norm": 0.47466233372688293, |
| "learning_rate": 4.794520547945205e-05, |
| "loss": 2.7025, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02559225315580149, |
| "grad_norm": 0.39667466282844543, |
| "learning_rate": 4.9315068493150684e-05, |
| "loss": 2.2736, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.02628393567352585, |
| "grad_norm": 0.40798133611679077, |
| "learning_rate": 5.068493150684932e-05, |
| "loss": 1.4074, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.026975618191250217, |
| "grad_norm": 0.39603471755981445, |
| "learning_rate": 5.2054794520547945e-05, |
| "loss": 2.4354, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02766730070897458, |
| "grad_norm": 0.5650593042373657, |
| "learning_rate": 5.342465753424658e-05, |
| "loss": 1.7307, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.028358983226698945, |
| "grad_norm": 0.7711919546127319, |
| "learning_rate": 5.479452054794521e-05, |
| "loss": 1.2067, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.02905066574442331, |
| "grad_norm": 0.46603119373321533, |
| "learning_rate": 5.616438356164384e-05, |
| "loss": 1.0539, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.029742348262147673, |
| "grad_norm": 0.2997550666332245, |
| "learning_rate": 5.753424657534247e-05, |
| "loss": 1.9997, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.03043403077987204, |
| "grad_norm": 0.4058617651462555, |
| "learning_rate": 5.89041095890411e-05, |
| "loss": 2.3323, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0311257132975964, |
| "grad_norm": 0.3763635754585266, |
| "learning_rate": 6.0273972602739724e-05, |
| "loss": 1.8749, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.03181739581532077, |
| "grad_norm": 0.3507993221282959, |
| "learning_rate": 6.164383561643835e-05, |
| "loss": 2.3828, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03250907833304513, |
| "grad_norm": 0.34240859746932983, |
| "learning_rate": 6.301369863013699e-05, |
| "loss": 2.3809, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0332007608507695, |
| "grad_norm": 0.4183844029903412, |
| "learning_rate": 6.438356164383562e-05, |
| "loss": 2.2065, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03389244336849386, |
| "grad_norm": 0.5209120512008667, |
| "learning_rate": 6.575342465753424e-05, |
| "loss": 2.061, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.034584125886218224, |
| "grad_norm": 0.6568111181259155, |
| "learning_rate": 6.712328767123288e-05, |
| "loss": 2.1576, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03527580840394259, |
| "grad_norm": 0.5327390432357788, |
| "learning_rate": 6.84931506849315e-05, |
| "loss": 1.5562, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.035967490921666956, |
| "grad_norm": 0.6389997005462646, |
| "learning_rate": 6.986301369863014e-05, |
| "loss": 2.3966, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03665917343939132, |
| "grad_norm": 0.6374388933181763, |
| "learning_rate": 7.123287671232876e-05, |
| "loss": 2.4013, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03735085595711568, |
| "grad_norm": 0.4383719563484192, |
| "learning_rate": 7.26027397260274e-05, |
| "loss": 2.061, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03804253847484005, |
| "grad_norm": 0.4306814670562744, |
| "learning_rate": 7.397260273972603e-05, |
| "loss": 2.4931, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03873422099256441, |
| "grad_norm": 0.5693032145500183, |
| "learning_rate": 7.534246575342466e-05, |
| "loss": 2.2842, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.039425903510288775, |
| "grad_norm": 0.7792916297912598, |
| "learning_rate": 7.671232876712329e-05, |
| "loss": 1.8452, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.040117586028013144, |
| "grad_norm": 0.4936494827270508, |
| "learning_rate": 7.808219178082192e-05, |
| "loss": 1.8668, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.040809268545737507, |
| "grad_norm": 0.49626269936561584, |
| "learning_rate": 7.945205479452055e-05, |
| "loss": 1.9564, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.04150095106346187, |
| "grad_norm": 0.4702766537666321, |
| "learning_rate": 8.082191780821919e-05, |
| "loss": 2.419, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04219263358118624, |
| "grad_norm": 0.6959900259971619, |
| "learning_rate": 8.219178082191781e-05, |
| "loss": 1.5835, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0428843160989106, |
| "grad_norm": 0.5963976979255676, |
| "learning_rate": 8.356164383561645e-05, |
| "loss": 2.2932, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04357599861663496, |
| "grad_norm": 0.4605785310268402, |
| "learning_rate": 8.493150684931507e-05, |
| "loss": 2.397, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.04426768113435933, |
| "grad_norm": 0.4479796588420868, |
| "learning_rate": 8.630136986301371e-05, |
| "loss": 2.0257, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.044959363652083695, |
| "grad_norm": 0.4342099726200104, |
| "learning_rate": 8.767123287671233e-05, |
| "loss": 1.9476, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04565104616980806, |
| "grad_norm": 0.44643279910087585, |
| "learning_rate": 8.904109589041096e-05, |
| "loss": 2.4669, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04634272868753242, |
| "grad_norm": 0.513254702091217, |
| "learning_rate": 9.041095890410958e-05, |
| "loss": 2.2581, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04703441120525679, |
| "grad_norm": 0.6621958613395691, |
| "learning_rate": 9.178082191780822e-05, |
| "loss": 1.3013, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.04772609372298115, |
| "grad_norm": 0.43388229608535767, |
| "learning_rate": 9.315068493150684e-05, |
| "loss": 2.5325, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.048417776240705514, |
| "grad_norm": 0.539757490158081, |
| "learning_rate": 9.452054794520548e-05, |
| "loss": 1.76, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04910945875842988, |
| "grad_norm": 0.33598029613494873, |
| "learning_rate": 9.58904109589041e-05, |
| "loss": 1.6931, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.049801141276154245, |
| "grad_norm": 0.8661749958992004, |
| "learning_rate": 9.726027397260274e-05, |
| "loss": 2.1326, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.05049282379387861, |
| "grad_norm": 0.5956202149391174, |
| "learning_rate": 9.863013698630137e-05, |
| "loss": 2.0554, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.05118450631160298, |
| "grad_norm": 0.7723128199577332, |
| "learning_rate": 0.0001, |
| "loss": 1.4837, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.05187618882932734, |
| "grad_norm": 0.5357879996299744, |
| "learning_rate": 9.992716678805537e-05, |
| "loss": 2.1688, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0525678713470517, |
| "grad_norm": 0.48623108863830566, |
| "learning_rate": 9.985433357611071e-05, |
| "loss": 2.1567, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.053259553864776064, |
| "grad_norm": 0.4244164228439331, |
| "learning_rate": 9.978150036416607e-05, |
| "loss": 1.8235, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.053951236382500434, |
| "grad_norm": 0.6056337356567383, |
| "learning_rate": 9.970866715222141e-05, |
| "loss": 2.0154, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.054642918900224796, |
| "grad_norm": 0.5280382037162781, |
| "learning_rate": 9.963583394027677e-05, |
| "loss": 1.9959, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05533460141794916, |
| "grad_norm": 0.44745901226997375, |
| "learning_rate": 9.956300072833212e-05, |
| "loss": 1.2171, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05602628393567353, |
| "grad_norm": 0.7180289030075073, |
| "learning_rate": 9.949016751638748e-05, |
| "loss": 1.7036, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.05671796645339789, |
| "grad_norm": 0.6465222239494324, |
| "learning_rate": 9.941733430444284e-05, |
| "loss": 1.8992, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.05740964897112225, |
| "grad_norm": 0.4792153239250183, |
| "learning_rate": 9.934450109249819e-05, |
| "loss": 2.0712, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05810133148884662, |
| "grad_norm": 0.47839808464050293, |
| "learning_rate": 9.927166788055353e-05, |
| "loss": 1.7548, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.058793014006570984, |
| "grad_norm": 0.7118557095527649, |
| "learning_rate": 9.919883466860888e-05, |
| "loss": 1.5835, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.05948469652429535, |
| "grad_norm": 0.5179592370986938, |
| "learning_rate": 9.912600145666424e-05, |
| "loss": 1.621, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.060176379042019716, |
| "grad_norm": 0.9235703349113464, |
| "learning_rate": 9.905316824471959e-05, |
| "loss": 0.8581, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.06086806155974408, |
| "grad_norm": 0.8734163045883179, |
| "learning_rate": 9.898033503277495e-05, |
| "loss": 2.2878, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.06155974407746844, |
| "grad_norm": 0.45049625635147095, |
| "learning_rate": 9.890750182083031e-05, |
| "loss": 2.2186, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0622514265951928, |
| "grad_norm": 0.6119429469108582, |
| "learning_rate": 9.883466860888566e-05, |
| "loss": 2.2461, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06294310911291717, |
| "grad_norm": 0.5095340013504028, |
| "learning_rate": 9.876183539694101e-05, |
| "loss": 2.2954, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.06363479163064154, |
| "grad_norm": 0.6094918251037598, |
| "learning_rate": 9.868900218499635e-05, |
| "loss": 2.3478, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0643264741483659, |
| "grad_norm": 1.8420301675796509, |
| "learning_rate": 9.861616897305172e-05, |
| "loss": 2.213, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06501815666609026, |
| "grad_norm": 0.6151532530784607, |
| "learning_rate": 9.854333576110706e-05, |
| "loss": 1.8737, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06570983918381462, |
| "grad_norm": 0.5210903286933899, |
| "learning_rate": 9.847050254916242e-05, |
| "loss": 2.019, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.066401521701539, |
| "grad_norm": 0.535746693611145, |
| "learning_rate": 9.839766933721779e-05, |
| "loss": 2.399, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.06709320421926336, |
| "grad_norm": 0.8154505491256714, |
| "learning_rate": 9.832483612527313e-05, |
| "loss": 1.9442, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06778488673698772, |
| "grad_norm": 0.521247923374176, |
| "learning_rate": 9.825200291332848e-05, |
| "loss": 2.5264, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06847656925471209, |
| "grad_norm": 0.41544124484062195, |
| "learning_rate": 9.817916970138383e-05, |
| "loss": 1.8267, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.06916825177243645, |
| "grad_norm": 0.538253128528595, |
| "learning_rate": 9.810633648943919e-05, |
| "loss": 1.6463, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06985993429016081, |
| "grad_norm": 0.8509485125541687, |
| "learning_rate": 9.803350327749454e-05, |
| "loss": 1.5877, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.07055161680788519, |
| "grad_norm": 0.47714293003082275, |
| "learning_rate": 9.79606700655499e-05, |
| "loss": 1.5043, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.07124329932560955, |
| "grad_norm": 0.5433914065361023, |
| "learning_rate": 9.788783685360526e-05, |
| "loss": 1.6454, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.07193498184333391, |
| "grad_norm": 0.6400073766708374, |
| "learning_rate": 9.781500364166059e-05, |
| "loss": 1.6994, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.07262666436105827, |
| "grad_norm": 0.6944612860679626, |
| "learning_rate": 9.774217042971595e-05, |
| "loss": 2.2063, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.07331834687878264, |
| "grad_norm": 0.6701168417930603, |
| "learning_rate": 9.76693372177713e-05, |
| "loss": 1.2487, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.074010029396507, |
| "grad_norm": 0.7888538837432861, |
| "learning_rate": 9.759650400582666e-05, |
| "loss": 1.8784, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.07470171191423136, |
| "grad_norm": 0.7106865048408508, |
| "learning_rate": 9.752367079388201e-05, |
| "loss": 2.3673, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07539339443195574, |
| "grad_norm": 0.49588626623153687, |
| "learning_rate": 9.745083758193737e-05, |
| "loss": 1.8638, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0760850769496801, |
| "grad_norm": 0.48046427965164185, |
| "learning_rate": 9.737800436999273e-05, |
| "loss": 2.333, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07677675946740446, |
| "grad_norm": 0.6239060163497925, |
| "learning_rate": 9.730517115804807e-05, |
| "loss": 1.8378, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.07746844198512882, |
| "grad_norm": 0.6083034873008728, |
| "learning_rate": 9.723233794610343e-05, |
| "loss": 1.9826, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07816012450285319, |
| "grad_norm": 0.8679235577583313, |
| "learning_rate": 9.715950473415877e-05, |
| "loss": 1.3375, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.07885180702057755, |
| "grad_norm": 0.542757511138916, |
| "learning_rate": 9.708667152221414e-05, |
| "loss": 1.6349, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07954348953830193, |
| "grad_norm": 0.5968992114067078, |
| "learning_rate": 9.701383831026948e-05, |
| "loss": 1.3598, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.08023517205602629, |
| "grad_norm": 0.5693714022636414, |
| "learning_rate": 9.694100509832484e-05, |
| "loss": 2.1527, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.08092685457375065, |
| "grad_norm": 0.6307690143585205, |
| "learning_rate": 9.68681718863802e-05, |
| "loss": 1.7178, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.08161853709147501, |
| "grad_norm": 0.5192516446113586, |
| "learning_rate": 9.679533867443554e-05, |
| "loss": 1.8115, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.08231021960919938, |
| "grad_norm": 1.036036729812622, |
| "learning_rate": 9.67225054624909e-05, |
| "loss": 2.2423, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.08300190212692374, |
| "grad_norm": 0.6450679302215576, |
| "learning_rate": 9.664967225054625e-05, |
| "loss": 1.9298, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0836935846446481, |
| "grad_norm": 0.8683953881263733, |
| "learning_rate": 9.657683903860161e-05, |
| "loss": 2.0791, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.08438526716237248, |
| "grad_norm": 0.7013575434684753, |
| "learning_rate": 9.650400582665696e-05, |
| "loss": 2.0814, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.08507694968009684, |
| "grad_norm": 0.6849325299263, |
| "learning_rate": 9.643117261471232e-05, |
| "loss": 1.83, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0857686321978212, |
| "grad_norm": 0.9820392727851868, |
| "learning_rate": 9.635833940276767e-05, |
| "loss": 1.5665, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.08646031471554556, |
| "grad_norm": 0.5546866655349731, |
| "learning_rate": 9.628550619082301e-05, |
| "loss": 1.7215, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08715199723326993, |
| "grad_norm": 0.6748363971710205, |
| "learning_rate": 9.621267297887837e-05, |
| "loss": 1.6532, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.08784367975099429, |
| "grad_norm": 0.4760434627532959, |
| "learning_rate": 9.613983976693372e-05, |
| "loss": 1.3476, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.08853536226871866, |
| "grad_norm": 0.5710906386375427, |
| "learning_rate": 9.606700655498908e-05, |
| "loss": 2.025, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08922704478644303, |
| "grad_norm": 0.5950302481651306, |
| "learning_rate": 9.599417334304443e-05, |
| "loss": 2.6324, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.08991872730416739, |
| "grad_norm": 0.4994860887527466, |
| "learning_rate": 9.592134013109979e-05, |
| "loss": 2.0315, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09061040982189175, |
| "grad_norm": 0.6400249004364014, |
| "learning_rate": 9.584850691915514e-05, |
| "loss": 2.3628, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.09130209233961611, |
| "grad_norm": 1.0873314142227173, |
| "learning_rate": 9.577567370721049e-05, |
| "loss": 1.2664, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.09199377485734048, |
| "grad_norm": 0.5430288314819336, |
| "learning_rate": 9.570284049526585e-05, |
| "loss": 1.7676, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.09268545737506484, |
| "grad_norm": 0.5969283580780029, |
| "learning_rate": 9.56300072833212e-05, |
| "loss": 2.0824, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.09337713989278922, |
| "grad_norm": 0.6228020191192627, |
| "learning_rate": 9.555717407137656e-05, |
| "loss": 2.0687, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.09406882241051358, |
| "grad_norm": 0.6905536651611328, |
| "learning_rate": 9.54843408594319e-05, |
| "loss": 2.2948, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.09476050492823794, |
| "grad_norm": 0.6397948861122131, |
| "learning_rate": 9.541150764748726e-05, |
| "loss": 1.4147, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.0954521874459623, |
| "grad_norm": 0.5489908456802368, |
| "learning_rate": 9.533867443554261e-05, |
| "loss": 2.0582, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.09614386996368667, |
| "grad_norm": 0.5713220834732056, |
| "learning_rate": 9.526584122359796e-05, |
| "loss": 2.1708, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.09683555248141103, |
| "grad_norm": 0.6736558079719543, |
| "learning_rate": 9.519300801165332e-05, |
| "loss": 1.6808, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0975272349991354, |
| "grad_norm": 1.1222553253173828, |
| "learning_rate": 9.512017479970867e-05, |
| "loss": 1.563, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.09821891751685977, |
| "grad_norm": 0.6231582760810852, |
| "learning_rate": 9.504734158776403e-05, |
| "loss": 1.6458, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.09891060003458413, |
| "grad_norm": 0.716089129447937, |
| "learning_rate": 9.497450837581938e-05, |
| "loss": 1.7055, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.09960228255230849, |
| "grad_norm": 0.49629953503608704, |
| "learning_rate": 9.490167516387472e-05, |
| "loss": 1.2985, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.10029396507003285, |
| "grad_norm": 0.8354843258857727, |
| "learning_rate": 9.482884195193008e-05, |
| "loss": 1.8557, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.10098564758775722, |
| "grad_norm": 0.7507709860801697, |
| "learning_rate": 9.475600873998543e-05, |
| "loss": 1.1816, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.10167733010548158, |
| "grad_norm": 0.8973211050033569, |
| "learning_rate": 9.46831755280408e-05, |
| "loss": 1.4495, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.10236901262320595, |
| "grad_norm": 0.7720927000045776, |
| "learning_rate": 9.461034231609614e-05, |
| "loss": 2.1743, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.10306069514093032, |
| "grad_norm": 0.511075496673584, |
| "learning_rate": 9.45375091041515e-05, |
| "loss": 1.8267, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.10375237765865468, |
| "grad_norm": 0.6523205041885376, |
| "learning_rate": 9.446467589220685e-05, |
| "loss": 1.9682, |
| "step": 150 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1446, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4851981966311424.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|