| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 545, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009195402298850575, |
| "grad_norm": 20.714553450281343, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.2012, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01839080459770115, |
| "grad_norm": 16.813897390933732, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.1382, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.027586206896551724, |
| "grad_norm": 10.685135212219203, |
| "learning_rate": 3e-06, |
| "loss": 2.2476, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0367816091954023, |
| "grad_norm": 14.421748935553252, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 2.3914, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04597701149425287, |
| "grad_norm": 8.628671157622785, |
| "learning_rate": 5e-06, |
| "loss": 1.6079, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05517241379310345, |
| "grad_norm": 6.948007057279052, |
| "learning_rate": 6e-06, |
| "loss": 1.6653, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.06436781609195402, |
| "grad_norm": 11.209735445974314, |
| "learning_rate": 7e-06, |
| "loss": 1.9133, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0735632183908046, |
| "grad_norm": 7.97219877448209, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.5034, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.08275862068965517, |
| "grad_norm": 139.5080565326191, |
| "learning_rate": 9e-06, |
| "loss": 1.5352, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.09195402298850575, |
| "grad_norm": 23.862722229248178, |
| "learning_rate": 1e-05, |
| "loss": 2.3425, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10114942528735632, |
| "grad_norm": 22.34844242652855, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 2.2261, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1103448275862069, |
| "grad_norm": 15.393131095910384, |
| "learning_rate": 1.2e-05, |
| "loss": 1.7781, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.11954022988505747, |
| "grad_norm": 12.868959642332323, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 1.8264, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.12873563218390804, |
| "grad_norm": 9.227770966015983, |
| "learning_rate": 1.4e-05, |
| "loss": 1.7844, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.13793103448275862, |
| "grad_norm": 6.272388047403451, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 1.7839, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1471264367816092, |
| "grad_norm": 5.913338642035567, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.5099, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.15632183908045977, |
| "grad_norm": 11.31744159936266, |
| "learning_rate": 1.7e-05, |
| "loss": 1.6116, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.16551724137931034, |
| "grad_norm": 4.969853987438896, |
| "learning_rate": 1.8e-05, |
| "loss": 1.5317, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.17471264367816092, |
| "grad_norm": 5.355015500818536, |
| "learning_rate": 1.9e-05, |
| "loss": 1.6409, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1839080459770115, |
| "grad_norm": 5.196414742911105, |
| "learning_rate": 2e-05, |
| "loss": 1.5442, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19310344827586207, |
| "grad_norm": 5.614271101884435, |
| "learning_rate": 1.999988738608264e-05, |
| "loss": 1.5029, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.20229885057471264, |
| "grad_norm": 5.015394546767328, |
| "learning_rate": 1.9999549547148767e-05, |
| "loss": 1.8169, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.21149425287356322, |
| "grad_norm": 4.557156450415814, |
| "learning_rate": 1.9998986491652896e-05, |
| "loss": 1.5093, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.2206896551724138, |
| "grad_norm": 5.945840310610162, |
| "learning_rate": 1.9998198233685676e-05, |
| "loss": 1.6238, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.22988505747126436, |
| "grad_norm": 7.357312504680639, |
| "learning_rate": 1.9997184792973504e-05, |
| "loss": 1.4395, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.23908045977011494, |
| "grad_norm": 9.510181997589852, |
| "learning_rate": 1.999594619487806e-05, |
| "loss": 1.3813, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2482758620689655, |
| "grad_norm": 5.191011707934582, |
| "learning_rate": 1.999448247039565e-05, |
| "loss": 1.399, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.2574712643678161, |
| "grad_norm": 6.777591477906461, |
| "learning_rate": 1.999279365615644e-05, |
| "loss": 1.4485, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 12.91448741331523, |
| "learning_rate": 1.9990879794423536e-05, |
| "loss": 1.5291, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 7.9192564908233525, |
| "learning_rate": 1.9988740933091932e-05, |
| "loss": 1.752, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2850574712643678, |
| "grad_norm": 13.307612828008661, |
| "learning_rate": 1.9986377125687305e-05, |
| "loss": 1.5955, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.2942528735632184, |
| "grad_norm": 5.626525978301407, |
| "learning_rate": 1.998378843136468e-05, |
| "loss": 1.3663, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.30344827586206896, |
| "grad_norm": 7.3508239388348375, |
| "learning_rate": 1.998097491490695e-05, |
| "loss": 1.6621, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.31264367816091954, |
| "grad_norm": 8.066689186099643, |
| "learning_rate": 1.9977936646723254e-05, |
| "loss": 1.5935, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.3218390804597701, |
| "grad_norm": 6.180669424711909, |
| "learning_rate": 1.99746737028472e-05, |
| "loss": 1.7871, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3310344827586207, |
| "grad_norm": 9.183166350518048, |
| "learning_rate": 1.9971186164934995e-05, |
| "loss": 1.7529, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.34022988505747126, |
| "grad_norm": 7.006667910909564, |
| "learning_rate": 1.996747412026337e-05, |
| "loss": 1.6017, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.34942528735632183, |
| "grad_norm": 5.476091693081514, |
| "learning_rate": 1.9963537661727415e-05, |
| "loss": 1.5574, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3586206896551724, |
| "grad_norm": 7.073151215201376, |
| "learning_rate": 1.995937688783824e-05, |
| "loss": 1.52, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.367816091954023, |
| "grad_norm": 6.105787470572727, |
| "learning_rate": 1.995499190272053e-05, |
| "loss": 1.6445, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.37701149425287356, |
| "grad_norm": 26.79480096943695, |
| "learning_rate": 1.9950382816109904e-05, |
| "loss": 1.5081, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.38620689655172413, |
| "grad_norm": 12.678846665605036, |
| "learning_rate": 1.994554974335022e-05, |
| "loss": 1.2374, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3954022988505747, |
| "grad_norm": 8.982247296188962, |
| "learning_rate": 1.9940492805390644e-05, |
| "loss": 1.3977, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.4045977011494253, |
| "grad_norm": 9.660805568531522, |
| "learning_rate": 1.9935212128782637e-05, |
| "loss": 1.4276, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 5.878685972804514, |
| "learning_rate": 1.9929707845676796e-05, |
| "loss": 1.498, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.42298850574712643, |
| "grad_norm": 6.1805386339462425, |
| "learning_rate": 1.992398009381954e-05, |
| "loss": 1.5585, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.432183908045977, |
| "grad_norm": 7.004448856725815, |
| "learning_rate": 1.991802901654966e-05, |
| "loss": 1.5439, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.4413793103448276, |
| "grad_norm": 21.3461812408264, |
| "learning_rate": 1.9911854762794747e-05, |
| "loss": 1.48, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.45057471264367815, |
| "grad_norm": 11.305699015280368, |
| "learning_rate": 1.9905457487067438e-05, |
| "loss": 1.5159, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.45977011494252873, |
| "grad_norm": 9.362223347622393, |
| "learning_rate": 1.9898837349461573e-05, |
| "loss": 1.3899, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4689655172413793, |
| "grad_norm": 6.86071978513186, |
| "learning_rate": 1.989199451564819e-05, |
| "loss": 1.3236, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.4781609195402299, |
| "grad_norm": 10.919914686023162, |
| "learning_rate": 1.9884929156871348e-05, |
| "loss": 1.5464, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.48735632183908045, |
| "grad_norm": 18.62756427908137, |
| "learning_rate": 1.9877641449943884e-05, |
| "loss": 1.4592, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.496551724137931, |
| "grad_norm": 14.125885744657854, |
| "learning_rate": 1.9870131577242958e-05, |
| "loss": 1.6143, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.5057471264367817, |
| "grad_norm": 21.319792981406064, |
| "learning_rate": 1.98623997267055e-05, |
| "loss": 1.6257, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5149425287356322, |
| "grad_norm": 7.07342180739188, |
| "learning_rate": 1.98544460918235e-05, |
| "loss": 1.3489, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.5241379310344828, |
| "grad_norm": 11.602183894060184, |
| "learning_rate": 1.984627087163918e-05, |
| "loss": 1.3555, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 5.711430596116647, |
| "learning_rate": 1.9837874270740005e-05, |
| "loss": 1.4868, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.542528735632184, |
| "grad_norm": 6.872030995436107, |
| "learning_rate": 1.9829256499253548e-05, |
| "loss": 1.4138, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 6.389710028362303, |
| "learning_rate": 1.982041777284226e-05, |
| "loss": 1.532, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5609195402298851, |
| "grad_norm": 8.960725431515376, |
| "learning_rate": 1.9811358312698052e-05, |
| "loss": 1.4233, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5701149425287356, |
| "grad_norm": 7.068530411045381, |
| "learning_rate": 1.980207834553677e-05, |
| "loss": 1.4343, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.5793103448275863, |
| "grad_norm": 7.937280281308531, |
| "learning_rate": 1.9792578103592506e-05, |
| "loss": 1.4436, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.5885057471264368, |
| "grad_norm": 4.993919261195511, |
| "learning_rate": 1.978285782461182e-05, |
| "loss": 1.1707, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.5977011494252874, |
| "grad_norm": 9.299339194434403, |
| "learning_rate": 1.977291775184775e-05, |
| "loss": 1.3752, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.6068965517241379, |
| "grad_norm": 8.969891010411576, |
| "learning_rate": 1.976275813405374e-05, |
| "loss": 1.7358, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.6160919540229886, |
| "grad_norm": 7.846317322412413, |
| "learning_rate": 1.9752379225477436e-05, |
| "loss": 1.6702, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.6252873563218391, |
| "grad_norm": 10.35641201740817, |
| "learning_rate": 1.974178128585429e-05, |
| "loss": 1.6179, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.6344827586206897, |
| "grad_norm": 13.053360167992375, |
| "learning_rate": 1.973096458040108e-05, |
| "loss": 1.3878, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.6436781609195402, |
| "grad_norm": 8.85650402977275, |
| "learning_rate": 1.9719929379809262e-05, |
| "loss": 1.402, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6528735632183909, |
| "grad_norm": 7.259573301011822, |
| "learning_rate": 1.9708675960238214e-05, |
| "loss": 1.325, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.6620689655172414, |
| "grad_norm": 8.2385844490914, |
| "learning_rate": 1.9697204603308303e-05, |
| "loss": 1.5098, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.671264367816092, |
| "grad_norm": 6.950518749393352, |
| "learning_rate": 1.9685515596093844e-05, |
| "loss": 1.318, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.6804597701149425, |
| "grad_norm": 9.109982816285358, |
| "learning_rate": 1.967360923111593e-05, |
| "loss": 1.4189, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 10.452230731667223, |
| "learning_rate": 1.9661485806335095e-05, |
| "loss": 1.4102, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6988505747126437, |
| "grad_norm": 19.344365444774066, |
| "learning_rate": 1.964914562514386e-05, |
| "loss": 1.7136, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.7080459770114943, |
| "grad_norm": 7.435243566159918, |
| "learning_rate": 1.9636588996359145e-05, |
| "loss": 1.3748, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.7172413793103448, |
| "grad_norm": 9.733411972174634, |
| "learning_rate": 1.9623816234214538e-05, |
| "loss": 1.3021, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.7264367816091954, |
| "grad_norm": 9.942892711776333, |
| "learning_rate": 1.9610827658352448e-05, |
| "loss": 1.5538, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.735632183908046, |
| "grad_norm": 9.084274644415883, |
| "learning_rate": 1.959762359381606e-05, |
| "loss": 1.5767, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7448275862068966, |
| "grad_norm": 6.513368914673006, |
| "learning_rate": 1.9584204371041257e-05, |
| "loss": 1.6025, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.7540229885057471, |
| "grad_norm": 7.173737363149948, |
| "learning_rate": 1.957057032584832e-05, |
| "loss": 1.8008, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.7632183908045977, |
| "grad_norm": 7.6666142708069, |
| "learning_rate": 1.955672179943351e-05, |
| "loss": 1.1672, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.7724137931034483, |
| "grad_norm": 12.748744044610136, |
| "learning_rate": 1.9542659138360575e-05, |
| "loss": 1.6484, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.7816091954022989, |
| "grad_norm": 10.435352031122768, |
| "learning_rate": 1.9528382694552033e-05, |
| "loss": 1.7322, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7908045977011494, |
| "grad_norm": 8.348806624357442, |
| "learning_rate": 1.9513892825280387e-05, |
| "loss": 1.6316, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 7.561464021812533, |
| "learning_rate": 1.9499189893159178e-05, |
| "loss": 1.5837, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.8091954022988506, |
| "grad_norm": 9.833304197128921, |
| "learning_rate": 1.9484274266133918e-05, |
| "loss": 1.8191, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.8183908045977012, |
| "grad_norm": 36.91977456946538, |
| "learning_rate": 1.9469146317472867e-05, |
| "loss": 1.6587, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.8275862068965517, |
| "grad_norm": 9.18997721365779, |
| "learning_rate": 1.9453806425757706e-05, |
| "loss": 1.6042, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8367816091954023, |
| "grad_norm": 5.8375441349876285, |
| "learning_rate": 1.9438254974874055e-05, |
| "loss": 1.4569, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.8459770114942529, |
| "grad_norm": 10.33531533117819, |
| "learning_rate": 1.9422492354001876e-05, |
| "loss": 1.554, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.8551724137931035, |
| "grad_norm": 7.653431229054158, |
| "learning_rate": 1.9406518957605716e-05, |
| "loss": 1.6409, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.864367816091954, |
| "grad_norm": 5.139794337597655, |
| "learning_rate": 1.9390335185424852e-05, |
| "loss": 1.4226, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.8735632183908046, |
| "grad_norm": 6.772516024095916, |
| "learning_rate": 1.9373941442463286e-05, |
| "loss": 1.6716, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8827586206896552, |
| "grad_norm": 12.456055439523869, |
| "learning_rate": 1.9357338138979586e-05, |
| "loss": 1.3682, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.8919540229885058, |
| "grad_norm": 6.3363194804101886, |
| "learning_rate": 1.9340525690476665e-05, |
| "loss": 1.5991, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.9011494252873563, |
| "grad_norm": 6.178188975859817, |
| "learning_rate": 1.9323504517691335e-05, |
| "loss": 1.512, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.9103448275862069, |
| "grad_norm": 5.469881912998274, |
| "learning_rate": 1.9306275046583804e-05, |
| "loss": 1.3198, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.9195402298850575, |
| "grad_norm": 92.72146423017554, |
| "learning_rate": 1.9288837708327018e-05, |
| "loss": 1.325, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9287356321839081, |
| "grad_norm": 10.225670198371311, |
| "learning_rate": 1.9271192939295863e-05, |
| "loss": 1.3693, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.9379310344827586, |
| "grad_norm": 15.221298407043829, |
| "learning_rate": 1.925334118105623e-05, |
| "loss": 1.3868, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.9471264367816092, |
| "grad_norm": 7.296423072692595, |
| "learning_rate": 1.9235282880354e-05, |
| "loss": 1.4702, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.9563218390804598, |
| "grad_norm": 11.090698404128442, |
| "learning_rate": 1.9217018489103832e-05, |
| "loss": 1.493, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.9655172413793104, |
| "grad_norm": 13.113004849625796, |
| "learning_rate": 1.9198548464377875e-05, |
| "loss": 1.5315, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9747126436781609, |
| "grad_norm": 6.5579585036461765, |
| "learning_rate": 1.917987326839431e-05, |
| "loss": 1.401, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.9839080459770115, |
| "grad_norm": 23.571017224968177, |
| "learning_rate": 1.9160993368505803e-05, |
| "loss": 1.5408, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.993103448275862, |
| "grad_norm": 7.932308650354931, |
| "learning_rate": 1.914190923718779e-05, |
| "loss": 1.563, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 7.932308650354931, |
| "learning_rate": 1.912262135202667e-05, |
| "loss": 0.9182, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.0091954022988505, |
| "grad_norm": 6.574864715652341, |
| "learning_rate": 1.9103130195707846e-05, |
| "loss": 1.3379, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.018390804597701, |
| "grad_norm": 8.237386794566326, |
| "learning_rate": 1.9083436256003643e-05, |
| "loss": 1.4205, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.0275862068965518, |
| "grad_norm": 6.220467444924186, |
| "learning_rate": 1.906354002576111e-05, |
| "loss": 1.4788, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.0367816091954023, |
| "grad_norm": 13.393857842936615, |
| "learning_rate": 1.9043442002889663e-05, |
| "loss": 1.2128, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.0459770114942528, |
| "grad_norm": 7.240700390139476, |
| "learning_rate": 1.9023142690348663e-05, |
| "loss": 1.4041, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.0551724137931036, |
| "grad_norm": 12.403313459805997, |
| "learning_rate": 1.90026425961348e-05, |
| "loss": 1.1957, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.064367816091954, |
| "grad_norm": 5.294546200505072, |
| "learning_rate": 1.898194223326939e-05, |
| "loss": 1.5244, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.0735632183908046, |
| "grad_norm": 8.062777181362618, |
| "learning_rate": 1.8961042119785534e-05, |
| "loss": 1.5571, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.0827586206896551, |
| "grad_norm": 6.630411039426618, |
| "learning_rate": 1.893994277871515e-05, |
| "loss": 1.4017, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.0919540229885056, |
| "grad_norm": 7.466049537995627, |
| "learning_rate": 1.891864473807589e-05, |
| "loss": 1.6523, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.1011494252873564, |
| "grad_norm": 6.878184185710444, |
| "learning_rate": 1.8897148530857944e-05, |
| "loss": 1.7305, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.110344827586207, |
| "grad_norm": 10.183361403701092, |
| "learning_rate": 1.8875454695010655e-05, |
| "loss": 1.4861, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.1195402298850574, |
| "grad_norm": 7.846946240488356, |
| "learning_rate": 1.8853563773429102e-05, |
| "loss": 1.3378, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.1287356321839082, |
| "grad_norm": 8.380364873375658, |
| "learning_rate": 1.8831476313940495e-05, |
| "loss": 1.2773, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.1379310344827587, |
| "grad_norm": 7.13754465392972, |
| "learning_rate": 1.8809192869290463e-05, |
| "loss": 1.3115, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.1471264367816092, |
| "grad_norm": 16.170663480840823, |
| "learning_rate": 1.878671399712923e-05, |
| "loss": 1.5776, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.1563218390804597, |
| "grad_norm": 6.760966961951662, |
| "learning_rate": 1.8764040259997642e-05, |
| "loss": 1.6387, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.1655172413793102, |
| "grad_norm": 6.231697770807728, |
| "learning_rate": 1.874117222531312e-05, |
| "loss": 1.4857, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.174712643678161, |
| "grad_norm": 11.512205247824191, |
| "learning_rate": 1.8718110465355436e-05, |
| "loss": 1.958, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.1839080459770115, |
| "grad_norm": 10.234002782144223, |
| "learning_rate": 1.8694855557252395e-05, |
| "loss": 1.6003, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.193103448275862, |
| "grad_norm": 6.775464121063177, |
| "learning_rate": 1.8671408082965394e-05, |
| "loss": 1.3716, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2022988505747128, |
| "grad_norm": 9.532716070689466, |
| "learning_rate": 1.8647768629274865e-05, |
| "loss": 1.2361, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.2114942528735633, |
| "grad_norm": 5.598812395655789, |
| "learning_rate": 1.8623937787765582e-05, |
| "loss": 1.2849, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.2206896551724138, |
| "grad_norm": 12.593202228212819, |
| "learning_rate": 1.8599916154811858e-05, |
| "loss": 1.3579, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.2298850574712643, |
| "grad_norm": 7.992727204119873, |
| "learning_rate": 1.8575704331562624e-05, |
| "loss": 1.293, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.2390804597701148, |
| "grad_norm": 10.180939056019497, |
| "learning_rate": 1.8551302923926387e-05, |
| "loss": 1.3632, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.2482758620689656, |
| "grad_norm": 5.835100637584005, |
| "learning_rate": 1.8526712542556054e-05, |
| "loss": 1.4304, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.257471264367816, |
| "grad_norm": 7.133504661503169, |
| "learning_rate": 1.8501933802833664e-05, |
| "loss": 1.4319, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.2666666666666666, |
| "grad_norm": 6.700994373390855, |
| "learning_rate": 1.8476967324854987e-05, |
| "loss": 1.6399, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.2758620689655173, |
| "grad_norm": 7.6168901919768315, |
| "learning_rate": 1.8451813733413998e-05, |
| "loss": 1.4226, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.2850574712643679, |
| "grad_norm": 6.869638065615107, |
| "learning_rate": 1.8426473657987238e-05, |
| "loss": 1.3926, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.2942528735632184, |
| "grad_norm": 7.9246065537695145, |
| "learning_rate": 1.8400947732718083e-05, |
| "loss": 1.3882, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.303448275862069, |
| "grad_norm": 35.57695232792952, |
| "learning_rate": 1.837523659640085e-05, |
| "loss": 1.2931, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.3126436781609194, |
| "grad_norm": 6.537575634392787, |
| "learning_rate": 1.8349340892464827e-05, |
| "loss": 1.3601, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.3218390804597702, |
| "grad_norm": 5.796967091596528, |
| "learning_rate": 1.832326126895816e-05, |
| "loss": 1.2791, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.3310344827586207, |
| "grad_norm": 5.370682917159942, |
| "learning_rate": 1.8296998378531634e-05, |
| "loss": 1.6052, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.3402298850574712, |
| "grad_norm": 8.766511156656957, |
| "learning_rate": 1.827055287842236e-05, |
| "loss": 1.3518, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.349425287356322, |
| "grad_norm": 6.480740512124651, |
| "learning_rate": 1.8243925430437314e-05, |
| "loss": 1.311, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.3586206896551725, |
| "grad_norm": 7.645471465966849, |
| "learning_rate": 1.821711670093676e-05, |
| "loss": 1.291, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.367816091954023, |
| "grad_norm": 9.381673919145971, |
| "learning_rate": 1.81901273608176e-05, |
| "loss": 1.4457, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.3770114942528735, |
| "grad_norm": 92.46895216336263, |
| "learning_rate": 1.8162958085496572e-05, |
| "loss": 1.2527, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.386206896551724, |
| "grad_norm": 5.256281556855925, |
| "learning_rate": 1.8135609554893345e-05, |
| "loss": 1.3901, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.3954022988505748, |
| "grad_norm": 6.207996783084738, |
| "learning_rate": 1.810808245341352e-05, |
| "loss": 1.3934, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.4045977011494253, |
| "grad_norm": 7.475298218689304, |
| "learning_rate": 1.8080377469931468e-05, |
| "loss": 1.5079, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.4137931034482758, |
| "grad_norm": 7.348051374244608, |
| "learning_rate": 1.8052495297773135e-05, |
| "loss": 1.3069, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.4229885057471265, |
| "grad_norm": 5.764809442997243, |
| "learning_rate": 1.802443663469867e-05, |
| "loss": 1.4919, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.432183908045977, |
| "grad_norm": 6.715860371189423, |
| "learning_rate": 1.7996202182884938e-05, |
| "loss": 1.4631, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.4413793103448276, |
| "grad_norm": 6.647142576932514, |
| "learning_rate": 1.7967792648907993e-05, |
| "loss": 1.5767, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.450574712643678, |
| "grad_norm": 15.258238976802454, |
| "learning_rate": 1.7939208743725378e-05, |
| "loss": 1.4467, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.4597701149425286, |
| "grad_norm": 7.134307398087775, |
| "learning_rate": 1.7910451182658318e-05, |
| "loss": 1.3992, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.4689655172413794, |
| "grad_norm": 10.178435844025032, |
| "learning_rate": 1.7881520685373836e-05, |
| "loss": 1.3086, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.4781609195402299, |
| "grad_norm": 7.9995750026556065, |
| "learning_rate": 1.7852417975866735e-05, |
| "loss": 1.3984, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.4873563218390804, |
| "grad_norm": 6.04856446144021, |
| "learning_rate": 1.7823143782441498e-05, |
| "loss": 1.3864, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.4965517241379311, |
| "grad_norm": 7.302148673860431, |
| "learning_rate": 1.779369883769403e-05, |
| "loss": 1.4692, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.5057471264367817, |
| "grad_norm": 11.710455921764995, |
| "learning_rate": 1.7764083878493342e-05, |
| "loss": 1.3108, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.5149425287356322, |
| "grad_norm": 6.297229352579108, |
| "learning_rate": 1.7734299645963126e-05, |
| "loss": 1.6995, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.524137931034483, |
| "grad_norm": 21.21748624748657, |
| "learning_rate": 1.7704346885463173e-05, |
| "loss": 1.3864, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 7.694329489180455, |
| "learning_rate": 1.7674226346570756e-05, |
| "loss": 1.4465, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.542528735632184, |
| "grad_norm": 6.791665210167091, |
| "learning_rate": 1.7643938783061844e-05, |
| "loss": 1.3967, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.5517241379310345, |
| "grad_norm": 18.25267999804304, |
| "learning_rate": 1.761348495289225e-05, |
| "loss": 1.7708, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.560919540229885, |
| "grad_norm": 11.606015421810417, |
| "learning_rate": 1.7582865618178673e-05, |
| "loss": 1.38, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.5701149425287357, |
| "grad_norm": 6.76568404259339, |
| "learning_rate": 1.755208154517961e-05, |
| "loss": 1.7734, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.5793103448275863, |
| "grad_norm": 7.457232551239884, |
| "learning_rate": 1.752113350427617e-05, |
| "loss": 1.3568, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.5885057471264368, |
| "grad_norm": 10.071218139243994, |
| "learning_rate": 1.7490022269952836e-05, |
| "loss": 1.3582, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.5977011494252875, |
| "grad_norm": 8.467685174322579, |
| "learning_rate": 1.7458748620778047e-05, |
| "loss": 1.4399, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.6068965517241378, |
| "grad_norm": 6.051347000729604, |
| "learning_rate": 1.742731333938472e-05, |
| "loss": 1.3508, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.6160919540229886, |
| "grad_norm": 6.367343243904751, |
| "learning_rate": 1.7395717212450673e-05, |
| "loss": 1.3251, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.625287356321839, |
| "grad_norm": 7.724598036207127, |
| "learning_rate": 1.736396103067893e-05, |
| "loss": 1.2026, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.6344827586206896, |
| "grad_norm": 5.76807974896288, |
| "learning_rate": 1.733204558877795e-05, |
| "loss": 1.1807, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.6436781609195403, |
| "grad_norm": 8.50190392019292, |
| "learning_rate": 1.729997168544171e-05, |
| "loss": 1.2231, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.6528735632183909, |
| "grad_norm": 19.54162117368854, |
| "learning_rate": 1.7267740123329756e-05, |
| "loss": 1.5237, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.6620689655172414, |
| "grad_norm": 9.986270310555119, |
| "learning_rate": 1.7235351709047072e-05, |
| "loss": 1.2517, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.6712643678160921, |
| "grad_norm": 6.78295518963419, |
| "learning_rate": 1.720280725312393e-05, |
| "loss": 1.6053, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.6804597701149424, |
| "grad_norm": 6.601674166563654, |
| "learning_rate": 1.7170107569995588e-05, |
| "loss": 1.2712, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.6896551724137931, |
| "grad_norm": 8.184620262857814, |
| "learning_rate": 1.7137253477981916e-05, |
| "loss": 1.3293, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.6988505747126437, |
| "grad_norm": 6.739412467459474, |
| "learning_rate": 1.7104245799266917e-05, |
| "loss": 1.0026, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.7080459770114942, |
| "grad_norm": 5.556603900105146, |
| "learning_rate": 1.707108535987815e-05, |
| "loss": 1.6606, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.717241379310345, |
| "grad_norm": 12.138471189450616, |
| "learning_rate": 1.7037772989666043e-05, |
| "loss": 1.3003, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.7264367816091954, |
| "grad_norm": 5.204252391318651, |
| "learning_rate": 1.7004309522283162e-05, |
| "loss": 1.4929, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.735632183908046, |
| "grad_norm": 14.919779522258695, |
| "learning_rate": 1.6970695795163322e-05, |
| "loss": 1.6902, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.7448275862068967, |
| "grad_norm": 5.328033889559845, |
| "learning_rate": 1.693693264950062e-05, |
| "loss": 1.4431, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.754022988505747, |
| "grad_norm": 6.230962658840152, |
| "learning_rate": 1.6903020930228424e-05, |
| "loss": 1.4314, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.7632183908045977, |
| "grad_norm": 6.180575508805239, |
| "learning_rate": 1.6868961485998178e-05, |
| "loss": 1.5364, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.7724137931034483, |
| "grad_norm": 10.76113257757336, |
| "learning_rate": 1.683475516915821e-05, |
| "loss": 1.3914, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.7816091954022988, |
| "grad_norm": 6.856163941107209, |
| "learning_rate": 1.6800402835732367e-05, |
| "loss": 1.304, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.7908045977011495, |
| "grad_norm": 6.413125687720114, |
| "learning_rate": 1.6765905345398618e-05, |
| "loss": 1.3577, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 10.350633192944896, |
| "learning_rate": 1.6731263561467514e-05, |
| "loss": 1.3384, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.8091954022988506, |
| "grad_norm": 6.680868526375388, |
| "learning_rate": 1.6696478350860625e-05, |
| "loss": 1.322, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.8183908045977013, |
| "grad_norm": 9.172318252799384, |
| "learning_rate": 1.666155058408879e-05, |
| "loss": 1.6331, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.8275862068965516, |
| "grad_norm": 8.408442568480286, |
| "learning_rate": 1.6626481135230378e-05, |
| "loss": 1.6042, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.8367816091954023, |
| "grad_norm": 7.431075981024314, |
| "learning_rate": 1.6591270881909393e-05, |
| "loss": 1.5691, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8459770114942529, |
| "grad_norm": 17.102642928318303, |
| "learning_rate": 1.6555920705273513e-05, |
| "loss": 1.7698, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.8551724137931034, |
| "grad_norm": 14.163498166355847, |
| "learning_rate": 1.6520431489972043e-05, |
| "loss": 1.4268, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.8643678160919541, |
| "grad_norm": 8.38433733288465, |
| "learning_rate": 1.6484804124133772e-05, |
| "loss": 1.4326, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.8735632183908046, |
| "grad_norm": 7.414923080451205, |
| "learning_rate": 1.6449039499344755e-05, |
| "loss": 1.4021, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.8827586206896552, |
| "grad_norm": 9.285429331174253, |
| "learning_rate": 1.6413138510625994e-05, |
| "loss": 1.537, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.891954022988506, |
| "grad_norm": 8.620259857009387, |
| "learning_rate": 1.637710205641103e-05, |
| "loss": 1.5474, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.9011494252873562, |
| "grad_norm": 7.5352577306905175, |
| "learning_rate": 1.634093103852349e-05, |
| "loss": 1.276, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.910344827586207, |
| "grad_norm": 8.551871535313907, |
| "learning_rate": 1.6304626362154484e-05, |
| "loss": 1.2695, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.9195402298850575, |
| "grad_norm": 11.581334952401058, |
| "learning_rate": 1.6268188935839976e-05, |
| "loss": 1.5916, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.928735632183908, |
| "grad_norm": 13.17525028833506, |
| "learning_rate": 1.623161967143803e-05, |
| "loss": 1.6626, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.9379310344827587, |
| "grad_norm": 8.444643409343747, |
| "learning_rate": 1.6194919484106016e-05, |
| "loss": 1.3036, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.9471264367816092, |
| "grad_norm": 7.6138309875760415, |
| "learning_rate": 1.6158089292277674e-05, |
| "loss": 1.6266, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.9563218390804598, |
| "grad_norm": 8.510948546395023, |
| "learning_rate": 1.612113001764016e-05, |
| "loss": 1.2229, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.9655172413793105, |
| "grad_norm": 18.34541377646805, |
| "learning_rate": 1.6084042585110955e-05, |
| "loss": 1.5161, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.9747126436781608, |
| "grad_norm": 8.232021713485729, |
| "learning_rate": 1.6046827922814746e-05, |
| "loss": 1.5459, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.9839080459770115, |
| "grad_norm": 7.80867265713955, |
| "learning_rate": 1.6009486962060175e-05, |
| "loss": 1.311, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.993103448275862, |
| "grad_norm": 10.173776002475448, |
| "learning_rate": 1.597202063731655e-05, |
| "loss": 1.4924, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 9.754643284384423, |
| "learning_rate": 1.5934429886190444e-05, |
| "loss": 0.9814, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.0091954022988507, |
| "grad_norm": 9.478427097239926, |
| "learning_rate": 1.5896715649402245e-05, |
| "loss": 1.6133, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.018390804597701, |
| "grad_norm": 8.166444573768159, |
| "learning_rate": 1.585887887076261e-05, |
| "loss": 1.4502, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.027586206896552, |
| "grad_norm": 8.541283789837138, |
| "learning_rate": 1.582092049714884e-05, |
| "loss": 1.6396, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.036781609195402, |
| "grad_norm": 11.682225296224088, |
| "learning_rate": 1.5782841478481187e-05, |
| "loss": 1.5421, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.045977011494253, |
| "grad_norm": 10.149484070655848, |
| "learning_rate": 1.5744642767699093e-05, |
| "loss": 1.314, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.0551724137931036, |
| "grad_norm": 8.549351099175704, |
| "learning_rate": 1.5706325320737327e-05, |
| "loss": 1.1816, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.064367816091954, |
| "grad_norm": 6.459017391887839, |
| "learning_rate": 1.566789009650206e-05, |
| "loss": 1.2528, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.0735632183908046, |
| "grad_norm": 8.222834591178689, |
| "learning_rate": 1.562933805684689e-05, |
| "loss": 1.4919, |
| "step": 226 |
| }, |
| { |
| "epoch": 2.0827586206896553, |
| "grad_norm": 9.249895356593102, |
| "learning_rate": 1.5590670166548752e-05, |
| "loss": 1.1503, |
| "step": 227 |
| }, |
| { |
| "epoch": 2.0919540229885056, |
| "grad_norm": 7.8698554294535406, |
| "learning_rate": 1.5551887393283778e-05, |
| "loss": 1.4001, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.1011494252873564, |
| "grad_norm": 14.354528964959558, |
| "learning_rate": 1.551299070760309e-05, |
| "loss": 1.4355, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.110344827586207, |
| "grad_norm": 12.62190606379736, |
| "learning_rate": 1.547398108290849e-05, |
| "loss": 1.3149, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.1195402298850574, |
| "grad_norm": 5.985166640280286, |
| "learning_rate": 1.5434859495428126e-05, |
| "loss": 1.4758, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.128735632183908, |
| "grad_norm": 8.93057666695323, |
| "learning_rate": 1.539562692419205e-05, |
| "loss": 1.4132, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.1379310344827585, |
| "grad_norm": 169.89901344705734, |
| "learning_rate": 1.5356284351007713e-05, |
| "loss": 1.2222, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.147126436781609, |
| "grad_norm": 10.407513886004416, |
| "learning_rate": 1.5316832760435395e-05, |
| "loss": 1.403, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.15632183908046, |
| "grad_norm": 7.673828476190051, |
| "learning_rate": 1.5277273139763584e-05, |
| "loss": 1.2657, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.1655172413793102, |
| "grad_norm": 8.662439314553673, |
| "learning_rate": 1.5237606478984244e-05, |
| "loss": 1.4838, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.174712643678161, |
| "grad_norm": 6.161005447060972, |
| "learning_rate": 1.5197833770768053e-05, |
| "loss": 1.2036, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.1839080459770113, |
| "grad_norm": 10.211172062940802, |
| "learning_rate": 1.515795601043956e-05, |
| "loss": 1.3413, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.193103448275862, |
| "grad_norm": 31.652507696152473, |
| "learning_rate": 1.5117974195952286e-05, |
| "loss": 1.4092, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.2022988505747128, |
| "grad_norm": 8.725107105944577, |
| "learning_rate": 1.5077889327863725e-05, |
| "loss": 1.1694, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.211494252873563, |
| "grad_norm": 8.66761735043033, |
| "learning_rate": 1.5037702409310324e-05, |
| "loss": 1.387, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.220689655172414, |
| "grad_norm": 9.553327260316669, |
| "learning_rate": 1.499741444598238e-05, |
| "loss": 1.2606, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.2298850574712645, |
| "grad_norm": 10.696600046863653, |
| "learning_rate": 1.4957026446098867e-05, |
| "loss": 1.4158, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.239080459770115, |
| "grad_norm": 10.39962688994084, |
| "learning_rate": 1.4916539420382203e-05, |
| "loss": 1.3589, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.2482758620689656, |
| "grad_norm": 14.53443112403548, |
| "learning_rate": 1.4875954382032956e-05, |
| "loss": 1.4326, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.2574712643678163, |
| "grad_norm": 7.410550756457302, |
| "learning_rate": 1.4835272346704494e-05, |
| "loss": 1.1635, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.2666666666666666, |
| "grad_norm": 7.6372427743277775, |
| "learning_rate": 1.4794494332477566e-05, |
| "loss": 1.4257, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.2758620689655173, |
| "grad_norm": 21.947698234199052, |
| "learning_rate": 1.4753621359834822e-05, |
| "loss": 1.4056, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.2850574712643676, |
| "grad_norm": 11.646825257901302, |
| "learning_rate": 1.4712654451635275e-05, |
| "loss": 1.5212, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.2942528735632184, |
| "grad_norm": 17.799789150094412, |
| "learning_rate": 1.4671594633088704e-05, |
| "loss": 1.163, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.303448275862069, |
| "grad_norm": 11.049404976513573, |
| "learning_rate": 1.4630442931730007e-05, |
| "loss": 1.3228, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.3126436781609194, |
| "grad_norm": 10.135916865637768, |
| "learning_rate": 1.4589200377393467e-05, |
| "loss": 1.5016, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.32183908045977, |
| "grad_norm": 15.774619581016921, |
| "learning_rate": 1.4547868002186996e-05, |
| "loss": 1.5846, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.3310344827586205, |
| "grad_norm": 10.754021244507555, |
| "learning_rate": 1.4506446840466302e-05, |
| "loss": 1.2985, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.340229885057471, |
| "grad_norm": 13.937037843771375, |
| "learning_rate": 1.4464937928809009e-05, |
| "loss": 1.28, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.349425287356322, |
| "grad_norm": 12.31608417163875, |
| "learning_rate": 1.4423342305988697e-05, |
| "loss": 1.4902, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.3586206896551722, |
| "grad_norm": 9.954291617642005, |
| "learning_rate": 1.4381661012948933e-05, |
| "loss": 1.2722, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.367816091954023, |
| "grad_norm": 11.04918384734279, |
| "learning_rate": 1.4339895092777204e-05, |
| "loss": 1.2628, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.3770114942528737, |
| "grad_norm": 6.763154511930277, |
| "learning_rate": 1.4298045590678814e-05, |
| "loss": 1.1636, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.386206896551724, |
| "grad_norm": 13.574514226985405, |
| "learning_rate": 1.425611355395074e-05, |
| "loss": 1.428, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.3954022988505748, |
| "grad_norm": 9.65472307533206, |
| "learning_rate": 1.4214100031955404e-05, |
| "loss": 1.2303, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.4045977011494255, |
| "grad_norm": 8.266644434941332, |
| "learning_rate": 1.4172006076094427e-05, |
| "loss": 1.6992, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.413793103448276, |
| "grad_norm": 11.226367730103076, |
| "learning_rate": 1.4129832739782314e-05, |
| "loss": 1.3781, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.4229885057471265, |
| "grad_norm": 10.547590497185766, |
| "learning_rate": 1.408758107842009e-05, |
| "loss": 1.4745, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.432183908045977, |
| "grad_norm": 38.63935876164692, |
| "learning_rate": 1.4045252149368886e-05, |
| "loss": 1.4921, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.4413793103448276, |
| "grad_norm": 9.852443772549051, |
| "learning_rate": 1.4002847011923484e-05, |
| "loss": 1.584, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.4505747126436783, |
| "grad_norm": 12.380130191453606, |
| "learning_rate": 1.3960366727285809e-05, |
| "loss": 1.5535, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.4597701149425286, |
| "grad_norm": 8.857515071879746, |
| "learning_rate": 1.391781235853836e-05, |
| "loss": 1.3223, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.4689655172413794, |
| "grad_norm": 9.627852836741733, |
| "learning_rate": 1.3875184970617621e-05, |
| "loss": 1.5267, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.4781609195402297, |
| "grad_norm": 12.112138710475412, |
| "learning_rate": 1.3832485630287395e-05, |
| "loss": 1.5247, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.4873563218390804, |
| "grad_norm": 14.536255701000336, |
| "learning_rate": 1.3789715406112132e-05, |
| "loss": 1.5334, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.496551724137931, |
| "grad_norm": 11.226710463125984, |
| "learning_rate": 1.3746875368430156e-05, |
| "loss": 1.474, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.5057471264367814, |
| "grad_norm": 9.986652995059503, |
| "learning_rate": 1.3703966589326905e-05, |
| "loss": 1.1953, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.514942528735632, |
| "grad_norm": 15.399922495441178, |
| "learning_rate": 1.3660990142608093e-05, |
| "loss": 1.3754, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.524137931034483, |
| "grad_norm": 14.096871218357013, |
| "learning_rate": 1.3617947103772833e-05, |
| "loss": 1.5314, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.533333333333333, |
| "grad_norm": 20.22748729117087, |
| "learning_rate": 1.357483854998673e-05, |
| "loss": 1.2614, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.542528735632184, |
| "grad_norm": 15.107752634691163, |
| "learning_rate": 1.3531665560054922e-05, |
| "loss": 1.2576, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.5517241379310347, |
| "grad_norm": 9.065614108838506, |
| "learning_rate": 1.3488429214395078e-05, |
| "loss": 1.3296, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.560919540229885, |
| "grad_norm": 10.218458690356865, |
| "learning_rate": 1.3445130595010366e-05, |
| "loss": 1.4652, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.5701149425287357, |
| "grad_norm": 52.06028062114195, |
| "learning_rate": 1.3401770785462375e-05, |
| "loss": 1.2604, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.5793103448275865, |
| "grad_norm": 11.237376278555484, |
| "learning_rate": 1.3358350870843994e-05, |
| "loss": 1.4764, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.5885057471264368, |
| "grad_norm": 19.610789343097895, |
| "learning_rate": 1.3314871937752266e-05, |
| "loss": 1.7019, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.5977011494252875, |
| "grad_norm": 12.406919127163583, |
| "learning_rate": 1.3271335074261183e-05, |
| "loss": 1.4766, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.606896551724138, |
| "grad_norm": 11.381243573694883, |
| "learning_rate": 1.3227741369894464e-05, |
| "loss": 1.3762, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.6160919540229886, |
| "grad_norm": 9.470405274888344, |
| "learning_rate": 1.3184091915598301e-05, |
| "loss": 1.3369, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.625287356321839, |
| "grad_norm": 22.889960665827505, |
| "learning_rate": 1.3140387803714025e-05, |
| "loss": 1.2954, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.6344827586206896, |
| "grad_norm": 9.18406523843117, |
| "learning_rate": 1.309663012795081e-05, |
| "loss": 1.2422, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.6436781609195403, |
| "grad_norm": 11.268913127502152, |
| "learning_rate": 1.3052819983358269e-05, |
| "loss": 1.4489, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.6528735632183906, |
| "grad_norm": 13.1896905658553, |
| "learning_rate": 1.3008958466299068e-05, |
| "loss": 1.7273, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.6620689655172414, |
| "grad_norm": 10.572311262669949, |
| "learning_rate": 1.2965046674421491e-05, |
| "loss": 1.4719, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.671264367816092, |
| "grad_norm": 10.35820251976187, |
| "learning_rate": 1.2921085706631959e-05, |
| "loss": 1.4539, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.6804597701149424, |
| "grad_norm": 8.704710016620966, |
| "learning_rate": 1.2877076663067539e-05, |
| "loss": 1.3574, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.689655172413793, |
| "grad_norm": 6.599649782918219, |
| "learning_rate": 1.2833020645068402e-05, |
| "loss": 1.3322, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.698850574712644, |
| "grad_norm": 11.362295773438365, |
| "learning_rate": 1.2788918755150279e-05, |
| "loss": 1.2928, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.708045977011494, |
| "grad_norm": 37.10678640120499, |
| "learning_rate": 1.2744772096976853e-05, |
| "loss": 1.3816, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.717241379310345, |
| "grad_norm": 10.62094103048475, |
| "learning_rate": 1.2700581775332157e-05, |
| "loss": 1.3672, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.7264367816091957, |
| "grad_norm": 10.75935817308168, |
| "learning_rate": 1.2656348896092898e-05, |
| "loss": 1.4492, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.735632183908046, |
| "grad_norm": 8.077547364505877, |
| "learning_rate": 1.2612074566200823e-05, |
| "loss": 1.3044, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.7448275862068967, |
| "grad_norm": 12.203291711398258, |
| "learning_rate": 1.2567759893634972e-05, |
| "loss": 1.5552, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.754022988505747, |
| "grad_norm": 8.513713623012512, |
| "learning_rate": 1.2523405987383987e-05, |
| "loss": 1.2848, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.7632183908045977, |
| "grad_norm": 8.619755790356049, |
| "learning_rate": 1.2479013957418343e-05, |
| "loss": 1.4136, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.772413793103448, |
| "grad_norm": 10.308980428163558, |
| "learning_rate": 1.2434584914662573e-05, |
| "loss": 1.2261, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.781609195402299, |
| "grad_norm": 10.11745765990052, |
| "learning_rate": 1.2390119970967465e-05, |
| "loss": 1.8462, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.7908045977011495, |
| "grad_norm": 13.585789032324755, |
| "learning_rate": 1.2345620239082236e-05, |
| "loss": 1.3516, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 8.3622289803184, |
| "learning_rate": 1.23010868326267e-05, |
| "loss": 1.2363, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.8091954022988506, |
| "grad_norm": 11.137605463009926, |
| "learning_rate": 1.2256520866063375e-05, |
| "loss": 1.5193, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.8183908045977013, |
| "grad_norm": 7.9674983156349395, |
| "learning_rate": 1.221192345466961e-05, |
| "loss": 1.3356, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.8275862068965516, |
| "grad_norm": 9.86095902628443, |
| "learning_rate": 1.2167295714509675e-05, |
| "loss": 1.6582, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.8367816091954023, |
| "grad_norm": 8.9590317304695, |
| "learning_rate": 1.2122638762406824e-05, |
| "loss": 1.2642, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.845977011494253, |
| "grad_norm": 11.895962433965508, |
| "learning_rate": 1.2077953715915347e-05, |
| "loss": 1.2452, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.8551724137931034, |
| "grad_norm": 8.091787768627592, |
| "learning_rate": 1.2033241693292607e-05, |
| "loss": 1.6858, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.864367816091954, |
| "grad_norm": 12.390457672512209, |
| "learning_rate": 1.1988503813471058e-05, |
| "loss": 1.2549, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.873563218390805, |
| "grad_norm": 9.762226489244592, |
| "learning_rate": 1.1943741196030223e-05, |
| "loss": 1.2067, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.882758620689655, |
| "grad_norm": 10.427493990874517, |
| "learning_rate": 1.1898954961168712e-05, |
| "loss": 1.2787, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.891954022988506, |
| "grad_norm": 9.795089175759868, |
| "learning_rate": 1.1854146229676153e-05, |
| "loss": 1.5051, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.901149425287356, |
| "grad_norm": 10.796252759918572, |
| "learning_rate": 1.180931612290517e-05, |
| "loss": 1.4446, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.910344827586207, |
| "grad_norm": 9.367765215773524, |
| "learning_rate": 1.1764465762743301e-05, |
| "loss": 1.5287, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.9195402298850572, |
| "grad_norm": 9.973558960509926, |
| "learning_rate": 1.1719596271584937e-05, |
| "loss": 1.3678, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.928735632183908, |
| "grad_norm": 8.48030402628292, |
| "learning_rate": 1.1674708772303227e-05, |
| "loss": 1.7673, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.9379310344827587, |
| "grad_norm": 9.941583395501333, |
| "learning_rate": 1.1629804388221977e-05, |
| "loss": 1.3052, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.947126436781609, |
| "grad_norm": 20.295022543023112, |
| "learning_rate": 1.1584884243087542e-05, |
| "loss": 1.4888, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.9563218390804598, |
| "grad_norm": 14.791841790346075, |
| "learning_rate": 1.1539949461040704e-05, |
| "loss": 1.4082, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.9655172413793105, |
| "grad_norm": 11.397818542102845, |
| "learning_rate": 1.1495001166588538e-05, |
| "loss": 1.2513, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.974712643678161, |
| "grad_norm": 14.537936931794308, |
| "learning_rate": 1.1450040484576268e-05, |
| "loss": 1.3915, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.9839080459770115, |
| "grad_norm": 10.380255650087015, |
| "learning_rate": 1.140506854015912e-05, |
| "loss": 1.4326, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.9931034482758623, |
| "grad_norm": 10.815042710269552, |
| "learning_rate": 1.1360086458774173e-05, |
| "loss": 1.3435, |
| "step": 326 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 14.352409000062071, |
| "learning_rate": 1.1315095366112179e-05, |
| "loss": 0.8037, |
| "step": 327 |
| }, |
| { |
| "epoch": 3.0091954022988507, |
| "grad_norm": 8.564622119135102, |
| "learning_rate": 1.1270096388089405e-05, |
| "loss": 1.2927, |
| "step": 328 |
| }, |
| { |
| "epoch": 3.018390804597701, |
| "grad_norm": 9.827927749771158, |
| "learning_rate": 1.1225090650819443e-05, |
| "loss": 1.2504, |
| "step": 329 |
| }, |
| { |
| "epoch": 3.027586206896552, |
| "grad_norm": 13.500491825543243, |
| "learning_rate": 1.118007928058505e-05, |
| "loss": 1.2751, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.036781609195402, |
| "grad_norm": 13.400074147719494, |
| "learning_rate": 1.1135063403809942e-05, |
| "loss": 1.5854, |
| "step": 331 |
| }, |
| { |
| "epoch": 3.045977011494253, |
| "grad_norm": 9.303488220671063, |
| "learning_rate": 1.1090044147030612e-05, |
| "loss": 1.4025, |
| "step": 332 |
| }, |
| { |
| "epoch": 3.0551724137931036, |
| "grad_norm": 12.154817360125381, |
| "learning_rate": 1.104502263686814e-05, |
| "loss": 1.4901, |
| "step": 333 |
| }, |
| { |
| "epoch": 3.064367816091954, |
| "grad_norm": 11.037010387746522, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 1.2772, |
| "step": 334 |
| }, |
| { |
| "epoch": 3.0735632183908046, |
| "grad_norm": 8.990590987937784, |
| "learning_rate": 1.095497736313186e-05, |
| "loss": 1.5939, |
| "step": 335 |
| }, |
| { |
| "epoch": 3.0827586206896553, |
| "grad_norm": 9.84526893242006, |
| "learning_rate": 1.0909955852969392e-05, |
| "loss": 1.4225, |
| "step": 336 |
| }, |
| { |
| "epoch": 3.0919540229885056, |
| "grad_norm": 12.240144797467202, |
| "learning_rate": 1.0864936596190059e-05, |
| "loss": 1.6045, |
| "step": 337 |
| }, |
| { |
| "epoch": 3.1011494252873564, |
| "grad_norm": 8.484859364769594, |
| "learning_rate": 1.0819920719414953e-05, |
| "loss": 1.3782, |
| "step": 338 |
| }, |
| { |
| "epoch": 3.110344827586207, |
| "grad_norm": 11.97211375025308, |
| "learning_rate": 1.0774909349180558e-05, |
| "loss": 1.3038, |
| "step": 339 |
| }, |
| { |
| "epoch": 3.1195402298850574, |
| "grad_norm": 12.573640767997977, |
| "learning_rate": 1.07299036119106e-05, |
| "loss": 1.4471, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.128735632183908, |
| "grad_norm": 9.896343612454453, |
| "learning_rate": 1.0684904633887822e-05, |
| "loss": 1.4695, |
| "step": 341 |
| }, |
| { |
| "epoch": 3.1379310344827585, |
| "grad_norm": 12.867671267558745, |
| "learning_rate": 1.063991354122583e-05, |
| "loss": 1.327, |
| "step": 342 |
| }, |
| { |
| "epoch": 3.147126436781609, |
| "grad_norm": 31.710152688174862, |
| "learning_rate": 1.0594931459840882e-05, |
| "loss": 1.1624, |
| "step": 343 |
| }, |
| { |
| "epoch": 3.15632183908046, |
| "grad_norm": 13.711391540751984, |
| "learning_rate": 1.0549959515423736e-05, |
| "loss": 1.4283, |
| "step": 344 |
| }, |
| { |
| "epoch": 3.1655172413793102, |
| "grad_norm": 10.048106577520812, |
| "learning_rate": 1.0504998833411465e-05, |
| "loss": 1.3794, |
| "step": 345 |
| }, |
| { |
| "epoch": 3.174712643678161, |
| "grad_norm": 15.095877578355061, |
| "learning_rate": 1.0460050538959299e-05, |
| "loss": 1.2234, |
| "step": 346 |
| }, |
| { |
| "epoch": 3.1839080459770113, |
| "grad_norm": 10.008382234710478, |
| "learning_rate": 1.0415115756912462e-05, |
| "loss": 1.4849, |
| "step": 347 |
| }, |
| { |
| "epoch": 3.193103448275862, |
| "grad_norm": 10.834339570759653, |
| "learning_rate": 1.0370195611778027e-05, |
| "loss": 1.4008, |
| "step": 348 |
| }, |
| { |
| "epoch": 3.2022988505747128, |
| "grad_norm": 11.606165690508291, |
| "learning_rate": 1.0325291227696776e-05, |
| "loss": 1.2378, |
| "step": 349 |
| }, |
| { |
| "epoch": 3.211494252873563, |
| "grad_norm": 10.000685238606074, |
| "learning_rate": 1.0280403728415067e-05, |
| "loss": 1.5133, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.220689655172414, |
| "grad_norm": 12.169373195656735, |
| "learning_rate": 1.0235534237256702e-05, |
| "loss": 1.5, |
| "step": 351 |
| }, |
| { |
| "epoch": 3.2298850574712645, |
| "grad_norm": 9.464191323383275, |
| "learning_rate": 1.0190683877094832e-05, |
| "loss": 1.3682, |
| "step": 352 |
| }, |
| { |
| "epoch": 3.239080459770115, |
| "grad_norm": 24.696085137335555, |
| "learning_rate": 1.0145853770323846e-05, |
| "loss": 1.2056, |
| "step": 353 |
| }, |
| { |
| "epoch": 3.2482758620689656, |
| "grad_norm": 18.76575800304636, |
| "learning_rate": 1.0101045038831292e-05, |
| "loss": 1.142, |
| "step": 354 |
| }, |
| { |
| "epoch": 3.2574712643678163, |
| "grad_norm": 10.042567143762055, |
| "learning_rate": 1.0056258803969778e-05, |
| "loss": 1.3638, |
| "step": 355 |
| }, |
| { |
| "epoch": 3.2666666666666666, |
| "grad_norm": 11.318980756154916, |
| "learning_rate": 1.0011496186528947e-05, |
| "loss": 1.35, |
| "step": 356 |
| }, |
| { |
| "epoch": 3.2758620689655173, |
| "grad_norm": 40.050906379076665, |
| "learning_rate": 9.966758306707394e-06, |
| "loss": 1.2106, |
| "step": 357 |
| }, |
| { |
| "epoch": 3.2850574712643676, |
| "grad_norm": 9.353674650560334, |
| "learning_rate": 9.922046284084657e-06, |
| "loss": 1.3442, |
| "step": 358 |
| }, |
| { |
| "epoch": 3.2942528735632184, |
| "grad_norm": 9.677203924503825, |
| "learning_rate": 9.877361237593177e-06, |
| "loss": 1.3453, |
| "step": 359 |
| }, |
| { |
| "epoch": 3.303448275862069, |
| "grad_norm": 6.960886228003972, |
| "learning_rate": 9.832704285490326e-06, |
| "loss": 1.259, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.3126436781609194, |
| "grad_norm": 9.958595921108332, |
| "learning_rate": 9.788076545330392e-06, |
| "loss": 1.5625, |
| "step": 361 |
| }, |
| { |
| "epoch": 3.32183908045977, |
| "grad_norm": 51.85252081874326, |
| "learning_rate": 9.74347913393663e-06, |
| "loss": 1.5267, |
| "step": 362 |
| }, |
| { |
| "epoch": 3.3310344827586205, |
| "grad_norm": 11.704494609330304, |
| "learning_rate": 9.698913167373302e-06, |
| "loss": 1.2225, |
| "step": 363 |
| }, |
| { |
| "epoch": 3.340229885057471, |
| "grad_norm": 14.283897531385442, |
| "learning_rate": 9.654379760917765e-06, |
| "loss": 1.4331, |
| "step": 364 |
| }, |
| { |
| "epoch": 3.349425287356322, |
| "grad_norm": 13.26198377276854, |
| "learning_rate": 9.609880029032537e-06, |
| "loss": 1.4148, |
| "step": 365 |
| }, |
| { |
| "epoch": 3.3586206896551722, |
| "grad_norm": 38.207752905565194, |
| "learning_rate": 9.56541508533743e-06, |
| "loss": 1.3888, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.367816091954023, |
| "grad_norm": 14.71112455943351, |
| "learning_rate": 9.520986042581657e-06, |
| "loss": 1.4406, |
| "step": 367 |
| }, |
| { |
| "epoch": 3.3770114942528737, |
| "grad_norm": 10.84175054233535, |
| "learning_rate": 9.476594012616016e-06, |
| "loss": 1.4795, |
| "step": 368 |
| }, |
| { |
| "epoch": 3.386206896551724, |
| "grad_norm": 19.73830068691523, |
| "learning_rate": 9.43224010636503e-06, |
| "loss": 1.5071, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.3954022988505748, |
| "grad_norm": 21.400680474505883, |
| "learning_rate": 9.387925433799183e-06, |
| "loss": 1.6345, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.4045977011494255, |
| "grad_norm": 8.520388056506897, |
| "learning_rate": 9.343651103907101e-06, |
| "loss": 1.1921, |
| "step": 371 |
| }, |
| { |
| "epoch": 3.413793103448276, |
| "grad_norm": 15.614373578838704, |
| "learning_rate": 9.299418224667846e-06, |
| "loss": 1.3103, |
| "step": 372 |
| }, |
| { |
| "epoch": 3.4229885057471265, |
| "grad_norm": 10.195337396411798, |
| "learning_rate": 9.255227903023148e-06, |
| "loss": 1.0011, |
| "step": 373 |
| }, |
| { |
| "epoch": 3.432183908045977, |
| "grad_norm": 9.980736431264198, |
| "learning_rate": 9.211081244849724e-06, |
| "loss": 1.4138, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.4413793103448276, |
| "grad_norm": 6.407392093756993, |
| "learning_rate": 9.166979354931602e-06, |
| "loss": 1.3992, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.4505747126436783, |
| "grad_norm": 10.33551202023238, |
| "learning_rate": 9.122923336932466e-06, |
| "loss": 1.2931, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.4597701149425286, |
| "grad_norm": 7.860952040626876, |
| "learning_rate": 9.078914293368042e-06, |
| "loss": 1.3782, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.4689655172413794, |
| "grad_norm": 10.376178758945807, |
| "learning_rate": 9.034953325578513e-06, |
| "loss": 1.5396, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.4781609195402297, |
| "grad_norm": 8.92810875038656, |
| "learning_rate": 8.991041533700935e-06, |
| "loss": 1.1866, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.4873563218390804, |
| "grad_norm": 32.96472192490382, |
| "learning_rate": 8.947180016641736e-06, |
| "loss": 1.4369, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.496551724137931, |
| "grad_norm": 9.034006970425779, |
| "learning_rate": 8.903369872049192e-06, |
| "loss": 1.3536, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.5057471264367814, |
| "grad_norm": 8.45428130551336, |
| "learning_rate": 8.859612196285977e-06, |
| "loss": 1.248, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.514942528735632, |
| "grad_norm": 10.682107181389576, |
| "learning_rate": 8.815908084401704e-06, |
| "loss": 1.4265, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.524137931034483, |
| "grad_norm": 14.488560507246083, |
| "learning_rate": 8.772258630105537e-06, |
| "loss": 1.4996, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.533333333333333, |
| "grad_norm": 11.402417158079917, |
| "learning_rate": 8.728664925738818e-06, |
| "loss": 1.4463, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.542528735632184, |
| "grad_norm": 11.881058609468937, |
| "learning_rate": 8.685128062247739e-06, |
| "loss": 1.8416, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.5517241379310347, |
| "grad_norm": 10.296799405046839, |
| "learning_rate": 8.641649129156007e-06, |
| "loss": 1.3956, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.560919540229885, |
| "grad_norm": 9.65931052787777, |
| "learning_rate": 8.598229214537627e-06, |
| "loss": 1.3552, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.5701149425287357, |
| "grad_norm": 13.969316212816242, |
| "learning_rate": 8.554869404989636e-06, |
| "loss": 1.3024, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.5793103448275865, |
| "grad_norm": 8.281255790239513, |
| "learning_rate": 8.511570785604928e-06, |
| "loss": 1.6863, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.5885057471264368, |
| "grad_norm": 22.529643925769257, |
| "learning_rate": 8.46833443994508e-06, |
| "loss": 1.4396, |
| "step": 391 |
| }, |
| { |
| "epoch": 3.5977011494252875, |
| "grad_norm": 10.800560357820313, |
| "learning_rate": 8.42516145001327e-06, |
| "loss": 1.318, |
| "step": 392 |
| }, |
| { |
| "epoch": 3.606896551724138, |
| "grad_norm": 10.69995569676154, |
| "learning_rate": 8.382052896227168e-06, |
| "loss": 1.1625, |
| "step": 393 |
| }, |
| { |
| "epoch": 3.6160919540229886, |
| "grad_norm": 9.934078096312229, |
| "learning_rate": 8.339009857391912e-06, |
| "loss": 1.328, |
| "step": 394 |
| }, |
| { |
| "epoch": 3.625287356321839, |
| "grad_norm": 26.37916950240029, |
| "learning_rate": 8.296033410673096e-06, |
| "loss": 1.1736, |
| "step": 395 |
| }, |
| { |
| "epoch": 3.6344827586206896, |
| "grad_norm": 14.08324266869287, |
| "learning_rate": 8.253124631569847e-06, |
| "loss": 1.5264, |
| "step": 396 |
| }, |
| { |
| "epoch": 3.6436781609195403, |
| "grad_norm": 9.839565730283748, |
| "learning_rate": 8.210284593887869e-06, |
| "loss": 1.4744, |
| "step": 397 |
| }, |
| { |
| "epoch": 3.6528735632183906, |
| "grad_norm": 11.700788192703863, |
| "learning_rate": 8.167514369712608e-06, |
| "loss": 1.1398, |
| "step": 398 |
| }, |
| { |
| "epoch": 3.6620689655172414, |
| "grad_norm": 11.788317447015977, |
| "learning_rate": 8.124815029382382e-06, |
| "loss": 1.3801, |
| "step": 399 |
| }, |
| { |
| "epoch": 3.671264367816092, |
| "grad_norm": 12.788897221803238, |
| "learning_rate": 8.082187641461642e-06, |
| "loss": 1.3303, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.6804597701149424, |
| "grad_norm": 13.166615581355577, |
| "learning_rate": 8.03963327271419e-06, |
| "loss": 1.375, |
| "step": 401 |
| }, |
| { |
| "epoch": 3.689655172413793, |
| "grad_norm": 7.295624548089385, |
| "learning_rate": 7.99715298807652e-06, |
| "loss": 1.1687, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.698850574712644, |
| "grad_norm": 12.478601761113927, |
| "learning_rate": 7.954747850631117e-06, |
| "loss": 1.3044, |
| "step": 403 |
| }, |
| { |
| "epoch": 3.708045977011494, |
| "grad_norm": 12.1752499296347, |
| "learning_rate": 7.912418921579914e-06, |
| "loss": 1.3738, |
| "step": 404 |
| }, |
| { |
| "epoch": 3.717241379310345, |
| "grad_norm": 9.637242180760817, |
| "learning_rate": 7.870167260217687e-06, |
| "loss": 1.5205, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.7264367816091957, |
| "grad_norm": 11.278800274918918, |
| "learning_rate": 7.827993923905578e-06, |
| "loss": 1.2157, |
| "step": 406 |
| }, |
| { |
| "epoch": 3.735632183908046, |
| "grad_norm": 8.172216735429602, |
| "learning_rate": 7.785899968044599e-06, |
| "loss": 1.1936, |
| "step": 407 |
| }, |
| { |
| "epoch": 3.7448275862068967, |
| "grad_norm": 8.096635818421476, |
| "learning_rate": 7.743886446049263e-06, |
| "loss": 1.5856, |
| "step": 408 |
| }, |
| { |
| "epoch": 3.754022988505747, |
| "grad_norm": 11.682569075404121, |
| "learning_rate": 7.701954409321187e-06, |
| "loss": 1.4744, |
| "step": 409 |
| }, |
| { |
| "epoch": 3.7632183908045977, |
| "grad_norm": 10.35206309414598, |
| "learning_rate": 7.660104907222801e-06, |
| "loss": 1.2172, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.772413793103448, |
| "grad_norm": 11.014122758958818, |
| "learning_rate": 7.618338987051068e-06, |
| "loss": 1.0511, |
| "step": 411 |
| }, |
| { |
| "epoch": 3.781609195402299, |
| "grad_norm": 8.85710415272957, |
| "learning_rate": 7.576657694011309e-06, |
| "loss": 1.3102, |
| "step": 412 |
| }, |
| { |
| "epoch": 3.7908045977011495, |
| "grad_norm": 12.157205224266306, |
| "learning_rate": 7.535062071190995e-06, |
| "loss": 1.1799, |
| "step": 413 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 8.418851066550117, |
| "learning_rate": 7.493553159533702e-06, |
| "loss": 1.2111, |
| "step": 414 |
| }, |
| { |
| "epoch": 3.8091954022988506, |
| "grad_norm": 7.713059082134044, |
| "learning_rate": 7.452131997813006e-06, |
| "loss": 1.2234, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.8183908045977013, |
| "grad_norm": 15.425297595516845, |
| "learning_rate": 7.410799622606539e-06, |
| "loss": 1.2979, |
| "step": 416 |
| }, |
| { |
| "epoch": 3.8275862068965516, |
| "grad_norm": 8.601420815684877, |
| "learning_rate": 7.369557068269997e-06, |
| "loss": 1.1259, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.8367816091954023, |
| "grad_norm": 12.94115881368024, |
| "learning_rate": 7.3284053669112975e-06, |
| "loss": 1.3448, |
| "step": 418 |
| }, |
| { |
| "epoch": 3.845977011494253, |
| "grad_norm": 6.589550268233686, |
| "learning_rate": 7.287345548364728e-06, |
| "loss": 1.1129, |
| "step": 419 |
| }, |
| { |
| "epoch": 3.8551724137931034, |
| "grad_norm": 6.501312647159815, |
| "learning_rate": 7.2463786401651835e-06, |
| "loss": 1.1362, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.864367816091954, |
| "grad_norm": 18.587292777735644, |
| "learning_rate": 7.205505667522437e-06, |
| "loss": 1.2959, |
| "step": 421 |
| }, |
| { |
| "epoch": 3.873563218390805, |
| "grad_norm": 15.21488681302892, |
| "learning_rate": 7.164727653295512e-06, |
| "loss": 1.3545, |
| "step": 422 |
| }, |
| { |
| "epoch": 3.882758620689655, |
| "grad_norm": 8.577037418300366, |
| "learning_rate": 7.124045617967048e-06, |
| "loss": 1.4131, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.891954022988506, |
| "grad_norm": 10.190524659959603, |
| "learning_rate": 7.0834605796178e-06, |
| "loss": 1.4512, |
| "step": 424 |
| }, |
| { |
| "epoch": 3.901149425287356, |
| "grad_norm": 7.727343437140264, |
| "learning_rate": 7.042973553901133e-06, |
| "loss": 1.6387, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.910344827586207, |
| "grad_norm": 22.911347407744433, |
| "learning_rate": 7.002585554017622e-06, |
| "loss": 1.3267, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.9195402298850572, |
| "grad_norm": 10.152149046552406, |
| "learning_rate": 6.962297590689678e-06, |
| "loss": 1.2264, |
| "step": 427 |
| }, |
| { |
| "epoch": 3.928735632183908, |
| "grad_norm": 16.580706197370287, |
| "learning_rate": 6.922110672136282e-06, |
| "loss": 1.3127, |
| "step": 428 |
| }, |
| { |
| "epoch": 3.9379310344827587, |
| "grad_norm": 13.844974648013892, |
| "learning_rate": 6.882025804047718e-06, |
| "loss": 1.4424, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.947126436781609, |
| "grad_norm": 13.915774750616139, |
| "learning_rate": 6.842043989560443e-06, |
| "loss": 1.645, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.9563218390804598, |
| "grad_norm": 15.603474543946852, |
| "learning_rate": 6.802166229231952e-06, |
| "loss": 1.4729, |
| "step": 431 |
| }, |
| { |
| "epoch": 3.9655172413793105, |
| "grad_norm": 9.41327469077912, |
| "learning_rate": 6.76239352101576e-06, |
| "loss": 1.3605, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.974712643678161, |
| "grad_norm": 14.457634374687824, |
| "learning_rate": 6.722726860236417e-06, |
| "loss": 1.5076, |
| "step": 433 |
| }, |
| { |
| "epoch": 3.9839080459770115, |
| "grad_norm": 9.965691507607113, |
| "learning_rate": 6.683167239564608e-06, |
| "loss": 1.4915, |
| "step": 434 |
| }, |
| { |
| "epoch": 3.9931034482758623, |
| "grad_norm": 51.33613508537111, |
| "learning_rate": 6.64371564899229e-06, |
| "loss": 1.1819, |
| "step": 435 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 9.648485492120264, |
| "learning_rate": 6.604373075807953e-06, |
| "loss": 1.0046, |
| "step": 436 |
| }, |
| { |
| "epoch": 4.00919540229885, |
| "grad_norm": 17.169545137709832, |
| "learning_rate": 6.5651405045718764e-06, |
| "loss": 1.3074, |
| "step": 437 |
| }, |
| { |
| "epoch": 4.0183908045977015, |
| "grad_norm": 8.532422194018014, |
| "learning_rate": 6.526018917091517e-06, |
| "loss": 1.2025, |
| "step": 438 |
| }, |
| { |
| "epoch": 4.027586206896552, |
| "grad_norm": 7.280675134670931, |
| "learning_rate": 6.4870092923969155e-06, |
| "loss": 1.2716, |
| "step": 439 |
| }, |
| { |
| "epoch": 4.036781609195402, |
| "grad_norm": 7.781465123090883, |
| "learning_rate": 6.4481126067162235e-06, |
| "loss": 1.4485, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.045977011494253, |
| "grad_norm": 9.325027749699055, |
| "learning_rate": 6.40932983345125e-06, |
| "loss": 1.4869, |
| "step": 441 |
| }, |
| { |
| "epoch": 4.055172413793104, |
| "grad_norm": 12.498570864158324, |
| "learning_rate": 6.3706619431531134e-06, |
| "loss": 1.3256, |
| "step": 442 |
| }, |
| { |
| "epoch": 4.064367816091954, |
| "grad_norm": 6.668369910017107, |
| "learning_rate": 6.3321099034979435e-06, |
| "loss": 1.2178, |
| "step": 443 |
| }, |
| { |
| "epoch": 4.073563218390804, |
| "grad_norm": 8.81432890911392, |
| "learning_rate": 6.29367467926268e-06, |
| "loss": 1.3246, |
| "step": 444 |
| }, |
| { |
| "epoch": 4.082758620689655, |
| "grad_norm": 9.5021881868705, |
| "learning_rate": 6.2553572323009094e-06, |
| "loss": 1.1871, |
| "step": 445 |
| }, |
| { |
| "epoch": 4.091954022988506, |
| "grad_norm": 22.404258092859155, |
| "learning_rate": 6.217158521518818e-06, |
| "loss": 1.031, |
| "step": 446 |
| }, |
| { |
| "epoch": 4.101149425287356, |
| "grad_norm": 8.322221675044315, |
| "learning_rate": 6.179079502851167e-06, |
| "loss": 1.3306, |
| "step": 447 |
| }, |
| { |
| "epoch": 4.110344827586207, |
| "grad_norm": 6.926175392564334, |
| "learning_rate": 6.141121129237393e-06, |
| "loss": 1.4648, |
| "step": 448 |
| }, |
| { |
| "epoch": 4.119540229885057, |
| "grad_norm": 10.795367849841528, |
| "learning_rate": 6.103284350597757e-06, |
| "loss": 1.4771, |
| "step": 449 |
| }, |
| { |
| "epoch": 4.128735632183908, |
| "grad_norm": 14.500806487945011, |
| "learning_rate": 6.0655701138095605e-06, |
| "loss": 1.2192, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.137931034482759, |
| "grad_norm": 16.827094370255022, |
| "learning_rate": 6.027979362683454e-06, |
| "loss": 1.3679, |
| "step": 451 |
| }, |
| { |
| "epoch": 4.147126436781609, |
| "grad_norm": 8.463630645608777, |
| "learning_rate": 5.990513037939828e-06, |
| "loss": 1.3866, |
| "step": 452 |
| }, |
| { |
| "epoch": 4.1563218390804595, |
| "grad_norm": 8.548138098505374, |
| "learning_rate": 5.953172077185257e-06, |
| "loss": 1.4866, |
| "step": 453 |
| }, |
| { |
| "epoch": 4.165517241379311, |
| "grad_norm": 10.311093163627458, |
| "learning_rate": 5.915957414889049e-06, |
| "loss": 1.1892, |
| "step": 454 |
| }, |
| { |
| "epoch": 4.174712643678161, |
| "grad_norm": 11.532873997330638, |
| "learning_rate": 5.878869982359845e-06, |
| "loss": 1.3153, |
| "step": 455 |
| }, |
| { |
| "epoch": 4.183908045977011, |
| "grad_norm": 13.043804766692809, |
| "learning_rate": 5.841910707722327e-06, |
| "loss": 1.4138, |
| "step": 456 |
| }, |
| { |
| "epoch": 4.1931034482758625, |
| "grad_norm": 16.829895447641974, |
| "learning_rate": 5.805080515893983e-06, |
| "loss": 1.478, |
| "step": 457 |
| }, |
| { |
| "epoch": 4.202298850574713, |
| "grad_norm": 9.03056316789035, |
| "learning_rate": 5.7683803285619686e-06, |
| "loss": 1.361, |
| "step": 458 |
| }, |
| { |
| "epoch": 4.211494252873563, |
| "grad_norm": 8.787065551733527, |
| "learning_rate": 5.731811064160027e-06, |
| "loss": 1.3326, |
| "step": 459 |
| }, |
| { |
| "epoch": 4.220689655172414, |
| "grad_norm": 13.275359960429332, |
| "learning_rate": 5.695373637845521e-06, |
| "loss": 1.5723, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.2298850574712645, |
| "grad_norm": 17.444256387950272, |
| "learning_rate": 5.659068961476514e-06, |
| "loss": 1.3682, |
| "step": 461 |
| }, |
| { |
| "epoch": 4.239080459770115, |
| "grad_norm": 15.78925693833788, |
| "learning_rate": 5.622897943588974e-06, |
| "loss": 1.5834, |
| "step": 462 |
| }, |
| { |
| "epoch": 4.248275862068965, |
| "grad_norm": 11.080860880855491, |
| "learning_rate": 5.5868614893740135e-06, |
| "loss": 1.5276, |
| "step": 463 |
| }, |
| { |
| "epoch": 4.257471264367816, |
| "grad_norm": 10.022698346820714, |
| "learning_rate": 5.550960500655247e-06, |
| "loss": 1.3053, |
| "step": 464 |
| }, |
| { |
| "epoch": 4.266666666666667, |
| "grad_norm": 7.298963870340777, |
| "learning_rate": 5.515195875866231e-06, |
| "loss": 1.2085, |
| "step": 465 |
| }, |
| { |
| "epoch": 4.275862068965517, |
| "grad_norm": 14.37636573814027, |
| "learning_rate": 5.479568510027963e-06, |
| "loss": 1.545, |
| "step": 466 |
| }, |
| { |
| "epoch": 4.285057471264368, |
| "grad_norm": 17.666776715292947, |
| "learning_rate": 5.444079294726491e-06, |
| "loss": 1.2861, |
| "step": 467 |
| }, |
| { |
| "epoch": 4.294252873563218, |
| "grad_norm": 8.829651952832249, |
| "learning_rate": 5.408729118090613e-06, |
| "loss": 1.2104, |
| "step": 468 |
| }, |
| { |
| "epoch": 4.303448275862069, |
| "grad_norm": 9.0505304099472, |
| "learning_rate": 5.373518864769627e-06, |
| "loss": 1.1511, |
| "step": 469 |
| }, |
| { |
| "epoch": 4.31264367816092, |
| "grad_norm": 8.787289804009772, |
| "learning_rate": 5.338449415911216e-06, |
| "loss": 1.5977, |
| "step": 470 |
| }, |
| { |
| "epoch": 4.32183908045977, |
| "grad_norm": 8.815130387834458, |
| "learning_rate": 5.30352164913938e-06, |
| "loss": 1.1611, |
| "step": 471 |
| }, |
| { |
| "epoch": 4.3310344827586205, |
| "grad_norm": 10.238419809690685, |
| "learning_rate": 5.268736438532487e-06, |
| "loss": 1.392, |
| "step": 472 |
| }, |
| { |
| "epoch": 4.340229885057472, |
| "grad_norm": 11.519780193080537, |
| "learning_rate": 5.234094654601386e-06, |
| "loss": 1.2612, |
| "step": 473 |
| }, |
| { |
| "epoch": 4.349425287356322, |
| "grad_norm": 7.1767835668590205, |
| "learning_rate": 5.199597164267637e-06, |
| "loss": 1.2562, |
| "step": 474 |
| }, |
| { |
| "epoch": 4.358620689655172, |
| "grad_norm": 12.876197100579766, |
| "learning_rate": 5.1652448308417935e-06, |
| "loss": 1.4492, |
| "step": 475 |
| }, |
| { |
| "epoch": 4.3678160919540225, |
| "grad_norm": 17.282709384518455, |
| "learning_rate": 5.131038514001825e-06, |
| "loss": 1.2496, |
| "step": 476 |
| }, |
| { |
| "epoch": 4.377011494252874, |
| "grad_norm": 14.639340531142409, |
| "learning_rate": 5.096979069771579e-06, |
| "loss": 1.4873, |
| "step": 477 |
| }, |
| { |
| "epoch": 4.386206896551724, |
| "grad_norm": 14.24447446889582, |
| "learning_rate": 5.063067350499382e-06, |
| "loss": 1.2217, |
| "step": 478 |
| }, |
| { |
| "epoch": 4.395402298850574, |
| "grad_norm": 37.86045060057325, |
| "learning_rate": 5.029304204836682e-06, |
| "loss": 1.4817, |
| "step": 479 |
| }, |
| { |
| "epoch": 4.4045977011494255, |
| "grad_norm": 10.562532665897013, |
| "learning_rate": 4.9956904777168384e-06, |
| "loss": 1.4619, |
| "step": 480 |
| }, |
| { |
| "epoch": 4.413793103448276, |
| "grad_norm": 10.086438372444816, |
| "learning_rate": 4.96222701033396e-06, |
| "loss": 1.5967, |
| "step": 481 |
| }, |
| { |
| "epoch": 4.422988505747126, |
| "grad_norm": 10.497122443019308, |
| "learning_rate": 4.928914640121858e-06, |
| "loss": 1.1646, |
| "step": 482 |
| }, |
| { |
| "epoch": 4.432183908045977, |
| "grad_norm": 16.64984057120252, |
| "learning_rate": 4.895754200733085e-06, |
| "loss": 1.278, |
| "step": 483 |
| }, |
| { |
| "epoch": 4.441379310344828, |
| "grad_norm": 10.713798877735572, |
| "learning_rate": 4.8627465220180876e-06, |
| "loss": 1.5983, |
| "step": 484 |
| }, |
| { |
| "epoch": 4.450574712643678, |
| "grad_norm": 134.93369623218058, |
| "learning_rate": 4.8298924300044156e-06, |
| "loss": 1.3882, |
| "step": 485 |
| }, |
| { |
| "epoch": 4.459770114942529, |
| "grad_norm": 12.769535026952651, |
| "learning_rate": 4.797192746876076e-06, |
| "loss": 1.3936, |
| "step": 486 |
| }, |
| { |
| "epoch": 4.468965517241379, |
| "grad_norm": 11.974390164411723, |
| "learning_rate": 4.764648290952932e-06, |
| "loss": 1.3739, |
| "step": 487 |
| }, |
| { |
| "epoch": 4.47816091954023, |
| "grad_norm": 13.16047182415963, |
| "learning_rate": 4.732259876670246e-06, |
| "loss": 1.4498, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.487356321839081, |
| "grad_norm": 14.777382947651647, |
| "learning_rate": 4.7000283145582895e-06, |
| "loss": 1.1714, |
| "step": 489 |
| }, |
| { |
| "epoch": 4.496551724137931, |
| "grad_norm": 12.02307948203342, |
| "learning_rate": 4.6679544112220556e-06, |
| "loss": 1.5671, |
| "step": 490 |
| }, |
| { |
| "epoch": 4.505747126436781, |
| "grad_norm": 13.098215516758408, |
| "learning_rate": 4.636038969321073e-06, |
| "loss": 1.5305, |
| "step": 491 |
| }, |
| { |
| "epoch": 4.514942528735633, |
| "grad_norm": 15.243691962740888, |
| "learning_rate": 4.604282787549332e-06, |
| "loss": 1.5576, |
| "step": 492 |
| }, |
| { |
| "epoch": 4.524137931034483, |
| "grad_norm": 11.192240538762729, |
| "learning_rate": 4.572686660615285e-06, |
| "loss": 1.1947, |
| "step": 493 |
| }, |
| { |
| "epoch": 4.533333333333333, |
| "grad_norm": 10.14659787199047, |
| "learning_rate": 4.541251379221955e-06, |
| "loss": 1.4249, |
| "step": 494 |
| }, |
| { |
| "epoch": 4.5425287356321835, |
| "grad_norm": 9.650402852268794, |
| "learning_rate": 4.509977730047164e-06, |
| "loss": 1.3046, |
| "step": 495 |
| }, |
| { |
| "epoch": 4.551724137931035, |
| "grad_norm": 14.130488430747782, |
| "learning_rate": 4.47886649572383e-06, |
| "loss": 1.6035, |
| "step": 496 |
| }, |
| { |
| "epoch": 4.560919540229885, |
| "grad_norm": 11.192178018552626, |
| "learning_rate": 4.447918454820396e-06, |
| "loss": 1.298, |
| "step": 497 |
| }, |
| { |
| "epoch": 4.570114942528735, |
| "grad_norm": 12.405597150681189, |
| "learning_rate": 4.417134381821326e-06, |
| "loss": 1.5134, |
| "step": 498 |
| }, |
| { |
| "epoch": 4.5793103448275865, |
| "grad_norm": 10.531396717925594, |
| "learning_rate": 4.386515047107751e-06, |
| "loss": 1.4031, |
| "step": 499 |
| }, |
| { |
| "epoch": 4.588505747126437, |
| "grad_norm": 13.099105387499396, |
| "learning_rate": 4.356061216938159e-06, |
| "loss": 1.4768, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.597701149425287, |
| "grad_norm": 12.359508812966814, |
| "learning_rate": 4.325773653429247e-06, |
| "loss": 1.2485, |
| "step": 501 |
| }, |
| { |
| "epoch": 4.606896551724138, |
| "grad_norm": 31.377171273518833, |
| "learning_rate": 4.2956531145368285e-06, |
| "loss": 1.2531, |
| "step": 502 |
| }, |
| { |
| "epoch": 4.6160919540229886, |
| "grad_norm": 13.014730836178563, |
| "learning_rate": 4.265700354036876e-06, |
| "loss": 1.5782, |
| "step": 503 |
| }, |
| { |
| "epoch": 4.625287356321839, |
| "grad_norm": 10.599618786371353, |
| "learning_rate": 4.235916121506657e-06, |
| "loss": 1.1847, |
| "step": 504 |
| }, |
| { |
| "epoch": 4.63448275862069, |
| "grad_norm": 34.427638598511074, |
| "learning_rate": 4.206301162305973e-06, |
| "loss": 1.4019, |
| "step": 505 |
| }, |
| { |
| "epoch": 4.64367816091954, |
| "grad_norm": 19.310795806347553, |
| "learning_rate": 4.176856217558502e-06, |
| "loss": 1.5381, |
| "step": 506 |
| }, |
| { |
| "epoch": 4.652873563218391, |
| "grad_norm": 14.82958137975314, |
| "learning_rate": 4.147582024133265e-06, |
| "loss": 1.5117, |
| "step": 507 |
| }, |
| { |
| "epoch": 4.662068965517241, |
| "grad_norm": 11.090032542872493, |
| "learning_rate": 4.118479314626168e-06, |
| "loss": 1.4451, |
| "step": 508 |
| }, |
| { |
| "epoch": 4.671264367816092, |
| "grad_norm": 11.955549282502883, |
| "learning_rate": 4.089548817341689e-06, |
| "loss": 1.1528, |
| "step": 509 |
| }, |
| { |
| "epoch": 4.680459770114942, |
| "grad_norm": 28.85360373708239, |
| "learning_rate": 4.0607912562746265e-06, |
| "loss": 1.5181, |
| "step": 510 |
| }, |
| { |
| "epoch": 4.689655172413794, |
| "grad_norm": 10.621370388777802, |
| "learning_rate": 4.032207351092009e-06, |
| "loss": 1.213, |
| "step": 511 |
| }, |
| { |
| "epoch": 4.698850574712644, |
| "grad_norm": 18.051763664566412, |
| "learning_rate": 4.003797817115066e-06, |
| "loss": 1.4712, |
| "step": 512 |
| }, |
| { |
| "epoch": 4.708045977011494, |
| "grad_norm": 23.332927849711663, |
| "learning_rate": 3.975563365301336e-06, |
| "loss": 1.3973, |
| "step": 513 |
| }, |
| { |
| "epoch": 4.7172413793103445, |
| "grad_norm": 11.162486781374527, |
| "learning_rate": 3.9475047022268644e-06, |
| "loss": 1.5162, |
| "step": 514 |
| }, |
| { |
| "epoch": 4.726436781609196, |
| "grad_norm": 10.059503420640997, |
| "learning_rate": 3.919622530068535e-06, |
| "loss": 1.3472, |
| "step": 515 |
| }, |
| { |
| "epoch": 4.735632183908046, |
| "grad_norm": 11.8247751074863, |
| "learning_rate": 3.8919175465864855e-06, |
| "loss": 1.2245, |
| "step": 516 |
| }, |
| { |
| "epoch": 4.744827586206896, |
| "grad_norm": 42.81481813808986, |
| "learning_rate": 3.864390445106658e-06, |
| "loss": 1.1561, |
| "step": 517 |
| }, |
| { |
| "epoch": 4.7540229885057474, |
| "grad_norm": 9.370699926092977, |
| "learning_rate": 3.837041914503432e-06, |
| "loss": 1.2819, |
| "step": 518 |
| }, |
| { |
| "epoch": 4.763218390804598, |
| "grad_norm": 45.58103480423792, |
| "learning_rate": 3.8098726391824015e-06, |
| "loss": 1.2213, |
| "step": 519 |
| }, |
| { |
| "epoch": 4.772413793103448, |
| "grad_norm": 11.95812186592475, |
| "learning_rate": 3.7828832990632402e-06, |
| "loss": 1.2812, |
| "step": 520 |
| }, |
| { |
| "epoch": 4.781609195402299, |
| "grad_norm": 10.169736090325921, |
| "learning_rate": 3.7560745695626877e-06, |
| "loss": 1.4757, |
| "step": 521 |
| }, |
| { |
| "epoch": 4.7908045977011495, |
| "grad_norm": 116.08773199573162, |
| "learning_rate": 3.7294471215776383e-06, |
| "loss": 1.3319, |
| "step": 522 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 18.16653418521717, |
| "learning_rate": 3.7030016214683684e-06, |
| "loss": 1.2273, |
| "step": 523 |
| }, |
| { |
| "epoch": 4.809195402298851, |
| "grad_norm": 13.735726905274648, |
| "learning_rate": 3.6767387310418446e-06, |
| "loss": 1.291, |
| "step": 524 |
| }, |
| { |
| "epoch": 4.818390804597701, |
| "grad_norm": 31.68598274978449, |
| "learning_rate": 3.6506591075351762e-06, |
| "loss": 1.4346, |
| "step": 525 |
| }, |
| { |
| "epoch": 4.827586206896552, |
| "grad_norm": 12.830965155921534, |
| "learning_rate": 3.624763403599151e-06, |
| "loss": 1.3724, |
| "step": 526 |
| }, |
| { |
| "epoch": 4.836781609195402, |
| "grad_norm": 10.039164607525647, |
| "learning_rate": 3.5990522672819186e-06, |
| "loss": 1.3728, |
| "step": 527 |
| }, |
| { |
| "epoch": 4.845977011494253, |
| "grad_norm": 8.219551415671267, |
| "learning_rate": 3.573526342012763e-06, |
| "loss": 1.1454, |
| "step": 528 |
| }, |
| { |
| "epoch": 4.855172413793103, |
| "grad_norm": 15.30123567798395, |
| "learning_rate": 3.5481862665860063e-06, |
| "loss": 1.4489, |
| "step": 529 |
| }, |
| { |
| "epoch": 4.864367816091954, |
| "grad_norm": 11.507622582090901, |
| "learning_rate": 3.5230326751450138e-06, |
| "loss": 1.4098, |
| "step": 530 |
| }, |
| { |
| "epoch": 4.873563218390805, |
| "grad_norm": 13.34855796184993, |
| "learning_rate": 3.4980661971663375e-06, |
| "loss": 1.5815, |
| "step": 531 |
| }, |
| { |
| "epoch": 4.882758620689655, |
| "grad_norm": 7.43809443209873, |
| "learning_rate": 3.473287457443949e-06, |
| "loss": 1.2174, |
| "step": 532 |
| }, |
| { |
| "epoch": 4.8919540229885055, |
| "grad_norm": 20.28836303983412, |
| "learning_rate": 3.448697076073618e-06, |
| "loss": 1.3706, |
| "step": 533 |
| }, |
| { |
| "epoch": 4.901149425287357, |
| "grad_norm": 24.204538560430375, |
| "learning_rate": 3.4242956684373785e-06, |
| "loss": 1.2004, |
| "step": 534 |
| }, |
| { |
| "epoch": 4.910344827586207, |
| "grad_norm": 9.678071221104739, |
| "learning_rate": 3.4000838451881447e-06, |
| "loss": 1.2744, |
| "step": 535 |
| }, |
| { |
| "epoch": 4.919540229885057, |
| "grad_norm": 9.534325720534405, |
| "learning_rate": 3.376062212234421e-06, |
| "loss": 1.1697, |
| "step": 536 |
| }, |
| { |
| "epoch": 4.928735632183908, |
| "grad_norm": 14.97433781895165, |
| "learning_rate": 3.3522313707251385e-06, |
| "loss": 1.5248, |
| "step": 537 |
| }, |
| { |
| "epoch": 4.937931034482759, |
| "grad_norm": 9.912862051218715, |
| "learning_rate": 3.328591917034608e-06, |
| "loss": 1.3452, |
| "step": 538 |
| }, |
| { |
| "epoch": 4.947126436781609, |
| "grad_norm": 12.538408477826952, |
| "learning_rate": 3.3051444427476095e-06, |
| "loss": 1.1771, |
| "step": 539 |
| }, |
| { |
| "epoch": 4.956321839080459, |
| "grad_norm": 9.274005905173206, |
| "learning_rate": 3.2818895346445656e-06, |
| "loss": 1.3837, |
| "step": 540 |
| }, |
| { |
| "epoch": 4.9655172413793105, |
| "grad_norm": 14.74869885668137, |
| "learning_rate": 3.2588277746868825e-06, |
| "loss": 1.2489, |
| "step": 541 |
| }, |
| { |
| "epoch": 4.974712643678161, |
| "grad_norm": 9.239237891584507, |
| "learning_rate": 3.235959740002361e-06, |
| "loss": 1.3102, |
| "step": 542 |
| }, |
| { |
| "epoch": 4.983908045977012, |
| "grad_norm": 10.22129387678354, |
| "learning_rate": 3.2132860028707758e-06, |
| "loss": 1.213, |
| "step": 543 |
| }, |
| { |
| "epoch": 4.993103448275862, |
| "grad_norm": 11.074155532612387, |
| "learning_rate": 3.1908071307095377e-06, |
| "loss": 1.1949, |
| "step": 544 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 11.074155532612387, |
| "learning_rate": 3.1685236860595066e-06, |
| "loss": 0.9934, |
| "step": 545 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 648, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 304722427510784.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|