| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9959003468937244, | |
| "eval_steps": 500, | |
| "global_step": 396, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005045726900031536, | |
| "grad_norm": 0.16816571847556824, | |
| "learning_rate": 2.9999839160139495e-06, | |
| "loss": 0.7782, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010091453800063072, | |
| "grad_norm": 0.1469143977253523, | |
| "learning_rate": 2.9999356645057024e-06, | |
| "loss": 0.6817, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.015137180700094607, | |
| "grad_norm": 0.07996774677933757, | |
| "learning_rate": 2.9998552468249567e-06, | |
| "loss": 0.6735, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.020182907600126143, | |
| "grad_norm": 0.0800127664777818, | |
| "learning_rate": 2.999742665221167e-06, | |
| "loss": 0.6569, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02522863450015768, | |
| "grad_norm": 0.08070188267575489, | |
| "learning_rate": 2.999597922843484e-06, | |
| "loss": 0.6283, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.030274361400189215, | |
| "grad_norm": 0.06839180655145351, | |
| "learning_rate": 2.999421023740663e-06, | |
| "loss": 0.6446, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03532008830022075, | |
| "grad_norm": 0.05534188923028301, | |
| "learning_rate": 2.9992119728609516e-06, | |
| "loss": 0.6371, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.040365815200252286, | |
| "grad_norm": 0.07094943987370793, | |
| "learning_rate": 2.9989707760519526e-06, | |
| "loss": 0.6111, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04541154210028382, | |
| "grad_norm": 0.06436005389698786, | |
| "learning_rate": 2.9986974400604593e-06, | |
| "loss": 0.588, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.05045726900031536, | |
| "grad_norm": 0.05699223365274786, | |
| "learning_rate": 2.9983919725322667e-06, | |
| "loss": 0.6101, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.055502995900346894, | |
| "grad_norm": 0.058843386030182285, | |
| "learning_rate": 2.9980543820119585e-06, | |
| "loss": 0.6047, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.06054872280037843, | |
| "grad_norm": 0.047228554008764044, | |
| "learning_rate": 2.997684677942667e-06, | |
| "loss": 0.5937, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.06559444970040997, | |
| "grad_norm": 0.04830399085917525, | |
| "learning_rate": 2.9972828706658102e-06, | |
| "loss": 0.6448, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0706401766004415, | |
| "grad_norm": 0.04469640349332499, | |
| "learning_rate": 2.996848971420801e-06, | |
| "loss": 0.6145, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.07568590350047304, | |
| "grad_norm": 0.048907003957727534, | |
| "learning_rate": 2.996382992344734e-06, | |
| "loss": 0.5755, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08073163040050457, | |
| "grad_norm": 0.04502223888969105, | |
| "learning_rate": 2.9958849464720457e-06, | |
| "loss": 0.5765, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08577735730053611, | |
| "grad_norm": 0.04485565875842678, | |
| "learning_rate": 2.9953548477341497e-06, | |
| "loss": 0.6364, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.09082308420056764, | |
| "grad_norm": 0.04319237430058616, | |
| "learning_rate": 2.9947927109590477e-06, | |
| "loss": 0.568, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.09586881110059918, | |
| "grad_norm": 0.042093297202993624, | |
| "learning_rate": 2.994198551870913e-06, | |
| "loss": 0.6184, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10091453800063072, | |
| "grad_norm": 0.04087623899598573, | |
| "learning_rate": 2.993572387089653e-06, | |
| "loss": 0.5822, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.10596026490066225, | |
| "grad_norm": 0.042619877493329586, | |
| "learning_rate": 2.992914234130442e-06, | |
| "loss": 0.5983, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.11100599180069379, | |
| "grad_norm": 0.04314774114784986, | |
| "learning_rate": 2.9922241114032345e-06, | |
| "loss": 0.6058, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.11605171870072532, | |
| "grad_norm": 0.04125496902035363, | |
| "learning_rate": 2.9915020382122458e-06, | |
| "loss": 0.5741, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.12109744560075686, | |
| "grad_norm": 0.03985368427853683, | |
| "learning_rate": 2.990748034755415e-06, | |
| "loss": 0.6002, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.1261431725007884, | |
| "grad_norm": 0.04603566805698703, | |
| "learning_rate": 2.9899621221238394e-06, | |
| "loss": 0.5616, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.13118889940081993, | |
| "grad_norm": 0.033944581121186666, | |
| "learning_rate": 2.989144322301186e-06, | |
| "loss": 0.591, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.13623462630085148, | |
| "grad_norm": 0.0352127486146018, | |
| "learning_rate": 2.988294658163073e-06, | |
| "loss": 0.575, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.141280353200883, | |
| "grad_norm": 0.04026082684896548, | |
| "learning_rate": 2.9874131534764325e-06, | |
| "loss": 0.5783, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.14632608010091455, | |
| "grad_norm": 0.038584952671910096, | |
| "learning_rate": 2.9864998328988463e-06, | |
| "loss": 0.5814, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.15137180700094607, | |
| "grad_norm": 0.03294755370363045, | |
| "learning_rate": 2.985554721977853e-06, | |
| "loss": 0.5688, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.15641753390097762, | |
| "grad_norm": 0.035774614388450525, | |
| "learning_rate": 2.984577847150239e-06, | |
| "loss": 0.5914, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.16146326080100915, | |
| "grad_norm": 0.04512017281393784, | |
| "learning_rate": 2.983569235741291e-06, | |
| "loss": 0.557, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.1665089877010407, | |
| "grad_norm": 0.03447545680264101, | |
| "learning_rate": 2.9825289159640397e-06, | |
| "loss": 0.568, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.17155471460107222, | |
| "grad_norm": 0.033658505681229516, | |
| "learning_rate": 2.9814569169184642e-06, | |
| "loss": 0.5868, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.17660044150110377, | |
| "grad_norm": 0.03071546221735757, | |
| "learning_rate": 2.980353268590683e-06, | |
| "loss": 0.5487, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1816461684011353, | |
| "grad_norm": 0.07417860742940319, | |
| "learning_rate": 2.9792180018521128e-06, | |
| "loss": 0.6099, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.18669189530116684, | |
| "grad_norm": 0.032180325133544276, | |
| "learning_rate": 2.978051148458604e-06, | |
| "loss": 0.5939, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.19173762220119836, | |
| "grad_norm": 0.031347752245340116, | |
| "learning_rate": 2.976852741049554e-06, | |
| "loss": 0.5764, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1967833491012299, | |
| "grad_norm": 0.035873383222778825, | |
| "learning_rate": 2.975622813146996e-06, | |
| "loss": 0.57, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.20182907600126143, | |
| "grad_norm": 0.03130302787258777, | |
| "learning_rate": 2.9743613991546548e-06, | |
| "loss": 0.5503, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.20687480290129298, | |
| "grad_norm": 0.04111552220221803, | |
| "learning_rate": 2.9730685343569934e-06, | |
| "loss": 0.6028, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.2119205298013245, | |
| "grad_norm": 0.031561335436647305, | |
| "learning_rate": 2.971744254918218e-06, | |
| "loss": 0.5682, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.21696625670135605, | |
| "grad_norm": 0.03466870924962832, | |
| "learning_rate": 2.9703885978812726e-06, | |
| "loss": 0.55, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.22201198360138757, | |
| "grad_norm": 0.03396258277418921, | |
| "learning_rate": 2.9690016011667974e-06, | |
| "loss": 0.5953, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.22705771050141912, | |
| "grad_norm": 0.033463552224919146, | |
| "learning_rate": 2.967583303572073e-06, | |
| "loss": 0.6231, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.23210343740145065, | |
| "grad_norm": 0.03747113039368738, | |
| "learning_rate": 2.9661337447699316e-06, | |
| "loss": 0.5742, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.2371491643014822, | |
| "grad_norm": 0.04159182229285405, | |
| "learning_rate": 2.9646529653076493e-06, | |
| "loss": 0.5681, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.24219489120151372, | |
| "grad_norm": 0.032311171301265075, | |
| "learning_rate": 2.9631410066058098e-06, | |
| "loss": 0.5464, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.24724061810154527, | |
| "grad_norm": 0.035494911254562625, | |
| "learning_rate": 2.9615979109571493e-06, | |
| "loss": 0.5377, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2522863450015768, | |
| "grad_norm": 0.032401750473671755, | |
| "learning_rate": 2.9600237215253696e-06, | |
| "loss": 0.6043, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.25733207190160834, | |
| "grad_norm": 0.03477400444401883, | |
| "learning_rate": 2.9584184823439337e-06, | |
| "loss": 0.6078, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.26237779880163986, | |
| "grad_norm": 0.03586539534553979, | |
| "learning_rate": 2.9567822383148315e-06, | |
| "loss": 0.5857, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2674235257016714, | |
| "grad_norm": 0.034776366845092124, | |
| "learning_rate": 2.955115035207326e-06, | |
| "loss": 0.5652, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.27246925260170296, | |
| "grad_norm": 0.047916672806890825, | |
| "learning_rate": 2.953416919656672e-06, | |
| "loss": 0.529, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2775149795017345, | |
| "grad_norm": 0.035512253032401846, | |
| "learning_rate": 2.9516879391628125e-06, | |
| "loss": 0.6018, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.282560706401766, | |
| "grad_norm": 0.0669654595534551, | |
| "learning_rate": 2.9499281420890474e-06, | |
| "loss": 0.5832, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.2876064333017975, | |
| "grad_norm": 0.04009377576904152, | |
| "learning_rate": 2.948137577660685e-06, | |
| "loss": 0.5376, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.2926521602018291, | |
| "grad_norm": 0.05517678453435375, | |
| "learning_rate": 2.946316295963661e-06, | |
| "loss": 0.5725, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.2976978871018606, | |
| "grad_norm": 0.040682905696082294, | |
| "learning_rate": 2.9444643479431393e-06, | |
| "loss": 0.5887, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.30274361400189215, | |
| "grad_norm": 0.044774454426992336, | |
| "learning_rate": 2.9425817854020873e-06, | |
| "loss": 0.5756, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.30778934090192367, | |
| "grad_norm": 0.03263011910584542, | |
| "learning_rate": 2.940668660999826e-06, | |
| "loss": 0.5693, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.31283506780195525, | |
| "grad_norm": 0.032496230797448664, | |
| "learning_rate": 2.9387250282505583e-06, | |
| "loss": 0.586, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.31788079470198677, | |
| "grad_norm": 0.03310861754959189, | |
| "learning_rate": 2.9367509415218687e-06, | |
| "loss": 0.5548, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.3229265216020183, | |
| "grad_norm": 0.031816229512850104, | |
| "learning_rate": 2.9347464560332084e-06, | |
| "loss": 0.6, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.3279722485020498, | |
| "grad_norm": 0.036465675122600016, | |
| "learning_rate": 2.932711627854344e-06, | |
| "loss": 0.5613, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3330179754020814, | |
| "grad_norm": 0.03107217546123426, | |
| "learning_rate": 2.9306465139037947e-06, | |
| "loss": 0.5421, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.3380637023021129, | |
| "grad_norm": 0.031633841290591734, | |
| "learning_rate": 2.9285511719472367e-06, | |
| "loss": 0.58, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.34310942920214443, | |
| "grad_norm": 0.030853855883844275, | |
| "learning_rate": 2.9264256605958885e-06, | |
| "loss": 0.5496, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.34815515610217596, | |
| "grad_norm": 0.036265638918391636, | |
| "learning_rate": 2.924270039304873e-06, | |
| "loss": 0.5939, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.35320088300220753, | |
| "grad_norm": 0.03703293289195566, | |
| "learning_rate": 2.9220843683715497e-06, | |
| "loss": 0.5311, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.35824660990223905, | |
| "grad_norm": 0.07682301940151573, | |
| "learning_rate": 2.9198687089338345e-06, | |
| "loss": 0.5655, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3632923368022706, | |
| "grad_norm": 0.03240731857642153, | |
| "learning_rate": 2.9176231229684835e-06, | |
| "loss": 0.5436, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3683380637023021, | |
| "grad_norm": 0.03550209155305971, | |
| "learning_rate": 2.9153476732893646e-06, | |
| "loss": 0.5529, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3733837906023337, | |
| "grad_norm": 0.03572110732287988, | |
| "learning_rate": 2.913042423545696e-06, | |
| "loss": 0.5601, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3784295175023652, | |
| "grad_norm": 0.030318267187340705, | |
| "learning_rate": 2.910707438220269e-06, | |
| "loss": 0.5827, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.3834752444023967, | |
| "grad_norm": 0.030779462298936085, | |
| "learning_rate": 2.9083427826276414e-06, | |
| "loss": 0.5366, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.38852097130242824, | |
| "grad_norm": 0.033078267956613755, | |
| "learning_rate": 2.905948522912315e-06, | |
| "loss": 0.5769, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.3935666982024598, | |
| "grad_norm": 0.032022182515529865, | |
| "learning_rate": 2.90352472604688e-06, | |
| "loss": 0.6059, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.39861242510249134, | |
| "grad_norm": 0.032572741826790486, | |
| "learning_rate": 2.901071459830145e-06, | |
| "loss": 0.5325, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.40365815200252286, | |
| "grad_norm": 0.03322477222052267, | |
| "learning_rate": 2.89858879288524e-06, | |
| "loss": 0.6102, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4087038789025544, | |
| "grad_norm": 0.03290381540817977, | |
| "learning_rate": 2.896076794657696e-06, | |
| "loss": 0.5297, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.41374960580258596, | |
| "grad_norm": 0.02986308712893659, | |
| "learning_rate": 2.893535535413504e-06, | |
| "loss": 0.6016, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.4187953327026175, | |
| "grad_norm": 0.03708195442407903, | |
| "learning_rate": 2.8909650862371465e-06, | |
| "loss": 0.5644, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.423841059602649, | |
| "grad_norm": 0.05585756602200335, | |
| "learning_rate": 2.888365519029615e-06, | |
| "loss": 0.5645, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.42888678650268053, | |
| "grad_norm": 0.03232081336561299, | |
| "learning_rate": 2.8857369065063893e-06, | |
| "loss": 0.5492, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.4339325134027121, | |
| "grad_norm": 0.03807439954613977, | |
| "learning_rate": 2.883079322195415e-06, | |
| "loss": 0.5694, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4389782403027436, | |
| "grad_norm": 0.03718669059491054, | |
| "learning_rate": 2.880392840435036e-06, | |
| "loss": 0.5603, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.44402396720277515, | |
| "grad_norm": 0.02993361885707706, | |
| "learning_rate": 2.8776775363719244e-06, | |
| "loss": 0.5193, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.44906969410280667, | |
| "grad_norm": 0.03471859873605538, | |
| "learning_rate": 2.8749334859589696e-06, | |
| "loss": 0.5195, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.45411542100283825, | |
| "grad_norm": 0.03468783264087511, | |
| "learning_rate": 2.872160765953162e-06, | |
| "loss": 0.5685, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.45916114790286977, | |
| "grad_norm": 0.06028818099354049, | |
| "learning_rate": 2.86935945391344e-06, | |
| "loss": 0.5875, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4642068748029013, | |
| "grad_norm": 0.03189592063110031, | |
| "learning_rate": 2.8665296281985232e-06, | |
| "loss": 0.5627, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4692526017029328, | |
| "grad_norm": 0.032489964455779306, | |
| "learning_rate": 2.8636713679647195e-06, | |
| "loss": 0.5398, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.4742983286029644, | |
| "grad_norm": 0.03300509062363307, | |
| "learning_rate": 2.8607847531637127e-06, | |
| "loss": 0.5675, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.4793440555029959, | |
| "grad_norm": 0.034805575232998785, | |
| "learning_rate": 2.857869864540323e-06, | |
| "loss": 0.5526, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.48438978240302744, | |
| "grad_norm": 0.03304213276713402, | |
| "learning_rate": 2.854926783630253e-06, | |
| "loss": 0.5475, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.48943550930305896, | |
| "grad_norm": 0.03753659611183512, | |
| "learning_rate": 2.851955592757801e-06, | |
| "loss": 0.5511, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.49448123620309054, | |
| "grad_norm": 0.033892234979303396, | |
| "learning_rate": 2.848956375033562e-06, | |
| "loss": 0.5232, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.49952696310312206, | |
| "grad_norm": 0.037074509268233, | |
| "learning_rate": 2.845929214352105e-06, | |
| "loss": 0.5655, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5045726900031536, | |
| "grad_norm": 0.03224402404614455, | |
| "learning_rate": 2.8428741953896195e-06, | |
| "loss": 0.5556, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5096184169031851, | |
| "grad_norm": 0.03072821069928307, | |
| "learning_rate": 2.839791403601555e-06, | |
| "loss": 0.5472, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5146641438032167, | |
| "grad_norm": 0.03380249813156003, | |
| "learning_rate": 2.8366809252202235e-06, | |
| "loss": 0.5413, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5197098707032481, | |
| "grad_norm": 0.0335216929092493, | |
| "learning_rate": 2.8335428472523927e-06, | |
| "loss": 0.5479, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5247555976032797, | |
| "grad_norm": 0.030808915999862914, | |
| "learning_rate": 2.8303772574768482e-06, | |
| "loss": 0.548, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5298013245033113, | |
| "grad_norm": 0.02912454306651085, | |
| "learning_rate": 2.8271842444419414e-06, | |
| "loss": 0.548, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5348470514033428, | |
| "grad_norm": 0.05012632142858796, | |
| "learning_rate": 2.8239638974631112e-06, | |
| "loss": 0.5152, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5398927783033743, | |
| "grad_norm": 0.0344361048789296, | |
| "learning_rate": 2.8207163066203843e-06, | |
| "loss": 0.5698, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5449385052034059, | |
| "grad_norm": 0.10550189124699653, | |
| "learning_rate": 2.8174415627558584e-06, | |
| "loss": 0.522, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5499842321034374, | |
| "grad_norm": 0.0323910546595259, | |
| "learning_rate": 2.8141397574711587e-06, | |
| "loss": 0.5518, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.555029959003469, | |
| "grad_norm": 0.032448012717327133, | |
| "learning_rate": 2.810810983124877e-06, | |
| "loss": 0.5839, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5600756859035004, | |
| "grad_norm": 0.03112632332891334, | |
| "learning_rate": 2.807455332829987e-06, | |
| "loss": 0.5635, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.565121412803532, | |
| "grad_norm": 0.035282498495490644, | |
| "learning_rate": 2.8040729004512415e-06, | |
| "loss": 0.535, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5701671397035636, | |
| "grad_norm": 0.02966255382156268, | |
| "learning_rate": 2.800663780602545e-06, | |
| "loss": 0.5492, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.575212866603595, | |
| "grad_norm": 0.03707668859813438, | |
| "learning_rate": 2.7972280686443077e-06, | |
| "loss": 0.5663, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.5802585935036266, | |
| "grad_norm": 0.03659428904439035, | |
| "learning_rate": 2.793765860680779e-06, | |
| "loss": 0.542, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.5853043204036582, | |
| "grad_norm": 0.035159210590374, | |
| "learning_rate": 2.790277253557359e-06, | |
| "loss": 0.5738, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5903500473036897, | |
| "grad_norm": 0.030990014790480254, | |
| "learning_rate": 2.7867623448578863e-06, | |
| "loss": 0.5892, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.5953957742037213, | |
| "grad_norm": 0.03399071129755366, | |
| "learning_rate": 2.783221232901914e-06, | |
| "loss": 0.5677, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6004415011037527, | |
| "grad_norm": 0.03247591991435437, | |
| "learning_rate": 2.7796540167419567e-06, | |
| "loss": 0.5412, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.6054872280037843, | |
| "grad_norm": 0.032370031907677656, | |
| "learning_rate": 2.7760607961607174e-06, | |
| "loss": 0.5556, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6105329549038159, | |
| "grad_norm": 0.033174635710333106, | |
| "learning_rate": 2.7724416716683005e-06, | |
| "loss": 0.5668, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6155786818038473, | |
| "grad_norm": 0.03148435780507138, | |
| "learning_rate": 2.7687967444993976e-06, | |
| "loss": 0.5205, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6206244087038789, | |
| "grad_norm": 0.03187378459330813, | |
| "learning_rate": 2.7651261166104574e-06, | |
| "loss": 0.5563, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6256701356039105, | |
| "grad_norm": 0.03429711811644175, | |
| "learning_rate": 2.7614298906768316e-06, | |
| "loss": 0.5167, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.630715862503942, | |
| "grad_norm": 0.03859718827910278, | |
| "learning_rate": 2.757708170089906e-06, | |
| "loss": 0.559, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6357615894039735, | |
| "grad_norm": 0.03623858619383074, | |
| "learning_rate": 2.7539610589542057e-06, | |
| "loss": 0.5795, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.640807316304005, | |
| "grad_norm": 0.03263585179382894, | |
| "learning_rate": 2.750188662084484e-06, | |
| "loss": 0.5566, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6458530432040366, | |
| "grad_norm": 0.03877113662246629, | |
| "learning_rate": 2.746391085002791e-06, | |
| "loss": 0.6018, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6508987701040682, | |
| "grad_norm": 0.03400954730094289, | |
| "learning_rate": 2.7425684339355203e-06, | |
| "loss": 0.5438, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6559444970040996, | |
| "grad_norm": 0.03140528838198611, | |
| "learning_rate": 2.7387208158104406e-06, | |
| "loss": 0.5554, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6609902239041312, | |
| "grad_norm": 0.037206585802644396, | |
| "learning_rate": 2.7348483382537015e-06, | |
| "loss": 0.5634, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.6660359508041628, | |
| "grad_norm": 0.03211586967766076, | |
| "learning_rate": 2.7309511095868246e-06, | |
| "loss": 0.5391, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.6710816777041942, | |
| "grad_norm": 0.03396187095971342, | |
| "learning_rate": 2.727029238823674e-06, | |
| "loss": 0.5406, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.6761274046042258, | |
| "grad_norm": 0.03502286862929664, | |
| "learning_rate": 2.7230828356674047e-06, | |
| "loss": 0.5753, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.6811731315042573, | |
| "grad_norm": 0.03214739270222773, | |
| "learning_rate": 2.7191120105073974e-06, | |
| "loss": 0.5245, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.6862188584042889, | |
| "grad_norm": 0.03387635714681519, | |
| "learning_rate": 2.7151168744161664e-06, | |
| "loss": 0.54, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.6912645853043204, | |
| "grad_norm": 0.032951263305626276, | |
| "learning_rate": 2.7110975391462574e-06, | |
| "loss": 0.5259, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.6963103122043519, | |
| "grad_norm": 0.05015537211126262, | |
| "learning_rate": 2.707054117127118e-06, | |
| "loss": 0.5267, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7013560391043835, | |
| "grad_norm": 0.035135429069184716, | |
| "learning_rate": 2.7029867214619533e-06, | |
| "loss": 0.5518, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.7064017660044151, | |
| "grad_norm": 0.0320337788657044, | |
| "learning_rate": 2.698895465924565e-06, | |
| "loss": 0.5555, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7114474929044465, | |
| "grad_norm": 0.03645990761972981, | |
| "learning_rate": 2.6947804649561633e-06, | |
| "loss": 0.572, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7164932198044781, | |
| "grad_norm": 0.03470519450277183, | |
| "learning_rate": 2.6906418336621724e-06, | |
| "loss": 0.5505, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7215389467045096, | |
| "grad_norm": 0.031235480044832488, | |
| "learning_rate": 2.686479687809006e-06, | |
| "loss": 0.5377, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7265846736045412, | |
| "grad_norm": 0.029941451042590675, | |
| "learning_rate": 2.6822941438208306e-06, | |
| "loss": 0.5381, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7316304005045727, | |
| "grad_norm": 0.035617952049989555, | |
| "learning_rate": 2.6780853187763096e-06, | |
| "loss": 0.5546, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7366761274046042, | |
| "grad_norm": 0.03343536591046196, | |
| "learning_rate": 2.673853330405326e-06, | |
| "loss": 0.5519, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.7417218543046358, | |
| "grad_norm": 0.03110711029893078, | |
| "learning_rate": 2.6695982970856925e-06, | |
| "loss": 0.5744, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7467675812046674, | |
| "grad_norm": 0.030535258085941947, | |
| "learning_rate": 2.6653203378398375e-06, | |
| "loss": 0.5239, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.7518133081046988, | |
| "grad_norm": 0.06889481171001853, | |
| "learning_rate": 2.661019572331478e-06, | |
| "loss": 0.5445, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7568590350047304, | |
| "grad_norm": 0.034136168134648794, | |
| "learning_rate": 2.6566961208622696e-06, | |
| "loss": 0.5403, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 0.03361146386829785, | |
| "learning_rate": 2.652350104368444e-06, | |
| "loss": 0.5252, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.7669504888047934, | |
| "grad_norm": 0.03232558697340943, | |
| "learning_rate": 2.6479816444174253e-06, | |
| "loss": 0.5537, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.771996215704825, | |
| "grad_norm": 0.031246440682898686, | |
| "learning_rate": 2.643590863204429e-06, | |
| "loss": 0.5358, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.7770419426048565, | |
| "grad_norm": 0.03123193076665316, | |
| "learning_rate": 2.6391778835490438e-06, | |
| "loss": 0.5162, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.7820876695048881, | |
| "grad_norm": 0.042414916882850415, | |
| "learning_rate": 2.6347428288917972e-06, | |
| "loss": 0.5522, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.7871333964049196, | |
| "grad_norm": 0.03663820317771632, | |
| "learning_rate": 2.630285823290702e-06, | |
| "loss": 0.5395, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.7921791233049511, | |
| "grad_norm": 0.0323447950510978, | |
| "learning_rate": 2.625806991417786e-06, | |
| "loss": 0.5471, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.7972248502049827, | |
| "grad_norm": 0.03301071769705723, | |
| "learning_rate": 2.621306458555604e-06, | |
| "loss": 0.5529, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8022705771050141, | |
| "grad_norm": 0.03261309640826233, | |
| "learning_rate": 2.6167843505937356e-06, | |
| "loss": 0.5507, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8073163040050457, | |
| "grad_norm": 0.03240576468524181, | |
| "learning_rate": 2.6122407940252608e-06, | |
| "loss": 0.5468, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8123620309050773, | |
| "grad_norm": 0.03018210963467149, | |
| "learning_rate": 2.6076759159432237e-06, | |
| "loss": 0.5583, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8174077578051088, | |
| "grad_norm": 0.031036132663069035, | |
| "learning_rate": 2.603089844037078e-06, | |
| "loss": 0.5226, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8224534847051403, | |
| "grad_norm": 0.0347386534073021, | |
| "learning_rate": 2.5984827065891126e-06, | |
| "loss": 0.5529, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8274992116051719, | |
| "grad_norm": 0.044827581860260125, | |
| "learning_rate": 2.593854632470866e-06, | |
| "loss": 0.6117, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.8325449385052034, | |
| "grad_norm": 0.030210519399600917, | |
| "learning_rate": 2.5892057511395202e-06, | |
| "loss": 0.5436, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.837590665405235, | |
| "grad_norm": 0.031447473869825514, | |
| "learning_rate": 2.5845361926342794e-06, | |
| "loss": 0.5228, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.8426363923052664, | |
| "grad_norm": 0.035332567006398724, | |
| "learning_rate": 2.5798460875727326e-06, | |
| "loss": 0.5478, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.847682119205298, | |
| "grad_norm": 0.02886367248116525, | |
| "learning_rate": 2.575135567147201e-06, | |
| "loss": 0.5114, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8527278461053296, | |
| "grad_norm": 0.031532384271644245, | |
| "learning_rate": 2.5704047631210664e-06, | |
| "loss": 0.5623, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8577735730053611, | |
| "grad_norm": 0.034567108800704585, | |
| "learning_rate": 2.5656538078250873e-06, | |
| "loss": 0.5657, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8628192999053926, | |
| "grad_norm": 0.03160145253530057, | |
| "learning_rate": 2.560882834153696e-06, | |
| "loss": 0.5136, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.8678650268054242, | |
| "grad_norm": 0.03135869657202659, | |
| "learning_rate": 2.5560919755612823e-06, | |
| "loss": 0.544, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.8729107537054557, | |
| "grad_norm": 0.03597121576219098, | |
| "learning_rate": 2.5512813660584597e-06, | |
| "loss": 0.5152, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.8779564806054873, | |
| "grad_norm": 0.030221173382051915, | |
| "learning_rate": 2.5464511402083166e-06, | |
| "loss": 0.5251, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.8830022075055187, | |
| "grad_norm": 0.030000072996604673, | |
| "learning_rate": 2.541601433122654e-06, | |
| "loss": 0.5186, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.8880479344055503, | |
| "grad_norm": 0.03376922343601561, | |
| "learning_rate": 2.536732380458204e-06, | |
| "loss": 0.5164, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.8930936613055819, | |
| "grad_norm": 0.03156034083736404, | |
| "learning_rate": 2.531844118412837e-06, | |
| "loss": 0.5316, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.8981393882056133, | |
| "grad_norm": 0.03377666641180589, | |
| "learning_rate": 2.5269367837217488e-06, | |
| "loss": 0.5054, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.9031851151056449, | |
| "grad_norm": 0.030954166615192916, | |
| "learning_rate": 2.522010513653642e-06, | |
| "loss": 0.5256, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.9082308420056765, | |
| "grad_norm": 0.03484714660203487, | |
| "learning_rate": 2.517065446006878e-06, | |
| "loss": 0.5225, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.913276568905708, | |
| "grad_norm": 0.06223673456920668, | |
| "learning_rate": 2.5121017191056306e-06, | |
| "loss": 0.5207, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9183222958057395, | |
| "grad_norm": 0.04079770792332633, | |
| "learning_rate": 2.507119471796011e-06, | |
| "loss": 0.555, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.923368022705771, | |
| "grad_norm": 0.030029925631475055, | |
| "learning_rate": 2.5021188434421863e-06, | |
| "loss": 0.5435, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.9284137496058026, | |
| "grad_norm": 0.03519161994895625, | |
| "learning_rate": 2.4970999739224816e-06, | |
| "loss": 0.5817, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.9334594765058342, | |
| "grad_norm": 0.03194895322649527, | |
| "learning_rate": 2.492063003625466e-06, | |
| "loss": 0.5288, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.9385052034058656, | |
| "grad_norm": 0.03862824281648028, | |
| "learning_rate": 2.487008073446027e-06, | |
| "loss": 0.5428, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.9435509303058972, | |
| "grad_norm": 0.040703435887500077, | |
| "learning_rate": 2.481935324781427e-06, | |
| "loss": 0.5407, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.9485966572059288, | |
| "grad_norm": 0.03251789567309417, | |
| "learning_rate": 2.4768448995273514e-06, | |
| "loss": 0.5305, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.9536423841059603, | |
| "grad_norm": 0.031182531135357086, | |
| "learning_rate": 2.4717369400739372e-06, | |
| "loss": 0.5436, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.9586881110059918, | |
| "grad_norm": 0.033544076361382125, | |
| "learning_rate": 2.466611589301791e-06, | |
| "loss": 0.5849, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.9637338379060233, | |
| "grad_norm": 0.03146239237618079, | |
| "learning_rate": 2.4614689905779907e-06, | |
| "loss": 0.5424, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.9687795648060549, | |
| "grad_norm": 0.031938896005507596, | |
| "learning_rate": 2.4563092877520776e-06, | |
| "loss": 0.5541, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.9738252917060864, | |
| "grad_norm": 0.031136299616893074, | |
| "learning_rate": 2.4511326251520325e-06, | |
| "loss": 0.58, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.9788710186061179, | |
| "grad_norm": 0.03323472651029714, | |
| "learning_rate": 2.445939147580235e-06, | |
| "loss": 0.5073, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.9839167455061495, | |
| "grad_norm": 0.03213726601057571, | |
| "learning_rate": 2.4407290003094177e-06, | |
| "loss": 0.5758, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.9889624724061811, | |
| "grad_norm": 0.032768188978415214, | |
| "learning_rate": 2.4355023290785993e-06, | |
| "loss": 0.5354, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.9940081993062125, | |
| "grad_norm": 0.10156434597935675, | |
| "learning_rate": 2.4302592800890095e-06, | |
| "loss": 0.5784, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.9990539262062441, | |
| "grad_norm": 0.03993430739998164, | |
| "learning_rate": 2.425e-06, | |
| "loss": 0.532, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.0018921475875118, | |
| "grad_norm": 0.14663213929873462, | |
| "learning_rate": 2.4197246359249405e-06, | |
| "loss": 0.7106, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.0069378744875435, | |
| "grad_norm": 0.03682337094632227, | |
| "learning_rate": 2.4144333354271033e-06, | |
| "loss": 0.4702, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.011983601387575, | |
| "grad_norm": 0.04044872570852846, | |
| "learning_rate": 2.4091262465155386e-06, | |
| "loss": 0.5213, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.0170293282876064, | |
| "grad_norm": 0.03956345299513896, | |
| "learning_rate": 2.403803517640932e-06, | |
| "loss": 0.488, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.022075055187638, | |
| "grad_norm": 0.035011481501724055, | |
| "learning_rate": 2.398465297691452e-06, | |
| "loss": 0.4754, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.0271207820876695, | |
| "grad_norm": 0.033861610990122235, | |
| "learning_rate": 2.393111735988585e-06, | |
| "loss": 0.4782, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.032166508987701, | |
| "grad_norm": 0.055350616536286666, | |
| "learning_rate": 2.387742982282961e-06, | |
| "loss": 0.479, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.0372122358877325, | |
| "grad_norm": 0.031693846556977066, | |
| "learning_rate": 2.3823591867501623e-06, | |
| "loss": 0.4708, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.0422579627877642, | |
| "grad_norm": 0.03527666796296507, | |
| "learning_rate": 2.376960499986522e-06, | |
| "loss": 0.504, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.0473036896877956, | |
| "grad_norm": 0.03114715236687362, | |
| "learning_rate": 2.3715470730049154e-06, | |
| "loss": 0.4656, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.052349416587827, | |
| "grad_norm": 0.03195304059581202, | |
| "learning_rate": 2.3661190572305315e-06, | |
| "loss": 0.5021, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.0573951434878588, | |
| "grad_norm": 0.041909938570619656, | |
| "learning_rate": 2.3606766044966404e-06, | |
| "loss": 0.4477, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.0624408703878903, | |
| "grad_norm": 0.031765017759599695, | |
| "learning_rate": 2.355219867040344e-06, | |
| "loss": 0.4852, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.0674865972879217, | |
| "grad_norm": 0.03226173748335202, | |
| "learning_rate": 2.3497489974983195e-06, | |
| "loss": 0.4499, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.0725323241879534, | |
| "grad_norm": 0.04089170994785027, | |
| "learning_rate": 2.3442641489025476e-06, | |
| "loss": 0.4763, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.0775780510879849, | |
| "grad_norm": 0.03266826231646906, | |
| "learning_rate": 2.3387654746760346e-06, | |
| "loss": 0.5058, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.0826237779880163, | |
| "grad_norm": 0.031198910926951966, | |
| "learning_rate": 2.333253128628519e-06, | |
| "loss": 0.4761, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.087669504888048, | |
| "grad_norm": 0.03390455032734163, | |
| "learning_rate": 2.3277272649521696e-06, | |
| "loss": 0.5087, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.0927152317880795, | |
| "grad_norm": 0.03890356772507247, | |
| "learning_rate": 2.3221880382172716e-06, | |
| "loss": 0.4581, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.097760958688111, | |
| "grad_norm": 0.03659037349548446, | |
| "learning_rate": 2.3166356033679037e-06, | |
| "loss": 0.4924, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.1028066855881424, | |
| "grad_norm": 0.03393150371468798, | |
| "learning_rate": 2.3110701157176058e-06, | |
| "loss": 0.467, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.1078524124881741, | |
| "grad_norm": 0.03457204743267538, | |
| "learning_rate": 2.3054917309450305e-06, | |
| "loss": 0.4769, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.1128981393882056, | |
| "grad_norm": 0.032520047837292926, | |
| "learning_rate": 2.2999006050895913e-06, | |
| "loss": 0.5045, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.117943866288237, | |
| "grad_norm": 0.031699959269331585, | |
| "learning_rate": 2.2942968945470975e-06, | |
| "loss": 0.4459, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.1229895931882687, | |
| "grad_norm": 0.03425055841117851, | |
| "learning_rate": 2.28868075606538e-06, | |
| "loss": 0.45, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.1280353200883002, | |
| "grad_norm": 0.031848100692100624, | |
| "learning_rate": 2.2830523467399035e-06, | |
| "loss": 0.483, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.1330810469883317, | |
| "grad_norm": 0.030558054743067897, | |
| "learning_rate": 2.2774118240093768e-06, | |
| "loss": 0.4711, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.1381267738883634, | |
| "grad_norm": 0.0348970127065318, | |
| "learning_rate": 2.2717593456513453e-06, | |
| "loss": 0.4469, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.1431725007883948, | |
| "grad_norm": 0.0374572663863973, | |
| "learning_rate": 2.26609506977778e-06, | |
| "loss": 0.4781, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.1482182276884263, | |
| "grad_norm": 0.03510434827876549, | |
| "learning_rate": 2.2604191548306524e-06, | |
| "loss": 0.4833, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.153263954588458, | |
| "grad_norm": 0.030172104427951332, | |
| "learning_rate": 2.2547317595775065e-06, | |
| "loss": 0.4599, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.1583096814884895, | |
| "grad_norm": 0.046734886663062365, | |
| "learning_rate": 2.2490330431070117e-06, | |
| "loss": 0.4527, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.163355408388521, | |
| "grad_norm": 0.03666142541656924, | |
| "learning_rate": 2.243323164824519e-06, | |
| "loss": 0.5086, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.1684011352885526, | |
| "grad_norm": 0.03333582987080111, | |
| "learning_rate": 2.2376022844475983e-06, | |
| "loss": 0.4892, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.173446862188584, | |
| "grad_norm": 0.044806971398086565, | |
| "learning_rate": 2.2318705620015707e-06, | |
| "loss": 0.5171, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.1784925890886155, | |
| "grad_norm": 0.02976391238091838, | |
| "learning_rate": 2.226128157815035e-06, | |
| "loss": 0.473, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.1835383159886472, | |
| "grad_norm": 0.032855191116779925, | |
| "learning_rate": 2.2203752325153805e-06, | |
| "loss": 0.4622, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.1885840428886787, | |
| "grad_norm": 0.030746957757418838, | |
| "learning_rate": 2.214611947024294e-06, | |
| "loss": 0.5058, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.1936297697887102, | |
| "grad_norm": 0.04025604127464795, | |
| "learning_rate": 2.20883846255326e-06, | |
| "loss": 0.4532, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.1986754966887416, | |
| "grad_norm": 0.038162603936688196, | |
| "learning_rate": 2.2030549405990507e-06, | |
| "loss": 0.4807, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.2037212235887733, | |
| "grad_norm": 0.032258710879740395, | |
| "learning_rate": 2.1972615429392072e-06, | |
| "loss": 0.4641, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.2087669504888048, | |
| "grad_norm": 0.04981629781222614, | |
| "learning_rate": 2.1914584316275165e-06, | |
| "loss": 0.5017, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.2138126773888362, | |
| "grad_norm": 0.04243199718669605, | |
| "learning_rate": 2.1856457689894754e-06, | |
| "loss": 0.4902, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.218858404288868, | |
| "grad_norm": 0.0342114934326818, | |
| "learning_rate": 2.179823717617754e-06, | |
| "loss": 0.5117, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.2239041311888994, | |
| "grad_norm": 0.035548749332123104, | |
| "learning_rate": 2.1739924403676444e-06, | |
| "loss": 0.4381, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.2289498580889309, | |
| "grad_norm": 0.034760487487630214, | |
| "learning_rate": 2.168152100352506e-06, | |
| "loss": 0.4591, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.2339955849889626, | |
| "grad_norm": 0.03546769169516817, | |
| "learning_rate": 2.1623028609392048e-06, | |
| "loss": 0.5399, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.239041311888994, | |
| "grad_norm": 0.030499056297799327, | |
| "learning_rate": 2.1564448857435402e-06, | |
| "loss": 0.4359, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.2440870387890255, | |
| "grad_norm": 0.0347599990737736, | |
| "learning_rate": 2.1505783386256712e-06, | |
| "loss": 0.4812, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.249132765689057, | |
| "grad_norm": 0.033702971243902105, | |
| "learning_rate": 2.1447033836855322e-06, | |
| "loss": 0.4722, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.2541784925890886, | |
| "grad_norm": 0.032222809178112814, | |
| "learning_rate": 2.1388201852582413e-06, | |
| "loss": 0.4685, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.2592242194891201, | |
| "grad_norm": 0.03251536305035892, | |
| "learning_rate": 2.1329289079095053e-06, | |
| "loss": 0.4863, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.2642699463891516, | |
| "grad_norm": 0.03032333505076748, | |
| "learning_rate": 2.127029716431017e-06, | |
| "loss": 0.4904, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.2693156732891833, | |
| "grad_norm": 0.03243078647811712, | |
| "learning_rate": 2.1211227758358416e-06, | |
| "loss": 0.4489, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.2743614001892147, | |
| "grad_norm": 0.029896600919276654, | |
| "learning_rate": 2.115208251353806e-06, | |
| "loss": 0.44, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.2794071270892462, | |
| "grad_norm": 0.03513886472785278, | |
| "learning_rate": 2.109286308426875e-06, | |
| "loss": 0.4469, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.2844528539892779, | |
| "grad_norm": 0.034551710763168, | |
| "learning_rate": 2.103357112704522e-06, | |
| "loss": 0.4679, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.2894985808893094, | |
| "grad_norm": 0.03318123641296534, | |
| "learning_rate": 2.0974208300390965e-06, | |
| "loss": 0.5459, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.2945443077893408, | |
| "grad_norm": 0.03692313739508695, | |
| "learning_rate": 2.0914776264811856e-06, | |
| "loss": 0.4515, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.2995900346893725, | |
| "grad_norm": 0.045176468474857956, | |
| "learning_rate": 2.0855276682749695e-06, | |
| "loss": 0.5145, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.304635761589404, | |
| "grad_norm": 0.031189748556156745, | |
| "learning_rate": 2.0795711218535688e-06, | |
| "loss": 0.4604, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.3096814884894354, | |
| "grad_norm": 0.03325438648076318, | |
| "learning_rate": 2.0736081538343916e-06, | |
| "loss": 0.4993, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.3147272153894671, | |
| "grad_norm": 0.031199262961620483, | |
| "learning_rate": 2.0676389310144718e-06, | |
| "loss": 0.4362, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.3197729422894986, | |
| "grad_norm": 0.031944131266326836, | |
| "learning_rate": 2.0616636203658033e-06, | |
| "loss": 0.4564, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.32481866918953, | |
| "grad_norm": 0.03473404208713477, | |
| "learning_rate": 2.0556823890306702e-06, | |
| "loss": 0.4812, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.3298643960895618, | |
| "grad_norm": 0.036841604651030695, | |
| "learning_rate": 2.04969540431697e-06, | |
| "loss": 0.473, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.3349101229895932, | |
| "grad_norm": 0.033301870334618816, | |
| "learning_rate": 2.0437028336935354e-06, | |
| "loss": 0.5129, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.3399558498896247, | |
| "grad_norm": 0.03157054955686288, | |
| "learning_rate": 2.0377048447854483e-06, | |
| "loss": 0.4497, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.3450015767896564, | |
| "grad_norm": 0.031056401843407105, | |
| "learning_rate": 2.0317016053693527e-06, | |
| "loss": 0.4971, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.3500473036896878, | |
| "grad_norm": 0.031039315725744067, | |
| "learning_rate": 2.0256932833687594e-06, | |
| "loss": 0.4729, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.3550930305897193, | |
| "grad_norm": 0.030343085739004155, | |
| "learning_rate": 2.01968004684935e-06, | |
| "loss": 0.4571, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.360138757489751, | |
| "grad_norm": 0.03216221791171112, | |
| "learning_rate": 2.013662064014278e-06, | |
| "loss": 0.4892, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.3651844843897825, | |
| "grad_norm": 0.030997748793028454, | |
| "learning_rate": 2.0076395031994588e-06, | |
| "loss": 0.4853, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.370230211289814, | |
| "grad_norm": 0.03051181164140716, | |
| "learning_rate": 2.0016125328688645e-06, | |
| "loss": 0.4712, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.3752759381898454, | |
| "grad_norm": 0.033819677864526616, | |
| "learning_rate": 1.995581321609812e-06, | |
| "loss": 0.4936, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.380321665089877, | |
| "grad_norm": 0.03283516748621975, | |
| "learning_rate": 1.9895460381282443e-06, | |
| "loss": 0.4671, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.3853673919899085, | |
| "grad_norm": 0.0360523107800218, | |
| "learning_rate": 1.983506851244015e-06, | |
| "loss": 0.4313, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.39041311888994, | |
| "grad_norm": 0.033433849463724505, | |
| "learning_rate": 1.9774639298861625e-06, | |
| "loss": 0.4823, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.3954588457899715, | |
| "grad_norm": 0.03465852287263857, | |
| "learning_rate": 1.9714174430881886e-06, | |
| "loss": 0.4619, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.4005045726900032, | |
| "grad_norm": 0.036080701809884305, | |
| "learning_rate": 1.9653675599833256e-06, | |
| "loss": 0.4878, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.4055502995900346, | |
| "grad_norm": 0.030820007521684834, | |
| "learning_rate": 1.95931444979981e-06, | |
| "loss": 0.4698, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.410596026490066, | |
| "grad_norm": 0.04113205730752187, | |
| "learning_rate": 1.9532582818561455e-06, | |
| "loss": 0.4728, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.4156417533900978, | |
| "grad_norm": 0.04806829892661083, | |
| "learning_rate": 1.9471992255563675e-06, | |
| "loss": 0.4906, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.4206874802901293, | |
| "grad_norm": 0.03503516935353285, | |
| "learning_rate": 1.941137450385307e-06, | |
| "loss": 0.4356, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.4257332071901607, | |
| "grad_norm": 0.030864677107393407, | |
| "learning_rate": 1.935073125903845e-06, | |
| "loss": 0.4418, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.4307789340901924, | |
| "grad_norm": 0.031871510937479065, | |
| "learning_rate": 1.929006421744173e-06, | |
| "loss": 0.4897, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.4358246609902239, | |
| "grad_norm": 0.040994048526582366, | |
| "learning_rate": 1.9229375076050492e-06, | |
| "loss": 0.4109, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.4408703878902553, | |
| "grad_norm": 0.04141601982962323, | |
| "learning_rate": 1.9168665532470472e-06, | |
| "loss": 0.466, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.445916114790287, | |
| "grad_norm": 0.03049794074820137, | |
| "learning_rate": 1.910793728487811e-06, | |
| "loss": 0.4316, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.4509618416903185, | |
| "grad_norm": 0.031744519891624884, | |
| "learning_rate": 1.904719203197304e-06, | |
| "loss": 0.479, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.45600756859035, | |
| "grad_norm": 0.029790700550052368, | |
| "learning_rate": 1.8986431472930554e-06, | |
| "loss": 0.4379, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.4610532954903817, | |
| "grad_norm": 0.029483006697427174, | |
| "learning_rate": 1.8925657307354117e-06, | |
| "loss": 0.4747, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.4660990223904131, | |
| "grad_norm": 0.0324936518347741, | |
| "learning_rate": 1.886487123522778e-06, | |
| "loss": 0.4777, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.4711447492904446, | |
| "grad_norm": 0.033649363452982924, | |
| "learning_rate": 1.8804074956868647e-06, | |
| "loss": 0.4891, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.4761904761904763, | |
| "grad_norm": 0.029439747594352722, | |
| "learning_rate": 1.874327017287931e-06, | |
| "loss": 0.4393, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.4812362030905077, | |
| "grad_norm": 0.03233846484977382, | |
| "learning_rate": 1.8682458584100292e-06, | |
| "loss": 0.4841, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.4862819299905392, | |
| "grad_norm": 0.03555793172558539, | |
| "learning_rate": 1.8621641891562458e-06, | |
| "loss": 0.4718, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.491327656890571, | |
| "grad_norm": 0.03769744501026416, | |
| "learning_rate": 1.8560821796439423e-06, | |
| "loss": 0.457, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.4963733837906024, | |
| "grad_norm": 0.031502776602075996, | |
| "learning_rate": 1.85e-06, | |
| "loss": 0.4982, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.5014191106906338, | |
| "grad_norm": 0.03095020085634947, | |
| "learning_rate": 1.8439178203560576e-06, | |
| "loss": 0.4435, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.5064648375906655, | |
| "grad_norm": 0.030404440931628602, | |
| "learning_rate": 1.8378358108437548e-06, | |
| "loss": 0.5126, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.511510564490697, | |
| "grad_norm": 0.08258306270058233, | |
| "learning_rate": 1.8317541415899707e-06, | |
| "loss": 0.4591, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.5165562913907285, | |
| "grad_norm": 0.03146777191441174, | |
| "learning_rate": 1.8256729827120692e-06, | |
| "loss": 0.4413, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.5216020182907601, | |
| "grad_norm": 0.032432109365051594, | |
| "learning_rate": 1.8195925043131356e-06, | |
| "loss": 0.4957, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.5266477451907914, | |
| "grad_norm": 0.04497713045776527, | |
| "learning_rate": 1.8135128764772224e-06, | |
| "loss": 0.4689, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.531693472090823, | |
| "grad_norm": 0.03631842372313753, | |
| "learning_rate": 1.8074342692645883e-06, | |
| "loss": 0.4785, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.5367391989908548, | |
| "grad_norm": 0.033176312691736314, | |
| "learning_rate": 1.8013568527069445e-06, | |
| "loss": 0.441, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.541784925890886, | |
| "grad_norm": 0.04879715399407636, | |
| "learning_rate": 1.7952807968026965e-06, | |
| "loss": 0.4941, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.5468306527909177, | |
| "grad_norm": 0.03217993267587352, | |
| "learning_rate": 1.7892062715121891e-06, | |
| "loss": 0.5242, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.5518763796909494, | |
| "grad_norm": 0.03569118299828546, | |
| "learning_rate": 1.7831334467529527e-06, | |
| "loss": 0.4287, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.5569221065909806, | |
| "grad_norm": 0.03177012333685744, | |
| "learning_rate": 1.777062492394951e-06, | |
| "loss": 0.4974, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.5619678334910123, | |
| "grad_norm": 0.03274234727540437, | |
| "learning_rate": 1.770993578255827e-06, | |
| "loss": 0.4703, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.5670135603910438, | |
| "grad_norm": 0.030369567266031263, | |
| "learning_rate": 1.7649268740961555e-06, | |
| "loss": 0.457, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.5720592872910752, | |
| "grad_norm": 0.03230164034111529, | |
| "learning_rate": 1.7588625496146933e-06, | |
| "loss": 0.4662, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.577105014191107, | |
| "grad_norm": 0.035188240025334695, | |
| "learning_rate": 1.7528007744436325e-06, | |
| "loss": 0.5095, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.5821507410911384, | |
| "grad_norm": 0.04937876969659911, | |
| "learning_rate": 1.7467417181438546e-06, | |
| "loss": 0.4413, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.5871964679911699, | |
| "grad_norm": 0.04101297256391375, | |
| "learning_rate": 1.74068555020019e-06, | |
| "loss": 0.5508, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.5922421948912016, | |
| "grad_norm": 0.033882729246426053, | |
| "learning_rate": 1.7346324400166745e-06, | |
| "loss": 0.4622, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.597287921791233, | |
| "grad_norm": 0.03215113210114575, | |
| "learning_rate": 1.728582556911812e-06, | |
| "loss": 0.4585, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.6023336486912645, | |
| "grad_norm": 0.03490067236435116, | |
| "learning_rate": 1.7225360701138372e-06, | |
| "loss": 0.5404, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.6073793755912962, | |
| "grad_norm": 0.03652544099978071, | |
| "learning_rate": 1.716493148755985e-06, | |
| "loss": 0.4471, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.6124251024913276, | |
| "grad_norm": 0.030334831369185966, | |
| "learning_rate": 1.7104539618717559e-06, | |
| "loss": 0.4568, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.617470829391359, | |
| "grad_norm": 0.03266733090142478, | |
| "learning_rate": 1.7044186783901884e-06, | |
| "loss": 0.4875, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.6225165562913908, | |
| "grad_norm": 0.04508809652368408, | |
| "learning_rate": 1.6983874671311358e-06, | |
| "loss": 0.4776, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.6275622831914223, | |
| "grad_norm": 0.03328961623767999, | |
| "learning_rate": 1.6923604968005414e-06, | |
| "loss": 0.4614, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.6326080100914537, | |
| "grad_norm": 0.03179716261046707, | |
| "learning_rate": 1.6863379359857222e-06, | |
| "loss": 0.4531, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.6376537369914854, | |
| "grad_norm": 0.0335058061547864, | |
| "learning_rate": 1.6803199531506496e-06, | |
| "loss": 0.4375, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.6426994638915169, | |
| "grad_norm": 0.030638236461813146, | |
| "learning_rate": 1.674306716631241e-06, | |
| "loss": 0.4792, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.6477451907915484, | |
| "grad_norm": 0.031846036178745894, | |
| "learning_rate": 1.6682983946306477e-06, | |
| "loss": 0.504, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.65279091769158, | |
| "grad_norm": 0.03824258851286572, | |
| "learning_rate": 1.662295155214552e-06, | |
| "loss": 0.4691, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.6578366445916115, | |
| "grad_norm": 0.03121253232825317, | |
| "learning_rate": 1.656297166306465e-06, | |
| "loss": 0.4673, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.662882371491643, | |
| "grad_norm": 0.031088701662057044, | |
| "learning_rate": 1.6503045956830304e-06, | |
| "loss": 0.4996, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.6679280983916747, | |
| "grad_norm": 0.03144849354202538, | |
| "learning_rate": 1.6443176109693303e-06, | |
| "loss": 0.465, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.6729738252917061, | |
| "grad_norm": 0.03683704890058473, | |
| "learning_rate": 1.6383363796341966e-06, | |
| "loss": 0.501, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.6780195521917376, | |
| "grad_norm": 0.03627104452462568, | |
| "learning_rate": 1.6323610689855286e-06, | |
| "loss": 0.4897, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.6830652790917693, | |
| "grad_norm": 0.03257152567100647, | |
| "learning_rate": 1.6263918461656088e-06, | |
| "loss": 0.4835, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.6881110059918005, | |
| "grad_norm": 0.0378640087124608, | |
| "learning_rate": 1.6204288781464312e-06, | |
| "loss": 0.4469, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.6931567328918322, | |
| "grad_norm": 0.030972874904202405, | |
| "learning_rate": 1.614472331725031e-06, | |
| "loss": 0.4494, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.698202459791864, | |
| "grad_norm": 0.03394778481096688, | |
| "learning_rate": 1.6085223735188143e-06, | |
| "loss": 0.4676, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.7032481866918951, | |
| "grad_norm": 0.03572393558340104, | |
| "learning_rate": 1.602579169960904e-06, | |
| "loss": 0.501, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.7082939135919268, | |
| "grad_norm": 0.03870560088142547, | |
| "learning_rate": 1.5966428872954783e-06, | |
| "loss": 0.4457, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.7133396404919585, | |
| "grad_norm": 0.04014342840802024, | |
| "learning_rate": 1.5907136915731252e-06, | |
| "loss": 0.4901, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.7183853673919898, | |
| "grad_norm": 0.039704907757276385, | |
| "learning_rate": 1.5847917486461938e-06, | |
| "loss": 0.461, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.7234310942920215, | |
| "grad_norm": 0.04938782209596833, | |
| "learning_rate": 1.5788772241641584e-06, | |
| "loss": 0.482, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.728476821192053, | |
| "grad_norm": 0.037092246154721814, | |
| "learning_rate": 1.5729702835689837e-06, | |
| "loss": 0.4865, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.7335225480920844, | |
| "grad_norm": 0.032707227613851586, | |
| "learning_rate": 1.5670710920904944e-06, | |
| "loss": 0.4516, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.738568274992116, | |
| "grad_norm": 0.031571757553848674, | |
| "learning_rate": 1.561179814741759e-06, | |
| "loss": 0.4639, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.7436140018921475, | |
| "grad_norm": 0.03273940004779485, | |
| "learning_rate": 1.5552966163144681e-06, | |
| "loss": 0.4919, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.748659728792179, | |
| "grad_norm": 0.03350401150441058, | |
| "learning_rate": 1.549421661374329e-06, | |
| "loss": 0.4394, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.7537054556922107, | |
| "grad_norm": 0.03220088064904635, | |
| "learning_rate": 1.54355511425646e-06, | |
| "loss": 0.4608, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.7587511825922422, | |
| "grad_norm": 0.03644223976632455, | |
| "learning_rate": 1.537697139060795e-06, | |
| "loss": 0.4967, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.7637969094922736, | |
| "grad_norm": 0.03577416455443841, | |
| "learning_rate": 1.531847899647494e-06, | |
| "loss": 0.4963, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.7688426363923053, | |
| "grad_norm": 0.030234494381311913, | |
| "learning_rate": 1.526007559632356e-06, | |
| "loss": 0.4537, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.7738883632923368, | |
| "grad_norm": 0.05021736775351199, | |
| "learning_rate": 1.5201762823822463e-06, | |
| "loss": 0.4453, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.7789340901923683, | |
| "grad_norm": 0.032609834840694886, | |
| "learning_rate": 1.5143542310105248e-06, | |
| "loss": 0.4698, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.7839798170924, | |
| "grad_norm": 0.033746098258680315, | |
| "learning_rate": 1.5085415683724843e-06, | |
| "loss": 0.4457, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.7890255439924314, | |
| "grad_norm": 0.03362427498914355, | |
| "learning_rate": 1.5027384570607927e-06, | |
| "loss": 0.4742, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.7940712708924629, | |
| "grad_norm": 0.035952869257197026, | |
| "learning_rate": 1.496945059400949e-06, | |
| "loss": 0.4408, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.7991169977924946, | |
| "grad_norm": 0.028889846932020017, | |
| "learning_rate": 1.49116153744674e-06, | |
| "loss": 0.4491, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.804162724692526, | |
| "grad_norm": 0.03273155060985915, | |
| "learning_rate": 1.4853880529757062e-06, | |
| "loss": 0.4731, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.8092084515925575, | |
| "grad_norm": 0.03532824602993703, | |
| "learning_rate": 1.47962476748462e-06, | |
| "loss": 0.4839, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.8142541784925892, | |
| "grad_norm": 0.03220160867798885, | |
| "learning_rate": 1.4738718421849652e-06, | |
| "loss": 0.4767, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.8192999053926207, | |
| "grad_norm": 0.03352160784175046, | |
| "learning_rate": 1.4681294379984294e-06, | |
| "loss": 0.4798, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.8243456322926521, | |
| "grad_norm": 0.03748209206166754, | |
| "learning_rate": 1.4623977155524021e-06, | |
| "loss": 0.4836, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.8293913591926838, | |
| "grad_norm": 0.030250534508306597, | |
| "learning_rate": 1.4566768351754812e-06, | |
| "loss": 0.4275, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.8344370860927153, | |
| "grad_norm": 0.03391610497942822, | |
| "learning_rate": 1.4509669568929882e-06, | |
| "loss": 0.4825, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.8394828129927467, | |
| "grad_norm": 0.032112008948756314, | |
| "learning_rate": 1.4452682404224938e-06, | |
| "loss": 0.4645, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.8445285398927784, | |
| "grad_norm": 0.03403624240663962, | |
| "learning_rate": 1.4395808451693473e-06, | |
| "loss": 0.4619, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.8495742667928097, | |
| "grad_norm": 0.03749091866479118, | |
| "learning_rate": 1.4339049302222204e-06, | |
| "loss": 0.4343, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.8546199936928414, | |
| "grad_norm": 0.029918997859797403, | |
| "learning_rate": 1.428240654348655e-06, | |
| "loss": 0.4522, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.859665720592873, | |
| "grad_norm": 0.03955132899448908, | |
| "learning_rate": 1.4225881759906234e-06, | |
| "loss": 0.4242, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.8647114474929043, | |
| "grad_norm": 0.03906074659022793, | |
| "learning_rate": 1.4169476532600964e-06, | |
| "loss": 0.5012, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.869757174392936, | |
| "grad_norm": 0.03759480585936489, | |
| "learning_rate": 1.4113192439346204e-06, | |
| "loss": 0.4824, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.8748029012929677, | |
| "grad_norm": 0.03611657176269409, | |
| "learning_rate": 1.4057031054529018e-06, | |
| "loss": 0.4555, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.879848628192999, | |
| "grad_norm": 0.03157193868816336, | |
| "learning_rate": 1.4000993949104089e-06, | |
| "loss": 0.5048, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.8848943550930306, | |
| "grad_norm": 0.032589108247220215, | |
| "learning_rate": 1.3945082690549697e-06, | |
| "loss": 0.4368, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.889940081993062, | |
| "grad_norm": 0.05211891501111677, | |
| "learning_rate": 1.3889298842823946e-06, | |
| "loss": 0.4879, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.8949858088930935, | |
| "grad_norm": 0.03189397719448743, | |
| "learning_rate": 1.3833643966320962e-06, | |
| "loss": 0.4521, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.9000315357931252, | |
| "grad_norm": 0.03347894762625528, | |
| "learning_rate": 1.3778119617827286e-06, | |
| "loss": 0.4573, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.9050772626931567, | |
| "grad_norm": 0.03276542023591626, | |
| "learning_rate": 1.3722727350478307e-06, | |
| "loss": 0.477, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.9101229895931882, | |
| "grad_norm": 0.03266289908887754, | |
| "learning_rate": 1.3667468713714808e-06, | |
| "loss": 0.4705, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.9151687164932198, | |
| "grad_norm": 0.04100422652508631, | |
| "learning_rate": 1.3612345253239657e-06, | |
| "loss": 0.4428, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.9202144433932513, | |
| "grad_norm": 0.03635772954705938, | |
| "learning_rate": 1.3557358510974528e-06, | |
| "loss": 0.4571, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.9252601702932828, | |
| "grad_norm": 0.03332948092296265, | |
| "learning_rate": 1.3502510025016815e-06, | |
| "loss": 0.4586, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.9303058971933145, | |
| "grad_norm": 0.033737520978110024, | |
| "learning_rate": 1.344780132959656e-06, | |
| "loss": 0.4631, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.935351624093346, | |
| "grad_norm": 0.0396202506391791, | |
| "learning_rate": 1.3393233955033598e-06, | |
| "loss": 0.4419, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.9403973509933774, | |
| "grad_norm": 0.032843835129683065, | |
| "learning_rate": 1.3338809427694686e-06, | |
| "loss": 0.4667, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.945443077893409, | |
| "grad_norm": 0.03362335582738652, | |
| "learning_rate": 1.328452926995085e-06, | |
| "loss": 0.465, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.9504888047934406, | |
| "grad_norm": 0.029795822830050975, | |
| "learning_rate": 1.323039500013478e-06, | |
| "loss": 0.4181, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.955534531693472, | |
| "grad_norm": 0.030901381478255167, | |
| "learning_rate": 1.3176408132498381e-06, | |
| "loss": 0.4613, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.9605802585935037, | |
| "grad_norm": 0.04107008082709936, | |
| "learning_rate": 1.312257017717039e-06, | |
| "loss": 0.5063, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.9656259854935352, | |
| "grad_norm": 0.03897689094096851, | |
| "learning_rate": 1.306888264011415e-06, | |
| "loss": 0.5179, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.9706717123935666, | |
| "grad_norm": 0.02962518992797078, | |
| "learning_rate": 1.3015347023085483e-06, | |
| "loss": 0.4778, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.9757174392935983, | |
| "grad_norm": 0.030790008731447733, | |
| "learning_rate": 1.296196482359068e-06, | |
| "loss": 0.4489, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.9807631661936298, | |
| "grad_norm": 0.038857390420170904, | |
| "learning_rate": 1.290873753484461e-06, | |
| "loss": 0.4689, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.9858088930936613, | |
| "grad_norm": 0.17793014154471423, | |
| "learning_rate": 1.2855666645728969e-06, | |
| "loss": 0.4526, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.990854619993693, | |
| "grad_norm": 0.03723731978470476, | |
| "learning_rate": 1.28027536407506e-06, | |
| "loss": 0.4752, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.9959003468937244, | |
| "grad_norm": 0.034718969729881415, | |
| "learning_rate": 1.275e-06, | |
| "loss": 0.474, | |
| "step": 396 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 594, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1642720300892160.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |