| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 2187, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0004572473708276177, | |
| "grad_norm": 8.096893602247533, | |
| "learning_rate": 4.5662100456621004e-08, | |
| "loss": 4.0564, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002286236854138089, | |
| "grad_norm": 9.362922497604572, | |
| "learning_rate": 2.2831050228310502e-07, | |
| "loss": 4.0286, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.004572473708276178, | |
| "grad_norm": 8.436200820951127, | |
| "learning_rate": 4.5662100456621004e-07, | |
| "loss": 4.0215, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.006858710562414266, | |
| "grad_norm": 8.311166463949595, | |
| "learning_rate": 6.849315068493151e-07, | |
| "loss": 3.995, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.009144947416552356, | |
| "grad_norm": 8.314737539002055, | |
| "learning_rate": 9.132420091324201e-07, | |
| "loss": 4.0089, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.011431184270690443, | |
| "grad_norm": 8.207038819761589, | |
| "learning_rate": 1.1415525114155251e-06, | |
| "loss": 4.0363, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.013717421124828532, | |
| "grad_norm": 7.559088257570073, | |
| "learning_rate": 1.3698630136986302e-06, | |
| "loss": 3.9854, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01600365797896662, | |
| "grad_norm": 7.1469550333759315, | |
| "learning_rate": 1.5981735159817353e-06, | |
| "loss": 3.9172, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01828989483310471, | |
| "grad_norm": 5.971779564925808, | |
| "learning_rate": 1.8264840182648401e-06, | |
| "loss": 3.7833, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0205761316872428, | |
| "grad_norm": 5.297232176162252, | |
| "learning_rate": 2.0547945205479454e-06, | |
| "loss": 3.7258, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.022862368541380886, | |
| "grad_norm": 4.217560181118984, | |
| "learning_rate": 2.2831050228310503e-06, | |
| "loss": 3.6069, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.025148605395518976, | |
| "grad_norm": 3.786598130029432, | |
| "learning_rate": 2.511415525114155e-06, | |
| "loss": 3.553, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.027434842249657063, | |
| "grad_norm": 2.6863763381878782, | |
| "learning_rate": 2.7397260273972604e-06, | |
| "loss": 3.4564, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.029721079103795154, | |
| "grad_norm": 2.322206779034821, | |
| "learning_rate": 2.9680365296803653e-06, | |
| "loss": 3.331, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.03200731595793324, | |
| "grad_norm": 1.7328570105860337, | |
| "learning_rate": 3.1963470319634706e-06, | |
| "loss": 3.2806, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03429355281207133, | |
| "grad_norm": 1.4846651112766411, | |
| "learning_rate": 3.4246575342465754e-06, | |
| "loss": 3.2356, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.03657978966620942, | |
| "grad_norm": 1.1848731903024705, | |
| "learning_rate": 3.6529680365296803e-06, | |
| "loss": 3.1934, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.038866026520347506, | |
| "grad_norm": 1.0381920877926754, | |
| "learning_rate": 3.881278538812785e-06, | |
| "loss": 3.1245, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.0411522633744856, | |
| "grad_norm": 0.9141218079482407, | |
| "learning_rate": 4.109589041095891e-06, | |
| "loss": 3.0468, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04343850022862369, | |
| "grad_norm": 1.0161753499176187, | |
| "learning_rate": 4.337899543378996e-06, | |
| "loss": 3.0329, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.04572473708276177, | |
| "grad_norm": 0.8996453649527762, | |
| "learning_rate": 4.566210045662101e-06, | |
| "loss": 2.9958, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04801097393689986, | |
| "grad_norm": 0.8082797335102323, | |
| "learning_rate": 4.7945205479452054e-06, | |
| "loss": 2.9651, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.05029721079103795, | |
| "grad_norm": 0.7090431405422901, | |
| "learning_rate": 5.02283105022831e-06, | |
| "loss": 2.927, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.05258344764517604, | |
| "grad_norm": 1.2265537925663061, | |
| "learning_rate": 5.251141552511416e-06, | |
| "loss": 2.862, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.05486968449931413, | |
| "grad_norm": 0.650571444620453, | |
| "learning_rate": 5.479452054794521e-06, | |
| "loss": 2.857, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05715592135345222, | |
| "grad_norm": 0.7089485111846239, | |
| "learning_rate": 5.7077625570776266e-06, | |
| "loss": 2.8209, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.05944215820759031, | |
| "grad_norm": 0.5922494361050838, | |
| "learning_rate": 5.936073059360731e-06, | |
| "loss": 2.8037, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06172839506172839, | |
| "grad_norm": 0.5597217919230902, | |
| "learning_rate": 6.164383561643836e-06, | |
| "loss": 2.7487, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.06401463191586648, | |
| "grad_norm": 0.6045746583730743, | |
| "learning_rate": 6.392694063926941e-06, | |
| "loss": 2.6981, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06630086877000457, | |
| "grad_norm": 0.6479924774135967, | |
| "learning_rate": 6.621004566210046e-06, | |
| "loss": 2.7036, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.06858710562414266, | |
| "grad_norm": 0.760723993748018, | |
| "learning_rate": 6.849315068493151e-06, | |
| "loss": 2.6821, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07087334247828075, | |
| "grad_norm": 0.5889973577684341, | |
| "learning_rate": 7.077625570776257e-06, | |
| "loss": 2.6882, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.07315957933241884, | |
| "grad_norm": 0.6201384588278992, | |
| "learning_rate": 7.305936073059361e-06, | |
| "loss": 2.6441, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07544581618655692, | |
| "grad_norm": 0.645862573214957, | |
| "learning_rate": 7.534246575342466e-06, | |
| "loss": 2.5878, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.07773205304069501, | |
| "grad_norm": 0.880791499233313, | |
| "learning_rate": 7.76255707762557e-06, | |
| "loss": 2.5665, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0800182898948331, | |
| "grad_norm": 0.7581098091472079, | |
| "learning_rate": 7.990867579908676e-06, | |
| "loss": 2.5423, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0823045267489712, | |
| "grad_norm": 0.7502504535360037, | |
| "learning_rate": 8.219178082191782e-06, | |
| "loss": 2.5348, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.08459076360310928, | |
| "grad_norm": 0.9587325899501735, | |
| "learning_rate": 8.447488584474887e-06, | |
| "loss": 2.4652, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.08687700045724737, | |
| "grad_norm": 1.0327228370595574, | |
| "learning_rate": 8.675799086757991e-06, | |
| "loss": 2.4066, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08916323731138547, | |
| "grad_norm": 0.8853835960264104, | |
| "learning_rate": 8.904109589041097e-06, | |
| "loss": 2.3642, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.09144947416552354, | |
| "grad_norm": 1.0446953486337078, | |
| "learning_rate": 9.132420091324201e-06, | |
| "loss": 2.3237, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09373571101966163, | |
| "grad_norm": 1.1013758488210148, | |
| "learning_rate": 9.360730593607307e-06, | |
| "loss": 2.2331, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.09602194787379972, | |
| "grad_norm": 1.2192543249794923, | |
| "learning_rate": 9.589041095890411e-06, | |
| "loss": 2.1264, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.09830818472793781, | |
| "grad_norm": 1.3533953895273099, | |
| "learning_rate": 9.817351598173517e-06, | |
| "loss": 2.0554, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.1005944215820759, | |
| "grad_norm": 1.1876482609404326, | |
| "learning_rate": 9.999993629265979e-06, | |
| "loss": 1.9859, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.102880658436214, | |
| "grad_norm": 1.1847416528253172, | |
| "learning_rate": 9.999770655279843e-06, | |
| "loss": 1.8986, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.10516689529035209, | |
| "grad_norm": 1.3137466650624998, | |
| "learning_rate": 9.999229160826947e-06, | |
| "loss": 1.8, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.10745313214449016, | |
| "grad_norm": 1.830150495140023, | |
| "learning_rate": 9.998369180404283e-06, | |
| "loss": 1.7138, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.10973936899862825, | |
| "grad_norm": 1.1159850299398295, | |
| "learning_rate": 9.997190768798639e-06, | |
| "loss": 1.6867, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.11202560585276634, | |
| "grad_norm": 0.9727694366367986, | |
| "learning_rate": 9.995694001083103e-06, | |
| "loss": 1.6469, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.11431184270690443, | |
| "grad_norm": 1.135743426814773, | |
| "learning_rate": 9.993878972612276e-06, | |
| "loss": 1.5607, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.11659807956104253, | |
| "grad_norm": 1.0363437963731608, | |
| "learning_rate": 9.991745799016206e-06, | |
| "loss": 1.5332, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.11888431641518062, | |
| "grad_norm": 1.018006180331875, | |
| "learning_rate": 9.989294616193018e-06, | |
| "loss": 1.4962, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1211705532693187, | |
| "grad_norm": 0.9493951106581935, | |
| "learning_rate": 9.986525580300253e-06, | |
| "loss": 1.4403, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.12345679012345678, | |
| "grad_norm": 4.150830186272059, | |
| "learning_rate": 9.983438867744923e-06, | |
| "loss": 1.4382, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.12574302697759487, | |
| "grad_norm": 0.8458476848705546, | |
| "learning_rate": 9.980034675172274e-06, | |
| "loss": 1.4248, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.12802926383173296, | |
| "grad_norm": 1.8171861028727991, | |
| "learning_rate": 9.976313219453255e-06, | |
| "loss": 1.4055, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.13031550068587106, | |
| "grad_norm": 0.7389926811741014, | |
| "learning_rate": 9.972274737670702e-06, | |
| "loss": 1.4033, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.13260173754000915, | |
| "grad_norm": 0.8834746515415843, | |
| "learning_rate": 9.967919487104237e-06, | |
| "loss": 1.3724, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.13488797439414724, | |
| "grad_norm": 0.8166186304734012, | |
| "learning_rate": 9.963247745213876e-06, | |
| "loss": 1.3721, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.13717421124828533, | |
| "grad_norm": 0.6771475216933378, | |
| "learning_rate": 9.958259809622353e-06, | |
| "loss": 1.3555, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.13946044810242342, | |
| "grad_norm": 0.60525762012324, | |
| "learning_rate": 9.952955998096155e-06, | |
| "loss": 1.36, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.1417466849565615, | |
| "grad_norm": 0.6126617626167846, | |
| "learning_rate": 9.94733664852529e-06, | |
| "loss": 1.353, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1440329218106996, | |
| "grad_norm": 0.6630794657190928, | |
| "learning_rate": 9.941402118901743e-06, | |
| "loss": 1.3359, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.1463191586648377, | |
| "grad_norm": 0.6758533351396738, | |
| "learning_rate": 9.935152787296689e-06, | |
| "loss": 1.3402, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.14860539551897575, | |
| "grad_norm": 0.739719330356037, | |
| "learning_rate": 9.928589051836392e-06, | |
| "loss": 1.3346, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.15089163237311384, | |
| "grad_norm": 0.7258290118963521, | |
| "learning_rate": 9.921711330676848e-06, | |
| "loss": 1.3356, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.15317786922725193, | |
| "grad_norm": 0.6274092924270468, | |
| "learning_rate": 9.91452006197715e-06, | |
| "loss": 1.3362, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.15546410608139002, | |
| "grad_norm": 0.768028072114212, | |
| "learning_rate": 9.907015703871558e-06, | |
| "loss": 1.3214, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.15775034293552812, | |
| "grad_norm": 0.7738373400419118, | |
| "learning_rate": 9.899198734440335e-06, | |
| "loss": 1.331, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.1600365797896662, | |
| "grad_norm": 0.6855410863811031, | |
| "learning_rate": 9.891069651679273e-06, | |
| "loss": 1.3142, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1623228166438043, | |
| "grad_norm": 0.6405023247699122, | |
| "learning_rate": 9.882628973467972e-06, | |
| "loss": 1.3171, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.1646090534979424, | |
| "grad_norm": 0.6764400756880153, | |
| "learning_rate": 9.873877237536854e-06, | |
| "loss": 1.3189, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16689529035208048, | |
| "grad_norm": 0.6298462983903607, | |
| "learning_rate": 9.86481500143289e-06, | |
| "loss": 1.3059, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.16918152720621857, | |
| "grad_norm": 0.6606697771559132, | |
| "learning_rate": 9.855442842484101e-06, | |
| "loss": 1.3267, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.17146776406035666, | |
| "grad_norm": 0.5895037669135822, | |
| "learning_rate": 9.84576135776276e-06, | |
| "loss": 1.3057, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.17375400091449475, | |
| "grad_norm": 0.5762405642901876, | |
| "learning_rate": 9.835771164047365e-06, | |
| "loss": 1.3016, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.17604023776863284, | |
| "grad_norm": 0.6301891918568133, | |
| "learning_rate": 9.825472897783344e-06, | |
| "loss": 1.3046, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.17832647462277093, | |
| "grad_norm": 0.6189017845225122, | |
| "learning_rate": 9.814867215042503e-06, | |
| "loss": 1.3089, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.18061271147690902, | |
| "grad_norm": 0.6279515665165573, | |
| "learning_rate": 9.803954791481239e-06, | |
| "loss": 1.3011, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.18289894833104708, | |
| "grad_norm": 0.6380039476156935, | |
| "learning_rate": 9.792736322297489e-06, | |
| "loss": 1.2758, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 0.7506004279154695, | |
| "learning_rate": 9.781212522186442e-06, | |
| "loss": 1.312, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.18747142203932327, | |
| "grad_norm": 0.7054181242720778, | |
| "learning_rate": 9.769384125295012e-06, | |
| "loss": 1.3112, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.18975765889346136, | |
| "grad_norm": 0.5797880483237029, | |
| "learning_rate": 9.757251885175063e-06, | |
| "loss": 1.2998, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.19204389574759945, | |
| "grad_norm": 0.6040659600524477, | |
| "learning_rate": 9.744816574735405e-06, | |
| "loss": 1.3018, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.19433013260173754, | |
| "grad_norm": 0.7044299546094256, | |
| "learning_rate": 9.732078986192552e-06, | |
| "loss": 1.2818, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.19661636945587563, | |
| "grad_norm": 0.567841572649114, | |
| "learning_rate": 9.719039931020258e-06, | |
| "loss": 1.2733, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.19890260631001372, | |
| "grad_norm": 0.5378351616772565, | |
| "learning_rate": 9.705700239897809e-06, | |
| "loss": 1.2861, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.2011888431641518, | |
| "grad_norm": 0.5372339490006793, | |
| "learning_rate": 9.692060762657118e-06, | |
| "loss": 1.2821, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2034750800182899, | |
| "grad_norm": 0.6353680076674888, | |
| "learning_rate": 9.678122368228571e-06, | |
| "loss": 1.2643, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.205761316872428, | |
| "grad_norm": 0.6263499547366734, | |
| "learning_rate": 9.66388594458568e-06, | |
| "loss": 1.2826, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.20804755372656608, | |
| "grad_norm": 0.6119180746423146, | |
| "learning_rate": 9.649352398688506e-06, | |
| "loss": 1.2856, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.21033379058070417, | |
| "grad_norm": 0.6640618234127624, | |
| "learning_rate": 9.634522656425885e-06, | |
| "loss": 1.2765, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.21262002743484226, | |
| "grad_norm": 0.6253602428713037, | |
| "learning_rate": 9.619397662556434e-06, | |
| "loss": 1.2661, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.21490626428898033, | |
| "grad_norm": 0.6463257272674591, | |
| "learning_rate": 9.603978380648375e-06, | |
| "loss": 1.2838, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.21719250114311842, | |
| "grad_norm": 0.6916869993480118, | |
| "learning_rate": 9.588265793018141e-06, | |
| "loss": 1.2785, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.2194787379972565, | |
| "grad_norm": 0.578420093141111, | |
| "learning_rate": 9.572260900667794e-06, | |
| "loss": 1.2627, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2217649748513946, | |
| "grad_norm": 0.6016744117162259, | |
| "learning_rate": 9.555964723221258e-06, | |
| "loss": 1.2672, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2240512117055327, | |
| "grad_norm": 0.6325422647436533, | |
| "learning_rate": 9.539378298859365e-06, | |
| "loss": 1.2667, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.22633744855967078, | |
| "grad_norm": 0.674420764332063, | |
| "learning_rate": 9.522502684253709e-06, | |
| "loss": 1.2601, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.22862368541380887, | |
| "grad_norm": 0.6942742236531446, | |
| "learning_rate": 9.505338954499332e-06, | |
| "loss": 1.275, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23090992226794696, | |
| "grad_norm": 0.5661617220667517, | |
| "learning_rate": 9.487888203046232e-06, | |
| "loss": 1.2683, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.23319615912208505, | |
| "grad_norm": 0.6389133947347537, | |
| "learning_rate": 9.4701515416297e-06, | |
| "loss": 1.2659, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.23548239597622314, | |
| "grad_norm": 0.561786602813537, | |
| "learning_rate": 9.452130100199504e-06, | |
| "loss": 1.2664, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.23776863283036123, | |
| "grad_norm": 0.5666699221383189, | |
| "learning_rate": 9.433825026847891e-06, | |
| "loss": 1.2573, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.24005486968449932, | |
| "grad_norm": 0.6718711112993888, | |
| "learning_rate": 9.415237487736452e-06, | |
| "loss": 1.2545, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.2423411065386374, | |
| "grad_norm": 0.5637527283960878, | |
| "learning_rate": 9.396368667021835e-06, | |
| "loss": 1.2723, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2446273433927755, | |
| "grad_norm": 0.583426898925874, | |
| "learning_rate": 9.377219766780288e-06, | |
| "loss": 1.2473, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.24691358024691357, | |
| "grad_norm": 0.7422622561747031, | |
| "learning_rate": 9.3577920069311e-06, | |
| "loss": 1.2609, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.24919981710105166, | |
| "grad_norm": 0.7536416453907702, | |
| "learning_rate": 9.338086625158867e-06, | |
| "loss": 1.2655, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.25148605395518975, | |
| "grad_norm": 0.5911621999933799, | |
| "learning_rate": 9.318104876834652e-06, | |
| "loss": 1.2652, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.25377229080932784, | |
| "grad_norm": 0.6482915887304207, | |
| "learning_rate": 9.297848034936007e-06, | |
| "loss": 1.2488, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.25605852766346593, | |
| "grad_norm": 0.7813862221549358, | |
| "learning_rate": 9.277317389965871e-06, | |
| "loss": 1.2678, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.258344764517604, | |
| "grad_norm": 0.601959447185496, | |
| "learning_rate": 9.256514249870366e-06, | |
| "loss": 1.2549, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.2606310013717421, | |
| "grad_norm": 0.5439593292691556, | |
| "learning_rate": 9.235439939955458e-06, | |
| "loss": 1.2311, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2629172382258802, | |
| "grad_norm": 0.6462948109732727, | |
| "learning_rate": 9.214095802802533e-06, | |
| "loss": 1.2605, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.2652034750800183, | |
| "grad_norm": 0.6523908850821281, | |
| "learning_rate": 9.192483198182876e-06, | |
| "loss": 1.2577, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2674897119341564, | |
| "grad_norm": 0.6285230592028435, | |
| "learning_rate": 9.170603502971017e-06, | |
| "loss": 1.233, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.2697759487882945, | |
| "grad_norm": 0.5990676661488948, | |
| "learning_rate": 9.148458111057043e-06, | |
| "loss": 1.2444, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.27206218564243256, | |
| "grad_norm": 0.5443537881683997, | |
| "learning_rate": 9.12604843325778e-06, | |
| "loss": 1.2282, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.27434842249657065, | |
| "grad_norm": 0.5804764131758829, | |
| "learning_rate": 9.103375897226919e-06, | |
| "loss": 1.253, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.27663465935070874, | |
| "grad_norm": 0.5905170219986889, | |
| "learning_rate": 9.080441947364065e-06, | |
| "loss": 1.2472, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.27892089620484684, | |
| "grad_norm": 0.6003218456115103, | |
| "learning_rate": 9.057248044722718e-06, | |
| "loss": 1.2421, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2812071330589849, | |
| "grad_norm": 0.5683857920528798, | |
| "learning_rate": 9.033795666917191e-06, | |
| "loss": 1.2551, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.283493369913123, | |
| "grad_norm": 0.5908776822300396, | |
| "learning_rate": 9.010086308028487e-06, | |
| "loss": 1.2375, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2857796067672611, | |
| "grad_norm": 0.6118010788168986, | |
| "learning_rate": 8.986121478509096e-06, | |
| "loss": 1.2347, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.2880658436213992, | |
| "grad_norm": 0.5787813457678733, | |
| "learning_rate": 8.961902705086785e-06, | |
| "loss": 1.2395, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2903520804755373, | |
| "grad_norm": 0.6290839595278495, | |
| "learning_rate": 8.937431530667329e-06, | |
| "loss": 1.2263, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.2926383173296754, | |
| "grad_norm": 0.5459763353494508, | |
| "learning_rate": 8.912709514236218e-06, | |
| "loss": 1.2285, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.29492455418381347, | |
| "grad_norm": 0.6301840515917086, | |
| "learning_rate": 8.887738230759334e-06, | |
| "loss": 1.2374, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.2972107910379515, | |
| "grad_norm": 0.5413584040020849, | |
| "learning_rate": 8.862519271082624e-06, | |
| "loss": 1.2505, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2994970278920896, | |
| "grad_norm": 0.5979355091788396, | |
| "learning_rate": 8.83705424183074e-06, | |
| "loss": 1.2238, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.3017832647462277, | |
| "grad_norm": 0.6873493941298675, | |
| "learning_rate": 8.811344765304698e-06, | |
| "loss": 1.2262, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3040695016003658, | |
| "grad_norm": 0.6699975954695512, | |
| "learning_rate": 8.785392479378522e-06, | |
| "loss": 1.23, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.30635573845450387, | |
| "grad_norm": 0.6860546025784545, | |
| "learning_rate": 8.759199037394888e-06, | |
| "loss": 1.2424, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.30864197530864196, | |
| "grad_norm": 0.7598573834174616, | |
| "learning_rate": 8.732766108059814e-06, | |
| "loss": 1.2138, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.31092821216278005, | |
| "grad_norm": 0.723323270057115, | |
| "learning_rate": 8.70609537533634e-06, | |
| "loss": 1.2373, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.31321444901691814, | |
| "grad_norm": 0.6170455054157933, | |
| "learning_rate": 8.679188538337248e-06, | |
| "loss": 1.2257, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.31550068587105623, | |
| "grad_norm": 0.7413957440287698, | |
| "learning_rate": 8.652047311216823e-06, | |
| "loss": 1.2075, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3177869227251943, | |
| "grad_norm": 0.7424365012242525, | |
| "learning_rate": 8.62467342306164e-06, | |
| "loss": 1.2238, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3200731595793324, | |
| "grad_norm": 0.8566227798899636, | |
| "learning_rate": 8.597068617780419e-06, | |
| "loss": 1.2278, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3223593964334705, | |
| "grad_norm": 0.647075376724737, | |
| "learning_rate": 8.569234653992916e-06, | |
| "loss": 1.2407, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.3246456332876086, | |
| "grad_norm": 0.6249088936722902, | |
| "learning_rate": 8.541173304917895e-06, | |
| "loss": 1.2231, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3269318701417467, | |
| "grad_norm": 0.70817264277616, | |
| "learning_rate": 8.512886358260162e-06, | |
| "loss": 1.2345, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.3292181069958848, | |
| "grad_norm": 0.5956107721750036, | |
| "learning_rate": 8.484375616096658e-06, | |
| "loss": 1.225, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.33150434385002286, | |
| "grad_norm": 0.6062042871270218, | |
| "learning_rate": 8.455642894761684e-06, | |
| "loss": 1.2185, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.33379058070416096, | |
| "grad_norm": 0.66611343630398, | |
| "learning_rate": 8.426690024731161e-06, | |
| "loss": 1.2171, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.33607681755829905, | |
| "grad_norm": 0.6006939272932527, | |
| "learning_rate": 8.39751885050603e-06, | |
| "loss": 1.2168, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.33836305441243714, | |
| "grad_norm": 0.5888998376074026, | |
| "learning_rate": 8.36813123049474e-06, | |
| "loss": 1.2447, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.3406492912665752, | |
| "grad_norm": 0.6170255283448466, | |
| "learning_rate": 8.338529036894855e-06, | |
| "loss": 1.2386, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.3429355281207133, | |
| "grad_norm": 0.6592250171561639, | |
| "learning_rate": 8.308714155573785e-06, | |
| "loss": 1.2095, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3452217649748514, | |
| "grad_norm": 0.5948350472440084, | |
| "learning_rate": 8.278688485948634e-06, | |
| "loss": 1.2204, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.3475080018289895, | |
| "grad_norm": 0.6884759018973265, | |
| "learning_rate": 8.248453940865204e-06, | |
| "loss": 1.2205, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3497942386831276, | |
| "grad_norm": 0.5629453296642776, | |
| "learning_rate": 8.218012446476128e-06, | |
| "loss": 1.2087, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.3520804755372657, | |
| "grad_norm": 0.5703699859674032, | |
| "learning_rate": 8.187365942118162e-06, | |
| "loss": 1.2038, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.35436671239140377, | |
| "grad_norm": 0.5758055939006159, | |
| "learning_rate": 8.156516380188635e-06, | |
| "loss": 1.2015, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.35665294924554186, | |
| "grad_norm": 0.6814380489670292, | |
| "learning_rate": 8.125465726021068e-06, | |
| "loss": 1.2267, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.35893918609967995, | |
| "grad_norm": 0.58819101648096, | |
| "learning_rate": 8.09421595775997e-06, | |
| "loss": 1.2065, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.36122542295381804, | |
| "grad_norm": 0.599220106737159, | |
| "learning_rate": 8.062769066234807e-06, | |
| "loss": 1.2084, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.3635116598079561, | |
| "grad_norm": 0.5687079813226833, | |
| "learning_rate": 8.031127054833192e-06, | |
| "loss": 1.2311, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.36579789666209417, | |
| "grad_norm": 0.6076443328436887, | |
| "learning_rate": 7.999291939373232e-06, | |
| "loss": 1.209, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.36808413351623226, | |
| "grad_norm": 0.5767468288489239, | |
| "learning_rate": 7.967265747975124e-06, | |
| "loss": 1.2153, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 0.6275130557605428, | |
| "learning_rate": 7.93505052093194e-06, | |
| "loss": 1.2206, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.37265660722450844, | |
| "grad_norm": 0.5920904031157348, | |
| "learning_rate": 7.90264831057965e-06, | |
| "loss": 1.2149, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.37494284407864653, | |
| "grad_norm": 0.5841477404583847, | |
| "learning_rate": 7.870061181166372e-06, | |
| "loss": 1.2134, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3772290809327846, | |
| "grad_norm": 0.545565275285448, | |
| "learning_rate": 7.837291208720867e-06, | |
| "loss": 1.2185, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.3795153177869227, | |
| "grad_norm": 0.6183231148929101, | |
| "learning_rate": 7.804340480920274e-06, | |
| "loss": 1.2064, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3818015546410608, | |
| "grad_norm": 0.5801259298558049, | |
| "learning_rate": 7.771211096957125e-06, | |
| "loss": 1.2049, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.3840877914951989, | |
| "grad_norm": 0.579347207611424, | |
| "learning_rate": 7.737905167405596e-06, | |
| "loss": 1.2185, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.386374028349337, | |
| "grad_norm": 0.6262921976973932, | |
| "learning_rate": 7.704424814087056e-06, | |
| "loss": 1.2137, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.3886602652034751, | |
| "grad_norm": 0.6070706881138944, | |
| "learning_rate": 7.670772169934902e-06, | |
| "loss": 1.2177, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.39094650205761317, | |
| "grad_norm": 0.5688216055326876, | |
| "learning_rate": 7.636949378858647e-06, | |
| "loss": 1.2016, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.39323273891175126, | |
| "grad_norm": 0.6166249078020826, | |
| "learning_rate": 7.602958595607375e-06, | |
| "loss": 1.1957, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.39551897576588935, | |
| "grad_norm": 0.5778886288472463, | |
| "learning_rate": 7.568801985632439e-06, | |
| "loss": 1.2105, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.39780521262002744, | |
| "grad_norm": 0.6732218435967291, | |
| "learning_rate": 7.5344817249495195e-06, | |
| "loss": 1.2047, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.40009144947416553, | |
| "grad_norm": 0.672208759556888, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.1854, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.4023776863283036, | |
| "grad_norm": 0.6180565492464766, | |
| "learning_rate": 7.465359007511667e-06, | |
| "loss": 1.185, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4046639231824417, | |
| "grad_norm": 0.6266745151721254, | |
| "learning_rate": 7.430560954358764e-06, | |
| "loss": 1.2082, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.4069501600365798, | |
| "grad_norm": 0.6163182978581346, | |
| "learning_rate": 7.395608057421406e-06, | |
| "loss": 1.2194, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4092363968907179, | |
| "grad_norm": 0.6262674693601461, | |
| "learning_rate": 7.360502543444339e-06, | |
| "loss": 1.2188, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.411522633744856, | |
| "grad_norm": 0.5549642780561265, | |
| "learning_rate": 7.325246648895089e-06, | |
| "loss": 1.1986, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.41380887059899407, | |
| "grad_norm": 0.5540368046559051, | |
| "learning_rate": 7.289842619821475e-06, | |
| "loss": 1.2175, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.41609510745313216, | |
| "grad_norm": 0.587023330497459, | |
| "learning_rate": 7.254292711708529e-06, | |
| "loss": 1.2029, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.41838134430727025, | |
| "grad_norm": 0.5513581130094706, | |
| "learning_rate": 7.218599189334799e-06, | |
| "loss": 1.2009, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.42066758116140834, | |
| "grad_norm": 0.7237520794327035, | |
| "learning_rate": 7.182764326628068e-06, | |
| "loss": 1.2063, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.42295381801554643, | |
| "grad_norm": 0.5476819110298711, | |
| "learning_rate": 7.146790406520491e-06, | |
| "loss": 1.2107, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.4252400548696845, | |
| "grad_norm": 0.5753924094787153, | |
| "learning_rate": 7.1106797208031554e-06, | |
| "loss": 1.2133, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.4275262917238226, | |
| "grad_norm": 0.6489054914059448, | |
| "learning_rate": 7.0744345699800755e-06, | |
| "loss": 1.1991, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.42981252857796065, | |
| "grad_norm": 0.6239602498665449, | |
| "learning_rate": 7.038057263121639e-06, | |
| "loss": 1.1937, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.43209876543209874, | |
| "grad_norm": 0.5954140813357963, | |
| "learning_rate": 7.001550117717499e-06, | |
| "loss": 1.2092, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.43438500228623683, | |
| "grad_norm": 0.5953175778315464, | |
| "learning_rate": 6.9649154595289326e-06, | |
| "loss": 1.1957, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4366712391403749, | |
| "grad_norm": 0.6030938627687562, | |
| "learning_rate": 6.92815562244068e-06, | |
| "loss": 1.1827, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.438957475994513, | |
| "grad_norm": 0.6882999466791362, | |
| "learning_rate": 6.891272948312251e-06, | |
| "loss": 1.2102, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.4412437128486511, | |
| "grad_norm": 0.6080281045836577, | |
| "learning_rate": 6.854269786828741e-06, | |
| "loss": 1.2093, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.4435299497027892, | |
| "grad_norm": 0.756192409869553, | |
| "learning_rate": 6.817148495351131e-06, | |
| "loss": 1.2159, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.4458161865569273, | |
| "grad_norm": 0.5892520162590819, | |
| "learning_rate": 6.779911438766117e-06, | |
| "loss": 1.193, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.4481024234110654, | |
| "grad_norm": 0.6265917897470434, | |
| "learning_rate": 6.742560989335438e-06, | |
| "loss": 1.1951, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.45038866026520347, | |
| "grad_norm": 0.5927415516536023, | |
| "learning_rate": 6.705099526544757e-06, | |
| "loss": 1.1973, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.45267489711934156, | |
| "grad_norm": 0.5602604942191215, | |
| "learning_rate": 6.667529436952064e-06, | |
| "loss": 1.1945, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.45496113397347965, | |
| "grad_norm": 0.751574883051813, | |
| "learning_rate": 6.629853114035643e-06, | |
| "loss": 1.2134, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.45724737082761774, | |
| "grad_norm": 0.6000318274839507, | |
| "learning_rate": 6.5920729580415795e-06, | |
| "loss": 1.2104, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.45953360768175583, | |
| "grad_norm": 0.5783065549399249, | |
| "learning_rate": 6.554191375830861e-06, | |
| "loss": 1.2016, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.4618198445358939, | |
| "grad_norm": 0.5751980188798808, | |
| "learning_rate": 6.516210780726032e-06, | |
| "loss": 1.1794, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.464106081390032, | |
| "grad_norm": 0.6096335885035103, | |
| "learning_rate": 6.478133592357455e-06, | |
| "loss": 1.1816, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.4663923182441701, | |
| "grad_norm": 0.5848690144740822, | |
| "learning_rate": 6.43996223650916e-06, | |
| "loss": 1.1735, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.4686785550983082, | |
| "grad_norm": 0.6273777569367492, | |
| "learning_rate": 6.401699144964306e-06, | |
| "loss": 1.1864, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.4709647919524463, | |
| "grad_norm": 0.5772389229176554, | |
| "learning_rate": 6.3633467553502625e-06, | |
| "loss": 1.1953, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4732510288065844, | |
| "grad_norm": 0.6320660706578101, | |
| "learning_rate": 6.32490751098331e-06, | |
| "loss": 1.1778, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.47553726566072246, | |
| "grad_norm": 0.628014857385664, | |
| "learning_rate": 6.286383860712982e-06, | |
| "loss": 1.1978, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.47782350251486055, | |
| "grad_norm": 0.6165011857453245, | |
| "learning_rate": 6.247778258766069e-06, | |
| "loss": 1.1783, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.48010973936899864, | |
| "grad_norm": 0.6680859473813631, | |
| "learning_rate": 6.209093164590253e-06, | |
| "loss": 1.1883, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.48239597622313674, | |
| "grad_norm": 0.6230269069079273, | |
| "learning_rate": 6.170331042697425e-06, | |
| "loss": 1.1923, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.4846822130772748, | |
| "grad_norm": 0.6472681484163015, | |
| "learning_rate": 6.131494362506693e-06, | |
| "loss": 1.1826, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.4869684499314129, | |
| "grad_norm": 0.6799978087591872, | |
| "learning_rate": 6.09258559818704e-06, | |
| "loss": 1.1829, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.489254686785551, | |
| "grad_norm": 0.5617426984448537, | |
| "learning_rate": 6.053607228499719e-06, | |
| "loss": 1.1941, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4915409236396891, | |
| "grad_norm": 0.6444058153599652, | |
| "learning_rate": 6.014561736640334e-06, | |
| "loss": 1.2, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.49382716049382713, | |
| "grad_norm": 0.6016265988080601, | |
| "learning_rate": 5.975451610080643e-06, | |
| "loss": 1.1655, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.4961133973479652, | |
| "grad_norm": 0.7053148286233416, | |
| "learning_rate": 5.936279340410082e-06, | |
| "loss": 1.172, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.4983996342021033, | |
| "grad_norm": 0.5586357561653685, | |
| "learning_rate": 5.8970474231770445e-06, | |
| "loss": 1.1922, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5006858710562414, | |
| "grad_norm": 0.7895760074140119, | |
| "learning_rate": 5.857758357729892e-06, | |
| "loss": 1.1839, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.5029721079103795, | |
| "grad_norm": 0.7313666592611404, | |
| "learning_rate": 5.8184146470577265e-06, | |
| "loss": 1.1813, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5052583447645176, | |
| "grad_norm": 0.6067591576327228, | |
| "learning_rate": 5.779018797630934e-06, | |
| "loss": 1.1855, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.5075445816186557, | |
| "grad_norm": 0.6144330199450508, | |
| "learning_rate": 5.739573319241505e-06, | |
| "loss": 1.1924, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5098308184727938, | |
| "grad_norm": 0.6075048668745815, | |
| "learning_rate": 5.7000807248431466e-06, | |
| "loss": 1.1783, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.5121170553269319, | |
| "grad_norm": 0.6763365315316732, | |
| "learning_rate": 5.66054353039118e-06, | |
| "loss": 1.1873, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.51440329218107, | |
| "grad_norm": 0.652936999197392, | |
| "learning_rate": 5.620964254682267e-06, | |
| "loss": 1.2019, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.516689529035208, | |
| "grad_norm": 0.7510930690144121, | |
| "learning_rate": 5.58134541919394e-06, | |
| "loss": 1.1863, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5189757658893461, | |
| "grad_norm": 0.7485282723991191, | |
| "learning_rate": 5.5416895479239665e-06, | |
| "loss": 1.1878, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.5212620027434842, | |
| "grad_norm": 0.6650793765929232, | |
| "learning_rate": 5.501999167229554e-06, | |
| "loss": 1.1844, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5235482395976223, | |
| "grad_norm": 0.6617004106280673, | |
| "learning_rate": 5.4622768056664e-06, | |
| "loss": 1.1819, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.5258344764517604, | |
| "grad_norm": 0.639306148093516, | |
| "learning_rate": 5.42252499382761e-06, | |
| "loss": 1.1844, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5281207133058985, | |
| "grad_norm": 0.590573720499581, | |
| "learning_rate": 5.38274626418248e-06, | |
| "loss": 1.1848, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.5304069501600366, | |
| "grad_norm": 0.625235396788826, | |
| "learning_rate": 5.3429431509151515e-06, | |
| "loss": 1.1904, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.5326931870141747, | |
| "grad_norm": 0.5840052674712635, | |
| "learning_rate": 5.303118189763187e-06, | |
| "loss": 1.1829, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.5349794238683128, | |
| "grad_norm": 0.5940842973816081, | |
| "learning_rate": 5.263273917856e-06, | |
| "loss": 1.1774, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.5372656607224509, | |
| "grad_norm": 0.5991239115995499, | |
| "learning_rate": 5.22341287355324e-06, | |
| "loss": 1.1857, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.539551897576589, | |
| "grad_norm": 0.6248756548437343, | |
| "learning_rate": 5.183537596283075e-06, | |
| "loss": 1.1799, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.541838134430727, | |
| "grad_norm": 0.6023807247895316, | |
| "learning_rate": 5.143650626380417e-06, | |
| "loss": 1.1858, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.5441243712848651, | |
| "grad_norm": 0.6101959497751839, | |
| "learning_rate": 5.103754504925071e-06, | |
| "loss": 1.1961, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.5464106081390032, | |
| "grad_norm": 0.569676114190435, | |
| "learning_rate": 5.06385177357987e-06, | |
| "loss": 1.1766, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.5486968449931413, | |
| "grad_norm": 0.5819652008689743, | |
| "learning_rate": 5.023944974428739e-06, | |
| "loss": 1.1734, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5509830818472794, | |
| "grad_norm": 0.5661449507234365, | |
| "learning_rate": 4.9840366498147495e-06, | |
| "loss": 1.1908, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.5532693187014175, | |
| "grad_norm": 0.6109491726102372, | |
| "learning_rate": 4.944129342178156e-06, | |
| "loss": 1.1784, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 0.5811074689104263, | |
| "learning_rate": 4.90422559389443e-06, | |
| "loss": 1.1746, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.5578417924096937, | |
| "grad_norm": 0.6060458081756667, | |
| "learning_rate": 4.864327947112281e-06, | |
| "loss": 1.195, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.5601280292638318, | |
| "grad_norm": 0.6226718536570417, | |
| "learning_rate": 4.82443894359171e-06, | |
| "loss": 1.1786, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.5624142661179699, | |
| "grad_norm": 0.5995864510713481, | |
| "learning_rate": 4.784561124542088e-06, | |
| "loss": 1.1791, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5647005029721079, | |
| "grad_norm": 0.5701958838449743, | |
| "learning_rate": 4.744697030460248e-06, | |
| "loss": 1.1647, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.566986739826246, | |
| "grad_norm": 0.6293939505655973, | |
| "learning_rate": 4.7048492009686525e-06, | |
| "loss": 1.1692, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5692729766803841, | |
| "grad_norm": 0.6850447194966206, | |
| "learning_rate": 4.6650201746535926e-06, | |
| "loss": 1.1673, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.5715592135345222, | |
| "grad_norm": 0.6040120516739561, | |
| "learning_rate": 4.625212488903467e-06, | |
| "loss": 1.1834, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5738454503886603, | |
| "grad_norm": 0.5686706476550618, | |
| "learning_rate": 4.585428679747133e-06, | |
| "loss": 1.1716, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.5761316872427984, | |
| "grad_norm": 0.5946931657837966, | |
| "learning_rate": 4.545671281692331e-06, | |
| "loss": 1.1705, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5784179240969365, | |
| "grad_norm": 0.6120143356512502, | |
| "learning_rate": 4.505942827564242e-06, | |
| "loss": 1.1807, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.5807041609510746, | |
| "grad_norm": 0.6341171747185648, | |
| "learning_rate": 4.466245848344106e-06, | |
| "loss": 1.1839, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5829903978052127, | |
| "grad_norm": 0.6494090868678567, | |
| "learning_rate": 4.426582873007999e-06, | |
| "loss": 1.1684, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.5852766346593508, | |
| "grad_norm": 0.6252524175950205, | |
| "learning_rate": 4.386956428365701e-06, | |
| "loss": 1.1878, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5875628715134888, | |
| "grad_norm": 0.5911175497758677, | |
| "learning_rate": 4.347369038899744e-06, | |
| "loss": 1.1828, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.5898491083676269, | |
| "grad_norm": 0.5988939599453593, | |
| "learning_rate": 4.307823226604555e-06, | |
| "loss": 1.1735, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5921353452217649, | |
| "grad_norm": 0.5813355536422021, | |
| "learning_rate": 4.2683215108258145e-06, | |
| "loss": 1.1706, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.594421582075903, | |
| "grad_norm": 0.6208043705991068, | |
| "learning_rate": 4.228866408099945e-06, | |
| "loss": 1.1907, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5967078189300411, | |
| "grad_norm": 0.6512006631857741, | |
| "learning_rate": 4.189460431993788e-06, | |
| "loss": 1.1951, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.5989940557841792, | |
| "grad_norm": 0.5845471180993255, | |
| "learning_rate": 4.150106092944475e-06, | |
| "loss": 1.1717, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6012802926383173, | |
| "grad_norm": 0.5949045334275538, | |
| "learning_rate": 4.110805898099492e-06, | |
| "loss": 1.1833, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.6035665294924554, | |
| "grad_norm": 0.5971913414181261, | |
| "learning_rate": 4.071562351156966e-06, | |
| "loss": 1.1786, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6058527663465935, | |
| "grad_norm": 0.6178601149254982, | |
| "learning_rate": 4.032377952206148e-06, | |
| "loss": 1.1793, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.6081390032007316, | |
| "grad_norm": 0.6046188006147395, | |
| "learning_rate": 3.993255197568154e-06, | |
| "loss": 1.169, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6104252400548696, | |
| "grad_norm": 0.5919458656130715, | |
| "learning_rate": 3.954196579636918e-06, | |
| "loss": 1.1692, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.6127114769090077, | |
| "grad_norm": 0.5727049539306068, | |
| "learning_rate": 3.91520458672042e-06, | |
| "loss": 1.1747, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6149977137631458, | |
| "grad_norm": 0.6040809405921704, | |
| "learning_rate": 3.876281702882156e-06, | |
| "loss": 1.1935, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.6172839506172839, | |
| "grad_norm": 0.5747789602798682, | |
| "learning_rate": 3.837430407782896e-06, | |
| "loss": 1.175, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.619570187471422, | |
| "grad_norm": 0.6001909994942644, | |
| "learning_rate": 3.7986531765226965e-06, | |
| "loss": 1.1718, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.6218564243255601, | |
| "grad_norm": 0.5499338552551708, | |
| "learning_rate": 3.759952479483232e-06, | |
| "loss": 1.1615, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.6241426611796982, | |
| "grad_norm": 0.62697610396954, | |
| "learning_rate": 3.7213307821704115e-06, | |
| "loss": 1.1616, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.6264288980338363, | |
| "grad_norm": 0.637904015143814, | |
| "learning_rate": 3.6827905450573022e-06, | |
| "loss": 1.1784, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.6287151348879744, | |
| "grad_norm": 0.6235229612947039, | |
| "learning_rate": 3.6443342234273905e-06, | |
| "loss": 1.1674, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.6310013717421125, | |
| "grad_norm": 0.744429415227132, | |
| "learning_rate": 3.6059642672181537e-06, | |
| "loss": 1.1678, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.6332876085962506, | |
| "grad_norm": 0.5903117671660288, | |
| "learning_rate": 3.5676831208649887e-06, | |
| "loss": 1.1661, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.6355738454503886, | |
| "grad_norm": 0.5977435348831742, | |
| "learning_rate": 3.5294932231454838e-06, | |
| "loss": 1.1655, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.6378600823045267, | |
| "grad_norm": 0.6262251229258455, | |
| "learning_rate": 3.4913970070240388e-06, | |
| "loss": 1.1827, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.6401463191586648, | |
| "grad_norm": 0.6039362156672261, | |
| "learning_rate": 3.4533968994968913e-06, | |
| "loss": 1.162, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6424325560128029, | |
| "grad_norm": 0.610471777862986, | |
| "learning_rate": 3.41549532143748e-06, | |
| "loss": 1.1719, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.644718792866941, | |
| "grad_norm": 0.6124948412563855, | |
| "learning_rate": 3.3776946874422268e-06, | |
| "loss": 1.161, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.6470050297210791, | |
| "grad_norm": 0.596054515528405, | |
| "learning_rate": 3.3399974056767095e-06, | |
| "loss": 1.1677, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.6492912665752172, | |
| "grad_norm": 0.6199519548446956, | |
| "learning_rate": 3.30240587772224e-06, | |
| "loss": 1.1731, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.6515775034293553, | |
| "grad_norm": 0.6123382818220521, | |
| "learning_rate": 3.2649224984228756e-06, | |
| "loss": 1.1751, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.6538637402834934, | |
| "grad_norm": 0.6521756883889377, | |
| "learning_rate": 3.227549655732843e-06, | |
| "loss": 1.1746, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.6561499771376315, | |
| "grad_norm": 0.6292502440238857, | |
| "learning_rate": 3.19028973056441e-06, | |
| "loss": 1.1796, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.6584362139917695, | |
| "grad_norm": 0.7223300006546375, | |
| "learning_rate": 3.153145096636211e-06, | |
| "loss": 1.1769, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.6607224508459076, | |
| "grad_norm": 0.6123252900962536, | |
| "learning_rate": 3.1161181203220146e-06, | |
| "loss": 1.1798, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.6630086877000457, | |
| "grad_norm": 0.6176590524451245, | |
| "learning_rate": 3.079211160499975e-06, | |
| "loss": 1.1628, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.6652949245541838, | |
| "grad_norm": 0.6851380779593121, | |
| "learning_rate": 3.0424265684023556e-06, | |
| "loss": 1.1621, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.6675811614083219, | |
| "grad_norm": 0.6135186798564677, | |
| "learning_rate": 3.0057666874657365e-06, | |
| "loss": 1.1817, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.66986739826246, | |
| "grad_norm": 0.6162664151552476, | |
| "learning_rate": 2.9692338531817205e-06, | |
| "loss": 1.1621, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.6721536351165981, | |
| "grad_norm": 0.6209879083469707, | |
| "learning_rate": 2.9328303929481507e-06, | |
| "loss": 1.1788, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.6744398719707362, | |
| "grad_norm": 0.6564960801220917, | |
| "learning_rate": 2.8965586259208295e-06, | |
| "loss": 1.1497, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.6767261088248743, | |
| "grad_norm": 0.6100366044161921, | |
| "learning_rate": 2.860420862865787e-06, | |
| "loss": 1.1641, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.6790123456790124, | |
| "grad_norm": 0.6401282278697755, | |
| "learning_rate": 2.82441940601205e-06, | |
| "loss": 1.1647, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.6812985825331505, | |
| "grad_norm": 0.5948814066139619, | |
| "learning_rate": 2.7885565489049948e-06, | |
| "loss": 1.1862, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6835848193872885, | |
| "grad_norm": 0.575891260626997, | |
| "learning_rate": 2.7528345762602125e-06, | |
| "loss": 1.149, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.6858710562414266, | |
| "grad_norm": 0.6321328549868929, | |
| "learning_rate": 2.7172557638179674e-06, | |
| "loss": 1.1722, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6881572930955647, | |
| "grad_norm": 0.620537429422375, | |
| "learning_rate": 2.681822378198221e-06, | |
| "loss": 1.1667, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.6904435299497028, | |
| "grad_norm": 0.5916688359774108, | |
| "learning_rate": 2.6465366767562162e-06, | |
| "loss": 1.1742, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.6927297668038409, | |
| "grad_norm": 0.649532932905328, | |
| "learning_rate": 2.611400907438685e-06, | |
| "loss": 1.1664, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.695016003657979, | |
| "grad_norm": 0.5887639490410209, | |
| "learning_rate": 2.5764173086406306e-06, | |
| "loss": 1.1684, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.6973022405121171, | |
| "grad_norm": 0.5909674256777088, | |
| "learning_rate": 2.5415881090627227e-06, | |
| "loss": 1.1681, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.6995884773662552, | |
| "grad_norm": 0.6669572713903603, | |
| "learning_rate": 2.506915527569318e-06, | |
| "loss": 1.1692, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.7018747142203933, | |
| "grad_norm": 0.6291006193664693, | |
| "learning_rate": 2.472401773047107e-06, | |
| "loss": 1.1707, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.7041609510745314, | |
| "grad_norm": 0.6241336853751712, | |
| "learning_rate": 2.438049044264382e-06, | |
| "loss": 1.1763, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7064471879286695, | |
| "grad_norm": 0.6233093811845397, | |
| "learning_rate": 2.4038595297309712e-06, | |
| "loss": 1.1595, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.7087334247828075, | |
| "grad_norm": 0.6099376654855213, | |
| "learning_rate": 2.3698354075588105e-06, | |
| "loss": 1.1815, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.7110196616369456, | |
| "grad_norm": 0.608739940642273, | |
| "learning_rate": 2.3359788453231723e-06, | |
| "loss": 1.1558, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.7133058984910837, | |
| "grad_norm": 0.6060804682823651, | |
| "learning_rate": 2.3022919999245964e-06, | |
| "loss": 1.1737, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7155921353452218, | |
| "grad_norm": 0.6554029837627439, | |
| "learning_rate": 2.2687770174514674e-06, | |
| "loss": 1.1763, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.7178783721993599, | |
| "grad_norm": 0.6199763037940721, | |
| "learning_rate": 2.23543603304329e-06, | |
| "loss": 1.1668, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.720164609053498, | |
| "grad_norm": 0.7002533112076955, | |
| "learning_rate": 2.20227117075468e-06, | |
| "loss": 1.1717, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.7224508459076361, | |
| "grad_norm": 0.5685258465602809, | |
| "learning_rate": 2.1692845434200323e-06, | |
| "loss": 1.1793, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.7247370827617741, | |
| "grad_norm": 0.5988803647429354, | |
| "learning_rate": 2.136478252518924e-06, | |
| "loss": 1.1762, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.7270233196159122, | |
| "grad_norm": 0.6220944262982843, | |
| "learning_rate": 2.103854388042243e-06, | |
| "loss": 1.1732, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.7293095564700502, | |
| "grad_norm": 0.5872374752551915, | |
| "learning_rate": 2.071415028359026e-06, | |
| "loss": 1.1653, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.7315957933241883, | |
| "grad_norm": 0.6315378201627972, | |
| "learning_rate": 2.0391622400840665e-06, | |
| "loss": 1.1631, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7338820301783264, | |
| "grad_norm": 0.6166479295990325, | |
| "learning_rate": 2.0070980779462513e-06, | |
| "loss": 1.1632, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.7361682670324645, | |
| "grad_norm": 0.6082820756952414, | |
| "learning_rate": 1.975224584657648e-06, | |
| "loss": 1.1609, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.7384545038866026, | |
| "grad_norm": 0.5711567863660318, | |
| "learning_rate": 1.943543790783392e-06, | |
| "loss": 1.1629, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 0.5934876997772376, | |
| "learning_rate": 1.9120577146123125e-06, | |
| "loss": 1.1711, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.7430269775948788, | |
| "grad_norm": 0.6044258229955937, | |
| "learning_rate": 1.8807683620283496e-06, | |
| "loss": 1.1792, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.7453132144490169, | |
| "grad_norm": 0.6414108282805848, | |
| "learning_rate": 1.8496777263827775e-06, | |
| "loss": 1.1909, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.747599451303155, | |
| "grad_norm": 0.5928077840962543, | |
| "learning_rate": 1.8187877883672024e-06, | |
| "loss": 1.177, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.7498856881572931, | |
| "grad_norm": 0.5674967348667851, | |
| "learning_rate": 1.7881005158873826e-06, | |
| "loss": 1.1698, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.7521719250114312, | |
| "grad_norm": 0.6190325214784786, | |
| "learning_rate": 1.757617863937865e-06, | |
| "loss": 1.1564, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.7544581618655692, | |
| "grad_norm": 0.5994621485851359, | |
| "learning_rate": 1.7273417744774323e-06, | |
| "loss": 1.1682, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.7567443987197073, | |
| "grad_norm": 0.6486512119864596, | |
| "learning_rate": 1.6972741763053835e-06, | |
| "loss": 1.1695, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.7590306355738454, | |
| "grad_norm": 0.6124244446703457, | |
| "learning_rate": 1.6674169849386606e-06, | |
| "loss": 1.1735, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.7613168724279835, | |
| "grad_norm": 0.6215393083401685, | |
| "learning_rate": 1.6377721024898214e-06, | |
| "loss": 1.1611, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.7636031092821216, | |
| "grad_norm": 0.6379465283211975, | |
| "learning_rate": 1.608341417545849e-06, | |
| "loss": 1.1481, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.7658893461362597, | |
| "grad_norm": 0.5646658898706897, | |
| "learning_rate": 1.5791268050478487e-06, | |
| "loss": 1.1732, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.7681755829903978, | |
| "grad_norm": 0.6028441016085894, | |
| "learning_rate": 1.5501301261715896e-06, | |
| "loss": 1.1703, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.7704618198445359, | |
| "grad_norm": 0.6313316478647917, | |
| "learning_rate": 1.5213532282089466e-06, | |
| "loss": 1.1631, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.772748056698674, | |
| "grad_norm": 0.600237347487572, | |
| "learning_rate": 1.4927979444502028e-06, | |
| "loss": 1.1642, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.7750342935528121, | |
| "grad_norm": 0.5957448361281138, | |
| "learning_rate": 1.4644660940672628e-06, | |
| "loss": 1.1668, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.7773205304069501, | |
| "grad_norm": 0.5872437663700951, | |
| "learning_rate": 1.4363594819977606e-06, | |
| "loss": 1.1707, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.7796067672610882, | |
| "grad_norm": 0.7075549655922131, | |
| "learning_rate": 1.4084798988300684e-06, | |
| "loss": 1.1723, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.7818930041152263, | |
| "grad_norm": 0.6203199463017092, | |
| "learning_rate": 1.3808291206892232e-06, | |
| "loss": 1.1668, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.7841792409693644, | |
| "grad_norm": 0.5759538308213393, | |
| "learning_rate": 1.3534089091237757e-06, | |
| "loss": 1.1598, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.7864654778235025, | |
| "grad_norm": 0.5942123152988342, | |
| "learning_rate": 1.3262210109935719e-06, | |
| "loss": 1.1699, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.7887517146776406, | |
| "grad_norm": 0.6597153339968819, | |
| "learning_rate": 1.2992671583584587e-06, | |
| "loss": 1.163, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.7910379515317787, | |
| "grad_norm": 0.5994756887911626, | |
| "learning_rate": 1.2725490683679458e-06, | |
| "loss": 1.1797, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.7933241883859168, | |
| "grad_norm": 0.5942174681280669, | |
| "learning_rate": 1.2460684431518055e-06, | |
| "loss": 1.1649, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.7956104252400549, | |
| "grad_norm": 0.5884403788886147, | |
| "learning_rate": 1.2198269697116416e-06, | |
| "loss": 1.1627, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.797896662094193, | |
| "grad_norm": 0.5917506875732326, | |
| "learning_rate": 1.1938263198134087e-06, | |
| "loss": 1.1729, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.8001828989483311, | |
| "grad_norm": 0.5689945244963683, | |
| "learning_rate": 1.168068149880912e-06, | |
| "loss": 1.1639, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8024691358024691, | |
| "grad_norm": 0.5945700377730089, | |
| "learning_rate": 1.1425541008902852e-06, | |
| "loss": 1.1616, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.8047553726566072, | |
| "grad_norm": 0.5960318855848052, | |
| "learning_rate": 1.1172857982654445e-06, | |
| "loss": 1.1796, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.8070416095107453, | |
| "grad_norm": 0.606906781862042, | |
| "learning_rate": 1.092264851774536e-06, | |
| "loss": 1.1524, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.8093278463648834, | |
| "grad_norm": 0.6686014083887466, | |
| "learning_rate": 1.067492855427385e-06, | |
| "loss": 1.1681, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.8116140832190215, | |
| "grad_norm": 0.6637295349703526, | |
| "learning_rate": 1.0429713873739505e-06, | |
| "loss": 1.1603, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.8139003200731596, | |
| "grad_norm": 0.5937746781646984, | |
| "learning_rate": 1.0187020098037759e-06, | |
| "loss": 1.1577, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8161865569272977, | |
| "grad_norm": 0.6154438358761861, | |
| "learning_rate": 9.946862688464753e-07, | |
| "loss": 1.1596, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.8184727937814358, | |
| "grad_norm": 0.6511739287376433, | |
| "learning_rate": 9.709256944732343e-07, | |
| "loss": 1.1707, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8207590306355739, | |
| "grad_norm": 0.6174881374069865, | |
| "learning_rate": 9.474218003993275e-07, | |
| "loss": 1.1775, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.823045267489712, | |
| "grad_norm": 0.5791204684491382, | |
| "learning_rate": 9.241760839877023e-07, | |
| "loss": 1.1571, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.82533150434385, | |
| "grad_norm": 0.6464260391976697, | |
| "learning_rate": 9.011900261535767e-07, | |
| "loss": 1.1713, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.8276177411979881, | |
| "grad_norm": 0.6102288143326278, | |
| "learning_rate": 8.784650912700909e-07, | |
| "loss": 1.1654, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.8299039780521262, | |
| "grad_norm": 0.6226743471510658, | |
| "learning_rate": 8.560027270750276e-07, | |
| "loss": 1.1655, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.8321902149062643, | |
| "grad_norm": 0.6079710775307922, | |
| "learning_rate": 8.338043645785698e-07, | |
| "loss": 1.1669, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.8344764517604024, | |
| "grad_norm": 0.6077180347148399, | |
| "learning_rate": 8.118714179721404e-07, | |
| "loss": 1.1529, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.8367626886145405, | |
| "grad_norm": 0.6420590181680129, | |
| "learning_rate": 7.902052845383112e-07, | |
| "loss": 1.1662, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.8390489254686786, | |
| "grad_norm": 0.5675937752707487, | |
| "learning_rate": 7.6880734456178e-07, | |
| "loss": 1.1638, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.8413351623228167, | |
| "grad_norm": 0.5963600943686237, | |
| "learning_rate": 7.476789612414414e-07, | |
| "loss": 1.1648, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.8436213991769548, | |
| "grad_norm": 0.6248451529177521, | |
| "learning_rate": 7.268214806035423e-07, | |
| "loss": 1.1704, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.8459076360310929, | |
| "grad_norm": 0.6582130785897107, | |
| "learning_rate": 7.062362314159211e-07, | |
| "loss": 1.1716, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.848193872885231, | |
| "grad_norm": 0.6104979563533071, | |
| "learning_rate": 6.859245251033697e-07, | |
| "loss": 1.1551, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.850480109739369, | |
| "grad_norm": 0.6291505363028616, | |
| "learning_rate": 6.658876556640781e-07, | |
| "loss": 1.1606, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.8527663465935071, | |
| "grad_norm": 0.626351910055198, | |
| "learning_rate": 6.461268995871967e-07, | |
| "loss": 1.1648, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.8550525834476452, | |
| "grad_norm": 0.5991977091276379, | |
| "learning_rate": 6.266435157715222e-07, | |
| "loss": 1.1403, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.8573388203017832, | |
| "grad_norm": 0.6133109082285381, | |
| "learning_rate": 6.074387454452891e-07, | |
| "loss": 1.1578, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.8596250571559213, | |
| "grad_norm": 0.6062420232877472, | |
| "learning_rate": 5.885138120870965e-07, | |
| "loss": 1.1422, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.8619112940100594, | |
| "grad_norm": 0.5920619164293491, | |
| "learning_rate": 5.698699213479697e-07, | |
| "loss": 1.1503, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.8641975308641975, | |
| "grad_norm": 0.6179934405963249, | |
| "learning_rate": 5.515082609745465e-07, | |
| "loss": 1.1728, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.8664837677183356, | |
| "grad_norm": 0.6191884681224713, | |
| "learning_rate": 5.334300007334065e-07, | |
| "loss": 1.1514, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.8687700045724737, | |
| "grad_norm": 0.6148818189812965, | |
| "learning_rate": 5.156362923365587e-07, | |
| "loss": 1.1772, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.8710562414266118, | |
| "grad_norm": 0.5927964681781609, | |
| "learning_rate": 4.981282693680584e-07, | |
| "loss": 1.1747, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.8733424782807498, | |
| "grad_norm": 0.630038523819453, | |
| "learning_rate": 4.80907047211796e-07, | |
| "loss": 1.1638, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.8756287151348879, | |
| "grad_norm": 0.5822419290829026, | |
| "learning_rate": 4.639737229804403e-07, | |
| "loss": 1.1667, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.877914951989026, | |
| "grad_norm": 0.6169634205827448, | |
| "learning_rate": 4.473293754455399e-07, | |
| "loss": 1.1695, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.8802011888431641, | |
| "grad_norm": 0.5892947845386679, | |
| "learning_rate": 4.3097506496880325e-07, | |
| "loss": 1.1684, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.8824874256973022, | |
| "grad_norm": 0.6796811793089527, | |
| "learning_rate": 4.149118334345403e-07, | |
| "loss": 1.1604, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.8847736625514403, | |
| "grad_norm": 0.5951100132603444, | |
| "learning_rate": 3.9914070418329123e-07, | |
| "loss": 1.1632, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.8870598994055784, | |
| "grad_norm": 0.6710610553022762, | |
| "learning_rate": 3.836626819466338e-07, | |
| "loss": 1.1455, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.8893461362597165, | |
| "grad_norm": 0.6128779790737046, | |
| "learning_rate": 3.684787527831707e-07, | |
| "loss": 1.1609, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.8916323731138546, | |
| "grad_norm": 0.5800567298586133, | |
| "learning_rate": 3.53589884015712e-07, | |
| "loss": 1.1636, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.8939186099679927, | |
| "grad_norm": 0.5600191099569565, | |
| "learning_rate": 3.3899702416965166e-07, | |
| "loss": 1.1721, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.8962048468221308, | |
| "grad_norm": 0.5964683215562515, | |
| "learning_rate": 3.247011029125391e-07, | |
| "loss": 1.1508, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.8984910836762688, | |
| "grad_norm": 0.6125213377358303, | |
| "learning_rate": 3.1070303099485055e-07, | |
| "loss": 1.1716, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.9007773205304069, | |
| "grad_norm": 0.5812964318078312, | |
| "learning_rate": 2.9700370019197287e-07, | |
| "loss": 1.1495, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.903063557384545, | |
| "grad_norm": 0.5947330421470328, | |
| "learning_rate": 2.8360398324738415e-07, | |
| "loss": 1.1446, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.9053497942386831, | |
| "grad_norm": 0.5936630268160432, | |
| "learning_rate": 2.7050473381706186e-07, | |
| "loss": 1.1519, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.9076360310928212, | |
| "grad_norm": 0.6228979256825669, | |
| "learning_rate": 2.577067864150906e-07, | |
| "loss": 1.1688, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.9099222679469593, | |
| "grad_norm": 0.6500515468078818, | |
| "learning_rate": 2.452109563605065e-07, | |
| "loss": 1.1718, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.9122085048010974, | |
| "grad_norm": 0.568112374463465, | |
| "learning_rate": 2.330180397253473e-07, | |
| "loss": 1.169, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.9144947416552355, | |
| "grad_norm": 0.6014335143268985, | |
| "learning_rate": 2.2112881328394287e-07, | |
| "loss": 1.1556, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9167809785093736, | |
| "grad_norm": 0.5814781144236604, | |
| "learning_rate": 2.0954403446342753e-07, | |
| "loss": 1.1688, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.9190672153635117, | |
| "grad_norm": 0.6269697024329176, | |
| "learning_rate": 1.9826444129548317e-07, | |
| "loss": 1.1791, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9213534522176497, | |
| "grad_norm": 0.5793724546294099, | |
| "learning_rate": 1.8729075236932903e-07, | |
| "loss": 1.1736, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.9236396890717878, | |
| "grad_norm": 0.5757028817840649, | |
| "learning_rate": 1.7662366678593502e-07, | |
| "loss": 1.1674, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 0.6383512892284545, | |
| "learning_rate": 1.6626386411348783e-07, | |
| "loss": 1.1725, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.928212162780064, | |
| "grad_norm": 0.6064267969457637, | |
| "learning_rate": 1.56212004344099e-07, | |
| "loss": 1.1596, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.9304983996342021, | |
| "grad_norm": 0.6046327277263103, | |
| "learning_rate": 1.4646872785175182e-07, | |
| "loss": 1.1616, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.9327846364883402, | |
| "grad_norm": 0.611959733363112, | |
| "learning_rate": 1.3703465535151505e-07, | |
| "loss": 1.1614, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.9350708733424783, | |
| "grad_norm": 0.6153837948383357, | |
| "learning_rate": 1.2791038785999243e-07, | |
| "loss": 1.1494, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.9373571101966164, | |
| "grad_norm": 0.5507733416769363, | |
| "learning_rate": 1.1909650665703265e-07, | |
| "loss": 1.1331, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.9396433470507545, | |
| "grad_norm": 0.5787602661155832, | |
| "learning_rate": 1.1059357324870456e-07, | |
| "loss": 1.1548, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.9419295839048926, | |
| "grad_norm": 0.5848374134615248, | |
| "learning_rate": 1.024021293315175e-07, | |
| "loss": 1.1628, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.9442158207590307, | |
| "grad_norm": 0.585861722501522, | |
| "learning_rate": 9.452269675791603e-08, | |
| "loss": 1.1424, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.9465020576131687, | |
| "grad_norm": 0.5870866242087308, | |
| "learning_rate": 8.69557775030344e-08, | |
| "loss": 1.181, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.9487882944673068, | |
| "grad_norm": 0.5917858310575264, | |
| "learning_rate": 7.970185363271432e-08, | |
| "loss": 1.1564, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.9510745313214449, | |
| "grad_norm": 0.6272259568011471, | |
| "learning_rate": 7.276138727279669e-08, | |
| "loss": 1.1659, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.953360768175583, | |
| "grad_norm": 0.607366888512829, | |
| "learning_rate": 6.613482057968023e-08, | |
| "loss": 1.1612, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.9556470050297211, | |
| "grad_norm": 0.61579614820576, | |
| "learning_rate": 5.982257571215178e-08, | |
| "loss": 1.1644, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.9579332418838592, | |
| "grad_norm": 0.6162342496797737, | |
| "learning_rate": 5.382505480449274e-08, | |
| "loss": 1.1439, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.9602194787379973, | |
| "grad_norm": 0.5880335959078453, | |
| "learning_rate": 4.814263994086077e-08, | |
| "loss": 1.1405, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.9625057155921354, | |
| "grad_norm": 0.5978901392727579, | |
| "learning_rate": 4.2775693130948094e-08, | |
| "loss": 1.1792, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.9647919524462735, | |
| "grad_norm": 0.5725207858399001, | |
| "learning_rate": 3.772455628691829e-08, | |
| "loss": 1.1679, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.9670781893004116, | |
| "grad_norm": 0.6126681514493614, | |
| "learning_rate": 3.2989551201624836e-08, | |
| "loss": 1.1621, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.9693644261545497, | |
| "grad_norm": 0.6026354249744876, | |
| "learning_rate": 2.857097952810972e-08, | |
| "loss": 1.1728, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.9716506630086877, | |
| "grad_norm": 0.5876159431495082, | |
| "learning_rate": 2.4469122760388264e-08, | |
| "loss": 1.1552, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.9739368998628258, | |
| "grad_norm": 0.5795939734314318, | |
| "learning_rate": 2.0684242215511797e-08, | |
| "loss": 1.1586, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.9762231367169639, | |
| "grad_norm": 0.6100064497073957, | |
| "learning_rate": 1.7216579016925415e-08, | |
| "loss": 1.1585, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.978509373571102, | |
| "grad_norm": 0.6410024148442394, | |
| "learning_rate": 1.4066354079101396e-08, | |
| "loss": 1.1576, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.9807956104252401, | |
| "grad_norm": 0.5946394925998356, | |
| "learning_rate": 1.1233768093468766e-08, | |
| "loss": 1.1565, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.9830818472793782, | |
| "grad_norm": 0.5993080705042445, | |
| "learning_rate": 8.719001515627434e-09, | |
| "loss": 1.1649, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.9853680841335163, | |
| "grad_norm": 0.5857680491868433, | |
| "learning_rate": 6.5222145538501595e-09, | |
| "loss": 1.176, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.9876543209876543, | |
| "grad_norm": 0.6157142971328977, | |
| "learning_rate": 4.643547158878492e-09, | |
| "loss": 1.146, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.9899405578417924, | |
| "grad_norm": 0.6005659801135901, | |
| "learning_rate": 3.0831190150054646e-09, | |
| "loss": 1.1607, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.9922267946959304, | |
| "grad_norm": 0.5963682235084494, | |
| "learning_rate": 1.8410295324505778e-09, | |
| "loss": 1.1668, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.9945130315500685, | |
| "grad_norm": 0.649218390898171, | |
| "learning_rate": 9.173578410281992e-10, | |
| "loss": 1.1602, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.9967992684042066, | |
| "grad_norm": 0.612662110275474, | |
| "learning_rate": 3.1216278510493027e-10, | |
| "loss": 1.1596, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.9990855052583447, | |
| "grad_norm": 0.6025732837303296, | |
| "learning_rate": 2.548291985149387e-11, | |
| "loss": 1.147, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_runtime": 4.0833, | |
| "eval_samples_per_second": 2.449, | |
| "eval_steps_per_second": 0.735, | |
| "step": 2187 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 2187, | |
| "total_flos": 9703359095242752.0, | |
| "train_loss": 0.0, | |
| "train_runtime": 0.009, | |
| "train_samples_per_second": 3870652.356, | |
| "train_steps_per_second": 241991.844 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2187, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9703359095242752.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |