diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9358 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 37.0, + "eval_steps": 500, + "global_step": 1332, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.028169014084507043, + "grad_norm": 12.737117767333984, + "learning_rate": 9.999993146109795e-07, + "loss": 0.6797127723693848, + "step": 1 + }, + { + "epoch": 0.056338028169014086, + "grad_norm": 12.04797649383545, + "learning_rate": 9.999972584460056e-07, + "loss": 0.6627321243286133, + "step": 2 + }, + { + "epoch": 0.08450704225352113, + "grad_norm": 12.632461547851562, + "learning_rate": 9.99993831511342e-07, + "loss": 0.6829236149787903, + "step": 3 + }, + { + "epoch": 0.11267605633802817, + "grad_norm": 11.97681713104248, + "learning_rate": 9.999890338174275e-07, + "loss": 0.6625960469245911, + "step": 4 + }, + { + "epoch": 0.14084507042253522, + "grad_norm": 11.185710906982422, + "learning_rate": 9.99982865378877e-07, + "loss": 0.6418126821517944, + "step": 5 + }, + { + "epoch": 0.16901408450704225, + "grad_norm": 11.49565315246582, + "learning_rate": 9.999753262144804e-07, + "loss": 0.6464570760726929, + "step": 6 + }, + { + "epoch": 0.19718309859154928, + "grad_norm": 10.954561233520508, + "learning_rate": 9.999664163472034e-07, + "loss": 0.63329017162323, + "step": 7 + }, + { + "epoch": 0.22535211267605634, + "grad_norm": 10.728333473205566, + "learning_rate": 9.999561358041868e-07, + "loss": 0.6382037401199341, + "step": 8 + }, + { + "epoch": 0.2535211267605634, + "grad_norm": 8.404616355895996, + "learning_rate": 9.99944484616747e-07, + "loss": 0.5870345830917358, + "step": 9 + }, + { + "epoch": 0.28169014084507044, + "grad_norm": 7.616209983825684, + "learning_rate": 9.99931462820376e-07, + "loss": 0.5672095417976379, + "step": 10 + }, + { + "epoch": 0.30985915492957744, + "grad_norm": 7.800975799560547, + "learning_rate": 9.999170704547398e-07, + "loss": 0.581696629524231, + "step": 11 + }, + { + "epoch": 0.3380281690140845, + "grad_norm": 7.584338665008545, + "learning_rate": 9.999013075636804e-07, + "loss": 0.5873032808303833, + "step": 12 + }, + { + "epoch": 0.36619718309859156, + "grad_norm": 6.736105442047119, + "learning_rate": 9.998841741952141e-07, + "loss": 0.5502372980117798, + "step": 13 + }, + { + "epoch": 0.39436619718309857, + "grad_norm": 6.839756965637207, + "learning_rate": 9.998656704015323e-07, + "loss": 0.5653150677680969, + "step": 14 + }, + { + "epoch": 0.4225352112676056, + "grad_norm": 7.052567005157471, + "learning_rate": 9.998457962390008e-07, + "loss": 0.5660480260848999, + "step": 15 + }, + { + "epoch": 0.4507042253521127, + "grad_norm": 6.61349630355835, + "learning_rate": 9.998245517681593e-07, + "loss": 0.552219033241272, + "step": 16 + }, + { + "epoch": 0.4788732394366197, + "grad_norm": 3.9956817626953125, + "learning_rate": 9.998019370537227e-07, + "loss": 0.5171241760253906, + "step": 17 + }, + { + "epoch": 0.5070422535211268, + "grad_norm": 3.6887121200561523, + "learning_rate": 9.997779521645791e-07, + "loss": 0.5023034811019897, + "step": 18 + }, + { + "epoch": 0.5352112676056338, + "grad_norm": 3.6457769870758057, + "learning_rate": 9.997525971737909e-07, + "loss": 0.505454421043396, + "step": 19 + }, + { + "epoch": 0.5633802816901409, + "grad_norm": 3.398740530014038, + "learning_rate": 9.997258721585931e-07, + "loss": 0.4978747069835663, + "step": 20 + }, + { + "epoch": 0.5915492957746479, + "grad_norm": 3.2862207889556885, + "learning_rate": 9.99697777200395e-07, + "loss": 0.5002620220184326, + "step": 21 + }, + { + "epoch": 0.6197183098591549, + "grad_norm": 3.3747572898864746, + "learning_rate": 9.996683123847795e-07, + "loss": 0.5069968700408936, + "step": 22 + }, + { + "epoch": 0.647887323943662, + "grad_norm": 3.001546621322632, + "learning_rate": 9.996374778015007e-07, + "loss": 0.4922000765800476, + "step": 23 + }, + { + "epoch": 0.676056338028169, + "grad_norm": 2.996706962585449, + "learning_rate": 9.996052735444862e-07, + "loss": 0.4938335716724396, + "step": 24 + }, + { + "epoch": 0.704225352112676, + "grad_norm": 2.668245315551758, + "learning_rate": 9.99571699711836e-07, + "loss": 0.49115338921546936, + "step": 25 + }, + { + "epoch": 0.7323943661971831, + "grad_norm": 2.4952428340911865, + "learning_rate": 9.995367564058216e-07, + "loss": 0.4847099483013153, + "step": 26 + }, + { + "epoch": 0.7605633802816901, + "grad_norm": 2.529451847076416, + "learning_rate": 9.995004437328865e-07, + "loss": 0.48129573464393616, + "step": 27 + }, + { + "epoch": 0.7887323943661971, + "grad_norm": 2.479883909225464, + "learning_rate": 9.994627618036452e-07, + "loss": 0.5088395476341248, + "step": 28 + }, + { + "epoch": 0.8169014084507042, + "grad_norm": 2.414393424987793, + "learning_rate": 9.994237107328838e-07, + "loss": 0.48045098781585693, + "step": 29 + }, + { + "epoch": 0.8450704225352113, + "grad_norm": 2.2080600261688232, + "learning_rate": 9.993832906395582e-07, + "loss": 0.47147125005722046, + "step": 30 + }, + { + "epoch": 0.8732394366197183, + "grad_norm": 1.912841558456421, + "learning_rate": 9.993415016467952e-07, + "loss": 0.4724900424480438, + "step": 31 + }, + { + "epoch": 0.9014084507042254, + "grad_norm": 1.282597303390503, + "learning_rate": 9.992983438818915e-07, + "loss": 0.46792298555374146, + "step": 32 + }, + { + "epoch": 0.9295774647887324, + "grad_norm": 1.4362828731536865, + "learning_rate": 9.992538174763127e-07, + "loss": 0.45093870162963867, + "step": 33 + }, + { + "epoch": 0.9577464788732394, + "grad_norm": 1.4296821355819702, + "learning_rate": 9.992079225656944e-07, + "loss": 0.44724205136299133, + "step": 34 + }, + { + "epoch": 0.9859154929577465, + "grad_norm": 1.4829713106155396, + "learning_rate": 9.9916065928984e-07, + "loss": 0.44936883449554443, + "step": 35 + }, + { + "epoch": 1.0, + "grad_norm": 1.387039303779602, + "learning_rate": 9.991120277927223e-07, + "loss": 0.47316086292266846, + "step": 36 + }, + { + "epoch": 1.028169014084507, + "grad_norm": 1.3140299320220947, + "learning_rate": 9.990620282224806e-07, + "loss": 0.4389120638370514, + "step": 37 + }, + { + "epoch": 1.056338028169014, + "grad_norm": 1.2881019115447998, + "learning_rate": 9.990106607314225e-07, + "loss": 0.43830516934394836, + "step": 38 + }, + { + "epoch": 1.084507042253521, + "grad_norm": 1.1489726305007935, + "learning_rate": 9.989579254760224e-07, + "loss": 0.44559216499328613, + "step": 39 + }, + { + "epoch": 1.1126760563380282, + "grad_norm": 1.0595662593841553, + "learning_rate": 9.989038226169207e-07, + "loss": 0.43717890977859497, + "step": 40 + }, + { + "epoch": 1.1408450704225352, + "grad_norm": 0.9458185434341431, + "learning_rate": 9.988483523189248e-07, + "loss": 0.43611639738082886, + "step": 41 + }, + { + "epoch": 1.1690140845070423, + "grad_norm": 0.8811507821083069, + "learning_rate": 9.98791514751006e-07, + "loss": 0.4194882810115814, + "step": 42 + }, + { + "epoch": 1.1971830985915493, + "grad_norm": 0.7880372405052185, + "learning_rate": 9.98733310086302e-07, + "loss": 0.4363758862018585, + "step": 43 + }, + { + "epoch": 1.2253521126760563, + "grad_norm": 0.7736399173736572, + "learning_rate": 9.98673738502114e-07, + "loss": 0.43049588799476624, + "step": 44 + }, + { + "epoch": 1.2535211267605635, + "grad_norm": 0.7198370695114136, + "learning_rate": 9.986128001799076e-07, + "loss": 0.43443119525909424, + "step": 45 + }, + { + "epoch": 1.2816901408450705, + "grad_norm": 0.7174084186553955, + "learning_rate": 9.985504953053113e-07, + "loss": 0.43092280626296997, + "step": 46 + }, + { + "epoch": 1.3098591549295775, + "grad_norm": 0.7043387293815613, + "learning_rate": 9.984868240681164e-07, + "loss": 0.417573481798172, + "step": 47 + }, + { + "epoch": 1.3380281690140845, + "grad_norm": 0.6884390115737915, + "learning_rate": 9.98421786662277e-07, + "loss": 0.4211745262145996, + "step": 48 + }, + { + "epoch": 1.3661971830985915, + "grad_norm": 0.7091729044914246, + "learning_rate": 9.983553832859078e-07, + "loss": 0.4147814214229584, + "step": 49 + }, + { + "epoch": 1.3943661971830985, + "grad_norm": 0.6925486326217651, + "learning_rate": 9.982876141412855e-07, + "loss": 0.432437002658844, + "step": 50 + }, + { + "epoch": 1.4225352112676055, + "grad_norm": 0.7119179368019104, + "learning_rate": 9.982184794348462e-07, + "loss": 0.41633373498916626, + "step": 51 + }, + { + "epoch": 1.4507042253521127, + "grad_norm": 0.6801888346672058, + "learning_rate": 9.981479793771866e-07, + "loss": 0.4228135645389557, + "step": 52 + }, + { + "epoch": 1.4788732394366197, + "grad_norm": 0.6876774430274963, + "learning_rate": 9.98076114183062e-07, + "loss": 0.41455432772636414, + "step": 53 + }, + { + "epoch": 1.5070422535211268, + "grad_norm": 0.6285378336906433, + "learning_rate": 9.98002884071386e-07, + "loss": 0.41491252183914185, + "step": 54 + }, + { + "epoch": 1.5352112676056338, + "grad_norm": 0.6261480450630188, + "learning_rate": 9.979282892652304e-07, + "loss": 0.42695990204811096, + "step": 55 + }, + { + "epoch": 1.563380281690141, + "grad_norm": 0.6269007325172424, + "learning_rate": 9.97852329991824e-07, + "loss": 0.41284894943237305, + "step": 56 + }, + { + "epoch": 1.591549295774648, + "grad_norm": 0.6070351600646973, + "learning_rate": 9.977750064825519e-07, + "loss": 0.42982780933380127, + "step": 57 + }, + { + "epoch": 1.619718309859155, + "grad_norm": 0.5970191955566406, + "learning_rate": 9.976963189729547e-07, + "loss": 0.41365376114845276, + "step": 58 + }, + { + "epoch": 1.647887323943662, + "grad_norm": 0.5778729319572449, + "learning_rate": 9.976162677027284e-07, + "loss": 0.42080622911453247, + "step": 59 + }, + { + "epoch": 1.676056338028169, + "grad_norm": 0.5267013907432556, + "learning_rate": 9.975348529157229e-07, + "loss": 0.40949106216430664, + "step": 60 + }, + { + "epoch": 1.704225352112676, + "grad_norm": 0.5284983515739441, + "learning_rate": 9.974520748599421e-07, + "loss": 0.4082256555557251, + "step": 61 + }, + { + "epoch": 1.732394366197183, + "grad_norm": 0.49156272411346436, + "learning_rate": 9.973679337875418e-07, + "loss": 0.3944624662399292, + "step": 62 + }, + { + "epoch": 1.76056338028169, + "grad_norm": 0.4944726824760437, + "learning_rate": 9.972824299548309e-07, + "loss": 0.4087256193161011, + "step": 63 + }, + { + "epoch": 1.788732394366197, + "grad_norm": 0.4764452874660492, + "learning_rate": 9.971955636222684e-07, + "loss": 0.4067206382751465, + "step": 64 + }, + { + "epoch": 1.8169014084507042, + "grad_norm": 0.48928746581077576, + "learning_rate": 9.971073350544644e-07, + "loss": 0.4004918336868286, + "step": 65 + }, + { + "epoch": 1.8450704225352113, + "grad_norm": 0.4580424726009369, + "learning_rate": 9.970177445201783e-07, + "loss": 0.4040325880050659, + "step": 66 + }, + { + "epoch": 1.8732394366197183, + "grad_norm": 0.5053924322128296, + "learning_rate": 9.969267922923188e-07, + "loss": 0.40139085054397583, + "step": 67 + }, + { + "epoch": 1.9014084507042255, + "grad_norm": 0.4661526679992676, + "learning_rate": 9.968344786479415e-07, + "loss": 0.38993388414382935, + "step": 68 + }, + { + "epoch": 1.9295774647887325, + "grad_norm": 0.4677845537662506, + "learning_rate": 9.967408038682505e-07, + "loss": 0.4014376401901245, + "step": 69 + }, + { + "epoch": 1.9577464788732395, + "grad_norm": 0.4655434787273407, + "learning_rate": 9.96645768238595e-07, + "loss": 0.3975449204444885, + "step": 70 + }, + { + "epoch": 1.9859154929577465, + "grad_norm": 0.4675063192844391, + "learning_rate": 9.965493720484698e-07, + "loss": 0.4009154438972473, + "step": 71 + }, + { + "epoch": 2.0, + "grad_norm": 0.5548242926597595, + "learning_rate": 9.964516155915151e-07, + "loss": 0.39267462491989136, + "step": 72 + }, + { + "epoch": 2.028169014084507, + "grad_norm": 0.4601926803588867, + "learning_rate": 9.963524991655133e-07, + "loss": 0.3973795473575592, + "step": 73 + }, + { + "epoch": 2.056338028169014, + "grad_norm": 0.4464695155620575, + "learning_rate": 9.962520230723906e-07, + "loss": 0.39020174741744995, + "step": 74 + }, + { + "epoch": 2.084507042253521, + "grad_norm": 0.42715415358543396, + "learning_rate": 9.961501876182148e-07, + "loss": 0.3930002450942993, + "step": 75 + }, + { + "epoch": 2.112676056338028, + "grad_norm": 0.3989242613315582, + "learning_rate": 9.960469931131936e-07, + "loss": 0.3865053653717041, + "step": 76 + }, + { + "epoch": 2.140845070422535, + "grad_norm": 0.4167341887950897, + "learning_rate": 9.959424398716763e-07, + "loss": 0.39777663350105286, + "step": 77 + }, + { + "epoch": 2.169014084507042, + "grad_norm": 0.4046856760978699, + "learning_rate": 9.958365282121496e-07, + "loss": 0.38023141026496887, + "step": 78 + }, + { + "epoch": 2.1971830985915495, + "grad_norm": 0.40858548879623413, + "learning_rate": 9.95729258457239e-07, + "loss": 0.37487876415252686, + "step": 79 + }, + { + "epoch": 2.2253521126760565, + "grad_norm": 0.3576146364212036, + "learning_rate": 9.956206309337066e-07, + "loss": 0.3785707354545593, + "step": 80 + }, + { + "epoch": 2.2535211267605635, + "grad_norm": 0.35235047340393066, + "learning_rate": 9.955106459724508e-07, + "loss": 0.38552170991897583, + "step": 81 + }, + { + "epoch": 2.2816901408450705, + "grad_norm": 0.373362272977829, + "learning_rate": 9.953993039085048e-07, + "loss": 0.38321995735168457, + "step": 82 + }, + { + "epoch": 2.3098591549295775, + "grad_norm": 0.3574947416782379, + "learning_rate": 9.952866050810363e-07, + "loss": 0.37346434593200684, + "step": 83 + }, + { + "epoch": 2.3380281690140845, + "grad_norm": 0.36156368255615234, + "learning_rate": 9.951725498333448e-07, + "loss": 0.382648229598999, + "step": 84 + }, + { + "epoch": 2.3661971830985915, + "grad_norm": 0.3521256148815155, + "learning_rate": 9.950571385128625e-07, + "loss": 0.3722230792045593, + "step": 85 + }, + { + "epoch": 2.3943661971830985, + "grad_norm": 0.3384946584701538, + "learning_rate": 9.949403714711526e-07, + "loss": 0.3648328185081482, + "step": 86 + }, + { + "epoch": 2.4225352112676055, + "grad_norm": 0.34228095412254333, + "learning_rate": 9.948222490639075e-07, + "loss": 0.372160941362381, + "step": 87 + }, + { + "epoch": 2.4507042253521125, + "grad_norm": 0.34330716729164124, + "learning_rate": 9.947027716509488e-07, + "loss": 0.36588054895401, + "step": 88 + }, + { + "epoch": 2.4788732394366195, + "grad_norm": 0.34555092453956604, + "learning_rate": 9.94581939596225e-07, + "loss": 0.38422292470932007, + "step": 89 + }, + { + "epoch": 2.507042253521127, + "grad_norm": 0.34432411193847656, + "learning_rate": 9.944597532678119e-07, + "loss": 0.3802357316017151, + "step": 90 + }, + { + "epoch": 2.535211267605634, + "grad_norm": 0.35508641600608826, + "learning_rate": 9.943362130379101e-07, + "loss": 0.37436896562576294, + "step": 91 + }, + { + "epoch": 2.563380281690141, + "grad_norm": 0.3540443181991577, + "learning_rate": 9.942113192828444e-07, + "loss": 0.39830613136291504, + "step": 92 + }, + { + "epoch": 2.591549295774648, + "grad_norm": 0.3429860472679138, + "learning_rate": 9.940850723830632e-07, + "loss": 0.38153308629989624, + "step": 93 + }, + { + "epoch": 2.619718309859155, + "grad_norm": 0.3220756947994232, + "learning_rate": 9.939574727231362e-07, + "loss": 0.36020469665527344, + "step": 94 + }, + { + "epoch": 2.647887323943662, + "grad_norm": 0.3417351245880127, + "learning_rate": 9.93828520691754e-07, + "loss": 0.38868680596351624, + "step": 95 + }, + { + "epoch": 2.676056338028169, + "grad_norm": 0.3259858191013336, + "learning_rate": 9.93698216681727e-07, + "loss": 0.37741273641586304, + "step": 96 + }, + { + "epoch": 2.704225352112676, + "grad_norm": 0.33722448348999023, + "learning_rate": 9.93566561089984e-07, + "loss": 0.3821848928928375, + "step": 97 + }, + { + "epoch": 2.732394366197183, + "grad_norm": 0.31846100091934204, + "learning_rate": 9.934335543175705e-07, + "loss": 0.3690311014652252, + "step": 98 + }, + { + "epoch": 2.76056338028169, + "grad_norm": 0.34040549397468567, + "learning_rate": 9.932991967696482e-07, + "loss": 0.3875328600406647, + "step": 99 + }, + { + "epoch": 2.788732394366197, + "grad_norm": 0.3258971571922302, + "learning_rate": 9.931634888554935e-07, + "loss": 0.3811268210411072, + "step": 100 + }, + { + "epoch": 2.816901408450704, + "grad_norm": 0.32806867361068726, + "learning_rate": 9.930264309884964e-07, + "loss": 0.3713844418525696, + "step": 101 + }, + { + "epoch": 2.845070422535211, + "grad_norm": 0.3252440094947815, + "learning_rate": 9.928880235861588e-07, + "loss": 0.3812159299850464, + "step": 102 + }, + { + "epoch": 2.873239436619718, + "grad_norm": 0.33440181612968445, + "learning_rate": 9.927482670700936e-07, + "loss": 0.37723666429519653, + "step": 103 + }, + { + "epoch": 2.9014084507042255, + "grad_norm": 0.3046083152294159, + "learning_rate": 9.926071618660237e-07, + "loss": 0.3681407868862152, + "step": 104 + }, + { + "epoch": 2.9295774647887325, + "grad_norm": 0.3097338378429413, + "learning_rate": 9.924647084037797e-07, + "loss": 0.3724687099456787, + "step": 105 + }, + { + "epoch": 2.9577464788732395, + "grad_norm": 0.32305970788002014, + "learning_rate": 9.923209071172994e-07, + "loss": 0.3641166090965271, + "step": 106 + }, + { + "epoch": 2.9859154929577465, + "grad_norm": 0.32677826285362244, + "learning_rate": 9.921757584446268e-07, + "loss": 0.36330974102020264, + "step": 107 + }, + { + "epoch": 3.0, + "grad_norm": 0.4263511896133423, + "learning_rate": 9.9202926282791e-07, + "loss": 0.35592788457870483, + "step": 108 + }, + { + "epoch": 3.028169014084507, + "grad_norm": 0.2994212508201599, + "learning_rate": 9.918814207133997e-07, + "loss": 0.3603532314300537, + "step": 109 + }, + { + "epoch": 3.056338028169014, + "grad_norm": 0.30977630615234375, + "learning_rate": 9.917322325514487e-07, + "loss": 0.374819278717041, + "step": 110 + }, + { + "epoch": 3.084507042253521, + "grad_norm": 0.31614792346954346, + "learning_rate": 9.915816987965102e-07, + "loss": 0.3680700957775116, + "step": 111 + }, + { + "epoch": 3.112676056338028, + "grad_norm": 0.30458712577819824, + "learning_rate": 9.91429819907136e-07, + "loss": 0.3753468692302704, + "step": 112 + }, + { + "epoch": 3.140845070422535, + "grad_norm": 0.30280736088752747, + "learning_rate": 9.912765963459756e-07, + "loss": 0.3559075593948364, + "step": 113 + }, + { + "epoch": 3.169014084507042, + "grad_norm": 0.3088322579860687, + "learning_rate": 9.911220285797748e-07, + "loss": 0.36761462688446045, + "step": 114 + }, + { + "epoch": 3.1971830985915495, + "grad_norm": 0.3007463216781616, + "learning_rate": 9.909661170793733e-07, + "loss": 0.3572486340999603, + "step": 115 + }, + { + "epoch": 3.2253521126760565, + "grad_norm": 0.29317507147789, + "learning_rate": 9.908088623197048e-07, + "loss": 0.37356066703796387, + "step": 116 + }, + { + "epoch": 3.2535211267605635, + "grad_norm": 0.30190175771713257, + "learning_rate": 9.906502647797945e-07, + "loss": 0.3747510015964508, + "step": 117 + }, + { + "epoch": 3.2816901408450705, + "grad_norm": 0.300547331571579, + "learning_rate": 9.904903249427582e-07, + "loss": 0.3723798096179962, + "step": 118 + }, + { + "epoch": 3.3098591549295775, + "grad_norm": 0.2943092882633209, + "learning_rate": 9.903290432958003e-07, + "loss": 0.3614634573459625, + "step": 119 + }, + { + "epoch": 3.3380281690140845, + "grad_norm": 0.2933284342288971, + "learning_rate": 9.901664203302124e-07, + "loss": 0.34804195165634155, + "step": 120 + }, + { + "epoch": 3.3661971830985915, + "grad_norm": 0.2936899662017822, + "learning_rate": 9.900024565413727e-07, + "loss": 0.3482627272605896, + "step": 121 + }, + { + "epoch": 3.3943661971830985, + "grad_norm": 0.2972092628479004, + "learning_rate": 9.89837152428743e-07, + "loss": 0.35861676931381226, + "step": 122 + }, + { + "epoch": 3.4225352112676055, + "grad_norm": 0.296779602766037, + "learning_rate": 9.896705084958687e-07, + "loss": 0.37210696935653687, + "step": 123 + }, + { + "epoch": 3.4507042253521125, + "grad_norm": 0.2911286950111389, + "learning_rate": 9.895025252503755e-07, + "loss": 0.33883392810821533, + "step": 124 + }, + { + "epoch": 3.4788732394366195, + "grad_norm": 0.29729408025741577, + "learning_rate": 9.8933320320397e-07, + "loss": 0.3569541573524475, + "step": 125 + }, + { + "epoch": 3.507042253521127, + "grad_norm": 0.29103100299835205, + "learning_rate": 9.891625428724364e-07, + "loss": 0.36078906059265137, + "step": 126 + }, + { + "epoch": 3.535211267605634, + "grad_norm": 0.2976583242416382, + "learning_rate": 9.889905447756355e-07, + "loss": 0.3531530499458313, + "step": 127 + }, + { + "epoch": 3.563380281690141, + "grad_norm": 0.3033563196659088, + "learning_rate": 9.888172094375033e-07, + "loss": 0.37008020281791687, + "step": 128 + }, + { + "epoch": 3.591549295774648, + "grad_norm": 0.30928340554237366, + "learning_rate": 9.886425373860496e-07, + "loss": 0.3652263283729553, + "step": 129 + }, + { + "epoch": 3.619718309859155, + "grad_norm": 0.3299793601036072, + "learning_rate": 9.88466529153356e-07, + "loss": 0.36931300163269043, + "step": 130 + }, + { + "epoch": 3.647887323943662, + "grad_norm": 0.29216262698173523, + "learning_rate": 9.882891852755732e-07, + "loss": 0.3560551404953003, + "step": 131 + }, + { + "epoch": 3.676056338028169, + "grad_norm": 0.3086439371109009, + "learning_rate": 9.881105062929221e-07, + "loss": 0.3592608869075775, + "step": 132 + }, + { + "epoch": 3.704225352112676, + "grad_norm": 0.3008037805557251, + "learning_rate": 9.879304927496896e-07, + "loss": 0.35765546560287476, + "step": 133 + }, + { + "epoch": 3.732394366197183, + "grad_norm": 0.3011510968208313, + "learning_rate": 9.877491451942284e-07, + "loss": 0.35755690932273865, + "step": 134 + }, + { + "epoch": 3.76056338028169, + "grad_norm": 0.28508952260017395, + "learning_rate": 9.875664641789543e-07, + "loss": 0.3475223183631897, + "step": 135 + }, + { + "epoch": 3.788732394366197, + "grad_norm": 0.29807090759277344, + "learning_rate": 9.873824502603459e-07, + "loss": 0.3468858003616333, + "step": 136 + }, + { + "epoch": 3.816901408450704, + "grad_norm": 0.30015671253204346, + "learning_rate": 9.871971039989407e-07, + "loss": 0.3525606393814087, + "step": 137 + }, + { + "epoch": 3.845070422535211, + "grad_norm": 0.2894802689552307, + "learning_rate": 9.870104259593362e-07, + "loss": 0.35189589858055115, + "step": 138 + }, + { + "epoch": 3.873239436619718, + "grad_norm": 0.2956956624984741, + "learning_rate": 9.86822416710186e-07, + "loss": 0.3662959337234497, + "step": 139 + }, + { + "epoch": 3.9014084507042255, + "grad_norm": 0.28614693880081177, + "learning_rate": 9.866330768241983e-07, + "loss": 0.3523305654525757, + "step": 140 + }, + { + "epoch": 3.9295774647887325, + "grad_norm": 0.3109326958656311, + "learning_rate": 9.86442406878136e-07, + "loss": 0.3661171495914459, + "step": 141 + }, + { + "epoch": 3.9577464788732395, + "grad_norm": 0.29977917671203613, + "learning_rate": 9.862504074528126e-07, + "loss": 0.3687261939048767, + "step": 142 + }, + { + "epoch": 3.9859154929577465, + "grad_norm": 0.2874816954135895, + "learning_rate": 9.860570791330911e-07, + "loss": 0.35026735067367554, + "step": 143 + }, + { + "epoch": 4.0, + "grad_norm": 0.39478132128715515, + "learning_rate": 9.85862422507884e-07, + "loss": 0.329179584980011, + "step": 144 + }, + { + "epoch": 4.028169014084507, + "grad_norm": 0.29594185948371887, + "learning_rate": 9.856664381701483e-07, + "loss": 0.34915629029273987, + "step": 145 + }, + { + "epoch": 4.056338028169014, + "grad_norm": 0.2942439615726471, + "learning_rate": 9.854691267168871e-07, + "loss": 0.3501034080982208, + "step": 146 + }, + { + "epoch": 4.084507042253521, + "grad_norm": 0.3186146318912506, + "learning_rate": 9.852704887491445e-07, + "loss": 0.3498520255088806, + "step": 147 + }, + { + "epoch": 4.112676056338028, + "grad_norm": 0.2865906059741974, + "learning_rate": 9.850705248720068e-07, + "loss": 0.359851598739624, + "step": 148 + }, + { + "epoch": 4.140845070422535, + "grad_norm": 0.2773308753967285, + "learning_rate": 9.848692356945981e-07, + "loss": 0.34519776701927185, + "step": 149 + }, + { + "epoch": 4.169014084507042, + "grad_norm": 0.27520084381103516, + "learning_rate": 9.846666218300807e-07, + "loss": 0.3370436429977417, + "step": 150 + }, + { + "epoch": 4.197183098591549, + "grad_norm": 0.31606534123420715, + "learning_rate": 9.844626838956513e-07, + "loss": 0.3660886287689209, + "step": 151 + }, + { + "epoch": 4.225352112676056, + "grad_norm": 0.30757179856300354, + "learning_rate": 9.8425742251254e-07, + "loss": 0.3431619703769684, + "step": 152 + }, + { + "epoch": 4.253521126760563, + "grad_norm": 0.2864473760128021, + "learning_rate": 9.84050838306009e-07, + "loss": 0.3478638231754303, + "step": 153 + }, + { + "epoch": 4.28169014084507, + "grad_norm": 0.2924051880836487, + "learning_rate": 9.838429319053495e-07, + "loss": 0.3459091782569885, + "step": 154 + }, + { + "epoch": 4.309859154929577, + "grad_norm": 0.2723977565765381, + "learning_rate": 9.836337039438803e-07, + "loss": 0.3437414765357971, + "step": 155 + }, + { + "epoch": 4.338028169014084, + "grad_norm": 0.28301340341567993, + "learning_rate": 9.83423155058946e-07, + "loss": 0.351753830909729, + "step": 156 + }, + { + "epoch": 4.366197183098592, + "grad_norm": 0.3007968068122864, + "learning_rate": 9.832112858919155e-07, + "loss": 0.3534032106399536, + "step": 157 + }, + { + "epoch": 4.394366197183099, + "grad_norm": 0.2823623716831207, + "learning_rate": 9.829980970881784e-07, + "loss": 0.33871978521347046, + "step": 158 + }, + { + "epoch": 4.422535211267606, + "grad_norm": 0.27985984086990356, + "learning_rate": 9.82783589297145e-07, + "loss": 0.35134732723236084, + "step": 159 + }, + { + "epoch": 4.450704225352113, + "grad_norm": 0.29764989018440247, + "learning_rate": 9.825677631722435e-07, + "loss": 0.35344886779785156, + "step": 160 + }, + { + "epoch": 4.47887323943662, + "grad_norm": 0.2861703634262085, + "learning_rate": 9.823506193709174e-07, + "loss": 0.3553098440170288, + "step": 161 + }, + { + "epoch": 4.507042253521127, + "grad_norm": 0.3005011975765228, + "learning_rate": 9.821321585546243e-07, + "loss": 0.349773645401001, + "step": 162 + }, + { + "epoch": 4.535211267605634, + "grad_norm": 0.28691744804382324, + "learning_rate": 9.81912381388834e-07, + "loss": 0.3327012360095978, + "step": 163 + }, + { + "epoch": 4.563380281690141, + "grad_norm": 0.3060745298862457, + "learning_rate": 9.816912885430258e-07, + "loss": 0.3464226722717285, + "step": 164 + }, + { + "epoch": 4.591549295774648, + "grad_norm": 0.3035100996494293, + "learning_rate": 9.814688806906868e-07, + "loss": 0.3499942719936371, + "step": 165 + }, + { + "epoch": 4.619718309859155, + "grad_norm": 0.3114430606365204, + "learning_rate": 9.812451585093098e-07, + "loss": 0.3396627604961395, + "step": 166 + }, + { + "epoch": 4.647887323943662, + "grad_norm": 0.30142080783843994, + "learning_rate": 9.810201226803917e-07, + "loss": 0.3466919958591461, + "step": 167 + }, + { + "epoch": 4.676056338028169, + "grad_norm": 0.2819617986679077, + "learning_rate": 9.807937738894303e-07, + "loss": 0.34856730699539185, + "step": 168 + }, + { + "epoch": 4.704225352112676, + "grad_norm": 0.29183247685432434, + "learning_rate": 9.805661128259235e-07, + "loss": 0.3437175750732422, + "step": 169 + }, + { + "epoch": 4.732394366197183, + "grad_norm": 0.29465699195861816, + "learning_rate": 9.80337140183366e-07, + "loss": 0.3438083827495575, + "step": 170 + }, + { + "epoch": 4.76056338028169, + "grad_norm": 0.28720420598983765, + "learning_rate": 9.801068566592483e-07, + "loss": 0.3422589898109436, + "step": 171 + }, + { + "epoch": 4.788732394366197, + "grad_norm": 0.2751031816005707, + "learning_rate": 9.798752629550546e-07, + "loss": 0.3460365831851959, + "step": 172 + }, + { + "epoch": 4.816901408450704, + "grad_norm": 0.2868765592575073, + "learning_rate": 9.796423597762588e-07, + "loss": 0.3391006886959076, + "step": 173 + }, + { + "epoch": 4.845070422535211, + "grad_norm": 0.2844865024089813, + "learning_rate": 9.794081478323245e-07, + "loss": 0.3488645851612091, + "step": 174 + }, + { + "epoch": 4.873239436619718, + "grad_norm": 0.28600648045539856, + "learning_rate": 9.791726278367021e-07, + "loss": 0.3440667986869812, + "step": 175 + }, + { + "epoch": 4.901408450704225, + "grad_norm": 0.29167741537094116, + "learning_rate": 9.78935800506826e-07, + "loss": 0.34016746282577515, + "step": 176 + }, + { + "epoch": 4.929577464788732, + "grad_norm": 0.29203853011131287, + "learning_rate": 9.786976665641138e-07, + "loss": 0.33034777641296387, + "step": 177 + }, + { + "epoch": 4.957746478873239, + "grad_norm": 0.29975563287734985, + "learning_rate": 9.784582267339622e-07, + "loss": 0.34664660692214966, + "step": 178 + }, + { + "epoch": 4.985915492957746, + "grad_norm": 0.2778502106666565, + "learning_rate": 9.78217481745747e-07, + "loss": 0.34249287843704224, + "step": 179 + }, + { + "epoch": 5.0, + "grad_norm": 0.396133691072464, + "learning_rate": 9.779754323328192e-07, + "loss": 0.34673285484313965, + "step": 180 + }, + { + "epoch": 5.028169014084507, + "grad_norm": 0.29174622893333435, + "learning_rate": 9.777320792325025e-07, + "loss": 0.3266841173171997, + "step": 181 + }, + { + "epoch": 5.056338028169014, + "grad_norm": 0.28281646966934204, + "learning_rate": 9.774874231860935e-07, + "loss": 0.3295621871948242, + "step": 182 + }, + { + "epoch": 5.084507042253521, + "grad_norm": 0.2767295837402344, + "learning_rate": 9.772414649388568e-07, + "loss": 0.3460637629032135, + "step": 183 + }, + { + "epoch": 5.112676056338028, + "grad_norm": 0.28246212005615234, + "learning_rate": 9.769942052400235e-07, + "loss": 0.3325508236885071, + "step": 184 + }, + { + "epoch": 5.140845070422535, + "grad_norm": 0.31317514181137085, + "learning_rate": 9.767456448427896e-07, + "loss": 0.3373739719390869, + "step": 185 + }, + { + "epoch": 5.169014084507042, + "grad_norm": 0.29388973116874695, + "learning_rate": 9.764957845043135e-07, + "loss": 0.3335680365562439, + "step": 186 + }, + { + "epoch": 5.197183098591549, + "grad_norm": 0.3093099892139435, + "learning_rate": 9.76244624985713e-07, + "loss": 0.3288199007511139, + "step": 187 + }, + { + "epoch": 5.225352112676056, + "grad_norm": 0.2718607187271118, + "learning_rate": 9.759921670520634e-07, + "loss": 0.33789312839508057, + "step": 188 + }, + { + "epoch": 5.253521126760563, + "grad_norm": 0.3087296485900879, + "learning_rate": 9.757384114723953e-07, + "loss": 0.3482661843299866, + "step": 189 + }, + { + "epoch": 5.28169014084507, + "grad_norm": 0.2887554466724396, + "learning_rate": 9.754833590196926e-07, + "loss": 0.3353871703147888, + "step": 190 + }, + { + "epoch": 5.309859154929577, + "grad_norm": 0.2770691514015198, + "learning_rate": 9.752270104708888e-07, + "loss": 0.33239609003067017, + "step": 191 + }, + { + "epoch": 5.338028169014084, + "grad_norm": 0.29489442706108093, + "learning_rate": 9.749693666068663e-07, + "loss": 0.34318211674690247, + "step": 192 + }, + { + "epoch": 5.366197183098592, + "grad_norm": 0.31870850920677185, + "learning_rate": 9.747104282124531e-07, + "loss": 0.33540403842926025, + "step": 193 + }, + { + "epoch": 5.394366197183099, + "grad_norm": 0.27267521619796753, + "learning_rate": 9.744501960764203e-07, + "loss": 0.33416521549224854, + "step": 194 + }, + { + "epoch": 5.422535211267606, + "grad_norm": 0.284470796585083, + "learning_rate": 9.741886709914803e-07, + "loss": 0.3242385685443878, + "step": 195 + }, + { + "epoch": 5.450704225352113, + "grad_norm": 0.2988561689853668, + "learning_rate": 9.739258537542835e-07, + "loss": 0.3325580656528473, + "step": 196 + }, + { + "epoch": 5.47887323943662, + "grad_norm": 0.29107666015625, + "learning_rate": 9.73661745165417e-07, + "loss": 0.34368401765823364, + "step": 197 + }, + { + "epoch": 5.507042253521127, + "grad_norm": 0.289497047662735, + "learning_rate": 9.733963460294015e-07, + "loss": 0.33908677101135254, + "step": 198 + }, + { + "epoch": 5.535211267605634, + "grad_norm": 0.27910080552101135, + "learning_rate": 9.731296571546885e-07, + "loss": 0.3478449285030365, + "step": 199 + }, + { + "epoch": 5.563380281690141, + "grad_norm": 0.2966774106025696, + "learning_rate": 9.728616793536587e-07, + "loss": 0.3371037244796753, + "step": 200 + }, + { + "epoch": 5.591549295774648, + "grad_norm": 0.30997180938720703, + "learning_rate": 9.72592413442619e-07, + "loss": 0.3469342589378357, + "step": 201 + }, + { + "epoch": 5.619718309859155, + "grad_norm": 0.2851829528808594, + "learning_rate": 9.723218602418e-07, + "loss": 0.3497530221939087, + "step": 202 + }, + { + "epoch": 5.647887323943662, + "grad_norm": 0.29238471388816833, + "learning_rate": 9.720500205753538e-07, + "loss": 0.3286020755767822, + "step": 203 + }, + { + "epoch": 5.676056338028169, + "grad_norm": 0.2877226769924164, + "learning_rate": 9.717768952713511e-07, + "loss": 0.338655948638916, + "step": 204 + }, + { + "epoch": 5.704225352112676, + "grad_norm": 0.28834086656570435, + "learning_rate": 9.71502485161779e-07, + "loss": 0.333360880613327, + "step": 205 + }, + { + "epoch": 5.732394366197183, + "grad_norm": 0.28225836157798767, + "learning_rate": 9.71226791082538e-07, + "loss": 0.3514789640903473, + "step": 206 + }, + { + "epoch": 5.76056338028169, + "grad_norm": 0.28878796100616455, + "learning_rate": 9.709498138734403e-07, + "loss": 0.3271612524986267, + "step": 207 + }, + { + "epoch": 5.788732394366197, + "grad_norm": 0.29221564531326294, + "learning_rate": 9.706715543782064e-07, + "loss": 0.32984620332717896, + "step": 208 + }, + { + "epoch": 5.816901408450704, + "grad_norm": 0.31417179107666016, + "learning_rate": 9.703920134444632e-07, + "loss": 0.32708263397216797, + "step": 209 + }, + { + "epoch": 5.845070422535211, + "grad_norm": 0.30656933784484863, + "learning_rate": 9.701111919237408e-07, + "loss": 0.3378485143184662, + "step": 210 + }, + { + "epoch": 5.873239436619718, + "grad_norm": 0.28274714946746826, + "learning_rate": 9.698290906714702e-07, + "loss": 0.3210570812225342, + "step": 211 + }, + { + "epoch": 5.901408450704225, + "grad_norm": 0.28694605827331543, + "learning_rate": 9.695457105469804e-07, + "loss": 0.33672863245010376, + "step": 212 + }, + { + "epoch": 5.929577464788732, + "grad_norm": 0.2965106666088104, + "learning_rate": 9.69261052413497e-07, + "loss": 0.34379851818084717, + "step": 213 + }, + { + "epoch": 5.957746478873239, + "grad_norm": 0.3144500255584717, + "learning_rate": 9.689751171381377e-07, + "loss": 0.33530962467193604, + "step": 214 + }, + { + "epoch": 5.985915492957746, + "grad_norm": 0.274070680141449, + "learning_rate": 9.68687905591911e-07, + "loss": 0.32609909772872925, + "step": 215 + }, + { + "epoch": 6.0, + "grad_norm": 0.3976318836212158, + "learning_rate": 9.683994186497132e-07, + "loss": 0.3320915997028351, + "step": 216 + }, + { + "epoch": 6.028169014084507, + "grad_norm": 0.27306580543518066, + "learning_rate": 9.681096571903252e-07, + "loss": 0.32757407426834106, + "step": 217 + }, + { + "epoch": 6.056338028169014, + "grad_norm": 0.2815074622631073, + "learning_rate": 9.67818622096411e-07, + "loss": 0.31570878624916077, + "step": 218 + }, + { + "epoch": 6.084507042253521, + "grad_norm": 0.29271578788757324, + "learning_rate": 9.67526314254514e-07, + "loss": 0.33092743158340454, + "step": 219 + }, + { + "epoch": 6.112676056338028, + "grad_norm": 0.2819676399230957, + "learning_rate": 9.672327345550543e-07, + "loss": 0.32412028312683105, + "step": 220 + }, + { + "epoch": 6.140845070422535, + "grad_norm": 0.29121264815330505, + "learning_rate": 9.669378838923267e-07, + "loss": 0.324832558631897, + "step": 221 + }, + { + "epoch": 6.169014084507042, + "grad_norm": 0.28991273045539856, + "learning_rate": 9.666417631644976e-07, + "loss": 0.3393062949180603, + "step": 222 + }, + { + "epoch": 6.197183098591549, + "grad_norm": 0.28072309494018555, + "learning_rate": 9.66344373273602e-07, + "loss": 0.32950296998023987, + "step": 223 + }, + { + "epoch": 6.225352112676056, + "grad_norm": 0.3102487027645111, + "learning_rate": 9.66045715125541e-07, + "loss": 0.3289036154747009, + "step": 224 + }, + { + "epoch": 6.253521126760563, + "grad_norm": 0.2856598198413849, + "learning_rate": 9.657457896300791e-07, + "loss": 0.30844709277153015, + "step": 225 + }, + { + "epoch": 6.28169014084507, + "grad_norm": 0.28150248527526855, + "learning_rate": 9.654445977008414e-07, + "loss": 0.32252323627471924, + "step": 226 + }, + { + "epoch": 6.309859154929577, + "grad_norm": 0.3106309175491333, + "learning_rate": 9.651421402553108e-07, + "loss": 0.3153507113456726, + "step": 227 + }, + { + "epoch": 6.338028169014084, + "grad_norm": 0.3323248028755188, + "learning_rate": 9.648384182148252e-07, + "loss": 0.3372737169265747, + "step": 228 + }, + { + "epoch": 6.366197183098592, + "grad_norm": 0.2816256880760193, + "learning_rate": 9.645334325045745e-07, + "loss": 0.3402503728866577, + "step": 229 + }, + { + "epoch": 6.394366197183099, + "grad_norm": 0.28511133790016174, + "learning_rate": 9.64227184053598e-07, + "loss": 0.3433256149291992, + "step": 230 + }, + { + "epoch": 6.422535211267606, + "grad_norm": 0.27890780568122864, + "learning_rate": 9.63919673794782e-07, + "loss": 0.3293980658054352, + "step": 231 + }, + { + "epoch": 6.450704225352113, + "grad_norm": 0.29692021012306213, + "learning_rate": 9.636109026648554e-07, + "loss": 0.3282950818538666, + "step": 232 + }, + { + "epoch": 6.47887323943662, + "grad_norm": 0.2867494523525238, + "learning_rate": 9.633008716043892e-07, + "loss": 0.3350924253463745, + "step": 233 + }, + { + "epoch": 6.507042253521127, + "grad_norm": 0.27419739961624146, + "learning_rate": 9.629895815577915e-07, + "loss": 0.33370357751846313, + "step": 234 + }, + { + "epoch": 6.535211267605634, + "grad_norm": 0.2837441563606262, + "learning_rate": 9.626770334733058e-07, + "loss": 0.3225363790988922, + "step": 235 + }, + { + "epoch": 6.563380281690141, + "grad_norm": 0.28063684701919556, + "learning_rate": 9.623632283030077e-07, + "loss": 0.33922791481018066, + "step": 236 + }, + { + "epoch": 6.591549295774648, + "grad_norm": 0.2789226770401001, + "learning_rate": 9.620481670028026e-07, + "loss": 0.3289903998374939, + "step": 237 + }, + { + "epoch": 6.619718309859155, + "grad_norm": 0.2788150906562805, + "learning_rate": 9.617318505324212e-07, + "loss": 0.3213944435119629, + "step": 238 + }, + { + "epoch": 6.647887323943662, + "grad_norm": 0.2622866928577423, + "learning_rate": 9.614142798554186e-07, + "loss": 0.3391764461994171, + "step": 239 + }, + { + "epoch": 6.676056338028169, + "grad_norm": 0.2952481806278229, + "learning_rate": 9.610954559391704e-07, + "loss": 0.31737983226776123, + "step": 240 + }, + { + "epoch": 6.704225352112676, + "grad_norm": 0.28387367725372314, + "learning_rate": 9.607753797548691e-07, + "loss": 0.33009767532348633, + "step": 241 + }, + { + "epoch": 6.732394366197183, + "grad_norm": 0.28222769498825073, + "learning_rate": 9.604540522775227e-07, + "loss": 0.3226430416107178, + "step": 242 + }, + { + "epoch": 6.76056338028169, + "grad_norm": 0.2985075116157532, + "learning_rate": 9.601314744859504e-07, + "loss": 0.3328002393245697, + "step": 243 + }, + { + "epoch": 6.788732394366197, + "grad_norm": 0.2787352204322815, + "learning_rate": 9.598076473627796e-07, + "loss": 0.3292522728443146, + "step": 244 + }, + { + "epoch": 6.816901408450704, + "grad_norm": 0.2772713899612427, + "learning_rate": 9.594825718944444e-07, + "loss": 0.322078138589859, + "step": 245 + }, + { + "epoch": 6.845070422535211, + "grad_norm": 0.28727421164512634, + "learning_rate": 9.59156249071181e-07, + "loss": 0.3206414580345154, + "step": 246 + }, + { + "epoch": 6.873239436619718, + "grad_norm": 0.28722915053367615, + "learning_rate": 9.588286798870248e-07, + "loss": 0.34071967005729675, + "step": 247 + }, + { + "epoch": 6.901408450704225, + "grad_norm": 0.2791661322116852, + "learning_rate": 9.58499865339809e-07, + "loss": 0.32371699810028076, + "step": 248 + }, + { + "epoch": 6.929577464788732, + "grad_norm": 0.30174046754837036, + "learning_rate": 9.581698064311592e-07, + "loss": 0.32212015986442566, + "step": 249 + }, + { + "epoch": 6.957746478873239, + "grad_norm": 0.2757203280925751, + "learning_rate": 9.578385041664925e-07, + "loss": 0.3286738395690918, + "step": 250 + }, + { + "epoch": 6.985915492957746, + "grad_norm": 0.2977890968322754, + "learning_rate": 9.575059595550127e-07, + "loss": 0.32400673627853394, + "step": 251 + }, + { + "epoch": 7.0, + "grad_norm": 0.38676717877388, + "learning_rate": 9.571721736097088e-07, + "loss": 0.31549203395843506, + "step": 252 + }, + { + "epoch": 7.028169014084507, + "grad_norm": 0.28209057450294495, + "learning_rate": 9.568371473473503e-07, + "loss": 0.3403396010398865, + "step": 253 + }, + { + "epoch": 7.056338028169014, + "grad_norm": 0.28578808903694153, + "learning_rate": 9.565008817884854e-07, + "loss": 0.32727712392807007, + "step": 254 + }, + { + "epoch": 7.084507042253521, + "grad_norm": 0.2921590805053711, + "learning_rate": 9.561633779574372e-07, + "loss": 0.33234310150146484, + "step": 255 + }, + { + "epoch": 7.112676056338028, + "grad_norm": 0.27242740988731384, + "learning_rate": 9.55824636882301e-07, + "loss": 0.3204275965690613, + "step": 256 + }, + { + "epoch": 7.140845070422535, + "grad_norm": 0.28681573271751404, + "learning_rate": 9.554846595949413e-07, + "loss": 0.3127729594707489, + "step": 257 + }, + { + "epoch": 7.169014084507042, + "grad_norm": 0.27501875162124634, + "learning_rate": 9.55143447130987e-07, + "loss": 0.3219028115272522, + "step": 258 + }, + { + "epoch": 7.197183098591549, + "grad_norm": 0.2893284261226654, + "learning_rate": 9.54801000529831e-07, + "loss": 0.3149603009223938, + "step": 259 + }, + { + "epoch": 7.225352112676056, + "grad_norm": 0.29977115988731384, + "learning_rate": 9.54457320834625e-07, + "loss": 0.3116862177848816, + "step": 260 + }, + { + "epoch": 7.253521126760563, + "grad_norm": 0.2911919355392456, + "learning_rate": 9.54112409092277e-07, + "loss": 0.3377895653247833, + "step": 261 + }, + { + "epoch": 7.28169014084507, + "grad_norm": 0.32472458481788635, + "learning_rate": 9.537662663534477e-07, + "loss": 0.3152693510055542, + "step": 262 + }, + { + "epoch": 7.309859154929577, + "grad_norm": 0.2667696177959442, + "learning_rate": 9.534188936725483e-07, + "loss": 0.3181629180908203, + "step": 263 + }, + { + "epoch": 7.338028169014084, + "grad_norm": 0.29469212889671326, + "learning_rate": 9.530702921077358e-07, + "loss": 0.32251378893852234, + "step": 264 + }, + { + "epoch": 7.366197183098592, + "grad_norm": 0.2710505425930023, + "learning_rate": 9.527204627209112e-07, + "loss": 0.3157137632369995, + "step": 265 + }, + { + "epoch": 7.394366197183099, + "grad_norm": 0.29605209827423096, + "learning_rate": 9.523694065777156e-07, + "loss": 0.32492029666900635, + "step": 266 + }, + { + "epoch": 7.422535211267606, + "grad_norm": 0.28292831778526306, + "learning_rate": 9.520171247475268e-07, + "loss": 0.3182477653026581, + "step": 267 + }, + { + "epoch": 7.450704225352113, + "grad_norm": 0.28567084670066833, + "learning_rate": 9.516636183034564e-07, + "loss": 0.317740797996521, + "step": 268 + }, + { + "epoch": 7.47887323943662, + "grad_norm": 0.26249128580093384, + "learning_rate": 9.513088883223463e-07, + "loss": 0.3064804971218109, + "step": 269 + }, + { + "epoch": 7.507042253521127, + "grad_norm": 0.2805914878845215, + "learning_rate": 9.509529358847654e-07, + "loss": 0.32089754939079285, + "step": 270 + }, + { + "epoch": 7.535211267605634, + "grad_norm": 0.2892814874649048, + "learning_rate": 9.505957620750069e-07, + "loss": 0.31203514337539673, + "step": 271 + }, + { + "epoch": 7.563380281690141, + "grad_norm": 0.2809925079345703, + "learning_rate": 9.502373679810839e-07, + "loss": 0.3222312331199646, + "step": 272 + }, + { + "epoch": 7.591549295774648, + "grad_norm": 0.2793818414211273, + "learning_rate": 9.49877754694727e-07, + "loss": 0.30804064869880676, + "step": 273 + }, + { + "epoch": 7.619718309859155, + "grad_norm": 0.27966272830963135, + "learning_rate": 9.495169233113806e-07, + "loss": 0.32768452167510986, + "step": 274 + }, + { + "epoch": 7.647887323943662, + "grad_norm": 0.2743930220603943, + "learning_rate": 9.491548749301997e-07, + "loss": 0.3242339491844177, + "step": 275 + }, + { + "epoch": 7.676056338028169, + "grad_norm": 0.2765263319015503, + "learning_rate": 9.487916106540465e-07, + "loss": 0.3245530128479004, + "step": 276 + }, + { + "epoch": 7.704225352112676, + "grad_norm": 0.29381853342056274, + "learning_rate": 9.484271315894871e-07, + "loss": 0.32187986373901367, + "step": 277 + }, + { + "epoch": 7.732394366197183, + "grad_norm": 0.27294641733169556, + "learning_rate": 9.480614388467877e-07, + "loss": 0.3233500123023987, + "step": 278 + }, + { + "epoch": 7.76056338028169, + "grad_norm": 0.28944891691207886, + "learning_rate": 9.47694533539912e-07, + "loss": 0.31809201836586, + "step": 279 + }, + { + "epoch": 7.788732394366197, + "grad_norm": 0.2922861576080322, + "learning_rate": 9.473264167865171e-07, + "loss": 0.33151817321777344, + "step": 280 + }, + { + "epoch": 7.816901408450704, + "grad_norm": 0.2928006649017334, + "learning_rate": 9.469570897079504e-07, + "loss": 0.3220402002334595, + "step": 281 + }, + { + "epoch": 7.845070422535211, + "grad_norm": 0.28323814272880554, + "learning_rate": 9.465865534292464e-07, + "loss": 0.31611043214797974, + "step": 282 + }, + { + "epoch": 7.873239436619718, + "grad_norm": 0.28506791591644287, + "learning_rate": 9.462148090791228e-07, + "loss": 0.32090169191360474, + "step": 283 + }, + { + "epoch": 7.901408450704225, + "grad_norm": 0.2799360156059265, + "learning_rate": 9.458418577899774e-07, + "loss": 0.344720721244812, + "step": 284 + }, + { + "epoch": 7.929577464788732, + "grad_norm": 0.27799472212791443, + "learning_rate": 9.454677006978842e-07, + "loss": 0.3141616880893707, + "step": 285 + }, + { + "epoch": 7.957746478873239, + "grad_norm": 0.27411341667175293, + "learning_rate": 9.450923389425911e-07, + "loss": 0.31020885705947876, + "step": 286 + }, + { + "epoch": 7.985915492957746, + "grad_norm": 0.28921812772750854, + "learning_rate": 9.44715773667515e-07, + "loss": 0.3182592988014221, + "step": 287 + }, + { + "epoch": 8.0, + "grad_norm": 0.3832477331161499, + "learning_rate": 9.443380060197385e-07, + "loss": 0.32039332389831543, + "step": 288 + }, + { + "epoch": 8.028169014084508, + "grad_norm": 0.2698141932487488, + "learning_rate": 9.43959037150008e-07, + "loss": 0.3155902028083801, + "step": 289 + }, + { + "epoch": 8.056338028169014, + "grad_norm": 0.2765481472015381, + "learning_rate": 9.43578868212728e-07, + "loss": 0.3177169859409332, + "step": 290 + }, + { + "epoch": 8.084507042253522, + "grad_norm": 0.27723443508148193, + "learning_rate": 9.431975003659594e-07, + "loss": 0.31647437810897827, + "step": 291 + }, + { + "epoch": 8.112676056338028, + "grad_norm": 0.26522088050842285, + "learning_rate": 9.428149347714143e-07, + "loss": 0.31819185614585876, + "step": 292 + }, + { + "epoch": 8.140845070422536, + "grad_norm": 0.28780215978622437, + "learning_rate": 9.424311725944543e-07, + "loss": 0.31119635701179504, + "step": 293 + }, + { + "epoch": 8.169014084507042, + "grad_norm": 0.2786031663417816, + "learning_rate": 9.420462150040852e-07, + "loss": 0.31440460681915283, + "step": 294 + }, + { + "epoch": 8.19718309859155, + "grad_norm": 0.26644277572631836, + "learning_rate": 9.416600631729548e-07, + "loss": 0.32182344794273376, + "step": 295 + }, + { + "epoch": 8.225352112676056, + "grad_norm": 0.2974756062030792, + "learning_rate": 9.412727182773486e-07, + "loss": 0.3225427269935608, + "step": 296 + }, + { + "epoch": 8.253521126760564, + "grad_norm": 0.2951170802116394, + "learning_rate": 9.408841814971861e-07, + "loss": 0.31894785165786743, + "step": 297 + }, + { + "epoch": 8.28169014084507, + "grad_norm": 0.28619688749313354, + "learning_rate": 9.404944540160177e-07, + "loss": 0.31788474321365356, + "step": 298 + }, + { + "epoch": 8.309859154929578, + "grad_norm": 0.2877795398235321, + "learning_rate": 9.401035370210212e-07, + "loss": 0.3235325217247009, + "step": 299 + }, + { + "epoch": 8.338028169014084, + "grad_norm": 0.30395635962486267, + "learning_rate": 9.397114317029974e-07, + "loss": 0.33284687995910645, + "step": 300 + }, + { + "epoch": 8.366197183098592, + "grad_norm": 0.2896060347557068, + "learning_rate": 9.393181392563669e-07, + "loss": 0.32644715905189514, + "step": 301 + }, + { + "epoch": 8.394366197183098, + "grad_norm": 0.2763223648071289, + "learning_rate": 9.38923660879167e-07, + "loss": 0.304126501083374, + "step": 302 + }, + { + "epoch": 8.422535211267606, + "grad_norm": 0.2764940559864044, + "learning_rate": 9.385279977730472e-07, + "loss": 0.3124150037765503, + "step": 303 + }, + { + "epoch": 8.450704225352112, + "grad_norm": 0.2838902771472931, + "learning_rate": 9.381311511432658e-07, + "loss": 0.32950958609580994, + "step": 304 + }, + { + "epoch": 8.47887323943662, + "grad_norm": 0.2854890823364258, + "learning_rate": 9.377331221986866e-07, + "loss": 0.30994099378585815, + "step": 305 + }, + { + "epoch": 8.507042253521126, + "grad_norm": 0.2682625353336334, + "learning_rate": 9.373339121517746e-07, + "loss": 0.31963592767715454, + "step": 306 + }, + { + "epoch": 8.535211267605634, + "grad_norm": 0.2849690318107605, + "learning_rate": 9.36933522218593e-07, + "loss": 0.3182557225227356, + "step": 307 + }, + { + "epoch": 8.56338028169014, + "grad_norm": 0.28616634011268616, + "learning_rate": 9.36531953618799e-07, + "loss": 0.30273881554603577, + "step": 308 + }, + { + "epoch": 8.591549295774648, + "grad_norm": 0.2721138596534729, + "learning_rate": 9.361292075756401e-07, + "loss": 0.3207533657550812, + "step": 309 + }, + { + "epoch": 8.619718309859154, + "grad_norm": 0.2752065360546112, + "learning_rate": 9.357252853159505e-07, + "loss": 0.3186470866203308, + "step": 310 + }, + { + "epoch": 8.647887323943662, + "grad_norm": 0.2684236168861389, + "learning_rate": 9.353201880701477e-07, + "loss": 0.31932806968688965, + "step": 311 + }, + { + "epoch": 8.676056338028168, + "grad_norm": 0.28039291501045227, + "learning_rate": 9.34913917072228e-07, + "loss": 0.31683626770973206, + "step": 312 + }, + { + "epoch": 8.704225352112676, + "grad_norm": 0.2638692855834961, + "learning_rate": 9.345064735597633e-07, + "loss": 0.2991946339607239, + "step": 313 + }, + { + "epoch": 8.732394366197184, + "grad_norm": 0.30425477027893066, + "learning_rate": 9.340978587738972e-07, + "loss": 0.3023770749568939, + "step": 314 + }, + { + "epoch": 8.76056338028169, + "grad_norm": 0.27750107645988464, + "learning_rate": 9.336880739593415e-07, + "loss": 0.31177228689193726, + "step": 315 + }, + { + "epoch": 8.788732394366198, + "grad_norm": 0.2731636166572571, + "learning_rate": 9.332771203643714e-07, + "loss": 0.3076733946800232, + "step": 316 + }, + { + "epoch": 8.816901408450704, + "grad_norm": 0.2740687131881714, + "learning_rate": 9.328649992408231e-07, + "loss": 0.30277711153030396, + "step": 317 + }, + { + "epoch": 8.845070422535212, + "grad_norm": 0.27956005930900574, + "learning_rate": 9.324517118440888e-07, + "loss": 0.30988752841949463, + "step": 318 + }, + { + "epoch": 8.873239436619718, + "grad_norm": 0.28827622532844543, + "learning_rate": 9.320372594331137e-07, + "loss": 0.32537323236465454, + "step": 319 + }, + { + "epoch": 8.901408450704226, + "grad_norm": 0.2771560847759247, + "learning_rate": 9.316216432703916e-07, + "loss": 0.3233356475830078, + "step": 320 + }, + { + "epoch": 8.929577464788732, + "grad_norm": 0.2804992198944092, + "learning_rate": 9.312048646219617e-07, + "loss": 0.31110987067222595, + "step": 321 + }, + { + "epoch": 8.95774647887324, + "grad_norm": 0.29048794507980347, + "learning_rate": 9.307869247574038e-07, + "loss": 0.3100625276565552, + "step": 322 + }, + { + "epoch": 8.985915492957746, + "grad_norm": 0.2751557230949402, + "learning_rate": 9.303678249498352e-07, + "loss": 0.30283451080322266, + "step": 323 + }, + { + "epoch": 9.0, + "grad_norm": 0.38358354568481445, + "learning_rate": 9.299475664759068e-07, + "loss": 0.3202640414237976, + "step": 324 + }, + { + "epoch": 9.028169014084508, + "grad_norm": 0.26551520824432373, + "learning_rate": 9.295261506157985e-07, + "loss": 0.31331080198287964, + "step": 325 + }, + { + "epoch": 9.056338028169014, + "grad_norm": 0.28371915221214294, + "learning_rate": 9.291035786532163e-07, + "loss": 0.3039785325527191, + "step": 326 + }, + { + "epoch": 9.084507042253522, + "grad_norm": 0.28972727060317993, + "learning_rate": 9.286798518753878e-07, + "loss": 0.3172224462032318, + "step": 327 + }, + { + "epoch": 9.112676056338028, + "grad_norm": 0.2863673269748688, + "learning_rate": 9.282549715730579e-07, + "loss": 0.3220033049583435, + "step": 328 + }, + { + "epoch": 9.140845070422536, + "grad_norm": 0.27619102597236633, + "learning_rate": 9.278289390404859e-07, + "loss": 0.31595173478126526, + "step": 329 + }, + { + "epoch": 9.169014084507042, + "grad_norm": 0.2838309705257416, + "learning_rate": 9.274017555754407e-07, + "loss": 0.31470271944999695, + "step": 330 + }, + { + "epoch": 9.19718309859155, + "grad_norm": 0.28437867760658264, + "learning_rate": 9.269734224791974e-07, + "loss": 0.31371644139289856, + "step": 331 + }, + { + "epoch": 9.225352112676056, + "grad_norm": 0.28935906291007996, + "learning_rate": 9.265439410565328e-07, + "loss": 0.3154122829437256, + "step": 332 + }, + { + "epoch": 9.253521126760564, + "grad_norm": 0.28751862049102783, + "learning_rate": 9.261133126157217e-07, + "loss": 0.3072774410247803, + "step": 333 + }, + { + "epoch": 9.28169014084507, + "grad_norm": 0.2829267680644989, + "learning_rate": 9.256815384685328e-07, + "loss": 0.30855560302734375, + "step": 334 + }, + { + "epoch": 9.309859154929578, + "grad_norm": 0.28372108936309814, + "learning_rate": 9.252486199302256e-07, + "loss": 0.3047599792480469, + "step": 335 + }, + { + "epoch": 9.338028169014084, + "grad_norm": 0.26949799060821533, + "learning_rate": 9.248145583195447e-07, + "loss": 0.3051632046699524, + "step": 336 + }, + { + "epoch": 9.366197183098592, + "grad_norm": 0.26946741342544556, + "learning_rate": 9.243793549587171e-07, + "loss": 0.30776509642601013, + "step": 337 + }, + { + "epoch": 9.394366197183098, + "grad_norm": 0.2829545736312866, + "learning_rate": 9.239430111734476e-07, + "loss": 0.30643659830093384, + "step": 338 + }, + { + "epoch": 9.422535211267606, + "grad_norm": 0.30891162157058716, + "learning_rate": 9.235055282929153e-07, + "loss": 0.30099156498908997, + "step": 339 + }, + { + "epoch": 9.450704225352112, + "grad_norm": 0.2820793390274048, + "learning_rate": 9.230669076497687e-07, + "loss": 0.31829434633255005, + "step": 340 + }, + { + "epoch": 9.47887323943662, + "grad_norm": 0.27604445815086365, + "learning_rate": 9.226271505801224e-07, + "loss": 0.31647807359695435, + "step": 341 + }, + { + "epoch": 9.507042253521126, + "grad_norm": 0.2793697714805603, + "learning_rate": 9.221862584235526e-07, + "loss": 0.30784907937049866, + "step": 342 + }, + { + "epoch": 9.535211267605634, + "grad_norm": 0.27153849601745605, + "learning_rate": 9.217442325230936e-07, + "loss": 0.29595351219177246, + "step": 343 + }, + { + "epoch": 9.56338028169014, + "grad_norm": 0.28174859285354614, + "learning_rate": 9.213010742252327e-07, + "loss": 0.3158809244632721, + "step": 344 + }, + { + "epoch": 9.591549295774648, + "grad_norm": 0.27065321803092957, + "learning_rate": 9.208567848799069e-07, + "loss": 0.29831117391586304, + "step": 345 + }, + { + "epoch": 9.619718309859154, + "grad_norm": 0.2704644799232483, + "learning_rate": 9.204113658404989e-07, + "loss": 0.31440460681915283, + "step": 346 + }, + { + "epoch": 9.647887323943662, + "grad_norm": 0.2712800204753876, + "learning_rate": 9.199648184638318e-07, + "loss": 0.2985243499279022, + "step": 347 + }, + { + "epoch": 9.676056338028168, + "grad_norm": 0.2808634042739868, + "learning_rate": 9.195171441101668e-07, + "loss": 0.3167741000652313, + "step": 348 + }, + { + "epoch": 9.704225352112676, + "grad_norm": 0.27340877056121826, + "learning_rate": 9.190683441431974e-07, + "loss": 0.3019712269306183, + "step": 349 + }, + { + "epoch": 9.732394366197184, + "grad_norm": 0.2813129723072052, + "learning_rate": 9.186184199300463e-07, + "loss": 0.3006363809108734, + "step": 350 + }, + { + "epoch": 9.76056338028169, + "grad_norm": 0.28003188967704773, + "learning_rate": 9.181673728412605e-07, + "loss": 0.31190669536590576, + "step": 351 + }, + { + "epoch": 9.788732394366198, + "grad_norm": 0.2703484892845154, + "learning_rate": 9.177152042508077e-07, + "loss": 0.3077196478843689, + "step": 352 + }, + { + "epoch": 9.816901408450704, + "grad_norm": 0.2803649604320526, + "learning_rate": 9.17261915536072e-07, + "loss": 0.30905407667160034, + "step": 353 + }, + { + "epoch": 9.845070422535212, + "grad_norm": 0.2884216606616974, + "learning_rate": 9.168075080778494e-07, + "loss": 0.30327335000038147, + "step": 354 + }, + { + "epoch": 9.873239436619718, + "grad_norm": 0.2796288728713989, + "learning_rate": 9.163519832603436e-07, + "loss": 0.3104422390460968, + "step": 355 + }, + { + "epoch": 9.901408450704226, + "grad_norm": 0.30282527208328247, + "learning_rate": 9.158953424711624e-07, + "loss": 0.3279035985469818, + "step": 356 + }, + { + "epoch": 9.929577464788732, + "grad_norm": 0.2795606851577759, + "learning_rate": 9.154375871013128e-07, + "loss": 0.3136137127876282, + "step": 357 + }, + { + "epoch": 9.95774647887324, + "grad_norm": 0.2871512174606323, + "learning_rate": 9.149787185451969e-07, + "loss": 0.3188316226005554, + "step": 358 + }, + { + "epoch": 9.985915492957746, + "grad_norm": 0.2814459502696991, + "learning_rate": 9.145187382006081e-07, + "loss": 0.3084180951118469, + "step": 359 + }, + { + "epoch": 10.0, + "grad_norm": 0.4135233461856842, + "learning_rate": 9.140576474687263e-07, + "loss": 0.32664716243743896, + "step": 360 + }, + { + "epoch": 10.028169014084508, + "grad_norm": 0.2743515968322754, + "learning_rate": 9.135954477541137e-07, + "loss": 0.31237614154815674, + "step": 361 + }, + { + "epoch": 10.056338028169014, + "grad_norm": 0.2790542244911194, + "learning_rate": 9.131321404647109e-07, + "loss": 0.32110899686813354, + "step": 362 + }, + { + "epoch": 10.084507042253522, + "grad_norm": 0.32552531361579895, + "learning_rate": 9.126677270118322e-07, + "loss": 0.31540626287460327, + "step": 363 + }, + { + "epoch": 10.112676056338028, + "grad_norm": 0.27251535654067993, + "learning_rate": 9.122022088101613e-07, + "loss": 0.2956544756889343, + "step": 364 + }, + { + "epoch": 10.140845070422536, + "grad_norm": 0.3012971878051758, + "learning_rate": 9.117355872777477e-07, + "loss": 0.3012295961380005, + "step": 365 + }, + { + "epoch": 10.169014084507042, + "grad_norm": 0.29038530588150024, + "learning_rate": 9.112678638360015e-07, + "loss": 0.2931394875049591, + "step": 366 + }, + { + "epoch": 10.19718309859155, + "grad_norm": 0.2870721220970154, + "learning_rate": 9.107990399096893e-07, + "loss": 0.2930557131767273, + "step": 367 + }, + { + "epoch": 10.225352112676056, + "grad_norm": 0.281965047121048, + "learning_rate": 9.103291169269299e-07, + "loss": 0.3096895217895508, + "step": 368 + }, + { + "epoch": 10.253521126760564, + "grad_norm": 0.2720247209072113, + "learning_rate": 9.098580963191907e-07, + "loss": 0.302044540643692, + "step": 369 + }, + { + "epoch": 10.28169014084507, + "grad_norm": 0.2841237783432007, + "learning_rate": 9.093859795212817e-07, + "loss": 0.32047468423843384, + "step": 370 + }, + { + "epoch": 10.309859154929578, + "grad_norm": 0.29989898204803467, + "learning_rate": 9.089127679713529e-07, + "loss": 0.31085067987442017, + "step": 371 + }, + { + "epoch": 10.338028169014084, + "grad_norm": 0.29164332151412964, + "learning_rate": 9.084384631108882e-07, + "loss": 0.3052881360054016, + "step": 372 + }, + { + "epoch": 10.366197183098592, + "grad_norm": 0.2740509808063507, + "learning_rate": 9.079630663847031e-07, + "loss": 0.31468653678894043, + "step": 373 + }, + { + "epoch": 10.394366197183098, + "grad_norm": 0.2791116535663605, + "learning_rate": 9.074865792409381e-07, + "loss": 0.30899161100387573, + "step": 374 + }, + { + "epoch": 10.422535211267606, + "grad_norm": 0.30149030685424805, + "learning_rate": 9.070090031310558e-07, + "loss": 0.3094651997089386, + "step": 375 + }, + { + "epoch": 10.450704225352112, + "grad_norm": 0.2970089018344879, + "learning_rate": 9.065303395098358e-07, + "loss": 0.3142540156841278, + "step": 376 + }, + { + "epoch": 10.47887323943662, + "grad_norm": 0.2772645652294159, + "learning_rate": 9.060505898353705e-07, + "loss": 0.32443171739578247, + "step": 377 + }, + { + "epoch": 10.507042253521126, + "grad_norm": 0.2707611620426178, + "learning_rate": 9.055697555690607e-07, + "loss": 0.30495521426200867, + "step": 378 + }, + { + "epoch": 10.535211267605634, + "grad_norm": 0.2923314869403839, + "learning_rate": 9.050878381756107e-07, + "loss": 0.30734074115753174, + "step": 379 + }, + { + "epoch": 10.56338028169014, + "grad_norm": 0.2865448594093323, + "learning_rate": 9.046048391230247e-07, + "loss": 0.2913230061531067, + "step": 380 + }, + { + "epoch": 10.591549295774648, + "grad_norm": 0.29643693566322327, + "learning_rate": 9.041207598826017e-07, + "loss": 0.30088239908218384, + "step": 381 + }, + { + "epoch": 10.619718309859154, + "grad_norm": 0.2761143445968628, + "learning_rate": 9.036356019289309e-07, + "loss": 0.30702435970306396, + "step": 382 + }, + { + "epoch": 10.647887323943662, + "grad_norm": 0.27720797061920166, + "learning_rate": 9.031493667398872e-07, + "loss": 0.2953702509403229, + "step": 383 + }, + { + "epoch": 10.676056338028168, + "grad_norm": 0.30037540197372437, + "learning_rate": 9.026620557966279e-07, + "loss": 0.3012697696685791, + "step": 384 + }, + { + "epoch": 10.704225352112676, + "grad_norm": 0.27628859877586365, + "learning_rate": 9.021736705835862e-07, + "loss": 0.30558526515960693, + "step": 385 + }, + { + "epoch": 10.732394366197184, + "grad_norm": 0.2692992091178894, + "learning_rate": 9.016842125884684e-07, + "loss": 0.288699209690094, + "step": 386 + }, + { + "epoch": 10.76056338028169, + "grad_norm": 0.30020084977149963, + "learning_rate": 9.011936833022484e-07, + "loss": 0.294253945350647, + "step": 387 + }, + { + "epoch": 10.788732394366198, + "grad_norm": 0.29289868474006653, + "learning_rate": 9.007020842191634e-07, + "loss": 0.31805676221847534, + "step": 388 + }, + { + "epoch": 10.816901408450704, + "grad_norm": 0.28465571999549866, + "learning_rate": 9.002094168367095e-07, + "loss": 0.3168966472148895, + "step": 389 + }, + { + "epoch": 10.845070422535212, + "grad_norm": 0.27562448382377625, + "learning_rate": 8.997156826556369e-07, + "loss": 0.302585631608963, + "step": 390 + }, + { + "epoch": 10.873239436619718, + "grad_norm": 0.28200119733810425, + "learning_rate": 8.992208831799456e-07, + "loss": 0.3037059009075165, + "step": 391 + }, + { + "epoch": 10.901408450704226, + "grad_norm": 0.2829252779483795, + "learning_rate": 8.987250199168808e-07, + "loss": 0.2850543260574341, + "step": 392 + }, + { + "epoch": 10.929577464788732, + "grad_norm": 0.28010982275009155, + "learning_rate": 8.982280943769278e-07, + "loss": 0.30365508794784546, + "step": 393 + }, + { + "epoch": 10.95774647887324, + "grad_norm": 0.2917790114879608, + "learning_rate": 8.977301080738079e-07, + "loss": 0.32212477922439575, + "step": 394 + }, + { + "epoch": 10.985915492957746, + "grad_norm": 0.27254894375801086, + "learning_rate": 8.97231062524474e-07, + "loss": 0.29733577370643616, + "step": 395 + }, + { + "epoch": 11.0, + "grad_norm": 0.38847291469573975, + "learning_rate": 8.967309592491052e-07, + "loss": 0.31824764609336853, + "step": 396 + }, + { + "epoch": 11.028169014084508, + "grad_norm": 0.27360019087791443, + "learning_rate": 8.962297997711027e-07, + "loss": 0.2907956540584564, + "step": 397 + }, + { + "epoch": 11.056338028169014, + "grad_norm": 0.28565695881843567, + "learning_rate": 8.957275856170855e-07, + "loss": 0.30498966574668884, + "step": 398 + }, + { + "epoch": 11.084507042253522, + "grad_norm": 0.2826082408428192, + "learning_rate": 8.952243183168848e-07, + "loss": 0.3076494634151459, + "step": 399 + }, + { + "epoch": 11.112676056338028, + "grad_norm": 0.28598853945732117, + "learning_rate": 8.9471999940354e-07, + "loss": 0.29677921533584595, + "step": 400 + }, + { + "epoch": 11.140845070422536, + "grad_norm": 0.27635788917541504, + "learning_rate": 8.942146304132943e-07, + "loss": 0.28424787521362305, + "step": 401 + }, + { + "epoch": 11.169014084507042, + "grad_norm": 0.3110678195953369, + "learning_rate": 8.937082128855891e-07, + "loss": 0.31091392040252686, + "step": 402 + }, + { + "epoch": 11.19718309859155, + "grad_norm": 0.28018108010292053, + "learning_rate": 8.932007483630596e-07, + "loss": 0.2973289489746094, + "step": 403 + }, + { + "epoch": 11.225352112676056, + "grad_norm": 0.2748464345932007, + "learning_rate": 8.926922383915315e-07, + "loss": 0.3064712882041931, + "step": 404 + }, + { + "epoch": 11.253521126760564, + "grad_norm": 0.2758099138736725, + "learning_rate": 8.921826845200138e-07, + "loss": 0.30080002546310425, + "step": 405 + }, + { + "epoch": 11.28169014084507, + "grad_norm": 0.27323541045188904, + "learning_rate": 8.916720883006963e-07, + "loss": 0.30011099576950073, + "step": 406 + }, + { + "epoch": 11.309859154929578, + "grad_norm": 0.2751684784889221, + "learning_rate": 8.911604512889434e-07, + "loss": 0.3021606206893921, + "step": 407 + }, + { + "epoch": 11.338028169014084, + "grad_norm": 0.278543084859848, + "learning_rate": 8.906477750432903e-07, + "loss": 0.2979898452758789, + "step": 408 + }, + { + "epoch": 11.366197183098592, + "grad_norm": 0.2872096300125122, + "learning_rate": 8.901340611254378e-07, + "loss": 0.30450716614723206, + "step": 409 + }, + { + "epoch": 11.394366197183098, + "grad_norm": 0.27768319845199585, + "learning_rate": 8.896193111002475e-07, + "loss": 0.31025999784469604, + "step": 410 + }, + { + "epoch": 11.422535211267606, + "grad_norm": 0.28008511662483215, + "learning_rate": 8.891035265357371e-07, + "loss": 0.2903551757335663, + "step": 411 + }, + { + "epoch": 11.450704225352112, + "grad_norm": 0.28000614047050476, + "learning_rate": 8.88586709003076e-07, + "loss": 0.30711328983306885, + "step": 412 + }, + { + "epoch": 11.47887323943662, + "grad_norm": 0.27915990352630615, + "learning_rate": 8.8806886007658e-07, + "loss": 0.309296578168869, + "step": 413 + }, + { + "epoch": 11.507042253521126, + "grad_norm": 0.2682763636112213, + "learning_rate": 8.875499813337067e-07, + "loss": 0.3053497076034546, + "step": 414 + }, + { + "epoch": 11.535211267605634, + "grad_norm": 0.26592400670051575, + "learning_rate": 8.87030074355051e-07, + "loss": 0.29761987924575806, + "step": 415 + }, + { + "epoch": 11.56338028169014, + "grad_norm": 0.2664642333984375, + "learning_rate": 8.865091407243394e-07, + "loss": 0.2986457645893097, + "step": 416 + }, + { + "epoch": 11.591549295774648, + "grad_norm": 0.2615084648132324, + "learning_rate": 8.859871820284261e-07, + "loss": 0.31391632556915283, + "step": 417 + }, + { + "epoch": 11.619718309859154, + "grad_norm": 0.27312856912612915, + "learning_rate": 8.85464199857288e-07, + "loss": 0.3128984570503235, + "step": 418 + }, + { + "epoch": 11.647887323943662, + "grad_norm": 0.2734473645687103, + "learning_rate": 8.849401958040192e-07, + "loss": 0.298526793718338, + "step": 419 + }, + { + "epoch": 11.676056338028168, + "grad_norm": 0.2901906669139862, + "learning_rate": 8.844151714648274e-07, + "loss": 0.31268036365509033, + "step": 420 + }, + { + "epoch": 11.704225352112676, + "grad_norm": 0.28374356031417847, + "learning_rate": 8.838891284390273e-07, + "loss": 0.3042759299278259, + "step": 421 + }, + { + "epoch": 11.732394366197184, + "grad_norm": 0.26128286123275757, + "learning_rate": 8.833620683290375e-07, + "loss": 0.30057787895202637, + "step": 422 + }, + { + "epoch": 11.76056338028169, + "grad_norm": 0.29005923867225647, + "learning_rate": 8.828339927403745e-07, + "loss": 0.2969115376472473, + "step": 423 + }, + { + "epoch": 11.788732394366198, + "grad_norm": 0.26823022961616516, + "learning_rate": 8.823049032816478e-07, + "loss": 0.3024095296859741, + "step": 424 + }, + { + "epoch": 11.816901408450704, + "grad_norm": 0.2938059866428375, + "learning_rate": 8.817748015645558e-07, + "loss": 0.2982884347438812, + "step": 425 + }, + { + "epoch": 11.845070422535212, + "grad_norm": 0.2794440686702728, + "learning_rate": 8.812436892038805e-07, + "loss": 0.3006170094013214, + "step": 426 + }, + { + "epoch": 11.873239436619718, + "grad_norm": 0.27727699279785156, + "learning_rate": 8.807115678174819e-07, + "loss": 0.29938215017318726, + "step": 427 + }, + { + "epoch": 11.901408450704226, + "grad_norm": 0.28038865327835083, + "learning_rate": 8.801784390262943e-07, + "loss": 0.3107326924800873, + "step": 428 + }, + { + "epoch": 11.929577464788732, + "grad_norm": 0.29747217893600464, + "learning_rate": 8.796443044543203e-07, + "loss": 0.2999688982963562, + "step": 429 + }, + { + "epoch": 11.95774647887324, + "grad_norm": 0.2875438332557678, + "learning_rate": 8.791091657286267e-07, + "loss": 0.2930242419242859, + "step": 430 + }, + { + "epoch": 11.985915492957746, + "grad_norm": 0.2946978211402893, + "learning_rate": 8.785730244793386e-07, + "loss": 0.295132577419281, + "step": 431 + }, + { + "epoch": 12.0, + "grad_norm": 0.39752283692359924, + "learning_rate": 8.780358823396352e-07, + "loss": 0.30750101804733276, + "step": 432 + }, + { + "epoch": 12.028169014084508, + "grad_norm": 0.2708489000797272, + "learning_rate": 8.774977409457447e-07, + "loss": 0.3058265447616577, + "step": 433 + }, + { + "epoch": 12.056338028169014, + "grad_norm": 0.2773410975933075, + "learning_rate": 8.769586019369391e-07, + "loss": 0.30409157276153564, + "step": 434 + }, + { + "epoch": 12.084507042253522, + "grad_norm": 0.26894107460975647, + "learning_rate": 8.764184669555293e-07, + "loss": 0.30384916067123413, + "step": 435 + }, + { + "epoch": 12.112676056338028, + "grad_norm": 0.27837878465652466, + "learning_rate": 8.758773376468604e-07, + "loss": 0.2943356931209564, + "step": 436 + }, + { + "epoch": 12.140845070422536, + "grad_norm": 0.2690330445766449, + "learning_rate": 8.753352156593055e-07, + "loss": 0.2933955788612366, + "step": 437 + }, + { + "epoch": 12.169014084507042, + "grad_norm": 0.27980291843414307, + "learning_rate": 8.747921026442629e-07, + "loss": 0.28997617959976196, + "step": 438 + }, + { + "epoch": 12.19718309859155, + "grad_norm": 0.287624329328537, + "learning_rate": 8.742480002561487e-07, + "loss": 0.30039626359939575, + "step": 439 + }, + { + "epoch": 12.225352112676056, + "grad_norm": 0.28817304968833923, + "learning_rate": 8.737029101523929e-07, + "loss": 0.3200758099555969, + "step": 440 + }, + { + "epoch": 12.253521126760564, + "grad_norm": 0.2769193649291992, + "learning_rate": 8.731568339934348e-07, + "loss": 0.2976597547531128, + "step": 441 + }, + { + "epoch": 12.28169014084507, + "grad_norm": 0.309583842754364, + "learning_rate": 8.726097734427172e-07, + "loss": 0.2977990210056305, + "step": 442 + }, + { + "epoch": 12.309859154929578, + "grad_norm": 0.26997339725494385, + "learning_rate": 8.72061730166681e-07, + "loss": 0.29733020067214966, + "step": 443 + }, + { + "epoch": 12.338028169014084, + "grad_norm": 0.2782990634441376, + "learning_rate": 8.715127058347614e-07, + "loss": 0.29592543840408325, + "step": 444 + }, + { + "epoch": 12.366197183098592, + "grad_norm": 0.2781784236431122, + "learning_rate": 8.709627021193816e-07, + "loss": 0.2965870797634125, + "step": 445 + }, + { + "epoch": 12.394366197183098, + "grad_norm": 0.2965787649154663, + "learning_rate": 8.704117206959484e-07, + "loss": 0.30272242426872253, + "step": 446 + }, + { + "epoch": 12.422535211267606, + "grad_norm": 0.2780534625053406, + "learning_rate": 8.698597632428466e-07, + "loss": 0.30883416533470154, + "step": 447 + }, + { + "epoch": 12.450704225352112, + "grad_norm": 0.27513188123703003, + "learning_rate": 8.693068314414344e-07, + "loss": 0.30461177229881287, + "step": 448 + }, + { + "epoch": 12.47887323943662, + "grad_norm": 0.2838785946369171, + "learning_rate": 8.687529269760379e-07, + "loss": 0.2927112281322479, + "step": 449 + }, + { + "epoch": 12.507042253521126, + "grad_norm": 0.28894707560539246, + "learning_rate": 8.681980515339463e-07, + "loss": 0.28816863894462585, + "step": 450 + }, + { + "epoch": 12.535211267605634, + "grad_norm": 0.28006207942962646, + "learning_rate": 8.676422068054064e-07, + "loss": 0.29931047558784485, + "step": 451 + }, + { + "epoch": 12.56338028169014, + "grad_norm": 0.2799602150917053, + "learning_rate": 8.670853944836176e-07, + "loss": 0.3038347363471985, + "step": 452 + }, + { + "epoch": 12.591549295774648, + "grad_norm": 0.2760638892650604, + "learning_rate": 8.665276162647267e-07, + "loss": 0.30183106660842896, + "step": 453 + }, + { + "epoch": 12.619718309859154, + "grad_norm": 0.278127521276474, + "learning_rate": 8.659688738478231e-07, + "loss": 0.3019717335700989, + "step": 454 + }, + { + "epoch": 12.647887323943662, + "grad_norm": 0.26856380701065063, + "learning_rate": 8.654091689349329e-07, + "loss": 0.2945576310157776, + "step": 455 + }, + { + "epoch": 12.676056338028168, + "grad_norm": 0.2749437391757965, + "learning_rate": 8.648485032310144e-07, + "loss": 0.3023756444454193, + "step": 456 + }, + { + "epoch": 12.704225352112676, + "grad_norm": 0.2729102671146393, + "learning_rate": 8.642868784439527e-07, + "loss": 0.2842894196510315, + "step": 457 + }, + { + "epoch": 12.732394366197184, + "grad_norm": 0.28390341997146606, + "learning_rate": 8.63724296284554e-07, + "loss": 0.2940555810928345, + "step": 458 + }, + { + "epoch": 12.76056338028169, + "grad_norm": 0.2739807069301605, + "learning_rate": 8.631607584665413e-07, + "loss": 0.2935922145843506, + "step": 459 + }, + { + "epoch": 12.788732394366198, + "grad_norm": 0.2823079824447632, + "learning_rate": 8.625962667065487e-07, + "loss": 0.2949485182762146, + "step": 460 + }, + { + "epoch": 12.816901408450704, + "grad_norm": 0.2843155264854431, + "learning_rate": 8.620308227241157e-07, + "loss": 0.31058311462402344, + "step": 461 + }, + { + "epoch": 12.845070422535212, + "grad_norm": 0.2805749475955963, + "learning_rate": 8.614644282416831e-07, + "loss": 0.2892061173915863, + "step": 462 + }, + { + "epoch": 12.873239436619718, + "grad_norm": 0.2773419916629791, + "learning_rate": 8.608970849845862e-07, + "loss": 0.28688696026802063, + "step": 463 + }, + { + "epoch": 12.901408450704226, + "grad_norm": 0.28667542338371277, + "learning_rate": 8.603287946810513e-07, + "loss": 0.30356699228286743, + "step": 464 + }, + { + "epoch": 12.929577464788732, + "grad_norm": 0.2785196900367737, + "learning_rate": 8.597595590621892e-07, + "loss": 0.29802441596984863, + "step": 465 + }, + { + "epoch": 12.95774647887324, + "grad_norm": 0.2778855562210083, + "learning_rate": 8.591893798619903e-07, + "loss": 0.29154932498931885, + "step": 466 + }, + { + "epoch": 12.985915492957746, + "grad_norm": 0.28308385610580444, + "learning_rate": 8.586182588173194e-07, + "loss": 0.29143208265304565, + "step": 467 + }, + { + "epoch": 13.0, + "grad_norm": 0.39711424708366394, + "learning_rate": 8.580461976679099e-07, + "loss": 0.2990560233592987, + "step": 468 + }, + { + "epoch": 13.028169014084508, + "grad_norm": 0.26802533864974976, + "learning_rate": 8.574731981563597e-07, + "loss": 0.29934608936309814, + "step": 469 + }, + { + "epoch": 13.056338028169014, + "grad_norm": 0.2663622498512268, + "learning_rate": 8.568992620281243e-07, + "loss": 0.29982200264930725, + "step": 470 + }, + { + "epoch": 13.084507042253522, + "grad_norm": 0.28624898195266724, + "learning_rate": 8.56324391031513e-07, + "loss": 0.2810109555721283, + "step": 471 + }, + { + "epoch": 13.112676056338028, + "grad_norm": 0.28607407212257385, + "learning_rate": 8.557485869176825e-07, + "loss": 0.2949367165565491, + "step": 472 + }, + { + "epoch": 13.140845070422536, + "grad_norm": 0.26953044533729553, + "learning_rate": 8.551718514406318e-07, + "loss": 0.2851143479347229, + "step": 473 + }, + { + "epoch": 13.169014084507042, + "grad_norm": 0.31105440855026245, + "learning_rate": 8.545941863571973e-07, + "loss": 0.2858909070491791, + "step": 474 + }, + { + "epoch": 13.19718309859155, + "grad_norm": 0.28143224120140076, + "learning_rate": 8.540155934270471e-07, + "loss": 0.2961467504501343, + "step": 475 + }, + { + "epoch": 13.225352112676056, + "grad_norm": 0.2862183451652527, + "learning_rate": 8.534360744126753e-07, + "loss": 0.29882240295410156, + "step": 476 + }, + { + "epoch": 13.253521126760564, + "grad_norm": 0.26780712604522705, + "learning_rate": 8.528556310793979e-07, + "loss": 0.2933373749256134, + "step": 477 + }, + { + "epoch": 13.28169014084507, + "grad_norm": 0.27026116847991943, + "learning_rate": 8.522742651953456e-07, + "loss": 0.2968083918094635, + "step": 478 + }, + { + "epoch": 13.309859154929578, + "grad_norm": 0.2800562381744385, + "learning_rate": 8.516919785314595e-07, + "loss": 0.3015640377998352, + "step": 479 + }, + { + "epoch": 13.338028169014084, + "grad_norm": 0.29154452681541443, + "learning_rate": 8.511087728614862e-07, + "loss": 0.31045541167259216, + "step": 480 + }, + { + "epoch": 13.366197183098592, + "grad_norm": 0.28183555603027344, + "learning_rate": 8.50524649961971e-07, + "loss": 0.29173219203948975, + "step": 481 + }, + { + "epoch": 13.394366197183098, + "grad_norm": 0.2971493601799011, + "learning_rate": 8.499396116122535e-07, + "loss": 0.2765740752220154, + "step": 482 + }, + { + "epoch": 13.422535211267606, + "grad_norm": 0.26922252774238586, + "learning_rate": 8.493536595944622e-07, + "loss": 0.297348290681839, + "step": 483 + }, + { + "epoch": 13.450704225352112, + "grad_norm": 0.27836039662361145, + "learning_rate": 8.487667956935087e-07, + "loss": 0.28694790601730347, + "step": 484 + }, + { + "epoch": 13.47887323943662, + "grad_norm": 0.29267406463623047, + "learning_rate": 8.481790216970819e-07, + "loss": 0.2862587571144104, + "step": 485 + }, + { + "epoch": 13.507042253521126, + "grad_norm": 0.27863144874572754, + "learning_rate": 8.475903393956433e-07, + "loss": 0.2894202470779419, + "step": 486 + }, + { + "epoch": 13.535211267605634, + "grad_norm": 0.2911999523639679, + "learning_rate": 8.470007505824215e-07, + "loss": 0.29356449842453003, + "step": 487 + }, + { + "epoch": 13.56338028169014, + "grad_norm": 0.2968003451824188, + "learning_rate": 8.464102570534061e-07, + "loss": 0.29188239574432373, + "step": 488 + }, + { + "epoch": 13.591549295774648, + "grad_norm": 0.2842749357223511, + "learning_rate": 8.458188606073431e-07, + "loss": 0.28485268354415894, + "step": 489 + }, + { + "epoch": 13.619718309859154, + "grad_norm": 0.2762301564216614, + "learning_rate": 8.452265630457282e-07, + "loss": 0.2829025387763977, + "step": 490 + }, + { + "epoch": 13.647887323943662, + "grad_norm": 0.27368924021720886, + "learning_rate": 8.446333661728028e-07, + "loss": 0.3129264712333679, + "step": 491 + }, + { + "epoch": 13.676056338028168, + "grad_norm": 0.3042363226413727, + "learning_rate": 8.440392717955475e-07, + "loss": 0.298667311668396, + "step": 492 + }, + { + "epoch": 13.704225352112676, + "grad_norm": 0.31437602639198303, + "learning_rate": 8.434442817236765e-07, + "loss": 0.2911669909954071, + "step": 493 + }, + { + "epoch": 13.732394366197184, + "grad_norm": 0.2624206840991974, + "learning_rate": 8.428483977696328e-07, + "loss": 0.2875954508781433, + "step": 494 + }, + { + "epoch": 13.76056338028169, + "grad_norm": 0.2824702858924866, + "learning_rate": 8.422516217485825e-07, + "loss": 0.28079336881637573, + "step": 495 + }, + { + "epoch": 13.788732394366198, + "grad_norm": 0.27612945437431335, + "learning_rate": 8.416539554784089e-07, + "loss": 0.3052091598510742, + "step": 496 + }, + { + "epoch": 13.816901408450704, + "grad_norm": 0.28139790892601013, + "learning_rate": 8.410554007797068e-07, + "loss": 0.2918257415294647, + "step": 497 + }, + { + "epoch": 13.845070422535212, + "grad_norm": 0.2779678702354431, + "learning_rate": 8.404559594757777e-07, + "loss": 0.30707138776779175, + "step": 498 + }, + { + "epoch": 13.873239436619718, + "grad_norm": 0.2710152566432953, + "learning_rate": 8.398556333926239e-07, + "loss": 0.3128437101840973, + "step": 499 + }, + { + "epoch": 13.901408450704226, + "grad_norm": 0.2958044707775116, + "learning_rate": 8.392544243589427e-07, + "loss": 0.29653337597846985, + "step": 500 + }, + { + "epoch": 13.929577464788732, + "grad_norm": 0.28408974409103394, + "learning_rate": 8.38652334206121e-07, + "loss": 0.29291969537734985, + "step": 501 + }, + { + "epoch": 13.95774647887324, + "grad_norm": 0.27897724509239197, + "learning_rate": 8.3804936476823e-07, + "loss": 0.3117462992668152, + "step": 502 + }, + { + "epoch": 13.985915492957746, + "grad_norm": 0.27391254901885986, + "learning_rate": 8.374455178820189e-07, + "loss": 0.30571603775024414, + "step": 503 + }, + { + "epoch": 14.0, + "grad_norm": 0.3995163142681122, + "learning_rate": 8.368407953869103e-07, + "loss": 0.2876809239387512, + "step": 504 + }, + { + "epoch": 14.028169014084508, + "grad_norm": 0.3068762719631195, + "learning_rate": 8.362351991249937e-07, + "loss": 0.28866052627563477, + "step": 505 + }, + { + "epoch": 14.056338028169014, + "grad_norm": 0.278751939535141, + "learning_rate": 8.356287309410204e-07, + "loss": 0.3048397898674011, + "step": 506 + }, + { + "epoch": 14.084507042253522, + "grad_norm": 0.2831234335899353, + "learning_rate": 8.350213926823974e-07, + "loss": 0.28643566370010376, + "step": 507 + }, + { + "epoch": 14.112676056338028, + "grad_norm": 0.2744354009628296, + "learning_rate": 8.344131861991828e-07, + "loss": 0.30159255862236023, + "step": 508 + }, + { + "epoch": 14.140845070422536, + "grad_norm": 0.2834227383136749, + "learning_rate": 8.338041133440788e-07, + "loss": 0.2945912182331085, + "step": 509 + }, + { + "epoch": 14.169014084507042, + "grad_norm": 0.2914932072162628, + "learning_rate": 8.331941759724268e-07, + "loss": 0.30261489748954773, + "step": 510 + }, + { + "epoch": 14.19718309859155, + "grad_norm": 0.2795814871788025, + "learning_rate": 8.325833759422021e-07, + "loss": 0.29661813378334045, + "step": 511 + }, + { + "epoch": 14.225352112676056, + "grad_norm": 0.2715330719947815, + "learning_rate": 8.319717151140072e-07, + "loss": 0.28672271966934204, + "step": 512 + }, + { + "epoch": 14.253521126760564, + "grad_norm": 0.2859768271446228, + "learning_rate": 8.313591953510673e-07, + "loss": 0.2985742390155792, + "step": 513 + }, + { + "epoch": 14.28169014084507, + "grad_norm": 0.2789771854877472, + "learning_rate": 8.307458185192238e-07, + "loss": 0.2883588671684265, + "step": 514 + }, + { + "epoch": 14.309859154929578, + "grad_norm": 0.2849474549293518, + "learning_rate": 8.301315864869289e-07, + "loss": 0.3045833706855774, + "step": 515 + }, + { + "epoch": 14.338028169014084, + "grad_norm": 0.28583216667175293, + "learning_rate": 8.295165011252396e-07, + "loss": 0.28541919589042664, + "step": 516 + }, + { + "epoch": 14.366197183098592, + "grad_norm": 0.286767840385437, + "learning_rate": 8.289005643078131e-07, + "loss": 0.2928876280784607, + "step": 517 + }, + { + "epoch": 14.394366197183098, + "grad_norm": 0.2851925790309906, + "learning_rate": 8.282837779108993e-07, + "loss": 0.29808348417282104, + "step": 518 + }, + { + "epoch": 14.422535211267606, + "grad_norm": 0.2843434512615204, + "learning_rate": 8.276661438133368e-07, + "loss": 0.281357079744339, + "step": 519 + }, + { + "epoch": 14.450704225352112, + "grad_norm": 0.29959535598754883, + "learning_rate": 8.270476638965461e-07, + "loss": 0.287128746509552, + "step": 520 + }, + { + "epoch": 14.47887323943662, + "grad_norm": 0.2812483310699463, + "learning_rate": 8.264283400445243e-07, + "loss": 0.29306480288505554, + "step": 521 + }, + { + "epoch": 14.507042253521126, + "grad_norm": 0.3015466034412384, + "learning_rate": 8.258081741438394e-07, + "loss": 0.3011341691017151, + "step": 522 + }, + { + "epoch": 14.535211267605634, + "grad_norm": 0.2930891215801239, + "learning_rate": 8.25187168083624e-07, + "loss": 0.2976144850254059, + "step": 523 + }, + { + "epoch": 14.56338028169014, + "grad_norm": 0.2777521312236786, + "learning_rate": 8.245653237555705e-07, + "loss": 0.2829003930091858, + "step": 524 + }, + { + "epoch": 14.591549295774648, + "grad_norm": 0.2916077673435211, + "learning_rate": 8.239426430539243e-07, + "loss": 0.28546392917633057, + "step": 525 + }, + { + "epoch": 14.619718309859154, + "grad_norm": 0.3006315231323242, + "learning_rate": 8.23319127875479e-07, + "loss": 0.2851755619049072, + "step": 526 + }, + { + "epoch": 14.647887323943662, + "grad_norm": 0.2654482424259186, + "learning_rate": 8.226947801195699e-07, + "loss": 0.28430840373039246, + "step": 527 + }, + { + "epoch": 14.676056338028168, + "grad_norm": 0.2679372727870941, + "learning_rate": 8.220696016880687e-07, + "loss": 0.282630980014801, + "step": 528 + }, + { + "epoch": 14.704225352112676, + "grad_norm": 0.28538262844085693, + "learning_rate": 8.21443594485377e-07, + "loss": 0.2789214551448822, + "step": 529 + }, + { + "epoch": 14.732394366197184, + "grad_norm": 0.2713358700275421, + "learning_rate": 8.208167604184217e-07, + "loss": 0.2909342646598816, + "step": 530 + }, + { + "epoch": 14.76056338028169, + "grad_norm": 0.30056601762771606, + "learning_rate": 8.201891013966478e-07, + "loss": 0.2838485836982727, + "step": 531 + }, + { + "epoch": 14.788732394366198, + "grad_norm": 0.2811543345451355, + "learning_rate": 8.195606193320136e-07, + "loss": 0.29030710458755493, + "step": 532 + }, + { + "epoch": 14.816901408450704, + "grad_norm": 0.2930709719657898, + "learning_rate": 8.189313161389844e-07, + "loss": 0.2922976613044739, + "step": 533 + }, + { + "epoch": 14.845070422535212, + "grad_norm": 0.29798057675361633, + "learning_rate": 8.183011937345271e-07, + "loss": 0.2951294183731079, + "step": 534 + }, + { + "epoch": 14.873239436619718, + "grad_norm": 0.28483426570892334, + "learning_rate": 8.176702540381036e-07, + "loss": 0.2938500642776489, + "step": 535 + }, + { + "epoch": 14.901408450704226, + "grad_norm": 0.2990010380744934, + "learning_rate": 8.170384989716657e-07, + "loss": 0.29805850982666016, + "step": 536 + }, + { + "epoch": 14.929577464788732, + "grad_norm": 0.2896774411201477, + "learning_rate": 8.164059304596488e-07, + "loss": 0.29530227184295654, + "step": 537 + }, + { + "epoch": 14.95774647887324, + "grad_norm": 0.28662148118019104, + "learning_rate": 8.157725504289664e-07, + "loss": 0.28371667861938477, + "step": 538 + }, + { + "epoch": 14.985915492957746, + "grad_norm": 0.2807771861553192, + "learning_rate": 8.151383608090039e-07, + "loss": 0.29020193219184875, + "step": 539 + }, + { + "epoch": 15.0, + "grad_norm": 0.39528268575668335, + "learning_rate": 8.145033635316128e-07, + "loss": 0.30530279874801636, + "step": 540 + }, + { + "epoch": 15.028169014084508, + "grad_norm": 0.28691425919532776, + "learning_rate": 8.138675605311051e-07, + "loss": 0.27306681871414185, + "step": 541 + }, + { + "epoch": 15.056338028169014, + "grad_norm": 0.27633434534072876, + "learning_rate": 8.13230953744247e-07, + "loss": 0.2900540828704834, + "step": 542 + }, + { + "epoch": 15.084507042253522, + "grad_norm": 0.28263136744499207, + "learning_rate": 8.125935451102528e-07, + "loss": 0.29298198223114014, + "step": 543 + }, + { + "epoch": 15.112676056338028, + "grad_norm": 0.2708156406879425, + "learning_rate": 8.119553365707802e-07, + "loss": 0.2728630006313324, + "step": 544 + }, + { + "epoch": 15.140845070422536, + "grad_norm": 0.28263747692108154, + "learning_rate": 8.113163300699228e-07, + "loss": 0.2994900047779083, + "step": 545 + }, + { + "epoch": 15.169014084507042, + "grad_norm": 0.2628503739833832, + "learning_rate": 8.106765275542053e-07, + "loss": 0.2943934202194214, + "step": 546 + }, + { + "epoch": 15.19718309859155, + "grad_norm": 0.2844214141368866, + "learning_rate": 8.100359309725774e-07, + "loss": 0.286617636680603, + "step": 547 + }, + { + "epoch": 15.225352112676056, + "grad_norm": 0.2979234457015991, + "learning_rate": 8.093945422764069e-07, + "loss": 0.28598904609680176, + "step": 548 + }, + { + "epoch": 15.253521126760564, + "grad_norm": 0.2918255925178528, + "learning_rate": 8.087523634194754e-07, + "loss": 0.2826801538467407, + "step": 549 + }, + { + "epoch": 15.28169014084507, + "grad_norm": 0.30238643288612366, + "learning_rate": 8.081093963579707e-07, + "loss": 0.3018723726272583, + "step": 550 + }, + { + "epoch": 15.309859154929578, + "grad_norm": 0.2762410342693329, + "learning_rate": 8.074656430504823e-07, + "loss": 0.27831658720970154, + "step": 551 + }, + { + "epoch": 15.338028169014084, + "grad_norm": 0.28324148058891296, + "learning_rate": 8.068211054579943e-07, + "loss": 0.30506500601768494, + "step": 552 + }, + { + "epoch": 15.366197183098592, + "grad_norm": 0.2893829643726349, + "learning_rate": 8.061757855438799e-07, + "loss": 0.29023078083992004, + "step": 553 + }, + { + "epoch": 15.394366197183098, + "grad_norm": 0.2907930016517639, + "learning_rate": 8.055296852738956e-07, + "loss": 0.28343409299850464, + "step": 554 + }, + { + "epoch": 15.422535211267606, + "grad_norm": 0.28478139638900757, + "learning_rate": 8.048828066161747e-07, + "loss": 0.28546571731567383, + "step": 555 + }, + { + "epoch": 15.450704225352112, + "grad_norm": 0.2851191759109497, + "learning_rate": 8.04235151541222e-07, + "loss": 0.2884707748889923, + "step": 556 + }, + { + "epoch": 15.47887323943662, + "grad_norm": 0.2689509987831116, + "learning_rate": 8.035867220219071e-07, + "loss": 0.2950664758682251, + "step": 557 + }, + { + "epoch": 15.507042253521126, + "grad_norm": 0.2825435400009155, + "learning_rate": 8.029375200334587e-07, + "loss": 0.281552791595459, + "step": 558 + }, + { + "epoch": 15.535211267605634, + "grad_norm": 0.28483787178993225, + "learning_rate": 8.022875475534588e-07, + "loss": 0.2870042622089386, + "step": 559 + }, + { + "epoch": 15.56338028169014, + "grad_norm": 0.27896517515182495, + "learning_rate": 8.01636806561836e-07, + "loss": 0.287916362285614, + "step": 560 + }, + { + "epoch": 15.591549295774648, + "grad_norm": 0.2788335382938385, + "learning_rate": 8.009852990408606e-07, + "loss": 0.28609931468963623, + "step": 561 + }, + { + "epoch": 15.619718309859154, + "grad_norm": 0.2826322019100189, + "learning_rate": 8.003330269751372e-07, + "loss": 0.2950190305709839, + "step": 562 + }, + { + "epoch": 15.647887323943662, + "grad_norm": 0.2843019366264343, + "learning_rate": 7.996799923515997e-07, + "loss": 0.2914244532585144, + "step": 563 + }, + { + "epoch": 15.676056338028168, + "grad_norm": 0.26445460319519043, + "learning_rate": 7.990261971595048e-07, + "loss": 0.27984780073165894, + "step": 564 + }, + { + "epoch": 15.704225352112676, + "grad_norm": 0.27918627858161926, + "learning_rate": 7.983716433904262e-07, + "loss": 0.27757298946380615, + "step": 565 + }, + { + "epoch": 15.732394366197184, + "grad_norm": 0.2938336133956909, + "learning_rate": 7.977163330382479e-07, + "loss": 0.2920360565185547, + "step": 566 + }, + { + "epoch": 15.76056338028169, + "grad_norm": 0.28976547718048096, + "learning_rate": 7.970602680991592e-07, + "loss": 0.2951090931892395, + "step": 567 + }, + { + "epoch": 15.788732394366198, + "grad_norm": 0.27327752113342285, + "learning_rate": 7.964034505716476e-07, + "loss": 0.29640987515449524, + "step": 568 + }, + { + "epoch": 15.816901408450704, + "grad_norm": 0.27222704887390137, + "learning_rate": 7.957458824564931e-07, + "loss": 0.28876399993896484, + "step": 569 + }, + { + "epoch": 15.845070422535212, + "grad_norm": 0.29962998628616333, + "learning_rate": 7.950875657567621e-07, + "loss": 0.3039361238479614, + "step": 570 + }, + { + "epoch": 15.873239436619718, + "grad_norm": 0.2705839276313782, + "learning_rate": 7.944285024778017e-07, + "loss": 0.28840112686157227, + "step": 571 + }, + { + "epoch": 15.901408450704226, + "grad_norm": 0.28124475479125977, + "learning_rate": 7.93768694627233e-07, + "loss": 0.2832530736923218, + "step": 572 + }, + { + "epoch": 15.929577464788732, + "grad_norm": 0.29025372862815857, + "learning_rate": 7.931081442149448e-07, + "loss": 0.28588593006134033, + "step": 573 + }, + { + "epoch": 15.95774647887324, + "grad_norm": 0.27376946806907654, + "learning_rate": 7.924468532530883e-07, + "loss": 0.2883457839488983, + "step": 574 + }, + { + "epoch": 15.985915492957746, + "grad_norm": 0.28059038519859314, + "learning_rate": 7.917848237560708e-07, + "loss": 0.2923107147216797, + "step": 575 + }, + { + "epoch": 16.0, + "grad_norm": 0.39920157194137573, + "learning_rate": 7.911220577405484e-07, + "loss": 0.2896960973739624, + "step": 576 + }, + { + "epoch": 16.028169014084508, + "grad_norm": 0.2756041884422302, + "learning_rate": 7.904585572254218e-07, + "loss": 0.2934238910675049, + "step": 577 + }, + { + "epoch": 16.056338028169016, + "grad_norm": 0.2831096947193146, + "learning_rate": 7.897943242318285e-07, + "loss": 0.2862626910209656, + "step": 578 + }, + { + "epoch": 16.08450704225352, + "grad_norm": 0.27020981907844543, + "learning_rate": 7.891293607831373e-07, + "loss": 0.3019767999649048, + "step": 579 + }, + { + "epoch": 16.112676056338028, + "grad_norm": 0.2866615056991577, + "learning_rate": 7.884636689049422e-07, + "loss": 0.29431337118148804, + "step": 580 + }, + { + "epoch": 16.140845070422536, + "grad_norm": 0.27709120512008667, + "learning_rate": 7.877972506250562e-07, + "loss": 0.26718783378601074, + "step": 581 + }, + { + "epoch": 16.169014084507044, + "grad_norm": 0.2864624261856079, + "learning_rate": 7.871301079735049e-07, + "loss": 0.28138402104377747, + "step": 582 + }, + { + "epoch": 16.197183098591548, + "grad_norm": 0.2806070148944855, + "learning_rate": 7.864622429825204e-07, + "loss": 0.29040491580963135, + "step": 583 + }, + { + "epoch": 16.225352112676056, + "grad_norm": 0.2866605818271637, + "learning_rate": 7.857936576865356e-07, + "loss": 0.2876106798648834, + "step": 584 + }, + { + "epoch": 16.253521126760564, + "grad_norm": 0.2853955626487732, + "learning_rate": 7.851243541221769e-07, + "loss": 0.30784159898757935, + "step": 585 + }, + { + "epoch": 16.281690140845072, + "grad_norm": 0.290031760931015, + "learning_rate": 7.844543343282595e-07, + "loss": 0.27567434310913086, + "step": 586 + }, + { + "epoch": 16.309859154929576, + "grad_norm": 0.283806174993515, + "learning_rate": 7.837836003457793e-07, + "loss": 0.28710314631462097, + "step": 587 + }, + { + "epoch": 16.338028169014084, + "grad_norm": 0.2768094539642334, + "learning_rate": 7.831121542179086e-07, + "loss": 0.27676063776016235, + "step": 588 + }, + { + "epoch": 16.366197183098592, + "grad_norm": 0.27568569779396057, + "learning_rate": 7.824399979899889e-07, + "loss": 0.2947593927383423, + "step": 589 + }, + { + "epoch": 16.3943661971831, + "grad_norm": 0.3079885244369507, + "learning_rate": 7.817671337095244e-07, + "loss": 0.2868027985095978, + "step": 590 + }, + { + "epoch": 16.422535211267604, + "grad_norm": 0.29744645953178406, + "learning_rate": 7.810935634261764e-07, + "loss": 0.2946295738220215, + "step": 591 + }, + { + "epoch": 16.450704225352112, + "grad_norm": 0.28457650542259216, + "learning_rate": 7.804192891917571e-07, + "loss": 0.2790455222129822, + "step": 592 + }, + { + "epoch": 16.47887323943662, + "grad_norm": 0.28848767280578613, + "learning_rate": 7.797443130602226e-07, + "loss": 0.2941606640815735, + "step": 593 + }, + { + "epoch": 16.507042253521128, + "grad_norm": 0.2936708927154541, + "learning_rate": 7.79068637087667e-07, + "loss": 0.2923729121685028, + "step": 594 + }, + { + "epoch": 16.535211267605632, + "grad_norm": 0.28460994362831116, + "learning_rate": 7.783922633323169e-07, + "loss": 0.2795827090740204, + "step": 595 + }, + { + "epoch": 16.56338028169014, + "grad_norm": 0.28233277797698975, + "learning_rate": 7.777151938545235e-07, + "loss": 0.29222947359085083, + "step": 596 + }, + { + "epoch": 16.591549295774648, + "grad_norm": 0.28648558259010315, + "learning_rate": 7.770374307167585e-07, + "loss": 0.27923721075057983, + "step": 597 + }, + { + "epoch": 16.619718309859156, + "grad_norm": 0.2813912332057953, + "learning_rate": 7.763589759836058e-07, + "loss": 0.2912202477455139, + "step": 598 + }, + { + "epoch": 16.647887323943664, + "grad_norm": 0.28273841738700867, + "learning_rate": 7.756798317217558e-07, + "loss": 0.29805850982666016, + "step": 599 + }, + { + "epoch": 16.676056338028168, + "grad_norm": 0.2922080457210541, + "learning_rate": 7.75e-07, + "loss": 0.2834911346435547, + "step": 600 + }, + { + "epoch": 16.704225352112676, + "grad_norm": 0.27855902910232544, + "learning_rate": 7.743194828892235e-07, + "loss": 0.2842041552066803, + "step": 601 + }, + { + "epoch": 16.732394366197184, + "grad_norm": 0.2905668318271637, + "learning_rate": 7.736382824623999e-07, + "loss": 0.281250923871994, + "step": 602 + }, + { + "epoch": 16.760563380281692, + "grad_norm": 0.2928289771080017, + "learning_rate": 7.729564007945834e-07, + "loss": 0.2863979935646057, + "step": 603 + }, + { + "epoch": 16.788732394366196, + "grad_norm": 0.28705668449401855, + "learning_rate": 7.72273839962904e-07, + "loss": 0.287672221660614, + "step": 604 + }, + { + "epoch": 16.816901408450704, + "grad_norm": 0.29107093811035156, + "learning_rate": 7.715906020465602e-07, + "loss": 0.27715277671813965, + "step": 605 + }, + { + "epoch": 16.845070422535212, + "grad_norm": 0.28827348351478577, + "learning_rate": 7.709066891268133e-07, + "loss": 0.2648072838783264, + "step": 606 + }, + { + "epoch": 16.87323943661972, + "grad_norm": 0.28768298029899597, + "learning_rate": 7.702221032869808e-07, + "loss": 0.26861560344696045, + "step": 607 + }, + { + "epoch": 16.901408450704224, + "grad_norm": 0.3000086843967438, + "learning_rate": 7.695368466124296e-07, + "loss": 0.2910693287849426, + "step": 608 + }, + { + "epoch": 16.929577464788732, + "grad_norm": 0.3058622181415558, + "learning_rate": 7.688509211905707e-07, + "loss": 0.2804388105869293, + "step": 609 + }, + { + "epoch": 16.95774647887324, + "grad_norm": 0.2874692678451538, + "learning_rate": 7.681643291108517e-07, + "loss": 0.2883044481277466, + "step": 610 + }, + { + "epoch": 16.985915492957748, + "grad_norm": 0.2868764102458954, + "learning_rate": 7.67477072464751e-07, + "loss": 0.2847598195075989, + "step": 611 + }, + { + "epoch": 17.0, + "grad_norm": 0.3980148136615753, + "learning_rate": 7.667891533457718e-07, + "loss": 0.29258161783218384, + "step": 612 + }, + { + "epoch": 17.028169014084508, + "grad_norm": 0.2752118408679962, + "learning_rate": 7.661005738494349e-07, + "loss": 0.28283417224884033, + "step": 613 + }, + { + "epoch": 17.056338028169016, + "grad_norm": 0.2837778627872467, + "learning_rate": 7.654113360732732e-07, + "loss": 0.2758600115776062, + "step": 614 + }, + { + "epoch": 17.08450704225352, + "grad_norm": 0.2887240946292877, + "learning_rate": 7.647214421168238e-07, + "loss": 0.2864817976951599, + "step": 615 + }, + { + "epoch": 17.112676056338028, + "grad_norm": 0.27935662865638733, + "learning_rate": 7.640308940816239e-07, + "loss": 0.28024283051490784, + "step": 616 + }, + { + "epoch": 17.140845070422536, + "grad_norm": 0.2960900664329529, + "learning_rate": 7.633396940712023e-07, + "loss": 0.2681460976600647, + "step": 617 + }, + { + "epoch": 17.169014084507044, + "grad_norm": 0.2915673553943634, + "learning_rate": 7.626478441910744e-07, + "loss": 0.2805773913860321, + "step": 618 + }, + { + "epoch": 17.197183098591548, + "grad_norm": 0.2789720892906189, + "learning_rate": 7.619553465487344e-07, + "loss": 0.28847092390060425, + "step": 619 + }, + { + "epoch": 17.225352112676056, + "grad_norm": 0.2745218575000763, + "learning_rate": 7.612622032536507e-07, + "loss": 0.28274643421173096, + "step": 620 + }, + { + "epoch": 17.253521126760564, + "grad_norm": 0.2962469458580017, + "learning_rate": 7.60568416417258e-07, + "loss": 0.2827341556549072, + "step": 621 + }, + { + "epoch": 17.281690140845072, + "grad_norm": 0.28243717551231384, + "learning_rate": 7.59873988152951e-07, + "loss": 0.2872379422187805, + "step": 622 + }, + { + "epoch": 17.309859154929576, + "grad_norm": 0.2935909926891327, + "learning_rate": 7.591789205760789e-07, + "loss": 0.29077547788619995, + "step": 623 + }, + { + "epoch": 17.338028169014084, + "grad_norm": 0.2725030481815338, + "learning_rate": 7.584832158039378e-07, + "loss": 0.28079894185066223, + "step": 624 + }, + { + "epoch": 17.366197183098592, + "grad_norm": 0.2863542437553406, + "learning_rate": 7.577868759557653e-07, + "loss": 0.2759760618209839, + "step": 625 + }, + { + "epoch": 17.3943661971831, + "grad_norm": 0.2829958498477936, + "learning_rate": 7.570899031527332e-07, + "loss": 0.27316516637802124, + "step": 626 + }, + { + "epoch": 17.422535211267604, + "grad_norm": 0.28861963748931885, + "learning_rate": 7.563922995179418e-07, + "loss": 0.2758478820323944, + "step": 627 + }, + { + "epoch": 17.450704225352112, + "grad_norm": 0.2935570478439331, + "learning_rate": 7.556940671764124e-07, + "loss": 0.28437983989715576, + "step": 628 + }, + { + "epoch": 17.47887323943662, + "grad_norm": 0.3037278652191162, + "learning_rate": 7.54995208255082e-07, + "loss": 0.28943467140197754, + "step": 629 + }, + { + "epoch": 17.507042253521128, + "grad_norm": 0.31774893403053284, + "learning_rate": 7.54295724882796e-07, + "loss": 0.29023581743240356, + "step": 630 + }, + { + "epoch": 17.535211267605632, + "grad_norm": 0.28832852840423584, + "learning_rate": 7.535956191903021e-07, + "loss": 0.2840030789375305, + "step": 631 + }, + { + "epoch": 17.56338028169014, + "grad_norm": 0.28122231364250183, + "learning_rate": 7.528948933102438e-07, + "loss": 0.28523629903793335, + "step": 632 + }, + { + "epoch": 17.591549295774648, + "grad_norm": 0.29538190364837646, + "learning_rate": 7.521935493771534e-07, + "loss": 0.28018033504486084, + "step": 633 + }, + { + "epoch": 17.619718309859156, + "grad_norm": 0.3163702189922333, + "learning_rate": 7.514915895274463e-07, + "loss": 0.2885722517967224, + "step": 634 + }, + { + "epoch": 17.647887323943664, + "grad_norm": 0.2946973741054535, + "learning_rate": 7.507890158994139e-07, + "loss": 0.2785816490650177, + "step": 635 + }, + { + "epoch": 17.676056338028168, + "grad_norm": 0.2805889844894409, + "learning_rate": 7.500858306332172e-07, + "loss": 0.2974117398262024, + "step": 636 + }, + { + "epoch": 17.704225352112676, + "grad_norm": 0.28544914722442627, + "learning_rate": 7.493820358708809e-07, + "loss": 0.2892162501811981, + "step": 637 + }, + { + "epoch": 17.732394366197184, + "grad_norm": 0.3272300064563751, + "learning_rate": 7.486776337562853e-07, + "loss": 0.3017275333404541, + "step": 638 + }, + { + "epoch": 17.760563380281692, + "grad_norm": 0.28177788853645325, + "learning_rate": 7.479726264351618e-07, + "loss": 0.2729823589324951, + "step": 639 + }, + { + "epoch": 17.788732394366196, + "grad_norm": 0.2774059474468231, + "learning_rate": 7.472670160550848e-07, + "loss": 0.27497977018356323, + "step": 640 + }, + { + "epoch": 17.816901408450704, + "grad_norm": 0.2898328900337219, + "learning_rate": 7.46560804765466e-07, + "loss": 0.27945676445961, + "step": 641 + }, + { + "epoch": 17.845070422535212, + "grad_norm": 0.2784922420978546, + "learning_rate": 7.458539947175473e-07, + "loss": 0.29566580057144165, + "step": 642 + }, + { + "epoch": 17.87323943661972, + "grad_norm": 0.2864189147949219, + "learning_rate": 7.45146588064395e-07, + "loss": 0.2862587869167328, + "step": 643 + }, + { + "epoch": 17.901408450704224, + "grad_norm": 0.2896963953971863, + "learning_rate": 7.444385869608921e-07, + "loss": 0.2924667000770569, + "step": 644 + }, + { + "epoch": 17.929577464788732, + "grad_norm": 0.28463807702064514, + "learning_rate": 7.437299935637328e-07, + "loss": 0.2862287163734436, + "step": 645 + }, + { + "epoch": 17.95774647887324, + "grad_norm": 0.28407302498817444, + "learning_rate": 7.430208100314156e-07, + "loss": 0.2759779989719391, + "step": 646 + }, + { + "epoch": 17.985915492957748, + "grad_norm": 0.2773316502571106, + "learning_rate": 7.423110385242366e-07, + "loss": 0.2798498272895813, + "step": 647 + }, + { + "epoch": 18.0, + "grad_norm": 0.3958338499069214, + "learning_rate": 7.416006812042827e-07, + "loss": 0.28481870889663696, + "step": 648 + }, + { + "epoch": 18.028169014084508, + "grad_norm": 0.2922191321849823, + "learning_rate": 7.408897402354255e-07, + "loss": 0.2781963348388672, + "step": 649 + }, + { + "epoch": 18.056338028169016, + "grad_norm": 0.29166096448898315, + "learning_rate": 7.401782177833147e-07, + "loss": 0.2843964099884033, + "step": 650 + }, + { + "epoch": 18.08450704225352, + "grad_norm": 0.28290343284606934, + "learning_rate": 7.394661160153709e-07, + "loss": 0.2840275168418884, + "step": 651 + }, + { + "epoch": 18.112676056338028, + "grad_norm": 0.28300249576568604, + "learning_rate": 7.387534371007797e-07, + "loss": 0.2893407642841339, + "step": 652 + }, + { + "epoch": 18.140845070422536, + "grad_norm": 0.2870761752128601, + "learning_rate": 7.380401832104845e-07, + "loss": 0.26570916175842285, + "step": 653 + }, + { + "epoch": 18.169014084507044, + "grad_norm": 0.2919873297214508, + "learning_rate": 7.373263565171805e-07, + "loss": 0.26768985390663147, + "step": 654 + }, + { + "epoch": 18.197183098591548, + "grad_norm": 0.2856583893299103, + "learning_rate": 7.366119591953075e-07, + "loss": 0.2823103070259094, + "step": 655 + }, + { + "epoch": 18.225352112676056, + "grad_norm": 0.2853250801563263, + "learning_rate": 7.358969934210438e-07, + "loss": 0.28462791442871094, + "step": 656 + }, + { + "epoch": 18.253521126760564, + "grad_norm": 0.27667704224586487, + "learning_rate": 7.35181461372299e-07, + "loss": 0.27125126123428345, + "step": 657 + }, + { + "epoch": 18.281690140845072, + "grad_norm": 0.2884734272956848, + "learning_rate": 7.344653652287077e-07, + "loss": 0.271454781293869, + "step": 658 + }, + { + "epoch": 18.309859154929576, + "grad_norm": 0.28490886092185974, + "learning_rate": 7.337487071716232e-07, + "loss": 0.286302775144577, + "step": 659 + }, + { + "epoch": 18.338028169014084, + "grad_norm": 0.27361124753952026, + "learning_rate": 7.330314893841101e-07, + "loss": 0.2801797389984131, + "step": 660 + }, + { + "epoch": 18.366197183098592, + "grad_norm": 0.28517088294029236, + "learning_rate": 7.323137140509381e-07, + "loss": 0.2785356640815735, + "step": 661 + }, + { + "epoch": 18.3943661971831, + "grad_norm": 0.2725742757320404, + "learning_rate": 7.315953833585755e-07, + "loss": 0.27504605054855347, + "step": 662 + }, + { + "epoch": 18.422535211267604, + "grad_norm": 0.29915499687194824, + "learning_rate": 7.308764994951821e-07, + "loss": 0.2808704078197479, + "step": 663 + }, + { + "epoch": 18.450704225352112, + "grad_norm": 0.31304341554641724, + "learning_rate": 7.301570646506027e-07, + "loss": 0.2911706566810608, + "step": 664 + }, + { + "epoch": 18.47887323943662, + "grad_norm": 0.2919553816318512, + "learning_rate": 7.294370810163607e-07, + "loss": 0.27866852283477783, + "step": 665 + }, + { + "epoch": 18.507042253521128, + "grad_norm": 0.3162909746170044, + "learning_rate": 7.287165507856512e-07, + "loss": 0.2802932560443878, + "step": 666 + }, + { + "epoch": 18.535211267605632, + "grad_norm": 0.303523451089859, + "learning_rate": 7.279954761533342e-07, + "loss": 0.2824591398239136, + "step": 667 + }, + { + "epoch": 18.56338028169014, + "grad_norm": 0.29366716742515564, + "learning_rate": 7.27273859315928e-07, + "loss": 0.28101497888565063, + "step": 668 + }, + { + "epoch": 18.591549295774648, + "grad_norm": 0.28469985723495483, + "learning_rate": 7.265517024716026e-07, + "loss": 0.29134345054626465, + "step": 669 + }, + { + "epoch": 18.619718309859156, + "grad_norm": 0.28721922636032104, + "learning_rate": 7.258290078201731e-07, + "loss": 0.284817636013031, + "step": 670 + }, + { + "epoch": 18.647887323943664, + "grad_norm": 0.30535197257995605, + "learning_rate": 7.251057775630927e-07, + "loss": 0.28168779611587524, + "step": 671 + }, + { + "epoch": 18.676056338028168, + "grad_norm": 0.2980702817440033, + "learning_rate": 7.243820139034464e-07, + "loss": 0.27493056654930115, + "step": 672 + }, + { + "epoch": 18.704225352112676, + "grad_norm": 0.28984636068344116, + "learning_rate": 7.236577190459433e-07, + "loss": 0.2975635528564453, + "step": 673 + }, + { + "epoch": 18.732394366197184, + "grad_norm": 0.29580390453338623, + "learning_rate": 7.229328951969115e-07, + "loss": 0.2849118113517761, + "step": 674 + }, + { + "epoch": 18.760563380281692, + "grad_norm": 0.2950834035873413, + "learning_rate": 7.222075445642904e-07, + "loss": 0.26458609104156494, + "step": 675 + }, + { + "epoch": 18.788732394366196, + "grad_norm": 0.29167890548706055, + "learning_rate": 7.214816693576234e-07, + "loss": 0.2846098840236664, + "step": 676 + }, + { + "epoch": 18.816901408450704, + "grad_norm": 0.2784614861011505, + "learning_rate": 7.207552717880522e-07, + "loss": 0.28443169593811035, + "step": 677 + }, + { + "epoch": 18.845070422535212, + "grad_norm": 0.29537051916122437, + "learning_rate": 7.200283540683102e-07, + "loss": 0.27960023283958435, + "step": 678 + }, + { + "epoch": 18.87323943661972, + "grad_norm": 0.2873672544956207, + "learning_rate": 7.193009184127145e-07, + "loss": 0.28757309913635254, + "step": 679 + }, + { + "epoch": 18.901408450704224, + "grad_norm": 0.28597328066825867, + "learning_rate": 7.185729670371604e-07, + "loss": 0.2904655635356903, + "step": 680 + }, + { + "epoch": 18.929577464788732, + "grad_norm": 0.29267045855522156, + "learning_rate": 7.17844502159114e-07, + "loss": 0.2797931432723999, + "step": 681 + }, + { + "epoch": 18.95774647887324, + "grad_norm": 0.27707934379577637, + "learning_rate": 7.171155259976057e-07, + "loss": 0.2788022458553314, + "step": 682 + }, + { + "epoch": 18.985915492957748, + "grad_norm": 0.2854091227054596, + "learning_rate": 7.163860407732231e-07, + "loss": 0.28216353058815, + "step": 683 + }, + { + "epoch": 19.0, + "grad_norm": 0.4010404348373413, + "learning_rate": 7.156560487081051e-07, + "loss": 0.2831748127937317, + "step": 684 + }, + { + "epoch": 19.028169014084508, + "grad_norm": 0.2948407232761383, + "learning_rate": 7.149255520259338e-07, + "loss": 0.26844292879104614, + "step": 685 + }, + { + "epoch": 19.056338028169016, + "grad_norm": 0.2946661114692688, + "learning_rate": 7.141945529519288e-07, + "loss": 0.2809017300605774, + "step": 686 + }, + { + "epoch": 19.08450704225352, + "grad_norm": 0.27715936303138733, + "learning_rate": 7.134630537128403e-07, + "loss": 0.2835448980331421, + "step": 687 + }, + { + "epoch": 19.112676056338028, + "grad_norm": 0.2933226525783539, + "learning_rate": 7.127310565369415e-07, + "loss": 0.2795133888721466, + "step": 688 + }, + { + "epoch": 19.140845070422536, + "grad_norm": 0.28180861473083496, + "learning_rate": 7.11998563654023e-07, + "loss": 0.2750745713710785, + "step": 689 + }, + { + "epoch": 19.169014084507044, + "grad_norm": 0.2755012810230255, + "learning_rate": 7.11265577295385e-07, + "loss": 0.281097412109375, + "step": 690 + }, + { + "epoch": 19.197183098591548, + "grad_norm": 0.2865377962589264, + "learning_rate": 7.105320996938314e-07, + "loss": 0.2677628993988037, + "step": 691 + }, + { + "epoch": 19.225352112676056, + "grad_norm": 0.2958216369152069, + "learning_rate": 7.097981330836616e-07, + "loss": 0.2733122408390045, + "step": 692 + }, + { + "epoch": 19.253521126760564, + "grad_norm": 0.2982434034347534, + "learning_rate": 7.090636797006657e-07, + "loss": 0.2764785885810852, + "step": 693 + }, + { + "epoch": 19.281690140845072, + "grad_norm": 0.31210824847221375, + "learning_rate": 7.083287417821157e-07, + "loss": 0.27116531133651733, + "step": 694 + }, + { + "epoch": 19.309859154929576, + "grad_norm": 0.29045426845550537, + "learning_rate": 7.075933215667604e-07, + "loss": 0.2775840163230896, + "step": 695 + }, + { + "epoch": 19.338028169014084, + "grad_norm": 0.29685893654823303, + "learning_rate": 7.068574212948169e-07, + "loss": 0.2803945541381836, + "step": 696 + }, + { + "epoch": 19.366197183098592, + "grad_norm": 0.2790866494178772, + "learning_rate": 7.06121043207965e-07, + "loss": 0.2769659161567688, + "step": 697 + }, + { + "epoch": 19.3943661971831, + "grad_norm": 0.31644630432128906, + "learning_rate": 7.053841895493406e-07, + "loss": 0.27923786640167236, + "step": 698 + }, + { + "epoch": 19.422535211267604, + "grad_norm": 0.30641067028045654, + "learning_rate": 7.046468625635274e-07, + "loss": 0.2825276255607605, + "step": 699 + }, + { + "epoch": 19.450704225352112, + "grad_norm": 0.292458713054657, + "learning_rate": 7.039090644965509e-07, + "loss": 0.27422571182250977, + "step": 700 + }, + { + "epoch": 19.47887323943662, + "grad_norm": 0.2903311550617218, + "learning_rate": 7.031707975958726e-07, + "loss": 0.27189522981643677, + "step": 701 + }, + { + "epoch": 19.507042253521128, + "grad_norm": 0.2947315275669098, + "learning_rate": 7.024320641103811e-07, + "loss": 0.2683555483818054, + "step": 702 + }, + { + "epoch": 19.535211267605632, + "grad_norm": 0.29522547125816345, + "learning_rate": 7.01692866290387e-07, + "loss": 0.28815943002700806, + "step": 703 + }, + { + "epoch": 19.56338028169014, + "grad_norm": 0.28272008895874023, + "learning_rate": 7.009532063876148e-07, + "loss": 0.2853075861930847, + "step": 704 + }, + { + "epoch": 19.591549295774648, + "grad_norm": 0.286604642868042, + "learning_rate": 7.002130866551968e-07, + "loss": 0.2744004726409912, + "step": 705 + }, + { + "epoch": 19.619718309859156, + "grad_norm": 0.2829611301422119, + "learning_rate": 6.994725093476664e-07, + "loss": 0.2899395525455475, + "step": 706 + }, + { + "epoch": 19.647887323943664, + "grad_norm": 0.3035781681537628, + "learning_rate": 6.987314767209503e-07, + "loss": 0.29819610714912415, + "step": 707 + }, + { + "epoch": 19.676056338028168, + "grad_norm": 0.30463680624961853, + "learning_rate": 6.979899910323624e-07, + "loss": 0.2818058729171753, + "step": 708 + }, + { + "epoch": 19.704225352112676, + "grad_norm": 0.29514482617378235, + "learning_rate": 6.972480545405968e-07, + "loss": 0.294766366481781, + "step": 709 + }, + { + "epoch": 19.732394366197184, + "grad_norm": 0.282625675201416, + "learning_rate": 6.965056695057204e-07, + "loss": 0.27316591143608093, + "step": 710 + }, + { + "epoch": 19.760563380281692, + "grad_norm": 0.3090338110923767, + "learning_rate": 6.957628381891673e-07, + "loss": 0.2785091698169708, + "step": 711 + }, + { + "epoch": 19.788732394366196, + "grad_norm": 0.2826164960861206, + "learning_rate": 6.950195628537299e-07, + "loss": 0.2870754301548004, + "step": 712 + }, + { + "epoch": 19.816901408450704, + "grad_norm": 0.29807525873184204, + "learning_rate": 6.942758457635543e-07, + "loss": 0.27232879400253296, + "step": 713 + }, + { + "epoch": 19.845070422535212, + "grad_norm": 0.2901877760887146, + "learning_rate": 6.935316891841315e-07, + "loss": 0.2786208987236023, + "step": 714 + }, + { + "epoch": 19.87323943661972, + "grad_norm": 0.2947152853012085, + "learning_rate": 6.927870953822915e-07, + "loss": 0.2676268517971039, + "step": 715 + }, + { + "epoch": 19.901408450704224, + "grad_norm": 0.30847856402397156, + "learning_rate": 6.920420666261961e-07, + "loss": 0.27726125717163086, + "step": 716 + }, + { + "epoch": 19.929577464788732, + "grad_norm": 0.29455119371414185, + "learning_rate": 6.912966051853322e-07, + "loss": 0.28886911273002625, + "step": 717 + }, + { + "epoch": 19.95774647887324, + "grad_norm": 0.2961712181568146, + "learning_rate": 6.905507133305047e-07, + "loss": 0.2736320495605469, + "step": 718 + }, + { + "epoch": 19.985915492957748, + "grad_norm": 0.2923624515533447, + "learning_rate": 6.898043933338293e-07, + "loss": 0.2720155119895935, + "step": 719 + }, + { + "epoch": 20.0, + "grad_norm": 0.40786370635032654, + "learning_rate": 6.890576474687263e-07, + "loss": 0.3052176237106323, + "step": 720 + }, + { + "epoch": 20.028169014084508, + "grad_norm": 0.281310498714447, + "learning_rate": 6.883104780099133e-07, + "loss": 0.2827909588813782, + "step": 721 + }, + { + "epoch": 20.056338028169016, + "grad_norm": 0.28428319096565247, + "learning_rate": 6.875628872333975e-07, + "loss": 0.2593810558319092, + "step": 722 + }, + { + "epoch": 20.08450704225352, + "grad_norm": 0.28026291728019714, + "learning_rate": 6.868148774164706e-07, + "loss": 0.2783263027667999, + "step": 723 + }, + { + "epoch": 20.112676056338028, + "grad_norm": 0.2842010259628296, + "learning_rate": 6.860664508377001e-07, + "loss": 0.2809029221534729, + "step": 724 + }, + { + "epoch": 20.140845070422536, + "grad_norm": 0.2880638539791107, + "learning_rate": 6.853176097769228e-07, + "loss": 0.26888588070869446, + "step": 725 + }, + { + "epoch": 20.169014084507044, + "grad_norm": 0.28630784153938293, + "learning_rate": 6.84568356515239e-07, + "loss": 0.2781735062599182, + "step": 726 + }, + { + "epoch": 20.197183098591548, + "grad_norm": 0.30342307686805725, + "learning_rate": 6.838186933350036e-07, + "loss": 0.27911239862442017, + "step": 727 + }, + { + "epoch": 20.225352112676056, + "grad_norm": 0.29965290427207947, + "learning_rate": 6.83068622519821e-07, + "loss": 0.2759650945663452, + "step": 728 + }, + { + "epoch": 20.253521126760564, + "grad_norm": 0.2921484708786011, + "learning_rate": 6.823181463545366e-07, + "loss": 0.26791465282440186, + "step": 729 + }, + { + "epoch": 20.281690140845072, + "grad_norm": 0.29477155208587646, + "learning_rate": 6.815672671252315e-07, + "loss": 0.27440106868743896, + "step": 730 + }, + { + "epoch": 20.309859154929576, + "grad_norm": 0.2930176854133606, + "learning_rate": 6.808159871192136e-07, + "loss": 0.28788119554519653, + "step": 731 + }, + { + "epoch": 20.338028169014084, + "grad_norm": 0.304382860660553, + "learning_rate": 6.800643086250121e-07, + "loss": 0.2717517614364624, + "step": 732 + }, + { + "epoch": 20.366197183098592, + "grad_norm": 0.2945499122142792, + "learning_rate": 6.793122339323705e-07, + "loss": 0.29744279384613037, + "step": 733 + }, + { + "epoch": 20.3943661971831, + "grad_norm": 0.2932227849960327, + "learning_rate": 6.78559765332238e-07, + "loss": 0.2782973051071167, + "step": 734 + }, + { + "epoch": 20.422535211267604, + "grad_norm": 0.29432976245880127, + "learning_rate": 6.778069051167653e-07, + "loss": 0.28551533818244934, + "step": 735 + }, + { + "epoch": 20.450704225352112, + "grad_norm": 0.30091312527656555, + "learning_rate": 6.770536555792944e-07, + "loss": 0.28610894083976746, + "step": 736 + }, + { + "epoch": 20.47887323943662, + "grad_norm": 0.29813316464424133, + "learning_rate": 6.763000190143545e-07, + "loss": 0.28137102723121643, + "step": 737 + }, + { + "epoch": 20.507042253521128, + "grad_norm": 0.28738856315612793, + "learning_rate": 6.755459977176532e-07, + "loss": 0.26876533031463623, + "step": 738 + }, + { + "epoch": 20.535211267605632, + "grad_norm": 0.2894875407218933, + "learning_rate": 6.747915939860701e-07, + "loss": 0.2704589366912842, + "step": 739 + }, + { + "epoch": 20.56338028169014, + "grad_norm": 0.3046717047691345, + "learning_rate": 6.740368101176495e-07, + "loss": 0.28678447008132935, + "step": 740 + }, + { + "epoch": 20.591549295774648, + "grad_norm": 0.29942622780799866, + "learning_rate": 6.732816484115946e-07, + "loss": 0.27722471952438354, + "step": 741 + }, + { + "epoch": 20.619718309859156, + "grad_norm": 0.2984582185745239, + "learning_rate": 6.725261111682584e-07, + "loss": 0.2638360261917114, + "step": 742 + }, + { + "epoch": 20.647887323943664, + "grad_norm": 0.2943922281265259, + "learning_rate": 6.717702006891386e-07, + "loss": 0.286998450756073, + "step": 743 + }, + { + "epoch": 20.676056338028168, + "grad_norm": 0.2971697747707367, + "learning_rate": 6.710139192768694e-07, + "loss": 0.2628033757209778, + "step": 744 + }, + { + "epoch": 20.704225352112676, + "grad_norm": 0.2915992736816406, + "learning_rate": 6.702572692352155e-07, + "loss": 0.2789704203605652, + "step": 745 + }, + { + "epoch": 20.732394366197184, + "grad_norm": 0.29871392250061035, + "learning_rate": 6.695002528690639e-07, + "loss": 0.2669401168823242, + "step": 746 + }, + { + "epoch": 20.760563380281692, + "grad_norm": 0.29496580362319946, + "learning_rate": 6.687428724844179e-07, + "loss": 0.2711006999015808, + "step": 747 + }, + { + "epoch": 20.788732394366196, + "grad_norm": 0.29237619042396545, + "learning_rate": 6.679851303883891e-07, + "loss": 0.2822151780128479, + "step": 748 + }, + { + "epoch": 20.816901408450704, + "grad_norm": 0.29689720273017883, + "learning_rate": 6.672270288891918e-07, + "loss": 0.2751491665840149, + "step": 749 + }, + { + "epoch": 20.845070422535212, + "grad_norm": 0.28889331221580505, + "learning_rate": 6.664685702961344e-07, + "loss": 0.2681749761104584, + "step": 750 + }, + { + "epoch": 20.87323943661972, + "grad_norm": 0.2995631694793701, + "learning_rate": 6.657097569196133e-07, + "loss": 0.2793988287448883, + "step": 751 + }, + { + "epoch": 20.901408450704224, + "grad_norm": 0.29980671405792236, + "learning_rate": 6.649505910711058e-07, + "loss": 0.27338624000549316, + "step": 752 + }, + { + "epoch": 20.929577464788732, + "grad_norm": 0.29344668984413147, + "learning_rate": 6.641910750631626e-07, + "loss": 0.284781813621521, + "step": 753 + }, + { + "epoch": 20.95774647887324, + "grad_norm": 0.29827746748924255, + "learning_rate": 6.634312112094013e-07, + "loss": 0.27890220284461975, + "step": 754 + }, + { + "epoch": 20.985915492957748, + "grad_norm": 0.2813144326210022, + "learning_rate": 6.626710018244987e-07, + "loss": 0.2822881042957306, + "step": 755 + }, + { + "epoch": 21.0, + "grad_norm": 0.3963703215122223, + "learning_rate": 6.619104492241847e-07, + "loss": 0.27128899097442627, + "step": 756 + }, + { + "epoch": 21.028169014084508, + "grad_norm": 0.2815580666065216, + "learning_rate": 6.611495557252344e-07, + "loss": 0.26516419649124146, + "step": 757 + }, + { + "epoch": 21.056338028169016, + "grad_norm": 0.2884436845779419, + "learning_rate": 6.603883236454612e-07, + "loss": 0.2861919701099396, + "step": 758 + }, + { + "epoch": 21.08450704225352, + "grad_norm": 0.29655352234840393, + "learning_rate": 6.596267553037102e-07, + "loss": 0.28643375635147095, + "step": 759 + }, + { + "epoch": 21.112676056338028, + "grad_norm": 0.2927301824092865, + "learning_rate": 6.588648530198504e-07, + "loss": 0.26665711402893066, + "step": 760 + }, + { + "epoch": 21.140845070422536, + "grad_norm": 0.3053556978702545, + "learning_rate": 6.581026191147687e-07, + "loss": 0.2608697712421417, + "step": 761 + }, + { + "epoch": 21.169014084507044, + "grad_norm": 0.2939828634262085, + "learning_rate": 6.573400559103613e-07, + "loss": 0.2792375683784485, + "step": 762 + }, + { + "epoch": 21.197183098591548, + "grad_norm": 0.2972046136856079, + "learning_rate": 6.565771657295285e-07, + "loss": 0.28457099199295044, + "step": 763 + }, + { + "epoch": 21.225352112676056, + "grad_norm": 0.2918429672718048, + "learning_rate": 6.558139508961654e-07, + "loss": 0.2648508548736572, + "step": 764 + }, + { + "epoch": 21.253521126760564, + "grad_norm": 0.28380143642425537, + "learning_rate": 6.550504137351575e-07, + "loss": 0.27792784571647644, + "step": 765 + }, + { + "epoch": 21.281690140845072, + "grad_norm": 0.3151639997959137, + "learning_rate": 6.542865565723707e-07, + "loss": 0.2657250165939331, + "step": 766 + }, + { + "epoch": 21.309859154929576, + "grad_norm": 0.2861776351928711, + "learning_rate": 6.53522381734647e-07, + "loss": 0.27351340651512146, + "step": 767 + }, + { + "epoch": 21.338028169014084, + "grad_norm": 0.28596001863479614, + "learning_rate": 6.527578915497951e-07, + "loss": 0.28022241592407227, + "step": 768 + }, + { + "epoch": 21.366197183098592, + "grad_norm": 0.29702675342559814, + "learning_rate": 6.519930883465847e-07, + "loss": 0.2644035518169403, + "step": 769 + }, + { + "epoch": 21.3943661971831, + "grad_norm": 0.2863904535770416, + "learning_rate": 6.512279744547392e-07, + "loss": 0.2721293568611145, + "step": 770 + }, + { + "epoch": 21.422535211267604, + "grad_norm": 0.311262845993042, + "learning_rate": 6.50462552204928e-07, + "loss": 0.2911388874053955, + "step": 771 + }, + { + "epoch": 21.450704225352112, + "grad_norm": 0.3132490813732147, + "learning_rate": 6.496968239287603e-07, + "loss": 0.27957841753959656, + "step": 772 + }, + { + "epoch": 21.47887323943662, + "grad_norm": 0.29439255595207214, + "learning_rate": 6.489307919587769e-07, + "loss": 0.28288164734840393, + "step": 773 + }, + { + "epoch": 21.507042253521128, + "grad_norm": 0.3006008267402649, + "learning_rate": 6.481644586284442e-07, + "loss": 0.26865097880363464, + "step": 774 + }, + { + "epoch": 21.535211267605632, + "grad_norm": 0.28934645652770996, + "learning_rate": 6.473978262721463e-07, + "loss": 0.28625524044036865, + "step": 775 + }, + { + "epoch": 21.56338028169014, + "grad_norm": 0.28962355852127075, + "learning_rate": 6.466308972251785e-07, + "loss": 0.2737366855144501, + "step": 776 + }, + { + "epoch": 21.591549295774648, + "grad_norm": 0.29193779826164246, + "learning_rate": 6.458636738237395e-07, + "loss": 0.2644401788711548, + "step": 777 + }, + { + "epoch": 21.619718309859156, + "grad_norm": 0.31439822912216187, + "learning_rate": 6.45096158404925e-07, + "loss": 0.2638384699821472, + "step": 778 + }, + { + "epoch": 21.647887323943664, + "grad_norm": 0.2855563163757324, + "learning_rate": 6.443283533067198e-07, + "loss": 0.2697969079017639, + "step": 779 + }, + { + "epoch": 21.676056338028168, + "grad_norm": 0.2941296398639679, + "learning_rate": 6.435602608679916e-07, + "loss": 0.27152666449546814, + "step": 780 + }, + { + "epoch": 21.704225352112676, + "grad_norm": 0.2861116826534271, + "learning_rate": 6.427918834284834e-07, + "loss": 0.2749404013156891, + "step": 781 + }, + { + "epoch": 21.732394366197184, + "grad_norm": 0.30467715859413147, + "learning_rate": 6.420232233288055e-07, + "loss": 0.28106456995010376, + "step": 782 + }, + { + "epoch": 21.760563380281692, + "grad_norm": 0.2885453402996063, + "learning_rate": 6.412542829104306e-07, + "loss": 0.2661711275577545, + "step": 783 + }, + { + "epoch": 21.788732394366196, + "grad_norm": 0.30243006348609924, + "learning_rate": 6.404850645156841e-07, + "loss": 0.28171294927597046, + "step": 784 + }, + { + "epoch": 21.816901408450704, + "grad_norm": 0.29606276750564575, + "learning_rate": 6.397155704877388e-07, + "loss": 0.2737141251564026, + "step": 785 + }, + { + "epoch": 21.845070422535212, + "grad_norm": 0.30514174699783325, + "learning_rate": 6.389458031706068e-07, + "loss": 0.2778671979904175, + "step": 786 + }, + { + "epoch": 21.87323943661972, + "grad_norm": 0.29419445991516113, + "learning_rate": 6.381757649091329e-07, + "loss": 0.27829116582870483, + "step": 787 + }, + { + "epoch": 21.901408450704224, + "grad_norm": 0.30376535654067993, + "learning_rate": 6.374054580489873e-07, + "loss": 0.26818743348121643, + "step": 788 + }, + { + "epoch": 21.929577464788732, + "grad_norm": 0.29063352942466736, + "learning_rate": 6.366348849366583e-07, + "loss": 0.28016185760498047, + "step": 789 + }, + { + "epoch": 21.95774647887324, + "grad_norm": 0.29429173469543457, + "learning_rate": 6.358640479194451e-07, + "loss": 0.27824854850769043, + "step": 790 + }, + { + "epoch": 21.985915492957748, + "grad_norm": 0.28934815526008606, + "learning_rate": 6.35092949345451e-07, + "loss": 0.2743881344795227, + "step": 791 + }, + { + "epoch": 22.0, + "grad_norm": 0.41559702157974243, + "learning_rate": 6.343215915635761e-07, + "loss": 0.2856147289276123, + "step": 792 + }, + { + "epoch": 22.028169014084508, + "grad_norm": 0.29498717188835144, + "learning_rate": 6.335499769235098e-07, + "loss": 0.2729465961456299, + "step": 793 + }, + { + "epoch": 22.056338028169016, + "grad_norm": 0.30124449729919434, + "learning_rate": 6.327781077757241e-07, + "loss": 0.2874697744846344, + "step": 794 + }, + { + "epoch": 22.08450704225352, + "grad_norm": 0.3204105794429779, + "learning_rate": 6.320059864714664e-07, + "loss": 0.2923066020011902, + "step": 795 + }, + { + "epoch": 22.112676056338028, + "grad_norm": 0.2912622392177582, + "learning_rate": 6.31233615362752e-07, + "loss": 0.2808852791786194, + "step": 796 + }, + { + "epoch": 22.140845070422536, + "grad_norm": 0.30250096321105957, + "learning_rate": 6.304609968023572e-07, + "loss": 0.27111589908599854, + "step": 797 + }, + { + "epoch": 22.169014084507044, + "grad_norm": 0.3024645447731018, + "learning_rate": 6.296881331438126e-07, + "loss": 0.2812804877758026, + "step": 798 + }, + { + "epoch": 22.197183098591548, + "grad_norm": 0.29673656821250916, + "learning_rate": 6.289150267413942e-07, + "loss": 0.2681958079338074, + "step": 799 + }, + { + "epoch": 22.225352112676056, + "grad_norm": 0.29564592242240906, + "learning_rate": 6.281416799501187e-07, + "loss": 0.26508989930152893, + "step": 800 + }, + { + "epoch": 22.253521126760564, + "grad_norm": 0.2849496603012085, + "learning_rate": 6.273680951257342e-07, + "loss": 0.27044007182121277, + "step": 801 + }, + { + "epoch": 22.281690140845072, + "grad_norm": 0.30459970235824585, + "learning_rate": 6.265942746247146e-07, + "loss": 0.26503556966781616, + "step": 802 + }, + { + "epoch": 22.309859154929576, + "grad_norm": 0.29415223002433777, + "learning_rate": 6.258202208042511e-07, + "loss": 0.26770085096359253, + "step": 803 + }, + { + "epoch": 22.338028169014084, + "grad_norm": 0.3101199269294739, + "learning_rate": 6.25045936022246e-07, + "loss": 0.26633113622665405, + "step": 804 + }, + { + "epoch": 22.366197183098592, + "grad_norm": 0.28551825881004333, + "learning_rate": 6.242714226373049e-07, + "loss": 0.2745598256587982, + "step": 805 + }, + { + "epoch": 22.3943661971831, + "grad_norm": 0.30341607332229614, + "learning_rate": 6.2349668300873e-07, + "loss": 0.2879912853240967, + "step": 806 + }, + { + "epoch": 22.422535211267604, + "grad_norm": 0.33077767491340637, + "learning_rate": 6.227217194965125e-07, + "loss": 0.28035950660705566, + "step": 807 + }, + { + "epoch": 22.450704225352112, + "grad_norm": 0.305733859539032, + "learning_rate": 6.219465344613258e-07, + "loss": 0.2842296361923218, + "step": 808 + }, + { + "epoch": 22.47887323943662, + "grad_norm": 0.2931113839149475, + "learning_rate": 6.211711302645177e-07, + "loss": 0.2730957865715027, + "step": 809 + }, + { + "epoch": 22.507042253521128, + "grad_norm": 0.2949962913990021, + "learning_rate": 6.203955092681039e-07, + "loss": 0.281680166721344, + "step": 810 + }, + { + "epoch": 22.535211267605632, + "grad_norm": 0.30062124133110046, + "learning_rate": 6.196196738347607e-07, + "loss": 0.2771790027618408, + "step": 811 + }, + { + "epoch": 22.56338028169014, + "grad_norm": 0.29685312509536743, + "learning_rate": 6.188436263278172e-07, + "loss": 0.27885377407073975, + "step": 812 + }, + { + "epoch": 22.591549295774648, + "grad_norm": 0.30217039585113525, + "learning_rate": 6.180673691112486e-07, + "loss": 0.2664039433002472, + "step": 813 + }, + { + "epoch": 22.619718309859156, + "grad_norm": 0.2935945987701416, + "learning_rate": 6.172909045496694e-07, + "loss": 0.266349196434021, + "step": 814 + }, + { + "epoch": 22.647887323943664, + "grad_norm": 0.31217825412750244, + "learning_rate": 6.165142350083249e-07, + "loss": 0.2723742127418518, + "step": 815 + }, + { + "epoch": 22.676056338028168, + "grad_norm": 0.2960183918476105, + "learning_rate": 6.157373628530852e-07, + "loss": 0.272281289100647, + "step": 816 + }, + { + "epoch": 22.704225352112676, + "grad_norm": 0.2914189100265503, + "learning_rate": 6.149602904504378e-07, + "loss": 0.26770728826522827, + "step": 817 + }, + { + "epoch": 22.732394366197184, + "grad_norm": 0.2774648368358612, + "learning_rate": 6.141830201674802e-07, + "loss": 0.2694011330604553, + "step": 818 + }, + { + "epoch": 22.760563380281692, + "grad_norm": 0.29001736640930176, + "learning_rate": 6.134055543719121e-07, + "loss": 0.2670798897743225, + "step": 819 + }, + { + "epoch": 22.788732394366196, + "grad_norm": 0.31117716431617737, + "learning_rate": 6.126278954320294e-07, + "loss": 0.26127567887306213, + "step": 820 + }, + { + "epoch": 22.816901408450704, + "grad_norm": 0.29720577597618103, + "learning_rate": 6.118500457167159e-07, + "loss": 0.27497297525405884, + "step": 821 + }, + { + "epoch": 22.845070422535212, + "grad_norm": 0.3057437241077423, + "learning_rate": 6.11072007595437e-07, + "loss": 0.27363038063049316, + "step": 822 + }, + { + "epoch": 22.87323943661972, + "grad_norm": 0.323045939207077, + "learning_rate": 6.102937834382315e-07, + "loss": 0.27130627632141113, + "step": 823 + }, + { + "epoch": 22.901408450704224, + "grad_norm": 0.28948745131492615, + "learning_rate": 6.095153756157051e-07, + "loss": 0.26591163873672485, + "step": 824 + }, + { + "epoch": 22.929577464788732, + "grad_norm": 0.27952200174331665, + "learning_rate": 6.087367864990232e-07, + "loss": 0.266745388507843, + "step": 825 + }, + { + "epoch": 22.95774647887324, + "grad_norm": 0.30804452300071716, + "learning_rate": 6.079580184599032e-07, + "loss": 0.2794422507286072, + "step": 826 + }, + { + "epoch": 22.985915492957748, + "grad_norm": 0.3002220392227173, + "learning_rate": 6.071790738706078e-07, + "loss": 0.26469242572784424, + "step": 827 + }, + { + "epoch": 23.0, + "grad_norm": 0.4127134084701538, + "learning_rate": 6.06399955103937e-07, + "loss": 0.2482779324054718, + "step": 828 + }, + { + "epoch": 23.028169014084508, + "grad_norm": 0.30051475763320923, + "learning_rate": 6.056206645332217e-07, + "loss": 0.26631736755371094, + "step": 829 + }, + { + "epoch": 23.056338028169016, + "grad_norm": 0.3008311688899994, + "learning_rate": 6.048412045323164e-07, + "loss": 0.27459877729415894, + "step": 830 + }, + { + "epoch": 23.08450704225352, + "grad_norm": 0.28853461146354675, + "learning_rate": 6.040615774755911e-07, + "loss": 0.26959413290023804, + "step": 831 + }, + { + "epoch": 23.112676056338028, + "grad_norm": 0.29199543595314026, + "learning_rate": 6.032817857379256e-07, + "loss": 0.2588391900062561, + "step": 832 + }, + { + "epoch": 23.140845070422536, + "grad_norm": 0.29191362857818604, + "learning_rate": 6.025018316946999e-07, + "loss": 0.27447617053985596, + "step": 833 + }, + { + "epoch": 23.169014084507044, + "grad_norm": 0.29501983523368835, + "learning_rate": 6.017217177217899e-07, + "loss": 0.26884716749191284, + "step": 834 + }, + { + "epoch": 23.197183098591548, + "grad_norm": 0.3098088502883911, + "learning_rate": 6.009414461955581e-07, + "loss": 0.28516972064971924, + "step": 835 + }, + { + "epoch": 23.225352112676056, + "grad_norm": 0.3027796149253845, + "learning_rate": 6.001610194928464e-07, + "loss": 0.2739514112472534, + "step": 836 + }, + { + "epoch": 23.253521126760564, + "grad_norm": 0.31156665086746216, + "learning_rate": 5.993804399909703e-07, + "loss": 0.26852983236312866, + "step": 837 + }, + { + "epoch": 23.281690140845072, + "grad_norm": 0.2958903908729553, + "learning_rate": 5.985997100677103e-07, + "loss": 0.2743365168571472, + "step": 838 + }, + { + "epoch": 23.309859154929576, + "grad_norm": 0.31140410900115967, + "learning_rate": 5.97818832101305e-07, + "loss": 0.27525418996810913, + "step": 839 + }, + { + "epoch": 23.338028169014084, + "grad_norm": 0.3082049787044525, + "learning_rate": 5.97037808470444e-07, + "loss": 0.27074384689331055, + "step": 840 + }, + { + "epoch": 23.366197183098592, + "grad_norm": 0.2950114905834198, + "learning_rate": 5.96256641554261e-07, + "loss": 0.26068389415740967, + "step": 841 + }, + { + "epoch": 23.3943661971831, + "grad_norm": 0.31746307015419006, + "learning_rate": 5.954753337323259e-07, + "loss": 0.2648658752441406, + "step": 842 + }, + { + "epoch": 23.422535211267604, + "grad_norm": 0.2906374931335449, + "learning_rate": 5.946938873846375e-07, + "loss": 0.29040125012397766, + "step": 843 + }, + { + "epoch": 23.450704225352112, + "grad_norm": 0.3055919408798218, + "learning_rate": 5.939123048916173e-07, + "loss": 0.2694965600967407, + "step": 844 + }, + { + "epoch": 23.47887323943662, + "grad_norm": 0.3007211983203888, + "learning_rate": 5.931305886341008e-07, + "loss": 0.25987839698791504, + "step": 845 + }, + { + "epoch": 23.507042253521128, + "grad_norm": 0.3042035400867462, + "learning_rate": 5.923487409933315e-07, + "loss": 0.26484209299087524, + "step": 846 + }, + { + "epoch": 23.535211267605632, + "grad_norm": 0.30741506814956665, + "learning_rate": 5.915667643509528e-07, + "loss": 0.2735103368759155, + "step": 847 + }, + { + "epoch": 23.56338028169014, + "grad_norm": 0.30859899520874023, + "learning_rate": 5.907846610890011e-07, + "loss": 0.27706003189086914, + "step": 848 + }, + { + "epoch": 23.591549295774648, + "grad_norm": 0.29999226331710815, + "learning_rate": 5.900024335898987e-07, + "loss": 0.2733941674232483, + "step": 849 + }, + { + "epoch": 23.619718309859156, + "grad_norm": 0.3084903955459595, + "learning_rate": 5.892200842364462e-07, + "loss": 0.282131165266037, + "step": 850 + }, + { + "epoch": 23.647887323943664, + "grad_norm": 0.29400384426116943, + "learning_rate": 5.884376154118154e-07, + "loss": 0.26756390929222107, + "step": 851 + }, + { + "epoch": 23.676056338028168, + "grad_norm": 0.31666234135627747, + "learning_rate": 5.87655029499542e-07, + "loss": 0.2766130268573761, + "step": 852 + }, + { + "epoch": 23.704225352112676, + "grad_norm": 0.30233001708984375, + "learning_rate": 5.868723288835184e-07, + "loss": 0.2544291019439697, + "step": 853 + }, + { + "epoch": 23.732394366197184, + "grad_norm": 0.2888985276222229, + "learning_rate": 5.860895159479864e-07, + "loss": 0.272182822227478, + "step": 854 + }, + { + "epoch": 23.760563380281692, + "grad_norm": 0.29870662093162537, + "learning_rate": 5.853065930775303e-07, + "loss": 0.2798278331756592, + "step": 855 + }, + { + "epoch": 23.788732394366196, + "grad_norm": 0.307162344455719, + "learning_rate": 5.845235626570683e-07, + "loss": 0.2772548794746399, + "step": 856 + }, + { + "epoch": 23.816901408450704, + "grad_norm": 0.290558785200119, + "learning_rate": 5.837404270718475e-07, + "loss": 0.2746056020259857, + "step": 857 + }, + { + "epoch": 23.845070422535212, + "grad_norm": 0.30080270767211914, + "learning_rate": 5.829571887074343e-07, + "loss": 0.2648829519748688, + "step": 858 + }, + { + "epoch": 23.87323943661972, + "grad_norm": 0.3067336678504944, + "learning_rate": 5.821738499497086e-07, + "loss": 0.2871520519256592, + "step": 859 + }, + { + "epoch": 23.901408450704224, + "grad_norm": 0.29975709319114685, + "learning_rate": 5.813904131848564e-07, + "loss": 0.26279598474502563, + "step": 860 + }, + { + "epoch": 23.929577464788732, + "grad_norm": 0.3006797730922699, + "learning_rate": 5.806068807993617e-07, + "loss": 0.2586716115474701, + "step": 861 + }, + { + "epoch": 23.95774647887324, + "grad_norm": 0.31139636039733887, + "learning_rate": 5.798232551800002e-07, + "loss": 0.26469486951828003, + "step": 862 + }, + { + "epoch": 23.985915492957748, + "grad_norm": 0.295448899269104, + "learning_rate": 5.790395387138311e-07, + "loss": 0.27641937136650085, + "step": 863 + }, + { + "epoch": 24.0, + "grad_norm": 0.41943204402923584, + "learning_rate": 5.78255733788191e-07, + "loss": 0.2656780779361725, + "step": 864 + }, + { + "epoch": 24.028169014084508, + "grad_norm": 0.2978457808494568, + "learning_rate": 5.774718427906856e-07, + "loss": 0.27108752727508545, + "step": 865 + }, + { + "epoch": 24.056338028169016, + "grad_norm": 0.2980673015117645, + "learning_rate": 5.766878681091828e-07, + "loss": 0.27321118116378784, + "step": 866 + }, + { + "epoch": 24.08450704225352, + "grad_norm": 0.30751070380210876, + "learning_rate": 5.759038121318052e-07, + "loss": 0.26482248306274414, + "step": 867 + }, + { + "epoch": 24.112676056338028, + "grad_norm": 0.2982223629951477, + "learning_rate": 5.751196772469237e-07, + "loss": 0.2737855315208435, + "step": 868 + }, + { + "epoch": 24.140845070422536, + "grad_norm": 0.2943744361400604, + "learning_rate": 5.743354658431489e-07, + "loss": 0.27646419405937195, + "step": 869 + }, + { + "epoch": 24.169014084507044, + "grad_norm": 0.2863228917121887, + "learning_rate": 5.735511803093248e-07, + "loss": 0.2726101279258728, + "step": 870 + }, + { + "epoch": 24.197183098591548, + "grad_norm": 0.2973101735115051, + "learning_rate": 5.727668230345209e-07, + "loss": 0.2601590156555176, + "step": 871 + }, + { + "epoch": 24.225352112676056, + "grad_norm": 0.3052431344985962, + "learning_rate": 5.71982396408026e-07, + "loss": 0.27889275550842285, + "step": 872 + }, + { + "epoch": 24.253521126760564, + "grad_norm": 0.3076930046081543, + "learning_rate": 5.711979028193391e-07, + "loss": 0.2612301707267761, + "step": 873 + }, + { + "epoch": 24.281690140845072, + "grad_norm": 0.2986485958099365, + "learning_rate": 5.704133446581642e-07, + "loss": 0.27018094062805176, + "step": 874 + }, + { + "epoch": 24.309859154929576, + "grad_norm": 0.3108276426792145, + "learning_rate": 5.696287243144012e-07, + "loss": 0.27102935314178467, + "step": 875 + }, + { + "epoch": 24.338028169014084, + "grad_norm": 0.30193671584129333, + "learning_rate": 5.688440441781398e-07, + "loss": 0.2653925120830536, + "step": 876 + }, + { + "epoch": 24.366197183098592, + "grad_norm": 0.3071465492248535, + "learning_rate": 5.680593066396518e-07, + "loss": 0.2752073109149933, + "step": 877 + }, + { + "epoch": 24.3943661971831, + "grad_norm": 0.31397056579589844, + "learning_rate": 5.672745140893839e-07, + "loss": 0.2662411332130432, + "step": 878 + }, + { + "epoch": 24.422535211267604, + "grad_norm": 0.2991463243961334, + "learning_rate": 5.664896689179504e-07, + "loss": 0.24169263243675232, + "step": 879 + }, + { + "epoch": 24.450704225352112, + "grad_norm": 0.3123292028903961, + "learning_rate": 5.657047735161255e-07, + "loss": 0.27330368757247925, + "step": 880 + }, + { + "epoch": 24.47887323943662, + "grad_norm": 0.3062734305858612, + "learning_rate": 5.649198302748368e-07, + "loss": 0.26652461290359497, + "step": 881 + }, + { + "epoch": 24.507042253521128, + "grad_norm": 0.2875562906265259, + "learning_rate": 5.641348415851577e-07, + "loss": 0.2717418670654297, + "step": 882 + }, + { + "epoch": 24.535211267605632, + "grad_norm": 0.30724218487739563, + "learning_rate": 5.633498098382998e-07, + "loss": 0.2761197090148926, + "step": 883 + }, + { + "epoch": 24.56338028169014, + "grad_norm": 0.30381572246551514, + "learning_rate": 5.625647374256061e-07, + "loss": 0.2838340997695923, + "step": 884 + }, + { + "epoch": 24.591549295774648, + "grad_norm": 0.30817776918411255, + "learning_rate": 5.617796267385429e-07, + "loss": 0.26739388704299927, + "step": 885 + }, + { + "epoch": 24.619718309859156, + "grad_norm": 0.31107473373413086, + "learning_rate": 5.60994480168694e-07, + "loss": 0.27139878273010254, + "step": 886 + }, + { + "epoch": 24.647887323943664, + "grad_norm": 0.29710572957992554, + "learning_rate": 5.602093001077517e-07, + "loss": 0.26788806915283203, + "step": 887 + }, + { + "epoch": 24.676056338028168, + "grad_norm": 0.31037789583206177, + "learning_rate": 5.594240889475106e-07, + "loss": 0.2767243981361389, + "step": 888 + }, + { + "epoch": 24.704225352112676, + "grad_norm": 0.30905231833457947, + "learning_rate": 5.586388490798604e-07, + "loss": 0.2679288685321808, + "step": 889 + }, + { + "epoch": 24.732394366197184, + "grad_norm": 0.30612513422966003, + "learning_rate": 5.578535828967777e-07, + "loss": 0.2660091519355774, + "step": 890 + }, + { + "epoch": 24.760563380281692, + "grad_norm": 0.29661476612091064, + "learning_rate": 5.570682927903193e-07, + "loss": 0.27202385663986206, + "step": 891 + }, + { + "epoch": 24.788732394366196, + "grad_norm": 0.31154492497444153, + "learning_rate": 5.562829811526154e-07, + "loss": 0.26965251564979553, + "step": 892 + }, + { + "epoch": 24.816901408450704, + "grad_norm": 0.29887905716896057, + "learning_rate": 5.554976503758612e-07, + "loss": 0.2663193345069885, + "step": 893 + }, + { + "epoch": 24.845070422535212, + "grad_norm": 0.3046702444553375, + "learning_rate": 5.547123028523106e-07, + "loss": 0.26517826318740845, + "step": 894 + }, + { + "epoch": 24.87323943661972, + "grad_norm": 0.29926952719688416, + "learning_rate": 5.539269409742683e-07, + "loss": 0.2689710855484009, + "step": 895 + }, + { + "epoch": 24.901408450704224, + "grad_norm": 0.31607043743133545, + "learning_rate": 5.531415671340826e-07, + "loss": 0.2774956226348877, + "step": 896 + }, + { + "epoch": 24.929577464788732, + "grad_norm": 0.313334584236145, + "learning_rate": 5.523561837241387e-07, + "loss": 0.2801990807056427, + "step": 897 + }, + { + "epoch": 24.95774647887324, + "grad_norm": 0.3167824149131775, + "learning_rate": 5.515707931368507e-07, + "loss": 0.2556470036506653, + "step": 898 + }, + { + "epoch": 24.985915492957748, + "grad_norm": 0.3055095970630646, + "learning_rate": 5.507853977646543e-07, + "loss": 0.2693515121936798, + "step": 899 + }, + { + "epoch": 25.0, + "grad_norm": 0.41877350211143494, + "learning_rate": 5.5e-07, + "loss": 0.2642577588558197, + "step": 900 + }, + { + "epoch": 25.028169014084508, + "grad_norm": 0.3000764548778534, + "learning_rate": 5.492146022353459e-07, + "loss": 0.2616558074951172, + "step": 901 + }, + { + "epoch": 25.056338028169016, + "grad_norm": 0.30835723876953125, + "learning_rate": 5.484292068631494e-07, + "loss": 0.260206401348114, + "step": 902 + }, + { + "epoch": 25.08450704225352, + "grad_norm": 0.30945923924446106, + "learning_rate": 5.476438162758611e-07, + "loss": 0.26666033267974854, + "step": 903 + }, + { + "epoch": 25.112676056338028, + "grad_norm": 0.3131259083747864, + "learning_rate": 5.468584328659172e-07, + "loss": 0.2688153386116028, + "step": 904 + }, + { + "epoch": 25.140845070422536, + "grad_norm": 0.31281140446662903, + "learning_rate": 5.460730590257317e-07, + "loss": 0.25907081365585327, + "step": 905 + }, + { + "epoch": 25.169014084507044, + "grad_norm": 0.300714910030365, + "learning_rate": 5.452876971476896e-07, + "loss": 0.2585920989513397, + "step": 906 + }, + { + "epoch": 25.197183098591548, + "grad_norm": 0.31137779355049133, + "learning_rate": 5.445023496241388e-07, + "loss": 0.2691946029663086, + "step": 907 + }, + { + "epoch": 25.225352112676056, + "grad_norm": 0.31905803084373474, + "learning_rate": 5.437170188473847e-07, + "loss": 0.25889474153518677, + "step": 908 + }, + { + "epoch": 25.253521126760564, + "grad_norm": 0.30952438712120056, + "learning_rate": 5.429317072096807e-07, + "loss": 0.26691755652427673, + "step": 909 + }, + { + "epoch": 25.281690140845072, + "grad_norm": 0.3063667416572571, + "learning_rate": 5.421464171032224e-07, + "loss": 0.2661867141723633, + "step": 910 + }, + { + "epoch": 25.309859154929576, + "grad_norm": 0.31403201818466187, + "learning_rate": 5.413611509201396e-07, + "loss": 0.26902246475219727, + "step": 911 + }, + { + "epoch": 25.338028169014084, + "grad_norm": 0.3037600815296173, + "learning_rate": 5.405759110524894e-07, + "loss": 0.26004883646965027, + "step": 912 + }, + { + "epoch": 25.366197183098592, + "grad_norm": 0.3116777837276459, + "learning_rate": 5.397906998922483e-07, + "loss": 0.27219873666763306, + "step": 913 + }, + { + "epoch": 25.3943661971831, + "grad_norm": 0.2961476445198059, + "learning_rate": 5.390055198313061e-07, + "loss": 0.26753348112106323, + "step": 914 + }, + { + "epoch": 25.422535211267604, + "grad_norm": 0.3180798888206482, + "learning_rate": 5.382203732614571e-07, + "loss": 0.2706093192100525, + "step": 915 + }, + { + "epoch": 25.450704225352112, + "grad_norm": 0.2982124090194702, + "learning_rate": 5.37435262574394e-07, + "loss": 0.2601392865180969, + "step": 916 + }, + { + "epoch": 25.47887323943662, + "grad_norm": 0.29854777455329895, + "learning_rate": 5.366501901617001e-07, + "loss": 0.2788724899291992, + "step": 917 + }, + { + "epoch": 25.507042253521128, + "grad_norm": 0.30327802896499634, + "learning_rate": 5.358651584148423e-07, + "loss": 0.26465606689453125, + "step": 918 + }, + { + "epoch": 25.535211267605632, + "grad_norm": 0.3136656582355499, + "learning_rate": 5.350801697251633e-07, + "loss": 0.2621968984603882, + "step": 919 + }, + { + "epoch": 25.56338028169014, + "grad_norm": 0.3008262813091278, + "learning_rate": 5.342952264838747e-07, + "loss": 0.2775859236717224, + "step": 920 + }, + { + "epoch": 25.591549295774648, + "grad_norm": 0.31797295808792114, + "learning_rate": 5.335103310820496e-07, + "loss": 0.2715638279914856, + "step": 921 + }, + { + "epoch": 25.619718309859156, + "grad_norm": 0.3112519383430481, + "learning_rate": 5.32725485910616e-07, + "loss": 0.26941171288490295, + "step": 922 + }, + { + "epoch": 25.647887323943664, + "grad_norm": 0.2887360453605652, + "learning_rate": 5.319406933603482e-07, + "loss": 0.26261216402053833, + "step": 923 + }, + { + "epoch": 25.676056338028168, + "grad_norm": 0.3208933472633362, + "learning_rate": 5.311559558218603e-07, + "loss": 0.26436418294906616, + "step": 924 + }, + { + "epoch": 25.704225352112676, + "grad_norm": 0.30341023206710815, + "learning_rate": 5.303712756855988e-07, + "loss": 0.2747180461883545, + "step": 925 + }, + { + "epoch": 25.732394366197184, + "grad_norm": 0.31803277134895325, + "learning_rate": 5.295866553418358e-07, + "loss": 0.2771461606025696, + "step": 926 + }, + { + "epoch": 25.760563380281692, + "grad_norm": 0.3123302459716797, + "learning_rate": 5.288020971806608e-07, + "loss": 0.26546305418014526, + "step": 927 + }, + { + "epoch": 25.788732394366196, + "grad_norm": 0.3141644597053528, + "learning_rate": 5.28017603591974e-07, + "loss": 0.27546215057373047, + "step": 928 + }, + { + "epoch": 25.816901408450704, + "grad_norm": 0.29840072989463806, + "learning_rate": 5.27233176965479e-07, + "loss": 0.25834715366363525, + "step": 929 + }, + { + "epoch": 25.845070422535212, + "grad_norm": 0.3083305060863495, + "learning_rate": 5.264488196906752e-07, + "loss": 0.2746443748474121, + "step": 930 + }, + { + "epoch": 25.87323943661972, + "grad_norm": 0.30847135186195374, + "learning_rate": 5.256645341568511e-07, + "loss": 0.2748471200466156, + "step": 931 + }, + { + "epoch": 25.901408450704224, + "grad_norm": 0.30591723322868347, + "learning_rate": 5.248803227530763e-07, + "loss": 0.26996147632598877, + "step": 932 + }, + { + "epoch": 25.929577464788732, + "grad_norm": 0.314569354057312, + "learning_rate": 5.240961878681947e-07, + "loss": 0.28236207365989685, + "step": 933 + }, + { + "epoch": 25.95774647887324, + "grad_norm": 0.32219424843788147, + "learning_rate": 5.233121318908173e-07, + "loss": 0.2674041986465454, + "step": 934 + }, + { + "epoch": 25.985915492957748, + "grad_norm": 0.3121417760848999, + "learning_rate": 5.225281572093143e-07, + "loss": 0.2723839282989502, + "step": 935 + }, + { + "epoch": 26.0, + "grad_norm": 0.4469078481197357, + "learning_rate": 5.21744266211809e-07, + "loss": 0.2659713625907898, + "step": 936 + }, + { + "epoch": 26.028169014084508, + "grad_norm": 0.3079273998737335, + "learning_rate": 5.20960461286169e-07, + "loss": 0.2612949013710022, + "step": 937 + }, + { + "epoch": 26.056338028169016, + "grad_norm": 0.29670900106430054, + "learning_rate": 5.2017674482e-07, + "loss": 0.26683154702186584, + "step": 938 + }, + { + "epoch": 26.08450704225352, + "grad_norm": 0.3200303018093109, + "learning_rate": 5.193931192006385e-07, + "loss": 0.2616243362426758, + "step": 939 + }, + { + "epoch": 26.112676056338028, + "grad_norm": 0.31682220101356506, + "learning_rate": 5.186095868151436e-07, + "loss": 0.27138951420783997, + "step": 940 + }, + { + "epoch": 26.140845070422536, + "grad_norm": 0.30821120738983154, + "learning_rate": 5.178261500502912e-07, + "loss": 0.26395922899246216, + "step": 941 + }, + { + "epoch": 26.169014084507044, + "grad_norm": 0.3168351352214813, + "learning_rate": 5.170428112925659e-07, + "loss": 0.2528039813041687, + "step": 942 + }, + { + "epoch": 26.197183098591548, + "grad_norm": 0.31877174973487854, + "learning_rate": 5.162595729281526e-07, + "loss": 0.268981397151947, + "step": 943 + }, + { + "epoch": 26.225352112676056, + "grad_norm": 0.30236542224884033, + "learning_rate": 5.154764373429315e-07, + "loss": 0.26689520478248596, + "step": 944 + }, + { + "epoch": 26.253521126760564, + "grad_norm": 0.31615039706230164, + "learning_rate": 5.146934069224698e-07, + "loss": 0.25211524963378906, + "step": 945 + }, + { + "epoch": 26.281690140845072, + "grad_norm": 0.304155558347702, + "learning_rate": 5.139104840520135e-07, + "loss": 0.26361894607543945, + "step": 946 + }, + { + "epoch": 26.309859154929576, + "grad_norm": 0.31038856506347656, + "learning_rate": 5.131276711164815e-07, + "loss": 0.26455777883529663, + "step": 947 + }, + { + "epoch": 26.338028169014084, + "grad_norm": 0.3139597177505493, + "learning_rate": 5.123449705004581e-07, + "loss": 0.2526125907897949, + "step": 948 + }, + { + "epoch": 26.366197183098592, + "grad_norm": 0.3288014233112335, + "learning_rate": 5.115623845881847e-07, + "loss": 0.2677180767059326, + "step": 949 + }, + { + "epoch": 26.3943661971831, + "grad_norm": 0.33518192172050476, + "learning_rate": 5.107799157635538e-07, + "loss": 0.2683093249797821, + "step": 950 + }, + { + "epoch": 26.422535211267604, + "grad_norm": 0.3219356834888458, + "learning_rate": 5.099975664101014e-07, + "loss": 0.2773933708667755, + "step": 951 + }, + { + "epoch": 26.450704225352112, + "grad_norm": 0.32385388016700745, + "learning_rate": 5.09215338910999e-07, + "loss": 0.2612137198448181, + "step": 952 + }, + { + "epoch": 26.47887323943662, + "grad_norm": 0.32834818959236145, + "learning_rate": 5.084332356490472e-07, + "loss": 0.2747904658317566, + "step": 953 + }, + { + "epoch": 26.507042253521128, + "grad_norm": 0.32953891158103943, + "learning_rate": 5.076512590066685e-07, + "loss": 0.2700774669647217, + "step": 954 + }, + { + "epoch": 26.535211267605632, + "grad_norm": 0.31470146775245667, + "learning_rate": 5.068694113658992e-07, + "loss": 0.26825615763664246, + "step": 955 + }, + { + "epoch": 26.56338028169014, + "grad_norm": 0.3184269964694977, + "learning_rate": 5.060876951083828e-07, + "loss": 0.2559502124786377, + "step": 956 + }, + { + "epoch": 26.591549295774648, + "grad_norm": 0.3205021619796753, + "learning_rate": 5.053061126153624e-07, + "loss": 0.26462531089782715, + "step": 957 + }, + { + "epoch": 26.619718309859156, + "grad_norm": 0.3158126473426819, + "learning_rate": 5.045246662676741e-07, + "loss": 0.2701690196990967, + "step": 958 + }, + { + "epoch": 26.647887323943664, + "grad_norm": 0.3104144334793091, + "learning_rate": 5.037433584457389e-07, + "loss": 0.27104830741882324, + "step": 959 + }, + { + "epoch": 26.676056338028168, + "grad_norm": 0.3229422867298126, + "learning_rate": 5.02962191529556e-07, + "loss": 0.2765110731124878, + "step": 960 + }, + { + "epoch": 26.704225352112676, + "grad_norm": 0.3127235770225525, + "learning_rate": 5.021811678986951e-07, + "loss": 0.26477351784706116, + "step": 961 + }, + { + "epoch": 26.732394366197184, + "grad_norm": 0.31363457441329956, + "learning_rate": 5.014002899322896e-07, + "loss": 0.2696647644042969, + "step": 962 + }, + { + "epoch": 26.760563380281692, + "grad_norm": 0.3330313265323639, + "learning_rate": 5.006195600090296e-07, + "loss": 0.2720947861671448, + "step": 963 + }, + { + "epoch": 26.788732394366196, + "grad_norm": 0.3137781023979187, + "learning_rate": 4.998389805071536e-07, + "loss": 0.2770814001560211, + "step": 964 + }, + { + "epoch": 26.816901408450704, + "grad_norm": 0.30663928389549255, + "learning_rate": 4.990585538044419e-07, + "loss": 0.26743337512016296, + "step": 965 + }, + { + "epoch": 26.845070422535212, + "grad_norm": 0.3439841866493225, + "learning_rate": 4.982782822782101e-07, + "loss": 0.26640748977661133, + "step": 966 + }, + { + "epoch": 26.87323943661972, + "grad_norm": 0.30016517639160156, + "learning_rate": 4.974981683053001e-07, + "loss": 0.2630905508995056, + "step": 967 + }, + { + "epoch": 26.901408450704224, + "grad_norm": 0.30313640832901, + "learning_rate": 4.967182142620745e-07, + "loss": 0.26278769969940186, + "step": 968 + }, + { + "epoch": 26.929577464788732, + "grad_norm": 0.3100942373275757, + "learning_rate": 4.959384225244087e-07, + "loss": 0.25859004259109497, + "step": 969 + }, + { + "epoch": 26.95774647887324, + "grad_norm": 0.3049146234989166, + "learning_rate": 4.951587954676837e-07, + "loss": 0.2737579941749573, + "step": 970 + }, + { + "epoch": 26.985915492957748, + "grad_norm": 0.3105259835720062, + "learning_rate": 4.943793354667783e-07, + "loss": 0.2698732018470764, + "step": 971 + }, + { + "epoch": 27.0, + "grad_norm": 0.43671199679374695, + "learning_rate": 4.93600044896063e-07, + "loss": 0.2851495146751404, + "step": 972 + }, + { + "epoch": 27.028169014084508, + "grad_norm": 0.3152709901332855, + "learning_rate": 4.928209261293923e-07, + "loss": 0.27372750639915466, + "step": 973 + }, + { + "epoch": 27.056338028169016, + "grad_norm": 0.3281909227371216, + "learning_rate": 4.920419815400968e-07, + "loss": 0.26317745447158813, + "step": 974 + }, + { + "epoch": 27.08450704225352, + "grad_norm": 0.30629420280456543, + "learning_rate": 4.912632135009769e-07, + "loss": 0.267042338848114, + "step": 975 + }, + { + "epoch": 27.112676056338028, + "grad_norm": 0.31097206473350525, + "learning_rate": 4.904846243842949e-07, + "loss": 0.2647910714149475, + "step": 976 + }, + { + "epoch": 27.140845070422536, + "grad_norm": 0.30723172426223755, + "learning_rate": 4.897062165617686e-07, + "loss": 0.27176767587661743, + "step": 977 + }, + { + "epoch": 27.169014084507044, + "grad_norm": 0.333957701921463, + "learning_rate": 4.88927992404563e-07, + "loss": 0.26361826062202454, + "step": 978 + }, + { + "epoch": 27.197183098591548, + "grad_norm": 0.30476778745651245, + "learning_rate": 4.881499542832841e-07, + "loss": 0.2584869861602783, + "step": 979 + }, + { + "epoch": 27.225352112676056, + "grad_norm": 0.3146997392177582, + "learning_rate": 4.873721045679706e-07, + "loss": 0.2549043893814087, + "step": 980 + }, + { + "epoch": 27.253521126760564, + "grad_norm": 0.30739930272102356, + "learning_rate": 4.865944456280878e-07, + "loss": 0.2622683644294739, + "step": 981 + }, + { + "epoch": 27.281690140845072, + "grad_norm": 0.3006227910518646, + "learning_rate": 4.858169798325198e-07, + "loss": 0.27283164858818054, + "step": 982 + }, + { + "epoch": 27.309859154929576, + "grad_norm": 0.31303322315216064, + "learning_rate": 4.850397095495621e-07, + "loss": 0.2585863471031189, + "step": 983 + }, + { + "epoch": 27.338028169014084, + "grad_norm": 0.3036518692970276, + "learning_rate": 4.842626371469149e-07, + "loss": 0.2656107246875763, + "step": 984 + }, + { + "epoch": 27.366197183098592, + "grad_norm": 0.3137490749359131, + "learning_rate": 4.834857649916752e-07, + "loss": 0.25737249851226807, + "step": 985 + }, + { + "epoch": 27.3943661971831, + "grad_norm": 0.3161812424659729, + "learning_rate": 4.827090954503308e-07, + "loss": 0.2658624053001404, + "step": 986 + }, + { + "epoch": 27.422535211267604, + "grad_norm": 0.2974465489387512, + "learning_rate": 4.819326308887513e-07, + "loss": 0.2653939425945282, + "step": 987 + }, + { + "epoch": 27.450704225352112, + "grad_norm": 0.3207877576351166, + "learning_rate": 4.811563736721829e-07, + "loss": 0.2567484378814697, + "step": 988 + }, + { + "epoch": 27.47887323943662, + "grad_norm": 0.30379563570022583, + "learning_rate": 4.803803261652395e-07, + "loss": 0.2731136083602905, + "step": 989 + }, + { + "epoch": 27.507042253521128, + "grad_norm": 0.30110257863998413, + "learning_rate": 4.79604490731896e-07, + "loss": 0.2533247172832489, + "step": 990 + }, + { + "epoch": 27.535211267605632, + "grad_norm": 0.32354485988616943, + "learning_rate": 4.788288697354824e-07, + "loss": 0.2776826024055481, + "step": 991 + }, + { + "epoch": 27.56338028169014, + "grad_norm": 0.3137172758579254, + "learning_rate": 4.780534655386743e-07, + "loss": 0.2678206264972687, + "step": 992 + }, + { + "epoch": 27.591549295774648, + "grad_norm": 0.3129335641860962, + "learning_rate": 4.772782805034876e-07, + "loss": 0.27128273248672485, + "step": 993 + }, + { + "epoch": 27.619718309859156, + "grad_norm": 0.3112099766731262, + "learning_rate": 4.7650331699127013e-07, + "loss": 0.25505757331848145, + "step": 994 + }, + { + "epoch": 27.647887323943664, + "grad_norm": 0.3214300274848938, + "learning_rate": 4.75728577362695e-07, + "loss": 0.252490371465683, + "step": 995 + }, + { + "epoch": 27.676056338028168, + "grad_norm": 0.3177250623703003, + "learning_rate": 4.749540639777539e-07, + "loss": 0.2748945355415344, + "step": 996 + }, + { + "epoch": 27.704225352112676, + "grad_norm": 0.3087361752986908, + "learning_rate": 4.741797791957489e-07, + "loss": 0.26117944717407227, + "step": 997 + }, + { + "epoch": 27.732394366197184, + "grad_norm": 0.3008691072463989, + "learning_rate": 4.7340572537528547e-07, + "loss": 0.2576630115509033, + "step": 998 + }, + { + "epoch": 27.760563380281692, + "grad_norm": 0.3111347556114197, + "learning_rate": 4.7263190487426563e-07, + "loss": 0.26800209283828735, + "step": 999 + }, + { + "epoch": 27.788732394366196, + "grad_norm": 0.2986048758029938, + "learning_rate": 4.7185832004988133e-07, + "loss": 0.2734978497028351, + "step": 1000 + }, + { + "epoch": 27.816901408450704, + "grad_norm": 0.31797438859939575, + "learning_rate": 4.710849732586059e-07, + "loss": 0.2649095356464386, + "step": 1001 + }, + { + "epoch": 27.845070422535212, + "grad_norm": 0.3100630044937134, + "learning_rate": 4.703118668561875e-07, + "loss": 0.2550201117992401, + "step": 1002 + }, + { + "epoch": 27.87323943661972, + "grad_norm": 0.3206699788570404, + "learning_rate": 4.6953900319764274e-07, + "loss": 0.26471948623657227, + "step": 1003 + }, + { + "epoch": 27.901408450704224, + "grad_norm": 0.3138802945613861, + "learning_rate": 4.68766384637248e-07, + "loss": 0.26174217462539673, + "step": 1004 + }, + { + "epoch": 27.929577464788732, + "grad_norm": 0.3069911301136017, + "learning_rate": 4.679940135285336e-07, + "loss": 0.26182085275650024, + "step": 1005 + }, + { + "epoch": 27.95774647887324, + "grad_norm": 0.3080894351005554, + "learning_rate": 4.672218922242759e-07, + "loss": 0.272597074508667, + "step": 1006 + }, + { + "epoch": 27.985915492957748, + "grad_norm": 0.30975106358528137, + "learning_rate": 4.664500230764903e-07, + "loss": 0.28192490339279175, + "step": 1007 + }, + { + "epoch": 28.0, + "grad_norm": 0.44492414593696594, + "learning_rate": 4.656784084364238e-07, + "loss": 0.2805609405040741, + "step": 1008 + }, + { + "epoch": 28.028169014084508, + "grad_norm": 0.3142589330673218, + "learning_rate": 4.6490705065454883e-07, + "loss": 0.2571072280406952, + "step": 1009 + }, + { + "epoch": 28.056338028169016, + "grad_norm": 0.3059631884098053, + "learning_rate": 4.641359520805548e-07, + "loss": 0.2683190107345581, + "step": 1010 + }, + { + "epoch": 28.08450704225352, + "grad_norm": 0.32835182547569275, + "learning_rate": 4.6336511506334177e-07, + "loss": 0.2751193344593048, + "step": 1011 + }, + { + "epoch": 28.112676056338028, + "grad_norm": 0.31909412145614624, + "learning_rate": 4.6259454195101267e-07, + "loss": 0.27306729555130005, + "step": 1012 + }, + { + "epoch": 28.140845070422536, + "grad_norm": 0.32016029953956604, + "learning_rate": 4.61824235090867e-07, + "loss": 0.2615482211112976, + "step": 1013 + }, + { + "epoch": 28.169014084507044, + "grad_norm": 0.30900275707244873, + "learning_rate": 4.6105419682939316e-07, + "loss": 0.2553929388523102, + "step": 1014 + }, + { + "epoch": 28.197183098591548, + "grad_norm": 0.3047516942024231, + "learning_rate": 4.602844295122613e-07, + "loss": 0.26050907373428345, + "step": 1015 + }, + { + "epoch": 28.225352112676056, + "grad_norm": 0.31619319319725037, + "learning_rate": 4.59514935484316e-07, + "loss": 0.2493715137243271, + "step": 1016 + }, + { + "epoch": 28.253521126760564, + "grad_norm": 0.31594234704971313, + "learning_rate": 4.5874571708956953e-07, + "loss": 0.26061999797821045, + "step": 1017 + }, + { + "epoch": 28.281690140845072, + "grad_norm": 0.31763410568237305, + "learning_rate": 4.579767766711944e-07, + "loss": 0.2720048427581787, + "step": 1018 + }, + { + "epoch": 28.309859154929576, + "grad_norm": 0.3225538432598114, + "learning_rate": 4.572081165715167e-07, + "loss": 0.26587527990341187, + "step": 1019 + }, + { + "epoch": 28.338028169014084, + "grad_norm": 0.33830496668815613, + "learning_rate": 4.5643973913200837e-07, + "loss": 0.26142361760139465, + "step": 1020 + }, + { + "epoch": 28.366197183098592, + "grad_norm": 0.30440667271614075, + "learning_rate": 4.556716466932803e-07, + "loss": 0.25490373373031616, + "step": 1021 + }, + { + "epoch": 28.3943661971831, + "grad_norm": 0.30009451508522034, + "learning_rate": 4.549038415950751e-07, + "loss": 0.258319616317749, + "step": 1022 + }, + { + "epoch": 28.422535211267604, + "grad_norm": 0.32110437750816345, + "learning_rate": 4.5413632617626054e-07, + "loss": 0.2684330344200134, + "step": 1023 + }, + { + "epoch": 28.450704225352112, + "grad_norm": 0.3126528561115265, + "learning_rate": 4.5336910277482155e-07, + "loss": 0.2647142708301544, + "step": 1024 + }, + { + "epoch": 28.47887323943662, + "grad_norm": 0.30162736773490906, + "learning_rate": 4.526021737278537e-07, + "loss": 0.2717491388320923, + "step": 1025 + }, + { + "epoch": 28.507042253521128, + "grad_norm": 0.32018333673477173, + "learning_rate": 4.51835541371556e-07, + "loss": 0.2770422697067261, + "step": 1026 + }, + { + "epoch": 28.535211267605632, + "grad_norm": 0.3132731318473816, + "learning_rate": 4.5106920804122304e-07, + "loss": 0.2692522406578064, + "step": 1027 + }, + { + "epoch": 28.56338028169014, + "grad_norm": 0.30906060338020325, + "learning_rate": 4.503031760712397e-07, + "loss": 0.2523694932460785, + "step": 1028 + }, + { + "epoch": 28.591549295774648, + "grad_norm": 0.3276032507419586, + "learning_rate": 4.4953744779507197e-07, + "loss": 0.26482313871383667, + "step": 1029 + }, + { + "epoch": 28.619718309859156, + "grad_norm": 0.33187615871429443, + "learning_rate": 4.4877202554526084e-07, + "loss": 0.2603946924209595, + "step": 1030 + }, + { + "epoch": 28.647887323943664, + "grad_norm": 0.30181628465652466, + "learning_rate": 4.480069116534151e-07, + "loss": 0.25871700048446655, + "step": 1031 + }, + { + "epoch": 28.676056338028168, + "grad_norm": 0.3155851662158966, + "learning_rate": 4.4724210845020494e-07, + "loss": 0.2617461681365967, + "step": 1032 + }, + { + "epoch": 28.704225352112676, + "grad_norm": 0.30370378494262695, + "learning_rate": 4.4647761826535303e-07, + "loss": 0.26235488057136536, + "step": 1033 + }, + { + "epoch": 28.732394366197184, + "grad_norm": 0.317186564207077, + "learning_rate": 4.457134434276293e-07, + "loss": 0.26761680841445923, + "step": 1034 + }, + { + "epoch": 28.760563380281692, + "grad_norm": 0.3287314772605896, + "learning_rate": 4.449495862648427e-07, + "loss": 0.261843204498291, + "step": 1035 + }, + { + "epoch": 28.788732394366196, + "grad_norm": 0.33204883337020874, + "learning_rate": 4.441860491038345e-07, + "loss": 0.2633381485939026, + "step": 1036 + }, + { + "epoch": 28.816901408450704, + "grad_norm": 0.32268011569976807, + "learning_rate": 4.4342283427047164e-07, + "loss": 0.24900981783866882, + "step": 1037 + }, + { + "epoch": 28.845070422535212, + "grad_norm": 0.3224244713783264, + "learning_rate": 4.4265994408963867e-07, + "loss": 0.2667103111743927, + "step": 1038 + }, + { + "epoch": 28.87323943661972, + "grad_norm": 0.3169482350349426, + "learning_rate": 4.418973808852313e-07, + "loss": 0.268291175365448, + "step": 1039 + }, + { + "epoch": 28.901408450704224, + "grad_norm": 0.33006441593170166, + "learning_rate": 4.4113514698014953e-07, + "loss": 0.27004534006118774, + "step": 1040 + }, + { + "epoch": 28.929577464788732, + "grad_norm": 0.35179299116134644, + "learning_rate": 4.403732446962899e-07, + "loss": 0.2628635764122009, + "step": 1041 + }, + { + "epoch": 28.95774647887324, + "grad_norm": 0.3151315748691559, + "learning_rate": 4.3961167635453876e-07, + "loss": 0.2677478492259979, + "step": 1042 + }, + { + "epoch": 28.985915492957748, + "grad_norm": 0.3185572922229767, + "learning_rate": 4.388504442747657e-07, + "loss": 0.2660791873931885, + "step": 1043 + }, + { + "epoch": 29.0, + "grad_norm": 0.45902183651924133, + "learning_rate": 4.3808955077581546e-07, + "loss": 0.2720754146575928, + "step": 1044 + }, + { + "epoch": 29.028169014084508, + "grad_norm": 0.3011077344417572, + "learning_rate": 4.373289981755013e-07, + "loss": 0.25422877073287964, + "step": 1045 + }, + { + "epoch": 29.056338028169016, + "grad_norm": 0.3089461028575897, + "learning_rate": 4.365687887905988e-07, + "loss": 0.2498088926076889, + "step": 1046 + }, + { + "epoch": 29.08450704225352, + "grad_norm": 0.32150641083717346, + "learning_rate": 4.358089249368375e-07, + "loss": 0.2662513554096222, + "step": 1047 + }, + { + "epoch": 29.112676056338028, + "grad_norm": 0.32592031359672546, + "learning_rate": 4.350494089288943e-07, + "loss": 0.2539994418621063, + "step": 1048 + }, + { + "epoch": 29.140845070422536, + "grad_norm": 0.31924694776535034, + "learning_rate": 4.3429024308038686e-07, + "loss": 0.2557491958141327, + "step": 1049 + }, + { + "epoch": 29.169014084507044, + "grad_norm": 0.32504960894584656, + "learning_rate": 4.3353142970386557e-07, + "loss": 0.26317501068115234, + "step": 1050 + }, + { + "epoch": 29.197183098591548, + "grad_norm": 0.3093854784965515, + "learning_rate": 4.327729711108082e-07, + "loss": 0.25340092182159424, + "step": 1051 + }, + { + "epoch": 29.225352112676056, + "grad_norm": 0.313862144947052, + "learning_rate": 4.3201486961161093e-07, + "loss": 0.2559676766395569, + "step": 1052 + }, + { + "epoch": 29.253521126760564, + "grad_norm": 0.3301529288291931, + "learning_rate": 4.312571275155823e-07, + "loss": 0.2709015905857086, + "step": 1053 + }, + { + "epoch": 29.281690140845072, + "grad_norm": 0.32452118396759033, + "learning_rate": 4.304997471309361e-07, + "loss": 0.2698490619659424, + "step": 1054 + }, + { + "epoch": 29.309859154929576, + "grad_norm": 0.3382558226585388, + "learning_rate": 4.297427307647844e-07, + "loss": 0.2615205645561218, + "step": 1055 + }, + { + "epoch": 29.338028169014084, + "grad_norm": 0.3098710775375366, + "learning_rate": 4.2898608072313045e-07, + "loss": 0.2664251923561096, + "step": 1056 + }, + { + "epoch": 29.366197183098592, + "grad_norm": 0.3207705318927765, + "learning_rate": 4.2822979931086144e-07, + "loss": 0.2764906883239746, + "step": 1057 + }, + { + "epoch": 29.3943661971831, + "grad_norm": 0.3483034372329712, + "learning_rate": 4.2747388883174154e-07, + "loss": 0.2622952163219452, + "step": 1058 + }, + { + "epoch": 29.422535211267604, + "grad_norm": 0.30950114130973816, + "learning_rate": 4.267183515884054e-07, + "loss": 0.2630128860473633, + "step": 1059 + }, + { + "epoch": 29.450704225352112, + "grad_norm": 0.32425740361213684, + "learning_rate": 4.2596318988235037e-07, + "loss": 0.25917208194732666, + "step": 1060 + }, + { + "epoch": 29.47887323943662, + "grad_norm": 0.3382692039012909, + "learning_rate": 4.2520840601392996e-07, + "loss": 0.26483750343322754, + "step": 1061 + }, + { + "epoch": 29.507042253521128, + "grad_norm": 0.30861786007881165, + "learning_rate": 4.2445400228234687e-07, + "loss": 0.2531127631664276, + "step": 1062 + }, + { + "epoch": 29.535211267605632, + "grad_norm": 0.33470088243484497, + "learning_rate": 4.2369998098564554e-07, + "loss": 0.263372540473938, + "step": 1063 + }, + { + "epoch": 29.56338028169014, + "grad_norm": 0.34484177827835083, + "learning_rate": 4.2294634442070553e-07, + "loss": 0.263760507106781, + "step": 1064 + }, + { + "epoch": 29.591549295774648, + "grad_norm": 0.32152125239372253, + "learning_rate": 4.2219309488323487e-07, + "loss": 0.2630784511566162, + "step": 1065 + }, + { + "epoch": 29.619718309859156, + "grad_norm": 0.3259511888027191, + "learning_rate": 4.214402346677619e-07, + "loss": 0.26080453395843506, + "step": 1066 + }, + { + "epoch": 29.647887323943664, + "grad_norm": 0.32442566752433777, + "learning_rate": 4.206877660676297e-07, + "loss": 0.2604103088378906, + "step": 1067 + }, + { + "epoch": 29.676056338028168, + "grad_norm": 0.3231119215488434, + "learning_rate": 4.1993569137498776e-07, + "loss": 0.26589787006378174, + "step": 1068 + }, + { + "epoch": 29.704225352112676, + "grad_norm": 0.3275383412837982, + "learning_rate": 4.1918401288078633e-07, + "loss": 0.2476288229227066, + "step": 1069 + }, + { + "epoch": 29.732394366197184, + "grad_norm": 0.3219151496887207, + "learning_rate": 4.1843273287476854e-07, + "loss": 0.26332658529281616, + "step": 1070 + }, + { + "epoch": 29.760563380281692, + "grad_norm": 0.31227391958236694, + "learning_rate": 4.1768185364546326e-07, + "loss": 0.2647852301597595, + "step": 1071 + }, + { + "epoch": 29.788732394366196, + "grad_norm": 0.3090374767780304, + "learning_rate": 4.1693137748017915e-07, + "loss": 0.2562742531299591, + "step": 1072 + }, + { + "epoch": 29.816901408450704, + "grad_norm": 0.32516875863075256, + "learning_rate": 4.161813066649963e-07, + "loss": 0.27417412400245667, + "step": 1073 + }, + { + "epoch": 29.845070422535212, + "grad_norm": 0.3393928110599518, + "learning_rate": 4.15431643484761e-07, + "loss": 0.25790080428123474, + "step": 1074 + }, + { + "epoch": 29.87323943661972, + "grad_norm": 0.3293744623661041, + "learning_rate": 4.146823902230772e-07, + "loss": 0.27599674463272095, + "step": 1075 + }, + { + "epoch": 29.901408450704224, + "grad_norm": 0.336525022983551, + "learning_rate": 4.1393354916230005e-07, + "loss": 0.2566748261451721, + "step": 1076 + }, + { + "epoch": 29.929577464788732, + "grad_norm": 0.30744579434394836, + "learning_rate": 4.1318512258352936e-07, + "loss": 0.276886522769928, + "step": 1077 + }, + { + "epoch": 29.95774647887324, + "grad_norm": 0.3156173527240753, + "learning_rate": 4.124371127666024e-07, + "loss": 0.27484360337257385, + "step": 1078 + }, + { + "epoch": 29.985915492957748, + "grad_norm": 0.31924012303352356, + "learning_rate": 4.1168952199008677e-07, + "loss": 0.2567445635795593, + "step": 1079 + }, + { + "epoch": 30.0, + "grad_norm": 0.4623652994632721, + "learning_rate": 4.1094235253127374e-07, + "loss": 0.27351921796798706, + "step": 1080 + }, + { + "epoch": 30.028169014084508, + "grad_norm": 0.32494813203811646, + "learning_rate": 4.101956066661708e-07, + "loss": 0.26006799936294556, + "step": 1081 + }, + { + "epoch": 30.056338028169016, + "grad_norm": 0.3355497121810913, + "learning_rate": 4.0944928666949527e-07, + "loss": 0.26071614027023315, + "step": 1082 + }, + { + "epoch": 30.08450704225352, + "grad_norm": 0.3180653750896454, + "learning_rate": 4.0870339481466774e-07, + "loss": 0.2741304039955139, + "step": 1083 + }, + { + "epoch": 30.112676056338028, + "grad_norm": 0.31589558720588684, + "learning_rate": 4.079579333738039e-07, + "loss": 0.2640499770641327, + "step": 1084 + }, + { + "epoch": 30.140845070422536, + "grad_norm": 0.33277377486228943, + "learning_rate": 4.0721290461770863e-07, + "loss": 0.2542555630207062, + "step": 1085 + }, + { + "epoch": 30.169014084507044, + "grad_norm": 0.31191685795783997, + "learning_rate": 4.064683108158685e-07, + "loss": 0.24946148693561554, + "step": 1086 + }, + { + "epoch": 30.197183098591548, + "grad_norm": 0.31646913290023804, + "learning_rate": 4.057241542364457e-07, + "loss": 0.2565403878688812, + "step": 1087 + }, + { + "epoch": 30.225352112676056, + "grad_norm": 0.32091739773750305, + "learning_rate": 4.0498043714627006e-07, + "loss": 0.2608620226383209, + "step": 1088 + }, + { + "epoch": 30.253521126760564, + "grad_norm": 0.3244355618953705, + "learning_rate": 4.042371618108329e-07, + "loss": 0.25209081172943115, + "step": 1089 + }, + { + "epoch": 30.281690140845072, + "grad_norm": 0.3262701630592346, + "learning_rate": 4.034943304942796e-07, + "loss": 0.2566452622413635, + "step": 1090 + }, + { + "epoch": 30.309859154929576, + "grad_norm": 0.35125988721847534, + "learning_rate": 4.027519454594033e-07, + "loss": 0.2646006643772125, + "step": 1091 + }, + { + "epoch": 30.338028169014084, + "grad_norm": 0.32471081614494324, + "learning_rate": 4.020100089676376e-07, + "loss": 0.2576545178890228, + "step": 1092 + }, + { + "epoch": 30.366197183098592, + "grad_norm": 0.33542898297309875, + "learning_rate": 4.012685232790497e-07, + "loss": 0.25865480303764343, + "step": 1093 + }, + { + "epoch": 30.3943661971831, + "grad_norm": 0.31360387802124023, + "learning_rate": 4.005274906523336e-07, + "loss": 0.25481581687927246, + "step": 1094 + }, + { + "epoch": 30.422535211267604, + "grad_norm": 0.33107563853263855, + "learning_rate": 3.9978691334480306e-07, + "loss": 0.252411812543869, + "step": 1095 + }, + { + "epoch": 30.450704225352112, + "grad_norm": 0.3281182050704956, + "learning_rate": 3.9904679361238526e-07, + "loss": 0.2586092948913574, + "step": 1096 + }, + { + "epoch": 30.47887323943662, + "grad_norm": 0.32694414258003235, + "learning_rate": 3.9830713370961313e-07, + "loss": 0.26445192098617554, + "step": 1097 + }, + { + "epoch": 30.507042253521128, + "grad_norm": 0.318498432636261, + "learning_rate": 3.975679358896189e-07, + "loss": 0.25009143352508545, + "step": 1098 + }, + { + "epoch": 30.535211267605632, + "grad_norm": 0.3352436423301697, + "learning_rate": 3.968292024041275e-07, + "loss": 0.2770006060600281, + "step": 1099 + }, + { + "epoch": 30.56338028169014, + "grad_norm": 0.3413051664829254, + "learning_rate": 3.9609093550344907e-07, + "loss": 0.2675744593143463, + "step": 1100 + }, + { + "epoch": 30.591549295774648, + "grad_norm": 0.33011800050735474, + "learning_rate": 3.953531374364728e-07, + "loss": 0.25982439517974854, + "step": 1101 + }, + { + "epoch": 30.619718309859156, + "grad_norm": 0.3153058588504791, + "learning_rate": 3.946158104506594e-07, + "loss": 0.26440930366516113, + "step": 1102 + }, + { + "epoch": 30.647887323943664, + "grad_norm": 0.33693262934684753, + "learning_rate": 3.938789567920349e-07, + "loss": 0.2564413845539093, + "step": 1103 + }, + { + "epoch": 30.676056338028168, + "grad_norm": 0.3082239031791687, + "learning_rate": 3.931425787051832e-07, + "loss": 0.26095646619796753, + "step": 1104 + }, + { + "epoch": 30.704225352112676, + "grad_norm": 0.34148088097572327, + "learning_rate": 3.924066784332396e-07, + "loss": 0.27237722277641296, + "step": 1105 + }, + { + "epoch": 30.732394366197184, + "grad_norm": 0.3161861300468445, + "learning_rate": 3.9167125821788416e-07, + "loss": 0.25798144936561584, + "step": 1106 + }, + { + "epoch": 30.760563380281692, + "grad_norm": 0.33590832352638245, + "learning_rate": 3.909363202993343e-07, + "loss": 0.2643035650253296, + "step": 1107 + }, + { + "epoch": 30.788732394366196, + "grad_norm": 0.33959585428237915, + "learning_rate": 3.902018669163384e-07, + "loss": 0.2613189220428467, + "step": 1108 + }, + { + "epoch": 30.816901408450704, + "grad_norm": 0.31452202796936035, + "learning_rate": 3.894679003061686e-07, + "loss": 0.26554104685783386, + "step": 1109 + }, + { + "epoch": 30.845070422535212, + "grad_norm": 0.3322625160217285, + "learning_rate": 3.8873442270461485e-07, + "loss": 0.2571873664855957, + "step": 1110 + }, + { + "epoch": 30.87323943661972, + "grad_norm": 0.33110320568084717, + "learning_rate": 3.88001436345977e-07, + "loss": 0.26796817779541016, + "step": 1111 + }, + { + "epoch": 30.901408450704224, + "grad_norm": 0.32166630029678345, + "learning_rate": 3.872689434630585e-07, + "loss": 0.25648969411849976, + "step": 1112 + }, + { + "epoch": 30.929577464788732, + "grad_norm": 0.3449627757072449, + "learning_rate": 3.8653694628715984e-07, + "loss": 0.26782190799713135, + "step": 1113 + }, + { + "epoch": 30.95774647887324, + "grad_norm": 0.3227315843105316, + "learning_rate": 3.8580544704807117e-07, + "loss": 0.2791867256164551, + "step": 1114 + }, + { + "epoch": 30.985915492957748, + "grad_norm": 0.3112963140010834, + "learning_rate": 3.850744479740663e-07, + "loss": 0.26565277576446533, + "step": 1115 + }, + { + "epoch": 31.0, + "grad_norm": 0.4575044810771942, + "learning_rate": 3.843439512918949e-07, + "loss": 0.25405725836753845, + "step": 1116 + }, + { + "epoch": 31.028169014084508, + "grad_norm": 0.3324749767780304, + "learning_rate": 3.8361395922677687e-07, + "loss": 0.26342666149139404, + "step": 1117 + }, + { + "epoch": 31.056338028169016, + "grad_norm": 0.3335409164428711, + "learning_rate": 3.8288447400239443e-07, + "loss": 0.27227702736854553, + "step": 1118 + }, + { + "epoch": 31.08450704225352, + "grad_norm": 0.33716699481010437, + "learning_rate": 3.82155497840886e-07, + "loss": 0.2696995437145233, + "step": 1119 + }, + { + "epoch": 31.112676056338028, + "grad_norm": 0.33672624826431274, + "learning_rate": 3.8142703296283953e-07, + "loss": 0.2588409185409546, + "step": 1120 + }, + { + "epoch": 31.140845070422536, + "grad_norm": 0.3224928081035614, + "learning_rate": 3.806990815872855e-07, + "loss": 0.2625422775745392, + "step": 1121 + }, + { + "epoch": 31.169014084507044, + "grad_norm": 0.32264038920402527, + "learning_rate": 3.7997164593168983e-07, + "loss": 0.251539021730423, + "step": 1122 + }, + { + "epoch": 31.197183098591548, + "grad_norm": 0.33344459533691406, + "learning_rate": 3.7924472821194765e-07, + "loss": 0.25519099831581116, + "step": 1123 + }, + { + "epoch": 31.225352112676056, + "grad_norm": 0.3551379442214966, + "learning_rate": 3.785183306423767e-07, + "loss": 0.2584845721721649, + "step": 1124 + }, + { + "epoch": 31.253521126760564, + "grad_norm": 0.3440611660480499, + "learning_rate": 3.777924554357096e-07, + "loss": 0.2609241008758545, + "step": 1125 + }, + { + "epoch": 31.281690140845072, + "grad_norm": 0.3400917649269104, + "learning_rate": 3.7706710480308835e-07, + "loss": 0.26181089878082275, + "step": 1126 + }, + { + "epoch": 31.309859154929576, + "grad_norm": 0.3361797630786896, + "learning_rate": 3.7634228095405673e-07, + "loss": 0.2546064853668213, + "step": 1127 + }, + { + "epoch": 31.338028169014084, + "grad_norm": 0.3346230387687683, + "learning_rate": 3.7561798609655373e-07, + "loss": 0.26581573486328125, + "step": 1128 + }, + { + "epoch": 31.366197183098592, + "grad_norm": 0.34457266330718994, + "learning_rate": 3.748942224369073e-07, + "loss": 0.2582035958766937, + "step": 1129 + }, + { + "epoch": 31.3943661971831, + "grad_norm": 0.3213818073272705, + "learning_rate": 3.7417099217982686e-07, + "loss": 0.25484442710876465, + "step": 1130 + }, + { + "epoch": 31.422535211267604, + "grad_norm": 0.3486325442790985, + "learning_rate": 3.734482975283975e-07, + "loss": 0.27330318093299866, + "step": 1131 + }, + { + "epoch": 31.450704225352112, + "grad_norm": 0.3430873453617096, + "learning_rate": 3.72726140684072e-07, + "loss": 0.25915205478668213, + "step": 1132 + }, + { + "epoch": 31.47887323943662, + "grad_norm": 0.3348333537578583, + "learning_rate": 3.720045238466658e-07, + "loss": 0.2582821846008301, + "step": 1133 + }, + { + "epoch": 31.507042253521128, + "grad_norm": 0.3174356520175934, + "learning_rate": 3.712834492143487e-07, + "loss": 0.2682039737701416, + "step": 1134 + }, + { + "epoch": 31.535211267605632, + "grad_norm": 0.3320380449295044, + "learning_rate": 3.7056291898363925e-07, + "loss": 0.2751486003398895, + "step": 1135 + }, + { + "epoch": 31.56338028169014, + "grad_norm": 0.3412676155567169, + "learning_rate": 3.6984293534939737e-07, + "loss": 0.2540426254272461, + "step": 1136 + }, + { + "epoch": 31.591549295774648, + "grad_norm": 0.35137638449668884, + "learning_rate": 3.69123500504818e-07, + "loss": 0.2570858895778656, + "step": 1137 + }, + { + "epoch": 31.619718309859156, + "grad_norm": 0.32933273911476135, + "learning_rate": 3.6840461664142444e-07, + "loss": 0.2535385489463806, + "step": 1138 + }, + { + "epoch": 31.647887323943664, + "grad_norm": 0.32296112179756165, + "learning_rate": 3.6768628594906193e-07, + "loss": 0.26802340149879456, + "step": 1139 + }, + { + "epoch": 31.676056338028168, + "grad_norm": 0.33371275663375854, + "learning_rate": 3.6696851061588994e-07, + "loss": 0.26279398798942566, + "step": 1140 + }, + { + "epoch": 31.704225352112676, + "grad_norm": 0.3587881624698639, + "learning_rate": 3.6625129282837685e-07, + "loss": 0.26237016916275024, + "step": 1141 + }, + { + "epoch": 31.732394366197184, + "grad_norm": 0.3388115465641022, + "learning_rate": 3.655346347712922e-07, + "loss": 0.2542800307273865, + "step": 1142 + }, + { + "epoch": 31.760563380281692, + "grad_norm": 0.3145511746406555, + "learning_rate": 3.6481853862770107e-07, + "loss": 0.2536108195781708, + "step": 1143 + }, + { + "epoch": 31.788732394366196, + "grad_norm": 0.34181296825408936, + "learning_rate": 3.641030065789562e-07, + "loss": 0.2601550817489624, + "step": 1144 + }, + { + "epoch": 31.816901408450704, + "grad_norm": 0.322862833738327, + "learning_rate": 3.6338804080469253e-07, + "loss": 0.25029903650283813, + "step": 1145 + }, + { + "epoch": 31.845070422535212, + "grad_norm": 0.3622659146785736, + "learning_rate": 3.6267364348281946e-07, + "loss": 0.26150447130203247, + "step": 1146 + }, + { + "epoch": 31.87323943661972, + "grad_norm": 0.330181360244751, + "learning_rate": 3.6195981678951535e-07, + "loss": 0.2587708830833435, + "step": 1147 + }, + { + "epoch": 31.901408450704224, + "grad_norm": 0.3616638779640198, + "learning_rate": 3.612465628992203e-07, + "loss": 0.26097607612609863, + "step": 1148 + }, + { + "epoch": 31.929577464788732, + "grad_norm": 0.3439587652683258, + "learning_rate": 3.60533883984629e-07, + "loss": 0.2429528385400772, + "step": 1149 + }, + { + "epoch": 31.95774647887324, + "grad_norm": 0.3390144407749176, + "learning_rate": 3.5982178221668533e-07, + "loss": 0.2673777937889099, + "step": 1150 + }, + { + "epoch": 31.985915492957748, + "grad_norm": 0.3215203881263733, + "learning_rate": 3.591102597645743e-07, + "loss": 0.25635766983032227, + "step": 1151 + }, + { + "epoch": 32.0, + "grad_norm": 0.4861057698726654, + "learning_rate": 3.5839931879571725e-07, + "loss": 0.26994332671165466, + "step": 1152 + }, + { + "epoch": 32.028169014084504, + "grad_norm": 0.3433145582675934, + "learning_rate": 3.5768896147576344e-07, + "loss": 0.2525317072868347, + "step": 1153 + }, + { + "epoch": 32.056338028169016, + "grad_norm": 0.34238752722740173, + "learning_rate": 3.5697918996858443e-07, + "loss": 0.271589457988739, + "step": 1154 + }, + { + "epoch": 32.08450704225352, + "grad_norm": 0.33140960335731506, + "learning_rate": 3.5627000643626704e-07, + "loss": 0.2612978219985962, + "step": 1155 + }, + { + "epoch": 32.11267605633803, + "grad_norm": 0.31951841711997986, + "learning_rate": 3.555614130391079e-07, + "loss": 0.27151286602020264, + "step": 1156 + }, + { + "epoch": 32.140845070422536, + "grad_norm": 0.3442953824996948, + "learning_rate": 3.5485341193560503e-07, + "loss": 0.2442217469215393, + "step": 1157 + }, + { + "epoch": 32.16901408450704, + "grad_norm": 0.3276779055595398, + "learning_rate": 3.5414600528245266e-07, + "loss": 0.25613170862197876, + "step": 1158 + }, + { + "epoch": 32.19718309859155, + "grad_norm": 0.33608436584472656, + "learning_rate": 3.534391952345341e-07, + "loss": 0.2614259123802185, + "step": 1159 + }, + { + "epoch": 32.225352112676056, + "grad_norm": 0.3303307592868805, + "learning_rate": 3.5273298394491515e-07, + "loss": 0.2672120928764343, + "step": 1160 + }, + { + "epoch": 32.25352112676056, + "grad_norm": 0.32655128836631775, + "learning_rate": 3.5202737356483816e-07, + "loss": 0.25033846497535706, + "step": 1161 + }, + { + "epoch": 32.28169014084507, + "grad_norm": 0.3326750099658966, + "learning_rate": 3.513223662437147e-07, + "loss": 0.2697717547416687, + "step": 1162 + }, + { + "epoch": 32.309859154929576, + "grad_norm": 0.33951663970947266, + "learning_rate": 3.5061796412911913e-07, + "loss": 0.25987690687179565, + "step": 1163 + }, + { + "epoch": 32.33802816901409, + "grad_norm": 0.3316378891468048, + "learning_rate": 3.4991416936678276e-07, + "loss": 0.26063597202301025, + "step": 1164 + }, + { + "epoch": 32.36619718309859, + "grad_norm": 0.33838751912117004, + "learning_rate": 3.49210984100586e-07, + "loss": 0.26821669936180115, + "step": 1165 + }, + { + "epoch": 32.394366197183096, + "grad_norm": 0.3294714689254761, + "learning_rate": 3.4850841047255364e-07, + "loss": 0.2651536464691162, + "step": 1166 + }, + { + "epoch": 32.42253521126761, + "grad_norm": 0.32624831795692444, + "learning_rate": 3.4780645062284665e-07, + "loss": 0.26797136664390564, + "step": 1167 + }, + { + "epoch": 32.45070422535211, + "grad_norm": 0.3322686553001404, + "learning_rate": 3.471051066897562e-07, + "loss": 0.2507922649383545, + "step": 1168 + }, + { + "epoch": 32.478873239436616, + "grad_norm": 0.34128591418266296, + "learning_rate": 3.4640438080969773e-07, + "loss": 0.2541847229003906, + "step": 1169 + }, + { + "epoch": 32.50704225352113, + "grad_norm": 0.3294316828250885, + "learning_rate": 3.45704275117204e-07, + "loss": 0.26326608657836914, + "step": 1170 + }, + { + "epoch": 32.53521126760563, + "grad_norm": 0.3293727934360504, + "learning_rate": 3.450047917449181e-07, + "loss": 0.2654852271080017, + "step": 1171 + }, + { + "epoch": 32.563380281690144, + "grad_norm": 0.32460466027259827, + "learning_rate": 3.4430593282358777e-07, + "loss": 0.25532153248786926, + "step": 1172 + }, + { + "epoch": 32.59154929577465, + "grad_norm": 0.3373318016529083, + "learning_rate": 3.4360770048205843e-07, + "loss": 0.25554513931274414, + "step": 1173 + }, + { + "epoch": 32.61971830985915, + "grad_norm": 0.34251123666763306, + "learning_rate": 3.429100968472668e-07, + "loss": 0.26249927282333374, + "step": 1174 + }, + { + "epoch": 32.647887323943664, + "grad_norm": 0.32484838366508484, + "learning_rate": 3.4221312404423486e-07, + "loss": 0.2562830448150635, + "step": 1175 + }, + { + "epoch": 32.67605633802817, + "grad_norm": 0.3435952365398407, + "learning_rate": 3.4151678419606233e-07, + "loss": 0.2574070692062378, + "step": 1176 + }, + { + "epoch": 32.70422535211267, + "grad_norm": 0.33101195096969604, + "learning_rate": 3.4082107942392136e-07, + "loss": 0.257138729095459, + "step": 1177 + }, + { + "epoch": 32.732394366197184, + "grad_norm": 0.37783390283584595, + "learning_rate": 3.4012601184704904e-07, + "loss": 0.26037871837615967, + "step": 1178 + }, + { + "epoch": 32.76056338028169, + "grad_norm": 0.33994340896606445, + "learning_rate": 3.3943158358274203e-07, + "loss": 0.27281370759010315, + "step": 1179 + }, + { + "epoch": 32.7887323943662, + "grad_norm": 0.32044896483421326, + "learning_rate": 3.387377967463493e-07, + "loss": 0.2526357173919678, + "step": 1180 + }, + { + "epoch": 32.816901408450704, + "grad_norm": 0.3177328109741211, + "learning_rate": 3.3804465345126545e-07, + "loss": 0.24474188685417175, + "step": 1181 + }, + { + "epoch": 32.84507042253521, + "grad_norm": 0.3454241454601288, + "learning_rate": 3.3735215580892575e-07, + "loss": 0.24287842214107513, + "step": 1182 + }, + { + "epoch": 32.87323943661972, + "grad_norm": 0.3315359354019165, + "learning_rate": 3.366603059287977e-07, + "loss": 0.26422587037086487, + "step": 1183 + }, + { + "epoch": 32.901408450704224, + "grad_norm": 0.3329971730709076, + "learning_rate": 3.359691059183761e-07, + "loss": 0.2687873840332031, + "step": 1184 + }, + { + "epoch": 32.929577464788736, + "grad_norm": 0.32194119691848755, + "learning_rate": 3.3527855788317614e-07, + "loss": 0.2529294788837433, + "step": 1185 + }, + { + "epoch": 32.95774647887324, + "grad_norm": 0.3383830487728119, + "learning_rate": 3.3458866392672694e-07, + "loss": 0.24743716418743134, + "step": 1186 + }, + { + "epoch": 32.985915492957744, + "grad_norm": 0.3237183690071106, + "learning_rate": 3.338994261505649e-07, + "loss": 0.2624974250793457, + "step": 1187 + }, + { + "epoch": 33.0, + "grad_norm": 0.4738941192626953, + "learning_rate": 3.3321084665422803e-07, + "loss": 0.2611575722694397, + "step": 1188 + }, + { + "epoch": 33.028169014084504, + "grad_norm": 0.3192257285118103, + "learning_rate": 3.325229275352489e-07, + "loss": 0.25964364409446716, + "step": 1189 + }, + { + "epoch": 33.056338028169016, + "grad_norm": 0.3343312442302704, + "learning_rate": 3.3183567088914833e-07, + "loss": 0.2630879282951355, + "step": 1190 + }, + { + "epoch": 33.08450704225352, + "grad_norm": 0.32633543014526367, + "learning_rate": 3.3114907880942933e-07, + "loss": 0.2663639783859253, + "step": 1191 + }, + { + "epoch": 33.11267605633803, + "grad_norm": 0.3315299451351166, + "learning_rate": 3.3046315338757026e-07, + "loss": 0.2600438892841339, + "step": 1192 + }, + { + "epoch": 33.140845070422536, + "grad_norm": 0.35579875111579895, + "learning_rate": 3.297778967130191e-07, + "loss": 0.2606794834136963, + "step": 1193 + }, + { + "epoch": 33.16901408450704, + "grad_norm": 0.3733043074607849, + "learning_rate": 3.290933108731866e-07, + "loss": 0.2512716054916382, + "step": 1194 + }, + { + "epoch": 33.19718309859155, + "grad_norm": 0.345547616481781, + "learning_rate": 3.2840939795343987e-07, + "loss": 0.26478058099746704, + "step": 1195 + }, + { + "epoch": 33.225352112676056, + "grad_norm": 0.33482369780540466, + "learning_rate": 3.2772616003709616e-07, + "loss": 0.2547541856765747, + "step": 1196 + }, + { + "epoch": 33.25352112676056, + "grad_norm": 0.3360159695148468, + "learning_rate": 3.270435992054166e-07, + "loss": 0.2729008197784424, + "step": 1197 + }, + { + "epoch": 33.28169014084507, + "grad_norm": 0.34279924631118774, + "learning_rate": 3.263617175376001e-07, + "loss": 0.253216028213501, + "step": 1198 + }, + { + "epoch": 33.309859154929576, + "grad_norm": 0.33277833461761475, + "learning_rate": 3.2568051711077636e-07, + "loss": 0.2548581659793854, + "step": 1199 + }, + { + "epoch": 33.33802816901409, + "grad_norm": 0.3363766074180603, + "learning_rate": 3.250000000000001e-07, + "loss": 0.25859585404396057, + "step": 1200 + }, + { + "epoch": 33.36619718309859, + "grad_norm": 0.3143514394760132, + "learning_rate": 3.2432016827824414e-07, + "loss": 0.25202757120132446, + "step": 1201 + }, + { + "epoch": 33.394366197183096, + "grad_norm": 0.3307502567768097, + "learning_rate": 3.2364102401639423e-07, + "loss": 0.2585509717464447, + "step": 1202 + }, + { + "epoch": 33.42253521126761, + "grad_norm": 0.33466944098472595, + "learning_rate": 3.229625692832414e-07, + "loss": 0.25337138772010803, + "step": 1203 + }, + { + "epoch": 33.45070422535211, + "grad_norm": 0.31453531980514526, + "learning_rate": 3.222848061454764e-07, + "loss": 0.2618822455406189, + "step": 1204 + }, + { + "epoch": 33.478873239436616, + "grad_norm": 0.35038280487060547, + "learning_rate": 3.216077366676833e-07, + "loss": 0.26571914553642273, + "step": 1205 + }, + { + "epoch": 33.50704225352113, + "grad_norm": 0.3479344844818115, + "learning_rate": 3.209313629123329e-07, + "loss": 0.26047736406326294, + "step": 1206 + }, + { + "epoch": 33.53521126760563, + "grad_norm": 0.339733362197876, + "learning_rate": 3.2025568693977745e-07, + "loss": 0.2580920159816742, + "step": 1207 + }, + { + "epoch": 33.563380281690144, + "grad_norm": 0.3457892835140228, + "learning_rate": 3.195807108082429e-07, + "loss": 0.25361278653144836, + "step": 1208 + }, + { + "epoch": 33.59154929577465, + "grad_norm": 0.35116419196128845, + "learning_rate": 3.1890643657382356e-07, + "loss": 0.2517722249031067, + "step": 1209 + }, + { + "epoch": 33.61971830985915, + "grad_norm": 0.3323304355144501, + "learning_rate": 3.182328662904756e-07, + "loss": 0.25763052701950073, + "step": 1210 + }, + { + "epoch": 33.647887323943664, + "grad_norm": 0.3180283308029175, + "learning_rate": 3.175600020100112e-07, + "loss": 0.26268666982650757, + "step": 1211 + }, + { + "epoch": 33.67605633802817, + "grad_norm": 0.32394516468048096, + "learning_rate": 3.168878457820915e-07, + "loss": 0.2540284991264343, + "step": 1212 + }, + { + "epoch": 33.70422535211267, + "grad_norm": 0.3315521478652954, + "learning_rate": 3.162163996542209e-07, + "loss": 0.26291581988334656, + "step": 1213 + }, + { + "epoch": 33.732394366197184, + "grad_norm": 0.32950082421302795, + "learning_rate": 3.155456656717408e-07, + "loss": 0.2569209039211273, + "step": 1214 + }, + { + "epoch": 33.76056338028169, + "grad_norm": 0.3513064384460449, + "learning_rate": 3.14875645877823e-07, + "loss": 0.24890759587287903, + "step": 1215 + }, + { + "epoch": 33.7887323943662, + "grad_norm": 0.3389022946357727, + "learning_rate": 3.142063423134644e-07, + "loss": 0.2649242579936981, + "step": 1216 + }, + { + "epoch": 33.816901408450704, + "grad_norm": 0.3270207941532135, + "learning_rate": 3.135377570174796e-07, + "loss": 0.26036375761032104, + "step": 1217 + }, + { + "epoch": 33.84507042253521, + "grad_norm": 0.35390451550483704, + "learning_rate": 3.1286989202649503e-07, + "loss": 0.25314897298812866, + "step": 1218 + }, + { + "epoch": 33.87323943661972, + "grad_norm": 0.3263014256954193, + "learning_rate": 3.122027493749438e-07, + "loss": 0.2565680742263794, + "step": 1219 + }, + { + "epoch": 33.901408450704224, + "grad_norm": 0.3133479654788971, + "learning_rate": 3.115363310950578e-07, + "loss": 0.2629280090332031, + "step": 1220 + }, + { + "epoch": 33.929577464788736, + "grad_norm": 0.3530975580215454, + "learning_rate": 3.1087063921686263e-07, + "loss": 0.26493778824806213, + "step": 1221 + }, + { + "epoch": 33.95774647887324, + "grad_norm": 0.3344945013523102, + "learning_rate": 3.102056757681715e-07, + "loss": 0.2550634741783142, + "step": 1222 + }, + { + "epoch": 33.985915492957744, + "grad_norm": 0.32563889026641846, + "learning_rate": 3.0954144277457817e-07, + "loss": 0.25193893909454346, + "step": 1223 + }, + { + "epoch": 34.0, + "grad_norm": 0.48929160833358765, + "learning_rate": 3.0887794225945143e-07, + "loss": 0.2488047182559967, + "step": 1224 + }, + { + "epoch": 34.028169014084504, + "grad_norm": 0.32252368330955505, + "learning_rate": 3.0821517624392925e-07, + "loss": 0.25322937965393066, + "step": 1225 + }, + { + "epoch": 34.056338028169016, + "grad_norm": 0.3510408401489258, + "learning_rate": 3.075531467469116e-07, + "loss": 0.265546977519989, + "step": 1226 + }, + { + "epoch": 34.08450704225352, + "grad_norm": 0.33205100893974304, + "learning_rate": 3.0689185578505525e-07, + "loss": 0.2621091902256012, + "step": 1227 + }, + { + "epoch": 34.11267605633803, + "grad_norm": 0.33356767892837524, + "learning_rate": 3.062313053727671e-07, + "loss": 0.24525871872901917, + "step": 1228 + }, + { + "epoch": 34.140845070422536, + "grad_norm": 0.32789838314056396, + "learning_rate": 3.055714975221981e-07, + "loss": 0.2655676007270813, + "step": 1229 + }, + { + "epoch": 34.16901408450704, + "grad_norm": 0.3837502598762512, + "learning_rate": 3.0491243424323783e-07, + "loss": 0.2583563029766083, + "step": 1230 + }, + { + "epoch": 34.19718309859155, + "grad_norm": 0.32497507333755493, + "learning_rate": 3.0425411754350694e-07, + "loss": 0.25412964820861816, + "step": 1231 + }, + { + "epoch": 34.225352112676056, + "grad_norm": 0.3423527181148529, + "learning_rate": 3.0359654942835247e-07, + "loss": 0.2603622078895569, + "step": 1232 + }, + { + "epoch": 34.25352112676056, + "grad_norm": 0.3326815068721771, + "learning_rate": 3.029397319008407e-07, + "loss": 0.2565937638282776, + "step": 1233 + }, + { + "epoch": 34.28169014084507, + "grad_norm": 0.3410370945930481, + "learning_rate": 3.02283666961752e-07, + "loss": 0.2687773108482361, + "step": 1234 + }, + { + "epoch": 34.309859154929576, + "grad_norm": 0.33839917182922363, + "learning_rate": 3.016283566095739e-07, + "loss": 0.27057865262031555, + "step": 1235 + }, + { + "epoch": 34.33802816901409, + "grad_norm": 0.32578834891319275, + "learning_rate": 3.0097380284049523e-07, + "loss": 0.2486121952533722, + "step": 1236 + }, + { + "epoch": 34.36619718309859, + "grad_norm": 0.34315571188926697, + "learning_rate": 3.003200076484004e-07, + "loss": 0.24546003341674805, + "step": 1237 + }, + { + "epoch": 34.394366197183096, + "grad_norm": 0.32684844732284546, + "learning_rate": 2.996669730248628e-07, + "loss": 0.2699982523918152, + "step": 1238 + }, + { + "epoch": 34.42253521126761, + "grad_norm": 0.33143216371536255, + "learning_rate": 2.9901470095913943e-07, + "loss": 0.25373488664627075, + "step": 1239 + }, + { + "epoch": 34.45070422535211, + "grad_norm": 0.35439276695251465, + "learning_rate": 2.9836319343816397e-07, + "loss": 0.24537047743797302, + "step": 1240 + }, + { + "epoch": 34.478873239436616, + "grad_norm": 0.33683332800865173, + "learning_rate": 2.977124524465413e-07, + "loss": 0.2581592798233032, + "step": 1241 + }, + { + "epoch": 34.50704225352113, + "grad_norm": 0.3526037037372589, + "learning_rate": 2.9706247996654134e-07, + "loss": 0.2586764693260193, + "step": 1242 + }, + { + "epoch": 34.53521126760563, + "grad_norm": 0.3380417823791504, + "learning_rate": 2.964132779780929e-07, + "loss": 0.263625830411911, + "step": 1243 + }, + { + "epoch": 34.563380281690144, + "grad_norm": 0.3443485200405121, + "learning_rate": 2.9576484845877793e-07, + "loss": 0.2503140866756439, + "step": 1244 + }, + { + "epoch": 34.59154929577465, + "grad_norm": 0.35234031081199646, + "learning_rate": 2.9511719338382535e-07, + "loss": 0.25954437255859375, + "step": 1245 + }, + { + "epoch": 34.61971830985915, + "grad_norm": 0.3406411111354828, + "learning_rate": 2.944703147261046e-07, + "loss": 0.2619974613189697, + "step": 1246 + }, + { + "epoch": 34.647887323943664, + "grad_norm": 0.3347373306751251, + "learning_rate": 2.938242144561201e-07, + "loss": 0.2618395984172821, + "step": 1247 + }, + { + "epoch": 34.67605633802817, + "grad_norm": 0.33204221725463867, + "learning_rate": 2.931788945420058e-07, + "loss": 0.26617297530174255, + "step": 1248 + }, + { + "epoch": 34.70422535211267, + "grad_norm": 0.3484657406806946, + "learning_rate": 2.925343569495178e-07, + "loss": 0.2656903564929962, + "step": 1249 + }, + { + "epoch": 34.732394366197184, + "grad_norm": 0.3254799544811249, + "learning_rate": 2.918906036420294e-07, + "loss": 0.24855300784111023, + "step": 1250 + }, + { + "epoch": 34.76056338028169, + "grad_norm": 0.33594822883605957, + "learning_rate": 2.9124763658052474e-07, + "loss": 0.2618425786495209, + "step": 1251 + }, + { + "epoch": 34.7887323943662, + "grad_norm": 0.323949933052063, + "learning_rate": 2.9060545772359305e-07, + "loss": 0.2546170949935913, + "step": 1252 + }, + { + "epoch": 34.816901408450704, + "grad_norm": 0.3242202699184418, + "learning_rate": 2.8996406902742267e-07, + "loss": 0.24211625754833221, + "step": 1253 + }, + { + "epoch": 34.84507042253521, + "grad_norm": 0.3353058695793152, + "learning_rate": 2.893234724457946e-07, + "loss": 0.25402140617370605, + "step": 1254 + }, + { + "epoch": 34.87323943661972, + "grad_norm": 0.33988505601882935, + "learning_rate": 2.886836699300771e-07, + "loss": 0.24861261248588562, + "step": 1255 + }, + { + "epoch": 34.901408450704224, + "grad_norm": 0.3339218199253082, + "learning_rate": 2.8804466342921987e-07, + "loss": 0.25520533323287964, + "step": 1256 + }, + { + "epoch": 34.929577464788736, + "grad_norm": 0.3448787033557892, + "learning_rate": 2.874064548897472e-07, + "loss": 0.2663518786430359, + "step": 1257 + }, + { + "epoch": 34.95774647887324, + "grad_norm": 0.3454734981060028, + "learning_rate": 2.86769046255753e-07, + "loss": 0.25287461280822754, + "step": 1258 + }, + { + "epoch": 34.985915492957744, + "grad_norm": 0.3322574496269226, + "learning_rate": 2.8613243946889477e-07, + "loss": 0.25937291979789734, + "step": 1259 + }, + { + "epoch": 35.0, + "grad_norm": 0.47356757521629333, + "learning_rate": 2.854966364683872e-07, + "loss": 0.2588436007499695, + "step": 1260 + }, + { + "epoch": 35.028169014084504, + "grad_norm": 0.32370901107788086, + "learning_rate": 2.848616391909959e-07, + "loss": 0.2847004234790802, + "step": 1261 + }, + { + "epoch": 35.056338028169016, + "grad_norm": 0.3340662717819214, + "learning_rate": 2.842274495710335e-07, + "loss": 0.24963748455047607, + "step": 1262 + }, + { + "epoch": 35.08450704225352, + "grad_norm": 0.3470820188522339, + "learning_rate": 2.835940695403512e-07, + "loss": 0.25704559683799744, + "step": 1263 + }, + { + "epoch": 35.11267605633803, + "grad_norm": 0.3213740289211273, + "learning_rate": 2.829615010283344e-07, + "loss": 0.24562162160873413, + "step": 1264 + }, + { + "epoch": 35.140845070422536, + "grad_norm": 0.3323827385902405, + "learning_rate": 2.8232974596189653e-07, + "loss": 0.25376367568969727, + "step": 1265 + }, + { + "epoch": 35.16901408450704, + "grad_norm": 0.32620102167129517, + "learning_rate": 2.8169880626547283e-07, + "loss": 0.25920748710632324, + "step": 1266 + }, + { + "epoch": 35.19718309859155, + "grad_norm": 0.34155285358428955, + "learning_rate": 2.8106868386101545e-07, + "loss": 0.2532484233379364, + "step": 1267 + }, + { + "epoch": 35.225352112676056, + "grad_norm": 0.32295599579811096, + "learning_rate": 2.8043938066798645e-07, + "loss": 0.2596886456012726, + "step": 1268 + }, + { + "epoch": 35.25352112676056, + "grad_norm": 0.3390556871891022, + "learning_rate": 2.7981089860335225e-07, + "loss": 0.2628597021102905, + "step": 1269 + }, + { + "epoch": 35.28169014084507, + "grad_norm": 0.3397858738899231, + "learning_rate": 2.791832395815782e-07, + "loss": 0.260450154542923, + "step": 1270 + }, + { + "epoch": 35.309859154929576, + "grad_norm": 0.3356383442878723, + "learning_rate": 2.7855640551462287e-07, + "loss": 0.24709969758987427, + "step": 1271 + }, + { + "epoch": 35.33802816901409, + "grad_norm": 0.3386112153530121, + "learning_rate": 2.7793039831193133e-07, + "loss": 0.2554944157600403, + "step": 1272 + }, + { + "epoch": 35.36619718309859, + "grad_norm": 0.34547311067581177, + "learning_rate": 2.773052198804301e-07, + "loss": 0.2689363658428192, + "step": 1273 + }, + { + "epoch": 35.394366197183096, + "grad_norm": 0.34119531512260437, + "learning_rate": 2.766808721245211e-07, + "loss": 0.2566688656806946, + "step": 1274 + }, + { + "epoch": 35.42253521126761, + "grad_norm": 0.3342508375644684, + "learning_rate": 2.760573569460757e-07, + "loss": 0.24888336658477783, + "step": 1275 + }, + { + "epoch": 35.45070422535211, + "grad_norm": 0.33420711755752563, + "learning_rate": 2.7543467624442956e-07, + "loss": 0.27446046471595764, + "step": 1276 + }, + { + "epoch": 35.478873239436616, + "grad_norm": 0.3241899907588959, + "learning_rate": 2.7481283191637605e-07, + "loss": 0.24648495018482208, + "step": 1277 + }, + { + "epoch": 35.50704225352113, + "grad_norm": 0.3267020285129547, + "learning_rate": 2.741918258561607e-07, + "loss": 0.2573559880256653, + "step": 1278 + }, + { + "epoch": 35.53521126760563, + "grad_norm": 0.3532126247882843, + "learning_rate": 2.7357165995547547e-07, + "loss": 0.2432764172554016, + "step": 1279 + }, + { + "epoch": 35.563380281690144, + "grad_norm": 0.33826351165771484, + "learning_rate": 2.729523361034538e-07, + "loss": 0.25668877363204956, + "step": 1280 + }, + { + "epoch": 35.59154929577465, + "grad_norm": 0.338796466588974, + "learning_rate": 2.7233385618666315e-07, + "loss": 0.2522228956222534, + "step": 1281 + }, + { + "epoch": 35.61971830985915, + "grad_norm": 0.3262656629085541, + "learning_rate": 2.717162220891007e-07, + "loss": 0.2595973312854767, + "step": 1282 + }, + { + "epoch": 35.647887323943664, + "grad_norm": 0.3441692590713501, + "learning_rate": 2.7109943569218707e-07, + "loss": 0.26480039954185486, + "step": 1283 + }, + { + "epoch": 35.67605633802817, + "grad_norm": 0.3370777368545532, + "learning_rate": 2.7048349887476037e-07, + "loss": 0.25393831729888916, + "step": 1284 + }, + { + "epoch": 35.70422535211267, + "grad_norm": 0.34027761220932007, + "learning_rate": 2.698684135130713e-07, + "loss": 0.24741466343402863, + "step": 1285 + }, + { + "epoch": 35.732394366197184, + "grad_norm": 0.3438904881477356, + "learning_rate": 2.692541814807763e-07, + "loss": 0.2620083689689636, + "step": 1286 + }, + { + "epoch": 35.76056338028169, + "grad_norm": 0.33286988735198975, + "learning_rate": 2.686408046489328e-07, + "loss": 0.2683720588684082, + "step": 1287 + }, + { + "epoch": 35.7887323943662, + "grad_norm": 0.3397563397884369, + "learning_rate": 2.6802828488599294e-07, + "loss": 0.25813597440719604, + "step": 1288 + }, + { + "epoch": 35.816901408450704, + "grad_norm": 0.34016039967536926, + "learning_rate": 2.6741662405779796e-07, + "loss": 0.25924018025398254, + "step": 1289 + }, + { + "epoch": 35.84507042253521, + "grad_norm": 0.3287438452243805, + "learning_rate": 2.6680582402757324e-07, + "loss": 0.24357835948467255, + "step": 1290 + }, + { + "epoch": 35.87323943661972, + "grad_norm": 0.3473154306411743, + "learning_rate": 2.661958866559213e-07, + "loss": 0.25433164834976196, + "step": 1291 + }, + { + "epoch": 35.901408450704224, + "grad_norm": 0.3320452570915222, + "learning_rate": 2.655868138008171e-07, + "loss": 0.2620140016078949, + "step": 1292 + }, + { + "epoch": 35.929577464788736, + "grad_norm": 0.35027673840522766, + "learning_rate": 2.649786073176025e-07, + "loss": 0.26484349370002747, + "step": 1293 + }, + { + "epoch": 35.95774647887324, + "grad_norm": 0.34910938143730164, + "learning_rate": 2.6437126905897967e-07, + "loss": 0.24849724769592285, + "step": 1294 + }, + { + "epoch": 35.985915492957744, + "grad_norm": 0.3321913480758667, + "learning_rate": 2.637648008750062e-07, + "loss": 0.24661482870578766, + "step": 1295 + }, + { + "epoch": 36.0, + "grad_norm": 0.48746395111083984, + "learning_rate": 2.631592046130896e-07, + "loss": 0.25251615047454834, + "step": 1296 + }, + { + "epoch": 36.028169014084504, + "grad_norm": 0.3326322138309479, + "learning_rate": 2.6255448211798103e-07, + "loss": 0.2514849603176117, + "step": 1297 + }, + { + "epoch": 36.056338028169016, + "grad_norm": 0.323958158493042, + "learning_rate": 2.6195063523177e-07, + "loss": 0.2420714795589447, + "step": 1298 + }, + { + "epoch": 36.08450704225352, + "grad_norm": 0.3715856075286865, + "learning_rate": 2.613476657938789e-07, + "loss": 0.24617412686347961, + "step": 1299 + }, + { + "epoch": 36.11267605633803, + "grad_norm": 0.34012261033058167, + "learning_rate": 2.6074557564105724e-07, + "loss": 0.26243406534194946, + "step": 1300 + }, + { + "epoch": 36.140845070422536, + "grad_norm": 0.33578699827194214, + "learning_rate": 2.6014436660737605e-07, + "loss": 0.2461467981338501, + "step": 1301 + }, + { + "epoch": 36.16901408450704, + "grad_norm": 0.3389386832714081, + "learning_rate": 2.595440405242222e-07, + "loss": 0.2597675025463104, + "step": 1302 + }, + { + "epoch": 36.19718309859155, + "grad_norm": 0.33628833293914795, + "learning_rate": 2.589445992202931e-07, + "loss": 0.2510983943939209, + "step": 1303 + }, + { + "epoch": 36.225352112676056, + "grad_norm": 0.3409932851791382, + "learning_rate": 2.583460445215911e-07, + "loss": 0.2607109844684601, + "step": 1304 + }, + { + "epoch": 36.25352112676056, + "grad_norm": 0.3476935625076294, + "learning_rate": 2.5774837825141736e-07, + "loss": 0.26868295669555664, + "step": 1305 + }, + { + "epoch": 36.28169014084507, + "grad_norm": 0.3389628231525421, + "learning_rate": 2.571516022303671e-07, + "loss": 0.24396029114723206, + "step": 1306 + }, + { + "epoch": 36.309859154929576, + "grad_norm": 0.3351360261440277, + "learning_rate": 2.565557182763235e-07, + "loss": 0.2638927102088928, + "step": 1307 + }, + { + "epoch": 36.33802816901409, + "grad_norm": 0.34508877992630005, + "learning_rate": 2.5596072820445254e-07, + "loss": 0.25982603430747986, + "step": 1308 + }, + { + "epoch": 36.36619718309859, + "grad_norm": 0.3333590626716614, + "learning_rate": 2.5536663382719713e-07, + "loss": 0.25606241822242737, + "step": 1309 + }, + { + "epoch": 36.394366197183096, + "grad_norm": 0.33822396397590637, + "learning_rate": 2.547734369542718e-07, + "loss": 0.2518611252307892, + "step": 1310 + }, + { + "epoch": 36.42253521126761, + "grad_norm": 0.3358154594898224, + "learning_rate": 2.5418113939265686e-07, + "loss": 0.25333690643310547, + "step": 1311 + }, + { + "epoch": 36.45070422535211, + "grad_norm": 0.33005034923553467, + "learning_rate": 2.5358974294659373e-07, + "loss": 0.24985584616661072, + "step": 1312 + }, + { + "epoch": 36.478873239436616, + "grad_norm": 0.3343973159790039, + "learning_rate": 2.5299924941757843e-07, + "loss": 0.27109482884407043, + "step": 1313 + }, + { + "epoch": 36.50704225352113, + "grad_norm": 0.33798739314079285, + "learning_rate": 2.5240966060435674e-07, + "loss": 0.2599262595176697, + "step": 1314 + }, + { + "epoch": 36.53521126760563, + "grad_norm": 0.33094605803489685, + "learning_rate": 2.5182097830291824e-07, + "loss": 0.24939575791358948, + "step": 1315 + }, + { + "epoch": 36.563380281690144, + "grad_norm": 0.3303806185722351, + "learning_rate": 2.512332043064913e-07, + "loss": 0.2498035877943039, + "step": 1316 + }, + { + "epoch": 36.59154929577465, + "grad_norm": 0.3437672555446625, + "learning_rate": 2.5064634040553767e-07, + "loss": 0.26817601919174194, + "step": 1317 + }, + { + "epoch": 36.61971830985915, + "grad_norm": 0.3672111928462982, + "learning_rate": 2.5006038838774647e-07, + "loss": 0.2572394609451294, + "step": 1318 + }, + { + "epoch": 36.647887323943664, + "grad_norm": 0.34106817841529846, + "learning_rate": 2.494753500380291e-07, + "loss": 0.25872814655303955, + "step": 1319 + }, + { + "epoch": 36.67605633802817, + "grad_norm": 0.35012519359588623, + "learning_rate": 2.488912271385139e-07, + "loss": 0.2478848099708557, + "step": 1320 + }, + { + "epoch": 36.70422535211267, + "grad_norm": 0.3354050815105438, + "learning_rate": 2.483080214685404e-07, + "loss": 0.2592930793762207, + "step": 1321 + }, + { + "epoch": 36.732394366197184, + "grad_norm": 0.3539486825466156, + "learning_rate": 2.4772573480465445e-07, + "loss": 0.24492186307907104, + "step": 1322 + }, + { + "epoch": 36.76056338028169, + "grad_norm": 0.34425100684165955, + "learning_rate": 2.471443689206021e-07, + "loss": 0.2586178779602051, + "step": 1323 + }, + { + "epoch": 36.7887323943662, + "grad_norm": 0.35161006450653076, + "learning_rate": 2.465639255873246e-07, + "loss": 0.2581009268760681, + "step": 1324 + }, + { + "epoch": 36.816901408450704, + "grad_norm": 0.3478921949863434, + "learning_rate": 2.4598440657295286e-07, + "loss": 0.2674616575241089, + "step": 1325 + }, + { + "epoch": 36.84507042253521, + "grad_norm": 0.35100990533828735, + "learning_rate": 2.454058136428027e-07, + "loss": 0.27003878355026245, + "step": 1326 + }, + { + "epoch": 36.87323943661972, + "grad_norm": 0.3363000452518463, + "learning_rate": 2.4482814855936834e-07, + "loss": 0.2609623968601227, + "step": 1327 + }, + { + "epoch": 36.901408450704224, + "grad_norm": 0.3406379222869873, + "learning_rate": 2.4425141308231765e-07, + "loss": 0.2661615014076233, + "step": 1328 + }, + { + "epoch": 36.929577464788736, + "grad_norm": 0.331514447927475, + "learning_rate": 2.43675608968487e-07, + "loss": 0.24595093727111816, + "step": 1329 + }, + { + "epoch": 36.95774647887324, + "grad_norm": 0.33636540174484253, + "learning_rate": 2.4310073797187573e-07, + "loss": 0.2518694996833801, + "step": 1330 + }, + { + "epoch": 36.985915492957744, + "grad_norm": 0.3203655779361725, + "learning_rate": 2.4252680184364045e-07, + "loss": 0.24997392296791077, + "step": 1331 + }, + { + "epoch": 37.0, + "grad_norm": 0.47873687744140625, + "learning_rate": 2.4195380233209006e-07, + "loss": 0.24962179362773895, + "step": 1332 + } + ], + "logging_steps": 1, + "max_steps": 1800, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 1.0, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.788879174705873e+18, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}