| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 37.0, | |
| "eval_steps": 500, | |
| "global_step": 1332, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.028169014084507043, | |
| "grad_norm": 12.737117767333984, | |
| "learning_rate": 9.999993146109795e-07, | |
| "loss": 0.6797127723693848, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.056338028169014086, | |
| "grad_norm": 12.04797649383545, | |
| "learning_rate": 9.999972584460056e-07, | |
| "loss": 0.6627321243286133, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.08450704225352113, | |
| "grad_norm": 12.632461547851562, | |
| "learning_rate": 9.99993831511342e-07, | |
| "loss": 0.6829236149787903, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.11267605633802817, | |
| "grad_norm": 11.97681713104248, | |
| "learning_rate": 9.999890338174275e-07, | |
| "loss": 0.6625960469245911, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.14084507042253522, | |
| "grad_norm": 11.185710906982422, | |
| "learning_rate": 9.99982865378877e-07, | |
| "loss": 0.6418126821517944, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.16901408450704225, | |
| "grad_norm": 11.49565315246582, | |
| "learning_rate": 9.999753262144804e-07, | |
| "loss": 0.6464570760726929, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.19718309859154928, | |
| "grad_norm": 10.954561233520508, | |
| "learning_rate": 9.999664163472034e-07, | |
| "loss": 0.63329017162323, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.22535211267605634, | |
| "grad_norm": 10.728333473205566, | |
| "learning_rate": 9.999561358041868e-07, | |
| "loss": 0.6382037401199341, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.2535211267605634, | |
| "grad_norm": 8.404616355895996, | |
| "learning_rate": 9.99944484616747e-07, | |
| "loss": 0.5870345830917358, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.28169014084507044, | |
| "grad_norm": 7.616209983825684, | |
| "learning_rate": 9.99931462820376e-07, | |
| "loss": 0.5672095417976379, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.30985915492957744, | |
| "grad_norm": 7.800975799560547, | |
| "learning_rate": 9.999170704547398e-07, | |
| "loss": 0.581696629524231, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.3380281690140845, | |
| "grad_norm": 7.584338665008545, | |
| "learning_rate": 9.999013075636804e-07, | |
| "loss": 0.5873032808303833, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.36619718309859156, | |
| "grad_norm": 6.736105442047119, | |
| "learning_rate": 9.998841741952141e-07, | |
| "loss": 0.5502372980117798, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.39436619718309857, | |
| "grad_norm": 6.839756965637207, | |
| "learning_rate": 9.998656704015323e-07, | |
| "loss": 0.5653150677680969, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.4225352112676056, | |
| "grad_norm": 7.052567005157471, | |
| "learning_rate": 9.998457962390008e-07, | |
| "loss": 0.5660480260848999, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.4507042253521127, | |
| "grad_norm": 6.61349630355835, | |
| "learning_rate": 9.998245517681593e-07, | |
| "loss": 0.552219033241272, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.4788732394366197, | |
| "grad_norm": 3.9956817626953125, | |
| "learning_rate": 9.998019370537227e-07, | |
| "loss": 0.5171241760253906, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.5070422535211268, | |
| "grad_norm": 3.6887121200561523, | |
| "learning_rate": 9.997779521645791e-07, | |
| "loss": 0.5023034811019897, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.5352112676056338, | |
| "grad_norm": 3.6457769870758057, | |
| "learning_rate": 9.997525971737909e-07, | |
| "loss": 0.505454421043396, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.5633802816901409, | |
| "grad_norm": 3.398740530014038, | |
| "learning_rate": 9.997258721585931e-07, | |
| "loss": 0.4978747069835663, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.5915492957746479, | |
| "grad_norm": 3.2862207889556885, | |
| "learning_rate": 9.99697777200395e-07, | |
| "loss": 0.5002620220184326, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.6197183098591549, | |
| "grad_norm": 3.3747572898864746, | |
| "learning_rate": 9.996683123847795e-07, | |
| "loss": 0.5069968700408936, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.647887323943662, | |
| "grad_norm": 3.001546621322632, | |
| "learning_rate": 9.996374778015007e-07, | |
| "loss": 0.4922000765800476, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.676056338028169, | |
| "grad_norm": 2.996706962585449, | |
| "learning_rate": 9.996052735444862e-07, | |
| "loss": 0.4938335716724396, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.704225352112676, | |
| "grad_norm": 2.668245315551758, | |
| "learning_rate": 9.99571699711836e-07, | |
| "loss": 0.49115338921546936, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.7323943661971831, | |
| "grad_norm": 2.4952428340911865, | |
| "learning_rate": 9.995367564058216e-07, | |
| "loss": 0.4847099483013153, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.7605633802816901, | |
| "grad_norm": 2.529451847076416, | |
| "learning_rate": 9.995004437328865e-07, | |
| "loss": 0.48129573464393616, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.7887323943661971, | |
| "grad_norm": 2.479883909225464, | |
| "learning_rate": 9.994627618036452e-07, | |
| "loss": 0.5088395476341248, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.8169014084507042, | |
| "grad_norm": 2.414393424987793, | |
| "learning_rate": 9.994237107328838e-07, | |
| "loss": 0.48045098781585693, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.8450704225352113, | |
| "grad_norm": 2.2080600261688232, | |
| "learning_rate": 9.993832906395582e-07, | |
| "loss": 0.47147125005722046, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.8732394366197183, | |
| "grad_norm": 1.912841558456421, | |
| "learning_rate": 9.993415016467952e-07, | |
| "loss": 0.4724900424480438, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.9014084507042254, | |
| "grad_norm": 1.282597303390503, | |
| "learning_rate": 9.992983438818915e-07, | |
| "loss": 0.46792298555374146, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.9295774647887324, | |
| "grad_norm": 1.4362828731536865, | |
| "learning_rate": 9.992538174763127e-07, | |
| "loss": 0.45093870162963867, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.9577464788732394, | |
| "grad_norm": 1.4296821355819702, | |
| "learning_rate": 9.992079225656944e-07, | |
| "loss": 0.44724205136299133, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.9859154929577465, | |
| "grad_norm": 1.4829713106155396, | |
| "learning_rate": 9.9916065928984e-07, | |
| "loss": 0.44936883449554443, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.387039303779602, | |
| "learning_rate": 9.991120277927223e-07, | |
| "loss": 0.47316086292266846, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 1.028169014084507, | |
| "grad_norm": 1.3140299320220947, | |
| "learning_rate": 9.990620282224806e-07, | |
| "loss": 0.4389120638370514, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 1.056338028169014, | |
| "grad_norm": 1.2881019115447998, | |
| "learning_rate": 9.990106607314225e-07, | |
| "loss": 0.43830516934394836, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 1.084507042253521, | |
| "grad_norm": 1.1489726305007935, | |
| "learning_rate": 9.989579254760224e-07, | |
| "loss": 0.44559216499328613, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 1.1126760563380282, | |
| "grad_norm": 1.0595662593841553, | |
| "learning_rate": 9.989038226169207e-07, | |
| "loss": 0.43717890977859497, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.1408450704225352, | |
| "grad_norm": 0.9458185434341431, | |
| "learning_rate": 9.988483523189248e-07, | |
| "loss": 0.43611639738082886, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 1.1690140845070423, | |
| "grad_norm": 0.8811507821083069, | |
| "learning_rate": 9.98791514751006e-07, | |
| "loss": 0.4194882810115814, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 1.1971830985915493, | |
| "grad_norm": 0.7880372405052185, | |
| "learning_rate": 9.98733310086302e-07, | |
| "loss": 0.4363758862018585, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 1.2253521126760563, | |
| "grad_norm": 0.7736399173736572, | |
| "learning_rate": 9.98673738502114e-07, | |
| "loss": 0.43049588799476624, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 1.2535211267605635, | |
| "grad_norm": 0.7198370695114136, | |
| "learning_rate": 9.986128001799076e-07, | |
| "loss": 0.43443119525909424, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.2816901408450705, | |
| "grad_norm": 0.7174084186553955, | |
| "learning_rate": 9.985504953053113e-07, | |
| "loss": 0.43092280626296997, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 1.3098591549295775, | |
| "grad_norm": 0.7043387293815613, | |
| "learning_rate": 9.984868240681164e-07, | |
| "loss": 0.417573481798172, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 1.3380281690140845, | |
| "grad_norm": 0.6884390115737915, | |
| "learning_rate": 9.98421786662277e-07, | |
| "loss": 0.4211745262145996, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 1.3661971830985915, | |
| "grad_norm": 0.7091729044914246, | |
| "learning_rate": 9.983553832859078e-07, | |
| "loss": 0.4147814214229584, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 1.3943661971830985, | |
| "grad_norm": 0.6925486326217651, | |
| "learning_rate": 9.982876141412855e-07, | |
| "loss": 0.432437002658844, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.4225352112676055, | |
| "grad_norm": 0.7119179368019104, | |
| "learning_rate": 9.982184794348462e-07, | |
| "loss": 0.41633373498916626, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 1.4507042253521127, | |
| "grad_norm": 0.6801888346672058, | |
| "learning_rate": 9.981479793771866e-07, | |
| "loss": 0.4228135645389557, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 1.4788732394366197, | |
| "grad_norm": 0.6876774430274963, | |
| "learning_rate": 9.98076114183062e-07, | |
| "loss": 0.41455432772636414, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 1.5070422535211268, | |
| "grad_norm": 0.6285378336906433, | |
| "learning_rate": 9.98002884071386e-07, | |
| "loss": 0.41491252183914185, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 1.5352112676056338, | |
| "grad_norm": 0.6261480450630188, | |
| "learning_rate": 9.979282892652304e-07, | |
| "loss": 0.42695990204811096, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.563380281690141, | |
| "grad_norm": 0.6269007325172424, | |
| "learning_rate": 9.97852329991824e-07, | |
| "loss": 0.41284894943237305, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.591549295774648, | |
| "grad_norm": 0.6070351600646973, | |
| "learning_rate": 9.977750064825519e-07, | |
| "loss": 0.42982780933380127, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.619718309859155, | |
| "grad_norm": 0.5970191955566406, | |
| "learning_rate": 9.976963189729547e-07, | |
| "loss": 0.41365376114845276, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.647887323943662, | |
| "grad_norm": 0.5778729319572449, | |
| "learning_rate": 9.976162677027284e-07, | |
| "loss": 0.42080622911453247, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.676056338028169, | |
| "grad_norm": 0.5267013907432556, | |
| "learning_rate": 9.975348529157229e-07, | |
| "loss": 0.40949106216430664, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.704225352112676, | |
| "grad_norm": 0.5284983515739441, | |
| "learning_rate": 9.974520748599421e-07, | |
| "loss": 0.4082256555557251, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.732394366197183, | |
| "grad_norm": 0.49156272411346436, | |
| "learning_rate": 9.973679337875418e-07, | |
| "loss": 0.3944624662399292, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.76056338028169, | |
| "grad_norm": 0.4944726824760437, | |
| "learning_rate": 9.972824299548309e-07, | |
| "loss": 0.4087256193161011, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.788732394366197, | |
| "grad_norm": 0.4764452874660492, | |
| "learning_rate": 9.971955636222684e-07, | |
| "loss": 0.4067206382751465, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.8169014084507042, | |
| "grad_norm": 0.48928746581077576, | |
| "learning_rate": 9.971073350544644e-07, | |
| "loss": 0.4004918336868286, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.8450704225352113, | |
| "grad_norm": 0.4580424726009369, | |
| "learning_rate": 9.970177445201783e-07, | |
| "loss": 0.4040325880050659, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.8732394366197183, | |
| "grad_norm": 0.5053924322128296, | |
| "learning_rate": 9.969267922923188e-07, | |
| "loss": 0.40139085054397583, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.9014084507042255, | |
| "grad_norm": 0.4661526679992676, | |
| "learning_rate": 9.968344786479415e-07, | |
| "loss": 0.38993388414382935, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.9295774647887325, | |
| "grad_norm": 0.4677845537662506, | |
| "learning_rate": 9.967408038682505e-07, | |
| "loss": 0.4014376401901245, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.9577464788732395, | |
| "grad_norm": 0.4655434787273407, | |
| "learning_rate": 9.96645768238595e-07, | |
| "loss": 0.3975449204444885, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.9859154929577465, | |
| "grad_norm": 0.4675063192844391, | |
| "learning_rate": 9.965493720484698e-07, | |
| "loss": 0.4009154438972473, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5548242926597595, | |
| "learning_rate": 9.964516155915151e-07, | |
| "loss": 0.39267462491989136, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 2.028169014084507, | |
| "grad_norm": 0.4601926803588867, | |
| "learning_rate": 9.963524991655133e-07, | |
| "loss": 0.3973795473575592, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 2.056338028169014, | |
| "grad_norm": 0.4464695155620575, | |
| "learning_rate": 9.962520230723906e-07, | |
| "loss": 0.39020174741744995, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 2.084507042253521, | |
| "grad_norm": 0.42715415358543396, | |
| "learning_rate": 9.961501876182148e-07, | |
| "loss": 0.3930002450942993, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.112676056338028, | |
| "grad_norm": 0.3989242613315582, | |
| "learning_rate": 9.960469931131936e-07, | |
| "loss": 0.3865053653717041, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 2.140845070422535, | |
| "grad_norm": 0.4167341887950897, | |
| "learning_rate": 9.959424398716763e-07, | |
| "loss": 0.39777663350105286, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 2.169014084507042, | |
| "grad_norm": 0.4046856760978699, | |
| "learning_rate": 9.958365282121496e-07, | |
| "loss": 0.38023141026496887, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 2.1971830985915495, | |
| "grad_norm": 0.40858548879623413, | |
| "learning_rate": 9.95729258457239e-07, | |
| "loss": 0.37487876415252686, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 2.2253521126760565, | |
| "grad_norm": 0.3576146364212036, | |
| "learning_rate": 9.956206309337066e-07, | |
| "loss": 0.3785707354545593, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.2535211267605635, | |
| "grad_norm": 0.35235047340393066, | |
| "learning_rate": 9.955106459724508e-07, | |
| "loss": 0.38552170991897583, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 2.2816901408450705, | |
| "grad_norm": 0.373362272977829, | |
| "learning_rate": 9.953993039085048e-07, | |
| "loss": 0.38321995735168457, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 2.3098591549295775, | |
| "grad_norm": 0.3574947416782379, | |
| "learning_rate": 9.952866050810363e-07, | |
| "loss": 0.37346434593200684, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 2.3380281690140845, | |
| "grad_norm": 0.36156368255615234, | |
| "learning_rate": 9.951725498333448e-07, | |
| "loss": 0.382648229598999, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 2.3661971830985915, | |
| "grad_norm": 0.3521256148815155, | |
| "learning_rate": 9.950571385128625e-07, | |
| "loss": 0.3722230792045593, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 2.3943661971830985, | |
| "grad_norm": 0.3384946584701538, | |
| "learning_rate": 9.949403714711526e-07, | |
| "loss": 0.3648328185081482, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 2.4225352112676055, | |
| "grad_norm": 0.34228095412254333, | |
| "learning_rate": 9.948222490639075e-07, | |
| "loss": 0.372160941362381, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 2.4507042253521125, | |
| "grad_norm": 0.34330716729164124, | |
| "learning_rate": 9.947027716509488e-07, | |
| "loss": 0.36588054895401, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 2.4788732394366195, | |
| "grad_norm": 0.34555092453956604, | |
| "learning_rate": 9.94581939596225e-07, | |
| "loss": 0.38422292470932007, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 2.507042253521127, | |
| "grad_norm": 0.34432411193847656, | |
| "learning_rate": 9.944597532678119e-07, | |
| "loss": 0.3802357316017151, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.535211267605634, | |
| "grad_norm": 0.35508641600608826, | |
| "learning_rate": 9.943362130379101e-07, | |
| "loss": 0.37436896562576294, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 2.563380281690141, | |
| "grad_norm": 0.3540443181991577, | |
| "learning_rate": 9.942113192828444e-07, | |
| "loss": 0.39830613136291504, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 2.591549295774648, | |
| "grad_norm": 0.3429860472679138, | |
| "learning_rate": 9.940850723830632e-07, | |
| "loss": 0.38153308629989624, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 2.619718309859155, | |
| "grad_norm": 0.3220756947994232, | |
| "learning_rate": 9.939574727231362e-07, | |
| "loss": 0.36020469665527344, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 2.647887323943662, | |
| "grad_norm": 0.3417351245880127, | |
| "learning_rate": 9.93828520691754e-07, | |
| "loss": 0.38868680596351624, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 2.676056338028169, | |
| "grad_norm": 0.3259858191013336, | |
| "learning_rate": 9.93698216681727e-07, | |
| "loss": 0.37741273641586304, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 2.704225352112676, | |
| "grad_norm": 0.33722448348999023, | |
| "learning_rate": 9.93566561089984e-07, | |
| "loss": 0.3821848928928375, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 2.732394366197183, | |
| "grad_norm": 0.31846100091934204, | |
| "learning_rate": 9.934335543175705e-07, | |
| "loss": 0.3690311014652252, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 2.76056338028169, | |
| "grad_norm": 0.34040549397468567, | |
| "learning_rate": 9.932991967696482e-07, | |
| "loss": 0.3875328600406647, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 2.788732394366197, | |
| "grad_norm": 0.3258971571922302, | |
| "learning_rate": 9.931634888554935e-07, | |
| "loss": 0.3811268210411072, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.816901408450704, | |
| "grad_norm": 0.32806867361068726, | |
| "learning_rate": 9.930264309884964e-07, | |
| "loss": 0.3713844418525696, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 2.845070422535211, | |
| "grad_norm": 0.3252440094947815, | |
| "learning_rate": 9.928880235861588e-07, | |
| "loss": 0.3812159299850464, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 2.873239436619718, | |
| "grad_norm": 0.33440181612968445, | |
| "learning_rate": 9.927482670700936e-07, | |
| "loss": 0.37723666429519653, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 2.9014084507042255, | |
| "grad_norm": 0.3046083152294159, | |
| "learning_rate": 9.926071618660237e-07, | |
| "loss": 0.3681407868862152, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 2.9295774647887325, | |
| "grad_norm": 0.3097338378429413, | |
| "learning_rate": 9.924647084037797e-07, | |
| "loss": 0.3724687099456787, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.9577464788732395, | |
| "grad_norm": 0.32305970788002014, | |
| "learning_rate": 9.923209071172994e-07, | |
| "loss": 0.3641166090965271, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.9859154929577465, | |
| "grad_norm": 0.32677826285362244, | |
| "learning_rate": 9.921757584446268e-07, | |
| "loss": 0.36330974102020264, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.4263511896133423, | |
| "learning_rate": 9.9202926282791e-07, | |
| "loss": 0.35592788457870483, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 3.028169014084507, | |
| "grad_norm": 0.2994212508201599, | |
| "learning_rate": 9.918814207133997e-07, | |
| "loss": 0.3603532314300537, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 3.056338028169014, | |
| "grad_norm": 0.30977630615234375, | |
| "learning_rate": 9.917322325514487e-07, | |
| "loss": 0.374819278717041, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.084507042253521, | |
| "grad_norm": 0.31614792346954346, | |
| "learning_rate": 9.915816987965102e-07, | |
| "loss": 0.3680700957775116, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 3.112676056338028, | |
| "grad_norm": 0.30458712577819824, | |
| "learning_rate": 9.91429819907136e-07, | |
| "loss": 0.3753468692302704, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 3.140845070422535, | |
| "grad_norm": 0.30280736088752747, | |
| "learning_rate": 9.912765963459756e-07, | |
| "loss": 0.3559075593948364, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 3.169014084507042, | |
| "grad_norm": 0.3088322579860687, | |
| "learning_rate": 9.911220285797748e-07, | |
| "loss": 0.36761462688446045, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 3.1971830985915495, | |
| "grad_norm": 0.3007463216781616, | |
| "learning_rate": 9.909661170793733e-07, | |
| "loss": 0.3572486340999603, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 3.2253521126760565, | |
| "grad_norm": 0.29317507147789, | |
| "learning_rate": 9.908088623197048e-07, | |
| "loss": 0.37356066703796387, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 3.2535211267605635, | |
| "grad_norm": 0.30190175771713257, | |
| "learning_rate": 9.906502647797945e-07, | |
| "loss": 0.3747510015964508, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 3.2816901408450705, | |
| "grad_norm": 0.300547331571579, | |
| "learning_rate": 9.904903249427582e-07, | |
| "loss": 0.3723798096179962, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 3.3098591549295775, | |
| "grad_norm": 0.2943092882633209, | |
| "learning_rate": 9.903290432958003e-07, | |
| "loss": 0.3614634573459625, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 3.3380281690140845, | |
| "grad_norm": 0.2933284342288971, | |
| "learning_rate": 9.901664203302124e-07, | |
| "loss": 0.34804195165634155, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.3661971830985915, | |
| "grad_norm": 0.2936899662017822, | |
| "learning_rate": 9.900024565413727e-07, | |
| "loss": 0.3482627272605896, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 3.3943661971830985, | |
| "grad_norm": 0.2972092628479004, | |
| "learning_rate": 9.89837152428743e-07, | |
| "loss": 0.35861676931381226, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 3.4225352112676055, | |
| "grad_norm": 0.296779602766037, | |
| "learning_rate": 9.896705084958687e-07, | |
| "loss": 0.37210696935653687, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 3.4507042253521125, | |
| "grad_norm": 0.2911286950111389, | |
| "learning_rate": 9.895025252503755e-07, | |
| "loss": 0.33883392810821533, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 3.4788732394366195, | |
| "grad_norm": 0.29729408025741577, | |
| "learning_rate": 9.8933320320397e-07, | |
| "loss": 0.3569541573524475, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 3.507042253521127, | |
| "grad_norm": 0.29103100299835205, | |
| "learning_rate": 9.891625428724364e-07, | |
| "loss": 0.36078906059265137, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 3.535211267605634, | |
| "grad_norm": 0.2976583242416382, | |
| "learning_rate": 9.889905447756355e-07, | |
| "loss": 0.3531530499458313, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 3.563380281690141, | |
| "grad_norm": 0.3033563196659088, | |
| "learning_rate": 9.888172094375033e-07, | |
| "loss": 0.37008020281791687, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 3.591549295774648, | |
| "grad_norm": 0.30928340554237366, | |
| "learning_rate": 9.886425373860496e-07, | |
| "loss": 0.3652263283729553, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 3.619718309859155, | |
| "grad_norm": 0.3299793601036072, | |
| "learning_rate": 9.88466529153356e-07, | |
| "loss": 0.36931300163269043, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.647887323943662, | |
| "grad_norm": 0.29216262698173523, | |
| "learning_rate": 9.882891852755732e-07, | |
| "loss": 0.3560551404953003, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 3.676056338028169, | |
| "grad_norm": 0.3086439371109009, | |
| "learning_rate": 9.881105062929221e-07, | |
| "loss": 0.3592608869075775, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 3.704225352112676, | |
| "grad_norm": 0.3008037805557251, | |
| "learning_rate": 9.879304927496896e-07, | |
| "loss": 0.35765546560287476, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 3.732394366197183, | |
| "grad_norm": 0.3011510968208313, | |
| "learning_rate": 9.877491451942284e-07, | |
| "loss": 0.35755690932273865, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 3.76056338028169, | |
| "grad_norm": 0.28508952260017395, | |
| "learning_rate": 9.875664641789543e-07, | |
| "loss": 0.3475223183631897, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 3.788732394366197, | |
| "grad_norm": 0.29807090759277344, | |
| "learning_rate": 9.873824502603459e-07, | |
| "loss": 0.3468858003616333, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 3.816901408450704, | |
| "grad_norm": 0.30015671253204346, | |
| "learning_rate": 9.871971039989407e-07, | |
| "loss": 0.3525606393814087, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 3.845070422535211, | |
| "grad_norm": 0.2894802689552307, | |
| "learning_rate": 9.870104259593362e-07, | |
| "loss": 0.35189589858055115, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 3.873239436619718, | |
| "grad_norm": 0.2956956624984741, | |
| "learning_rate": 9.86822416710186e-07, | |
| "loss": 0.3662959337234497, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 3.9014084507042255, | |
| "grad_norm": 0.28614693880081177, | |
| "learning_rate": 9.866330768241983e-07, | |
| "loss": 0.3523305654525757, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.9295774647887325, | |
| "grad_norm": 0.3109326958656311, | |
| "learning_rate": 9.86442406878136e-07, | |
| "loss": 0.3661171495914459, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 3.9577464788732395, | |
| "grad_norm": 0.29977917671203613, | |
| "learning_rate": 9.862504074528126e-07, | |
| "loss": 0.3687261939048767, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 3.9859154929577465, | |
| "grad_norm": 0.2874816954135895, | |
| "learning_rate": 9.860570791330911e-07, | |
| "loss": 0.35026735067367554, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.39478132128715515, | |
| "learning_rate": 9.85862422507884e-07, | |
| "loss": 0.329179584980011, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 4.028169014084507, | |
| "grad_norm": 0.29594185948371887, | |
| "learning_rate": 9.856664381701483e-07, | |
| "loss": 0.34915629029273987, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 4.056338028169014, | |
| "grad_norm": 0.2942439615726471, | |
| "learning_rate": 9.854691267168871e-07, | |
| "loss": 0.3501034080982208, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 4.084507042253521, | |
| "grad_norm": 0.3186146318912506, | |
| "learning_rate": 9.852704887491445e-07, | |
| "loss": 0.3498520255088806, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 4.112676056338028, | |
| "grad_norm": 0.2865906059741974, | |
| "learning_rate": 9.850705248720068e-07, | |
| "loss": 0.359851598739624, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 4.140845070422535, | |
| "grad_norm": 0.2773308753967285, | |
| "learning_rate": 9.848692356945981e-07, | |
| "loss": 0.34519776701927185, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 4.169014084507042, | |
| "grad_norm": 0.27520084381103516, | |
| "learning_rate": 9.846666218300807e-07, | |
| "loss": 0.3370436429977417, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 4.197183098591549, | |
| "grad_norm": 0.31606534123420715, | |
| "learning_rate": 9.844626838956513e-07, | |
| "loss": 0.3660886287689209, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 4.225352112676056, | |
| "grad_norm": 0.30757179856300354, | |
| "learning_rate": 9.8425742251254e-07, | |
| "loss": 0.3431619703769684, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 4.253521126760563, | |
| "grad_norm": 0.2864473760128021, | |
| "learning_rate": 9.84050838306009e-07, | |
| "loss": 0.3478638231754303, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 4.28169014084507, | |
| "grad_norm": 0.2924051880836487, | |
| "learning_rate": 9.838429319053495e-07, | |
| "loss": 0.3459091782569885, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 4.309859154929577, | |
| "grad_norm": 0.2723977565765381, | |
| "learning_rate": 9.836337039438803e-07, | |
| "loss": 0.3437414765357971, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 4.338028169014084, | |
| "grad_norm": 0.28301340341567993, | |
| "learning_rate": 9.83423155058946e-07, | |
| "loss": 0.351753830909729, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 4.366197183098592, | |
| "grad_norm": 0.3007968068122864, | |
| "learning_rate": 9.832112858919155e-07, | |
| "loss": 0.3534032106399536, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 4.394366197183099, | |
| "grad_norm": 0.2823623716831207, | |
| "learning_rate": 9.829980970881784e-07, | |
| "loss": 0.33871978521347046, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 4.422535211267606, | |
| "grad_norm": 0.27985984086990356, | |
| "learning_rate": 9.82783589297145e-07, | |
| "loss": 0.35134732723236084, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 4.450704225352113, | |
| "grad_norm": 0.29764989018440247, | |
| "learning_rate": 9.825677631722435e-07, | |
| "loss": 0.35344886779785156, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 4.47887323943662, | |
| "grad_norm": 0.2861703634262085, | |
| "learning_rate": 9.823506193709174e-07, | |
| "loss": 0.3553098440170288, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 4.507042253521127, | |
| "grad_norm": 0.3005011975765228, | |
| "learning_rate": 9.821321585546243e-07, | |
| "loss": 0.349773645401001, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 4.535211267605634, | |
| "grad_norm": 0.28691744804382324, | |
| "learning_rate": 9.81912381388834e-07, | |
| "loss": 0.3327012360095978, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 4.563380281690141, | |
| "grad_norm": 0.3060745298862457, | |
| "learning_rate": 9.816912885430258e-07, | |
| "loss": 0.3464226722717285, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 4.591549295774648, | |
| "grad_norm": 0.3035100996494293, | |
| "learning_rate": 9.814688806906868e-07, | |
| "loss": 0.3499942719936371, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 4.619718309859155, | |
| "grad_norm": 0.3114430606365204, | |
| "learning_rate": 9.812451585093098e-07, | |
| "loss": 0.3396627604961395, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 4.647887323943662, | |
| "grad_norm": 0.30142080783843994, | |
| "learning_rate": 9.810201226803917e-07, | |
| "loss": 0.3466919958591461, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 4.676056338028169, | |
| "grad_norm": 0.2819617986679077, | |
| "learning_rate": 9.807937738894303e-07, | |
| "loss": 0.34856730699539185, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 4.704225352112676, | |
| "grad_norm": 0.29183247685432434, | |
| "learning_rate": 9.805661128259235e-07, | |
| "loss": 0.3437175750732422, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 4.732394366197183, | |
| "grad_norm": 0.29465699195861816, | |
| "learning_rate": 9.80337140183366e-07, | |
| "loss": 0.3438083827495575, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 4.76056338028169, | |
| "grad_norm": 0.28720420598983765, | |
| "learning_rate": 9.801068566592483e-07, | |
| "loss": 0.3422589898109436, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 4.788732394366197, | |
| "grad_norm": 0.2751031816005707, | |
| "learning_rate": 9.798752629550546e-07, | |
| "loss": 0.3460365831851959, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 4.816901408450704, | |
| "grad_norm": 0.2868765592575073, | |
| "learning_rate": 9.796423597762588e-07, | |
| "loss": 0.3391006886959076, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 4.845070422535211, | |
| "grad_norm": 0.2844865024089813, | |
| "learning_rate": 9.794081478323245e-07, | |
| "loss": 0.3488645851612091, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 4.873239436619718, | |
| "grad_norm": 0.28600648045539856, | |
| "learning_rate": 9.791726278367021e-07, | |
| "loss": 0.3440667986869812, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 4.901408450704225, | |
| "grad_norm": 0.29167741537094116, | |
| "learning_rate": 9.78935800506826e-07, | |
| "loss": 0.34016746282577515, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 4.929577464788732, | |
| "grad_norm": 0.29203853011131287, | |
| "learning_rate": 9.786976665641138e-07, | |
| "loss": 0.33034777641296387, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 4.957746478873239, | |
| "grad_norm": 0.29975563287734985, | |
| "learning_rate": 9.784582267339622e-07, | |
| "loss": 0.34664660692214966, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 4.985915492957746, | |
| "grad_norm": 0.2778502106666565, | |
| "learning_rate": 9.78217481745747e-07, | |
| "loss": 0.34249287843704224, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.396133691072464, | |
| "learning_rate": 9.779754323328192e-07, | |
| "loss": 0.34673285484313965, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 5.028169014084507, | |
| "grad_norm": 0.29174622893333435, | |
| "learning_rate": 9.777320792325025e-07, | |
| "loss": 0.3266841173171997, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 5.056338028169014, | |
| "grad_norm": 0.28281646966934204, | |
| "learning_rate": 9.774874231860935e-07, | |
| "loss": 0.3295621871948242, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 5.084507042253521, | |
| "grad_norm": 0.2767295837402344, | |
| "learning_rate": 9.772414649388568e-07, | |
| "loss": 0.3460637629032135, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 5.112676056338028, | |
| "grad_norm": 0.28246212005615234, | |
| "learning_rate": 9.769942052400235e-07, | |
| "loss": 0.3325508236885071, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 5.140845070422535, | |
| "grad_norm": 0.31317514181137085, | |
| "learning_rate": 9.767456448427896e-07, | |
| "loss": 0.3373739719390869, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 5.169014084507042, | |
| "grad_norm": 0.29388973116874695, | |
| "learning_rate": 9.764957845043135e-07, | |
| "loss": 0.3335680365562439, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 5.197183098591549, | |
| "grad_norm": 0.3093099892139435, | |
| "learning_rate": 9.76244624985713e-07, | |
| "loss": 0.3288199007511139, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 5.225352112676056, | |
| "grad_norm": 0.2718607187271118, | |
| "learning_rate": 9.759921670520634e-07, | |
| "loss": 0.33789312839508057, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 5.253521126760563, | |
| "grad_norm": 0.3087296485900879, | |
| "learning_rate": 9.757384114723953e-07, | |
| "loss": 0.3482661843299866, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 5.28169014084507, | |
| "grad_norm": 0.2887554466724396, | |
| "learning_rate": 9.754833590196926e-07, | |
| "loss": 0.3353871703147888, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 5.309859154929577, | |
| "grad_norm": 0.2770691514015198, | |
| "learning_rate": 9.752270104708888e-07, | |
| "loss": 0.33239609003067017, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 5.338028169014084, | |
| "grad_norm": 0.29489442706108093, | |
| "learning_rate": 9.749693666068663e-07, | |
| "loss": 0.34318211674690247, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 5.366197183098592, | |
| "grad_norm": 0.31870850920677185, | |
| "learning_rate": 9.747104282124531e-07, | |
| "loss": 0.33540403842926025, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 5.394366197183099, | |
| "grad_norm": 0.27267521619796753, | |
| "learning_rate": 9.744501960764203e-07, | |
| "loss": 0.33416521549224854, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 5.422535211267606, | |
| "grad_norm": 0.284470796585083, | |
| "learning_rate": 9.741886709914803e-07, | |
| "loss": 0.3242385685443878, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 5.450704225352113, | |
| "grad_norm": 0.2988561689853668, | |
| "learning_rate": 9.739258537542835e-07, | |
| "loss": 0.3325580656528473, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 5.47887323943662, | |
| "grad_norm": 0.29107666015625, | |
| "learning_rate": 9.73661745165417e-07, | |
| "loss": 0.34368401765823364, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 5.507042253521127, | |
| "grad_norm": 0.289497047662735, | |
| "learning_rate": 9.733963460294015e-07, | |
| "loss": 0.33908677101135254, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 5.535211267605634, | |
| "grad_norm": 0.27910080552101135, | |
| "learning_rate": 9.731296571546885e-07, | |
| "loss": 0.3478449285030365, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 5.563380281690141, | |
| "grad_norm": 0.2966774106025696, | |
| "learning_rate": 9.728616793536587e-07, | |
| "loss": 0.3371037244796753, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 5.591549295774648, | |
| "grad_norm": 0.30997180938720703, | |
| "learning_rate": 9.72592413442619e-07, | |
| "loss": 0.3469342589378357, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 5.619718309859155, | |
| "grad_norm": 0.2851829528808594, | |
| "learning_rate": 9.723218602418e-07, | |
| "loss": 0.3497530221939087, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 5.647887323943662, | |
| "grad_norm": 0.29238471388816833, | |
| "learning_rate": 9.720500205753538e-07, | |
| "loss": 0.3286020755767822, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 5.676056338028169, | |
| "grad_norm": 0.2877226769924164, | |
| "learning_rate": 9.717768952713511e-07, | |
| "loss": 0.338655948638916, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 5.704225352112676, | |
| "grad_norm": 0.28834086656570435, | |
| "learning_rate": 9.71502485161779e-07, | |
| "loss": 0.333360880613327, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 5.732394366197183, | |
| "grad_norm": 0.28225836157798767, | |
| "learning_rate": 9.71226791082538e-07, | |
| "loss": 0.3514789640903473, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 5.76056338028169, | |
| "grad_norm": 0.28878796100616455, | |
| "learning_rate": 9.709498138734403e-07, | |
| "loss": 0.3271612524986267, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 5.788732394366197, | |
| "grad_norm": 0.29221564531326294, | |
| "learning_rate": 9.706715543782064e-07, | |
| "loss": 0.32984620332717896, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 5.816901408450704, | |
| "grad_norm": 0.31417179107666016, | |
| "learning_rate": 9.703920134444632e-07, | |
| "loss": 0.32708263397216797, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 5.845070422535211, | |
| "grad_norm": 0.30656933784484863, | |
| "learning_rate": 9.701111919237408e-07, | |
| "loss": 0.3378485143184662, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 5.873239436619718, | |
| "grad_norm": 0.28274714946746826, | |
| "learning_rate": 9.698290906714702e-07, | |
| "loss": 0.3210570812225342, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 5.901408450704225, | |
| "grad_norm": 0.28694605827331543, | |
| "learning_rate": 9.695457105469804e-07, | |
| "loss": 0.33672863245010376, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 5.929577464788732, | |
| "grad_norm": 0.2965106666088104, | |
| "learning_rate": 9.69261052413497e-07, | |
| "loss": 0.34379851818084717, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 5.957746478873239, | |
| "grad_norm": 0.3144500255584717, | |
| "learning_rate": 9.689751171381377e-07, | |
| "loss": 0.33530962467193604, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 5.985915492957746, | |
| "grad_norm": 0.274070680141449, | |
| "learning_rate": 9.68687905591911e-07, | |
| "loss": 0.32609909772872925, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.3976318836212158, | |
| "learning_rate": 9.683994186497132e-07, | |
| "loss": 0.3320915997028351, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 6.028169014084507, | |
| "grad_norm": 0.27306580543518066, | |
| "learning_rate": 9.681096571903252e-07, | |
| "loss": 0.32757407426834106, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 6.056338028169014, | |
| "grad_norm": 0.2815074622631073, | |
| "learning_rate": 9.67818622096411e-07, | |
| "loss": 0.31570878624916077, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 6.084507042253521, | |
| "grad_norm": 0.29271578788757324, | |
| "learning_rate": 9.67526314254514e-07, | |
| "loss": 0.33092743158340454, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 6.112676056338028, | |
| "grad_norm": 0.2819676399230957, | |
| "learning_rate": 9.672327345550543e-07, | |
| "loss": 0.32412028312683105, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 6.140845070422535, | |
| "grad_norm": 0.29121264815330505, | |
| "learning_rate": 9.669378838923267e-07, | |
| "loss": 0.324832558631897, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 6.169014084507042, | |
| "grad_norm": 0.28991273045539856, | |
| "learning_rate": 9.666417631644976e-07, | |
| "loss": 0.3393062949180603, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 6.197183098591549, | |
| "grad_norm": 0.28072309494018555, | |
| "learning_rate": 9.66344373273602e-07, | |
| "loss": 0.32950296998023987, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 6.225352112676056, | |
| "grad_norm": 0.3102487027645111, | |
| "learning_rate": 9.66045715125541e-07, | |
| "loss": 0.3289036154747009, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 6.253521126760563, | |
| "grad_norm": 0.2856598198413849, | |
| "learning_rate": 9.657457896300791e-07, | |
| "loss": 0.30844709277153015, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 6.28169014084507, | |
| "grad_norm": 0.28150248527526855, | |
| "learning_rate": 9.654445977008414e-07, | |
| "loss": 0.32252323627471924, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 6.309859154929577, | |
| "grad_norm": 0.3106309175491333, | |
| "learning_rate": 9.651421402553108e-07, | |
| "loss": 0.3153507113456726, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 6.338028169014084, | |
| "grad_norm": 0.3323248028755188, | |
| "learning_rate": 9.648384182148252e-07, | |
| "loss": 0.3372737169265747, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 6.366197183098592, | |
| "grad_norm": 0.2816256880760193, | |
| "learning_rate": 9.645334325045745e-07, | |
| "loss": 0.3402503728866577, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 6.394366197183099, | |
| "grad_norm": 0.28511133790016174, | |
| "learning_rate": 9.64227184053598e-07, | |
| "loss": 0.3433256149291992, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 6.422535211267606, | |
| "grad_norm": 0.27890780568122864, | |
| "learning_rate": 9.63919673794782e-07, | |
| "loss": 0.3293980658054352, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 6.450704225352113, | |
| "grad_norm": 0.29692021012306213, | |
| "learning_rate": 9.636109026648554e-07, | |
| "loss": 0.3282950818538666, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 6.47887323943662, | |
| "grad_norm": 0.2867494523525238, | |
| "learning_rate": 9.633008716043892e-07, | |
| "loss": 0.3350924253463745, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 6.507042253521127, | |
| "grad_norm": 0.27419739961624146, | |
| "learning_rate": 9.629895815577915e-07, | |
| "loss": 0.33370357751846313, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 6.535211267605634, | |
| "grad_norm": 0.2837441563606262, | |
| "learning_rate": 9.626770334733058e-07, | |
| "loss": 0.3225363790988922, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 6.563380281690141, | |
| "grad_norm": 0.28063684701919556, | |
| "learning_rate": 9.623632283030077e-07, | |
| "loss": 0.33922791481018066, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 6.591549295774648, | |
| "grad_norm": 0.2789226770401001, | |
| "learning_rate": 9.620481670028026e-07, | |
| "loss": 0.3289903998374939, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 6.619718309859155, | |
| "grad_norm": 0.2788150906562805, | |
| "learning_rate": 9.617318505324212e-07, | |
| "loss": 0.3213944435119629, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 6.647887323943662, | |
| "grad_norm": 0.2622866928577423, | |
| "learning_rate": 9.614142798554186e-07, | |
| "loss": 0.3391764461994171, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 6.676056338028169, | |
| "grad_norm": 0.2952481806278229, | |
| "learning_rate": 9.610954559391704e-07, | |
| "loss": 0.31737983226776123, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 6.704225352112676, | |
| "grad_norm": 0.28387367725372314, | |
| "learning_rate": 9.607753797548691e-07, | |
| "loss": 0.33009767532348633, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 6.732394366197183, | |
| "grad_norm": 0.28222769498825073, | |
| "learning_rate": 9.604540522775227e-07, | |
| "loss": 0.3226430416107178, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 6.76056338028169, | |
| "grad_norm": 0.2985075116157532, | |
| "learning_rate": 9.601314744859504e-07, | |
| "loss": 0.3328002393245697, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 6.788732394366197, | |
| "grad_norm": 0.2787352204322815, | |
| "learning_rate": 9.598076473627796e-07, | |
| "loss": 0.3292522728443146, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 6.816901408450704, | |
| "grad_norm": 0.2772713899612427, | |
| "learning_rate": 9.594825718944444e-07, | |
| "loss": 0.322078138589859, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 6.845070422535211, | |
| "grad_norm": 0.28727421164512634, | |
| "learning_rate": 9.59156249071181e-07, | |
| "loss": 0.3206414580345154, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 6.873239436619718, | |
| "grad_norm": 0.28722915053367615, | |
| "learning_rate": 9.588286798870248e-07, | |
| "loss": 0.34071967005729675, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 6.901408450704225, | |
| "grad_norm": 0.2791661322116852, | |
| "learning_rate": 9.58499865339809e-07, | |
| "loss": 0.32371699810028076, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 6.929577464788732, | |
| "grad_norm": 0.30174046754837036, | |
| "learning_rate": 9.581698064311592e-07, | |
| "loss": 0.32212015986442566, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 6.957746478873239, | |
| "grad_norm": 0.2757203280925751, | |
| "learning_rate": 9.578385041664925e-07, | |
| "loss": 0.3286738395690918, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 6.985915492957746, | |
| "grad_norm": 0.2977890968322754, | |
| "learning_rate": 9.575059595550127e-07, | |
| "loss": 0.32400673627853394, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.38676717877388, | |
| "learning_rate": 9.571721736097088e-07, | |
| "loss": 0.31549203395843506, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 7.028169014084507, | |
| "grad_norm": 0.28209057450294495, | |
| "learning_rate": 9.568371473473503e-07, | |
| "loss": 0.3403396010398865, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 7.056338028169014, | |
| "grad_norm": 0.28578808903694153, | |
| "learning_rate": 9.565008817884854e-07, | |
| "loss": 0.32727712392807007, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 7.084507042253521, | |
| "grad_norm": 0.2921590805053711, | |
| "learning_rate": 9.561633779574372e-07, | |
| "loss": 0.33234310150146484, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 7.112676056338028, | |
| "grad_norm": 0.27242740988731384, | |
| "learning_rate": 9.55824636882301e-07, | |
| "loss": 0.3204275965690613, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 7.140845070422535, | |
| "grad_norm": 0.28681573271751404, | |
| "learning_rate": 9.554846595949413e-07, | |
| "loss": 0.3127729594707489, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 7.169014084507042, | |
| "grad_norm": 0.27501875162124634, | |
| "learning_rate": 9.55143447130987e-07, | |
| "loss": 0.3219028115272522, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 7.197183098591549, | |
| "grad_norm": 0.2893284261226654, | |
| "learning_rate": 9.54801000529831e-07, | |
| "loss": 0.3149603009223938, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 7.225352112676056, | |
| "grad_norm": 0.29977115988731384, | |
| "learning_rate": 9.54457320834625e-07, | |
| "loss": 0.3116862177848816, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 7.253521126760563, | |
| "grad_norm": 0.2911919355392456, | |
| "learning_rate": 9.54112409092277e-07, | |
| "loss": 0.3377895653247833, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 7.28169014084507, | |
| "grad_norm": 0.32472458481788635, | |
| "learning_rate": 9.537662663534477e-07, | |
| "loss": 0.3152693510055542, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 7.309859154929577, | |
| "grad_norm": 0.2667696177959442, | |
| "learning_rate": 9.534188936725483e-07, | |
| "loss": 0.3181629180908203, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 7.338028169014084, | |
| "grad_norm": 0.29469212889671326, | |
| "learning_rate": 9.530702921077358e-07, | |
| "loss": 0.32251378893852234, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 7.366197183098592, | |
| "grad_norm": 0.2710505425930023, | |
| "learning_rate": 9.527204627209112e-07, | |
| "loss": 0.3157137632369995, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 7.394366197183099, | |
| "grad_norm": 0.29605209827423096, | |
| "learning_rate": 9.523694065777156e-07, | |
| "loss": 0.32492029666900635, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 7.422535211267606, | |
| "grad_norm": 0.28292831778526306, | |
| "learning_rate": 9.520171247475268e-07, | |
| "loss": 0.3182477653026581, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 7.450704225352113, | |
| "grad_norm": 0.28567084670066833, | |
| "learning_rate": 9.516636183034564e-07, | |
| "loss": 0.317740797996521, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 7.47887323943662, | |
| "grad_norm": 0.26249128580093384, | |
| "learning_rate": 9.513088883223463e-07, | |
| "loss": 0.3064804971218109, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 7.507042253521127, | |
| "grad_norm": 0.2805914878845215, | |
| "learning_rate": 9.509529358847654e-07, | |
| "loss": 0.32089754939079285, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 7.535211267605634, | |
| "grad_norm": 0.2892814874649048, | |
| "learning_rate": 9.505957620750069e-07, | |
| "loss": 0.31203514337539673, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 7.563380281690141, | |
| "grad_norm": 0.2809925079345703, | |
| "learning_rate": 9.502373679810839e-07, | |
| "loss": 0.3222312331199646, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 7.591549295774648, | |
| "grad_norm": 0.2793818414211273, | |
| "learning_rate": 9.49877754694727e-07, | |
| "loss": 0.30804064869880676, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 7.619718309859155, | |
| "grad_norm": 0.27966272830963135, | |
| "learning_rate": 9.495169233113806e-07, | |
| "loss": 0.32768452167510986, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 7.647887323943662, | |
| "grad_norm": 0.2743930220603943, | |
| "learning_rate": 9.491548749301997e-07, | |
| "loss": 0.3242339491844177, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 7.676056338028169, | |
| "grad_norm": 0.2765263319015503, | |
| "learning_rate": 9.487916106540465e-07, | |
| "loss": 0.3245530128479004, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 7.704225352112676, | |
| "grad_norm": 0.29381853342056274, | |
| "learning_rate": 9.484271315894871e-07, | |
| "loss": 0.32187986373901367, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 7.732394366197183, | |
| "grad_norm": 0.27294641733169556, | |
| "learning_rate": 9.480614388467877e-07, | |
| "loss": 0.3233500123023987, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 7.76056338028169, | |
| "grad_norm": 0.28944891691207886, | |
| "learning_rate": 9.47694533539912e-07, | |
| "loss": 0.31809201836586, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 7.788732394366197, | |
| "grad_norm": 0.2922861576080322, | |
| "learning_rate": 9.473264167865171e-07, | |
| "loss": 0.33151817321777344, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 7.816901408450704, | |
| "grad_norm": 0.2928006649017334, | |
| "learning_rate": 9.469570897079504e-07, | |
| "loss": 0.3220402002334595, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 7.845070422535211, | |
| "grad_norm": 0.28323814272880554, | |
| "learning_rate": 9.465865534292464e-07, | |
| "loss": 0.31611043214797974, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 7.873239436619718, | |
| "grad_norm": 0.28506791591644287, | |
| "learning_rate": 9.462148090791228e-07, | |
| "loss": 0.32090169191360474, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 7.901408450704225, | |
| "grad_norm": 0.2799360156059265, | |
| "learning_rate": 9.458418577899774e-07, | |
| "loss": 0.344720721244812, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 7.929577464788732, | |
| "grad_norm": 0.27799472212791443, | |
| "learning_rate": 9.454677006978842e-07, | |
| "loss": 0.3141616880893707, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 7.957746478873239, | |
| "grad_norm": 0.27411341667175293, | |
| "learning_rate": 9.450923389425911e-07, | |
| "loss": 0.31020885705947876, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 7.985915492957746, | |
| "grad_norm": 0.28921812772750854, | |
| "learning_rate": 9.44715773667515e-07, | |
| "loss": 0.3182592988014221, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.3832477331161499, | |
| "learning_rate": 9.443380060197385e-07, | |
| "loss": 0.32039332389831543, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 8.028169014084508, | |
| "grad_norm": 0.2698141932487488, | |
| "learning_rate": 9.43959037150008e-07, | |
| "loss": 0.3155902028083801, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 8.056338028169014, | |
| "grad_norm": 0.2765481472015381, | |
| "learning_rate": 9.43578868212728e-07, | |
| "loss": 0.3177169859409332, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 8.084507042253522, | |
| "grad_norm": 0.27723443508148193, | |
| "learning_rate": 9.431975003659594e-07, | |
| "loss": 0.31647437810897827, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 8.112676056338028, | |
| "grad_norm": 0.26522088050842285, | |
| "learning_rate": 9.428149347714143e-07, | |
| "loss": 0.31819185614585876, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 8.140845070422536, | |
| "grad_norm": 0.28780215978622437, | |
| "learning_rate": 9.424311725944543e-07, | |
| "loss": 0.31119635701179504, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 8.169014084507042, | |
| "grad_norm": 0.2786031663417816, | |
| "learning_rate": 9.420462150040852e-07, | |
| "loss": 0.31440460681915283, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 8.19718309859155, | |
| "grad_norm": 0.26644277572631836, | |
| "learning_rate": 9.416600631729548e-07, | |
| "loss": 0.32182344794273376, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 8.225352112676056, | |
| "grad_norm": 0.2974756062030792, | |
| "learning_rate": 9.412727182773486e-07, | |
| "loss": 0.3225427269935608, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 8.253521126760564, | |
| "grad_norm": 0.2951170802116394, | |
| "learning_rate": 9.408841814971861e-07, | |
| "loss": 0.31894785165786743, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 8.28169014084507, | |
| "grad_norm": 0.28619688749313354, | |
| "learning_rate": 9.404944540160177e-07, | |
| "loss": 0.31788474321365356, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 8.309859154929578, | |
| "grad_norm": 0.2877795398235321, | |
| "learning_rate": 9.401035370210212e-07, | |
| "loss": 0.3235325217247009, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 8.338028169014084, | |
| "grad_norm": 0.30395635962486267, | |
| "learning_rate": 9.397114317029974e-07, | |
| "loss": 0.33284687995910645, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 8.366197183098592, | |
| "grad_norm": 0.2896060347557068, | |
| "learning_rate": 9.393181392563669e-07, | |
| "loss": 0.32644715905189514, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 8.394366197183098, | |
| "grad_norm": 0.2763223648071289, | |
| "learning_rate": 9.38923660879167e-07, | |
| "loss": 0.304126501083374, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 8.422535211267606, | |
| "grad_norm": 0.2764940559864044, | |
| "learning_rate": 9.385279977730472e-07, | |
| "loss": 0.3124150037765503, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 8.450704225352112, | |
| "grad_norm": 0.2838902771472931, | |
| "learning_rate": 9.381311511432658e-07, | |
| "loss": 0.32950958609580994, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 8.47887323943662, | |
| "grad_norm": 0.2854890823364258, | |
| "learning_rate": 9.377331221986866e-07, | |
| "loss": 0.30994099378585815, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 8.507042253521126, | |
| "grad_norm": 0.2682625353336334, | |
| "learning_rate": 9.373339121517746e-07, | |
| "loss": 0.31963592767715454, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 8.535211267605634, | |
| "grad_norm": 0.2849690318107605, | |
| "learning_rate": 9.36933522218593e-07, | |
| "loss": 0.3182557225227356, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 8.56338028169014, | |
| "grad_norm": 0.28616634011268616, | |
| "learning_rate": 9.36531953618799e-07, | |
| "loss": 0.30273881554603577, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 8.591549295774648, | |
| "grad_norm": 0.2721138596534729, | |
| "learning_rate": 9.361292075756401e-07, | |
| "loss": 0.3207533657550812, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 8.619718309859154, | |
| "grad_norm": 0.2752065360546112, | |
| "learning_rate": 9.357252853159505e-07, | |
| "loss": 0.3186470866203308, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 8.647887323943662, | |
| "grad_norm": 0.2684236168861389, | |
| "learning_rate": 9.353201880701477e-07, | |
| "loss": 0.31932806968688965, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 8.676056338028168, | |
| "grad_norm": 0.28039291501045227, | |
| "learning_rate": 9.34913917072228e-07, | |
| "loss": 0.31683626770973206, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 8.704225352112676, | |
| "grad_norm": 0.2638692855834961, | |
| "learning_rate": 9.345064735597633e-07, | |
| "loss": 0.2991946339607239, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 8.732394366197184, | |
| "grad_norm": 0.30425477027893066, | |
| "learning_rate": 9.340978587738972e-07, | |
| "loss": 0.3023770749568939, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 8.76056338028169, | |
| "grad_norm": 0.27750107645988464, | |
| "learning_rate": 9.336880739593415e-07, | |
| "loss": 0.31177228689193726, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 8.788732394366198, | |
| "grad_norm": 0.2731636166572571, | |
| "learning_rate": 9.332771203643714e-07, | |
| "loss": 0.3076733946800232, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 8.816901408450704, | |
| "grad_norm": 0.2740687131881714, | |
| "learning_rate": 9.328649992408231e-07, | |
| "loss": 0.30277711153030396, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 8.845070422535212, | |
| "grad_norm": 0.27956005930900574, | |
| "learning_rate": 9.324517118440888e-07, | |
| "loss": 0.30988752841949463, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 8.873239436619718, | |
| "grad_norm": 0.28827622532844543, | |
| "learning_rate": 9.320372594331137e-07, | |
| "loss": 0.32537323236465454, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 8.901408450704226, | |
| "grad_norm": 0.2771560847759247, | |
| "learning_rate": 9.316216432703916e-07, | |
| "loss": 0.3233356475830078, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 8.929577464788732, | |
| "grad_norm": 0.2804992198944092, | |
| "learning_rate": 9.312048646219617e-07, | |
| "loss": 0.31110987067222595, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 8.95774647887324, | |
| "grad_norm": 0.29048794507980347, | |
| "learning_rate": 9.307869247574038e-07, | |
| "loss": 0.3100625276565552, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 8.985915492957746, | |
| "grad_norm": 0.2751557230949402, | |
| "learning_rate": 9.303678249498352e-07, | |
| "loss": 0.30283451080322266, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.38358354568481445, | |
| "learning_rate": 9.299475664759068e-07, | |
| "loss": 0.3202640414237976, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 9.028169014084508, | |
| "grad_norm": 0.26551520824432373, | |
| "learning_rate": 9.295261506157985e-07, | |
| "loss": 0.31331080198287964, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 9.056338028169014, | |
| "grad_norm": 0.28371915221214294, | |
| "learning_rate": 9.291035786532163e-07, | |
| "loss": 0.3039785325527191, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 9.084507042253522, | |
| "grad_norm": 0.28972727060317993, | |
| "learning_rate": 9.286798518753878e-07, | |
| "loss": 0.3172224462032318, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 9.112676056338028, | |
| "grad_norm": 0.2863673269748688, | |
| "learning_rate": 9.282549715730579e-07, | |
| "loss": 0.3220033049583435, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 9.140845070422536, | |
| "grad_norm": 0.27619102597236633, | |
| "learning_rate": 9.278289390404859e-07, | |
| "loss": 0.31595173478126526, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 9.169014084507042, | |
| "grad_norm": 0.2838309705257416, | |
| "learning_rate": 9.274017555754407e-07, | |
| "loss": 0.31470271944999695, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 9.19718309859155, | |
| "grad_norm": 0.28437867760658264, | |
| "learning_rate": 9.269734224791974e-07, | |
| "loss": 0.31371644139289856, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 9.225352112676056, | |
| "grad_norm": 0.28935906291007996, | |
| "learning_rate": 9.265439410565328e-07, | |
| "loss": 0.3154122829437256, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 9.253521126760564, | |
| "grad_norm": 0.28751862049102783, | |
| "learning_rate": 9.261133126157217e-07, | |
| "loss": 0.3072774410247803, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 9.28169014084507, | |
| "grad_norm": 0.2829267680644989, | |
| "learning_rate": 9.256815384685328e-07, | |
| "loss": 0.30855560302734375, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 9.309859154929578, | |
| "grad_norm": 0.28372108936309814, | |
| "learning_rate": 9.252486199302256e-07, | |
| "loss": 0.3047599792480469, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 9.338028169014084, | |
| "grad_norm": 0.26949799060821533, | |
| "learning_rate": 9.248145583195447e-07, | |
| "loss": 0.3051632046699524, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 9.366197183098592, | |
| "grad_norm": 0.26946741342544556, | |
| "learning_rate": 9.243793549587171e-07, | |
| "loss": 0.30776509642601013, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 9.394366197183098, | |
| "grad_norm": 0.2829545736312866, | |
| "learning_rate": 9.239430111734476e-07, | |
| "loss": 0.30643659830093384, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 9.422535211267606, | |
| "grad_norm": 0.30891162157058716, | |
| "learning_rate": 9.235055282929153e-07, | |
| "loss": 0.30099156498908997, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 9.450704225352112, | |
| "grad_norm": 0.2820793390274048, | |
| "learning_rate": 9.230669076497687e-07, | |
| "loss": 0.31829434633255005, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 9.47887323943662, | |
| "grad_norm": 0.27604445815086365, | |
| "learning_rate": 9.226271505801224e-07, | |
| "loss": 0.31647807359695435, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 9.507042253521126, | |
| "grad_norm": 0.2793697714805603, | |
| "learning_rate": 9.221862584235526e-07, | |
| "loss": 0.30784907937049866, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 9.535211267605634, | |
| "grad_norm": 0.27153849601745605, | |
| "learning_rate": 9.217442325230936e-07, | |
| "loss": 0.29595351219177246, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 9.56338028169014, | |
| "grad_norm": 0.28174859285354614, | |
| "learning_rate": 9.213010742252327e-07, | |
| "loss": 0.3158809244632721, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 9.591549295774648, | |
| "grad_norm": 0.27065321803092957, | |
| "learning_rate": 9.208567848799069e-07, | |
| "loss": 0.29831117391586304, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 9.619718309859154, | |
| "grad_norm": 0.2704644799232483, | |
| "learning_rate": 9.204113658404989e-07, | |
| "loss": 0.31440460681915283, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 9.647887323943662, | |
| "grad_norm": 0.2712800204753876, | |
| "learning_rate": 9.199648184638318e-07, | |
| "loss": 0.2985243499279022, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 9.676056338028168, | |
| "grad_norm": 0.2808634042739868, | |
| "learning_rate": 9.195171441101668e-07, | |
| "loss": 0.3167741000652313, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 9.704225352112676, | |
| "grad_norm": 0.27340877056121826, | |
| "learning_rate": 9.190683441431974e-07, | |
| "loss": 0.3019712269306183, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 9.732394366197184, | |
| "grad_norm": 0.2813129723072052, | |
| "learning_rate": 9.186184199300463e-07, | |
| "loss": 0.3006363809108734, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 9.76056338028169, | |
| "grad_norm": 0.28003188967704773, | |
| "learning_rate": 9.181673728412605e-07, | |
| "loss": 0.31190669536590576, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 9.788732394366198, | |
| "grad_norm": 0.2703484892845154, | |
| "learning_rate": 9.177152042508077e-07, | |
| "loss": 0.3077196478843689, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 9.816901408450704, | |
| "grad_norm": 0.2803649604320526, | |
| "learning_rate": 9.17261915536072e-07, | |
| "loss": 0.30905407667160034, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 9.845070422535212, | |
| "grad_norm": 0.2884216606616974, | |
| "learning_rate": 9.168075080778494e-07, | |
| "loss": 0.30327335000038147, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 9.873239436619718, | |
| "grad_norm": 0.2796288728713989, | |
| "learning_rate": 9.163519832603436e-07, | |
| "loss": 0.3104422390460968, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 9.901408450704226, | |
| "grad_norm": 0.30282527208328247, | |
| "learning_rate": 9.158953424711624e-07, | |
| "loss": 0.3279035985469818, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 9.929577464788732, | |
| "grad_norm": 0.2795606851577759, | |
| "learning_rate": 9.154375871013128e-07, | |
| "loss": 0.3136137127876282, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 9.95774647887324, | |
| "grad_norm": 0.2871512174606323, | |
| "learning_rate": 9.149787185451969e-07, | |
| "loss": 0.3188316226005554, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 9.985915492957746, | |
| "grad_norm": 0.2814459502696991, | |
| "learning_rate": 9.145187382006081e-07, | |
| "loss": 0.3084180951118469, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.4135233461856842, | |
| "learning_rate": 9.140576474687263e-07, | |
| "loss": 0.32664716243743896, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 10.028169014084508, | |
| "grad_norm": 0.2743515968322754, | |
| "learning_rate": 9.135954477541137e-07, | |
| "loss": 0.31237614154815674, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 10.056338028169014, | |
| "grad_norm": 0.2790542244911194, | |
| "learning_rate": 9.131321404647109e-07, | |
| "loss": 0.32110899686813354, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 10.084507042253522, | |
| "grad_norm": 0.32552531361579895, | |
| "learning_rate": 9.126677270118322e-07, | |
| "loss": 0.31540626287460327, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 10.112676056338028, | |
| "grad_norm": 0.27251535654067993, | |
| "learning_rate": 9.122022088101613e-07, | |
| "loss": 0.2956544756889343, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 10.140845070422536, | |
| "grad_norm": 0.3012971878051758, | |
| "learning_rate": 9.117355872777477e-07, | |
| "loss": 0.3012295961380005, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 10.169014084507042, | |
| "grad_norm": 0.29038530588150024, | |
| "learning_rate": 9.112678638360015e-07, | |
| "loss": 0.2931394875049591, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 10.19718309859155, | |
| "grad_norm": 0.2870721220970154, | |
| "learning_rate": 9.107990399096893e-07, | |
| "loss": 0.2930557131767273, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 10.225352112676056, | |
| "grad_norm": 0.281965047121048, | |
| "learning_rate": 9.103291169269299e-07, | |
| "loss": 0.3096895217895508, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 10.253521126760564, | |
| "grad_norm": 0.2720247209072113, | |
| "learning_rate": 9.098580963191907e-07, | |
| "loss": 0.302044540643692, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 10.28169014084507, | |
| "grad_norm": 0.2841237783432007, | |
| "learning_rate": 9.093859795212817e-07, | |
| "loss": 0.32047468423843384, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 10.309859154929578, | |
| "grad_norm": 0.29989898204803467, | |
| "learning_rate": 9.089127679713529e-07, | |
| "loss": 0.31085067987442017, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 10.338028169014084, | |
| "grad_norm": 0.29164332151412964, | |
| "learning_rate": 9.084384631108882e-07, | |
| "loss": 0.3052881360054016, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 10.366197183098592, | |
| "grad_norm": 0.2740509808063507, | |
| "learning_rate": 9.079630663847031e-07, | |
| "loss": 0.31468653678894043, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 10.394366197183098, | |
| "grad_norm": 0.2791116535663605, | |
| "learning_rate": 9.074865792409381e-07, | |
| "loss": 0.30899161100387573, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 10.422535211267606, | |
| "grad_norm": 0.30149030685424805, | |
| "learning_rate": 9.070090031310558e-07, | |
| "loss": 0.3094651997089386, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 10.450704225352112, | |
| "grad_norm": 0.2970089018344879, | |
| "learning_rate": 9.065303395098358e-07, | |
| "loss": 0.3142540156841278, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 10.47887323943662, | |
| "grad_norm": 0.2772645652294159, | |
| "learning_rate": 9.060505898353705e-07, | |
| "loss": 0.32443171739578247, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 10.507042253521126, | |
| "grad_norm": 0.2707611620426178, | |
| "learning_rate": 9.055697555690607e-07, | |
| "loss": 0.30495521426200867, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 10.535211267605634, | |
| "grad_norm": 0.2923314869403839, | |
| "learning_rate": 9.050878381756107e-07, | |
| "loss": 0.30734074115753174, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 10.56338028169014, | |
| "grad_norm": 0.2865448594093323, | |
| "learning_rate": 9.046048391230247e-07, | |
| "loss": 0.2913230061531067, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 10.591549295774648, | |
| "grad_norm": 0.29643693566322327, | |
| "learning_rate": 9.041207598826017e-07, | |
| "loss": 0.30088239908218384, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 10.619718309859154, | |
| "grad_norm": 0.2761143445968628, | |
| "learning_rate": 9.036356019289309e-07, | |
| "loss": 0.30702435970306396, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 10.647887323943662, | |
| "grad_norm": 0.27720797061920166, | |
| "learning_rate": 9.031493667398872e-07, | |
| "loss": 0.2953702509403229, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 10.676056338028168, | |
| "grad_norm": 0.30037540197372437, | |
| "learning_rate": 9.026620557966279e-07, | |
| "loss": 0.3012697696685791, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 10.704225352112676, | |
| "grad_norm": 0.27628859877586365, | |
| "learning_rate": 9.021736705835862e-07, | |
| "loss": 0.30558526515960693, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 10.732394366197184, | |
| "grad_norm": 0.2692992091178894, | |
| "learning_rate": 9.016842125884684e-07, | |
| "loss": 0.288699209690094, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 10.76056338028169, | |
| "grad_norm": 0.30020084977149963, | |
| "learning_rate": 9.011936833022484e-07, | |
| "loss": 0.294253945350647, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 10.788732394366198, | |
| "grad_norm": 0.29289868474006653, | |
| "learning_rate": 9.007020842191634e-07, | |
| "loss": 0.31805676221847534, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 10.816901408450704, | |
| "grad_norm": 0.28465571999549866, | |
| "learning_rate": 9.002094168367095e-07, | |
| "loss": 0.3168966472148895, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 10.845070422535212, | |
| "grad_norm": 0.27562448382377625, | |
| "learning_rate": 8.997156826556369e-07, | |
| "loss": 0.302585631608963, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 10.873239436619718, | |
| "grad_norm": 0.28200119733810425, | |
| "learning_rate": 8.992208831799456e-07, | |
| "loss": 0.3037059009075165, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 10.901408450704226, | |
| "grad_norm": 0.2829252779483795, | |
| "learning_rate": 8.987250199168808e-07, | |
| "loss": 0.2850543260574341, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 10.929577464788732, | |
| "grad_norm": 0.28010982275009155, | |
| "learning_rate": 8.982280943769278e-07, | |
| "loss": 0.30365508794784546, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 10.95774647887324, | |
| "grad_norm": 0.2917790114879608, | |
| "learning_rate": 8.977301080738079e-07, | |
| "loss": 0.32212477922439575, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 10.985915492957746, | |
| "grad_norm": 0.27254894375801086, | |
| "learning_rate": 8.97231062524474e-07, | |
| "loss": 0.29733577370643616, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.38847291469573975, | |
| "learning_rate": 8.967309592491052e-07, | |
| "loss": 0.31824764609336853, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 11.028169014084508, | |
| "grad_norm": 0.27360019087791443, | |
| "learning_rate": 8.962297997711027e-07, | |
| "loss": 0.2907956540584564, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 11.056338028169014, | |
| "grad_norm": 0.28565695881843567, | |
| "learning_rate": 8.957275856170855e-07, | |
| "loss": 0.30498966574668884, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 11.084507042253522, | |
| "grad_norm": 0.2826082408428192, | |
| "learning_rate": 8.952243183168848e-07, | |
| "loss": 0.3076494634151459, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 11.112676056338028, | |
| "grad_norm": 0.28598853945732117, | |
| "learning_rate": 8.9471999940354e-07, | |
| "loss": 0.29677921533584595, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 11.140845070422536, | |
| "grad_norm": 0.27635788917541504, | |
| "learning_rate": 8.942146304132943e-07, | |
| "loss": 0.28424787521362305, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 11.169014084507042, | |
| "grad_norm": 0.3110678195953369, | |
| "learning_rate": 8.937082128855891e-07, | |
| "loss": 0.31091392040252686, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 11.19718309859155, | |
| "grad_norm": 0.28018108010292053, | |
| "learning_rate": 8.932007483630596e-07, | |
| "loss": 0.2973289489746094, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 11.225352112676056, | |
| "grad_norm": 0.2748464345932007, | |
| "learning_rate": 8.926922383915315e-07, | |
| "loss": 0.3064712882041931, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 11.253521126760564, | |
| "grad_norm": 0.2758099138736725, | |
| "learning_rate": 8.921826845200138e-07, | |
| "loss": 0.30080002546310425, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 11.28169014084507, | |
| "grad_norm": 0.27323541045188904, | |
| "learning_rate": 8.916720883006963e-07, | |
| "loss": 0.30011099576950073, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 11.309859154929578, | |
| "grad_norm": 0.2751684784889221, | |
| "learning_rate": 8.911604512889434e-07, | |
| "loss": 0.3021606206893921, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 11.338028169014084, | |
| "grad_norm": 0.278543084859848, | |
| "learning_rate": 8.906477750432903e-07, | |
| "loss": 0.2979898452758789, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 11.366197183098592, | |
| "grad_norm": 0.2872096300125122, | |
| "learning_rate": 8.901340611254378e-07, | |
| "loss": 0.30450716614723206, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 11.394366197183098, | |
| "grad_norm": 0.27768319845199585, | |
| "learning_rate": 8.896193111002475e-07, | |
| "loss": 0.31025999784469604, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 11.422535211267606, | |
| "grad_norm": 0.28008511662483215, | |
| "learning_rate": 8.891035265357371e-07, | |
| "loss": 0.2903551757335663, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 11.450704225352112, | |
| "grad_norm": 0.28000614047050476, | |
| "learning_rate": 8.88586709003076e-07, | |
| "loss": 0.30711328983306885, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 11.47887323943662, | |
| "grad_norm": 0.27915990352630615, | |
| "learning_rate": 8.8806886007658e-07, | |
| "loss": 0.309296578168869, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 11.507042253521126, | |
| "grad_norm": 0.2682763636112213, | |
| "learning_rate": 8.875499813337067e-07, | |
| "loss": 0.3053497076034546, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 11.535211267605634, | |
| "grad_norm": 0.26592400670051575, | |
| "learning_rate": 8.87030074355051e-07, | |
| "loss": 0.29761987924575806, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 11.56338028169014, | |
| "grad_norm": 0.2664642333984375, | |
| "learning_rate": 8.865091407243394e-07, | |
| "loss": 0.2986457645893097, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 11.591549295774648, | |
| "grad_norm": 0.2615084648132324, | |
| "learning_rate": 8.859871820284261e-07, | |
| "loss": 0.31391632556915283, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 11.619718309859154, | |
| "grad_norm": 0.27312856912612915, | |
| "learning_rate": 8.85464199857288e-07, | |
| "loss": 0.3128984570503235, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 11.647887323943662, | |
| "grad_norm": 0.2734473645687103, | |
| "learning_rate": 8.849401958040192e-07, | |
| "loss": 0.298526793718338, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 11.676056338028168, | |
| "grad_norm": 0.2901906669139862, | |
| "learning_rate": 8.844151714648274e-07, | |
| "loss": 0.31268036365509033, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 11.704225352112676, | |
| "grad_norm": 0.28374356031417847, | |
| "learning_rate": 8.838891284390273e-07, | |
| "loss": 0.3042759299278259, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 11.732394366197184, | |
| "grad_norm": 0.26128286123275757, | |
| "learning_rate": 8.833620683290375e-07, | |
| "loss": 0.30057787895202637, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 11.76056338028169, | |
| "grad_norm": 0.29005923867225647, | |
| "learning_rate": 8.828339927403745e-07, | |
| "loss": 0.2969115376472473, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 11.788732394366198, | |
| "grad_norm": 0.26823022961616516, | |
| "learning_rate": 8.823049032816478e-07, | |
| "loss": 0.3024095296859741, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 11.816901408450704, | |
| "grad_norm": 0.2938059866428375, | |
| "learning_rate": 8.817748015645558e-07, | |
| "loss": 0.2982884347438812, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 11.845070422535212, | |
| "grad_norm": 0.2794440686702728, | |
| "learning_rate": 8.812436892038805e-07, | |
| "loss": 0.3006170094013214, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 11.873239436619718, | |
| "grad_norm": 0.27727699279785156, | |
| "learning_rate": 8.807115678174819e-07, | |
| "loss": 0.29938215017318726, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 11.901408450704226, | |
| "grad_norm": 0.28038865327835083, | |
| "learning_rate": 8.801784390262943e-07, | |
| "loss": 0.3107326924800873, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 11.929577464788732, | |
| "grad_norm": 0.29747217893600464, | |
| "learning_rate": 8.796443044543203e-07, | |
| "loss": 0.2999688982963562, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 11.95774647887324, | |
| "grad_norm": 0.2875438332557678, | |
| "learning_rate": 8.791091657286267e-07, | |
| "loss": 0.2930242419242859, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 11.985915492957746, | |
| "grad_norm": 0.2946978211402893, | |
| "learning_rate": 8.785730244793386e-07, | |
| "loss": 0.295132577419281, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.39752283692359924, | |
| "learning_rate": 8.780358823396352e-07, | |
| "loss": 0.30750101804733276, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 12.028169014084508, | |
| "grad_norm": 0.2708489000797272, | |
| "learning_rate": 8.774977409457447e-07, | |
| "loss": 0.3058265447616577, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 12.056338028169014, | |
| "grad_norm": 0.2773410975933075, | |
| "learning_rate": 8.769586019369391e-07, | |
| "loss": 0.30409157276153564, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 12.084507042253522, | |
| "grad_norm": 0.26894107460975647, | |
| "learning_rate": 8.764184669555293e-07, | |
| "loss": 0.30384916067123413, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 12.112676056338028, | |
| "grad_norm": 0.27837878465652466, | |
| "learning_rate": 8.758773376468604e-07, | |
| "loss": 0.2943356931209564, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 12.140845070422536, | |
| "grad_norm": 0.2690330445766449, | |
| "learning_rate": 8.753352156593055e-07, | |
| "loss": 0.2933955788612366, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 12.169014084507042, | |
| "grad_norm": 0.27980291843414307, | |
| "learning_rate": 8.747921026442629e-07, | |
| "loss": 0.28997617959976196, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 12.19718309859155, | |
| "grad_norm": 0.287624329328537, | |
| "learning_rate": 8.742480002561487e-07, | |
| "loss": 0.30039626359939575, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 12.225352112676056, | |
| "grad_norm": 0.28817304968833923, | |
| "learning_rate": 8.737029101523929e-07, | |
| "loss": 0.3200758099555969, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 12.253521126760564, | |
| "grad_norm": 0.2769193649291992, | |
| "learning_rate": 8.731568339934348e-07, | |
| "loss": 0.2976597547531128, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 12.28169014084507, | |
| "grad_norm": 0.309583842754364, | |
| "learning_rate": 8.726097734427172e-07, | |
| "loss": 0.2977990210056305, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 12.309859154929578, | |
| "grad_norm": 0.26997339725494385, | |
| "learning_rate": 8.72061730166681e-07, | |
| "loss": 0.29733020067214966, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 12.338028169014084, | |
| "grad_norm": 0.2782990634441376, | |
| "learning_rate": 8.715127058347614e-07, | |
| "loss": 0.29592543840408325, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 12.366197183098592, | |
| "grad_norm": 0.2781784236431122, | |
| "learning_rate": 8.709627021193816e-07, | |
| "loss": 0.2965870797634125, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 12.394366197183098, | |
| "grad_norm": 0.2965787649154663, | |
| "learning_rate": 8.704117206959484e-07, | |
| "loss": 0.30272242426872253, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 12.422535211267606, | |
| "grad_norm": 0.2780534625053406, | |
| "learning_rate": 8.698597632428466e-07, | |
| "loss": 0.30883416533470154, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 12.450704225352112, | |
| "grad_norm": 0.27513188123703003, | |
| "learning_rate": 8.693068314414344e-07, | |
| "loss": 0.30461177229881287, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 12.47887323943662, | |
| "grad_norm": 0.2838785946369171, | |
| "learning_rate": 8.687529269760379e-07, | |
| "loss": 0.2927112281322479, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 12.507042253521126, | |
| "grad_norm": 0.28894707560539246, | |
| "learning_rate": 8.681980515339463e-07, | |
| "loss": 0.28816863894462585, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 12.535211267605634, | |
| "grad_norm": 0.28006207942962646, | |
| "learning_rate": 8.676422068054064e-07, | |
| "loss": 0.29931047558784485, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 12.56338028169014, | |
| "grad_norm": 0.2799602150917053, | |
| "learning_rate": 8.670853944836176e-07, | |
| "loss": 0.3038347363471985, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 12.591549295774648, | |
| "grad_norm": 0.2760638892650604, | |
| "learning_rate": 8.665276162647267e-07, | |
| "loss": 0.30183106660842896, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 12.619718309859154, | |
| "grad_norm": 0.278127521276474, | |
| "learning_rate": 8.659688738478231e-07, | |
| "loss": 0.3019717335700989, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 12.647887323943662, | |
| "grad_norm": 0.26856380701065063, | |
| "learning_rate": 8.654091689349329e-07, | |
| "loss": 0.2945576310157776, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 12.676056338028168, | |
| "grad_norm": 0.2749437391757965, | |
| "learning_rate": 8.648485032310144e-07, | |
| "loss": 0.3023756444454193, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 12.704225352112676, | |
| "grad_norm": 0.2729102671146393, | |
| "learning_rate": 8.642868784439527e-07, | |
| "loss": 0.2842894196510315, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 12.732394366197184, | |
| "grad_norm": 0.28390341997146606, | |
| "learning_rate": 8.63724296284554e-07, | |
| "loss": 0.2940555810928345, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 12.76056338028169, | |
| "grad_norm": 0.2739807069301605, | |
| "learning_rate": 8.631607584665413e-07, | |
| "loss": 0.2935922145843506, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 12.788732394366198, | |
| "grad_norm": 0.2823079824447632, | |
| "learning_rate": 8.625962667065487e-07, | |
| "loss": 0.2949485182762146, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 12.816901408450704, | |
| "grad_norm": 0.2843155264854431, | |
| "learning_rate": 8.620308227241157e-07, | |
| "loss": 0.31058311462402344, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 12.845070422535212, | |
| "grad_norm": 0.2805749475955963, | |
| "learning_rate": 8.614644282416831e-07, | |
| "loss": 0.2892061173915863, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 12.873239436619718, | |
| "grad_norm": 0.2773419916629791, | |
| "learning_rate": 8.608970849845862e-07, | |
| "loss": 0.28688696026802063, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 12.901408450704226, | |
| "grad_norm": 0.28667542338371277, | |
| "learning_rate": 8.603287946810513e-07, | |
| "loss": 0.30356699228286743, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 12.929577464788732, | |
| "grad_norm": 0.2785196900367737, | |
| "learning_rate": 8.597595590621892e-07, | |
| "loss": 0.29802441596984863, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 12.95774647887324, | |
| "grad_norm": 0.2778855562210083, | |
| "learning_rate": 8.591893798619903e-07, | |
| "loss": 0.29154932498931885, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 12.985915492957746, | |
| "grad_norm": 0.28308385610580444, | |
| "learning_rate": 8.586182588173194e-07, | |
| "loss": 0.29143208265304565, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.39711424708366394, | |
| "learning_rate": 8.580461976679099e-07, | |
| "loss": 0.2990560233592987, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 13.028169014084508, | |
| "grad_norm": 0.26802533864974976, | |
| "learning_rate": 8.574731981563597e-07, | |
| "loss": 0.29934608936309814, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 13.056338028169014, | |
| "grad_norm": 0.2663622498512268, | |
| "learning_rate": 8.568992620281243e-07, | |
| "loss": 0.29982200264930725, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 13.084507042253522, | |
| "grad_norm": 0.28624898195266724, | |
| "learning_rate": 8.56324391031513e-07, | |
| "loss": 0.2810109555721283, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 13.112676056338028, | |
| "grad_norm": 0.28607407212257385, | |
| "learning_rate": 8.557485869176825e-07, | |
| "loss": 0.2949367165565491, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 13.140845070422536, | |
| "grad_norm": 0.26953044533729553, | |
| "learning_rate": 8.551718514406318e-07, | |
| "loss": 0.2851143479347229, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 13.169014084507042, | |
| "grad_norm": 0.31105440855026245, | |
| "learning_rate": 8.545941863571973e-07, | |
| "loss": 0.2858909070491791, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 13.19718309859155, | |
| "grad_norm": 0.28143224120140076, | |
| "learning_rate": 8.540155934270471e-07, | |
| "loss": 0.2961467504501343, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 13.225352112676056, | |
| "grad_norm": 0.2862183451652527, | |
| "learning_rate": 8.534360744126753e-07, | |
| "loss": 0.29882240295410156, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 13.253521126760564, | |
| "grad_norm": 0.26780712604522705, | |
| "learning_rate": 8.528556310793979e-07, | |
| "loss": 0.2933373749256134, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 13.28169014084507, | |
| "grad_norm": 0.27026116847991943, | |
| "learning_rate": 8.522742651953456e-07, | |
| "loss": 0.2968083918094635, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 13.309859154929578, | |
| "grad_norm": 0.2800562381744385, | |
| "learning_rate": 8.516919785314595e-07, | |
| "loss": 0.3015640377998352, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 13.338028169014084, | |
| "grad_norm": 0.29154452681541443, | |
| "learning_rate": 8.511087728614862e-07, | |
| "loss": 0.31045541167259216, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 13.366197183098592, | |
| "grad_norm": 0.28183555603027344, | |
| "learning_rate": 8.50524649961971e-07, | |
| "loss": 0.29173219203948975, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 13.394366197183098, | |
| "grad_norm": 0.2971493601799011, | |
| "learning_rate": 8.499396116122535e-07, | |
| "loss": 0.2765740752220154, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 13.422535211267606, | |
| "grad_norm": 0.26922252774238586, | |
| "learning_rate": 8.493536595944622e-07, | |
| "loss": 0.297348290681839, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 13.450704225352112, | |
| "grad_norm": 0.27836039662361145, | |
| "learning_rate": 8.487667956935087e-07, | |
| "loss": 0.28694790601730347, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 13.47887323943662, | |
| "grad_norm": 0.29267406463623047, | |
| "learning_rate": 8.481790216970819e-07, | |
| "loss": 0.2862587571144104, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 13.507042253521126, | |
| "grad_norm": 0.27863144874572754, | |
| "learning_rate": 8.475903393956433e-07, | |
| "loss": 0.2894202470779419, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 13.535211267605634, | |
| "grad_norm": 0.2911999523639679, | |
| "learning_rate": 8.470007505824215e-07, | |
| "loss": 0.29356449842453003, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 13.56338028169014, | |
| "grad_norm": 0.2968003451824188, | |
| "learning_rate": 8.464102570534061e-07, | |
| "loss": 0.29188239574432373, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 13.591549295774648, | |
| "grad_norm": 0.2842749357223511, | |
| "learning_rate": 8.458188606073431e-07, | |
| "loss": 0.28485268354415894, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 13.619718309859154, | |
| "grad_norm": 0.2762301564216614, | |
| "learning_rate": 8.452265630457282e-07, | |
| "loss": 0.2829025387763977, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 13.647887323943662, | |
| "grad_norm": 0.27368924021720886, | |
| "learning_rate": 8.446333661728028e-07, | |
| "loss": 0.3129264712333679, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 13.676056338028168, | |
| "grad_norm": 0.3042363226413727, | |
| "learning_rate": 8.440392717955475e-07, | |
| "loss": 0.298667311668396, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 13.704225352112676, | |
| "grad_norm": 0.31437602639198303, | |
| "learning_rate": 8.434442817236765e-07, | |
| "loss": 0.2911669909954071, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 13.732394366197184, | |
| "grad_norm": 0.2624206840991974, | |
| "learning_rate": 8.428483977696328e-07, | |
| "loss": 0.2875954508781433, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 13.76056338028169, | |
| "grad_norm": 0.2824702858924866, | |
| "learning_rate": 8.422516217485825e-07, | |
| "loss": 0.28079336881637573, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 13.788732394366198, | |
| "grad_norm": 0.27612945437431335, | |
| "learning_rate": 8.416539554784089e-07, | |
| "loss": 0.3052091598510742, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 13.816901408450704, | |
| "grad_norm": 0.28139790892601013, | |
| "learning_rate": 8.410554007797068e-07, | |
| "loss": 0.2918257415294647, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 13.845070422535212, | |
| "grad_norm": 0.2779678702354431, | |
| "learning_rate": 8.404559594757777e-07, | |
| "loss": 0.30707138776779175, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 13.873239436619718, | |
| "grad_norm": 0.2710152566432953, | |
| "learning_rate": 8.398556333926239e-07, | |
| "loss": 0.3128437101840973, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 13.901408450704226, | |
| "grad_norm": 0.2958044707775116, | |
| "learning_rate": 8.392544243589427e-07, | |
| "loss": 0.29653337597846985, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 13.929577464788732, | |
| "grad_norm": 0.28408974409103394, | |
| "learning_rate": 8.38652334206121e-07, | |
| "loss": 0.29291969537734985, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 13.95774647887324, | |
| "grad_norm": 0.27897724509239197, | |
| "learning_rate": 8.3804936476823e-07, | |
| "loss": 0.3117462992668152, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 13.985915492957746, | |
| "grad_norm": 0.27391254901885986, | |
| "learning_rate": 8.374455178820189e-07, | |
| "loss": 0.30571603775024414, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.3995163142681122, | |
| "learning_rate": 8.368407953869103e-07, | |
| "loss": 0.2876809239387512, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 14.028169014084508, | |
| "grad_norm": 0.3068762719631195, | |
| "learning_rate": 8.362351991249937e-07, | |
| "loss": 0.28866052627563477, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 14.056338028169014, | |
| "grad_norm": 0.278751939535141, | |
| "learning_rate": 8.356287309410204e-07, | |
| "loss": 0.3048397898674011, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 14.084507042253522, | |
| "grad_norm": 0.2831234335899353, | |
| "learning_rate": 8.350213926823974e-07, | |
| "loss": 0.28643566370010376, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 14.112676056338028, | |
| "grad_norm": 0.2744354009628296, | |
| "learning_rate": 8.344131861991828e-07, | |
| "loss": 0.30159255862236023, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 14.140845070422536, | |
| "grad_norm": 0.2834227383136749, | |
| "learning_rate": 8.338041133440788e-07, | |
| "loss": 0.2945912182331085, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 14.169014084507042, | |
| "grad_norm": 0.2914932072162628, | |
| "learning_rate": 8.331941759724268e-07, | |
| "loss": 0.30261489748954773, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 14.19718309859155, | |
| "grad_norm": 0.2795814871788025, | |
| "learning_rate": 8.325833759422021e-07, | |
| "loss": 0.29661813378334045, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 14.225352112676056, | |
| "grad_norm": 0.2715330719947815, | |
| "learning_rate": 8.319717151140072e-07, | |
| "loss": 0.28672271966934204, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 14.253521126760564, | |
| "grad_norm": 0.2859768271446228, | |
| "learning_rate": 8.313591953510673e-07, | |
| "loss": 0.2985742390155792, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 14.28169014084507, | |
| "grad_norm": 0.2789771854877472, | |
| "learning_rate": 8.307458185192238e-07, | |
| "loss": 0.2883588671684265, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 14.309859154929578, | |
| "grad_norm": 0.2849474549293518, | |
| "learning_rate": 8.301315864869289e-07, | |
| "loss": 0.3045833706855774, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 14.338028169014084, | |
| "grad_norm": 0.28583216667175293, | |
| "learning_rate": 8.295165011252396e-07, | |
| "loss": 0.28541919589042664, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 14.366197183098592, | |
| "grad_norm": 0.286767840385437, | |
| "learning_rate": 8.289005643078131e-07, | |
| "loss": 0.2928876280784607, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 14.394366197183098, | |
| "grad_norm": 0.2851925790309906, | |
| "learning_rate": 8.282837779108993e-07, | |
| "loss": 0.29808348417282104, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 14.422535211267606, | |
| "grad_norm": 0.2843434512615204, | |
| "learning_rate": 8.276661438133368e-07, | |
| "loss": 0.281357079744339, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 14.450704225352112, | |
| "grad_norm": 0.29959535598754883, | |
| "learning_rate": 8.270476638965461e-07, | |
| "loss": 0.287128746509552, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 14.47887323943662, | |
| "grad_norm": 0.2812483310699463, | |
| "learning_rate": 8.264283400445243e-07, | |
| "loss": 0.29306480288505554, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 14.507042253521126, | |
| "grad_norm": 0.3015466034412384, | |
| "learning_rate": 8.258081741438394e-07, | |
| "loss": 0.3011341691017151, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 14.535211267605634, | |
| "grad_norm": 0.2930891215801239, | |
| "learning_rate": 8.25187168083624e-07, | |
| "loss": 0.2976144850254059, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 14.56338028169014, | |
| "grad_norm": 0.2777521312236786, | |
| "learning_rate": 8.245653237555705e-07, | |
| "loss": 0.2829003930091858, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 14.591549295774648, | |
| "grad_norm": 0.2916077673435211, | |
| "learning_rate": 8.239426430539243e-07, | |
| "loss": 0.28546392917633057, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 14.619718309859154, | |
| "grad_norm": 0.3006315231323242, | |
| "learning_rate": 8.23319127875479e-07, | |
| "loss": 0.2851755619049072, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 14.647887323943662, | |
| "grad_norm": 0.2654482424259186, | |
| "learning_rate": 8.226947801195699e-07, | |
| "loss": 0.28430840373039246, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 14.676056338028168, | |
| "grad_norm": 0.2679372727870941, | |
| "learning_rate": 8.220696016880687e-07, | |
| "loss": 0.282630980014801, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 14.704225352112676, | |
| "grad_norm": 0.28538262844085693, | |
| "learning_rate": 8.21443594485377e-07, | |
| "loss": 0.2789214551448822, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 14.732394366197184, | |
| "grad_norm": 0.2713358700275421, | |
| "learning_rate": 8.208167604184217e-07, | |
| "loss": 0.2909342646598816, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 14.76056338028169, | |
| "grad_norm": 0.30056601762771606, | |
| "learning_rate": 8.201891013966478e-07, | |
| "loss": 0.2838485836982727, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 14.788732394366198, | |
| "grad_norm": 0.2811543345451355, | |
| "learning_rate": 8.195606193320136e-07, | |
| "loss": 0.29030710458755493, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 14.816901408450704, | |
| "grad_norm": 0.2930709719657898, | |
| "learning_rate": 8.189313161389844e-07, | |
| "loss": 0.2922976613044739, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 14.845070422535212, | |
| "grad_norm": 0.29798057675361633, | |
| "learning_rate": 8.183011937345271e-07, | |
| "loss": 0.2951294183731079, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 14.873239436619718, | |
| "grad_norm": 0.28483426570892334, | |
| "learning_rate": 8.176702540381036e-07, | |
| "loss": 0.2938500642776489, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 14.901408450704226, | |
| "grad_norm": 0.2990010380744934, | |
| "learning_rate": 8.170384989716657e-07, | |
| "loss": 0.29805850982666016, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 14.929577464788732, | |
| "grad_norm": 0.2896774411201477, | |
| "learning_rate": 8.164059304596488e-07, | |
| "loss": 0.29530227184295654, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 14.95774647887324, | |
| "grad_norm": 0.28662148118019104, | |
| "learning_rate": 8.157725504289664e-07, | |
| "loss": 0.28371667861938477, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 14.985915492957746, | |
| "grad_norm": 0.2807771861553192, | |
| "learning_rate": 8.151383608090039e-07, | |
| "loss": 0.29020193219184875, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.39528268575668335, | |
| "learning_rate": 8.145033635316128e-07, | |
| "loss": 0.30530279874801636, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 15.028169014084508, | |
| "grad_norm": 0.28691425919532776, | |
| "learning_rate": 8.138675605311051e-07, | |
| "loss": 0.27306681871414185, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 15.056338028169014, | |
| "grad_norm": 0.27633434534072876, | |
| "learning_rate": 8.13230953744247e-07, | |
| "loss": 0.2900540828704834, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 15.084507042253522, | |
| "grad_norm": 0.28263136744499207, | |
| "learning_rate": 8.125935451102528e-07, | |
| "loss": 0.29298198223114014, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 15.112676056338028, | |
| "grad_norm": 0.2708156406879425, | |
| "learning_rate": 8.119553365707802e-07, | |
| "loss": 0.2728630006313324, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 15.140845070422536, | |
| "grad_norm": 0.28263747692108154, | |
| "learning_rate": 8.113163300699228e-07, | |
| "loss": 0.2994900047779083, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 15.169014084507042, | |
| "grad_norm": 0.2628503739833832, | |
| "learning_rate": 8.106765275542053e-07, | |
| "loss": 0.2943934202194214, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 15.19718309859155, | |
| "grad_norm": 0.2844214141368866, | |
| "learning_rate": 8.100359309725774e-07, | |
| "loss": 0.286617636680603, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 15.225352112676056, | |
| "grad_norm": 0.2979234457015991, | |
| "learning_rate": 8.093945422764069e-07, | |
| "loss": 0.28598904609680176, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 15.253521126760564, | |
| "grad_norm": 0.2918255925178528, | |
| "learning_rate": 8.087523634194754e-07, | |
| "loss": 0.2826801538467407, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 15.28169014084507, | |
| "grad_norm": 0.30238643288612366, | |
| "learning_rate": 8.081093963579707e-07, | |
| "loss": 0.3018723726272583, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 15.309859154929578, | |
| "grad_norm": 0.2762410342693329, | |
| "learning_rate": 8.074656430504823e-07, | |
| "loss": 0.27831658720970154, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 15.338028169014084, | |
| "grad_norm": 0.28324148058891296, | |
| "learning_rate": 8.068211054579943e-07, | |
| "loss": 0.30506500601768494, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 15.366197183098592, | |
| "grad_norm": 0.2893829643726349, | |
| "learning_rate": 8.061757855438799e-07, | |
| "loss": 0.29023078083992004, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 15.394366197183098, | |
| "grad_norm": 0.2907930016517639, | |
| "learning_rate": 8.055296852738956e-07, | |
| "loss": 0.28343409299850464, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 15.422535211267606, | |
| "grad_norm": 0.28478139638900757, | |
| "learning_rate": 8.048828066161747e-07, | |
| "loss": 0.28546571731567383, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 15.450704225352112, | |
| "grad_norm": 0.2851191759109497, | |
| "learning_rate": 8.04235151541222e-07, | |
| "loss": 0.2884707748889923, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 15.47887323943662, | |
| "grad_norm": 0.2689509987831116, | |
| "learning_rate": 8.035867220219071e-07, | |
| "loss": 0.2950664758682251, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 15.507042253521126, | |
| "grad_norm": 0.2825435400009155, | |
| "learning_rate": 8.029375200334587e-07, | |
| "loss": 0.281552791595459, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 15.535211267605634, | |
| "grad_norm": 0.28483787178993225, | |
| "learning_rate": 8.022875475534588e-07, | |
| "loss": 0.2870042622089386, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 15.56338028169014, | |
| "grad_norm": 0.27896517515182495, | |
| "learning_rate": 8.01636806561836e-07, | |
| "loss": 0.287916362285614, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 15.591549295774648, | |
| "grad_norm": 0.2788335382938385, | |
| "learning_rate": 8.009852990408606e-07, | |
| "loss": 0.28609931468963623, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 15.619718309859154, | |
| "grad_norm": 0.2826322019100189, | |
| "learning_rate": 8.003330269751372e-07, | |
| "loss": 0.2950190305709839, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 15.647887323943662, | |
| "grad_norm": 0.2843019366264343, | |
| "learning_rate": 7.996799923515997e-07, | |
| "loss": 0.2914244532585144, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 15.676056338028168, | |
| "grad_norm": 0.26445460319519043, | |
| "learning_rate": 7.990261971595048e-07, | |
| "loss": 0.27984780073165894, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 15.704225352112676, | |
| "grad_norm": 0.27918627858161926, | |
| "learning_rate": 7.983716433904262e-07, | |
| "loss": 0.27757298946380615, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 15.732394366197184, | |
| "grad_norm": 0.2938336133956909, | |
| "learning_rate": 7.977163330382479e-07, | |
| "loss": 0.2920360565185547, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 15.76056338028169, | |
| "grad_norm": 0.28976547718048096, | |
| "learning_rate": 7.970602680991592e-07, | |
| "loss": 0.2951090931892395, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 15.788732394366198, | |
| "grad_norm": 0.27327752113342285, | |
| "learning_rate": 7.964034505716476e-07, | |
| "loss": 0.29640987515449524, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 15.816901408450704, | |
| "grad_norm": 0.27222704887390137, | |
| "learning_rate": 7.957458824564931e-07, | |
| "loss": 0.28876399993896484, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 15.845070422535212, | |
| "grad_norm": 0.29962998628616333, | |
| "learning_rate": 7.950875657567621e-07, | |
| "loss": 0.3039361238479614, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 15.873239436619718, | |
| "grad_norm": 0.2705839276313782, | |
| "learning_rate": 7.944285024778017e-07, | |
| "loss": 0.28840112686157227, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 15.901408450704226, | |
| "grad_norm": 0.28124475479125977, | |
| "learning_rate": 7.93768694627233e-07, | |
| "loss": 0.2832530736923218, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 15.929577464788732, | |
| "grad_norm": 0.29025372862815857, | |
| "learning_rate": 7.931081442149448e-07, | |
| "loss": 0.28588593006134033, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 15.95774647887324, | |
| "grad_norm": 0.27376946806907654, | |
| "learning_rate": 7.924468532530883e-07, | |
| "loss": 0.2883457839488983, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 15.985915492957746, | |
| "grad_norm": 0.28059038519859314, | |
| "learning_rate": 7.917848237560708e-07, | |
| "loss": 0.2923107147216797, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.39920157194137573, | |
| "learning_rate": 7.911220577405484e-07, | |
| "loss": 0.2896960973739624, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 16.028169014084508, | |
| "grad_norm": 0.2756041884422302, | |
| "learning_rate": 7.904585572254218e-07, | |
| "loss": 0.2934238910675049, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 16.056338028169016, | |
| "grad_norm": 0.2831096947193146, | |
| "learning_rate": 7.897943242318285e-07, | |
| "loss": 0.2862626910209656, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 16.08450704225352, | |
| "grad_norm": 0.27020981907844543, | |
| "learning_rate": 7.891293607831373e-07, | |
| "loss": 0.3019767999649048, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 16.112676056338028, | |
| "grad_norm": 0.2866615056991577, | |
| "learning_rate": 7.884636689049422e-07, | |
| "loss": 0.29431337118148804, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 16.140845070422536, | |
| "grad_norm": 0.27709120512008667, | |
| "learning_rate": 7.877972506250562e-07, | |
| "loss": 0.26718783378601074, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 16.169014084507044, | |
| "grad_norm": 0.2864624261856079, | |
| "learning_rate": 7.871301079735049e-07, | |
| "loss": 0.28138402104377747, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 16.197183098591548, | |
| "grad_norm": 0.2806070148944855, | |
| "learning_rate": 7.864622429825204e-07, | |
| "loss": 0.29040491580963135, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 16.225352112676056, | |
| "grad_norm": 0.2866605818271637, | |
| "learning_rate": 7.857936576865356e-07, | |
| "loss": 0.2876106798648834, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 16.253521126760564, | |
| "grad_norm": 0.2853955626487732, | |
| "learning_rate": 7.851243541221769e-07, | |
| "loss": 0.30784159898757935, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 16.281690140845072, | |
| "grad_norm": 0.290031760931015, | |
| "learning_rate": 7.844543343282595e-07, | |
| "loss": 0.27567434310913086, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 16.309859154929576, | |
| "grad_norm": 0.283806174993515, | |
| "learning_rate": 7.837836003457793e-07, | |
| "loss": 0.28710314631462097, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 16.338028169014084, | |
| "grad_norm": 0.2768094539642334, | |
| "learning_rate": 7.831121542179086e-07, | |
| "loss": 0.27676063776016235, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 16.366197183098592, | |
| "grad_norm": 0.27568569779396057, | |
| "learning_rate": 7.824399979899889e-07, | |
| "loss": 0.2947593927383423, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 16.3943661971831, | |
| "grad_norm": 0.3079885244369507, | |
| "learning_rate": 7.817671337095244e-07, | |
| "loss": 0.2868027985095978, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 16.422535211267604, | |
| "grad_norm": 0.29744645953178406, | |
| "learning_rate": 7.810935634261764e-07, | |
| "loss": 0.2946295738220215, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 16.450704225352112, | |
| "grad_norm": 0.28457650542259216, | |
| "learning_rate": 7.804192891917571e-07, | |
| "loss": 0.2790455222129822, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 16.47887323943662, | |
| "grad_norm": 0.28848767280578613, | |
| "learning_rate": 7.797443130602226e-07, | |
| "loss": 0.2941606640815735, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 16.507042253521128, | |
| "grad_norm": 0.2936708927154541, | |
| "learning_rate": 7.79068637087667e-07, | |
| "loss": 0.2923729121685028, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 16.535211267605632, | |
| "grad_norm": 0.28460994362831116, | |
| "learning_rate": 7.783922633323169e-07, | |
| "loss": 0.2795827090740204, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 16.56338028169014, | |
| "grad_norm": 0.28233277797698975, | |
| "learning_rate": 7.777151938545235e-07, | |
| "loss": 0.29222947359085083, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 16.591549295774648, | |
| "grad_norm": 0.28648558259010315, | |
| "learning_rate": 7.770374307167585e-07, | |
| "loss": 0.27923721075057983, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 16.619718309859156, | |
| "grad_norm": 0.2813912332057953, | |
| "learning_rate": 7.763589759836058e-07, | |
| "loss": 0.2912202477455139, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 16.647887323943664, | |
| "grad_norm": 0.28273841738700867, | |
| "learning_rate": 7.756798317217558e-07, | |
| "loss": 0.29805850982666016, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 16.676056338028168, | |
| "grad_norm": 0.2922080457210541, | |
| "learning_rate": 7.75e-07, | |
| "loss": 0.2834911346435547, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 16.704225352112676, | |
| "grad_norm": 0.27855902910232544, | |
| "learning_rate": 7.743194828892235e-07, | |
| "loss": 0.2842041552066803, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 16.732394366197184, | |
| "grad_norm": 0.2905668318271637, | |
| "learning_rate": 7.736382824623999e-07, | |
| "loss": 0.281250923871994, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 16.760563380281692, | |
| "grad_norm": 0.2928289771080017, | |
| "learning_rate": 7.729564007945834e-07, | |
| "loss": 0.2863979935646057, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 16.788732394366196, | |
| "grad_norm": 0.28705668449401855, | |
| "learning_rate": 7.72273839962904e-07, | |
| "loss": 0.287672221660614, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 16.816901408450704, | |
| "grad_norm": 0.29107093811035156, | |
| "learning_rate": 7.715906020465602e-07, | |
| "loss": 0.27715277671813965, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 16.845070422535212, | |
| "grad_norm": 0.28827348351478577, | |
| "learning_rate": 7.709066891268133e-07, | |
| "loss": 0.2648072838783264, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 16.87323943661972, | |
| "grad_norm": 0.28768298029899597, | |
| "learning_rate": 7.702221032869808e-07, | |
| "loss": 0.26861560344696045, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 16.901408450704224, | |
| "grad_norm": 0.3000086843967438, | |
| "learning_rate": 7.695368466124296e-07, | |
| "loss": 0.2910693287849426, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 16.929577464788732, | |
| "grad_norm": 0.3058622181415558, | |
| "learning_rate": 7.688509211905707e-07, | |
| "loss": 0.2804388105869293, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 16.95774647887324, | |
| "grad_norm": 0.2874692678451538, | |
| "learning_rate": 7.681643291108517e-07, | |
| "loss": 0.2883044481277466, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 16.985915492957748, | |
| "grad_norm": 0.2868764102458954, | |
| "learning_rate": 7.67477072464751e-07, | |
| "loss": 0.2847598195075989, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.3980148136615753, | |
| "learning_rate": 7.667891533457718e-07, | |
| "loss": 0.29258161783218384, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 17.028169014084508, | |
| "grad_norm": 0.2752118408679962, | |
| "learning_rate": 7.661005738494349e-07, | |
| "loss": 0.28283417224884033, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 17.056338028169016, | |
| "grad_norm": 0.2837778627872467, | |
| "learning_rate": 7.654113360732732e-07, | |
| "loss": 0.2758600115776062, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 17.08450704225352, | |
| "grad_norm": 0.2887240946292877, | |
| "learning_rate": 7.647214421168238e-07, | |
| "loss": 0.2864817976951599, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 17.112676056338028, | |
| "grad_norm": 0.27935662865638733, | |
| "learning_rate": 7.640308940816239e-07, | |
| "loss": 0.28024283051490784, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 17.140845070422536, | |
| "grad_norm": 0.2960900664329529, | |
| "learning_rate": 7.633396940712023e-07, | |
| "loss": 0.2681460976600647, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 17.169014084507044, | |
| "grad_norm": 0.2915673553943634, | |
| "learning_rate": 7.626478441910744e-07, | |
| "loss": 0.2805773913860321, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 17.197183098591548, | |
| "grad_norm": 0.2789720892906189, | |
| "learning_rate": 7.619553465487344e-07, | |
| "loss": 0.28847092390060425, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 17.225352112676056, | |
| "grad_norm": 0.2745218575000763, | |
| "learning_rate": 7.612622032536507e-07, | |
| "loss": 0.28274643421173096, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 17.253521126760564, | |
| "grad_norm": 0.2962469458580017, | |
| "learning_rate": 7.60568416417258e-07, | |
| "loss": 0.2827341556549072, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 17.281690140845072, | |
| "grad_norm": 0.28243717551231384, | |
| "learning_rate": 7.59873988152951e-07, | |
| "loss": 0.2872379422187805, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 17.309859154929576, | |
| "grad_norm": 0.2935909926891327, | |
| "learning_rate": 7.591789205760789e-07, | |
| "loss": 0.29077547788619995, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 17.338028169014084, | |
| "grad_norm": 0.2725030481815338, | |
| "learning_rate": 7.584832158039378e-07, | |
| "loss": 0.28079894185066223, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 17.366197183098592, | |
| "grad_norm": 0.2863542437553406, | |
| "learning_rate": 7.577868759557653e-07, | |
| "loss": 0.2759760618209839, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 17.3943661971831, | |
| "grad_norm": 0.2829958498477936, | |
| "learning_rate": 7.570899031527332e-07, | |
| "loss": 0.27316516637802124, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 17.422535211267604, | |
| "grad_norm": 0.28861963748931885, | |
| "learning_rate": 7.563922995179418e-07, | |
| "loss": 0.2758478820323944, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 17.450704225352112, | |
| "grad_norm": 0.2935570478439331, | |
| "learning_rate": 7.556940671764124e-07, | |
| "loss": 0.28437983989715576, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 17.47887323943662, | |
| "grad_norm": 0.3037278652191162, | |
| "learning_rate": 7.54995208255082e-07, | |
| "loss": 0.28943467140197754, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 17.507042253521128, | |
| "grad_norm": 0.31774893403053284, | |
| "learning_rate": 7.54295724882796e-07, | |
| "loss": 0.29023581743240356, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 17.535211267605632, | |
| "grad_norm": 0.28832852840423584, | |
| "learning_rate": 7.535956191903021e-07, | |
| "loss": 0.2840030789375305, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 17.56338028169014, | |
| "grad_norm": 0.28122231364250183, | |
| "learning_rate": 7.528948933102438e-07, | |
| "loss": 0.28523629903793335, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 17.591549295774648, | |
| "grad_norm": 0.29538190364837646, | |
| "learning_rate": 7.521935493771534e-07, | |
| "loss": 0.28018033504486084, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 17.619718309859156, | |
| "grad_norm": 0.3163702189922333, | |
| "learning_rate": 7.514915895274463e-07, | |
| "loss": 0.2885722517967224, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 17.647887323943664, | |
| "grad_norm": 0.2946973741054535, | |
| "learning_rate": 7.507890158994139e-07, | |
| "loss": 0.2785816490650177, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 17.676056338028168, | |
| "grad_norm": 0.2805889844894409, | |
| "learning_rate": 7.500858306332172e-07, | |
| "loss": 0.2974117398262024, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 17.704225352112676, | |
| "grad_norm": 0.28544914722442627, | |
| "learning_rate": 7.493820358708809e-07, | |
| "loss": 0.2892162501811981, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 17.732394366197184, | |
| "grad_norm": 0.3272300064563751, | |
| "learning_rate": 7.486776337562853e-07, | |
| "loss": 0.3017275333404541, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 17.760563380281692, | |
| "grad_norm": 0.28177788853645325, | |
| "learning_rate": 7.479726264351618e-07, | |
| "loss": 0.2729823589324951, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 17.788732394366196, | |
| "grad_norm": 0.2774059474468231, | |
| "learning_rate": 7.472670160550848e-07, | |
| "loss": 0.27497977018356323, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 17.816901408450704, | |
| "grad_norm": 0.2898328900337219, | |
| "learning_rate": 7.46560804765466e-07, | |
| "loss": 0.27945676445961, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 17.845070422535212, | |
| "grad_norm": 0.2784922420978546, | |
| "learning_rate": 7.458539947175473e-07, | |
| "loss": 0.29566580057144165, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 17.87323943661972, | |
| "grad_norm": 0.2864189147949219, | |
| "learning_rate": 7.45146588064395e-07, | |
| "loss": 0.2862587869167328, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 17.901408450704224, | |
| "grad_norm": 0.2896963953971863, | |
| "learning_rate": 7.444385869608921e-07, | |
| "loss": 0.2924667000770569, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 17.929577464788732, | |
| "grad_norm": 0.28463807702064514, | |
| "learning_rate": 7.437299935637328e-07, | |
| "loss": 0.2862287163734436, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 17.95774647887324, | |
| "grad_norm": 0.28407302498817444, | |
| "learning_rate": 7.430208100314156e-07, | |
| "loss": 0.2759779989719391, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 17.985915492957748, | |
| "grad_norm": 0.2773316502571106, | |
| "learning_rate": 7.423110385242366e-07, | |
| "loss": 0.2798498272895813, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.3958338499069214, | |
| "learning_rate": 7.416006812042827e-07, | |
| "loss": 0.28481870889663696, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 18.028169014084508, | |
| "grad_norm": 0.2922191321849823, | |
| "learning_rate": 7.408897402354255e-07, | |
| "loss": 0.2781963348388672, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 18.056338028169016, | |
| "grad_norm": 0.29166096448898315, | |
| "learning_rate": 7.401782177833147e-07, | |
| "loss": 0.2843964099884033, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 18.08450704225352, | |
| "grad_norm": 0.28290343284606934, | |
| "learning_rate": 7.394661160153709e-07, | |
| "loss": 0.2840275168418884, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 18.112676056338028, | |
| "grad_norm": 0.28300249576568604, | |
| "learning_rate": 7.387534371007797e-07, | |
| "loss": 0.2893407642841339, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 18.140845070422536, | |
| "grad_norm": 0.2870761752128601, | |
| "learning_rate": 7.380401832104845e-07, | |
| "loss": 0.26570916175842285, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 18.169014084507044, | |
| "grad_norm": 0.2919873297214508, | |
| "learning_rate": 7.373263565171805e-07, | |
| "loss": 0.26768985390663147, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 18.197183098591548, | |
| "grad_norm": 0.2856583893299103, | |
| "learning_rate": 7.366119591953075e-07, | |
| "loss": 0.2823103070259094, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 18.225352112676056, | |
| "grad_norm": 0.2853250801563263, | |
| "learning_rate": 7.358969934210438e-07, | |
| "loss": 0.28462791442871094, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 18.253521126760564, | |
| "grad_norm": 0.27667704224586487, | |
| "learning_rate": 7.35181461372299e-07, | |
| "loss": 0.27125126123428345, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 18.281690140845072, | |
| "grad_norm": 0.2884734272956848, | |
| "learning_rate": 7.344653652287077e-07, | |
| "loss": 0.271454781293869, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 18.309859154929576, | |
| "grad_norm": 0.28490886092185974, | |
| "learning_rate": 7.337487071716232e-07, | |
| "loss": 0.286302775144577, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 18.338028169014084, | |
| "grad_norm": 0.27361124753952026, | |
| "learning_rate": 7.330314893841101e-07, | |
| "loss": 0.2801797389984131, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 18.366197183098592, | |
| "grad_norm": 0.28517088294029236, | |
| "learning_rate": 7.323137140509381e-07, | |
| "loss": 0.2785356640815735, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 18.3943661971831, | |
| "grad_norm": 0.2725742757320404, | |
| "learning_rate": 7.315953833585755e-07, | |
| "loss": 0.27504605054855347, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 18.422535211267604, | |
| "grad_norm": 0.29915499687194824, | |
| "learning_rate": 7.308764994951821e-07, | |
| "loss": 0.2808704078197479, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 18.450704225352112, | |
| "grad_norm": 0.31304341554641724, | |
| "learning_rate": 7.301570646506027e-07, | |
| "loss": 0.2911706566810608, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 18.47887323943662, | |
| "grad_norm": 0.2919553816318512, | |
| "learning_rate": 7.294370810163607e-07, | |
| "loss": 0.27866852283477783, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 18.507042253521128, | |
| "grad_norm": 0.3162909746170044, | |
| "learning_rate": 7.287165507856512e-07, | |
| "loss": 0.2802932560443878, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 18.535211267605632, | |
| "grad_norm": 0.303523451089859, | |
| "learning_rate": 7.279954761533342e-07, | |
| "loss": 0.2824591398239136, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 18.56338028169014, | |
| "grad_norm": 0.29366716742515564, | |
| "learning_rate": 7.27273859315928e-07, | |
| "loss": 0.28101497888565063, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 18.591549295774648, | |
| "grad_norm": 0.28469985723495483, | |
| "learning_rate": 7.265517024716026e-07, | |
| "loss": 0.29134345054626465, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 18.619718309859156, | |
| "grad_norm": 0.28721922636032104, | |
| "learning_rate": 7.258290078201731e-07, | |
| "loss": 0.284817636013031, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 18.647887323943664, | |
| "grad_norm": 0.30535197257995605, | |
| "learning_rate": 7.251057775630927e-07, | |
| "loss": 0.28168779611587524, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 18.676056338028168, | |
| "grad_norm": 0.2980702817440033, | |
| "learning_rate": 7.243820139034464e-07, | |
| "loss": 0.27493056654930115, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 18.704225352112676, | |
| "grad_norm": 0.28984636068344116, | |
| "learning_rate": 7.236577190459433e-07, | |
| "loss": 0.2975635528564453, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 18.732394366197184, | |
| "grad_norm": 0.29580390453338623, | |
| "learning_rate": 7.229328951969115e-07, | |
| "loss": 0.2849118113517761, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 18.760563380281692, | |
| "grad_norm": 0.2950834035873413, | |
| "learning_rate": 7.222075445642904e-07, | |
| "loss": 0.26458609104156494, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 18.788732394366196, | |
| "grad_norm": 0.29167890548706055, | |
| "learning_rate": 7.214816693576234e-07, | |
| "loss": 0.2846098840236664, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 18.816901408450704, | |
| "grad_norm": 0.2784614861011505, | |
| "learning_rate": 7.207552717880522e-07, | |
| "loss": 0.28443169593811035, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 18.845070422535212, | |
| "grad_norm": 0.29537051916122437, | |
| "learning_rate": 7.200283540683102e-07, | |
| "loss": 0.27960023283958435, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 18.87323943661972, | |
| "grad_norm": 0.2873672544956207, | |
| "learning_rate": 7.193009184127145e-07, | |
| "loss": 0.28757309913635254, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 18.901408450704224, | |
| "grad_norm": 0.28597328066825867, | |
| "learning_rate": 7.185729670371604e-07, | |
| "loss": 0.2904655635356903, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 18.929577464788732, | |
| "grad_norm": 0.29267045855522156, | |
| "learning_rate": 7.17844502159114e-07, | |
| "loss": 0.2797931432723999, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 18.95774647887324, | |
| "grad_norm": 0.27707934379577637, | |
| "learning_rate": 7.171155259976057e-07, | |
| "loss": 0.2788022458553314, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 18.985915492957748, | |
| "grad_norm": 0.2854091227054596, | |
| "learning_rate": 7.163860407732231e-07, | |
| "loss": 0.28216353058815, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.4010404348373413, | |
| "learning_rate": 7.156560487081051e-07, | |
| "loss": 0.2831748127937317, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 19.028169014084508, | |
| "grad_norm": 0.2948407232761383, | |
| "learning_rate": 7.149255520259338e-07, | |
| "loss": 0.26844292879104614, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 19.056338028169016, | |
| "grad_norm": 0.2946661114692688, | |
| "learning_rate": 7.141945529519288e-07, | |
| "loss": 0.2809017300605774, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 19.08450704225352, | |
| "grad_norm": 0.27715936303138733, | |
| "learning_rate": 7.134630537128403e-07, | |
| "loss": 0.2835448980331421, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 19.112676056338028, | |
| "grad_norm": 0.2933226525783539, | |
| "learning_rate": 7.127310565369415e-07, | |
| "loss": 0.2795133888721466, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 19.140845070422536, | |
| "grad_norm": 0.28180861473083496, | |
| "learning_rate": 7.11998563654023e-07, | |
| "loss": 0.2750745713710785, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 19.169014084507044, | |
| "grad_norm": 0.2755012810230255, | |
| "learning_rate": 7.11265577295385e-07, | |
| "loss": 0.281097412109375, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 19.197183098591548, | |
| "grad_norm": 0.2865377962589264, | |
| "learning_rate": 7.105320996938314e-07, | |
| "loss": 0.2677628993988037, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 19.225352112676056, | |
| "grad_norm": 0.2958216369152069, | |
| "learning_rate": 7.097981330836616e-07, | |
| "loss": 0.2733122408390045, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 19.253521126760564, | |
| "grad_norm": 0.2982434034347534, | |
| "learning_rate": 7.090636797006657e-07, | |
| "loss": 0.2764785885810852, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 19.281690140845072, | |
| "grad_norm": 0.31210824847221375, | |
| "learning_rate": 7.083287417821157e-07, | |
| "loss": 0.27116531133651733, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 19.309859154929576, | |
| "grad_norm": 0.29045426845550537, | |
| "learning_rate": 7.075933215667604e-07, | |
| "loss": 0.2775840163230896, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 19.338028169014084, | |
| "grad_norm": 0.29685893654823303, | |
| "learning_rate": 7.068574212948169e-07, | |
| "loss": 0.2803945541381836, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 19.366197183098592, | |
| "grad_norm": 0.2790866494178772, | |
| "learning_rate": 7.06121043207965e-07, | |
| "loss": 0.2769659161567688, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 19.3943661971831, | |
| "grad_norm": 0.31644630432128906, | |
| "learning_rate": 7.053841895493406e-07, | |
| "loss": 0.27923786640167236, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 19.422535211267604, | |
| "grad_norm": 0.30641067028045654, | |
| "learning_rate": 7.046468625635274e-07, | |
| "loss": 0.2825276255607605, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 19.450704225352112, | |
| "grad_norm": 0.292458713054657, | |
| "learning_rate": 7.039090644965509e-07, | |
| "loss": 0.27422571182250977, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 19.47887323943662, | |
| "grad_norm": 0.2903311550617218, | |
| "learning_rate": 7.031707975958726e-07, | |
| "loss": 0.27189522981643677, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 19.507042253521128, | |
| "grad_norm": 0.2947315275669098, | |
| "learning_rate": 7.024320641103811e-07, | |
| "loss": 0.2683555483818054, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 19.535211267605632, | |
| "grad_norm": 0.29522547125816345, | |
| "learning_rate": 7.01692866290387e-07, | |
| "loss": 0.28815943002700806, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 19.56338028169014, | |
| "grad_norm": 0.28272008895874023, | |
| "learning_rate": 7.009532063876148e-07, | |
| "loss": 0.2853075861930847, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 19.591549295774648, | |
| "grad_norm": 0.286604642868042, | |
| "learning_rate": 7.002130866551968e-07, | |
| "loss": 0.2744004726409912, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 19.619718309859156, | |
| "grad_norm": 0.2829611301422119, | |
| "learning_rate": 6.994725093476664e-07, | |
| "loss": 0.2899395525455475, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 19.647887323943664, | |
| "grad_norm": 0.3035781681537628, | |
| "learning_rate": 6.987314767209503e-07, | |
| "loss": 0.29819610714912415, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 19.676056338028168, | |
| "grad_norm": 0.30463680624961853, | |
| "learning_rate": 6.979899910323624e-07, | |
| "loss": 0.2818058729171753, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 19.704225352112676, | |
| "grad_norm": 0.29514482617378235, | |
| "learning_rate": 6.972480545405968e-07, | |
| "loss": 0.294766366481781, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 19.732394366197184, | |
| "grad_norm": 0.282625675201416, | |
| "learning_rate": 6.965056695057204e-07, | |
| "loss": 0.27316591143608093, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 19.760563380281692, | |
| "grad_norm": 0.3090338110923767, | |
| "learning_rate": 6.957628381891673e-07, | |
| "loss": 0.2785091698169708, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 19.788732394366196, | |
| "grad_norm": 0.2826164960861206, | |
| "learning_rate": 6.950195628537299e-07, | |
| "loss": 0.2870754301548004, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 19.816901408450704, | |
| "grad_norm": 0.29807525873184204, | |
| "learning_rate": 6.942758457635543e-07, | |
| "loss": 0.27232879400253296, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 19.845070422535212, | |
| "grad_norm": 0.2901877760887146, | |
| "learning_rate": 6.935316891841315e-07, | |
| "loss": 0.2786208987236023, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 19.87323943661972, | |
| "grad_norm": 0.2947152853012085, | |
| "learning_rate": 6.927870953822915e-07, | |
| "loss": 0.2676268517971039, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 19.901408450704224, | |
| "grad_norm": 0.30847856402397156, | |
| "learning_rate": 6.920420666261961e-07, | |
| "loss": 0.27726125717163086, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 19.929577464788732, | |
| "grad_norm": 0.29455119371414185, | |
| "learning_rate": 6.912966051853322e-07, | |
| "loss": 0.28886911273002625, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 19.95774647887324, | |
| "grad_norm": 0.2961712181568146, | |
| "learning_rate": 6.905507133305047e-07, | |
| "loss": 0.2736320495605469, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 19.985915492957748, | |
| "grad_norm": 0.2923624515533447, | |
| "learning_rate": 6.898043933338293e-07, | |
| "loss": 0.2720155119895935, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.40786370635032654, | |
| "learning_rate": 6.890576474687263e-07, | |
| "loss": 0.3052176237106323, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 20.028169014084508, | |
| "grad_norm": 0.281310498714447, | |
| "learning_rate": 6.883104780099133e-07, | |
| "loss": 0.2827909588813782, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 20.056338028169016, | |
| "grad_norm": 0.28428319096565247, | |
| "learning_rate": 6.875628872333975e-07, | |
| "loss": 0.2593810558319092, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 20.08450704225352, | |
| "grad_norm": 0.28026291728019714, | |
| "learning_rate": 6.868148774164706e-07, | |
| "loss": 0.2783263027667999, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 20.112676056338028, | |
| "grad_norm": 0.2842010259628296, | |
| "learning_rate": 6.860664508377001e-07, | |
| "loss": 0.2809029221534729, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 20.140845070422536, | |
| "grad_norm": 0.2880638539791107, | |
| "learning_rate": 6.853176097769228e-07, | |
| "loss": 0.26888588070869446, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 20.169014084507044, | |
| "grad_norm": 0.28630784153938293, | |
| "learning_rate": 6.84568356515239e-07, | |
| "loss": 0.2781735062599182, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 20.197183098591548, | |
| "grad_norm": 0.30342307686805725, | |
| "learning_rate": 6.838186933350036e-07, | |
| "loss": 0.27911239862442017, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 20.225352112676056, | |
| "grad_norm": 0.29965290427207947, | |
| "learning_rate": 6.83068622519821e-07, | |
| "loss": 0.2759650945663452, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 20.253521126760564, | |
| "grad_norm": 0.2921484708786011, | |
| "learning_rate": 6.823181463545366e-07, | |
| "loss": 0.26791465282440186, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 20.281690140845072, | |
| "grad_norm": 0.29477155208587646, | |
| "learning_rate": 6.815672671252315e-07, | |
| "loss": 0.27440106868743896, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 20.309859154929576, | |
| "grad_norm": 0.2930176854133606, | |
| "learning_rate": 6.808159871192136e-07, | |
| "loss": 0.28788119554519653, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 20.338028169014084, | |
| "grad_norm": 0.304382860660553, | |
| "learning_rate": 6.800643086250121e-07, | |
| "loss": 0.2717517614364624, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 20.366197183098592, | |
| "grad_norm": 0.2945499122142792, | |
| "learning_rate": 6.793122339323705e-07, | |
| "loss": 0.29744279384613037, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 20.3943661971831, | |
| "grad_norm": 0.2932227849960327, | |
| "learning_rate": 6.78559765332238e-07, | |
| "loss": 0.2782973051071167, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 20.422535211267604, | |
| "grad_norm": 0.29432976245880127, | |
| "learning_rate": 6.778069051167653e-07, | |
| "loss": 0.28551533818244934, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 20.450704225352112, | |
| "grad_norm": 0.30091312527656555, | |
| "learning_rate": 6.770536555792944e-07, | |
| "loss": 0.28610894083976746, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 20.47887323943662, | |
| "grad_norm": 0.29813316464424133, | |
| "learning_rate": 6.763000190143545e-07, | |
| "loss": 0.28137102723121643, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 20.507042253521128, | |
| "grad_norm": 0.28738856315612793, | |
| "learning_rate": 6.755459977176532e-07, | |
| "loss": 0.26876533031463623, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 20.535211267605632, | |
| "grad_norm": 0.2894875407218933, | |
| "learning_rate": 6.747915939860701e-07, | |
| "loss": 0.2704589366912842, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 20.56338028169014, | |
| "grad_norm": 0.3046717047691345, | |
| "learning_rate": 6.740368101176495e-07, | |
| "loss": 0.28678447008132935, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 20.591549295774648, | |
| "grad_norm": 0.29942622780799866, | |
| "learning_rate": 6.732816484115946e-07, | |
| "loss": 0.27722471952438354, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 20.619718309859156, | |
| "grad_norm": 0.2984582185745239, | |
| "learning_rate": 6.725261111682584e-07, | |
| "loss": 0.2638360261917114, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 20.647887323943664, | |
| "grad_norm": 0.2943922281265259, | |
| "learning_rate": 6.717702006891386e-07, | |
| "loss": 0.286998450756073, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 20.676056338028168, | |
| "grad_norm": 0.2971697747707367, | |
| "learning_rate": 6.710139192768694e-07, | |
| "loss": 0.2628033757209778, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 20.704225352112676, | |
| "grad_norm": 0.2915992736816406, | |
| "learning_rate": 6.702572692352155e-07, | |
| "loss": 0.2789704203605652, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 20.732394366197184, | |
| "grad_norm": 0.29871392250061035, | |
| "learning_rate": 6.695002528690639e-07, | |
| "loss": 0.2669401168823242, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 20.760563380281692, | |
| "grad_norm": 0.29496580362319946, | |
| "learning_rate": 6.687428724844179e-07, | |
| "loss": 0.2711006999015808, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 20.788732394366196, | |
| "grad_norm": 0.29237619042396545, | |
| "learning_rate": 6.679851303883891e-07, | |
| "loss": 0.2822151780128479, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 20.816901408450704, | |
| "grad_norm": 0.29689720273017883, | |
| "learning_rate": 6.672270288891918e-07, | |
| "loss": 0.2751491665840149, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 20.845070422535212, | |
| "grad_norm": 0.28889331221580505, | |
| "learning_rate": 6.664685702961344e-07, | |
| "loss": 0.2681749761104584, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 20.87323943661972, | |
| "grad_norm": 0.2995631694793701, | |
| "learning_rate": 6.657097569196133e-07, | |
| "loss": 0.2793988287448883, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 20.901408450704224, | |
| "grad_norm": 0.29980671405792236, | |
| "learning_rate": 6.649505910711058e-07, | |
| "loss": 0.27338624000549316, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 20.929577464788732, | |
| "grad_norm": 0.29344668984413147, | |
| "learning_rate": 6.641910750631626e-07, | |
| "loss": 0.284781813621521, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 20.95774647887324, | |
| "grad_norm": 0.29827746748924255, | |
| "learning_rate": 6.634312112094013e-07, | |
| "loss": 0.27890220284461975, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 20.985915492957748, | |
| "grad_norm": 0.2813144326210022, | |
| "learning_rate": 6.626710018244987e-07, | |
| "loss": 0.2822881042957306, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 0.3963703215122223, | |
| "learning_rate": 6.619104492241847e-07, | |
| "loss": 0.27128899097442627, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 21.028169014084508, | |
| "grad_norm": 0.2815580666065216, | |
| "learning_rate": 6.611495557252344e-07, | |
| "loss": 0.26516419649124146, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 21.056338028169016, | |
| "grad_norm": 0.2884436845779419, | |
| "learning_rate": 6.603883236454612e-07, | |
| "loss": 0.2861919701099396, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 21.08450704225352, | |
| "grad_norm": 0.29655352234840393, | |
| "learning_rate": 6.596267553037102e-07, | |
| "loss": 0.28643375635147095, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 21.112676056338028, | |
| "grad_norm": 0.2927301824092865, | |
| "learning_rate": 6.588648530198504e-07, | |
| "loss": 0.26665711402893066, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 21.140845070422536, | |
| "grad_norm": 0.3053556978702545, | |
| "learning_rate": 6.581026191147687e-07, | |
| "loss": 0.2608697712421417, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 21.169014084507044, | |
| "grad_norm": 0.2939828634262085, | |
| "learning_rate": 6.573400559103613e-07, | |
| "loss": 0.2792375683784485, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 21.197183098591548, | |
| "grad_norm": 0.2972046136856079, | |
| "learning_rate": 6.565771657295285e-07, | |
| "loss": 0.28457099199295044, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 21.225352112676056, | |
| "grad_norm": 0.2918429672718048, | |
| "learning_rate": 6.558139508961654e-07, | |
| "loss": 0.2648508548736572, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 21.253521126760564, | |
| "grad_norm": 0.28380143642425537, | |
| "learning_rate": 6.550504137351575e-07, | |
| "loss": 0.27792784571647644, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 21.281690140845072, | |
| "grad_norm": 0.3151639997959137, | |
| "learning_rate": 6.542865565723707e-07, | |
| "loss": 0.2657250165939331, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 21.309859154929576, | |
| "grad_norm": 0.2861776351928711, | |
| "learning_rate": 6.53522381734647e-07, | |
| "loss": 0.27351340651512146, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 21.338028169014084, | |
| "grad_norm": 0.28596001863479614, | |
| "learning_rate": 6.527578915497951e-07, | |
| "loss": 0.28022241592407227, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 21.366197183098592, | |
| "grad_norm": 0.29702675342559814, | |
| "learning_rate": 6.519930883465847e-07, | |
| "loss": 0.2644035518169403, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 21.3943661971831, | |
| "grad_norm": 0.2863904535770416, | |
| "learning_rate": 6.512279744547392e-07, | |
| "loss": 0.2721293568611145, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 21.422535211267604, | |
| "grad_norm": 0.311262845993042, | |
| "learning_rate": 6.50462552204928e-07, | |
| "loss": 0.2911388874053955, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 21.450704225352112, | |
| "grad_norm": 0.3132490813732147, | |
| "learning_rate": 6.496968239287603e-07, | |
| "loss": 0.27957841753959656, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 21.47887323943662, | |
| "grad_norm": 0.29439255595207214, | |
| "learning_rate": 6.489307919587769e-07, | |
| "loss": 0.28288164734840393, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 21.507042253521128, | |
| "grad_norm": 0.3006008267402649, | |
| "learning_rate": 6.481644586284442e-07, | |
| "loss": 0.26865097880363464, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 21.535211267605632, | |
| "grad_norm": 0.28934645652770996, | |
| "learning_rate": 6.473978262721463e-07, | |
| "loss": 0.28625524044036865, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 21.56338028169014, | |
| "grad_norm": 0.28962355852127075, | |
| "learning_rate": 6.466308972251785e-07, | |
| "loss": 0.2737366855144501, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 21.591549295774648, | |
| "grad_norm": 0.29193779826164246, | |
| "learning_rate": 6.458636738237395e-07, | |
| "loss": 0.2644401788711548, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 21.619718309859156, | |
| "grad_norm": 0.31439822912216187, | |
| "learning_rate": 6.45096158404925e-07, | |
| "loss": 0.2638384699821472, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 21.647887323943664, | |
| "grad_norm": 0.2855563163757324, | |
| "learning_rate": 6.443283533067198e-07, | |
| "loss": 0.2697969079017639, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 21.676056338028168, | |
| "grad_norm": 0.2941296398639679, | |
| "learning_rate": 6.435602608679916e-07, | |
| "loss": 0.27152666449546814, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 21.704225352112676, | |
| "grad_norm": 0.2861116826534271, | |
| "learning_rate": 6.427918834284834e-07, | |
| "loss": 0.2749404013156891, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 21.732394366197184, | |
| "grad_norm": 0.30467715859413147, | |
| "learning_rate": 6.420232233288055e-07, | |
| "loss": 0.28106456995010376, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 21.760563380281692, | |
| "grad_norm": 0.2885453402996063, | |
| "learning_rate": 6.412542829104306e-07, | |
| "loss": 0.2661711275577545, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 21.788732394366196, | |
| "grad_norm": 0.30243006348609924, | |
| "learning_rate": 6.404850645156841e-07, | |
| "loss": 0.28171294927597046, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 21.816901408450704, | |
| "grad_norm": 0.29606276750564575, | |
| "learning_rate": 6.397155704877388e-07, | |
| "loss": 0.2737141251564026, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 21.845070422535212, | |
| "grad_norm": 0.30514174699783325, | |
| "learning_rate": 6.389458031706068e-07, | |
| "loss": 0.2778671979904175, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 21.87323943661972, | |
| "grad_norm": 0.29419445991516113, | |
| "learning_rate": 6.381757649091329e-07, | |
| "loss": 0.27829116582870483, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 21.901408450704224, | |
| "grad_norm": 0.30376535654067993, | |
| "learning_rate": 6.374054580489873e-07, | |
| "loss": 0.26818743348121643, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 21.929577464788732, | |
| "grad_norm": 0.29063352942466736, | |
| "learning_rate": 6.366348849366583e-07, | |
| "loss": 0.28016185760498047, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 21.95774647887324, | |
| "grad_norm": 0.29429173469543457, | |
| "learning_rate": 6.358640479194451e-07, | |
| "loss": 0.27824854850769043, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 21.985915492957748, | |
| "grad_norm": 0.28934815526008606, | |
| "learning_rate": 6.35092949345451e-07, | |
| "loss": 0.2743881344795227, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.41559702157974243, | |
| "learning_rate": 6.343215915635761e-07, | |
| "loss": 0.2856147289276123, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 22.028169014084508, | |
| "grad_norm": 0.29498717188835144, | |
| "learning_rate": 6.335499769235098e-07, | |
| "loss": 0.2729465961456299, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 22.056338028169016, | |
| "grad_norm": 0.30124449729919434, | |
| "learning_rate": 6.327781077757241e-07, | |
| "loss": 0.2874697744846344, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 22.08450704225352, | |
| "grad_norm": 0.3204105794429779, | |
| "learning_rate": 6.320059864714664e-07, | |
| "loss": 0.2923066020011902, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 22.112676056338028, | |
| "grad_norm": 0.2912622392177582, | |
| "learning_rate": 6.31233615362752e-07, | |
| "loss": 0.2808852791786194, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 22.140845070422536, | |
| "grad_norm": 0.30250096321105957, | |
| "learning_rate": 6.304609968023572e-07, | |
| "loss": 0.27111589908599854, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 22.169014084507044, | |
| "grad_norm": 0.3024645447731018, | |
| "learning_rate": 6.296881331438126e-07, | |
| "loss": 0.2812804877758026, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 22.197183098591548, | |
| "grad_norm": 0.29673656821250916, | |
| "learning_rate": 6.289150267413942e-07, | |
| "loss": 0.2681958079338074, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 22.225352112676056, | |
| "grad_norm": 0.29564592242240906, | |
| "learning_rate": 6.281416799501187e-07, | |
| "loss": 0.26508989930152893, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 22.253521126760564, | |
| "grad_norm": 0.2849496603012085, | |
| "learning_rate": 6.273680951257342e-07, | |
| "loss": 0.27044007182121277, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 22.281690140845072, | |
| "grad_norm": 0.30459970235824585, | |
| "learning_rate": 6.265942746247146e-07, | |
| "loss": 0.26503556966781616, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 22.309859154929576, | |
| "grad_norm": 0.29415223002433777, | |
| "learning_rate": 6.258202208042511e-07, | |
| "loss": 0.26770085096359253, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 22.338028169014084, | |
| "grad_norm": 0.3101199269294739, | |
| "learning_rate": 6.25045936022246e-07, | |
| "loss": 0.26633113622665405, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 22.366197183098592, | |
| "grad_norm": 0.28551825881004333, | |
| "learning_rate": 6.242714226373049e-07, | |
| "loss": 0.2745598256587982, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 22.3943661971831, | |
| "grad_norm": 0.30341607332229614, | |
| "learning_rate": 6.2349668300873e-07, | |
| "loss": 0.2879912853240967, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 22.422535211267604, | |
| "grad_norm": 0.33077767491340637, | |
| "learning_rate": 6.227217194965125e-07, | |
| "loss": 0.28035950660705566, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 22.450704225352112, | |
| "grad_norm": 0.305733859539032, | |
| "learning_rate": 6.219465344613258e-07, | |
| "loss": 0.2842296361923218, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 22.47887323943662, | |
| "grad_norm": 0.2931113839149475, | |
| "learning_rate": 6.211711302645177e-07, | |
| "loss": 0.2730957865715027, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 22.507042253521128, | |
| "grad_norm": 0.2949962913990021, | |
| "learning_rate": 6.203955092681039e-07, | |
| "loss": 0.281680166721344, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 22.535211267605632, | |
| "grad_norm": 0.30062124133110046, | |
| "learning_rate": 6.196196738347607e-07, | |
| "loss": 0.2771790027618408, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 22.56338028169014, | |
| "grad_norm": 0.29685312509536743, | |
| "learning_rate": 6.188436263278172e-07, | |
| "loss": 0.27885377407073975, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 22.591549295774648, | |
| "grad_norm": 0.30217039585113525, | |
| "learning_rate": 6.180673691112486e-07, | |
| "loss": 0.2664039433002472, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 22.619718309859156, | |
| "grad_norm": 0.2935945987701416, | |
| "learning_rate": 6.172909045496694e-07, | |
| "loss": 0.266349196434021, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 22.647887323943664, | |
| "grad_norm": 0.31217825412750244, | |
| "learning_rate": 6.165142350083249e-07, | |
| "loss": 0.2723742127418518, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 22.676056338028168, | |
| "grad_norm": 0.2960183918476105, | |
| "learning_rate": 6.157373628530852e-07, | |
| "loss": 0.272281289100647, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 22.704225352112676, | |
| "grad_norm": 0.2914189100265503, | |
| "learning_rate": 6.149602904504378e-07, | |
| "loss": 0.26770728826522827, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 22.732394366197184, | |
| "grad_norm": 0.2774648368358612, | |
| "learning_rate": 6.141830201674802e-07, | |
| "loss": 0.2694011330604553, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 22.760563380281692, | |
| "grad_norm": 0.29001736640930176, | |
| "learning_rate": 6.134055543719121e-07, | |
| "loss": 0.2670798897743225, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 22.788732394366196, | |
| "grad_norm": 0.31117716431617737, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.26127567887306213, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 22.816901408450704, | |
| "grad_norm": 0.29720577597618103, | |
| "learning_rate": 6.118500457167159e-07, | |
| "loss": 0.27497297525405884, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 22.845070422535212, | |
| "grad_norm": 0.3057437241077423, | |
| "learning_rate": 6.11072007595437e-07, | |
| "loss": 0.27363038063049316, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 22.87323943661972, | |
| "grad_norm": 0.323045939207077, | |
| "learning_rate": 6.102937834382315e-07, | |
| "loss": 0.27130627632141113, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 22.901408450704224, | |
| "grad_norm": 0.28948745131492615, | |
| "learning_rate": 6.095153756157051e-07, | |
| "loss": 0.26591163873672485, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 22.929577464788732, | |
| "grad_norm": 0.27952200174331665, | |
| "learning_rate": 6.087367864990232e-07, | |
| "loss": 0.266745388507843, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 22.95774647887324, | |
| "grad_norm": 0.30804452300071716, | |
| "learning_rate": 6.079580184599032e-07, | |
| "loss": 0.2794422507286072, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 22.985915492957748, | |
| "grad_norm": 0.3002220392227173, | |
| "learning_rate": 6.071790738706078e-07, | |
| "loss": 0.26469242572784424, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 0.4127134084701538, | |
| "learning_rate": 6.06399955103937e-07, | |
| "loss": 0.2482779324054718, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 23.028169014084508, | |
| "grad_norm": 0.30051475763320923, | |
| "learning_rate": 6.056206645332217e-07, | |
| "loss": 0.26631736755371094, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 23.056338028169016, | |
| "grad_norm": 0.3008311688899994, | |
| "learning_rate": 6.048412045323164e-07, | |
| "loss": 0.27459877729415894, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 23.08450704225352, | |
| "grad_norm": 0.28853461146354675, | |
| "learning_rate": 6.040615774755911e-07, | |
| "loss": 0.26959413290023804, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 23.112676056338028, | |
| "grad_norm": 0.29199543595314026, | |
| "learning_rate": 6.032817857379256e-07, | |
| "loss": 0.2588391900062561, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 23.140845070422536, | |
| "grad_norm": 0.29191362857818604, | |
| "learning_rate": 6.025018316946999e-07, | |
| "loss": 0.27447617053985596, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 23.169014084507044, | |
| "grad_norm": 0.29501983523368835, | |
| "learning_rate": 6.017217177217899e-07, | |
| "loss": 0.26884716749191284, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 23.197183098591548, | |
| "grad_norm": 0.3098088502883911, | |
| "learning_rate": 6.009414461955581e-07, | |
| "loss": 0.28516972064971924, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 23.225352112676056, | |
| "grad_norm": 0.3027796149253845, | |
| "learning_rate": 6.001610194928464e-07, | |
| "loss": 0.2739514112472534, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 23.253521126760564, | |
| "grad_norm": 0.31156665086746216, | |
| "learning_rate": 5.993804399909703e-07, | |
| "loss": 0.26852983236312866, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 23.281690140845072, | |
| "grad_norm": 0.2958903908729553, | |
| "learning_rate": 5.985997100677103e-07, | |
| "loss": 0.2743365168571472, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 23.309859154929576, | |
| "grad_norm": 0.31140410900115967, | |
| "learning_rate": 5.97818832101305e-07, | |
| "loss": 0.27525418996810913, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 23.338028169014084, | |
| "grad_norm": 0.3082049787044525, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": 0.27074384689331055, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 23.366197183098592, | |
| "grad_norm": 0.2950114905834198, | |
| "learning_rate": 5.96256641554261e-07, | |
| "loss": 0.26068389415740967, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 23.3943661971831, | |
| "grad_norm": 0.31746307015419006, | |
| "learning_rate": 5.954753337323259e-07, | |
| "loss": 0.2648658752441406, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 23.422535211267604, | |
| "grad_norm": 0.2906374931335449, | |
| "learning_rate": 5.946938873846375e-07, | |
| "loss": 0.29040125012397766, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 23.450704225352112, | |
| "grad_norm": 0.3055919408798218, | |
| "learning_rate": 5.939123048916173e-07, | |
| "loss": 0.2694965600967407, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 23.47887323943662, | |
| "grad_norm": 0.3007211983203888, | |
| "learning_rate": 5.931305886341008e-07, | |
| "loss": 0.25987839698791504, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 23.507042253521128, | |
| "grad_norm": 0.3042035400867462, | |
| "learning_rate": 5.923487409933315e-07, | |
| "loss": 0.26484209299087524, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 23.535211267605632, | |
| "grad_norm": 0.30741506814956665, | |
| "learning_rate": 5.915667643509528e-07, | |
| "loss": 0.2735103368759155, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 23.56338028169014, | |
| "grad_norm": 0.30859899520874023, | |
| "learning_rate": 5.907846610890011e-07, | |
| "loss": 0.27706003189086914, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 23.591549295774648, | |
| "grad_norm": 0.29999226331710815, | |
| "learning_rate": 5.900024335898987e-07, | |
| "loss": 0.2733941674232483, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 23.619718309859156, | |
| "grad_norm": 0.3084903955459595, | |
| "learning_rate": 5.892200842364462e-07, | |
| "loss": 0.282131165266037, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 23.647887323943664, | |
| "grad_norm": 0.29400384426116943, | |
| "learning_rate": 5.884376154118154e-07, | |
| "loss": 0.26756390929222107, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 23.676056338028168, | |
| "grad_norm": 0.31666234135627747, | |
| "learning_rate": 5.87655029499542e-07, | |
| "loss": 0.2766130268573761, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 23.704225352112676, | |
| "grad_norm": 0.30233001708984375, | |
| "learning_rate": 5.868723288835184e-07, | |
| "loss": 0.2544291019439697, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 23.732394366197184, | |
| "grad_norm": 0.2888985276222229, | |
| "learning_rate": 5.860895159479864e-07, | |
| "loss": 0.272182822227478, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 23.760563380281692, | |
| "grad_norm": 0.29870662093162537, | |
| "learning_rate": 5.853065930775303e-07, | |
| "loss": 0.2798278331756592, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 23.788732394366196, | |
| "grad_norm": 0.307162344455719, | |
| "learning_rate": 5.845235626570683e-07, | |
| "loss": 0.2772548794746399, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 23.816901408450704, | |
| "grad_norm": 0.290558785200119, | |
| "learning_rate": 5.837404270718475e-07, | |
| "loss": 0.2746056020259857, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 23.845070422535212, | |
| "grad_norm": 0.30080270767211914, | |
| "learning_rate": 5.829571887074343e-07, | |
| "loss": 0.2648829519748688, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 23.87323943661972, | |
| "grad_norm": 0.3067336678504944, | |
| "learning_rate": 5.821738499497086e-07, | |
| "loss": 0.2871520519256592, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 23.901408450704224, | |
| "grad_norm": 0.29975709319114685, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.26279598474502563, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 23.929577464788732, | |
| "grad_norm": 0.3006797730922699, | |
| "learning_rate": 5.806068807993617e-07, | |
| "loss": 0.2586716115474701, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 23.95774647887324, | |
| "grad_norm": 0.31139636039733887, | |
| "learning_rate": 5.798232551800002e-07, | |
| "loss": 0.26469486951828003, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 23.985915492957748, | |
| "grad_norm": 0.295448899269104, | |
| "learning_rate": 5.790395387138311e-07, | |
| "loss": 0.27641937136650085, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.41943204402923584, | |
| "learning_rate": 5.78255733788191e-07, | |
| "loss": 0.2656780779361725, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 24.028169014084508, | |
| "grad_norm": 0.2978457808494568, | |
| "learning_rate": 5.774718427906856e-07, | |
| "loss": 0.27108752727508545, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 24.056338028169016, | |
| "grad_norm": 0.2980673015117645, | |
| "learning_rate": 5.766878681091828e-07, | |
| "loss": 0.27321118116378784, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 24.08450704225352, | |
| "grad_norm": 0.30751070380210876, | |
| "learning_rate": 5.759038121318052e-07, | |
| "loss": 0.26482248306274414, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 24.112676056338028, | |
| "grad_norm": 0.2982223629951477, | |
| "learning_rate": 5.751196772469237e-07, | |
| "loss": 0.2737855315208435, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 24.140845070422536, | |
| "grad_norm": 0.2943744361400604, | |
| "learning_rate": 5.743354658431489e-07, | |
| "loss": 0.27646419405937195, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 24.169014084507044, | |
| "grad_norm": 0.2863228917121887, | |
| "learning_rate": 5.735511803093248e-07, | |
| "loss": 0.2726101279258728, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 24.197183098591548, | |
| "grad_norm": 0.2973101735115051, | |
| "learning_rate": 5.727668230345209e-07, | |
| "loss": 0.2601590156555176, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 24.225352112676056, | |
| "grad_norm": 0.3052431344985962, | |
| "learning_rate": 5.71982396408026e-07, | |
| "loss": 0.27889275550842285, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 24.253521126760564, | |
| "grad_norm": 0.3076930046081543, | |
| "learning_rate": 5.711979028193391e-07, | |
| "loss": 0.2612301707267761, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 24.281690140845072, | |
| "grad_norm": 0.2986485958099365, | |
| "learning_rate": 5.704133446581642e-07, | |
| "loss": 0.27018094062805176, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 24.309859154929576, | |
| "grad_norm": 0.3108276426792145, | |
| "learning_rate": 5.696287243144012e-07, | |
| "loss": 0.27102935314178467, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 24.338028169014084, | |
| "grad_norm": 0.30193671584129333, | |
| "learning_rate": 5.688440441781398e-07, | |
| "loss": 0.2653925120830536, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 24.366197183098592, | |
| "grad_norm": 0.3071465492248535, | |
| "learning_rate": 5.680593066396518e-07, | |
| "loss": 0.2752073109149933, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 24.3943661971831, | |
| "grad_norm": 0.31397056579589844, | |
| "learning_rate": 5.672745140893839e-07, | |
| "loss": 0.2662411332130432, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 24.422535211267604, | |
| "grad_norm": 0.2991463243961334, | |
| "learning_rate": 5.664896689179504e-07, | |
| "loss": 0.24169263243675232, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 24.450704225352112, | |
| "grad_norm": 0.3123292028903961, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": 0.27330368757247925, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 24.47887323943662, | |
| "grad_norm": 0.3062734305858612, | |
| "learning_rate": 5.649198302748368e-07, | |
| "loss": 0.26652461290359497, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 24.507042253521128, | |
| "grad_norm": 0.2875562906265259, | |
| "learning_rate": 5.641348415851577e-07, | |
| "loss": 0.2717418670654297, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 24.535211267605632, | |
| "grad_norm": 0.30724218487739563, | |
| "learning_rate": 5.633498098382998e-07, | |
| "loss": 0.2761197090148926, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 24.56338028169014, | |
| "grad_norm": 0.30381572246551514, | |
| "learning_rate": 5.625647374256061e-07, | |
| "loss": 0.2838340997695923, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 24.591549295774648, | |
| "grad_norm": 0.30817776918411255, | |
| "learning_rate": 5.617796267385429e-07, | |
| "loss": 0.26739388704299927, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 24.619718309859156, | |
| "grad_norm": 0.31107473373413086, | |
| "learning_rate": 5.60994480168694e-07, | |
| "loss": 0.27139878273010254, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 24.647887323943664, | |
| "grad_norm": 0.29710572957992554, | |
| "learning_rate": 5.602093001077517e-07, | |
| "loss": 0.26788806915283203, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 24.676056338028168, | |
| "grad_norm": 0.31037789583206177, | |
| "learning_rate": 5.594240889475106e-07, | |
| "loss": 0.2767243981361389, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 24.704225352112676, | |
| "grad_norm": 0.30905231833457947, | |
| "learning_rate": 5.586388490798604e-07, | |
| "loss": 0.2679288685321808, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 24.732394366197184, | |
| "grad_norm": 0.30612513422966003, | |
| "learning_rate": 5.578535828967777e-07, | |
| "loss": 0.2660091519355774, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 24.760563380281692, | |
| "grad_norm": 0.29661476612091064, | |
| "learning_rate": 5.570682927903193e-07, | |
| "loss": 0.27202385663986206, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 24.788732394366196, | |
| "grad_norm": 0.31154492497444153, | |
| "learning_rate": 5.562829811526154e-07, | |
| "loss": 0.26965251564979553, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 24.816901408450704, | |
| "grad_norm": 0.29887905716896057, | |
| "learning_rate": 5.554976503758612e-07, | |
| "loss": 0.2663193345069885, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 24.845070422535212, | |
| "grad_norm": 0.3046702444553375, | |
| "learning_rate": 5.547123028523106e-07, | |
| "loss": 0.26517826318740845, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 24.87323943661972, | |
| "grad_norm": 0.29926952719688416, | |
| "learning_rate": 5.539269409742683e-07, | |
| "loss": 0.2689710855484009, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 24.901408450704224, | |
| "grad_norm": 0.31607043743133545, | |
| "learning_rate": 5.531415671340826e-07, | |
| "loss": 0.2774956226348877, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 24.929577464788732, | |
| "grad_norm": 0.313334584236145, | |
| "learning_rate": 5.523561837241387e-07, | |
| "loss": 0.2801990807056427, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 24.95774647887324, | |
| "grad_norm": 0.3167824149131775, | |
| "learning_rate": 5.515707931368507e-07, | |
| "loss": 0.2556470036506653, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 24.985915492957748, | |
| "grad_norm": 0.3055095970630646, | |
| "learning_rate": 5.507853977646543e-07, | |
| "loss": 0.2693515121936798, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.41877350211143494, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.2642577588558197, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 25.028169014084508, | |
| "grad_norm": 0.3000764548778534, | |
| "learning_rate": 5.492146022353459e-07, | |
| "loss": 0.2616558074951172, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 25.056338028169016, | |
| "grad_norm": 0.30835723876953125, | |
| "learning_rate": 5.484292068631494e-07, | |
| "loss": 0.260206401348114, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 25.08450704225352, | |
| "grad_norm": 0.30945923924446106, | |
| "learning_rate": 5.476438162758611e-07, | |
| "loss": 0.26666033267974854, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 25.112676056338028, | |
| "grad_norm": 0.3131259083747864, | |
| "learning_rate": 5.468584328659172e-07, | |
| "loss": 0.2688153386116028, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 25.140845070422536, | |
| "grad_norm": 0.31281140446662903, | |
| "learning_rate": 5.460730590257317e-07, | |
| "loss": 0.25907081365585327, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 25.169014084507044, | |
| "grad_norm": 0.300714910030365, | |
| "learning_rate": 5.452876971476896e-07, | |
| "loss": 0.2585920989513397, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 25.197183098591548, | |
| "grad_norm": 0.31137779355049133, | |
| "learning_rate": 5.445023496241388e-07, | |
| "loss": 0.2691946029663086, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 25.225352112676056, | |
| "grad_norm": 0.31905803084373474, | |
| "learning_rate": 5.437170188473847e-07, | |
| "loss": 0.25889474153518677, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 25.253521126760564, | |
| "grad_norm": 0.30952438712120056, | |
| "learning_rate": 5.429317072096807e-07, | |
| "loss": 0.26691755652427673, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 25.281690140845072, | |
| "grad_norm": 0.3063667416572571, | |
| "learning_rate": 5.421464171032224e-07, | |
| "loss": 0.2661867141723633, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 25.309859154929576, | |
| "grad_norm": 0.31403201818466187, | |
| "learning_rate": 5.413611509201396e-07, | |
| "loss": 0.26902246475219727, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 25.338028169014084, | |
| "grad_norm": 0.3037600815296173, | |
| "learning_rate": 5.405759110524894e-07, | |
| "loss": 0.26004883646965027, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 25.366197183098592, | |
| "grad_norm": 0.3116777837276459, | |
| "learning_rate": 5.397906998922483e-07, | |
| "loss": 0.27219873666763306, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 25.3943661971831, | |
| "grad_norm": 0.2961476445198059, | |
| "learning_rate": 5.390055198313061e-07, | |
| "loss": 0.26753348112106323, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 25.422535211267604, | |
| "grad_norm": 0.3180798888206482, | |
| "learning_rate": 5.382203732614571e-07, | |
| "loss": 0.2706093192100525, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 25.450704225352112, | |
| "grad_norm": 0.2982124090194702, | |
| "learning_rate": 5.37435262574394e-07, | |
| "loss": 0.2601392865180969, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 25.47887323943662, | |
| "grad_norm": 0.29854777455329895, | |
| "learning_rate": 5.366501901617001e-07, | |
| "loss": 0.2788724899291992, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 25.507042253521128, | |
| "grad_norm": 0.30327802896499634, | |
| "learning_rate": 5.358651584148423e-07, | |
| "loss": 0.26465606689453125, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 25.535211267605632, | |
| "grad_norm": 0.3136656582355499, | |
| "learning_rate": 5.350801697251633e-07, | |
| "loss": 0.2621968984603882, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 25.56338028169014, | |
| "grad_norm": 0.3008262813091278, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.2775859236717224, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 25.591549295774648, | |
| "grad_norm": 0.31797295808792114, | |
| "learning_rate": 5.335103310820496e-07, | |
| "loss": 0.2715638279914856, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 25.619718309859156, | |
| "grad_norm": 0.3112519383430481, | |
| "learning_rate": 5.32725485910616e-07, | |
| "loss": 0.26941171288490295, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 25.647887323943664, | |
| "grad_norm": 0.2887360453605652, | |
| "learning_rate": 5.319406933603482e-07, | |
| "loss": 0.26261216402053833, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 25.676056338028168, | |
| "grad_norm": 0.3208933472633362, | |
| "learning_rate": 5.311559558218603e-07, | |
| "loss": 0.26436418294906616, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 25.704225352112676, | |
| "grad_norm": 0.30341023206710815, | |
| "learning_rate": 5.303712756855988e-07, | |
| "loss": 0.2747180461883545, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 25.732394366197184, | |
| "grad_norm": 0.31803277134895325, | |
| "learning_rate": 5.295866553418358e-07, | |
| "loss": 0.2771461606025696, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 25.760563380281692, | |
| "grad_norm": 0.3123302459716797, | |
| "learning_rate": 5.288020971806608e-07, | |
| "loss": 0.26546305418014526, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 25.788732394366196, | |
| "grad_norm": 0.3141644597053528, | |
| "learning_rate": 5.28017603591974e-07, | |
| "loss": 0.27546215057373047, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 25.816901408450704, | |
| "grad_norm": 0.29840072989463806, | |
| "learning_rate": 5.27233176965479e-07, | |
| "loss": 0.25834715366363525, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 25.845070422535212, | |
| "grad_norm": 0.3083305060863495, | |
| "learning_rate": 5.264488196906752e-07, | |
| "loss": 0.2746443748474121, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 25.87323943661972, | |
| "grad_norm": 0.30847135186195374, | |
| "learning_rate": 5.256645341568511e-07, | |
| "loss": 0.2748471200466156, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 25.901408450704224, | |
| "grad_norm": 0.30591723322868347, | |
| "learning_rate": 5.248803227530763e-07, | |
| "loss": 0.26996147632598877, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 25.929577464788732, | |
| "grad_norm": 0.314569354057312, | |
| "learning_rate": 5.240961878681947e-07, | |
| "loss": 0.28236207365989685, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 25.95774647887324, | |
| "grad_norm": 0.32219424843788147, | |
| "learning_rate": 5.233121318908173e-07, | |
| "loss": 0.2674041986465454, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 25.985915492957748, | |
| "grad_norm": 0.3121417760848999, | |
| "learning_rate": 5.225281572093143e-07, | |
| "loss": 0.2723839282989502, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.4469078481197357, | |
| "learning_rate": 5.21744266211809e-07, | |
| "loss": 0.2659713625907898, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 26.028169014084508, | |
| "grad_norm": 0.3079273998737335, | |
| "learning_rate": 5.20960461286169e-07, | |
| "loss": 0.2612949013710022, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 26.056338028169016, | |
| "grad_norm": 0.29670900106430054, | |
| "learning_rate": 5.2017674482e-07, | |
| "loss": 0.26683154702186584, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 26.08450704225352, | |
| "grad_norm": 0.3200303018093109, | |
| "learning_rate": 5.193931192006385e-07, | |
| "loss": 0.2616243362426758, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 26.112676056338028, | |
| "grad_norm": 0.31682220101356506, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": 0.27138951420783997, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 26.140845070422536, | |
| "grad_norm": 0.30821120738983154, | |
| "learning_rate": 5.178261500502912e-07, | |
| "loss": 0.26395922899246216, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 26.169014084507044, | |
| "grad_norm": 0.3168351352214813, | |
| "learning_rate": 5.170428112925659e-07, | |
| "loss": 0.2528039813041687, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 26.197183098591548, | |
| "grad_norm": 0.31877174973487854, | |
| "learning_rate": 5.162595729281526e-07, | |
| "loss": 0.268981397151947, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 26.225352112676056, | |
| "grad_norm": 0.30236542224884033, | |
| "learning_rate": 5.154764373429315e-07, | |
| "loss": 0.26689520478248596, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 26.253521126760564, | |
| "grad_norm": 0.31615039706230164, | |
| "learning_rate": 5.146934069224698e-07, | |
| "loss": 0.25211524963378906, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 26.281690140845072, | |
| "grad_norm": 0.304155558347702, | |
| "learning_rate": 5.139104840520135e-07, | |
| "loss": 0.26361894607543945, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 26.309859154929576, | |
| "grad_norm": 0.31038856506347656, | |
| "learning_rate": 5.131276711164815e-07, | |
| "loss": 0.26455777883529663, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 26.338028169014084, | |
| "grad_norm": 0.3139597177505493, | |
| "learning_rate": 5.123449705004581e-07, | |
| "loss": 0.2526125907897949, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 26.366197183098592, | |
| "grad_norm": 0.3288014233112335, | |
| "learning_rate": 5.115623845881847e-07, | |
| "loss": 0.2677180767059326, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 26.3943661971831, | |
| "grad_norm": 0.33518192172050476, | |
| "learning_rate": 5.107799157635538e-07, | |
| "loss": 0.2683093249797821, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 26.422535211267604, | |
| "grad_norm": 0.3219356834888458, | |
| "learning_rate": 5.099975664101014e-07, | |
| "loss": 0.2773933708667755, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 26.450704225352112, | |
| "grad_norm": 0.32385388016700745, | |
| "learning_rate": 5.09215338910999e-07, | |
| "loss": 0.2612137198448181, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 26.47887323943662, | |
| "grad_norm": 0.32834818959236145, | |
| "learning_rate": 5.084332356490472e-07, | |
| "loss": 0.2747904658317566, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 26.507042253521128, | |
| "grad_norm": 0.32953891158103943, | |
| "learning_rate": 5.076512590066685e-07, | |
| "loss": 0.2700774669647217, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 26.535211267605632, | |
| "grad_norm": 0.31470146775245667, | |
| "learning_rate": 5.068694113658992e-07, | |
| "loss": 0.26825615763664246, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 26.56338028169014, | |
| "grad_norm": 0.3184269964694977, | |
| "learning_rate": 5.060876951083828e-07, | |
| "loss": 0.2559502124786377, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 26.591549295774648, | |
| "grad_norm": 0.3205021619796753, | |
| "learning_rate": 5.053061126153624e-07, | |
| "loss": 0.26462531089782715, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 26.619718309859156, | |
| "grad_norm": 0.3158126473426819, | |
| "learning_rate": 5.045246662676741e-07, | |
| "loss": 0.2701690196990967, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 26.647887323943664, | |
| "grad_norm": 0.3104144334793091, | |
| "learning_rate": 5.037433584457389e-07, | |
| "loss": 0.27104830741882324, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 26.676056338028168, | |
| "grad_norm": 0.3229422867298126, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": 0.2765110731124878, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 26.704225352112676, | |
| "grad_norm": 0.3127235770225525, | |
| "learning_rate": 5.021811678986951e-07, | |
| "loss": 0.26477351784706116, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 26.732394366197184, | |
| "grad_norm": 0.31363457441329956, | |
| "learning_rate": 5.014002899322896e-07, | |
| "loss": 0.2696647644042969, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 26.760563380281692, | |
| "grad_norm": 0.3330313265323639, | |
| "learning_rate": 5.006195600090296e-07, | |
| "loss": 0.2720947861671448, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 26.788732394366196, | |
| "grad_norm": 0.3137781023979187, | |
| "learning_rate": 4.998389805071536e-07, | |
| "loss": 0.2770814001560211, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 26.816901408450704, | |
| "grad_norm": 0.30663928389549255, | |
| "learning_rate": 4.990585538044419e-07, | |
| "loss": 0.26743337512016296, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 26.845070422535212, | |
| "grad_norm": 0.3439841866493225, | |
| "learning_rate": 4.982782822782101e-07, | |
| "loss": 0.26640748977661133, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 26.87323943661972, | |
| "grad_norm": 0.30016517639160156, | |
| "learning_rate": 4.974981683053001e-07, | |
| "loss": 0.2630905508995056, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 26.901408450704224, | |
| "grad_norm": 0.30313640832901, | |
| "learning_rate": 4.967182142620745e-07, | |
| "loss": 0.26278769969940186, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 26.929577464788732, | |
| "grad_norm": 0.3100942373275757, | |
| "learning_rate": 4.959384225244087e-07, | |
| "loss": 0.25859004259109497, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 26.95774647887324, | |
| "grad_norm": 0.3049146234989166, | |
| "learning_rate": 4.951587954676837e-07, | |
| "loss": 0.2737579941749573, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 26.985915492957748, | |
| "grad_norm": 0.3105259835720062, | |
| "learning_rate": 4.943793354667783e-07, | |
| "loss": 0.2698732018470764, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.43671199679374695, | |
| "learning_rate": 4.93600044896063e-07, | |
| "loss": 0.2851495146751404, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 27.028169014084508, | |
| "grad_norm": 0.3152709901332855, | |
| "learning_rate": 4.928209261293923e-07, | |
| "loss": 0.27372750639915466, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 27.056338028169016, | |
| "grad_norm": 0.3281909227371216, | |
| "learning_rate": 4.920419815400968e-07, | |
| "loss": 0.26317745447158813, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 27.08450704225352, | |
| "grad_norm": 0.30629420280456543, | |
| "learning_rate": 4.912632135009769e-07, | |
| "loss": 0.267042338848114, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 27.112676056338028, | |
| "grad_norm": 0.31097206473350525, | |
| "learning_rate": 4.904846243842949e-07, | |
| "loss": 0.2647910714149475, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 27.140845070422536, | |
| "grad_norm": 0.30723172426223755, | |
| "learning_rate": 4.897062165617686e-07, | |
| "loss": 0.27176767587661743, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 27.169014084507044, | |
| "grad_norm": 0.333957701921463, | |
| "learning_rate": 4.88927992404563e-07, | |
| "loss": 0.26361826062202454, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 27.197183098591548, | |
| "grad_norm": 0.30476778745651245, | |
| "learning_rate": 4.881499542832841e-07, | |
| "loss": 0.2584869861602783, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 27.225352112676056, | |
| "grad_norm": 0.3146997392177582, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": 0.2549043893814087, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 27.253521126760564, | |
| "grad_norm": 0.30739930272102356, | |
| "learning_rate": 4.865944456280878e-07, | |
| "loss": 0.2622683644294739, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 27.281690140845072, | |
| "grad_norm": 0.3006227910518646, | |
| "learning_rate": 4.858169798325198e-07, | |
| "loss": 0.27283164858818054, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 27.309859154929576, | |
| "grad_norm": 0.31303322315216064, | |
| "learning_rate": 4.850397095495621e-07, | |
| "loss": 0.2585863471031189, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 27.338028169014084, | |
| "grad_norm": 0.3036518692970276, | |
| "learning_rate": 4.842626371469149e-07, | |
| "loss": 0.2656107246875763, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 27.366197183098592, | |
| "grad_norm": 0.3137490749359131, | |
| "learning_rate": 4.834857649916752e-07, | |
| "loss": 0.25737249851226807, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 27.3943661971831, | |
| "grad_norm": 0.3161812424659729, | |
| "learning_rate": 4.827090954503308e-07, | |
| "loss": 0.2658624053001404, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 27.422535211267604, | |
| "grad_norm": 0.2974465489387512, | |
| "learning_rate": 4.819326308887513e-07, | |
| "loss": 0.2653939425945282, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 27.450704225352112, | |
| "grad_norm": 0.3207877576351166, | |
| "learning_rate": 4.811563736721829e-07, | |
| "loss": 0.2567484378814697, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 27.47887323943662, | |
| "grad_norm": 0.30379563570022583, | |
| "learning_rate": 4.803803261652395e-07, | |
| "loss": 0.2731136083602905, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 27.507042253521128, | |
| "grad_norm": 0.30110257863998413, | |
| "learning_rate": 4.79604490731896e-07, | |
| "loss": 0.2533247172832489, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 27.535211267605632, | |
| "grad_norm": 0.32354485988616943, | |
| "learning_rate": 4.788288697354824e-07, | |
| "loss": 0.2776826024055481, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 27.56338028169014, | |
| "grad_norm": 0.3137172758579254, | |
| "learning_rate": 4.780534655386743e-07, | |
| "loss": 0.2678206264972687, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 27.591549295774648, | |
| "grad_norm": 0.3129335641860962, | |
| "learning_rate": 4.772782805034876e-07, | |
| "loss": 0.27128273248672485, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 27.619718309859156, | |
| "grad_norm": 0.3112099766731262, | |
| "learning_rate": 4.7650331699127013e-07, | |
| "loss": 0.25505757331848145, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 27.647887323943664, | |
| "grad_norm": 0.3214300274848938, | |
| "learning_rate": 4.75728577362695e-07, | |
| "loss": 0.252490371465683, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 27.676056338028168, | |
| "grad_norm": 0.3177250623703003, | |
| "learning_rate": 4.749540639777539e-07, | |
| "loss": 0.2748945355415344, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 27.704225352112676, | |
| "grad_norm": 0.3087361752986908, | |
| "learning_rate": 4.741797791957489e-07, | |
| "loss": 0.26117944717407227, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 27.732394366197184, | |
| "grad_norm": 0.3008691072463989, | |
| "learning_rate": 4.7340572537528547e-07, | |
| "loss": 0.2576630115509033, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 27.760563380281692, | |
| "grad_norm": 0.3111347556114197, | |
| "learning_rate": 4.7263190487426563e-07, | |
| "loss": 0.26800209283828735, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 27.788732394366196, | |
| "grad_norm": 0.2986048758029938, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.2734978497028351, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 27.816901408450704, | |
| "grad_norm": 0.31797438859939575, | |
| "learning_rate": 4.710849732586059e-07, | |
| "loss": 0.2649095356464386, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 27.845070422535212, | |
| "grad_norm": 0.3100630044937134, | |
| "learning_rate": 4.703118668561875e-07, | |
| "loss": 0.2550201117992401, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 27.87323943661972, | |
| "grad_norm": 0.3206699788570404, | |
| "learning_rate": 4.6953900319764274e-07, | |
| "loss": 0.26471948623657227, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 27.901408450704224, | |
| "grad_norm": 0.3138802945613861, | |
| "learning_rate": 4.68766384637248e-07, | |
| "loss": 0.26174217462539673, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 27.929577464788732, | |
| "grad_norm": 0.3069911301136017, | |
| "learning_rate": 4.679940135285336e-07, | |
| "loss": 0.26182085275650024, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 27.95774647887324, | |
| "grad_norm": 0.3080894351005554, | |
| "learning_rate": 4.672218922242759e-07, | |
| "loss": 0.272597074508667, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 27.985915492957748, | |
| "grad_norm": 0.30975106358528137, | |
| "learning_rate": 4.664500230764903e-07, | |
| "loss": 0.28192490339279175, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.44492414593696594, | |
| "learning_rate": 4.656784084364238e-07, | |
| "loss": 0.2805609405040741, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 28.028169014084508, | |
| "grad_norm": 0.3142589330673218, | |
| "learning_rate": 4.6490705065454883e-07, | |
| "loss": 0.2571072280406952, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 28.056338028169016, | |
| "grad_norm": 0.3059631884098053, | |
| "learning_rate": 4.641359520805548e-07, | |
| "loss": 0.2683190107345581, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 28.08450704225352, | |
| "grad_norm": 0.32835182547569275, | |
| "learning_rate": 4.6336511506334177e-07, | |
| "loss": 0.2751193344593048, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 28.112676056338028, | |
| "grad_norm": 0.31909412145614624, | |
| "learning_rate": 4.6259454195101267e-07, | |
| "loss": 0.27306729555130005, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 28.140845070422536, | |
| "grad_norm": 0.32016029953956604, | |
| "learning_rate": 4.61824235090867e-07, | |
| "loss": 0.2615482211112976, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 28.169014084507044, | |
| "grad_norm": 0.30900275707244873, | |
| "learning_rate": 4.6105419682939316e-07, | |
| "loss": 0.2553929388523102, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 28.197183098591548, | |
| "grad_norm": 0.3047516942024231, | |
| "learning_rate": 4.602844295122613e-07, | |
| "loss": 0.26050907373428345, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 28.225352112676056, | |
| "grad_norm": 0.31619319319725037, | |
| "learning_rate": 4.59514935484316e-07, | |
| "loss": 0.2493715137243271, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 28.253521126760564, | |
| "grad_norm": 0.31594234704971313, | |
| "learning_rate": 4.5874571708956953e-07, | |
| "loss": 0.26061999797821045, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 28.281690140845072, | |
| "grad_norm": 0.31763410568237305, | |
| "learning_rate": 4.579767766711944e-07, | |
| "loss": 0.2720048427581787, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 28.309859154929576, | |
| "grad_norm": 0.3225538432598114, | |
| "learning_rate": 4.572081165715167e-07, | |
| "loss": 0.26587527990341187, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 28.338028169014084, | |
| "grad_norm": 0.33830496668815613, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.26142361760139465, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 28.366197183098592, | |
| "grad_norm": 0.30440667271614075, | |
| "learning_rate": 4.556716466932803e-07, | |
| "loss": 0.25490373373031616, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 28.3943661971831, | |
| "grad_norm": 0.30009451508522034, | |
| "learning_rate": 4.549038415950751e-07, | |
| "loss": 0.258319616317749, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 28.422535211267604, | |
| "grad_norm": 0.32110437750816345, | |
| "learning_rate": 4.5413632617626054e-07, | |
| "loss": 0.2684330344200134, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 28.450704225352112, | |
| "grad_norm": 0.3126528561115265, | |
| "learning_rate": 4.5336910277482155e-07, | |
| "loss": 0.2647142708301544, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 28.47887323943662, | |
| "grad_norm": 0.30162736773490906, | |
| "learning_rate": 4.526021737278537e-07, | |
| "loss": 0.2717491388320923, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 28.507042253521128, | |
| "grad_norm": 0.32018333673477173, | |
| "learning_rate": 4.51835541371556e-07, | |
| "loss": 0.2770422697067261, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 28.535211267605632, | |
| "grad_norm": 0.3132731318473816, | |
| "learning_rate": 4.5106920804122304e-07, | |
| "loss": 0.2692522406578064, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 28.56338028169014, | |
| "grad_norm": 0.30906060338020325, | |
| "learning_rate": 4.503031760712397e-07, | |
| "loss": 0.2523694932460785, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 28.591549295774648, | |
| "grad_norm": 0.3276032507419586, | |
| "learning_rate": 4.4953744779507197e-07, | |
| "loss": 0.26482313871383667, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 28.619718309859156, | |
| "grad_norm": 0.33187615871429443, | |
| "learning_rate": 4.4877202554526084e-07, | |
| "loss": 0.2603946924209595, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 28.647887323943664, | |
| "grad_norm": 0.30181628465652466, | |
| "learning_rate": 4.480069116534151e-07, | |
| "loss": 0.25871700048446655, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 28.676056338028168, | |
| "grad_norm": 0.3155851662158966, | |
| "learning_rate": 4.4724210845020494e-07, | |
| "loss": 0.2617461681365967, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 28.704225352112676, | |
| "grad_norm": 0.30370378494262695, | |
| "learning_rate": 4.4647761826535303e-07, | |
| "loss": 0.26235488057136536, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 28.732394366197184, | |
| "grad_norm": 0.317186564207077, | |
| "learning_rate": 4.457134434276293e-07, | |
| "loss": 0.26761680841445923, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 28.760563380281692, | |
| "grad_norm": 0.3287314772605896, | |
| "learning_rate": 4.449495862648427e-07, | |
| "loss": 0.261843204498291, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 28.788732394366196, | |
| "grad_norm": 0.33204883337020874, | |
| "learning_rate": 4.441860491038345e-07, | |
| "loss": 0.2633381485939026, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 28.816901408450704, | |
| "grad_norm": 0.32268011569976807, | |
| "learning_rate": 4.4342283427047164e-07, | |
| "loss": 0.24900981783866882, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 28.845070422535212, | |
| "grad_norm": 0.3224244713783264, | |
| "learning_rate": 4.4265994408963867e-07, | |
| "loss": 0.2667103111743927, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 28.87323943661972, | |
| "grad_norm": 0.3169482350349426, | |
| "learning_rate": 4.418973808852313e-07, | |
| "loss": 0.268291175365448, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 28.901408450704224, | |
| "grad_norm": 0.33006441593170166, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": 0.27004534006118774, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 28.929577464788732, | |
| "grad_norm": 0.35179299116134644, | |
| "learning_rate": 4.403732446962899e-07, | |
| "loss": 0.2628635764122009, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 28.95774647887324, | |
| "grad_norm": 0.3151315748691559, | |
| "learning_rate": 4.3961167635453876e-07, | |
| "loss": 0.2677478492259979, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 28.985915492957748, | |
| "grad_norm": 0.3185572922229767, | |
| "learning_rate": 4.388504442747657e-07, | |
| "loss": 0.2660791873931885, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 0.45902183651924133, | |
| "learning_rate": 4.3808955077581546e-07, | |
| "loss": 0.2720754146575928, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 29.028169014084508, | |
| "grad_norm": 0.3011077344417572, | |
| "learning_rate": 4.373289981755013e-07, | |
| "loss": 0.25422877073287964, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 29.056338028169016, | |
| "grad_norm": 0.3089461028575897, | |
| "learning_rate": 4.365687887905988e-07, | |
| "loss": 0.2498088926076889, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 29.08450704225352, | |
| "grad_norm": 0.32150641083717346, | |
| "learning_rate": 4.358089249368375e-07, | |
| "loss": 0.2662513554096222, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 29.112676056338028, | |
| "grad_norm": 0.32592031359672546, | |
| "learning_rate": 4.350494089288943e-07, | |
| "loss": 0.2539994418621063, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 29.140845070422536, | |
| "grad_norm": 0.31924694776535034, | |
| "learning_rate": 4.3429024308038686e-07, | |
| "loss": 0.2557491958141327, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 29.169014084507044, | |
| "grad_norm": 0.32504960894584656, | |
| "learning_rate": 4.3353142970386557e-07, | |
| "loss": 0.26317501068115234, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 29.197183098591548, | |
| "grad_norm": 0.3093854784965515, | |
| "learning_rate": 4.327729711108082e-07, | |
| "loss": 0.25340092182159424, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 29.225352112676056, | |
| "grad_norm": 0.313862144947052, | |
| "learning_rate": 4.3201486961161093e-07, | |
| "loss": 0.2559676766395569, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 29.253521126760564, | |
| "grad_norm": 0.3301529288291931, | |
| "learning_rate": 4.312571275155823e-07, | |
| "loss": 0.2709015905857086, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 29.281690140845072, | |
| "grad_norm": 0.32452118396759033, | |
| "learning_rate": 4.304997471309361e-07, | |
| "loss": 0.2698490619659424, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 29.309859154929576, | |
| "grad_norm": 0.3382558226585388, | |
| "learning_rate": 4.297427307647844e-07, | |
| "loss": 0.2615205645561218, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 29.338028169014084, | |
| "grad_norm": 0.3098710775375366, | |
| "learning_rate": 4.2898608072313045e-07, | |
| "loss": 0.2664251923561096, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 29.366197183098592, | |
| "grad_norm": 0.3207705318927765, | |
| "learning_rate": 4.2822979931086144e-07, | |
| "loss": 0.2764906883239746, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 29.3943661971831, | |
| "grad_norm": 0.3483034372329712, | |
| "learning_rate": 4.2747388883174154e-07, | |
| "loss": 0.2622952163219452, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 29.422535211267604, | |
| "grad_norm": 0.30950114130973816, | |
| "learning_rate": 4.267183515884054e-07, | |
| "loss": 0.2630128860473633, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 29.450704225352112, | |
| "grad_norm": 0.32425740361213684, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": 0.25917208194732666, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 29.47887323943662, | |
| "grad_norm": 0.3382692039012909, | |
| "learning_rate": 4.2520840601392996e-07, | |
| "loss": 0.26483750343322754, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 29.507042253521128, | |
| "grad_norm": 0.30861786007881165, | |
| "learning_rate": 4.2445400228234687e-07, | |
| "loss": 0.2531127631664276, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 29.535211267605632, | |
| "grad_norm": 0.33470088243484497, | |
| "learning_rate": 4.2369998098564554e-07, | |
| "loss": 0.263372540473938, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 29.56338028169014, | |
| "grad_norm": 0.34484177827835083, | |
| "learning_rate": 4.2294634442070553e-07, | |
| "loss": 0.263760507106781, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 29.591549295774648, | |
| "grad_norm": 0.32152125239372253, | |
| "learning_rate": 4.2219309488323487e-07, | |
| "loss": 0.2630784511566162, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 29.619718309859156, | |
| "grad_norm": 0.3259511888027191, | |
| "learning_rate": 4.214402346677619e-07, | |
| "loss": 0.26080453395843506, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 29.647887323943664, | |
| "grad_norm": 0.32442566752433777, | |
| "learning_rate": 4.206877660676297e-07, | |
| "loss": 0.2604103088378906, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 29.676056338028168, | |
| "grad_norm": 0.3231119215488434, | |
| "learning_rate": 4.1993569137498776e-07, | |
| "loss": 0.26589787006378174, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 29.704225352112676, | |
| "grad_norm": 0.3275383412837982, | |
| "learning_rate": 4.1918401288078633e-07, | |
| "loss": 0.2476288229227066, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 29.732394366197184, | |
| "grad_norm": 0.3219151496887207, | |
| "learning_rate": 4.1843273287476854e-07, | |
| "loss": 0.26332658529281616, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 29.760563380281692, | |
| "grad_norm": 0.31227391958236694, | |
| "learning_rate": 4.1768185364546326e-07, | |
| "loss": 0.2647852301597595, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 29.788732394366196, | |
| "grad_norm": 0.3090374767780304, | |
| "learning_rate": 4.1693137748017915e-07, | |
| "loss": 0.2562742531299591, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 29.816901408450704, | |
| "grad_norm": 0.32516875863075256, | |
| "learning_rate": 4.161813066649963e-07, | |
| "loss": 0.27417412400245667, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 29.845070422535212, | |
| "grad_norm": 0.3393928110599518, | |
| "learning_rate": 4.15431643484761e-07, | |
| "loss": 0.25790080428123474, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 29.87323943661972, | |
| "grad_norm": 0.3293744623661041, | |
| "learning_rate": 4.146823902230772e-07, | |
| "loss": 0.27599674463272095, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 29.901408450704224, | |
| "grad_norm": 0.336525022983551, | |
| "learning_rate": 4.1393354916230005e-07, | |
| "loss": 0.2566748261451721, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 29.929577464788732, | |
| "grad_norm": 0.30744579434394836, | |
| "learning_rate": 4.1318512258352936e-07, | |
| "loss": 0.276886522769928, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 29.95774647887324, | |
| "grad_norm": 0.3156173527240753, | |
| "learning_rate": 4.124371127666024e-07, | |
| "loss": 0.27484360337257385, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 29.985915492957748, | |
| "grad_norm": 0.31924012303352356, | |
| "learning_rate": 4.1168952199008677e-07, | |
| "loss": 0.2567445635795593, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.4623652994632721, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.27351921796798706, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 30.028169014084508, | |
| "grad_norm": 0.32494813203811646, | |
| "learning_rate": 4.101956066661708e-07, | |
| "loss": 0.26006799936294556, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 30.056338028169016, | |
| "grad_norm": 0.3355497121810913, | |
| "learning_rate": 4.0944928666949527e-07, | |
| "loss": 0.26071614027023315, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 30.08450704225352, | |
| "grad_norm": 0.3180653750896454, | |
| "learning_rate": 4.0870339481466774e-07, | |
| "loss": 0.2741304039955139, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 30.112676056338028, | |
| "grad_norm": 0.31589558720588684, | |
| "learning_rate": 4.079579333738039e-07, | |
| "loss": 0.2640499770641327, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 30.140845070422536, | |
| "grad_norm": 0.33277377486228943, | |
| "learning_rate": 4.0721290461770863e-07, | |
| "loss": 0.2542555630207062, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 30.169014084507044, | |
| "grad_norm": 0.31191685795783997, | |
| "learning_rate": 4.064683108158685e-07, | |
| "loss": 0.24946148693561554, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 30.197183098591548, | |
| "grad_norm": 0.31646913290023804, | |
| "learning_rate": 4.057241542364457e-07, | |
| "loss": 0.2565403878688812, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 30.225352112676056, | |
| "grad_norm": 0.32091739773750305, | |
| "learning_rate": 4.0498043714627006e-07, | |
| "loss": 0.2608620226383209, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 30.253521126760564, | |
| "grad_norm": 0.3244355618953705, | |
| "learning_rate": 4.042371618108329e-07, | |
| "loss": 0.25209081172943115, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 30.281690140845072, | |
| "grad_norm": 0.3262701630592346, | |
| "learning_rate": 4.034943304942796e-07, | |
| "loss": 0.2566452622413635, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 30.309859154929576, | |
| "grad_norm": 0.35125988721847534, | |
| "learning_rate": 4.027519454594033e-07, | |
| "loss": 0.2646006643772125, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 30.338028169014084, | |
| "grad_norm": 0.32471081614494324, | |
| "learning_rate": 4.020100089676376e-07, | |
| "loss": 0.2576545178890228, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 30.366197183098592, | |
| "grad_norm": 0.33542898297309875, | |
| "learning_rate": 4.012685232790497e-07, | |
| "loss": 0.25865480303764343, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 30.3943661971831, | |
| "grad_norm": 0.31360387802124023, | |
| "learning_rate": 4.005274906523336e-07, | |
| "loss": 0.25481581687927246, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 30.422535211267604, | |
| "grad_norm": 0.33107563853263855, | |
| "learning_rate": 3.9978691334480306e-07, | |
| "loss": 0.252411812543869, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 30.450704225352112, | |
| "grad_norm": 0.3281182050704956, | |
| "learning_rate": 3.9904679361238526e-07, | |
| "loss": 0.2586092948913574, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 30.47887323943662, | |
| "grad_norm": 0.32694414258003235, | |
| "learning_rate": 3.9830713370961313e-07, | |
| "loss": 0.26445192098617554, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 30.507042253521128, | |
| "grad_norm": 0.318498432636261, | |
| "learning_rate": 3.975679358896189e-07, | |
| "loss": 0.25009143352508545, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 30.535211267605632, | |
| "grad_norm": 0.3352436423301697, | |
| "learning_rate": 3.968292024041275e-07, | |
| "loss": 0.2770006060600281, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 30.56338028169014, | |
| "grad_norm": 0.3413051664829254, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": 0.2675744593143463, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 30.591549295774648, | |
| "grad_norm": 0.33011800050735474, | |
| "learning_rate": 3.953531374364728e-07, | |
| "loss": 0.25982439517974854, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 30.619718309859156, | |
| "grad_norm": 0.3153058588504791, | |
| "learning_rate": 3.946158104506594e-07, | |
| "loss": 0.26440930366516113, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 30.647887323943664, | |
| "grad_norm": 0.33693262934684753, | |
| "learning_rate": 3.938789567920349e-07, | |
| "loss": 0.2564413845539093, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 30.676056338028168, | |
| "grad_norm": 0.3082239031791687, | |
| "learning_rate": 3.931425787051832e-07, | |
| "loss": 0.26095646619796753, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 30.704225352112676, | |
| "grad_norm": 0.34148088097572327, | |
| "learning_rate": 3.924066784332396e-07, | |
| "loss": 0.27237722277641296, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 30.732394366197184, | |
| "grad_norm": 0.3161861300468445, | |
| "learning_rate": 3.9167125821788416e-07, | |
| "loss": 0.25798144936561584, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 30.760563380281692, | |
| "grad_norm": 0.33590832352638245, | |
| "learning_rate": 3.909363202993343e-07, | |
| "loss": 0.2643035650253296, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 30.788732394366196, | |
| "grad_norm": 0.33959585428237915, | |
| "learning_rate": 3.902018669163384e-07, | |
| "loss": 0.2613189220428467, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 30.816901408450704, | |
| "grad_norm": 0.31452202796936035, | |
| "learning_rate": 3.894679003061686e-07, | |
| "loss": 0.26554104685783386, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 30.845070422535212, | |
| "grad_norm": 0.3322625160217285, | |
| "learning_rate": 3.8873442270461485e-07, | |
| "loss": 0.2571873664855957, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 30.87323943661972, | |
| "grad_norm": 0.33110320568084717, | |
| "learning_rate": 3.88001436345977e-07, | |
| "loss": 0.26796817779541016, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 30.901408450704224, | |
| "grad_norm": 0.32166630029678345, | |
| "learning_rate": 3.872689434630585e-07, | |
| "loss": 0.25648969411849976, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 30.929577464788732, | |
| "grad_norm": 0.3449627757072449, | |
| "learning_rate": 3.8653694628715984e-07, | |
| "loss": 0.26782190799713135, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 30.95774647887324, | |
| "grad_norm": 0.3227315843105316, | |
| "learning_rate": 3.8580544704807117e-07, | |
| "loss": 0.2791867256164551, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 30.985915492957748, | |
| "grad_norm": 0.3112963140010834, | |
| "learning_rate": 3.850744479740663e-07, | |
| "loss": 0.26565277576446533, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "grad_norm": 0.4575044810771942, | |
| "learning_rate": 3.843439512918949e-07, | |
| "loss": 0.25405725836753845, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 31.028169014084508, | |
| "grad_norm": 0.3324749767780304, | |
| "learning_rate": 3.8361395922677687e-07, | |
| "loss": 0.26342666149139404, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 31.056338028169016, | |
| "grad_norm": 0.3335409164428711, | |
| "learning_rate": 3.8288447400239443e-07, | |
| "loss": 0.27227702736854553, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 31.08450704225352, | |
| "grad_norm": 0.33716699481010437, | |
| "learning_rate": 3.82155497840886e-07, | |
| "loss": 0.2696995437145233, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 31.112676056338028, | |
| "grad_norm": 0.33672624826431274, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": 0.2588409185409546, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 31.140845070422536, | |
| "grad_norm": 0.3224928081035614, | |
| "learning_rate": 3.806990815872855e-07, | |
| "loss": 0.2625422775745392, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 31.169014084507044, | |
| "grad_norm": 0.32264038920402527, | |
| "learning_rate": 3.7997164593168983e-07, | |
| "loss": 0.251539021730423, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 31.197183098591548, | |
| "grad_norm": 0.33344459533691406, | |
| "learning_rate": 3.7924472821194765e-07, | |
| "loss": 0.25519099831581116, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 31.225352112676056, | |
| "grad_norm": 0.3551379442214966, | |
| "learning_rate": 3.785183306423767e-07, | |
| "loss": 0.2584845721721649, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 31.253521126760564, | |
| "grad_norm": 0.3440611660480499, | |
| "learning_rate": 3.777924554357096e-07, | |
| "loss": 0.2609241008758545, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 31.281690140845072, | |
| "grad_norm": 0.3400917649269104, | |
| "learning_rate": 3.7706710480308835e-07, | |
| "loss": 0.26181089878082275, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 31.309859154929576, | |
| "grad_norm": 0.3361797630786896, | |
| "learning_rate": 3.7634228095405673e-07, | |
| "loss": 0.2546064853668213, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 31.338028169014084, | |
| "grad_norm": 0.3346230387687683, | |
| "learning_rate": 3.7561798609655373e-07, | |
| "loss": 0.26581573486328125, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 31.366197183098592, | |
| "grad_norm": 0.34457266330718994, | |
| "learning_rate": 3.748942224369073e-07, | |
| "loss": 0.2582035958766937, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 31.3943661971831, | |
| "grad_norm": 0.3213818073272705, | |
| "learning_rate": 3.7417099217982686e-07, | |
| "loss": 0.25484442710876465, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 31.422535211267604, | |
| "grad_norm": 0.3486325442790985, | |
| "learning_rate": 3.734482975283975e-07, | |
| "loss": 0.27330318093299866, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 31.450704225352112, | |
| "grad_norm": 0.3430873453617096, | |
| "learning_rate": 3.72726140684072e-07, | |
| "loss": 0.25915205478668213, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 31.47887323943662, | |
| "grad_norm": 0.3348333537578583, | |
| "learning_rate": 3.720045238466658e-07, | |
| "loss": 0.2582821846008301, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 31.507042253521128, | |
| "grad_norm": 0.3174356520175934, | |
| "learning_rate": 3.712834492143487e-07, | |
| "loss": 0.2682039737701416, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 31.535211267605632, | |
| "grad_norm": 0.3320380449295044, | |
| "learning_rate": 3.7056291898363925e-07, | |
| "loss": 0.2751486003398895, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 31.56338028169014, | |
| "grad_norm": 0.3412676155567169, | |
| "learning_rate": 3.6984293534939737e-07, | |
| "loss": 0.2540426254272461, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 31.591549295774648, | |
| "grad_norm": 0.35137638449668884, | |
| "learning_rate": 3.69123500504818e-07, | |
| "loss": 0.2570858895778656, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 31.619718309859156, | |
| "grad_norm": 0.32933273911476135, | |
| "learning_rate": 3.6840461664142444e-07, | |
| "loss": 0.2535385489463806, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 31.647887323943664, | |
| "grad_norm": 0.32296112179756165, | |
| "learning_rate": 3.6768628594906193e-07, | |
| "loss": 0.26802340149879456, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 31.676056338028168, | |
| "grad_norm": 0.33371275663375854, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.26279398798942566, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 31.704225352112676, | |
| "grad_norm": 0.3587881624698639, | |
| "learning_rate": 3.6625129282837685e-07, | |
| "loss": 0.26237016916275024, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 31.732394366197184, | |
| "grad_norm": 0.3388115465641022, | |
| "learning_rate": 3.655346347712922e-07, | |
| "loss": 0.2542800307273865, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 31.760563380281692, | |
| "grad_norm": 0.3145511746406555, | |
| "learning_rate": 3.6481853862770107e-07, | |
| "loss": 0.2536108195781708, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 31.788732394366196, | |
| "grad_norm": 0.34181296825408936, | |
| "learning_rate": 3.641030065789562e-07, | |
| "loss": 0.2601550817489624, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 31.816901408450704, | |
| "grad_norm": 0.322862833738327, | |
| "learning_rate": 3.6338804080469253e-07, | |
| "loss": 0.25029903650283813, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 31.845070422535212, | |
| "grad_norm": 0.3622659146785736, | |
| "learning_rate": 3.6267364348281946e-07, | |
| "loss": 0.26150447130203247, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 31.87323943661972, | |
| "grad_norm": 0.330181360244751, | |
| "learning_rate": 3.6195981678951535e-07, | |
| "loss": 0.2587708830833435, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 31.901408450704224, | |
| "grad_norm": 0.3616638779640198, | |
| "learning_rate": 3.612465628992203e-07, | |
| "loss": 0.26097607612609863, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 31.929577464788732, | |
| "grad_norm": 0.3439587652683258, | |
| "learning_rate": 3.60533883984629e-07, | |
| "loss": 0.2429528385400772, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 31.95774647887324, | |
| "grad_norm": 0.3390144407749176, | |
| "learning_rate": 3.5982178221668533e-07, | |
| "loss": 0.2673777937889099, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 31.985915492957748, | |
| "grad_norm": 0.3215203881263733, | |
| "learning_rate": 3.591102597645743e-07, | |
| "loss": 0.25635766983032227, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "grad_norm": 0.4861057698726654, | |
| "learning_rate": 3.5839931879571725e-07, | |
| "loss": 0.26994332671165466, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 32.028169014084504, | |
| "grad_norm": 0.3433145582675934, | |
| "learning_rate": 3.5768896147576344e-07, | |
| "loss": 0.2525317072868347, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 32.056338028169016, | |
| "grad_norm": 0.34238752722740173, | |
| "learning_rate": 3.5697918996858443e-07, | |
| "loss": 0.271589457988739, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 32.08450704225352, | |
| "grad_norm": 0.33140960335731506, | |
| "learning_rate": 3.5627000643626704e-07, | |
| "loss": 0.2612978219985962, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 32.11267605633803, | |
| "grad_norm": 0.31951841711997986, | |
| "learning_rate": 3.555614130391079e-07, | |
| "loss": 0.27151286602020264, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 32.140845070422536, | |
| "grad_norm": 0.3442953824996948, | |
| "learning_rate": 3.5485341193560503e-07, | |
| "loss": 0.2442217469215393, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 32.16901408450704, | |
| "grad_norm": 0.3276779055595398, | |
| "learning_rate": 3.5414600528245266e-07, | |
| "loss": 0.25613170862197876, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 32.19718309859155, | |
| "grad_norm": 0.33608436584472656, | |
| "learning_rate": 3.534391952345341e-07, | |
| "loss": 0.2614259123802185, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 32.225352112676056, | |
| "grad_norm": 0.3303307592868805, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": 0.2672120928764343, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 32.25352112676056, | |
| "grad_norm": 0.32655128836631775, | |
| "learning_rate": 3.5202737356483816e-07, | |
| "loss": 0.25033846497535706, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 32.28169014084507, | |
| "grad_norm": 0.3326750099658966, | |
| "learning_rate": 3.513223662437147e-07, | |
| "loss": 0.2697717547416687, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 32.309859154929576, | |
| "grad_norm": 0.33951663970947266, | |
| "learning_rate": 3.5061796412911913e-07, | |
| "loss": 0.25987690687179565, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 32.33802816901409, | |
| "grad_norm": 0.3316378891468048, | |
| "learning_rate": 3.4991416936678276e-07, | |
| "loss": 0.26063597202301025, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 32.36619718309859, | |
| "grad_norm": 0.33838751912117004, | |
| "learning_rate": 3.49210984100586e-07, | |
| "loss": 0.26821669936180115, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 32.394366197183096, | |
| "grad_norm": 0.3294714689254761, | |
| "learning_rate": 3.4850841047255364e-07, | |
| "loss": 0.2651536464691162, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 32.42253521126761, | |
| "grad_norm": 0.32624831795692444, | |
| "learning_rate": 3.4780645062284665e-07, | |
| "loss": 0.26797136664390564, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 32.45070422535211, | |
| "grad_norm": 0.3322686553001404, | |
| "learning_rate": 3.471051066897562e-07, | |
| "loss": 0.2507922649383545, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 32.478873239436616, | |
| "grad_norm": 0.34128591418266296, | |
| "learning_rate": 3.4640438080969773e-07, | |
| "loss": 0.2541847229003906, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 32.50704225352113, | |
| "grad_norm": 0.3294316828250885, | |
| "learning_rate": 3.45704275117204e-07, | |
| "loss": 0.26326608657836914, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 32.53521126760563, | |
| "grad_norm": 0.3293727934360504, | |
| "learning_rate": 3.450047917449181e-07, | |
| "loss": 0.2654852271080017, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 32.563380281690144, | |
| "grad_norm": 0.32460466027259827, | |
| "learning_rate": 3.4430593282358777e-07, | |
| "loss": 0.25532153248786926, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 32.59154929577465, | |
| "grad_norm": 0.3373318016529083, | |
| "learning_rate": 3.4360770048205843e-07, | |
| "loss": 0.25554513931274414, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 32.61971830985915, | |
| "grad_norm": 0.34251123666763306, | |
| "learning_rate": 3.429100968472668e-07, | |
| "loss": 0.26249927282333374, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 32.647887323943664, | |
| "grad_norm": 0.32484838366508484, | |
| "learning_rate": 3.4221312404423486e-07, | |
| "loss": 0.2562830448150635, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 32.67605633802817, | |
| "grad_norm": 0.3435952365398407, | |
| "learning_rate": 3.4151678419606233e-07, | |
| "loss": 0.2574070692062378, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 32.70422535211267, | |
| "grad_norm": 0.33101195096969604, | |
| "learning_rate": 3.4082107942392136e-07, | |
| "loss": 0.257138729095459, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 32.732394366197184, | |
| "grad_norm": 0.37783390283584595, | |
| "learning_rate": 3.4012601184704904e-07, | |
| "loss": 0.26037871837615967, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 32.76056338028169, | |
| "grad_norm": 0.33994340896606445, | |
| "learning_rate": 3.3943158358274203e-07, | |
| "loss": 0.27281370759010315, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 32.7887323943662, | |
| "grad_norm": 0.32044896483421326, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.2526357173919678, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 32.816901408450704, | |
| "grad_norm": 0.3177328109741211, | |
| "learning_rate": 3.3804465345126545e-07, | |
| "loss": 0.24474188685417175, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 32.84507042253521, | |
| "grad_norm": 0.3454241454601288, | |
| "learning_rate": 3.3735215580892575e-07, | |
| "loss": 0.24287842214107513, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 32.87323943661972, | |
| "grad_norm": 0.3315359354019165, | |
| "learning_rate": 3.366603059287977e-07, | |
| "loss": 0.26422587037086487, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 32.901408450704224, | |
| "grad_norm": 0.3329971730709076, | |
| "learning_rate": 3.359691059183761e-07, | |
| "loss": 0.2687873840332031, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 32.929577464788736, | |
| "grad_norm": 0.32194119691848755, | |
| "learning_rate": 3.3527855788317614e-07, | |
| "loss": 0.2529294788837433, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 32.95774647887324, | |
| "grad_norm": 0.3383830487728119, | |
| "learning_rate": 3.3458866392672694e-07, | |
| "loss": 0.24743716418743134, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 32.985915492957744, | |
| "grad_norm": 0.3237183690071106, | |
| "learning_rate": 3.338994261505649e-07, | |
| "loss": 0.2624974250793457, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "grad_norm": 0.4738941192626953, | |
| "learning_rate": 3.3321084665422803e-07, | |
| "loss": 0.2611575722694397, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 33.028169014084504, | |
| "grad_norm": 0.3192257285118103, | |
| "learning_rate": 3.325229275352489e-07, | |
| "loss": 0.25964364409446716, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 33.056338028169016, | |
| "grad_norm": 0.3343312442302704, | |
| "learning_rate": 3.3183567088914833e-07, | |
| "loss": 0.2630879282951355, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 33.08450704225352, | |
| "grad_norm": 0.32633543014526367, | |
| "learning_rate": 3.3114907880942933e-07, | |
| "loss": 0.2663639783859253, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 33.11267605633803, | |
| "grad_norm": 0.3315299451351166, | |
| "learning_rate": 3.3046315338757026e-07, | |
| "loss": 0.2600438892841339, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 33.140845070422536, | |
| "grad_norm": 0.35579875111579895, | |
| "learning_rate": 3.297778967130191e-07, | |
| "loss": 0.2606794834136963, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 33.16901408450704, | |
| "grad_norm": 0.3733043074607849, | |
| "learning_rate": 3.290933108731866e-07, | |
| "loss": 0.2512716054916382, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 33.19718309859155, | |
| "grad_norm": 0.345547616481781, | |
| "learning_rate": 3.2840939795343987e-07, | |
| "loss": 0.26478058099746704, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 33.225352112676056, | |
| "grad_norm": 0.33482369780540466, | |
| "learning_rate": 3.2772616003709616e-07, | |
| "loss": 0.2547541856765747, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 33.25352112676056, | |
| "grad_norm": 0.3360159695148468, | |
| "learning_rate": 3.270435992054166e-07, | |
| "loss": 0.2729008197784424, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 33.28169014084507, | |
| "grad_norm": 0.34279924631118774, | |
| "learning_rate": 3.263617175376001e-07, | |
| "loss": 0.253216028213501, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 33.309859154929576, | |
| "grad_norm": 0.33277833461761475, | |
| "learning_rate": 3.2568051711077636e-07, | |
| "loss": 0.2548581659793854, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 33.33802816901409, | |
| "grad_norm": 0.3363766074180603, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": 0.25859585404396057, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 33.36619718309859, | |
| "grad_norm": 0.3143514394760132, | |
| "learning_rate": 3.2432016827824414e-07, | |
| "loss": 0.25202757120132446, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 33.394366197183096, | |
| "grad_norm": 0.3307502567768097, | |
| "learning_rate": 3.2364102401639423e-07, | |
| "loss": 0.2585509717464447, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 33.42253521126761, | |
| "grad_norm": 0.33466944098472595, | |
| "learning_rate": 3.229625692832414e-07, | |
| "loss": 0.25337138772010803, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 33.45070422535211, | |
| "grad_norm": 0.31453531980514526, | |
| "learning_rate": 3.222848061454764e-07, | |
| "loss": 0.2618822455406189, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 33.478873239436616, | |
| "grad_norm": 0.35038280487060547, | |
| "learning_rate": 3.216077366676833e-07, | |
| "loss": 0.26571914553642273, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 33.50704225352113, | |
| "grad_norm": 0.3479344844818115, | |
| "learning_rate": 3.209313629123329e-07, | |
| "loss": 0.26047736406326294, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 33.53521126760563, | |
| "grad_norm": 0.339733362197876, | |
| "learning_rate": 3.2025568693977745e-07, | |
| "loss": 0.2580920159816742, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 33.563380281690144, | |
| "grad_norm": 0.3457892835140228, | |
| "learning_rate": 3.195807108082429e-07, | |
| "loss": 0.25361278653144836, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 33.59154929577465, | |
| "grad_norm": 0.35116419196128845, | |
| "learning_rate": 3.1890643657382356e-07, | |
| "loss": 0.2517722249031067, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 33.61971830985915, | |
| "grad_norm": 0.3323304355144501, | |
| "learning_rate": 3.182328662904756e-07, | |
| "loss": 0.25763052701950073, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 33.647887323943664, | |
| "grad_norm": 0.3180283308029175, | |
| "learning_rate": 3.175600020100112e-07, | |
| "loss": 0.26268666982650757, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 33.67605633802817, | |
| "grad_norm": 0.32394516468048096, | |
| "learning_rate": 3.168878457820915e-07, | |
| "loss": 0.2540284991264343, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 33.70422535211267, | |
| "grad_norm": 0.3315521478652954, | |
| "learning_rate": 3.162163996542209e-07, | |
| "loss": 0.26291581988334656, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 33.732394366197184, | |
| "grad_norm": 0.32950082421302795, | |
| "learning_rate": 3.155456656717408e-07, | |
| "loss": 0.2569209039211273, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 33.76056338028169, | |
| "grad_norm": 0.3513064384460449, | |
| "learning_rate": 3.14875645877823e-07, | |
| "loss": 0.24890759587287903, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 33.7887323943662, | |
| "grad_norm": 0.3389022946357727, | |
| "learning_rate": 3.142063423134644e-07, | |
| "loss": 0.2649242579936981, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 33.816901408450704, | |
| "grad_norm": 0.3270207941532135, | |
| "learning_rate": 3.135377570174796e-07, | |
| "loss": 0.26036375761032104, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 33.84507042253521, | |
| "grad_norm": 0.35390451550483704, | |
| "learning_rate": 3.1286989202649503e-07, | |
| "loss": 0.25314897298812866, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 33.87323943661972, | |
| "grad_norm": 0.3263014256954193, | |
| "learning_rate": 3.122027493749438e-07, | |
| "loss": 0.2565680742263794, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 33.901408450704224, | |
| "grad_norm": 0.3133479654788971, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.2629280090332031, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 33.929577464788736, | |
| "grad_norm": 0.3530975580215454, | |
| "learning_rate": 3.1087063921686263e-07, | |
| "loss": 0.26493778824806213, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 33.95774647887324, | |
| "grad_norm": 0.3344945013523102, | |
| "learning_rate": 3.102056757681715e-07, | |
| "loss": 0.2550634741783142, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 33.985915492957744, | |
| "grad_norm": 0.32563889026641846, | |
| "learning_rate": 3.0954144277457817e-07, | |
| "loss": 0.25193893909454346, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "grad_norm": 0.48929160833358765, | |
| "learning_rate": 3.0887794225945143e-07, | |
| "loss": 0.2488047182559967, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 34.028169014084504, | |
| "grad_norm": 0.32252368330955505, | |
| "learning_rate": 3.0821517624392925e-07, | |
| "loss": 0.25322937965393066, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 34.056338028169016, | |
| "grad_norm": 0.3510408401489258, | |
| "learning_rate": 3.075531467469116e-07, | |
| "loss": 0.265546977519989, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 34.08450704225352, | |
| "grad_norm": 0.33205100893974304, | |
| "learning_rate": 3.0689185578505525e-07, | |
| "loss": 0.2621091902256012, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 34.11267605633803, | |
| "grad_norm": 0.33356767892837524, | |
| "learning_rate": 3.062313053727671e-07, | |
| "loss": 0.24525871872901917, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 34.140845070422536, | |
| "grad_norm": 0.32789838314056396, | |
| "learning_rate": 3.055714975221981e-07, | |
| "loss": 0.2655676007270813, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 34.16901408450704, | |
| "grad_norm": 0.3837502598762512, | |
| "learning_rate": 3.0491243424323783e-07, | |
| "loss": 0.2583563029766083, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 34.19718309859155, | |
| "grad_norm": 0.32497507333755493, | |
| "learning_rate": 3.0425411754350694e-07, | |
| "loss": 0.25412964820861816, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 34.225352112676056, | |
| "grad_norm": 0.3423527181148529, | |
| "learning_rate": 3.0359654942835247e-07, | |
| "loss": 0.2603622078895569, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 34.25352112676056, | |
| "grad_norm": 0.3326815068721771, | |
| "learning_rate": 3.029397319008407e-07, | |
| "loss": 0.2565937638282776, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 34.28169014084507, | |
| "grad_norm": 0.3410370945930481, | |
| "learning_rate": 3.02283666961752e-07, | |
| "loss": 0.2687773108482361, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 34.309859154929576, | |
| "grad_norm": 0.33839917182922363, | |
| "learning_rate": 3.016283566095739e-07, | |
| "loss": 0.27057865262031555, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 34.33802816901409, | |
| "grad_norm": 0.32578834891319275, | |
| "learning_rate": 3.0097380284049523e-07, | |
| "loss": 0.2486121952533722, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 34.36619718309859, | |
| "grad_norm": 0.34315571188926697, | |
| "learning_rate": 3.003200076484004e-07, | |
| "loss": 0.24546003341674805, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 34.394366197183096, | |
| "grad_norm": 0.32684844732284546, | |
| "learning_rate": 2.996669730248628e-07, | |
| "loss": 0.2699982523918152, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 34.42253521126761, | |
| "grad_norm": 0.33143216371536255, | |
| "learning_rate": 2.9901470095913943e-07, | |
| "loss": 0.25373488664627075, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 34.45070422535211, | |
| "grad_norm": 0.35439276695251465, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": 0.24537047743797302, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 34.478873239436616, | |
| "grad_norm": 0.33683332800865173, | |
| "learning_rate": 2.977124524465413e-07, | |
| "loss": 0.2581592798233032, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 34.50704225352113, | |
| "grad_norm": 0.3526037037372589, | |
| "learning_rate": 2.9706247996654134e-07, | |
| "loss": 0.2586764693260193, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 34.53521126760563, | |
| "grad_norm": 0.3380417823791504, | |
| "learning_rate": 2.964132779780929e-07, | |
| "loss": 0.263625830411911, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 34.563380281690144, | |
| "grad_norm": 0.3443485200405121, | |
| "learning_rate": 2.9576484845877793e-07, | |
| "loss": 0.2503140866756439, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 34.59154929577465, | |
| "grad_norm": 0.35234031081199646, | |
| "learning_rate": 2.9511719338382535e-07, | |
| "loss": 0.25954437255859375, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 34.61971830985915, | |
| "grad_norm": 0.3406411111354828, | |
| "learning_rate": 2.944703147261046e-07, | |
| "loss": 0.2619974613189697, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 34.647887323943664, | |
| "grad_norm": 0.3347373306751251, | |
| "learning_rate": 2.938242144561201e-07, | |
| "loss": 0.2618395984172821, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 34.67605633802817, | |
| "grad_norm": 0.33204221725463867, | |
| "learning_rate": 2.931788945420058e-07, | |
| "loss": 0.26617297530174255, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 34.70422535211267, | |
| "grad_norm": 0.3484657406806946, | |
| "learning_rate": 2.925343569495178e-07, | |
| "loss": 0.2656903564929962, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 34.732394366197184, | |
| "grad_norm": 0.3254799544811249, | |
| "learning_rate": 2.918906036420294e-07, | |
| "loss": 0.24855300784111023, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 34.76056338028169, | |
| "grad_norm": 0.33594822883605957, | |
| "learning_rate": 2.9124763658052474e-07, | |
| "loss": 0.2618425786495209, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 34.7887323943662, | |
| "grad_norm": 0.323949933052063, | |
| "learning_rate": 2.9060545772359305e-07, | |
| "loss": 0.2546170949935913, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 34.816901408450704, | |
| "grad_norm": 0.3242202699184418, | |
| "learning_rate": 2.8996406902742267e-07, | |
| "loss": 0.24211625754833221, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 34.84507042253521, | |
| "grad_norm": 0.3353058695793152, | |
| "learning_rate": 2.893234724457946e-07, | |
| "loss": 0.25402140617370605, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 34.87323943661972, | |
| "grad_norm": 0.33988505601882935, | |
| "learning_rate": 2.886836699300771e-07, | |
| "loss": 0.24861261248588562, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 34.901408450704224, | |
| "grad_norm": 0.3339218199253082, | |
| "learning_rate": 2.8804466342921987e-07, | |
| "loss": 0.25520533323287964, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 34.929577464788736, | |
| "grad_norm": 0.3448787033557892, | |
| "learning_rate": 2.874064548897472e-07, | |
| "loss": 0.2663518786430359, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 34.95774647887324, | |
| "grad_norm": 0.3454734981060028, | |
| "learning_rate": 2.86769046255753e-07, | |
| "loss": 0.25287461280822754, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 34.985915492957744, | |
| "grad_norm": 0.3322574496269226, | |
| "learning_rate": 2.8613243946889477e-07, | |
| "loss": 0.25937291979789734, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "grad_norm": 0.47356757521629333, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.2588436007499695, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 35.028169014084504, | |
| "grad_norm": 0.32370901107788086, | |
| "learning_rate": 2.848616391909959e-07, | |
| "loss": 0.2847004234790802, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 35.056338028169016, | |
| "grad_norm": 0.3340662717819214, | |
| "learning_rate": 2.842274495710335e-07, | |
| "loss": 0.24963748455047607, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 35.08450704225352, | |
| "grad_norm": 0.3470820188522339, | |
| "learning_rate": 2.835940695403512e-07, | |
| "loss": 0.25704559683799744, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 35.11267605633803, | |
| "grad_norm": 0.3213740289211273, | |
| "learning_rate": 2.829615010283344e-07, | |
| "loss": 0.24562162160873413, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 35.140845070422536, | |
| "grad_norm": 0.3323827385902405, | |
| "learning_rate": 2.8232974596189653e-07, | |
| "loss": 0.25376367568969727, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 35.16901408450704, | |
| "grad_norm": 0.32620102167129517, | |
| "learning_rate": 2.8169880626547283e-07, | |
| "loss": 0.25920748710632324, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 35.19718309859155, | |
| "grad_norm": 0.34155285358428955, | |
| "learning_rate": 2.8106868386101545e-07, | |
| "loss": 0.2532484233379364, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 35.225352112676056, | |
| "grad_norm": 0.32295599579811096, | |
| "learning_rate": 2.8043938066798645e-07, | |
| "loss": 0.2596886456012726, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 35.25352112676056, | |
| "grad_norm": 0.3390556871891022, | |
| "learning_rate": 2.7981089860335225e-07, | |
| "loss": 0.2628597021102905, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 35.28169014084507, | |
| "grad_norm": 0.3397858738899231, | |
| "learning_rate": 2.791832395815782e-07, | |
| "loss": 0.260450154542923, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 35.309859154929576, | |
| "grad_norm": 0.3356383442878723, | |
| "learning_rate": 2.7855640551462287e-07, | |
| "loss": 0.24709969758987427, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 35.33802816901409, | |
| "grad_norm": 0.3386112153530121, | |
| "learning_rate": 2.7793039831193133e-07, | |
| "loss": 0.2554944157600403, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 35.36619718309859, | |
| "grad_norm": 0.34547311067581177, | |
| "learning_rate": 2.773052198804301e-07, | |
| "loss": 0.2689363658428192, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 35.394366197183096, | |
| "grad_norm": 0.34119531512260437, | |
| "learning_rate": 2.766808721245211e-07, | |
| "loss": 0.2566688656806946, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 35.42253521126761, | |
| "grad_norm": 0.3342508375644684, | |
| "learning_rate": 2.760573569460757e-07, | |
| "loss": 0.24888336658477783, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 35.45070422535211, | |
| "grad_norm": 0.33420711755752563, | |
| "learning_rate": 2.7543467624442956e-07, | |
| "loss": 0.27446046471595764, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 35.478873239436616, | |
| "grad_norm": 0.3241899907588959, | |
| "learning_rate": 2.7481283191637605e-07, | |
| "loss": 0.24648495018482208, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 35.50704225352113, | |
| "grad_norm": 0.3267020285129547, | |
| "learning_rate": 2.741918258561607e-07, | |
| "loss": 0.2573559880256653, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 35.53521126760563, | |
| "grad_norm": 0.3532126247882843, | |
| "learning_rate": 2.7357165995547547e-07, | |
| "loss": 0.2432764172554016, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 35.563380281690144, | |
| "grad_norm": 0.33826351165771484, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.25668877363204956, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 35.59154929577465, | |
| "grad_norm": 0.338796466588974, | |
| "learning_rate": 2.7233385618666315e-07, | |
| "loss": 0.2522228956222534, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 35.61971830985915, | |
| "grad_norm": 0.3262656629085541, | |
| "learning_rate": 2.717162220891007e-07, | |
| "loss": 0.2595973312854767, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 35.647887323943664, | |
| "grad_norm": 0.3441692590713501, | |
| "learning_rate": 2.7109943569218707e-07, | |
| "loss": 0.26480039954185486, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 35.67605633802817, | |
| "grad_norm": 0.3370777368545532, | |
| "learning_rate": 2.7048349887476037e-07, | |
| "loss": 0.25393831729888916, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 35.70422535211267, | |
| "grad_norm": 0.34027761220932007, | |
| "learning_rate": 2.698684135130713e-07, | |
| "loss": 0.24741466343402863, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 35.732394366197184, | |
| "grad_norm": 0.3438904881477356, | |
| "learning_rate": 2.692541814807763e-07, | |
| "loss": 0.2620083689689636, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 35.76056338028169, | |
| "grad_norm": 0.33286988735198975, | |
| "learning_rate": 2.686408046489328e-07, | |
| "loss": 0.2683720588684082, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 35.7887323943662, | |
| "grad_norm": 0.3397563397884369, | |
| "learning_rate": 2.6802828488599294e-07, | |
| "loss": 0.25813597440719604, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 35.816901408450704, | |
| "grad_norm": 0.34016039967536926, | |
| "learning_rate": 2.6741662405779796e-07, | |
| "loss": 0.25924018025398254, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 35.84507042253521, | |
| "grad_norm": 0.3287438452243805, | |
| "learning_rate": 2.6680582402757324e-07, | |
| "loss": 0.24357835948467255, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 35.87323943661972, | |
| "grad_norm": 0.3473154306411743, | |
| "learning_rate": 2.661958866559213e-07, | |
| "loss": 0.25433164834976196, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 35.901408450704224, | |
| "grad_norm": 0.3320452570915222, | |
| "learning_rate": 2.655868138008171e-07, | |
| "loss": 0.2620140016078949, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 35.929577464788736, | |
| "grad_norm": 0.35027673840522766, | |
| "learning_rate": 2.649786073176025e-07, | |
| "loss": 0.26484349370002747, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 35.95774647887324, | |
| "grad_norm": 0.34910938143730164, | |
| "learning_rate": 2.6437126905897967e-07, | |
| "loss": 0.24849724769592285, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 35.985915492957744, | |
| "grad_norm": 0.3321913480758667, | |
| "learning_rate": 2.637648008750062e-07, | |
| "loss": 0.24661482870578766, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "grad_norm": 0.48746395111083984, | |
| "learning_rate": 2.631592046130896e-07, | |
| "loss": 0.25251615047454834, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 36.028169014084504, | |
| "grad_norm": 0.3326322138309479, | |
| "learning_rate": 2.6255448211798103e-07, | |
| "loss": 0.2514849603176117, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 36.056338028169016, | |
| "grad_norm": 0.323958158493042, | |
| "learning_rate": 2.6195063523177e-07, | |
| "loss": 0.2420714795589447, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 36.08450704225352, | |
| "grad_norm": 0.3715856075286865, | |
| "learning_rate": 2.613476657938789e-07, | |
| "loss": 0.24617412686347961, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 36.11267605633803, | |
| "grad_norm": 0.34012261033058167, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.26243406534194946, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 36.140845070422536, | |
| "grad_norm": 0.33578699827194214, | |
| "learning_rate": 2.6014436660737605e-07, | |
| "loss": 0.2461467981338501, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 36.16901408450704, | |
| "grad_norm": 0.3389386832714081, | |
| "learning_rate": 2.595440405242222e-07, | |
| "loss": 0.2597675025463104, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 36.19718309859155, | |
| "grad_norm": 0.33628833293914795, | |
| "learning_rate": 2.589445992202931e-07, | |
| "loss": 0.2510983943939209, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 36.225352112676056, | |
| "grad_norm": 0.3409932851791382, | |
| "learning_rate": 2.583460445215911e-07, | |
| "loss": 0.2607109844684601, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 36.25352112676056, | |
| "grad_norm": 0.3476935625076294, | |
| "learning_rate": 2.5774837825141736e-07, | |
| "loss": 0.26868295669555664, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 36.28169014084507, | |
| "grad_norm": 0.3389628231525421, | |
| "learning_rate": 2.571516022303671e-07, | |
| "loss": 0.24396029114723206, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 36.309859154929576, | |
| "grad_norm": 0.3351360261440277, | |
| "learning_rate": 2.565557182763235e-07, | |
| "loss": 0.2638927102088928, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 36.33802816901409, | |
| "grad_norm": 0.34508877992630005, | |
| "learning_rate": 2.5596072820445254e-07, | |
| "loss": 0.25982603430747986, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 36.36619718309859, | |
| "grad_norm": 0.3333590626716614, | |
| "learning_rate": 2.5536663382719713e-07, | |
| "loss": 0.25606241822242737, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 36.394366197183096, | |
| "grad_norm": 0.33822396397590637, | |
| "learning_rate": 2.547734369542718e-07, | |
| "loss": 0.2518611252307892, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 36.42253521126761, | |
| "grad_norm": 0.3358154594898224, | |
| "learning_rate": 2.5418113939265686e-07, | |
| "loss": 0.25333690643310547, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 36.45070422535211, | |
| "grad_norm": 0.33005034923553467, | |
| "learning_rate": 2.5358974294659373e-07, | |
| "loss": 0.24985584616661072, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 36.478873239436616, | |
| "grad_norm": 0.3343973159790039, | |
| "learning_rate": 2.5299924941757843e-07, | |
| "loss": 0.27109482884407043, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 36.50704225352113, | |
| "grad_norm": 0.33798739314079285, | |
| "learning_rate": 2.5240966060435674e-07, | |
| "loss": 0.2599262595176697, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 36.53521126760563, | |
| "grad_norm": 0.33094605803489685, | |
| "learning_rate": 2.5182097830291824e-07, | |
| "loss": 0.24939575791358948, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 36.563380281690144, | |
| "grad_norm": 0.3303806185722351, | |
| "learning_rate": 2.512332043064913e-07, | |
| "loss": 0.2498035877943039, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 36.59154929577465, | |
| "grad_norm": 0.3437672555446625, | |
| "learning_rate": 2.5064634040553767e-07, | |
| "loss": 0.26817601919174194, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 36.61971830985915, | |
| "grad_norm": 0.3672111928462982, | |
| "learning_rate": 2.5006038838774647e-07, | |
| "loss": 0.2572394609451294, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 36.647887323943664, | |
| "grad_norm": 0.34106817841529846, | |
| "learning_rate": 2.494753500380291e-07, | |
| "loss": 0.25872814655303955, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 36.67605633802817, | |
| "grad_norm": 0.35012519359588623, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": 0.2478848099708557, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 36.70422535211267, | |
| "grad_norm": 0.3354050815105438, | |
| "learning_rate": 2.483080214685404e-07, | |
| "loss": 0.2592930793762207, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 36.732394366197184, | |
| "grad_norm": 0.3539486825466156, | |
| "learning_rate": 2.4772573480465445e-07, | |
| "loss": 0.24492186307907104, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 36.76056338028169, | |
| "grad_norm": 0.34425100684165955, | |
| "learning_rate": 2.471443689206021e-07, | |
| "loss": 0.2586178779602051, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 36.7887323943662, | |
| "grad_norm": 0.35161006450653076, | |
| "learning_rate": 2.465639255873246e-07, | |
| "loss": 0.2581009268760681, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 36.816901408450704, | |
| "grad_norm": 0.3478921949863434, | |
| "learning_rate": 2.4598440657295286e-07, | |
| "loss": 0.2674616575241089, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 36.84507042253521, | |
| "grad_norm": 0.35100990533828735, | |
| "learning_rate": 2.454058136428027e-07, | |
| "loss": 0.27003878355026245, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 36.87323943661972, | |
| "grad_norm": 0.3363000452518463, | |
| "learning_rate": 2.4482814855936834e-07, | |
| "loss": 0.2609623968601227, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 36.901408450704224, | |
| "grad_norm": 0.3406379222869873, | |
| "learning_rate": 2.4425141308231765e-07, | |
| "loss": 0.2661615014076233, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 36.929577464788736, | |
| "grad_norm": 0.331514447927475, | |
| "learning_rate": 2.43675608968487e-07, | |
| "loss": 0.24595093727111816, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 36.95774647887324, | |
| "grad_norm": 0.33636540174484253, | |
| "learning_rate": 2.4310073797187573e-07, | |
| "loss": 0.2518694996833801, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 36.985915492957744, | |
| "grad_norm": 0.3203655779361725, | |
| "learning_rate": 2.4252680184364045e-07, | |
| "loss": 0.24997392296791077, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "grad_norm": 0.47873687744140625, | |
| "learning_rate": 2.4195380233209006e-07, | |
| "loss": 0.24962179362773895, | |
| "step": 1332 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 1.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.788879174705873e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |