{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 37.0,
"eval_steps": 500,
"global_step": 1332,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.028169014084507043,
"grad_norm": 12.737117767333984,
"learning_rate": 9.999993146109795e-07,
"loss": 0.6797127723693848,
"step": 1
},
{
"epoch": 0.056338028169014086,
"grad_norm": 12.04797649383545,
"learning_rate": 9.999972584460056e-07,
"loss": 0.6627321243286133,
"step": 2
},
{
"epoch": 0.08450704225352113,
"grad_norm": 12.632461547851562,
"learning_rate": 9.99993831511342e-07,
"loss": 0.6829236149787903,
"step": 3
},
{
"epoch": 0.11267605633802817,
"grad_norm": 11.97681713104248,
"learning_rate": 9.999890338174275e-07,
"loss": 0.6625960469245911,
"step": 4
},
{
"epoch": 0.14084507042253522,
"grad_norm": 11.185710906982422,
"learning_rate": 9.99982865378877e-07,
"loss": 0.6418126821517944,
"step": 5
},
{
"epoch": 0.16901408450704225,
"grad_norm": 11.49565315246582,
"learning_rate": 9.999753262144804e-07,
"loss": 0.6464570760726929,
"step": 6
},
{
"epoch": 0.19718309859154928,
"grad_norm": 10.954561233520508,
"learning_rate": 9.999664163472034e-07,
"loss": 0.63329017162323,
"step": 7
},
{
"epoch": 0.22535211267605634,
"grad_norm": 10.728333473205566,
"learning_rate": 9.999561358041868e-07,
"loss": 0.6382037401199341,
"step": 8
},
{
"epoch": 0.2535211267605634,
"grad_norm": 8.404616355895996,
"learning_rate": 9.99944484616747e-07,
"loss": 0.5870345830917358,
"step": 9
},
{
"epoch": 0.28169014084507044,
"grad_norm": 7.616209983825684,
"learning_rate": 9.99931462820376e-07,
"loss": 0.5672095417976379,
"step": 10
},
{
"epoch": 0.30985915492957744,
"grad_norm": 7.800975799560547,
"learning_rate": 9.999170704547398e-07,
"loss": 0.581696629524231,
"step": 11
},
{
"epoch": 0.3380281690140845,
"grad_norm": 7.584338665008545,
"learning_rate": 9.999013075636804e-07,
"loss": 0.5873032808303833,
"step": 12
},
{
"epoch": 0.36619718309859156,
"grad_norm": 6.736105442047119,
"learning_rate": 9.998841741952141e-07,
"loss": 0.5502372980117798,
"step": 13
},
{
"epoch": 0.39436619718309857,
"grad_norm": 6.839756965637207,
"learning_rate": 9.998656704015323e-07,
"loss": 0.5653150677680969,
"step": 14
},
{
"epoch": 0.4225352112676056,
"grad_norm": 7.052567005157471,
"learning_rate": 9.998457962390008e-07,
"loss": 0.5660480260848999,
"step": 15
},
{
"epoch": 0.4507042253521127,
"grad_norm": 6.61349630355835,
"learning_rate": 9.998245517681593e-07,
"loss": 0.552219033241272,
"step": 16
},
{
"epoch": 0.4788732394366197,
"grad_norm": 3.9956817626953125,
"learning_rate": 9.998019370537227e-07,
"loss": 0.5171241760253906,
"step": 17
},
{
"epoch": 0.5070422535211268,
"grad_norm": 3.6887121200561523,
"learning_rate": 9.997779521645791e-07,
"loss": 0.5023034811019897,
"step": 18
},
{
"epoch": 0.5352112676056338,
"grad_norm": 3.6457769870758057,
"learning_rate": 9.997525971737909e-07,
"loss": 0.505454421043396,
"step": 19
},
{
"epoch": 0.5633802816901409,
"grad_norm": 3.398740530014038,
"learning_rate": 9.997258721585931e-07,
"loss": 0.4978747069835663,
"step": 20
},
{
"epoch": 0.5915492957746479,
"grad_norm": 3.2862207889556885,
"learning_rate": 9.99697777200395e-07,
"loss": 0.5002620220184326,
"step": 21
},
{
"epoch": 0.6197183098591549,
"grad_norm": 3.3747572898864746,
"learning_rate": 9.996683123847795e-07,
"loss": 0.5069968700408936,
"step": 22
},
{
"epoch": 0.647887323943662,
"grad_norm": 3.001546621322632,
"learning_rate": 9.996374778015007e-07,
"loss": 0.4922000765800476,
"step": 23
},
{
"epoch": 0.676056338028169,
"grad_norm": 2.996706962585449,
"learning_rate": 9.996052735444862e-07,
"loss": 0.4938335716724396,
"step": 24
},
{
"epoch": 0.704225352112676,
"grad_norm": 2.668245315551758,
"learning_rate": 9.99571699711836e-07,
"loss": 0.49115338921546936,
"step": 25
},
{
"epoch": 0.7323943661971831,
"grad_norm": 2.4952428340911865,
"learning_rate": 9.995367564058216e-07,
"loss": 0.4847099483013153,
"step": 26
},
{
"epoch": 0.7605633802816901,
"grad_norm": 2.529451847076416,
"learning_rate": 9.995004437328865e-07,
"loss": 0.48129573464393616,
"step": 27
},
{
"epoch": 0.7887323943661971,
"grad_norm": 2.479883909225464,
"learning_rate": 9.994627618036452e-07,
"loss": 0.5088395476341248,
"step": 28
},
{
"epoch": 0.8169014084507042,
"grad_norm": 2.414393424987793,
"learning_rate": 9.994237107328838e-07,
"loss": 0.48045098781585693,
"step": 29
},
{
"epoch": 0.8450704225352113,
"grad_norm": 2.2080600261688232,
"learning_rate": 9.993832906395582e-07,
"loss": 0.47147125005722046,
"step": 30
},
{
"epoch": 0.8732394366197183,
"grad_norm": 1.912841558456421,
"learning_rate": 9.993415016467952e-07,
"loss": 0.4724900424480438,
"step": 31
},
{
"epoch": 0.9014084507042254,
"grad_norm": 1.282597303390503,
"learning_rate": 9.992983438818915e-07,
"loss": 0.46792298555374146,
"step": 32
},
{
"epoch": 0.9295774647887324,
"grad_norm": 1.4362828731536865,
"learning_rate": 9.992538174763127e-07,
"loss": 0.45093870162963867,
"step": 33
},
{
"epoch": 0.9577464788732394,
"grad_norm": 1.4296821355819702,
"learning_rate": 9.992079225656944e-07,
"loss": 0.44724205136299133,
"step": 34
},
{
"epoch": 0.9859154929577465,
"grad_norm": 1.4829713106155396,
"learning_rate": 9.9916065928984e-07,
"loss": 0.44936883449554443,
"step": 35
},
{
"epoch": 1.0,
"grad_norm": 1.387039303779602,
"learning_rate": 9.991120277927223e-07,
"loss": 0.47316086292266846,
"step": 36
},
{
"epoch": 1.028169014084507,
"grad_norm": 1.3140299320220947,
"learning_rate": 9.990620282224806e-07,
"loss": 0.4389120638370514,
"step": 37
},
{
"epoch": 1.056338028169014,
"grad_norm": 1.2881019115447998,
"learning_rate": 9.990106607314225e-07,
"loss": 0.43830516934394836,
"step": 38
},
{
"epoch": 1.084507042253521,
"grad_norm": 1.1489726305007935,
"learning_rate": 9.989579254760224e-07,
"loss": 0.44559216499328613,
"step": 39
},
{
"epoch": 1.1126760563380282,
"grad_norm": 1.0595662593841553,
"learning_rate": 9.989038226169207e-07,
"loss": 0.43717890977859497,
"step": 40
},
{
"epoch": 1.1408450704225352,
"grad_norm": 0.9458185434341431,
"learning_rate": 9.988483523189248e-07,
"loss": 0.43611639738082886,
"step": 41
},
{
"epoch": 1.1690140845070423,
"grad_norm": 0.8811507821083069,
"learning_rate": 9.98791514751006e-07,
"loss": 0.4194882810115814,
"step": 42
},
{
"epoch": 1.1971830985915493,
"grad_norm": 0.7880372405052185,
"learning_rate": 9.98733310086302e-07,
"loss": 0.4363758862018585,
"step": 43
},
{
"epoch": 1.2253521126760563,
"grad_norm": 0.7736399173736572,
"learning_rate": 9.98673738502114e-07,
"loss": 0.43049588799476624,
"step": 44
},
{
"epoch": 1.2535211267605635,
"grad_norm": 0.7198370695114136,
"learning_rate": 9.986128001799076e-07,
"loss": 0.43443119525909424,
"step": 45
},
{
"epoch": 1.2816901408450705,
"grad_norm": 0.7174084186553955,
"learning_rate": 9.985504953053113e-07,
"loss": 0.43092280626296997,
"step": 46
},
{
"epoch": 1.3098591549295775,
"grad_norm": 0.7043387293815613,
"learning_rate": 9.984868240681164e-07,
"loss": 0.417573481798172,
"step": 47
},
{
"epoch": 1.3380281690140845,
"grad_norm": 0.6884390115737915,
"learning_rate": 9.98421786662277e-07,
"loss": 0.4211745262145996,
"step": 48
},
{
"epoch": 1.3661971830985915,
"grad_norm": 0.7091729044914246,
"learning_rate": 9.983553832859078e-07,
"loss": 0.4147814214229584,
"step": 49
},
{
"epoch": 1.3943661971830985,
"grad_norm": 0.6925486326217651,
"learning_rate": 9.982876141412855e-07,
"loss": 0.432437002658844,
"step": 50
},
{
"epoch": 1.4225352112676055,
"grad_norm": 0.7119179368019104,
"learning_rate": 9.982184794348462e-07,
"loss": 0.41633373498916626,
"step": 51
},
{
"epoch": 1.4507042253521127,
"grad_norm": 0.6801888346672058,
"learning_rate": 9.981479793771866e-07,
"loss": 0.4228135645389557,
"step": 52
},
{
"epoch": 1.4788732394366197,
"grad_norm": 0.6876774430274963,
"learning_rate": 9.98076114183062e-07,
"loss": 0.41455432772636414,
"step": 53
},
{
"epoch": 1.5070422535211268,
"grad_norm": 0.6285378336906433,
"learning_rate": 9.98002884071386e-07,
"loss": 0.41491252183914185,
"step": 54
},
{
"epoch": 1.5352112676056338,
"grad_norm": 0.6261480450630188,
"learning_rate": 9.979282892652304e-07,
"loss": 0.42695990204811096,
"step": 55
},
{
"epoch": 1.563380281690141,
"grad_norm": 0.6269007325172424,
"learning_rate": 9.97852329991824e-07,
"loss": 0.41284894943237305,
"step": 56
},
{
"epoch": 1.591549295774648,
"grad_norm": 0.6070351600646973,
"learning_rate": 9.977750064825519e-07,
"loss": 0.42982780933380127,
"step": 57
},
{
"epoch": 1.619718309859155,
"grad_norm": 0.5970191955566406,
"learning_rate": 9.976963189729547e-07,
"loss": 0.41365376114845276,
"step": 58
},
{
"epoch": 1.647887323943662,
"grad_norm": 0.5778729319572449,
"learning_rate": 9.976162677027284e-07,
"loss": 0.42080622911453247,
"step": 59
},
{
"epoch": 1.676056338028169,
"grad_norm": 0.5267013907432556,
"learning_rate": 9.975348529157229e-07,
"loss": 0.40949106216430664,
"step": 60
},
{
"epoch": 1.704225352112676,
"grad_norm": 0.5284983515739441,
"learning_rate": 9.974520748599421e-07,
"loss": 0.4082256555557251,
"step": 61
},
{
"epoch": 1.732394366197183,
"grad_norm": 0.49156272411346436,
"learning_rate": 9.973679337875418e-07,
"loss": 0.3944624662399292,
"step": 62
},
{
"epoch": 1.76056338028169,
"grad_norm": 0.4944726824760437,
"learning_rate": 9.972824299548309e-07,
"loss": 0.4087256193161011,
"step": 63
},
{
"epoch": 1.788732394366197,
"grad_norm": 0.4764452874660492,
"learning_rate": 9.971955636222684e-07,
"loss": 0.4067206382751465,
"step": 64
},
{
"epoch": 1.8169014084507042,
"grad_norm": 0.48928746581077576,
"learning_rate": 9.971073350544644e-07,
"loss": 0.4004918336868286,
"step": 65
},
{
"epoch": 1.8450704225352113,
"grad_norm": 0.4580424726009369,
"learning_rate": 9.970177445201783e-07,
"loss": 0.4040325880050659,
"step": 66
},
{
"epoch": 1.8732394366197183,
"grad_norm": 0.5053924322128296,
"learning_rate": 9.969267922923188e-07,
"loss": 0.40139085054397583,
"step": 67
},
{
"epoch": 1.9014084507042255,
"grad_norm": 0.4661526679992676,
"learning_rate": 9.968344786479415e-07,
"loss": 0.38993388414382935,
"step": 68
},
{
"epoch": 1.9295774647887325,
"grad_norm": 0.4677845537662506,
"learning_rate": 9.967408038682505e-07,
"loss": 0.4014376401901245,
"step": 69
},
{
"epoch": 1.9577464788732395,
"grad_norm": 0.4655434787273407,
"learning_rate": 9.96645768238595e-07,
"loss": 0.3975449204444885,
"step": 70
},
{
"epoch": 1.9859154929577465,
"grad_norm": 0.4675063192844391,
"learning_rate": 9.965493720484698e-07,
"loss": 0.4009154438972473,
"step": 71
},
{
"epoch": 2.0,
"grad_norm": 0.5548242926597595,
"learning_rate": 9.964516155915151e-07,
"loss": 0.39267462491989136,
"step": 72
},
{
"epoch": 2.028169014084507,
"grad_norm": 0.4601926803588867,
"learning_rate": 9.963524991655133e-07,
"loss": 0.3973795473575592,
"step": 73
},
{
"epoch": 2.056338028169014,
"grad_norm": 0.4464695155620575,
"learning_rate": 9.962520230723906e-07,
"loss": 0.39020174741744995,
"step": 74
},
{
"epoch": 2.084507042253521,
"grad_norm": 0.42715415358543396,
"learning_rate": 9.961501876182148e-07,
"loss": 0.3930002450942993,
"step": 75
},
{
"epoch": 2.112676056338028,
"grad_norm": 0.3989242613315582,
"learning_rate": 9.960469931131936e-07,
"loss": 0.3865053653717041,
"step": 76
},
{
"epoch": 2.140845070422535,
"grad_norm": 0.4167341887950897,
"learning_rate": 9.959424398716763e-07,
"loss": 0.39777663350105286,
"step": 77
},
{
"epoch": 2.169014084507042,
"grad_norm": 0.4046856760978699,
"learning_rate": 9.958365282121496e-07,
"loss": 0.38023141026496887,
"step": 78
},
{
"epoch": 2.1971830985915495,
"grad_norm": 0.40858548879623413,
"learning_rate": 9.95729258457239e-07,
"loss": 0.37487876415252686,
"step": 79
},
{
"epoch": 2.2253521126760565,
"grad_norm": 0.3576146364212036,
"learning_rate": 9.956206309337066e-07,
"loss": 0.3785707354545593,
"step": 80
},
{
"epoch": 2.2535211267605635,
"grad_norm": 0.35235047340393066,
"learning_rate": 9.955106459724508e-07,
"loss": 0.38552170991897583,
"step": 81
},
{
"epoch": 2.2816901408450705,
"grad_norm": 0.373362272977829,
"learning_rate": 9.953993039085048e-07,
"loss": 0.38321995735168457,
"step": 82
},
{
"epoch": 2.3098591549295775,
"grad_norm": 0.3574947416782379,
"learning_rate": 9.952866050810363e-07,
"loss": 0.37346434593200684,
"step": 83
},
{
"epoch": 2.3380281690140845,
"grad_norm": 0.36156368255615234,
"learning_rate": 9.951725498333448e-07,
"loss": 0.382648229598999,
"step": 84
},
{
"epoch": 2.3661971830985915,
"grad_norm": 0.3521256148815155,
"learning_rate": 9.950571385128625e-07,
"loss": 0.3722230792045593,
"step": 85
},
{
"epoch": 2.3943661971830985,
"grad_norm": 0.3384946584701538,
"learning_rate": 9.949403714711526e-07,
"loss": 0.3648328185081482,
"step": 86
},
{
"epoch": 2.4225352112676055,
"grad_norm": 0.34228095412254333,
"learning_rate": 9.948222490639075e-07,
"loss": 0.372160941362381,
"step": 87
},
{
"epoch": 2.4507042253521125,
"grad_norm": 0.34330716729164124,
"learning_rate": 9.947027716509488e-07,
"loss": 0.36588054895401,
"step": 88
},
{
"epoch": 2.4788732394366195,
"grad_norm": 0.34555092453956604,
"learning_rate": 9.94581939596225e-07,
"loss": 0.38422292470932007,
"step": 89
},
{
"epoch": 2.507042253521127,
"grad_norm": 0.34432411193847656,
"learning_rate": 9.944597532678119e-07,
"loss": 0.3802357316017151,
"step": 90
},
{
"epoch": 2.535211267605634,
"grad_norm": 0.35508641600608826,
"learning_rate": 9.943362130379101e-07,
"loss": 0.37436896562576294,
"step": 91
},
{
"epoch": 2.563380281690141,
"grad_norm": 0.3540443181991577,
"learning_rate": 9.942113192828444e-07,
"loss": 0.39830613136291504,
"step": 92
},
{
"epoch": 2.591549295774648,
"grad_norm": 0.3429860472679138,
"learning_rate": 9.940850723830632e-07,
"loss": 0.38153308629989624,
"step": 93
},
{
"epoch": 2.619718309859155,
"grad_norm": 0.3220756947994232,
"learning_rate": 9.939574727231362e-07,
"loss": 0.36020469665527344,
"step": 94
},
{
"epoch": 2.647887323943662,
"grad_norm": 0.3417351245880127,
"learning_rate": 9.93828520691754e-07,
"loss": 0.38868680596351624,
"step": 95
},
{
"epoch": 2.676056338028169,
"grad_norm": 0.3259858191013336,
"learning_rate": 9.93698216681727e-07,
"loss": 0.37741273641586304,
"step": 96
},
{
"epoch": 2.704225352112676,
"grad_norm": 0.33722448348999023,
"learning_rate": 9.93566561089984e-07,
"loss": 0.3821848928928375,
"step": 97
},
{
"epoch": 2.732394366197183,
"grad_norm": 0.31846100091934204,
"learning_rate": 9.934335543175705e-07,
"loss": 0.3690311014652252,
"step": 98
},
{
"epoch": 2.76056338028169,
"grad_norm": 0.34040549397468567,
"learning_rate": 9.932991967696482e-07,
"loss": 0.3875328600406647,
"step": 99
},
{
"epoch": 2.788732394366197,
"grad_norm": 0.3258971571922302,
"learning_rate": 9.931634888554935e-07,
"loss": 0.3811268210411072,
"step": 100
},
{
"epoch": 2.816901408450704,
"grad_norm": 0.32806867361068726,
"learning_rate": 9.930264309884964e-07,
"loss": 0.3713844418525696,
"step": 101
},
{
"epoch": 2.845070422535211,
"grad_norm": 0.3252440094947815,
"learning_rate": 9.928880235861588e-07,
"loss": 0.3812159299850464,
"step": 102
},
{
"epoch": 2.873239436619718,
"grad_norm": 0.33440181612968445,
"learning_rate": 9.927482670700936e-07,
"loss": 0.37723666429519653,
"step": 103
},
{
"epoch": 2.9014084507042255,
"grad_norm": 0.3046083152294159,
"learning_rate": 9.926071618660237e-07,
"loss": 0.3681407868862152,
"step": 104
},
{
"epoch": 2.9295774647887325,
"grad_norm": 0.3097338378429413,
"learning_rate": 9.924647084037797e-07,
"loss": 0.3724687099456787,
"step": 105
},
{
"epoch": 2.9577464788732395,
"grad_norm": 0.32305970788002014,
"learning_rate": 9.923209071172994e-07,
"loss": 0.3641166090965271,
"step": 106
},
{
"epoch": 2.9859154929577465,
"grad_norm": 0.32677826285362244,
"learning_rate": 9.921757584446268e-07,
"loss": 0.36330974102020264,
"step": 107
},
{
"epoch": 3.0,
"grad_norm": 0.4263511896133423,
"learning_rate": 9.9202926282791e-07,
"loss": 0.35592788457870483,
"step": 108
},
{
"epoch": 3.028169014084507,
"grad_norm": 0.2994212508201599,
"learning_rate": 9.918814207133997e-07,
"loss": 0.3603532314300537,
"step": 109
},
{
"epoch": 3.056338028169014,
"grad_norm": 0.30977630615234375,
"learning_rate": 9.917322325514487e-07,
"loss": 0.374819278717041,
"step": 110
},
{
"epoch": 3.084507042253521,
"grad_norm": 0.31614792346954346,
"learning_rate": 9.915816987965102e-07,
"loss": 0.3680700957775116,
"step": 111
},
{
"epoch": 3.112676056338028,
"grad_norm": 0.30458712577819824,
"learning_rate": 9.91429819907136e-07,
"loss": 0.3753468692302704,
"step": 112
},
{
"epoch": 3.140845070422535,
"grad_norm": 0.30280736088752747,
"learning_rate": 9.912765963459756e-07,
"loss": 0.3559075593948364,
"step": 113
},
{
"epoch": 3.169014084507042,
"grad_norm": 0.3088322579860687,
"learning_rate": 9.911220285797748e-07,
"loss": 0.36761462688446045,
"step": 114
},
{
"epoch": 3.1971830985915495,
"grad_norm": 0.3007463216781616,
"learning_rate": 9.909661170793733e-07,
"loss": 0.3572486340999603,
"step": 115
},
{
"epoch": 3.2253521126760565,
"grad_norm": 0.29317507147789,
"learning_rate": 9.908088623197048e-07,
"loss": 0.37356066703796387,
"step": 116
},
{
"epoch": 3.2535211267605635,
"grad_norm": 0.30190175771713257,
"learning_rate": 9.906502647797945e-07,
"loss": 0.3747510015964508,
"step": 117
},
{
"epoch": 3.2816901408450705,
"grad_norm": 0.300547331571579,
"learning_rate": 9.904903249427582e-07,
"loss": 0.3723798096179962,
"step": 118
},
{
"epoch": 3.3098591549295775,
"grad_norm": 0.2943092882633209,
"learning_rate": 9.903290432958003e-07,
"loss": 0.3614634573459625,
"step": 119
},
{
"epoch": 3.3380281690140845,
"grad_norm": 0.2933284342288971,
"learning_rate": 9.901664203302124e-07,
"loss": 0.34804195165634155,
"step": 120
},
{
"epoch": 3.3661971830985915,
"grad_norm": 0.2936899662017822,
"learning_rate": 9.900024565413727e-07,
"loss": 0.3482627272605896,
"step": 121
},
{
"epoch": 3.3943661971830985,
"grad_norm": 0.2972092628479004,
"learning_rate": 9.89837152428743e-07,
"loss": 0.35861676931381226,
"step": 122
},
{
"epoch": 3.4225352112676055,
"grad_norm": 0.296779602766037,
"learning_rate": 9.896705084958687e-07,
"loss": 0.37210696935653687,
"step": 123
},
{
"epoch": 3.4507042253521125,
"grad_norm": 0.2911286950111389,
"learning_rate": 9.895025252503755e-07,
"loss": 0.33883392810821533,
"step": 124
},
{
"epoch": 3.4788732394366195,
"grad_norm": 0.29729408025741577,
"learning_rate": 9.8933320320397e-07,
"loss": 0.3569541573524475,
"step": 125
},
{
"epoch": 3.507042253521127,
"grad_norm": 0.29103100299835205,
"learning_rate": 9.891625428724364e-07,
"loss": 0.36078906059265137,
"step": 126
},
{
"epoch": 3.535211267605634,
"grad_norm": 0.2976583242416382,
"learning_rate": 9.889905447756355e-07,
"loss": 0.3531530499458313,
"step": 127
},
{
"epoch": 3.563380281690141,
"grad_norm": 0.3033563196659088,
"learning_rate": 9.888172094375033e-07,
"loss": 0.37008020281791687,
"step": 128
},
{
"epoch": 3.591549295774648,
"grad_norm": 0.30928340554237366,
"learning_rate": 9.886425373860496e-07,
"loss": 0.3652263283729553,
"step": 129
},
{
"epoch": 3.619718309859155,
"grad_norm": 0.3299793601036072,
"learning_rate": 9.88466529153356e-07,
"loss": 0.36931300163269043,
"step": 130
},
{
"epoch": 3.647887323943662,
"grad_norm": 0.29216262698173523,
"learning_rate": 9.882891852755732e-07,
"loss": 0.3560551404953003,
"step": 131
},
{
"epoch": 3.676056338028169,
"grad_norm": 0.3086439371109009,
"learning_rate": 9.881105062929221e-07,
"loss": 0.3592608869075775,
"step": 132
},
{
"epoch": 3.704225352112676,
"grad_norm": 0.3008037805557251,
"learning_rate": 9.879304927496896e-07,
"loss": 0.35765546560287476,
"step": 133
},
{
"epoch": 3.732394366197183,
"grad_norm": 0.3011510968208313,
"learning_rate": 9.877491451942284e-07,
"loss": 0.35755690932273865,
"step": 134
},
{
"epoch": 3.76056338028169,
"grad_norm": 0.28508952260017395,
"learning_rate": 9.875664641789543e-07,
"loss": 0.3475223183631897,
"step": 135
},
{
"epoch": 3.788732394366197,
"grad_norm": 0.29807090759277344,
"learning_rate": 9.873824502603459e-07,
"loss": 0.3468858003616333,
"step": 136
},
{
"epoch": 3.816901408450704,
"grad_norm": 0.30015671253204346,
"learning_rate": 9.871971039989407e-07,
"loss": 0.3525606393814087,
"step": 137
},
{
"epoch": 3.845070422535211,
"grad_norm": 0.2894802689552307,
"learning_rate": 9.870104259593362e-07,
"loss": 0.35189589858055115,
"step": 138
},
{
"epoch": 3.873239436619718,
"grad_norm": 0.2956956624984741,
"learning_rate": 9.86822416710186e-07,
"loss": 0.3662959337234497,
"step": 139
},
{
"epoch": 3.9014084507042255,
"grad_norm": 0.28614693880081177,
"learning_rate": 9.866330768241983e-07,
"loss": 0.3523305654525757,
"step": 140
},
{
"epoch": 3.9295774647887325,
"grad_norm": 0.3109326958656311,
"learning_rate": 9.86442406878136e-07,
"loss": 0.3661171495914459,
"step": 141
},
{
"epoch": 3.9577464788732395,
"grad_norm": 0.29977917671203613,
"learning_rate": 9.862504074528126e-07,
"loss": 0.3687261939048767,
"step": 142
},
{
"epoch": 3.9859154929577465,
"grad_norm": 0.2874816954135895,
"learning_rate": 9.860570791330911e-07,
"loss": 0.35026735067367554,
"step": 143
},
{
"epoch": 4.0,
"grad_norm": 0.39478132128715515,
"learning_rate": 9.85862422507884e-07,
"loss": 0.329179584980011,
"step": 144
},
{
"epoch": 4.028169014084507,
"grad_norm": 0.29594185948371887,
"learning_rate": 9.856664381701483e-07,
"loss": 0.34915629029273987,
"step": 145
},
{
"epoch": 4.056338028169014,
"grad_norm": 0.2942439615726471,
"learning_rate": 9.854691267168871e-07,
"loss": 0.3501034080982208,
"step": 146
},
{
"epoch": 4.084507042253521,
"grad_norm": 0.3186146318912506,
"learning_rate": 9.852704887491445e-07,
"loss": 0.3498520255088806,
"step": 147
},
{
"epoch": 4.112676056338028,
"grad_norm": 0.2865906059741974,
"learning_rate": 9.850705248720068e-07,
"loss": 0.359851598739624,
"step": 148
},
{
"epoch": 4.140845070422535,
"grad_norm": 0.2773308753967285,
"learning_rate": 9.848692356945981e-07,
"loss": 0.34519776701927185,
"step": 149
},
{
"epoch": 4.169014084507042,
"grad_norm": 0.27520084381103516,
"learning_rate": 9.846666218300807e-07,
"loss": 0.3370436429977417,
"step": 150
},
{
"epoch": 4.197183098591549,
"grad_norm": 0.31606534123420715,
"learning_rate": 9.844626838956513e-07,
"loss": 0.3660886287689209,
"step": 151
},
{
"epoch": 4.225352112676056,
"grad_norm": 0.30757179856300354,
"learning_rate": 9.8425742251254e-07,
"loss": 0.3431619703769684,
"step": 152
},
{
"epoch": 4.253521126760563,
"grad_norm": 0.2864473760128021,
"learning_rate": 9.84050838306009e-07,
"loss": 0.3478638231754303,
"step": 153
},
{
"epoch": 4.28169014084507,
"grad_norm": 0.2924051880836487,
"learning_rate": 9.838429319053495e-07,
"loss": 0.3459091782569885,
"step": 154
},
{
"epoch": 4.309859154929577,
"grad_norm": 0.2723977565765381,
"learning_rate": 9.836337039438803e-07,
"loss": 0.3437414765357971,
"step": 155
},
{
"epoch": 4.338028169014084,
"grad_norm": 0.28301340341567993,
"learning_rate": 9.83423155058946e-07,
"loss": 0.351753830909729,
"step": 156
},
{
"epoch": 4.366197183098592,
"grad_norm": 0.3007968068122864,
"learning_rate": 9.832112858919155e-07,
"loss": 0.3534032106399536,
"step": 157
},
{
"epoch": 4.394366197183099,
"grad_norm": 0.2823623716831207,
"learning_rate": 9.829980970881784e-07,
"loss": 0.33871978521347046,
"step": 158
},
{
"epoch": 4.422535211267606,
"grad_norm": 0.27985984086990356,
"learning_rate": 9.82783589297145e-07,
"loss": 0.35134732723236084,
"step": 159
},
{
"epoch": 4.450704225352113,
"grad_norm": 0.29764989018440247,
"learning_rate": 9.825677631722435e-07,
"loss": 0.35344886779785156,
"step": 160
},
{
"epoch": 4.47887323943662,
"grad_norm": 0.2861703634262085,
"learning_rate": 9.823506193709174e-07,
"loss": 0.3553098440170288,
"step": 161
},
{
"epoch": 4.507042253521127,
"grad_norm": 0.3005011975765228,
"learning_rate": 9.821321585546243e-07,
"loss": 0.349773645401001,
"step": 162
},
{
"epoch": 4.535211267605634,
"grad_norm": 0.28691744804382324,
"learning_rate": 9.81912381388834e-07,
"loss": 0.3327012360095978,
"step": 163
},
{
"epoch": 4.563380281690141,
"grad_norm": 0.3060745298862457,
"learning_rate": 9.816912885430258e-07,
"loss": 0.3464226722717285,
"step": 164
},
{
"epoch": 4.591549295774648,
"grad_norm": 0.3035100996494293,
"learning_rate": 9.814688806906868e-07,
"loss": 0.3499942719936371,
"step": 165
},
{
"epoch": 4.619718309859155,
"grad_norm": 0.3114430606365204,
"learning_rate": 9.812451585093098e-07,
"loss": 0.3396627604961395,
"step": 166
},
{
"epoch": 4.647887323943662,
"grad_norm": 0.30142080783843994,
"learning_rate": 9.810201226803917e-07,
"loss": 0.3466919958591461,
"step": 167
},
{
"epoch": 4.676056338028169,
"grad_norm": 0.2819617986679077,
"learning_rate": 9.807937738894303e-07,
"loss": 0.34856730699539185,
"step": 168
},
{
"epoch": 4.704225352112676,
"grad_norm": 0.29183247685432434,
"learning_rate": 9.805661128259235e-07,
"loss": 0.3437175750732422,
"step": 169
},
{
"epoch": 4.732394366197183,
"grad_norm": 0.29465699195861816,
"learning_rate": 9.80337140183366e-07,
"loss": 0.3438083827495575,
"step": 170
},
{
"epoch": 4.76056338028169,
"grad_norm": 0.28720420598983765,
"learning_rate": 9.801068566592483e-07,
"loss": 0.3422589898109436,
"step": 171
},
{
"epoch": 4.788732394366197,
"grad_norm": 0.2751031816005707,
"learning_rate": 9.798752629550546e-07,
"loss": 0.3460365831851959,
"step": 172
},
{
"epoch": 4.816901408450704,
"grad_norm": 0.2868765592575073,
"learning_rate": 9.796423597762588e-07,
"loss": 0.3391006886959076,
"step": 173
},
{
"epoch": 4.845070422535211,
"grad_norm": 0.2844865024089813,
"learning_rate": 9.794081478323245e-07,
"loss": 0.3488645851612091,
"step": 174
},
{
"epoch": 4.873239436619718,
"grad_norm": 0.28600648045539856,
"learning_rate": 9.791726278367021e-07,
"loss": 0.3440667986869812,
"step": 175
},
{
"epoch": 4.901408450704225,
"grad_norm": 0.29167741537094116,
"learning_rate": 9.78935800506826e-07,
"loss": 0.34016746282577515,
"step": 176
},
{
"epoch": 4.929577464788732,
"grad_norm": 0.29203853011131287,
"learning_rate": 9.786976665641138e-07,
"loss": 0.33034777641296387,
"step": 177
},
{
"epoch": 4.957746478873239,
"grad_norm": 0.29975563287734985,
"learning_rate": 9.784582267339622e-07,
"loss": 0.34664660692214966,
"step": 178
},
{
"epoch": 4.985915492957746,
"grad_norm": 0.2778502106666565,
"learning_rate": 9.78217481745747e-07,
"loss": 0.34249287843704224,
"step": 179
},
{
"epoch": 5.0,
"grad_norm": 0.396133691072464,
"learning_rate": 9.779754323328192e-07,
"loss": 0.34673285484313965,
"step": 180
},
{
"epoch": 5.028169014084507,
"grad_norm": 0.29174622893333435,
"learning_rate": 9.777320792325025e-07,
"loss": 0.3266841173171997,
"step": 181
},
{
"epoch": 5.056338028169014,
"grad_norm": 0.28281646966934204,
"learning_rate": 9.774874231860935e-07,
"loss": 0.3295621871948242,
"step": 182
},
{
"epoch": 5.084507042253521,
"grad_norm": 0.2767295837402344,
"learning_rate": 9.772414649388568e-07,
"loss": 0.3460637629032135,
"step": 183
},
{
"epoch": 5.112676056338028,
"grad_norm": 0.28246212005615234,
"learning_rate": 9.769942052400235e-07,
"loss": 0.3325508236885071,
"step": 184
},
{
"epoch": 5.140845070422535,
"grad_norm": 0.31317514181137085,
"learning_rate": 9.767456448427896e-07,
"loss": 0.3373739719390869,
"step": 185
},
{
"epoch": 5.169014084507042,
"grad_norm": 0.29388973116874695,
"learning_rate": 9.764957845043135e-07,
"loss": 0.3335680365562439,
"step": 186
},
{
"epoch": 5.197183098591549,
"grad_norm": 0.3093099892139435,
"learning_rate": 9.76244624985713e-07,
"loss": 0.3288199007511139,
"step": 187
},
{
"epoch": 5.225352112676056,
"grad_norm": 0.2718607187271118,
"learning_rate": 9.759921670520634e-07,
"loss": 0.33789312839508057,
"step": 188
},
{
"epoch": 5.253521126760563,
"grad_norm": 0.3087296485900879,
"learning_rate": 9.757384114723953e-07,
"loss": 0.3482661843299866,
"step": 189
},
{
"epoch": 5.28169014084507,
"grad_norm": 0.2887554466724396,
"learning_rate": 9.754833590196926e-07,
"loss": 0.3353871703147888,
"step": 190
},
{
"epoch": 5.309859154929577,
"grad_norm": 0.2770691514015198,
"learning_rate": 9.752270104708888e-07,
"loss": 0.33239609003067017,
"step": 191
},
{
"epoch": 5.338028169014084,
"grad_norm": 0.29489442706108093,
"learning_rate": 9.749693666068663e-07,
"loss": 0.34318211674690247,
"step": 192
},
{
"epoch": 5.366197183098592,
"grad_norm": 0.31870850920677185,
"learning_rate": 9.747104282124531e-07,
"loss": 0.33540403842926025,
"step": 193
},
{
"epoch": 5.394366197183099,
"grad_norm": 0.27267521619796753,
"learning_rate": 9.744501960764203e-07,
"loss": 0.33416521549224854,
"step": 194
},
{
"epoch": 5.422535211267606,
"grad_norm": 0.284470796585083,
"learning_rate": 9.741886709914803e-07,
"loss": 0.3242385685443878,
"step": 195
},
{
"epoch": 5.450704225352113,
"grad_norm": 0.2988561689853668,
"learning_rate": 9.739258537542835e-07,
"loss": 0.3325580656528473,
"step": 196
},
{
"epoch": 5.47887323943662,
"grad_norm": 0.29107666015625,
"learning_rate": 9.73661745165417e-07,
"loss": 0.34368401765823364,
"step": 197
},
{
"epoch": 5.507042253521127,
"grad_norm": 0.289497047662735,
"learning_rate": 9.733963460294015e-07,
"loss": 0.33908677101135254,
"step": 198
},
{
"epoch": 5.535211267605634,
"grad_norm": 0.27910080552101135,
"learning_rate": 9.731296571546885e-07,
"loss": 0.3478449285030365,
"step": 199
},
{
"epoch": 5.563380281690141,
"grad_norm": 0.2966774106025696,
"learning_rate": 9.728616793536587e-07,
"loss": 0.3371037244796753,
"step": 200
},
{
"epoch": 5.591549295774648,
"grad_norm": 0.30997180938720703,
"learning_rate": 9.72592413442619e-07,
"loss": 0.3469342589378357,
"step": 201
},
{
"epoch": 5.619718309859155,
"grad_norm": 0.2851829528808594,
"learning_rate": 9.723218602418e-07,
"loss": 0.3497530221939087,
"step": 202
},
{
"epoch": 5.647887323943662,
"grad_norm": 0.29238471388816833,
"learning_rate": 9.720500205753538e-07,
"loss": 0.3286020755767822,
"step": 203
},
{
"epoch": 5.676056338028169,
"grad_norm": 0.2877226769924164,
"learning_rate": 9.717768952713511e-07,
"loss": 0.338655948638916,
"step": 204
},
{
"epoch": 5.704225352112676,
"grad_norm": 0.28834086656570435,
"learning_rate": 9.71502485161779e-07,
"loss": 0.333360880613327,
"step": 205
},
{
"epoch": 5.732394366197183,
"grad_norm": 0.28225836157798767,
"learning_rate": 9.71226791082538e-07,
"loss": 0.3514789640903473,
"step": 206
},
{
"epoch": 5.76056338028169,
"grad_norm": 0.28878796100616455,
"learning_rate": 9.709498138734403e-07,
"loss": 0.3271612524986267,
"step": 207
},
{
"epoch": 5.788732394366197,
"grad_norm": 0.29221564531326294,
"learning_rate": 9.706715543782064e-07,
"loss": 0.32984620332717896,
"step": 208
},
{
"epoch": 5.816901408450704,
"grad_norm": 0.31417179107666016,
"learning_rate": 9.703920134444632e-07,
"loss": 0.32708263397216797,
"step": 209
},
{
"epoch": 5.845070422535211,
"grad_norm": 0.30656933784484863,
"learning_rate": 9.701111919237408e-07,
"loss": 0.3378485143184662,
"step": 210
},
{
"epoch": 5.873239436619718,
"grad_norm": 0.28274714946746826,
"learning_rate": 9.698290906714702e-07,
"loss": 0.3210570812225342,
"step": 211
},
{
"epoch": 5.901408450704225,
"grad_norm": 0.28694605827331543,
"learning_rate": 9.695457105469804e-07,
"loss": 0.33672863245010376,
"step": 212
},
{
"epoch": 5.929577464788732,
"grad_norm": 0.2965106666088104,
"learning_rate": 9.69261052413497e-07,
"loss": 0.34379851818084717,
"step": 213
},
{
"epoch": 5.957746478873239,
"grad_norm": 0.3144500255584717,
"learning_rate": 9.689751171381377e-07,
"loss": 0.33530962467193604,
"step": 214
},
{
"epoch": 5.985915492957746,
"grad_norm": 0.274070680141449,
"learning_rate": 9.68687905591911e-07,
"loss": 0.32609909772872925,
"step": 215
},
{
"epoch": 6.0,
"grad_norm": 0.3976318836212158,
"learning_rate": 9.683994186497132e-07,
"loss": 0.3320915997028351,
"step": 216
},
{
"epoch": 6.028169014084507,
"grad_norm": 0.27306580543518066,
"learning_rate": 9.681096571903252e-07,
"loss": 0.32757407426834106,
"step": 217
},
{
"epoch": 6.056338028169014,
"grad_norm": 0.2815074622631073,
"learning_rate": 9.67818622096411e-07,
"loss": 0.31570878624916077,
"step": 218
},
{
"epoch": 6.084507042253521,
"grad_norm": 0.29271578788757324,
"learning_rate": 9.67526314254514e-07,
"loss": 0.33092743158340454,
"step": 219
},
{
"epoch": 6.112676056338028,
"grad_norm": 0.2819676399230957,
"learning_rate": 9.672327345550543e-07,
"loss": 0.32412028312683105,
"step": 220
},
{
"epoch": 6.140845070422535,
"grad_norm": 0.29121264815330505,
"learning_rate": 9.669378838923267e-07,
"loss": 0.324832558631897,
"step": 221
},
{
"epoch": 6.169014084507042,
"grad_norm": 0.28991273045539856,
"learning_rate": 9.666417631644976e-07,
"loss": 0.3393062949180603,
"step": 222
},
{
"epoch": 6.197183098591549,
"grad_norm": 0.28072309494018555,
"learning_rate": 9.66344373273602e-07,
"loss": 0.32950296998023987,
"step": 223
},
{
"epoch": 6.225352112676056,
"grad_norm": 0.3102487027645111,
"learning_rate": 9.66045715125541e-07,
"loss": 0.3289036154747009,
"step": 224
},
{
"epoch": 6.253521126760563,
"grad_norm": 0.2856598198413849,
"learning_rate": 9.657457896300791e-07,
"loss": 0.30844709277153015,
"step": 225
},
{
"epoch": 6.28169014084507,
"grad_norm": 0.28150248527526855,
"learning_rate": 9.654445977008414e-07,
"loss": 0.32252323627471924,
"step": 226
},
{
"epoch": 6.309859154929577,
"grad_norm": 0.3106309175491333,
"learning_rate": 9.651421402553108e-07,
"loss": 0.3153507113456726,
"step": 227
},
{
"epoch": 6.338028169014084,
"grad_norm": 0.3323248028755188,
"learning_rate": 9.648384182148252e-07,
"loss": 0.3372737169265747,
"step": 228
},
{
"epoch": 6.366197183098592,
"grad_norm": 0.2816256880760193,
"learning_rate": 9.645334325045745e-07,
"loss": 0.3402503728866577,
"step": 229
},
{
"epoch": 6.394366197183099,
"grad_norm": 0.28511133790016174,
"learning_rate": 9.64227184053598e-07,
"loss": 0.3433256149291992,
"step": 230
},
{
"epoch": 6.422535211267606,
"grad_norm": 0.27890780568122864,
"learning_rate": 9.63919673794782e-07,
"loss": 0.3293980658054352,
"step": 231
},
{
"epoch": 6.450704225352113,
"grad_norm": 0.29692021012306213,
"learning_rate": 9.636109026648554e-07,
"loss": 0.3282950818538666,
"step": 232
},
{
"epoch": 6.47887323943662,
"grad_norm": 0.2867494523525238,
"learning_rate": 9.633008716043892e-07,
"loss": 0.3350924253463745,
"step": 233
},
{
"epoch": 6.507042253521127,
"grad_norm": 0.27419739961624146,
"learning_rate": 9.629895815577915e-07,
"loss": 0.33370357751846313,
"step": 234
},
{
"epoch": 6.535211267605634,
"grad_norm": 0.2837441563606262,
"learning_rate": 9.626770334733058e-07,
"loss": 0.3225363790988922,
"step": 235
},
{
"epoch": 6.563380281690141,
"grad_norm": 0.28063684701919556,
"learning_rate": 9.623632283030077e-07,
"loss": 0.33922791481018066,
"step": 236
},
{
"epoch": 6.591549295774648,
"grad_norm": 0.2789226770401001,
"learning_rate": 9.620481670028026e-07,
"loss": 0.3289903998374939,
"step": 237
},
{
"epoch": 6.619718309859155,
"grad_norm": 0.2788150906562805,
"learning_rate": 9.617318505324212e-07,
"loss": 0.3213944435119629,
"step": 238
},
{
"epoch": 6.647887323943662,
"grad_norm": 0.2622866928577423,
"learning_rate": 9.614142798554186e-07,
"loss": 0.3391764461994171,
"step": 239
},
{
"epoch": 6.676056338028169,
"grad_norm": 0.2952481806278229,
"learning_rate": 9.610954559391704e-07,
"loss": 0.31737983226776123,
"step": 240
},
{
"epoch": 6.704225352112676,
"grad_norm": 0.28387367725372314,
"learning_rate": 9.607753797548691e-07,
"loss": 0.33009767532348633,
"step": 241
},
{
"epoch": 6.732394366197183,
"grad_norm": 0.28222769498825073,
"learning_rate": 9.604540522775227e-07,
"loss": 0.3226430416107178,
"step": 242
},
{
"epoch": 6.76056338028169,
"grad_norm": 0.2985075116157532,
"learning_rate": 9.601314744859504e-07,
"loss": 0.3328002393245697,
"step": 243
},
{
"epoch": 6.788732394366197,
"grad_norm": 0.2787352204322815,
"learning_rate": 9.598076473627796e-07,
"loss": 0.3292522728443146,
"step": 244
},
{
"epoch": 6.816901408450704,
"grad_norm": 0.2772713899612427,
"learning_rate": 9.594825718944444e-07,
"loss": 0.322078138589859,
"step": 245
},
{
"epoch": 6.845070422535211,
"grad_norm": 0.28727421164512634,
"learning_rate": 9.59156249071181e-07,
"loss": 0.3206414580345154,
"step": 246
},
{
"epoch": 6.873239436619718,
"grad_norm": 0.28722915053367615,
"learning_rate": 9.588286798870248e-07,
"loss": 0.34071967005729675,
"step": 247
},
{
"epoch": 6.901408450704225,
"grad_norm": 0.2791661322116852,
"learning_rate": 9.58499865339809e-07,
"loss": 0.32371699810028076,
"step": 248
},
{
"epoch": 6.929577464788732,
"grad_norm": 0.30174046754837036,
"learning_rate": 9.581698064311592e-07,
"loss": 0.32212015986442566,
"step": 249
},
{
"epoch": 6.957746478873239,
"grad_norm": 0.2757203280925751,
"learning_rate": 9.578385041664925e-07,
"loss": 0.3286738395690918,
"step": 250
},
{
"epoch": 6.985915492957746,
"grad_norm": 0.2977890968322754,
"learning_rate": 9.575059595550127e-07,
"loss": 0.32400673627853394,
"step": 251
},
{
"epoch": 7.0,
"grad_norm": 0.38676717877388,
"learning_rate": 9.571721736097088e-07,
"loss": 0.31549203395843506,
"step": 252
},
{
"epoch": 7.028169014084507,
"grad_norm": 0.28209057450294495,
"learning_rate": 9.568371473473503e-07,
"loss": 0.3403396010398865,
"step": 253
},
{
"epoch": 7.056338028169014,
"grad_norm": 0.28578808903694153,
"learning_rate": 9.565008817884854e-07,
"loss": 0.32727712392807007,
"step": 254
},
{
"epoch": 7.084507042253521,
"grad_norm": 0.2921590805053711,
"learning_rate": 9.561633779574372e-07,
"loss": 0.33234310150146484,
"step": 255
},
{
"epoch": 7.112676056338028,
"grad_norm": 0.27242740988731384,
"learning_rate": 9.55824636882301e-07,
"loss": 0.3204275965690613,
"step": 256
},
{
"epoch": 7.140845070422535,
"grad_norm": 0.28681573271751404,
"learning_rate": 9.554846595949413e-07,
"loss": 0.3127729594707489,
"step": 257
},
{
"epoch": 7.169014084507042,
"grad_norm": 0.27501875162124634,
"learning_rate": 9.55143447130987e-07,
"loss": 0.3219028115272522,
"step": 258
},
{
"epoch": 7.197183098591549,
"grad_norm": 0.2893284261226654,
"learning_rate": 9.54801000529831e-07,
"loss": 0.3149603009223938,
"step": 259
},
{
"epoch": 7.225352112676056,
"grad_norm": 0.29977115988731384,
"learning_rate": 9.54457320834625e-07,
"loss": 0.3116862177848816,
"step": 260
},
{
"epoch": 7.253521126760563,
"grad_norm": 0.2911919355392456,
"learning_rate": 9.54112409092277e-07,
"loss": 0.3377895653247833,
"step": 261
},
{
"epoch": 7.28169014084507,
"grad_norm": 0.32472458481788635,
"learning_rate": 9.537662663534477e-07,
"loss": 0.3152693510055542,
"step": 262
},
{
"epoch": 7.309859154929577,
"grad_norm": 0.2667696177959442,
"learning_rate": 9.534188936725483e-07,
"loss": 0.3181629180908203,
"step": 263
},
{
"epoch": 7.338028169014084,
"grad_norm": 0.29469212889671326,
"learning_rate": 9.530702921077358e-07,
"loss": 0.32251378893852234,
"step": 264
},
{
"epoch": 7.366197183098592,
"grad_norm": 0.2710505425930023,
"learning_rate": 9.527204627209112e-07,
"loss": 0.3157137632369995,
"step": 265
},
{
"epoch": 7.394366197183099,
"grad_norm": 0.29605209827423096,
"learning_rate": 9.523694065777156e-07,
"loss": 0.32492029666900635,
"step": 266
},
{
"epoch": 7.422535211267606,
"grad_norm": 0.28292831778526306,
"learning_rate": 9.520171247475268e-07,
"loss": 0.3182477653026581,
"step": 267
},
{
"epoch": 7.450704225352113,
"grad_norm": 0.28567084670066833,
"learning_rate": 9.516636183034564e-07,
"loss": 0.317740797996521,
"step": 268
},
{
"epoch": 7.47887323943662,
"grad_norm": 0.26249128580093384,
"learning_rate": 9.513088883223463e-07,
"loss": 0.3064804971218109,
"step": 269
},
{
"epoch": 7.507042253521127,
"grad_norm": 0.2805914878845215,
"learning_rate": 9.509529358847654e-07,
"loss": 0.32089754939079285,
"step": 270
},
{
"epoch": 7.535211267605634,
"grad_norm": 0.2892814874649048,
"learning_rate": 9.505957620750069e-07,
"loss": 0.31203514337539673,
"step": 271
},
{
"epoch": 7.563380281690141,
"grad_norm": 0.2809925079345703,
"learning_rate": 9.502373679810839e-07,
"loss": 0.3222312331199646,
"step": 272
},
{
"epoch": 7.591549295774648,
"grad_norm": 0.2793818414211273,
"learning_rate": 9.49877754694727e-07,
"loss": 0.30804064869880676,
"step": 273
},
{
"epoch": 7.619718309859155,
"grad_norm": 0.27966272830963135,
"learning_rate": 9.495169233113806e-07,
"loss": 0.32768452167510986,
"step": 274
},
{
"epoch": 7.647887323943662,
"grad_norm": 0.2743930220603943,
"learning_rate": 9.491548749301997e-07,
"loss": 0.3242339491844177,
"step": 275
},
{
"epoch": 7.676056338028169,
"grad_norm": 0.2765263319015503,
"learning_rate": 9.487916106540465e-07,
"loss": 0.3245530128479004,
"step": 276
},
{
"epoch": 7.704225352112676,
"grad_norm": 0.29381853342056274,
"learning_rate": 9.484271315894871e-07,
"loss": 0.32187986373901367,
"step": 277
},
{
"epoch": 7.732394366197183,
"grad_norm": 0.27294641733169556,
"learning_rate": 9.480614388467877e-07,
"loss": 0.3233500123023987,
"step": 278
},
{
"epoch": 7.76056338028169,
"grad_norm": 0.28944891691207886,
"learning_rate": 9.47694533539912e-07,
"loss": 0.31809201836586,
"step": 279
},
{
"epoch": 7.788732394366197,
"grad_norm": 0.2922861576080322,
"learning_rate": 9.473264167865171e-07,
"loss": 0.33151817321777344,
"step": 280
},
{
"epoch": 7.816901408450704,
"grad_norm": 0.2928006649017334,
"learning_rate": 9.469570897079504e-07,
"loss": 0.3220402002334595,
"step": 281
},
{
"epoch": 7.845070422535211,
"grad_norm": 0.28323814272880554,
"learning_rate": 9.465865534292464e-07,
"loss": 0.31611043214797974,
"step": 282
},
{
"epoch": 7.873239436619718,
"grad_norm": 0.28506791591644287,
"learning_rate": 9.462148090791228e-07,
"loss": 0.32090169191360474,
"step": 283
},
{
"epoch": 7.901408450704225,
"grad_norm": 0.2799360156059265,
"learning_rate": 9.458418577899774e-07,
"loss": 0.344720721244812,
"step": 284
},
{
"epoch": 7.929577464788732,
"grad_norm": 0.27799472212791443,
"learning_rate": 9.454677006978842e-07,
"loss": 0.3141616880893707,
"step": 285
},
{
"epoch": 7.957746478873239,
"grad_norm": 0.27411341667175293,
"learning_rate": 9.450923389425911e-07,
"loss": 0.31020885705947876,
"step": 286
},
{
"epoch": 7.985915492957746,
"grad_norm": 0.28921812772750854,
"learning_rate": 9.44715773667515e-07,
"loss": 0.3182592988014221,
"step": 287
},
{
"epoch": 8.0,
"grad_norm": 0.3832477331161499,
"learning_rate": 9.443380060197385e-07,
"loss": 0.32039332389831543,
"step": 288
},
{
"epoch": 8.028169014084508,
"grad_norm": 0.2698141932487488,
"learning_rate": 9.43959037150008e-07,
"loss": 0.3155902028083801,
"step": 289
},
{
"epoch": 8.056338028169014,
"grad_norm": 0.2765481472015381,
"learning_rate": 9.43578868212728e-07,
"loss": 0.3177169859409332,
"step": 290
},
{
"epoch": 8.084507042253522,
"grad_norm": 0.27723443508148193,
"learning_rate": 9.431975003659594e-07,
"loss": 0.31647437810897827,
"step": 291
},
{
"epoch": 8.112676056338028,
"grad_norm": 0.26522088050842285,
"learning_rate": 9.428149347714143e-07,
"loss": 0.31819185614585876,
"step": 292
},
{
"epoch": 8.140845070422536,
"grad_norm": 0.28780215978622437,
"learning_rate": 9.424311725944543e-07,
"loss": 0.31119635701179504,
"step": 293
},
{
"epoch": 8.169014084507042,
"grad_norm": 0.2786031663417816,
"learning_rate": 9.420462150040852e-07,
"loss": 0.31440460681915283,
"step": 294
},
{
"epoch": 8.19718309859155,
"grad_norm": 0.26644277572631836,
"learning_rate": 9.416600631729548e-07,
"loss": 0.32182344794273376,
"step": 295
},
{
"epoch": 8.225352112676056,
"grad_norm": 0.2974756062030792,
"learning_rate": 9.412727182773486e-07,
"loss": 0.3225427269935608,
"step": 296
},
{
"epoch": 8.253521126760564,
"grad_norm": 0.2951170802116394,
"learning_rate": 9.408841814971861e-07,
"loss": 0.31894785165786743,
"step": 297
},
{
"epoch": 8.28169014084507,
"grad_norm": 0.28619688749313354,
"learning_rate": 9.404944540160177e-07,
"loss": 0.31788474321365356,
"step": 298
},
{
"epoch": 8.309859154929578,
"grad_norm": 0.2877795398235321,
"learning_rate": 9.401035370210212e-07,
"loss": 0.3235325217247009,
"step": 299
},
{
"epoch": 8.338028169014084,
"grad_norm": 0.30395635962486267,
"learning_rate": 9.397114317029974e-07,
"loss": 0.33284687995910645,
"step": 300
},
{
"epoch": 8.366197183098592,
"grad_norm": 0.2896060347557068,
"learning_rate": 9.393181392563669e-07,
"loss": 0.32644715905189514,
"step": 301
},
{
"epoch": 8.394366197183098,
"grad_norm": 0.2763223648071289,
"learning_rate": 9.38923660879167e-07,
"loss": 0.304126501083374,
"step": 302
},
{
"epoch": 8.422535211267606,
"grad_norm": 0.2764940559864044,
"learning_rate": 9.385279977730472e-07,
"loss": 0.3124150037765503,
"step": 303
},
{
"epoch": 8.450704225352112,
"grad_norm": 0.2838902771472931,
"learning_rate": 9.381311511432658e-07,
"loss": 0.32950958609580994,
"step": 304
},
{
"epoch": 8.47887323943662,
"grad_norm": 0.2854890823364258,
"learning_rate": 9.377331221986866e-07,
"loss": 0.30994099378585815,
"step": 305
},
{
"epoch": 8.507042253521126,
"grad_norm": 0.2682625353336334,
"learning_rate": 9.373339121517746e-07,
"loss": 0.31963592767715454,
"step": 306
},
{
"epoch": 8.535211267605634,
"grad_norm": 0.2849690318107605,
"learning_rate": 9.36933522218593e-07,
"loss": 0.3182557225227356,
"step": 307
},
{
"epoch": 8.56338028169014,
"grad_norm": 0.28616634011268616,
"learning_rate": 9.36531953618799e-07,
"loss": 0.30273881554603577,
"step": 308
},
{
"epoch": 8.591549295774648,
"grad_norm": 0.2721138596534729,
"learning_rate": 9.361292075756401e-07,
"loss": 0.3207533657550812,
"step": 309
},
{
"epoch": 8.619718309859154,
"grad_norm": 0.2752065360546112,
"learning_rate": 9.357252853159505e-07,
"loss": 0.3186470866203308,
"step": 310
},
{
"epoch": 8.647887323943662,
"grad_norm": 0.2684236168861389,
"learning_rate": 9.353201880701477e-07,
"loss": 0.31932806968688965,
"step": 311
},
{
"epoch": 8.676056338028168,
"grad_norm": 0.28039291501045227,
"learning_rate": 9.34913917072228e-07,
"loss": 0.31683626770973206,
"step": 312
},
{
"epoch": 8.704225352112676,
"grad_norm": 0.2638692855834961,
"learning_rate": 9.345064735597633e-07,
"loss": 0.2991946339607239,
"step": 313
},
{
"epoch": 8.732394366197184,
"grad_norm": 0.30425477027893066,
"learning_rate": 9.340978587738972e-07,
"loss": 0.3023770749568939,
"step": 314
},
{
"epoch": 8.76056338028169,
"grad_norm": 0.27750107645988464,
"learning_rate": 9.336880739593415e-07,
"loss": 0.31177228689193726,
"step": 315
},
{
"epoch": 8.788732394366198,
"grad_norm": 0.2731636166572571,
"learning_rate": 9.332771203643714e-07,
"loss": 0.3076733946800232,
"step": 316
},
{
"epoch": 8.816901408450704,
"grad_norm": 0.2740687131881714,
"learning_rate": 9.328649992408231e-07,
"loss": 0.30277711153030396,
"step": 317
},
{
"epoch": 8.845070422535212,
"grad_norm": 0.27956005930900574,
"learning_rate": 9.324517118440888e-07,
"loss": 0.30988752841949463,
"step": 318
},
{
"epoch": 8.873239436619718,
"grad_norm": 0.28827622532844543,
"learning_rate": 9.320372594331137e-07,
"loss": 0.32537323236465454,
"step": 319
},
{
"epoch": 8.901408450704226,
"grad_norm": 0.2771560847759247,
"learning_rate": 9.316216432703916e-07,
"loss": 0.3233356475830078,
"step": 320
},
{
"epoch": 8.929577464788732,
"grad_norm": 0.2804992198944092,
"learning_rate": 9.312048646219617e-07,
"loss": 0.31110987067222595,
"step": 321
},
{
"epoch": 8.95774647887324,
"grad_norm": 0.29048794507980347,
"learning_rate": 9.307869247574038e-07,
"loss": 0.3100625276565552,
"step": 322
},
{
"epoch": 8.985915492957746,
"grad_norm": 0.2751557230949402,
"learning_rate": 9.303678249498352e-07,
"loss": 0.30283451080322266,
"step": 323
},
{
"epoch": 9.0,
"grad_norm": 0.38358354568481445,
"learning_rate": 9.299475664759068e-07,
"loss": 0.3202640414237976,
"step": 324
},
{
"epoch": 9.028169014084508,
"grad_norm": 0.26551520824432373,
"learning_rate": 9.295261506157985e-07,
"loss": 0.31331080198287964,
"step": 325
},
{
"epoch": 9.056338028169014,
"grad_norm": 0.28371915221214294,
"learning_rate": 9.291035786532163e-07,
"loss": 0.3039785325527191,
"step": 326
},
{
"epoch": 9.084507042253522,
"grad_norm": 0.28972727060317993,
"learning_rate": 9.286798518753878e-07,
"loss": 0.3172224462032318,
"step": 327
},
{
"epoch": 9.112676056338028,
"grad_norm": 0.2863673269748688,
"learning_rate": 9.282549715730579e-07,
"loss": 0.3220033049583435,
"step": 328
},
{
"epoch": 9.140845070422536,
"grad_norm": 0.27619102597236633,
"learning_rate": 9.278289390404859e-07,
"loss": 0.31595173478126526,
"step": 329
},
{
"epoch": 9.169014084507042,
"grad_norm": 0.2838309705257416,
"learning_rate": 9.274017555754407e-07,
"loss": 0.31470271944999695,
"step": 330
},
{
"epoch": 9.19718309859155,
"grad_norm": 0.28437867760658264,
"learning_rate": 9.269734224791974e-07,
"loss": 0.31371644139289856,
"step": 331
},
{
"epoch": 9.225352112676056,
"grad_norm": 0.28935906291007996,
"learning_rate": 9.265439410565328e-07,
"loss": 0.3154122829437256,
"step": 332
},
{
"epoch": 9.253521126760564,
"grad_norm": 0.28751862049102783,
"learning_rate": 9.261133126157217e-07,
"loss": 0.3072774410247803,
"step": 333
},
{
"epoch": 9.28169014084507,
"grad_norm": 0.2829267680644989,
"learning_rate": 9.256815384685328e-07,
"loss": 0.30855560302734375,
"step": 334
},
{
"epoch": 9.309859154929578,
"grad_norm": 0.28372108936309814,
"learning_rate": 9.252486199302256e-07,
"loss": 0.3047599792480469,
"step": 335
},
{
"epoch": 9.338028169014084,
"grad_norm": 0.26949799060821533,
"learning_rate": 9.248145583195447e-07,
"loss": 0.3051632046699524,
"step": 336
},
{
"epoch": 9.366197183098592,
"grad_norm": 0.26946741342544556,
"learning_rate": 9.243793549587171e-07,
"loss": 0.30776509642601013,
"step": 337
},
{
"epoch": 9.394366197183098,
"grad_norm": 0.2829545736312866,
"learning_rate": 9.239430111734476e-07,
"loss": 0.30643659830093384,
"step": 338
},
{
"epoch": 9.422535211267606,
"grad_norm": 0.30891162157058716,
"learning_rate": 9.235055282929153e-07,
"loss": 0.30099156498908997,
"step": 339
},
{
"epoch": 9.450704225352112,
"grad_norm": 0.2820793390274048,
"learning_rate": 9.230669076497687e-07,
"loss": 0.31829434633255005,
"step": 340
},
{
"epoch": 9.47887323943662,
"grad_norm": 0.27604445815086365,
"learning_rate": 9.226271505801224e-07,
"loss": 0.31647807359695435,
"step": 341
},
{
"epoch": 9.507042253521126,
"grad_norm": 0.2793697714805603,
"learning_rate": 9.221862584235526e-07,
"loss": 0.30784907937049866,
"step": 342
},
{
"epoch": 9.535211267605634,
"grad_norm": 0.27153849601745605,
"learning_rate": 9.217442325230936e-07,
"loss": 0.29595351219177246,
"step": 343
},
{
"epoch": 9.56338028169014,
"grad_norm": 0.28174859285354614,
"learning_rate": 9.213010742252327e-07,
"loss": 0.3158809244632721,
"step": 344
},
{
"epoch": 9.591549295774648,
"grad_norm": 0.27065321803092957,
"learning_rate": 9.208567848799069e-07,
"loss": 0.29831117391586304,
"step": 345
},
{
"epoch": 9.619718309859154,
"grad_norm": 0.2704644799232483,
"learning_rate": 9.204113658404989e-07,
"loss": 0.31440460681915283,
"step": 346
},
{
"epoch": 9.647887323943662,
"grad_norm": 0.2712800204753876,
"learning_rate": 9.199648184638318e-07,
"loss": 0.2985243499279022,
"step": 347
},
{
"epoch": 9.676056338028168,
"grad_norm": 0.2808634042739868,
"learning_rate": 9.195171441101668e-07,
"loss": 0.3167741000652313,
"step": 348
},
{
"epoch": 9.704225352112676,
"grad_norm": 0.27340877056121826,
"learning_rate": 9.190683441431974e-07,
"loss": 0.3019712269306183,
"step": 349
},
{
"epoch": 9.732394366197184,
"grad_norm": 0.2813129723072052,
"learning_rate": 9.186184199300463e-07,
"loss": 0.3006363809108734,
"step": 350
},
{
"epoch": 9.76056338028169,
"grad_norm": 0.28003188967704773,
"learning_rate": 9.181673728412605e-07,
"loss": 0.31190669536590576,
"step": 351
},
{
"epoch": 9.788732394366198,
"grad_norm": 0.2703484892845154,
"learning_rate": 9.177152042508077e-07,
"loss": 0.3077196478843689,
"step": 352
},
{
"epoch": 9.816901408450704,
"grad_norm": 0.2803649604320526,
"learning_rate": 9.17261915536072e-07,
"loss": 0.30905407667160034,
"step": 353
},
{
"epoch": 9.845070422535212,
"grad_norm": 0.2884216606616974,
"learning_rate": 9.168075080778494e-07,
"loss": 0.30327335000038147,
"step": 354
},
{
"epoch": 9.873239436619718,
"grad_norm": 0.2796288728713989,
"learning_rate": 9.163519832603436e-07,
"loss": 0.3104422390460968,
"step": 355
},
{
"epoch": 9.901408450704226,
"grad_norm": 0.30282527208328247,
"learning_rate": 9.158953424711624e-07,
"loss": 0.3279035985469818,
"step": 356
},
{
"epoch": 9.929577464788732,
"grad_norm": 0.2795606851577759,
"learning_rate": 9.154375871013128e-07,
"loss": 0.3136137127876282,
"step": 357
},
{
"epoch": 9.95774647887324,
"grad_norm": 0.2871512174606323,
"learning_rate": 9.149787185451969e-07,
"loss": 0.3188316226005554,
"step": 358
},
{
"epoch": 9.985915492957746,
"grad_norm": 0.2814459502696991,
"learning_rate": 9.145187382006081e-07,
"loss": 0.3084180951118469,
"step": 359
},
{
"epoch": 10.0,
"grad_norm": 0.4135233461856842,
"learning_rate": 9.140576474687263e-07,
"loss": 0.32664716243743896,
"step": 360
},
{
"epoch": 10.028169014084508,
"grad_norm": 0.2743515968322754,
"learning_rate": 9.135954477541137e-07,
"loss": 0.31237614154815674,
"step": 361
},
{
"epoch": 10.056338028169014,
"grad_norm": 0.2790542244911194,
"learning_rate": 9.131321404647109e-07,
"loss": 0.32110899686813354,
"step": 362
},
{
"epoch": 10.084507042253522,
"grad_norm": 0.32552531361579895,
"learning_rate": 9.126677270118322e-07,
"loss": 0.31540626287460327,
"step": 363
},
{
"epoch": 10.112676056338028,
"grad_norm": 0.27251535654067993,
"learning_rate": 9.122022088101613e-07,
"loss": 0.2956544756889343,
"step": 364
},
{
"epoch": 10.140845070422536,
"grad_norm": 0.3012971878051758,
"learning_rate": 9.117355872777477e-07,
"loss": 0.3012295961380005,
"step": 365
},
{
"epoch": 10.169014084507042,
"grad_norm": 0.29038530588150024,
"learning_rate": 9.112678638360015e-07,
"loss": 0.2931394875049591,
"step": 366
},
{
"epoch": 10.19718309859155,
"grad_norm": 0.2870721220970154,
"learning_rate": 9.107990399096893e-07,
"loss": 0.2930557131767273,
"step": 367
},
{
"epoch": 10.225352112676056,
"grad_norm": 0.281965047121048,
"learning_rate": 9.103291169269299e-07,
"loss": 0.3096895217895508,
"step": 368
},
{
"epoch": 10.253521126760564,
"grad_norm": 0.2720247209072113,
"learning_rate": 9.098580963191907e-07,
"loss": 0.302044540643692,
"step": 369
},
{
"epoch": 10.28169014084507,
"grad_norm": 0.2841237783432007,
"learning_rate": 9.093859795212817e-07,
"loss": 0.32047468423843384,
"step": 370
},
{
"epoch": 10.309859154929578,
"grad_norm": 0.29989898204803467,
"learning_rate": 9.089127679713529e-07,
"loss": 0.31085067987442017,
"step": 371
},
{
"epoch": 10.338028169014084,
"grad_norm": 0.29164332151412964,
"learning_rate": 9.084384631108882e-07,
"loss": 0.3052881360054016,
"step": 372
},
{
"epoch": 10.366197183098592,
"grad_norm": 0.2740509808063507,
"learning_rate": 9.079630663847031e-07,
"loss": 0.31468653678894043,
"step": 373
},
{
"epoch": 10.394366197183098,
"grad_norm": 0.2791116535663605,
"learning_rate": 9.074865792409381e-07,
"loss": 0.30899161100387573,
"step": 374
},
{
"epoch": 10.422535211267606,
"grad_norm": 0.30149030685424805,
"learning_rate": 9.070090031310558e-07,
"loss": 0.3094651997089386,
"step": 375
},
{
"epoch": 10.450704225352112,
"grad_norm": 0.2970089018344879,
"learning_rate": 9.065303395098358e-07,
"loss": 0.3142540156841278,
"step": 376
},
{
"epoch": 10.47887323943662,
"grad_norm": 0.2772645652294159,
"learning_rate": 9.060505898353705e-07,
"loss": 0.32443171739578247,
"step": 377
},
{
"epoch": 10.507042253521126,
"grad_norm": 0.2707611620426178,
"learning_rate": 9.055697555690607e-07,
"loss": 0.30495521426200867,
"step": 378
},
{
"epoch": 10.535211267605634,
"grad_norm": 0.2923314869403839,
"learning_rate": 9.050878381756107e-07,
"loss": 0.30734074115753174,
"step": 379
},
{
"epoch": 10.56338028169014,
"grad_norm": 0.2865448594093323,
"learning_rate": 9.046048391230247e-07,
"loss": 0.2913230061531067,
"step": 380
},
{
"epoch": 10.591549295774648,
"grad_norm": 0.29643693566322327,
"learning_rate": 9.041207598826017e-07,
"loss": 0.30088239908218384,
"step": 381
},
{
"epoch": 10.619718309859154,
"grad_norm": 0.2761143445968628,
"learning_rate": 9.036356019289309e-07,
"loss": 0.30702435970306396,
"step": 382
},
{
"epoch": 10.647887323943662,
"grad_norm": 0.27720797061920166,
"learning_rate": 9.031493667398872e-07,
"loss": 0.2953702509403229,
"step": 383
},
{
"epoch": 10.676056338028168,
"grad_norm": 0.30037540197372437,
"learning_rate": 9.026620557966279e-07,
"loss": 0.3012697696685791,
"step": 384
},
{
"epoch": 10.704225352112676,
"grad_norm": 0.27628859877586365,
"learning_rate": 9.021736705835862e-07,
"loss": 0.30558526515960693,
"step": 385
},
{
"epoch": 10.732394366197184,
"grad_norm": 0.2692992091178894,
"learning_rate": 9.016842125884684e-07,
"loss": 0.288699209690094,
"step": 386
},
{
"epoch": 10.76056338028169,
"grad_norm": 0.30020084977149963,
"learning_rate": 9.011936833022484e-07,
"loss": 0.294253945350647,
"step": 387
},
{
"epoch": 10.788732394366198,
"grad_norm": 0.29289868474006653,
"learning_rate": 9.007020842191634e-07,
"loss": 0.31805676221847534,
"step": 388
},
{
"epoch": 10.816901408450704,
"grad_norm": 0.28465571999549866,
"learning_rate": 9.002094168367095e-07,
"loss": 0.3168966472148895,
"step": 389
},
{
"epoch": 10.845070422535212,
"grad_norm": 0.27562448382377625,
"learning_rate": 8.997156826556369e-07,
"loss": 0.302585631608963,
"step": 390
},
{
"epoch": 10.873239436619718,
"grad_norm": 0.28200119733810425,
"learning_rate": 8.992208831799456e-07,
"loss": 0.3037059009075165,
"step": 391
},
{
"epoch": 10.901408450704226,
"grad_norm": 0.2829252779483795,
"learning_rate": 8.987250199168808e-07,
"loss": 0.2850543260574341,
"step": 392
},
{
"epoch": 10.929577464788732,
"grad_norm": 0.28010982275009155,
"learning_rate": 8.982280943769278e-07,
"loss": 0.30365508794784546,
"step": 393
},
{
"epoch": 10.95774647887324,
"grad_norm": 0.2917790114879608,
"learning_rate": 8.977301080738079e-07,
"loss": 0.32212477922439575,
"step": 394
},
{
"epoch": 10.985915492957746,
"grad_norm": 0.27254894375801086,
"learning_rate": 8.97231062524474e-07,
"loss": 0.29733577370643616,
"step": 395
},
{
"epoch": 11.0,
"grad_norm": 0.38847291469573975,
"learning_rate": 8.967309592491052e-07,
"loss": 0.31824764609336853,
"step": 396
},
{
"epoch": 11.028169014084508,
"grad_norm": 0.27360019087791443,
"learning_rate": 8.962297997711027e-07,
"loss": 0.2907956540584564,
"step": 397
},
{
"epoch": 11.056338028169014,
"grad_norm": 0.28565695881843567,
"learning_rate": 8.957275856170855e-07,
"loss": 0.30498966574668884,
"step": 398
},
{
"epoch": 11.084507042253522,
"grad_norm": 0.2826082408428192,
"learning_rate": 8.952243183168848e-07,
"loss": 0.3076494634151459,
"step": 399
},
{
"epoch": 11.112676056338028,
"grad_norm": 0.28598853945732117,
"learning_rate": 8.9471999940354e-07,
"loss": 0.29677921533584595,
"step": 400
},
{
"epoch": 11.140845070422536,
"grad_norm": 0.27635788917541504,
"learning_rate": 8.942146304132943e-07,
"loss": 0.28424787521362305,
"step": 401
},
{
"epoch": 11.169014084507042,
"grad_norm": 0.3110678195953369,
"learning_rate": 8.937082128855891e-07,
"loss": 0.31091392040252686,
"step": 402
},
{
"epoch": 11.19718309859155,
"grad_norm": 0.28018108010292053,
"learning_rate": 8.932007483630596e-07,
"loss": 0.2973289489746094,
"step": 403
},
{
"epoch": 11.225352112676056,
"grad_norm": 0.2748464345932007,
"learning_rate": 8.926922383915315e-07,
"loss": 0.3064712882041931,
"step": 404
},
{
"epoch": 11.253521126760564,
"grad_norm": 0.2758099138736725,
"learning_rate": 8.921826845200138e-07,
"loss": 0.30080002546310425,
"step": 405
},
{
"epoch": 11.28169014084507,
"grad_norm": 0.27323541045188904,
"learning_rate": 8.916720883006963e-07,
"loss": 0.30011099576950073,
"step": 406
},
{
"epoch": 11.309859154929578,
"grad_norm": 0.2751684784889221,
"learning_rate": 8.911604512889434e-07,
"loss": 0.3021606206893921,
"step": 407
},
{
"epoch": 11.338028169014084,
"grad_norm": 0.278543084859848,
"learning_rate": 8.906477750432903e-07,
"loss": 0.2979898452758789,
"step": 408
},
{
"epoch": 11.366197183098592,
"grad_norm": 0.2872096300125122,
"learning_rate": 8.901340611254378e-07,
"loss": 0.30450716614723206,
"step": 409
},
{
"epoch": 11.394366197183098,
"grad_norm": 0.27768319845199585,
"learning_rate": 8.896193111002475e-07,
"loss": 0.31025999784469604,
"step": 410
},
{
"epoch": 11.422535211267606,
"grad_norm": 0.28008511662483215,
"learning_rate": 8.891035265357371e-07,
"loss": 0.2903551757335663,
"step": 411
},
{
"epoch": 11.450704225352112,
"grad_norm": 0.28000614047050476,
"learning_rate": 8.88586709003076e-07,
"loss": 0.30711328983306885,
"step": 412
},
{
"epoch": 11.47887323943662,
"grad_norm": 0.27915990352630615,
"learning_rate": 8.8806886007658e-07,
"loss": 0.309296578168869,
"step": 413
},
{
"epoch": 11.507042253521126,
"grad_norm": 0.2682763636112213,
"learning_rate": 8.875499813337067e-07,
"loss": 0.3053497076034546,
"step": 414
},
{
"epoch": 11.535211267605634,
"grad_norm": 0.26592400670051575,
"learning_rate": 8.87030074355051e-07,
"loss": 0.29761987924575806,
"step": 415
},
{
"epoch": 11.56338028169014,
"grad_norm": 0.2664642333984375,
"learning_rate": 8.865091407243394e-07,
"loss": 0.2986457645893097,
"step": 416
},
{
"epoch": 11.591549295774648,
"grad_norm": 0.2615084648132324,
"learning_rate": 8.859871820284261e-07,
"loss": 0.31391632556915283,
"step": 417
},
{
"epoch": 11.619718309859154,
"grad_norm": 0.27312856912612915,
"learning_rate": 8.85464199857288e-07,
"loss": 0.3128984570503235,
"step": 418
},
{
"epoch": 11.647887323943662,
"grad_norm": 0.2734473645687103,
"learning_rate": 8.849401958040192e-07,
"loss": 0.298526793718338,
"step": 419
},
{
"epoch": 11.676056338028168,
"grad_norm": 0.2901906669139862,
"learning_rate": 8.844151714648274e-07,
"loss": 0.31268036365509033,
"step": 420
},
{
"epoch": 11.704225352112676,
"grad_norm": 0.28374356031417847,
"learning_rate": 8.838891284390273e-07,
"loss": 0.3042759299278259,
"step": 421
},
{
"epoch": 11.732394366197184,
"grad_norm": 0.26128286123275757,
"learning_rate": 8.833620683290375e-07,
"loss": 0.30057787895202637,
"step": 422
},
{
"epoch": 11.76056338028169,
"grad_norm": 0.29005923867225647,
"learning_rate": 8.828339927403745e-07,
"loss": 0.2969115376472473,
"step": 423
},
{
"epoch": 11.788732394366198,
"grad_norm": 0.26823022961616516,
"learning_rate": 8.823049032816478e-07,
"loss": 0.3024095296859741,
"step": 424
},
{
"epoch": 11.816901408450704,
"grad_norm": 0.2938059866428375,
"learning_rate": 8.817748015645558e-07,
"loss": 0.2982884347438812,
"step": 425
},
{
"epoch": 11.845070422535212,
"grad_norm": 0.2794440686702728,
"learning_rate": 8.812436892038805e-07,
"loss": 0.3006170094013214,
"step": 426
},
{
"epoch": 11.873239436619718,
"grad_norm": 0.27727699279785156,
"learning_rate": 8.807115678174819e-07,
"loss": 0.29938215017318726,
"step": 427
},
{
"epoch": 11.901408450704226,
"grad_norm": 0.28038865327835083,
"learning_rate": 8.801784390262943e-07,
"loss": 0.3107326924800873,
"step": 428
},
{
"epoch": 11.929577464788732,
"grad_norm": 0.29747217893600464,
"learning_rate": 8.796443044543203e-07,
"loss": 0.2999688982963562,
"step": 429
},
{
"epoch": 11.95774647887324,
"grad_norm": 0.2875438332557678,
"learning_rate": 8.791091657286267e-07,
"loss": 0.2930242419242859,
"step": 430
},
{
"epoch": 11.985915492957746,
"grad_norm": 0.2946978211402893,
"learning_rate": 8.785730244793386e-07,
"loss": 0.295132577419281,
"step": 431
},
{
"epoch": 12.0,
"grad_norm": 0.39752283692359924,
"learning_rate": 8.780358823396352e-07,
"loss": 0.30750101804733276,
"step": 432
},
{
"epoch": 12.028169014084508,
"grad_norm": 0.2708489000797272,
"learning_rate": 8.774977409457447e-07,
"loss": 0.3058265447616577,
"step": 433
},
{
"epoch": 12.056338028169014,
"grad_norm": 0.2773410975933075,
"learning_rate": 8.769586019369391e-07,
"loss": 0.30409157276153564,
"step": 434
},
{
"epoch": 12.084507042253522,
"grad_norm": 0.26894107460975647,
"learning_rate": 8.764184669555293e-07,
"loss": 0.30384916067123413,
"step": 435
},
{
"epoch": 12.112676056338028,
"grad_norm": 0.27837878465652466,
"learning_rate": 8.758773376468604e-07,
"loss": 0.2943356931209564,
"step": 436
},
{
"epoch": 12.140845070422536,
"grad_norm": 0.2690330445766449,
"learning_rate": 8.753352156593055e-07,
"loss": 0.2933955788612366,
"step": 437
},
{
"epoch": 12.169014084507042,
"grad_norm": 0.27980291843414307,
"learning_rate": 8.747921026442629e-07,
"loss": 0.28997617959976196,
"step": 438
},
{
"epoch": 12.19718309859155,
"grad_norm": 0.287624329328537,
"learning_rate": 8.742480002561487e-07,
"loss": 0.30039626359939575,
"step": 439
},
{
"epoch": 12.225352112676056,
"grad_norm": 0.28817304968833923,
"learning_rate": 8.737029101523929e-07,
"loss": 0.3200758099555969,
"step": 440
},
{
"epoch": 12.253521126760564,
"grad_norm": 0.2769193649291992,
"learning_rate": 8.731568339934348e-07,
"loss": 0.2976597547531128,
"step": 441
},
{
"epoch": 12.28169014084507,
"grad_norm": 0.309583842754364,
"learning_rate": 8.726097734427172e-07,
"loss": 0.2977990210056305,
"step": 442
},
{
"epoch": 12.309859154929578,
"grad_norm": 0.26997339725494385,
"learning_rate": 8.72061730166681e-07,
"loss": 0.29733020067214966,
"step": 443
},
{
"epoch": 12.338028169014084,
"grad_norm": 0.2782990634441376,
"learning_rate": 8.715127058347614e-07,
"loss": 0.29592543840408325,
"step": 444
},
{
"epoch": 12.366197183098592,
"grad_norm": 0.2781784236431122,
"learning_rate": 8.709627021193816e-07,
"loss": 0.2965870797634125,
"step": 445
},
{
"epoch": 12.394366197183098,
"grad_norm": 0.2965787649154663,
"learning_rate": 8.704117206959484e-07,
"loss": 0.30272242426872253,
"step": 446
},
{
"epoch": 12.422535211267606,
"grad_norm": 0.2780534625053406,
"learning_rate": 8.698597632428466e-07,
"loss": 0.30883416533470154,
"step": 447
},
{
"epoch": 12.450704225352112,
"grad_norm": 0.27513188123703003,
"learning_rate": 8.693068314414344e-07,
"loss": 0.30461177229881287,
"step": 448
},
{
"epoch": 12.47887323943662,
"grad_norm": 0.2838785946369171,
"learning_rate": 8.687529269760379e-07,
"loss": 0.2927112281322479,
"step": 449
},
{
"epoch": 12.507042253521126,
"grad_norm": 0.28894707560539246,
"learning_rate": 8.681980515339463e-07,
"loss": 0.28816863894462585,
"step": 450
},
{
"epoch": 12.535211267605634,
"grad_norm": 0.28006207942962646,
"learning_rate": 8.676422068054064e-07,
"loss": 0.29931047558784485,
"step": 451
},
{
"epoch": 12.56338028169014,
"grad_norm": 0.2799602150917053,
"learning_rate": 8.670853944836176e-07,
"loss": 0.3038347363471985,
"step": 452
},
{
"epoch": 12.591549295774648,
"grad_norm": 0.2760638892650604,
"learning_rate": 8.665276162647267e-07,
"loss": 0.30183106660842896,
"step": 453
},
{
"epoch": 12.619718309859154,
"grad_norm": 0.278127521276474,
"learning_rate": 8.659688738478231e-07,
"loss": 0.3019717335700989,
"step": 454
},
{
"epoch": 12.647887323943662,
"grad_norm": 0.26856380701065063,
"learning_rate": 8.654091689349329e-07,
"loss": 0.2945576310157776,
"step": 455
},
{
"epoch": 12.676056338028168,
"grad_norm": 0.2749437391757965,
"learning_rate": 8.648485032310144e-07,
"loss": 0.3023756444454193,
"step": 456
},
{
"epoch": 12.704225352112676,
"grad_norm": 0.2729102671146393,
"learning_rate": 8.642868784439527e-07,
"loss": 0.2842894196510315,
"step": 457
},
{
"epoch": 12.732394366197184,
"grad_norm": 0.28390341997146606,
"learning_rate": 8.63724296284554e-07,
"loss": 0.2940555810928345,
"step": 458
},
{
"epoch": 12.76056338028169,
"grad_norm": 0.2739807069301605,
"learning_rate": 8.631607584665413e-07,
"loss": 0.2935922145843506,
"step": 459
},
{
"epoch": 12.788732394366198,
"grad_norm": 0.2823079824447632,
"learning_rate": 8.625962667065487e-07,
"loss": 0.2949485182762146,
"step": 460
},
{
"epoch": 12.816901408450704,
"grad_norm": 0.2843155264854431,
"learning_rate": 8.620308227241157e-07,
"loss": 0.31058311462402344,
"step": 461
},
{
"epoch": 12.845070422535212,
"grad_norm": 0.2805749475955963,
"learning_rate": 8.614644282416831e-07,
"loss": 0.2892061173915863,
"step": 462
},
{
"epoch": 12.873239436619718,
"grad_norm": 0.2773419916629791,
"learning_rate": 8.608970849845862e-07,
"loss": 0.28688696026802063,
"step": 463
},
{
"epoch": 12.901408450704226,
"grad_norm": 0.28667542338371277,
"learning_rate": 8.603287946810513e-07,
"loss": 0.30356699228286743,
"step": 464
},
{
"epoch": 12.929577464788732,
"grad_norm": 0.2785196900367737,
"learning_rate": 8.597595590621892e-07,
"loss": 0.29802441596984863,
"step": 465
},
{
"epoch": 12.95774647887324,
"grad_norm": 0.2778855562210083,
"learning_rate": 8.591893798619903e-07,
"loss": 0.29154932498931885,
"step": 466
},
{
"epoch": 12.985915492957746,
"grad_norm": 0.28308385610580444,
"learning_rate": 8.586182588173194e-07,
"loss": 0.29143208265304565,
"step": 467
},
{
"epoch": 13.0,
"grad_norm": 0.39711424708366394,
"learning_rate": 8.580461976679099e-07,
"loss": 0.2990560233592987,
"step": 468
},
{
"epoch": 13.028169014084508,
"grad_norm": 0.26802533864974976,
"learning_rate": 8.574731981563597e-07,
"loss": 0.29934608936309814,
"step": 469
},
{
"epoch": 13.056338028169014,
"grad_norm": 0.2663622498512268,
"learning_rate": 8.568992620281243e-07,
"loss": 0.29982200264930725,
"step": 470
},
{
"epoch": 13.084507042253522,
"grad_norm": 0.28624898195266724,
"learning_rate": 8.56324391031513e-07,
"loss": 0.2810109555721283,
"step": 471
},
{
"epoch": 13.112676056338028,
"grad_norm": 0.28607407212257385,
"learning_rate": 8.557485869176825e-07,
"loss": 0.2949367165565491,
"step": 472
},
{
"epoch": 13.140845070422536,
"grad_norm": 0.26953044533729553,
"learning_rate": 8.551718514406318e-07,
"loss": 0.2851143479347229,
"step": 473
},
{
"epoch": 13.169014084507042,
"grad_norm": 0.31105440855026245,
"learning_rate": 8.545941863571973e-07,
"loss": 0.2858909070491791,
"step": 474
},
{
"epoch": 13.19718309859155,
"grad_norm": 0.28143224120140076,
"learning_rate": 8.540155934270471e-07,
"loss": 0.2961467504501343,
"step": 475
},
{
"epoch": 13.225352112676056,
"grad_norm": 0.2862183451652527,
"learning_rate": 8.534360744126753e-07,
"loss": 0.29882240295410156,
"step": 476
},
{
"epoch": 13.253521126760564,
"grad_norm": 0.26780712604522705,
"learning_rate": 8.528556310793979e-07,
"loss": 0.2933373749256134,
"step": 477
},
{
"epoch": 13.28169014084507,
"grad_norm": 0.27026116847991943,
"learning_rate": 8.522742651953456e-07,
"loss": 0.2968083918094635,
"step": 478
},
{
"epoch": 13.309859154929578,
"grad_norm": 0.2800562381744385,
"learning_rate": 8.516919785314595e-07,
"loss": 0.3015640377998352,
"step": 479
},
{
"epoch": 13.338028169014084,
"grad_norm": 0.29154452681541443,
"learning_rate": 8.511087728614862e-07,
"loss": 0.31045541167259216,
"step": 480
},
{
"epoch": 13.366197183098592,
"grad_norm": 0.28183555603027344,
"learning_rate": 8.50524649961971e-07,
"loss": 0.29173219203948975,
"step": 481
},
{
"epoch": 13.394366197183098,
"grad_norm": 0.2971493601799011,
"learning_rate": 8.499396116122535e-07,
"loss": 0.2765740752220154,
"step": 482
},
{
"epoch": 13.422535211267606,
"grad_norm": 0.26922252774238586,
"learning_rate": 8.493536595944622e-07,
"loss": 0.297348290681839,
"step": 483
},
{
"epoch": 13.450704225352112,
"grad_norm": 0.27836039662361145,
"learning_rate": 8.487667956935087e-07,
"loss": 0.28694790601730347,
"step": 484
},
{
"epoch": 13.47887323943662,
"grad_norm": 0.29267406463623047,
"learning_rate": 8.481790216970819e-07,
"loss": 0.2862587571144104,
"step": 485
},
{
"epoch": 13.507042253521126,
"grad_norm": 0.27863144874572754,
"learning_rate": 8.475903393956433e-07,
"loss": 0.2894202470779419,
"step": 486
},
{
"epoch": 13.535211267605634,
"grad_norm": 0.2911999523639679,
"learning_rate": 8.470007505824215e-07,
"loss": 0.29356449842453003,
"step": 487
},
{
"epoch": 13.56338028169014,
"grad_norm": 0.2968003451824188,
"learning_rate": 8.464102570534061e-07,
"loss": 0.29188239574432373,
"step": 488
},
{
"epoch": 13.591549295774648,
"grad_norm": 0.2842749357223511,
"learning_rate": 8.458188606073431e-07,
"loss": 0.28485268354415894,
"step": 489
},
{
"epoch": 13.619718309859154,
"grad_norm": 0.2762301564216614,
"learning_rate": 8.452265630457282e-07,
"loss": 0.2829025387763977,
"step": 490
},
{
"epoch": 13.647887323943662,
"grad_norm": 0.27368924021720886,
"learning_rate": 8.446333661728028e-07,
"loss": 0.3129264712333679,
"step": 491
},
{
"epoch": 13.676056338028168,
"grad_norm": 0.3042363226413727,
"learning_rate": 8.440392717955475e-07,
"loss": 0.298667311668396,
"step": 492
},
{
"epoch": 13.704225352112676,
"grad_norm": 0.31437602639198303,
"learning_rate": 8.434442817236765e-07,
"loss": 0.2911669909954071,
"step": 493
},
{
"epoch": 13.732394366197184,
"grad_norm": 0.2624206840991974,
"learning_rate": 8.428483977696328e-07,
"loss": 0.2875954508781433,
"step": 494
},
{
"epoch": 13.76056338028169,
"grad_norm": 0.2824702858924866,
"learning_rate": 8.422516217485825e-07,
"loss": 0.28079336881637573,
"step": 495
},
{
"epoch": 13.788732394366198,
"grad_norm": 0.27612945437431335,
"learning_rate": 8.416539554784089e-07,
"loss": 0.3052091598510742,
"step": 496
},
{
"epoch": 13.816901408450704,
"grad_norm": 0.28139790892601013,
"learning_rate": 8.410554007797068e-07,
"loss": 0.2918257415294647,
"step": 497
},
{
"epoch": 13.845070422535212,
"grad_norm": 0.2779678702354431,
"learning_rate": 8.404559594757777e-07,
"loss": 0.30707138776779175,
"step": 498
},
{
"epoch": 13.873239436619718,
"grad_norm": 0.2710152566432953,
"learning_rate": 8.398556333926239e-07,
"loss": 0.3128437101840973,
"step": 499
},
{
"epoch": 13.901408450704226,
"grad_norm": 0.2958044707775116,
"learning_rate": 8.392544243589427e-07,
"loss": 0.29653337597846985,
"step": 500
},
{
"epoch": 13.929577464788732,
"grad_norm": 0.28408974409103394,
"learning_rate": 8.38652334206121e-07,
"loss": 0.29291969537734985,
"step": 501
},
{
"epoch": 13.95774647887324,
"grad_norm": 0.27897724509239197,
"learning_rate": 8.3804936476823e-07,
"loss": 0.3117462992668152,
"step": 502
},
{
"epoch": 13.985915492957746,
"grad_norm": 0.27391254901885986,
"learning_rate": 8.374455178820189e-07,
"loss": 0.30571603775024414,
"step": 503
},
{
"epoch": 14.0,
"grad_norm": 0.3995163142681122,
"learning_rate": 8.368407953869103e-07,
"loss": 0.2876809239387512,
"step": 504
},
{
"epoch": 14.028169014084508,
"grad_norm": 0.3068762719631195,
"learning_rate": 8.362351991249937e-07,
"loss": 0.28866052627563477,
"step": 505
},
{
"epoch": 14.056338028169014,
"grad_norm": 0.278751939535141,
"learning_rate": 8.356287309410204e-07,
"loss": 0.3048397898674011,
"step": 506
},
{
"epoch": 14.084507042253522,
"grad_norm": 0.2831234335899353,
"learning_rate": 8.350213926823974e-07,
"loss": 0.28643566370010376,
"step": 507
},
{
"epoch": 14.112676056338028,
"grad_norm": 0.2744354009628296,
"learning_rate": 8.344131861991828e-07,
"loss": 0.30159255862236023,
"step": 508
},
{
"epoch": 14.140845070422536,
"grad_norm": 0.2834227383136749,
"learning_rate": 8.338041133440788e-07,
"loss": 0.2945912182331085,
"step": 509
},
{
"epoch": 14.169014084507042,
"grad_norm": 0.2914932072162628,
"learning_rate": 8.331941759724268e-07,
"loss": 0.30261489748954773,
"step": 510
},
{
"epoch": 14.19718309859155,
"grad_norm": 0.2795814871788025,
"learning_rate": 8.325833759422021e-07,
"loss": 0.29661813378334045,
"step": 511
},
{
"epoch": 14.225352112676056,
"grad_norm": 0.2715330719947815,
"learning_rate": 8.319717151140072e-07,
"loss": 0.28672271966934204,
"step": 512
},
{
"epoch": 14.253521126760564,
"grad_norm": 0.2859768271446228,
"learning_rate": 8.313591953510673e-07,
"loss": 0.2985742390155792,
"step": 513
},
{
"epoch": 14.28169014084507,
"grad_norm": 0.2789771854877472,
"learning_rate": 8.307458185192238e-07,
"loss": 0.2883588671684265,
"step": 514
},
{
"epoch": 14.309859154929578,
"grad_norm": 0.2849474549293518,
"learning_rate": 8.301315864869289e-07,
"loss": 0.3045833706855774,
"step": 515
},
{
"epoch": 14.338028169014084,
"grad_norm": 0.28583216667175293,
"learning_rate": 8.295165011252396e-07,
"loss": 0.28541919589042664,
"step": 516
},
{
"epoch": 14.366197183098592,
"grad_norm": 0.286767840385437,
"learning_rate": 8.289005643078131e-07,
"loss": 0.2928876280784607,
"step": 517
},
{
"epoch": 14.394366197183098,
"grad_norm": 0.2851925790309906,
"learning_rate": 8.282837779108993e-07,
"loss": 0.29808348417282104,
"step": 518
},
{
"epoch": 14.422535211267606,
"grad_norm": 0.2843434512615204,
"learning_rate": 8.276661438133368e-07,
"loss": 0.281357079744339,
"step": 519
},
{
"epoch": 14.450704225352112,
"grad_norm": 0.29959535598754883,
"learning_rate": 8.270476638965461e-07,
"loss": 0.287128746509552,
"step": 520
},
{
"epoch": 14.47887323943662,
"grad_norm": 0.2812483310699463,
"learning_rate": 8.264283400445243e-07,
"loss": 0.29306480288505554,
"step": 521
},
{
"epoch": 14.507042253521126,
"grad_norm": 0.3015466034412384,
"learning_rate": 8.258081741438394e-07,
"loss": 0.3011341691017151,
"step": 522
},
{
"epoch": 14.535211267605634,
"grad_norm": 0.2930891215801239,
"learning_rate": 8.25187168083624e-07,
"loss": 0.2976144850254059,
"step": 523
},
{
"epoch": 14.56338028169014,
"grad_norm": 0.2777521312236786,
"learning_rate": 8.245653237555705e-07,
"loss": 0.2829003930091858,
"step": 524
},
{
"epoch": 14.591549295774648,
"grad_norm": 0.2916077673435211,
"learning_rate": 8.239426430539243e-07,
"loss": 0.28546392917633057,
"step": 525
},
{
"epoch": 14.619718309859154,
"grad_norm": 0.3006315231323242,
"learning_rate": 8.23319127875479e-07,
"loss": 0.2851755619049072,
"step": 526
},
{
"epoch": 14.647887323943662,
"grad_norm": 0.2654482424259186,
"learning_rate": 8.226947801195699e-07,
"loss": 0.28430840373039246,
"step": 527
},
{
"epoch": 14.676056338028168,
"grad_norm": 0.2679372727870941,
"learning_rate": 8.220696016880687e-07,
"loss": 0.282630980014801,
"step": 528
},
{
"epoch": 14.704225352112676,
"grad_norm": 0.28538262844085693,
"learning_rate": 8.21443594485377e-07,
"loss": 0.2789214551448822,
"step": 529
},
{
"epoch": 14.732394366197184,
"grad_norm": 0.2713358700275421,
"learning_rate": 8.208167604184217e-07,
"loss": 0.2909342646598816,
"step": 530
},
{
"epoch": 14.76056338028169,
"grad_norm": 0.30056601762771606,
"learning_rate": 8.201891013966478e-07,
"loss": 0.2838485836982727,
"step": 531
},
{
"epoch": 14.788732394366198,
"grad_norm": 0.2811543345451355,
"learning_rate": 8.195606193320136e-07,
"loss": 0.29030710458755493,
"step": 532
},
{
"epoch": 14.816901408450704,
"grad_norm": 0.2930709719657898,
"learning_rate": 8.189313161389844e-07,
"loss": 0.2922976613044739,
"step": 533
},
{
"epoch": 14.845070422535212,
"grad_norm": 0.29798057675361633,
"learning_rate": 8.183011937345271e-07,
"loss": 0.2951294183731079,
"step": 534
},
{
"epoch": 14.873239436619718,
"grad_norm": 0.28483426570892334,
"learning_rate": 8.176702540381036e-07,
"loss": 0.2938500642776489,
"step": 535
},
{
"epoch": 14.901408450704226,
"grad_norm": 0.2990010380744934,
"learning_rate": 8.170384989716657e-07,
"loss": 0.29805850982666016,
"step": 536
},
{
"epoch": 14.929577464788732,
"grad_norm": 0.2896774411201477,
"learning_rate": 8.164059304596488e-07,
"loss": 0.29530227184295654,
"step": 537
},
{
"epoch": 14.95774647887324,
"grad_norm": 0.28662148118019104,
"learning_rate": 8.157725504289664e-07,
"loss": 0.28371667861938477,
"step": 538
},
{
"epoch": 14.985915492957746,
"grad_norm": 0.2807771861553192,
"learning_rate": 8.151383608090039e-07,
"loss": 0.29020193219184875,
"step": 539
},
{
"epoch": 15.0,
"grad_norm": 0.39528268575668335,
"learning_rate": 8.145033635316128e-07,
"loss": 0.30530279874801636,
"step": 540
},
{
"epoch": 15.028169014084508,
"grad_norm": 0.28691425919532776,
"learning_rate": 8.138675605311051e-07,
"loss": 0.27306681871414185,
"step": 541
},
{
"epoch": 15.056338028169014,
"grad_norm": 0.27633434534072876,
"learning_rate": 8.13230953744247e-07,
"loss": 0.2900540828704834,
"step": 542
},
{
"epoch": 15.084507042253522,
"grad_norm": 0.28263136744499207,
"learning_rate": 8.125935451102528e-07,
"loss": 0.29298198223114014,
"step": 543
},
{
"epoch": 15.112676056338028,
"grad_norm": 0.2708156406879425,
"learning_rate": 8.119553365707802e-07,
"loss": 0.2728630006313324,
"step": 544
},
{
"epoch": 15.140845070422536,
"grad_norm": 0.28263747692108154,
"learning_rate": 8.113163300699228e-07,
"loss": 0.2994900047779083,
"step": 545
},
{
"epoch": 15.169014084507042,
"grad_norm": 0.2628503739833832,
"learning_rate": 8.106765275542053e-07,
"loss": 0.2943934202194214,
"step": 546
},
{
"epoch": 15.19718309859155,
"grad_norm": 0.2844214141368866,
"learning_rate": 8.100359309725774e-07,
"loss": 0.286617636680603,
"step": 547
},
{
"epoch": 15.225352112676056,
"grad_norm": 0.2979234457015991,
"learning_rate": 8.093945422764069e-07,
"loss": 0.28598904609680176,
"step": 548
},
{
"epoch": 15.253521126760564,
"grad_norm": 0.2918255925178528,
"learning_rate": 8.087523634194754e-07,
"loss": 0.2826801538467407,
"step": 549
},
{
"epoch": 15.28169014084507,
"grad_norm": 0.30238643288612366,
"learning_rate": 8.081093963579707e-07,
"loss": 0.3018723726272583,
"step": 550
},
{
"epoch": 15.309859154929578,
"grad_norm": 0.2762410342693329,
"learning_rate": 8.074656430504823e-07,
"loss": 0.27831658720970154,
"step": 551
},
{
"epoch": 15.338028169014084,
"grad_norm": 0.28324148058891296,
"learning_rate": 8.068211054579943e-07,
"loss": 0.30506500601768494,
"step": 552
},
{
"epoch": 15.366197183098592,
"grad_norm": 0.2893829643726349,
"learning_rate": 8.061757855438799e-07,
"loss": 0.29023078083992004,
"step": 553
},
{
"epoch": 15.394366197183098,
"grad_norm": 0.2907930016517639,
"learning_rate": 8.055296852738956e-07,
"loss": 0.28343409299850464,
"step": 554
},
{
"epoch": 15.422535211267606,
"grad_norm": 0.28478139638900757,
"learning_rate": 8.048828066161747e-07,
"loss": 0.28546571731567383,
"step": 555
},
{
"epoch": 15.450704225352112,
"grad_norm": 0.2851191759109497,
"learning_rate": 8.04235151541222e-07,
"loss": 0.2884707748889923,
"step": 556
},
{
"epoch": 15.47887323943662,
"grad_norm": 0.2689509987831116,
"learning_rate": 8.035867220219071e-07,
"loss": 0.2950664758682251,
"step": 557
},
{
"epoch": 15.507042253521126,
"grad_norm": 0.2825435400009155,
"learning_rate": 8.029375200334587e-07,
"loss": 0.281552791595459,
"step": 558
},
{
"epoch": 15.535211267605634,
"grad_norm": 0.28483787178993225,
"learning_rate": 8.022875475534588e-07,
"loss": 0.2870042622089386,
"step": 559
},
{
"epoch": 15.56338028169014,
"grad_norm": 0.27896517515182495,
"learning_rate": 8.01636806561836e-07,
"loss": 0.287916362285614,
"step": 560
},
{
"epoch": 15.591549295774648,
"grad_norm": 0.2788335382938385,
"learning_rate": 8.009852990408606e-07,
"loss": 0.28609931468963623,
"step": 561
},
{
"epoch": 15.619718309859154,
"grad_norm": 0.2826322019100189,
"learning_rate": 8.003330269751372e-07,
"loss": 0.2950190305709839,
"step": 562
},
{
"epoch": 15.647887323943662,
"grad_norm": 0.2843019366264343,
"learning_rate": 7.996799923515997e-07,
"loss": 0.2914244532585144,
"step": 563
},
{
"epoch": 15.676056338028168,
"grad_norm": 0.26445460319519043,
"learning_rate": 7.990261971595048e-07,
"loss": 0.27984780073165894,
"step": 564
},
{
"epoch": 15.704225352112676,
"grad_norm": 0.27918627858161926,
"learning_rate": 7.983716433904262e-07,
"loss": 0.27757298946380615,
"step": 565
},
{
"epoch": 15.732394366197184,
"grad_norm": 0.2938336133956909,
"learning_rate": 7.977163330382479e-07,
"loss": 0.2920360565185547,
"step": 566
},
{
"epoch": 15.76056338028169,
"grad_norm": 0.28976547718048096,
"learning_rate": 7.970602680991592e-07,
"loss": 0.2951090931892395,
"step": 567
},
{
"epoch": 15.788732394366198,
"grad_norm": 0.27327752113342285,
"learning_rate": 7.964034505716476e-07,
"loss": 0.29640987515449524,
"step": 568
},
{
"epoch": 15.816901408450704,
"grad_norm": 0.27222704887390137,
"learning_rate": 7.957458824564931e-07,
"loss": 0.28876399993896484,
"step": 569
},
{
"epoch": 15.845070422535212,
"grad_norm": 0.29962998628616333,
"learning_rate": 7.950875657567621e-07,
"loss": 0.3039361238479614,
"step": 570
},
{
"epoch": 15.873239436619718,
"grad_norm": 0.2705839276313782,
"learning_rate": 7.944285024778017e-07,
"loss": 0.28840112686157227,
"step": 571
},
{
"epoch": 15.901408450704226,
"grad_norm": 0.28124475479125977,
"learning_rate": 7.93768694627233e-07,
"loss": 0.2832530736923218,
"step": 572
},
{
"epoch": 15.929577464788732,
"grad_norm": 0.29025372862815857,
"learning_rate": 7.931081442149448e-07,
"loss": 0.28588593006134033,
"step": 573
},
{
"epoch": 15.95774647887324,
"grad_norm": 0.27376946806907654,
"learning_rate": 7.924468532530883e-07,
"loss": 0.2883457839488983,
"step": 574
},
{
"epoch": 15.985915492957746,
"grad_norm": 0.28059038519859314,
"learning_rate": 7.917848237560708e-07,
"loss": 0.2923107147216797,
"step": 575
},
{
"epoch": 16.0,
"grad_norm": 0.39920157194137573,
"learning_rate": 7.911220577405484e-07,
"loss": 0.2896960973739624,
"step": 576
},
{
"epoch": 16.028169014084508,
"grad_norm": 0.2756041884422302,
"learning_rate": 7.904585572254218e-07,
"loss": 0.2934238910675049,
"step": 577
},
{
"epoch": 16.056338028169016,
"grad_norm": 0.2831096947193146,
"learning_rate": 7.897943242318285e-07,
"loss": 0.2862626910209656,
"step": 578
},
{
"epoch": 16.08450704225352,
"grad_norm": 0.27020981907844543,
"learning_rate": 7.891293607831373e-07,
"loss": 0.3019767999649048,
"step": 579
},
{
"epoch": 16.112676056338028,
"grad_norm": 0.2866615056991577,
"learning_rate": 7.884636689049422e-07,
"loss": 0.29431337118148804,
"step": 580
},
{
"epoch": 16.140845070422536,
"grad_norm": 0.27709120512008667,
"learning_rate": 7.877972506250562e-07,
"loss": 0.26718783378601074,
"step": 581
},
{
"epoch": 16.169014084507044,
"grad_norm": 0.2864624261856079,
"learning_rate": 7.871301079735049e-07,
"loss": 0.28138402104377747,
"step": 582
},
{
"epoch": 16.197183098591548,
"grad_norm": 0.2806070148944855,
"learning_rate": 7.864622429825204e-07,
"loss": 0.29040491580963135,
"step": 583
},
{
"epoch": 16.225352112676056,
"grad_norm": 0.2866605818271637,
"learning_rate": 7.857936576865356e-07,
"loss": 0.2876106798648834,
"step": 584
},
{
"epoch": 16.253521126760564,
"grad_norm": 0.2853955626487732,
"learning_rate": 7.851243541221769e-07,
"loss": 0.30784159898757935,
"step": 585
},
{
"epoch": 16.281690140845072,
"grad_norm": 0.290031760931015,
"learning_rate": 7.844543343282595e-07,
"loss": 0.27567434310913086,
"step": 586
},
{
"epoch": 16.309859154929576,
"grad_norm": 0.283806174993515,
"learning_rate": 7.837836003457793e-07,
"loss": 0.28710314631462097,
"step": 587
},
{
"epoch": 16.338028169014084,
"grad_norm": 0.2768094539642334,
"learning_rate": 7.831121542179086e-07,
"loss": 0.27676063776016235,
"step": 588
},
{
"epoch": 16.366197183098592,
"grad_norm": 0.27568569779396057,
"learning_rate": 7.824399979899889e-07,
"loss": 0.2947593927383423,
"step": 589
},
{
"epoch": 16.3943661971831,
"grad_norm": 0.3079885244369507,
"learning_rate": 7.817671337095244e-07,
"loss": 0.2868027985095978,
"step": 590
},
{
"epoch": 16.422535211267604,
"grad_norm": 0.29744645953178406,
"learning_rate": 7.810935634261764e-07,
"loss": 0.2946295738220215,
"step": 591
},
{
"epoch": 16.450704225352112,
"grad_norm": 0.28457650542259216,
"learning_rate": 7.804192891917571e-07,
"loss": 0.2790455222129822,
"step": 592
},
{
"epoch": 16.47887323943662,
"grad_norm": 0.28848767280578613,
"learning_rate": 7.797443130602226e-07,
"loss": 0.2941606640815735,
"step": 593
},
{
"epoch": 16.507042253521128,
"grad_norm": 0.2936708927154541,
"learning_rate": 7.79068637087667e-07,
"loss": 0.2923729121685028,
"step": 594
},
{
"epoch": 16.535211267605632,
"grad_norm": 0.28460994362831116,
"learning_rate": 7.783922633323169e-07,
"loss": 0.2795827090740204,
"step": 595
},
{
"epoch": 16.56338028169014,
"grad_norm": 0.28233277797698975,
"learning_rate": 7.777151938545235e-07,
"loss": 0.29222947359085083,
"step": 596
},
{
"epoch": 16.591549295774648,
"grad_norm": 0.28648558259010315,
"learning_rate": 7.770374307167585e-07,
"loss": 0.27923721075057983,
"step": 597
},
{
"epoch": 16.619718309859156,
"grad_norm": 0.2813912332057953,
"learning_rate": 7.763589759836058e-07,
"loss": 0.2912202477455139,
"step": 598
},
{
"epoch": 16.647887323943664,
"grad_norm": 0.28273841738700867,
"learning_rate": 7.756798317217558e-07,
"loss": 0.29805850982666016,
"step": 599
},
{
"epoch": 16.676056338028168,
"grad_norm": 0.2922080457210541,
"learning_rate": 7.75e-07,
"loss": 0.2834911346435547,
"step": 600
},
{
"epoch": 16.704225352112676,
"grad_norm": 0.27855902910232544,
"learning_rate": 7.743194828892235e-07,
"loss": 0.2842041552066803,
"step": 601
},
{
"epoch": 16.732394366197184,
"grad_norm": 0.2905668318271637,
"learning_rate": 7.736382824623999e-07,
"loss": 0.281250923871994,
"step": 602
},
{
"epoch": 16.760563380281692,
"grad_norm": 0.2928289771080017,
"learning_rate": 7.729564007945834e-07,
"loss": 0.2863979935646057,
"step": 603
},
{
"epoch": 16.788732394366196,
"grad_norm": 0.28705668449401855,
"learning_rate": 7.72273839962904e-07,
"loss": 0.287672221660614,
"step": 604
},
{
"epoch": 16.816901408450704,
"grad_norm": 0.29107093811035156,
"learning_rate": 7.715906020465602e-07,
"loss": 0.27715277671813965,
"step": 605
},
{
"epoch": 16.845070422535212,
"grad_norm": 0.28827348351478577,
"learning_rate": 7.709066891268133e-07,
"loss": 0.2648072838783264,
"step": 606
},
{
"epoch": 16.87323943661972,
"grad_norm": 0.28768298029899597,
"learning_rate": 7.702221032869808e-07,
"loss": 0.26861560344696045,
"step": 607
},
{
"epoch": 16.901408450704224,
"grad_norm": 0.3000086843967438,
"learning_rate": 7.695368466124296e-07,
"loss": 0.2910693287849426,
"step": 608
},
{
"epoch": 16.929577464788732,
"grad_norm": 0.3058622181415558,
"learning_rate": 7.688509211905707e-07,
"loss": 0.2804388105869293,
"step": 609
},
{
"epoch": 16.95774647887324,
"grad_norm": 0.2874692678451538,
"learning_rate": 7.681643291108517e-07,
"loss": 0.2883044481277466,
"step": 610
},
{
"epoch": 16.985915492957748,
"grad_norm": 0.2868764102458954,
"learning_rate": 7.67477072464751e-07,
"loss": 0.2847598195075989,
"step": 611
},
{
"epoch": 17.0,
"grad_norm": 0.3980148136615753,
"learning_rate": 7.667891533457718e-07,
"loss": 0.29258161783218384,
"step": 612
},
{
"epoch": 17.028169014084508,
"grad_norm": 0.2752118408679962,
"learning_rate": 7.661005738494349e-07,
"loss": 0.28283417224884033,
"step": 613
},
{
"epoch": 17.056338028169016,
"grad_norm": 0.2837778627872467,
"learning_rate": 7.654113360732732e-07,
"loss": 0.2758600115776062,
"step": 614
},
{
"epoch": 17.08450704225352,
"grad_norm": 0.2887240946292877,
"learning_rate": 7.647214421168238e-07,
"loss": 0.2864817976951599,
"step": 615
},
{
"epoch": 17.112676056338028,
"grad_norm": 0.27935662865638733,
"learning_rate": 7.640308940816239e-07,
"loss": 0.28024283051490784,
"step": 616
},
{
"epoch": 17.140845070422536,
"grad_norm": 0.2960900664329529,
"learning_rate": 7.633396940712023e-07,
"loss": 0.2681460976600647,
"step": 617
},
{
"epoch": 17.169014084507044,
"grad_norm": 0.2915673553943634,
"learning_rate": 7.626478441910744e-07,
"loss": 0.2805773913860321,
"step": 618
},
{
"epoch": 17.197183098591548,
"grad_norm": 0.2789720892906189,
"learning_rate": 7.619553465487344e-07,
"loss": 0.28847092390060425,
"step": 619
},
{
"epoch": 17.225352112676056,
"grad_norm": 0.2745218575000763,
"learning_rate": 7.612622032536507e-07,
"loss": 0.28274643421173096,
"step": 620
},
{
"epoch": 17.253521126760564,
"grad_norm": 0.2962469458580017,
"learning_rate": 7.60568416417258e-07,
"loss": 0.2827341556549072,
"step": 621
},
{
"epoch": 17.281690140845072,
"grad_norm": 0.28243717551231384,
"learning_rate": 7.59873988152951e-07,
"loss": 0.2872379422187805,
"step": 622
},
{
"epoch": 17.309859154929576,
"grad_norm": 0.2935909926891327,
"learning_rate": 7.591789205760789e-07,
"loss": 0.29077547788619995,
"step": 623
},
{
"epoch": 17.338028169014084,
"grad_norm": 0.2725030481815338,
"learning_rate": 7.584832158039378e-07,
"loss": 0.28079894185066223,
"step": 624
},
{
"epoch": 17.366197183098592,
"grad_norm": 0.2863542437553406,
"learning_rate": 7.577868759557653e-07,
"loss": 0.2759760618209839,
"step": 625
},
{
"epoch": 17.3943661971831,
"grad_norm": 0.2829958498477936,
"learning_rate": 7.570899031527332e-07,
"loss": 0.27316516637802124,
"step": 626
},
{
"epoch": 17.422535211267604,
"grad_norm": 0.28861963748931885,
"learning_rate": 7.563922995179418e-07,
"loss": 0.2758478820323944,
"step": 627
},
{
"epoch": 17.450704225352112,
"grad_norm": 0.2935570478439331,
"learning_rate": 7.556940671764124e-07,
"loss": 0.28437983989715576,
"step": 628
},
{
"epoch": 17.47887323943662,
"grad_norm": 0.3037278652191162,
"learning_rate": 7.54995208255082e-07,
"loss": 0.28943467140197754,
"step": 629
},
{
"epoch": 17.507042253521128,
"grad_norm": 0.31774893403053284,
"learning_rate": 7.54295724882796e-07,
"loss": 0.29023581743240356,
"step": 630
},
{
"epoch": 17.535211267605632,
"grad_norm": 0.28832852840423584,
"learning_rate": 7.535956191903021e-07,
"loss": 0.2840030789375305,
"step": 631
},
{
"epoch": 17.56338028169014,
"grad_norm": 0.28122231364250183,
"learning_rate": 7.528948933102438e-07,
"loss": 0.28523629903793335,
"step": 632
},
{
"epoch": 17.591549295774648,
"grad_norm": 0.29538190364837646,
"learning_rate": 7.521935493771534e-07,
"loss": 0.28018033504486084,
"step": 633
},
{
"epoch": 17.619718309859156,
"grad_norm": 0.3163702189922333,
"learning_rate": 7.514915895274463e-07,
"loss": 0.2885722517967224,
"step": 634
},
{
"epoch": 17.647887323943664,
"grad_norm": 0.2946973741054535,
"learning_rate": 7.507890158994139e-07,
"loss": 0.2785816490650177,
"step": 635
},
{
"epoch": 17.676056338028168,
"grad_norm": 0.2805889844894409,
"learning_rate": 7.500858306332172e-07,
"loss": 0.2974117398262024,
"step": 636
},
{
"epoch": 17.704225352112676,
"grad_norm": 0.28544914722442627,
"learning_rate": 7.493820358708809e-07,
"loss": 0.2892162501811981,
"step": 637
},
{
"epoch": 17.732394366197184,
"grad_norm": 0.3272300064563751,
"learning_rate": 7.486776337562853e-07,
"loss": 0.3017275333404541,
"step": 638
},
{
"epoch": 17.760563380281692,
"grad_norm": 0.28177788853645325,
"learning_rate": 7.479726264351618e-07,
"loss": 0.2729823589324951,
"step": 639
},
{
"epoch": 17.788732394366196,
"grad_norm": 0.2774059474468231,
"learning_rate": 7.472670160550848e-07,
"loss": 0.27497977018356323,
"step": 640
},
{
"epoch": 17.816901408450704,
"grad_norm": 0.2898328900337219,
"learning_rate": 7.46560804765466e-07,
"loss": 0.27945676445961,
"step": 641
},
{
"epoch": 17.845070422535212,
"grad_norm": 0.2784922420978546,
"learning_rate": 7.458539947175473e-07,
"loss": 0.29566580057144165,
"step": 642
},
{
"epoch": 17.87323943661972,
"grad_norm": 0.2864189147949219,
"learning_rate": 7.45146588064395e-07,
"loss": 0.2862587869167328,
"step": 643
},
{
"epoch": 17.901408450704224,
"grad_norm": 0.2896963953971863,
"learning_rate": 7.444385869608921e-07,
"loss": 0.2924667000770569,
"step": 644
},
{
"epoch": 17.929577464788732,
"grad_norm": 0.28463807702064514,
"learning_rate": 7.437299935637328e-07,
"loss": 0.2862287163734436,
"step": 645
},
{
"epoch": 17.95774647887324,
"grad_norm": 0.28407302498817444,
"learning_rate": 7.430208100314156e-07,
"loss": 0.2759779989719391,
"step": 646
},
{
"epoch": 17.985915492957748,
"grad_norm": 0.2773316502571106,
"learning_rate": 7.423110385242366e-07,
"loss": 0.2798498272895813,
"step": 647
},
{
"epoch": 18.0,
"grad_norm": 0.3958338499069214,
"learning_rate": 7.416006812042827e-07,
"loss": 0.28481870889663696,
"step": 648
},
{
"epoch": 18.028169014084508,
"grad_norm": 0.2922191321849823,
"learning_rate": 7.408897402354255e-07,
"loss": 0.2781963348388672,
"step": 649
},
{
"epoch": 18.056338028169016,
"grad_norm": 0.29166096448898315,
"learning_rate": 7.401782177833147e-07,
"loss": 0.2843964099884033,
"step": 650
},
{
"epoch": 18.08450704225352,
"grad_norm": 0.28290343284606934,
"learning_rate": 7.394661160153709e-07,
"loss": 0.2840275168418884,
"step": 651
},
{
"epoch": 18.112676056338028,
"grad_norm": 0.28300249576568604,
"learning_rate": 7.387534371007797e-07,
"loss": 0.2893407642841339,
"step": 652
},
{
"epoch": 18.140845070422536,
"grad_norm": 0.2870761752128601,
"learning_rate": 7.380401832104845e-07,
"loss": 0.26570916175842285,
"step": 653
},
{
"epoch": 18.169014084507044,
"grad_norm": 0.2919873297214508,
"learning_rate": 7.373263565171805e-07,
"loss": 0.26768985390663147,
"step": 654
},
{
"epoch": 18.197183098591548,
"grad_norm": 0.2856583893299103,
"learning_rate": 7.366119591953075e-07,
"loss": 0.2823103070259094,
"step": 655
},
{
"epoch": 18.225352112676056,
"grad_norm": 0.2853250801563263,
"learning_rate": 7.358969934210438e-07,
"loss": 0.28462791442871094,
"step": 656
},
{
"epoch": 18.253521126760564,
"grad_norm": 0.27667704224586487,
"learning_rate": 7.35181461372299e-07,
"loss": 0.27125126123428345,
"step": 657
},
{
"epoch": 18.281690140845072,
"grad_norm": 0.2884734272956848,
"learning_rate": 7.344653652287077e-07,
"loss": 0.271454781293869,
"step": 658
},
{
"epoch": 18.309859154929576,
"grad_norm": 0.28490886092185974,
"learning_rate": 7.337487071716232e-07,
"loss": 0.286302775144577,
"step": 659
},
{
"epoch": 18.338028169014084,
"grad_norm": 0.27361124753952026,
"learning_rate": 7.330314893841101e-07,
"loss": 0.2801797389984131,
"step": 660
},
{
"epoch": 18.366197183098592,
"grad_norm": 0.28517088294029236,
"learning_rate": 7.323137140509381e-07,
"loss": 0.2785356640815735,
"step": 661
},
{
"epoch": 18.3943661971831,
"grad_norm": 0.2725742757320404,
"learning_rate": 7.315953833585755e-07,
"loss": 0.27504605054855347,
"step": 662
},
{
"epoch": 18.422535211267604,
"grad_norm": 0.29915499687194824,
"learning_rate": 7.308764994951821e-07,
"loss": 0.2808704078197479,
"step": 663
},
{
"epoch": 18.450704225352112,
"grad_norm": 0.31304341554641724,
"learning_rate": 7.301570646506027e-07,
"loss": 0.2911706566810608,
"step": 664
},
{
"epoch": 18.47887323943662,
"grad_norm": 0.2919553816318512,
"learning_rate": 7.294370810163607e-07,
"loss": 0.27866852283477783,
"step": 665
},
{
"epoch": 18.507042253521128,
"grad_norm": 0.3162909746170044,
"learning_rate": 7.287165507856512e-07,
"loss": 0.2802932560443878,
"step": 666
},
{
"epoch": 18.535211267605632,
"grad_norm": 0.303523451089859,
"learning_rate": 7.279954761533342e-07,
"loss": 0.2824591398239136,
"step": 667
},
{
"epoch": 18.56338028169014,
"grad_norm": 0.29366716742515564,
"learning_rate": 7.27273859315928e-07,
"loss": 0.28101497888565063,
"step": 668
},
{
"epoch": 18.591549295774648,
"grad_norm": 0.28469985723495483,
"learning_rate": 7.265517024716026e-07,
"loss": 0.29134345054626465,
"step": 669
},
{
"epoch": 18.619718309859156,
"grad_norm": 0.28721922636032104,
"learning_rate": 7.258290078201731e-07,
"loss": 0.284817636013031,
"step": 670
},
{
"epoch": 18.647887323943664,
"grad_norm": 0.30535197257995605,
"learning_rate": 7.251057775630927e-07,
"loss": 0.28168779611587524,
"step": 671
},
{
"epoch": 18.676056338028168,
"grad_norm": 0.2980702817440033,
"learning_rate": 7.243820139034464e-07,
"loss": 0.27493056654930115,
"step": 672
},
{
"epoch": 18.704225352112676,
"grad_norm": 0.28984636068344116,
"learning_rate": 7.236577190459433e-07,
"loss": 0.2975635528564453,
"step": 673
},
{
"epoch": 18.732394366197184,
"grad_norm": 0.29580390453338623,
"learning_rate": 7.229328951969115e-07,
"loss": 0.2849118113517761,
"step": 674
},
{
"epoch": 18.760563380281692,
"grad_norm": 0.2950834035873413,
"learning_rate": 7.222075445642904e-07,
"loss": 0.26458609104156494,
"step": 675
},
{
"epoch": 18.788732394366196,
"grad_norm": 0.29167890548706055,
"learning_rate": 7.214816693576234e-07,
"loss": 0.2846098840236664,
"step": 676
},
{
"epoch": 18.816901408450704,
"grad_norm": 0.2784614861011505,
"learning_rate": 7.207552717880522e-07,
"loss": 0.28443169593811035,
"step": 677
},
{
"epoch": 18.845070422535212,
"grad_norm": 0.29537051916122437,
"learning_rate": 7.200283540683102e-07,
"loss": 0.27960023283958435,
"step": 678
},
{
"epoch": 18.87323943661972,
"grad_norm": 0.2873672544956207,
"learning_rate": 7.193009184127145e-07,
"loss": 0.28757309913635254,
"step": 679
},
{
"epoch": 18.901408450704224,
"grad_norm": 0.28597328066825867,
"learning_rate": 7.185729670371604e-07,
"loss": 0.2904655635356903,
"step": 680
},
{
"epoch": 18.929577464788732,
"grad_norm": 0.29267045855522156,
"learning_rate": 7.17844502159114e-07,
"loss": 0.2797931432723999,
"step": 681
},
{
"epoch": 18.95774647887324,
"grad_norm": 0.27707934379577637,
"learning_rate": 7.171155259976057e-07,
"loss": 0.2788022458553314,
"step": 682
},
{
"epoch": 18.985915492957748,
"grad_norm": 0.2854091227054596,
"learning_rate": 7.163860407732231e-07,
"loss": 0.28216353058815,
"step": 683
},
{
"epoch": 19.0,
"grad_norm": 0.4010404348373413,
"learning_rate": 7.156560487081051e-07,
"loss": 0.2831748127937317,
"step": 684
},
{
"epoch": 19.028169014084508,
"grad_norm": 0.2948407232761383,
"learning_rate": 7.149255520259338e-07,
"loss": 0.26844292879104614,
"step": 685
},
{
"epoch": 19.056338028169016,
"grad_norm": 0.2946661114692688,
"learning_rate": 7.141945529519288e-07,
"loss": 0.2809017300605774,
"step": 686
},
{
"epoch": 19.08450704225352,
"grad_norm": 0.27715936303138733,
"learning_rate": 7.134630537128403e-07,
"loss": 0.2835448980331421,
"step": 687
},
{
"epoch": 19.112676056338028,
"grad_norm": 0.2933226525783539,
"learning_rate": 7.127310565369415e-07,
"loss": 0.2795133888721466,
"step": 688
},
{
"epoch": 19.140845070422536,
"grad_norm": 0.28180861473083496,
"learning_rate": 7.11998563654023e-07,
"loss": 0.2750745713710785,
"step": 689
},
{
"epoch": 19.169014084507044,
"grad_norm": 0.2755012810230255,
"learning_rate": 7.11265577295385e-07,
"loss": 0.281097412109375,
"step": 690
},
{
"epoch": 19.197183098591548,
"grad_norm": 0.2865377962589264,
"learning_rate": 7.105320996938314e-07,
"loss": 0.2677628993988037,
"step": 691
},
{
"epoch": 19.225352112676056,
"grad_norm": 0.2958216369152069,
"learning_rate": 7.097981330836616e-07,
"loss": 0.2733122408390045,
"step": 692
},
{
"epoch": 19.253521126760564,
"grad_norm": 0.2982434034347534,
"learning_rate": 7.090636797006657e-07,
"loss": 0.2764785885810852,
"step": 693
},
{
"epoch": 19.281690140845072,
"grad_norm": 0.31210824847221375,
"learning_rate": 7.083287417821157e-07,
"loss": 0.27116531133651733,
"step": 694
},
{
"epoch": 19.309859154929576,
"grad_norm": 0.29045426845550537,
"learning_rate": 7.075933215667604e-07,
"loss": 0.2775840163230896,
"step": 695
},
{
"epoch": 19.338028169014084,
"grad_norm": 0.29685893654823303,
"learning_rate": 7.068574212948169e-07,
"loss": 0.2803945541381836,
"step": 696
},
{
"epoch": 19.366197183098592,
"grad_norm": 0.2790866494178772,
"learning_rate": 7.06121043207965e-07,
"loss": 0.2769659161567688,
"step": 697
},
{
"epoch": 19.3943661971831,
"grad_norm": 0.31644630432128906,
"learning_rate": 7.053841895493406e-07,
"loss": 0.27923786640167236,
"step": 698
},
{
"epoch": 19.422535211267604,
"grad_norm": 0.30641067028045654,
"learning_rate": 7.046468625635274e-07,
"loss": 0.2825276255607605,
"step": 699
},
{
"epoch": 19.450704225352112,
"grad_norm": 0.292458713054657,
"learning_rate": 7.039090644965509e-07,
"loss": 0.27422571182250977,
"step": 700
},
{
"epoch": 19.47887323943662,
"grad_norm": 0.2903311550617218,
"learning_rate": 7.031707975958726e-07,
"loss": 0.27189522981643677,
"step": 701
},
{
"epoch": 19.507042253521128,
"grad_norm": 0.2947315275669098,
"learning_rate": 7.024320641103811e-07,
"loss": 0.2683555483818054,
"step": 702
},
{
"epoch": 19.535211267605632,
"grad_norm": 0.29522547125816345,
"learning_rate": 7.01692866290387e-07,
"loss": 0.28815943002700806,
"step": 703
},
{
"epoch": 19.56338028169014,
"grad_norm": 0.28272008895874023,
"learning_rate": 7.009532063876148e-07,
"loss": 0.2853075861930847,
"step": 704
},
{
"epoch": 19.591549295774648,
"grad_norm": 0.286604642868042,
"learning_rate": 7.002130866551968e-07,
"loss": 0.2744004726409912,
"step": 705
},
{
"epoch": 19.619718309859156,
"grad_norm": 0.2829611301422119,
"learning_rate": 6.994725093476664e-07,
"loss": 0.2899395525455475,
"step": 706
},
{
"epoch": 19.647887323943664,
"grad_norm": 0.3035781681537628,
"learning_rate": 6.987314767209503e-07,
"loss": 0.29819610714912415,
"step": 707
},
{
"epoch": 19.676056338028168,
"grad_norm": 0.30463680624961853,
"learning_rate": 6.979899910323624e-07,
"loss": 0.2818058729171753,
"step": 708
},
{
"epoch": 19.704225352112676,
"grad_norm": 0.29514482617378235,
"learning_rate": 6.972480545405968e-07,
"loss": 0.294766366481781,
"step": 709
},
{
"epoch": 19.732394366197184,
"grad_norm": 0.282625675201416,
"learning_rate": 6.965056695057204e-07,
"loss": 0.27316591143608093,
"step": 710
},
{
"epoch": 19.760563380281692,
"grad_norm": 0.3090338110923767,
"learning_rate": 6.957628381891673e-07,
"loss": 0.2785091698169708,
"step": 711
},
{
"epoch": 19.788732394366196,
"grad_norm": 0.2826164960861206,
"learning_rate": 6.950195628537299e-07,
"loss": 0.2870754301548004,
"step": 712
},
{
"epoch": 19.816901408450704,
"grad_norm": 0.29807525873184204,
"learning_rate": 6.942758457635543e-07,
"loss": 0.27232879400253296,
"step": 713
},
{
"epoch": 19.845070422535212,
"grad_norm": 0.2901877760887146,
"learning_rate": 6.935316891841315e-07,
"loss": 0.2786208987236023,
"step": 714
},
{
"epoch": 19.87323943661972,
"grad_norm": 0.2947152853012085,
"learning_rate": 6.927870953822915e-07,
"loss": 0.2676268517971039,
"step": 715
},
{
"epoch": 19.901408450704224,
"grad_norm": 0.30847856402397156,
"learning_rate": 6.920420666261961e-07,
"loss": 0.27726125717163086,
"step": 716
},
{
"epoch": 19.929577464788732,
"grad_norm": 0.29455119371414185,
"learning_rate": 6.912966051853322e-07,
"loss": 0.28886911273002625,
"step": 717
},
{
"epoch": 19.95774647887324,
"grad_norm": 0.2961712181568146,
"learning_rate": 6.905507133305047e-07,
"loss": 0.2736320495605469,
"step": 718
},
{
"epoch": 19.985915492957748,
"grad_norm": 0.2923624515533447,
"learning_rate": 6.898043933338293e-07,
"loss": 0.2720155119895935,
"step": 719
},
{
"epoch": 20.0,
"grad_norm": 0.40786370635032654,
"learning_rate": 6.890576474687263e-07,
"loss": 0.3052176237106323,
"step": 720
},
{
"epoch": 20.028169014084508,
"grad_norm": 0.281310498714447,
"learning_rate": 6.883104780099133e-07,
"loss": 0.2827909588813782,
"step": 721
},
{
"epoch": 20.056338028169016,
"grad_norm": 0.28428319096565247,
"learning_rate": 6.875628872333975e-07,
"loss": 0.2593810558319092,
"step": 722
},
{
"epoch": 20.08450704225352,
"grad_norm": 0.28026291728019714,
"learning_rate": 6.868148774164706e-07,
"loss": 0.2783263027667999,
"step": 723
},
{
"epoch": 20.112676056338028,
"grad_norm": 0.2842010259628296,
"learning_rate": 6.860664508377001e-07,
"loss": 0.2809029221534729,
"step": 724
},
{
"epoch": 20.140845070422536,
"grad_norm": 0.2880638539791107,
"learning_rate": 6.853176097769228e-07,
"loss": 0.26888588070869446,
"step": 725
},
{
"epoch": 20.169014084507044,
"grad_norm": 0.28630784153938293,
"learning_rate": 6.84568356515239e-07,
"loss": 0.2781735062599182,
"step": 726
},
{
"epoch": 20.197183098591548,
"grad_norm": 0.30342307686805725,
"learning_rate": 6.838186933350036e-07,
"loss": 0.27911239862442017,
"step": 727
},
{
"epoch": 20.225352112676056,
"grad_norm": 0.29965290427207947,
"learning_rate": 6.83068622519821e-07,
"loss": 0.2759650945663452,
"step": 728
},
{
"epoch": 20.253521126760564,
"grad_norm": 0.2921484708786011,
"learning_rate": 6.823181463545366e-07,
"loss": 0.26791465282440186,
"step": 729
},
{
"epoch": 20.281690140845072,
"grad_norm": 0.29477155208587646,
"learning_rate": 6.815672671252315e-07,
"loss": 0.27440106868743896,
"step": 730
},
{
"epoch": 20.309859154929576,
"grad_norm": 0.2930176854133606,
"learning_rate": 6.808159871192136e-07,
"loss": 0.28788119554519653,
"step": 731
},
{
"epoch": 20.338028169014084,
"grad_norm": 0.304382860660553,
"learning_rate": 6.800643086250121e-07,
"loss": 0.2717517614364624,
"step": 732
},
{
"epoch": 20.366197183098592,
"grad_norm": 0.2945499122142792,
"learning_rate": 6.793122339323705e-07,
"loss": 0.29744279384613037,
"step": 733
},
{
"epoch": 20.3943661971831,
"grad_norm": 0.2932227849960327,
"learning_rate": 6.78559765332238e-07,
"loss": 0.2782973051071167,
"step": 734
},
{
"epoch": 20.422535211267604,
"grad_norm": 0.29432976245880127,
"learning_rate": 6.778069051167653e-07,
"loss": 0.28551533818244934,
"step": 735
},
{
"epoch": 20.450704225352112,
"grad_norm": 0.30091312527656555,
"learning_rate": 6.770536555792944e-07,
"loss": 0.28610894083976746,
"step": 736
},
{
"epoch": 20.47887323943662,
"grad_norm": 0.29813316464424133,
"learning_rate": 6.763000190143545e-07,
"loss": 0.28137102723121643,
"step": 737
},
{
"epoch": 20.507042253521128,
"grad_norm": 0.28738856315612793,
"learning_rate": 6.755459977176532e-07,
"loss": 0.26876533031463623,
"step": 738
},
{
"epoch": 20.535211267605632,
"grad_norm": 0.2894875407218933,
"learning_rate": 6.747915939860701e-07,
"loss": 0.2704589366912842,
"step": 739
},
{
"epoch": 20.56338028169014,
"grad_norm": 0.3046717047691345,
"learning_rate": 6.740368101176495e-07,
"loss": 0.28678447008132935,
"step": 740
},
{
"epoch": 20.591549295774648,
"grad_norm": 0.29942622780799866,
"learning_rate": 6.732816484115946e-07,
"loss": 0.27722471952438354,
"step": 741
},
{
"epoch": 20.619718309859156,
"grad_norm": 0.2984582185745239,
"learning_rate": 6.725261111682584e-07,
"loss": 0.2638360261917114,
"step": 742
},
{
"epoch": 20.647887323943664,
"grad_norm": 0.2943922281265259,
"learning_rate": 6.717702006891386e-07,
"loss": 0.286998450756073,
"step": 743
},
{
"epoch": 20.676056338028168,
"grad_norm": 0.2971697747707367,
"learning_rate": 6.710139192768694e-07,
"loss": 0.2628033757209778,
"step": 744
},
{
"epoch": 20.704225352112676,
"grad_norm": 0.2915992736816406,
"learning_rate": 6.702572692352155e-07,
"loss": 0.2789704203605652,
"step": 745
},
{
"epoch": 20.732394366197184,
"grad_norm": 0.29871392250061035,
"learning_rate": 6.695002528690639e-07,
"loss": 0.2669401168823242,
"step": 746
},
{
"epoch": 20.760563380281692,
"grad_norm": 0.29496580362319946,
"learning_rate": 6.687428724844179e-07,
"loss": 0.2711006999015808,
"step": 747
},
{
"epoch": 20.788732394366196,
"grad_norm": 0.29237619042396545,
"learning_rate": 6.679851303883891e-07,
"loss": 0.2822151780128479,
"step": 748
},
{
"epoch": 20.816901408450704,
"grad_norm": 0.29689720273017883,
"learning_rate": 6.672270288891918e-07,
"loss": 0.2751491665840149,
"step": 749
},
{
"epoch": 20.845070422535212,
"grad_norm": 0.28889331221580505,
"learning_rate": 6.664685702961344e-07,
"loss": 0.2681749761104584,
"step": 750
},
{
"epoch": 20.87323943661972,
"grad_norm": 0.2995631694793701,
"learning_rate": 6.657097569196133e-07,
"loss": 0.2793988287448883,
"step": 751
},
{
"epoch": 20.901408450704224,
"grad_norm": 0.29980671405792236,
"learning_rate": 6.649505910711058e-07,
"loss": 0.27338624000549316,
"step": 752
},
{
"epoch": 20.929577464788732,
"grad_norm": 0.29344668984413147,
"learning_rate": 6.641910750631626e-07,
"loss": 0.284781813621521,
"step": 753
},
{
"epoch": 20.95774647887324,
"grad_norm": 0.29827746748924255,
"learning_rate": 6.634312112094013e-07,
"loss": 0.27890220284461975,
"step": 754
},
{
"epoch": 20.985915492957748,
"grad_norm": 0.2813144326210022,
"learning_rate": 6.626710018244987e-07,
"loss": 0.2822881042957306,
"step": 755
},
{
"epoch": 21.0,
"grad_norm": 0.3963703215122223,
"learning_rate": 6.619104492241847e-07,
"loss": 0.27128899097442627,
"step": 756
},
{
"epoch": 21.028169014084508,
"grad_norm": 0.2815580666065216,
"learning_rate": 6.611495557252344e-07,
"loss": 0.26516419649124146,
"step": 757
},
{
"epoch": 21.056338028169016,
"grad_norm": 0.2884436845779419,
"learning_rate": 6.603883236454612e-07,
"loss": 0.2861919701099396,
"step": 758
},
{
"epoch": 21.08450704225352,
"grad_norm": 0.29655352234840393,
"learning_rate": 6.596267553037102e-07,
"loss": 0.28643375635147095,
"step": 759
},
{
"epoch": 21.112676056338028,
"grad_norm": 0.2927301824092865,
"learning_rate": 6.588648530198504e-07,
"loss": 0.26665711402893066,
"step": 760
},
{
"epoch": 21.140845070422536,
"grad_norm": 0.3053556978702545,
"learning_rate": 6.581026191147687e-07,
"loss": 0.2608697712421417,
"step": 761
},
{
"epoch": 21.169014084507044,
"grad_norm": 0.2939828634262085,
"learning_rate": 6.573400559103613e-07,
"loss": 0.2792375683784485,
"step": 762
},
{
"epoch": 21.197183098591548,
"grad_norm": 0.2972046136856079,
"learning_rate": 6.565771657295285e-07,
"loss": 0.28457099199295044,
"step": 763
},
{
"epoch": 21.225352112676056,
"grad_norm": 0.2918429672718048,
"learning_rate": 6.558139508961654e-07,
"loss": 0.2648508548736572,
"step": 764
},
{
"epoch": 21.253521126760564,
"grad_norm": 0.28380143642425537,
"learning_rate": 6.550504137351575e-07,
"loss": 0.27792784571647644,
"step": 765
},
{
"epoch": 21.281690140845072,
"grad_norm": 0.3151639997959137,
"learning_rate": 6.542865565723707e-07,
"loss": 0.2657250165939331,
"step": 766
},
{
"epoch": 21.309859154929576,
"grad_norm": 0.2861776351928711,
"learning_rate": 6.53522381734647e-07,
"loss": 0.27351340651512146,
"step": 767
},
{
"epoch": 21.338028169014084,
"grad_norm": 0.28596001863479614,
"learning_rate": 6.527578915497951e-07,
"loss": 0.28022241592407227,
"step": 768
},
{
"epoch": 21.366197183098592,
"grad_norm": 0.29702675342559814,
"learning_rate": 6.519930883465847e-07,
"loss": 0.2644035518169403,
"step": 769
},
{
"epoch": 21.3943661971831,
"grad_norm": 0.2863904535770416,
"learning_rate": 6.512279744547392e-07,
"loss": 0.2721293568611145,
"step": 770
},
{
"epoch": 21.422535211267604,
"grad_norm": 0.311262845993042,
"learning_rate": 6.50462552204928e-07,
"loss": 0.2911388874053955,
"step": 771
},
{
"epoch": 21.450704225352112,
"grad_norm": 0.3132490813732147,
"learning_rate": 6.496968239287603e-07,
"loss": 0.27957841753959656,
"step": 772
},
{
"epoch": 21.47887323943662,
"grad_norm": 0.29439255595207214,
"learning_rate": 6.489307919587769e-07,
"loss": 0.28288164734840393,
"step": 773
},
{
"epoch": 21.507042253521128,
"grad_norm": 0.3006008267402649,
"learning_rate": 6.481644586284442e-07,
"loss": 0.26865097880363464,
"step": 774
},
{
"epoch": 21.535211267605632,
"grad_norm": 0.28934645652770996,
"learning_rate": 6.473978262721463e-07,
"loss": 0.28625524044036865,
"step": 775
},
{
"epoch": 21.56338028169014,
"grad_norm": 0.28962355852127075,
"learning_rate": 6.466308972251785e-07,
"loss": 0.2737366855144501,
"step": 776
},
{
"epoch": 21.591549295774648,
"grad_norm": 0.29193779826164246,
"learning_rate": 6.458636738237395e-07,
"loss": 0.2644401788711548,
"step": 777
},
{
"epoch": 21.619718309859156,
"grad_norm": 0.31439822912216187,
"learning_rate": 6.45096158404925e-07,
"loss": 0.2638384699821472,
"step": 778
},
{
"epoch": 21.647887323943664,
"grad_norm": 0.2855563163757324,
"learning_rate": 6.443283533067198e-07,
"loss": 0.2697969079017639,
"step": 779
},
{
"epoch": 21.676056338028168,
"grad_norm": 0.2941296398639679,
"learning_rate": 6.435602608679916e-07,
"loss": 0.27152666449546814,
"step": 780
},
{
"epoch": 21.704225352112676,
"grad_norm": 0.2861116826534271,
"learning_rate": 6.427918834284834e-07,
"loss": 0.2749404013156891,
"step": 781
},
{
"epoch": 21.732394366197184,
"grad_norm": 0.30467715859413147,
"learning_rate": 6.420232233288055e-07,
"loss": 0.28106456995010376,
"step": 782
},
{
"epoch": 21.760563380281692,
"grad_norm": 0.2885453402996063,
"learning_rate": 6.412542829104306e-07,
"loss": 0.2661711275577545,
"step": 783
},
{
"epoch": 21.788732394366196,
"grad_norm": 0.30243006348609924,
"learning_rate": 6.404850645156841e-07,
"loss": 0.28171294927597046,
"step": 784
},
{
"epoch": 21.816901408450704,
"grad_norm": 0.29606276750564575,
"learning_rate": 6.397155704877388e-07,
"loss": 0.2737141251564026,
"step": 785
},
{
"epoch": 21.845070422535212,
"grad_norm": 0.30514174699783325,
"learning_rate": 6.389458031706068e-07,
"loss": 0.2778671979904175,
"step": 786
},
{
"epoch": 21.87323943661972,
"grad_norm": 0.29419445991516113,
"learning_rate": 6.381757649091329e-07,
"loss": 0.27829116582870483,
"step": 787
},
{
"epoch": 21.901408450704224,
"grad_norm": 0.30376535654067993,
"learning_rate": 6.374054580489873e-07,
"loss": 0.26818743348121643,
"step": 788
},
{
"epoch": 21.929577464788732,
"grad_norm": 0.29063352942466736,
"learning_rate": 6.366348849366583e-07,
"loss": 0.28016185760498047,
"step": 789
},
{
"epoch": 21.95774647887324,
"grad_norm": 0.29429173469543457,
"learning_rate": 6.358640479194451e-07,
"loss": 0.27824854850769043,
"step": 790
},
{
"epoch": 21.985915492957748,
"grad_norm": 0.28934815526008606,
"learning_rate": 6.35092949345451e-07,
"loss": 0.2743881344795227,
"step": 791
},
{
"epoch": 22.0,
"grad_norm": 0.41559702157974243,
"learning_rate": 6.343215915635761e-07,
"loss": 0.2856147289276123,
"step": 792
},
{
"epoch": 22.028169014084508,
"grad_norm": 0.29498717188835144,
"learning_rate": 6.335499769235098e-07,
"loss": 0.2729465961456299,
"step": 793
},
{
"epoch": 22.056338028169016,
"grad_norm": 0.30124449729919434,
"learning_rate": 6.327781077757241e-07,
"loss": 0.2874697744846344,
"step": 794
},
{
"epoch": 22.08450704225352,
"grad_norm": 0.3204105794429779,
"learning_rate": 6.320059864714664e-07,
"loss": 0.2923066020011902,
"step": 795
},
{
"epoch": 22.112676056338028,
"grad_norm": 0.2912622392177582,
"learning_rate": 6.31233615362752e-07,
"loss": 0.2808852791786194,
"step": 796
},
{
"epoch": 22.140845070422536,
"grad_norm": 0.30250096321105957,
"learning_rate": 6.304609968023572e-07,
"loss": 0.27111589908599854,
"step": 797
},
{
"epoch": 22.169014084507044,
"grad_norm": 0.3024645447731018,
"learning_rate": 6.296881331438126e-07,
"loss": 0.2812804877758026,
"step": 798
},
{
"epoch": 22.197183098591548,
"grad_norm": 0.29673656821250916,
"learning_rate": 6.289150267413942e-07,
"loss": 0.2681958079338074,
"step": 799
},
{
"epoch": 22.225352112676056,
"grad_norm": 0.29564592242240906,
"learning_rate": 6.281416799501187e-07,
"loss": 0.26508989930152893,
"step": 800
},
{
"epoch": 22.253521126760564,
"grad_norm": 0.2849496603012085,
"learning_rate": 6.273680951257342e-07,
"loss": 0.27044007182121277,
"step": 801
},
{
"epoch": 22.281690140845072,
"grad_norm": 0.30459970235824585,
"learning_rate": 6.265942746247146e-07,
"loss": 0.26503556966781616,
"step": 802
},
{
"epoch": 22.309859154929576,
"grad_norm": 0.29415223002433777,
"learning_rate": 6.258202208042511e-07,
"loss": 0.26770085096359253,
"step": 803
},
{
"epoch": 22.338028169014084,
"grad_norm": 0.3101199269294739,
"learning_rate": 6.25045936022246e-07,
"loss": 0.26633113622665405,
"step": 804
},
{
"epoch": 22.366197183098592,
"grad_norm": 0.28551825881004333,
"learning_rate": 6.242714226373049e-07,
"loss": 0.2745598256587982,
"step": 805
},
{
"epoch": 22.3943661971831,
"grad_norm": 0.30341607332229614,
"learning_rate": 6.2349668300873e-07,
"loss": 0.2879912853240967,
"step": 806
},
{
"epoch": 22.422535211267604,
"grad_norm": 0.33077767491340637,
"learning_rate": 6.227217194965125e-07,
"loss": 0.28035950660705566,
"step": 807
},
{
"epoch": 22.450704225352112,
"grad_norm": 0.305733859539032,
"learning_rate": 6.219465344613258e-07,
"loss": 0.2842296361923218,
"step": 808
},
{
"epoch": 22.47887323943662,
"grad_norm": 0.2931113839149475,
"learning_rate": 6.211711302645177e-07,
"loss": 0.2730957865715027,
"step": 809
},
{
"epoch": 22.507042253521128,
"grad_norm": 0.2949962913990021,
"learning_rate": 6.203955092681039e-07,
"loss": 0.281680166721344,
"step": 810
},
{
"epoch": 22.535211267605632,
"grad_norm": 0.30062124133110046,
"learning_rate": 6.196196738347607e-07,
"loss": 0.2771790027618408,
"step": 811
},
{
"epoch": 22.56338028169014,
"grad_norm": 0.29685312509536743,
"learning_rate": 6.188436263278172e-07,
"loss": 0.27885377407073975,
"step": 812
},
{
"epoch": 22.591549295774648,
"grad_norm": 0.30217039585113525,
"learning_rate": 6.180673691112486e-07,
"loss": 0.2664039433002472,
"step": 813
},
{
"epoch": 22.619718309859156,
"grad_norm": 0.2935945987701416,
"learning_rate": 6.172909045496694e-07,
"loss": 0.266349196434021,
"step": 814
},
{
"epoch": 22.647887323943664,
"grad_norm": 0.31217825412750244,
"learning_rate": 6.165142350083249e-07,
"loss": 0.2723742127418518,
"step": 815
},
{
"epoch": 22.676056338028168,
"grad_norm": 0.2960183918476105,
"learning_rate": 6.157373628530852e-07,
"loss": 0.272281289100647,
"step": 816
},
{
"epoch": 22.704225352112676,
"grad_norm": 0.2914189100265503,
"learning_rate": 6.149602904504378e-07,
"loss": 0.26770728826522827,
"step": 817
},
{
"epoch": 22.732394366197184,
"grad_norm": 0.2774648368358612,
"learning_rate": 6.141830201674802e-07,
"loss": 0.2694011330604553,
"step": 818
},
{
"epoch": 22.760563380281692,
"grad_norm": 0.29001736640930176,
"learning_rate": 6.134055543719121e-07,
"loss": 0.2670798897743225,
"step": 819
},
{
"epoch": 22.788732394366196,
"grad_norm": 0.31117716431617737,
"learning_rate": 6.126278954320294e-07,
"loss": 0.26127567887306213,
"step": 820
},
{
"epoch": 22.816901408450704,
"grad_norm": 0.29720577597618103,
"learning_rate": 6.118500457167159e-07,
"loss": 0.27497297525405884,
"step": 821
},
{
"epoch": 22.845070422535212,
"grad_norm": 0.3057437241077423,
"learning_rate": 6.11072007595437e-07,
"loss": 0.27363038063049316,
"step": 822
},
{
"epoch": 22.87323943661972,
"grad_norm": 0.323045939207077,
"learning_rate": 6.102937834382315e-07,
"loss": 0.27130627632141113,
"step": 823
},
{
"epoch": 22.901408450704224,
"grad_norm": 0.28948745131492615,
"learning_rate": 6.095153756157051e-07,
"loss": 0.26591163873672485,
"step": 824
},
{
"epoch": 22.929577464788732,
"grad_norm": 0.27952200174331665,
"learning_rate": 6.087367864990232e-07,
"loss": 0.266745388507843,
"step": 825
},
{
"epoch": 22.95774647887324,
"grad_norm": 0.30804452300071716,
"learning_rate": 6.079580184599032e-07,
"loss": 0.2794422507286072,
"step": 826
},
{
"epoch": 22.985915492957748,
"grad_norm": 0.3002220392227173,
"learning_rate": 6.071790738706078e-07,
"loss": 0.26469242572784424,
"step": 827
},
{
"epoch": 23.0,
"grad_norm": 0.4127134084701538,
"learning_rate": 6.06399955103937e-07,
"loss": 0.2482779324054718,
"step": 828
},
{
"epoch": 23.028169014084508,
"grad_norm": 0.30051475763320923,
"learning_rate": 6.056206645332217e-07,
"loss": 0.26631736755371094,
"step": 829
},
{
"epoch": 23.056338028169016,
"grad_norm": 0.3008311688899994,
"learning_rate": 6.048412045323164e-07,
"loss": 0.27459877729415894,
"step": 830
},
{
"epoch": 23.08450704225352,
"grad_norm": 0.28853461146354675,
"learning_rate": 6.040615774755911e-07,
"loss": 0.26959413290023804,
"step": 831
},
{
"epoch": 23.112676056338028,
"grad_norm": 0.29199543595314026,
"learning_rate": 6.032817857379256e-07,
"loss": 0.2588391900062561,
"step": 832
},
{
"epoch": 23.140845070422536,
"grad_norm": 0.29191362857818604,
"learning_rate": 6.025018316946999e-07,
"loss": 0.27447617053985596,
"step": 833
},
{
"epoch": 23.169014084507044,
"grad_norm": 0.29501983523368835,
"learning_rate": 6.017217177217899e-07,
"loss": 0.26884716749191284,
"step": 834
},
{
"epoch": 23.197183098591548,
"grad_norm": 0.3098088502883911,
"learning_rate": 6.009414461955581e-07,
"loss": 0.28516972064971924,
"step": 835
},
{
"epoch": 23.225352112676056,
"grad_norm": 0.3027796149253845,
"learning_rate": 6.001610194928464e-07,
"loss": 0.2739514112472534,
"step": 836
},
{
"epoch": 23.253521126760564,
"grad_norm": 0.31156665086746216,
"learning_rate": 5.993804399909703e-07,
"loss": 0.26852983236312866,
"step": 837
},
{
"epoch": 23.281690140845072,
"grad_norm": 0.2958903908729553,
"learning_rate": 5.985997100677103e-07,
"loss": 0.2743365168571472,
"step": 838
},
{
"epoch": 23.309859154929576,
"grad_norm": 0.31140410900115967,
"learning_rate": 5.97818832101305e-07,
"loss": 0.27525418996810913,
"step": 839
},
{
"epoch": 23.338028169014084,
"grad_norm": 0.3082049787044525,
"learning_rate": 5.97037808470444e-07,
"loss": 0.27074384689331055,
"step": 840
},
{
"epoch": 23.366197183098592,
"grad_norm": 0.2950114905834198,
"learning_rate": 5.96256641554261e-07,
"loss": 0.26068389415740967,
"step": 841
},
{
"epoch": 23.3943661971831,
"grad_norm": 0.31746307015419006,
"learning_rate": 5.954753337323259e-07,
"loss": 0.2648658752441406,
"step": 842
},
{
"epoch": 23.422535211267604,
"grad_norm": 0.2906374931335449,
"learning_rate": 5.946938873846375e-07,
"loss": 0.29040125012397766,
"step": 843
},
{
"epoch": 23.450704225352112,
"grad_norm": 0.3055919408798218,
"learning_rate": 5.939123048916173e-07,
"loss": 0.2694965600967407,
"step": 844
},
{
"epoch": 23.47887323943662,
"grad_norm": 0.3007211983203888,
"learning_rate": 5.931305886341008e-07,
"loss": 0.25987839698791504,
"step": 845
},
{
"epoch": 23.507042253521128,
"grad_norm": 0.3042035400867462,
"learning_rate": 5.923487409933315e-07,
"loss": 0.26484209299087524,
"step": 846
},
{
"epoch": 23.535211267605632,
"grad_norm": 0.30741506814956665,
"learning_rate": 5.915667643509528e-07,
"loss": 0.2735103368759155,
"step": 847
},
{
"epoch": 23.56338028169014,
"grad_norm": 0.30859899520874023,
"learning_rate": 5.907846610890011e-07,
"loss": 0.27706003189086914,
"step": 848
},
{
"epoch": 23.591549295774648,
"grad_norm": 0.29999226331710815,
"learning_rate": 5.900024335898987e-07,
"loss": 0.2733941674232483,
"step": 849
},
{
"epoch": 23.619718309859156,
"grad_norm": 0.3084903955459595,
"learning_rate": 5.892200842364462e-07,
"loss": 0.282131165266037,
"step": 850
},
{
"epoch": 23.647887323943664,
"grad_norm": 0.29400384426116943,
"learning_rate": 5.884376154118154e-07,
"loss": 0.26756390929222107,
"step": 851
},
{
"epoch": 23.676056338028168,
"grad_norm": 0.31666234135627747,
"learning_rate": 5.87655029499542e-07,
"loss": 0.2766130268573761,
"step": 852
},
{
"epoch": 23.704225352112676,
"grad_norm": 0.30233001708984375,
"learning_rate": 5.868723288835184e-07,
"loss": 0.2544291019439697,
"step": 853
},
{
"epoch": 23.732394366197184,
"grad_norm": 0.2888985276222229,
"learning_rate": 5.860895159479864e-07,
"loss": 0.272182822227478,
"step": 854
},
{
"epoch": 23.760563380281692,
"grad_norm": 0.29870662093162537,
"learning_rate": 5.853065930775303e-07,
"loss": 0.2798278331756592,
"step": 855
},
{
"epoch": 23.788732394366196,
"grad_norm": 0.307162344455719,
"learning_rate": 5.845235626570683e-07,
"loss": 0.2772548794746399,
"step": 856
},
{
"epoch": 23.816901408450704,
"grad_norm": 0.290558785200119,
"learning_rate": 5.837404270718475e-07,
"loss": 0.2746056020259857,
"step": 857
},
{
"epoch": 23.845070422535212,
"grad_norm": 0.30080270767211914,
"learning_rate": 5.829571887074343e-07,
"loss": 0.2648829519748688,
"step": 858
},
{
"epoch": 23.87323943661972,
"grad_norm": 0.3067336678504944,
"learning_rate": 5.821738499497086e-07,
"loss": 0.2871520519256592,
"step": 859
},
{
"epoch": 23.901408450704224,
"grad_norm": 0.29975709319114685,
"learning_rate": 5.813904131848564e-07,
"loss": 0.26279598474502563,
"step": 860
},
{
"epoch": 23.929577464788732,
"grad_norm": 0.3006797730922699,
"learning_rate": 5.806068807993617e-07,
"loss": 0.2586716115474701,
"step": 861
},
{
"epoch": 23.95774647887324,
"grad_norm": 0.31139636039733887,
"learning_rate": 5.798232551800002e-07,
"loss": 0.26469486951828003,
"step": 862
},
{
"epoch": 23.985915492957748,
"grad_norm": 0.295448899269104,
"learning_rate": 5.790395387138311e-07,
"loss": 0.27641937136650085,
"step": 863
},
{
"epoch": 24.0,
"grad_norm": 0.41943204402923584,
"learning_rate": 5.78255733788191e-07,
"loss": 0.2656780779361725,
"step": 864
},
{
"epoch": 24.028169014084508,
"grad_norm": 0.2978457808494568,
"learning_rate": 5.774718427906856e-07,
"loss": 0.27108752727508545,
"step": 865
},
{
"epoch": 24.056338028169016,
"grad_norm": 0.2980673015117645,
"learning_rate": 5.766878681091828e-07,
"loss": 0.27321118116378784,
"step": 866
},
{
"epoch": 24.08450704225352,
"grad_norm": 0.30751070380210876,
"learning_rate": 5.759038121318052e-07,
"loss": 0.26482248306274414,
"step": 867
},
{
"epoch": 24.112676056338028,
"grad_norm": 0.2982223629951477,
"learning_rate": 5.751196772469237e-07,
"loss": 0.2737855315208435,
"step": 868
},
{
"epoch": 24.140845070422536,
"grad_norm": 0.2943744361400604,
"learning_rate": 5.743354658431489e-07,
"loss": 0.27646419405937195,
"step": 869
},
{
"epoch": 24.169014084507044,
"grad_norm": 0.2863228917121887,
"learning_rate": 5.735511803093248e-07,
"loss": 0.2726101279258728,
"step": 870
},
{
"epoch": 24.197183098591548,
"grad_norm": 0.2973101735115051,
"learning_rate": 5.727668230345209e-07,
"loss": 0.2601590156555176,
"step": 871
},
{
"epoch": 24.225352112676056,
"grad_norm": 0.3052431344985962,
"learning_rate": 5.71982396408026e-07,
"loss": 0.27889275550842285,
"step": 872
},
{
"epoch": 24.253521126760564,
"grad_norm": 0.3076930046081543,
"learning_rate": 5.711979028193391e-07,
"loss": 0.2612301707267761,
"step": 873
},
{
"epoch": 24.281690140845072,
"grad_norm": 0.2986485958099365,
"learning_rate": 5.704133446581642e-07,
"loss": 0.27018094062805176,
"step": 874
},
{
"epoch": 24.309859154929576,
"grad_norm": 0.3108276426792145,
"learning_rate": 5.696287243144012e-07,
"loss": 0.27102935314178467,
"step": 875
},
{
"epoch": 24.338028169014084,
"grad_norm": 0.30193671584129333,
"learning_rate": 5.688440441781398e-07,
"loss": 0.2653925120830536,
"step": 876
},
{
"epoch": 24.366197183098592,
"grad_norm": 0.3071465492248535,
"learning_rate": 5.680593066396518e-07,
"loss": 0.2752073109149933,
"step": 877
},
{
"epoch": 24.3943661971831,
"grad_norm": 0.31397056579589844,
"learning_rate": 5.672745140893839e-07,
"loss": 0.2662411332130432,
"step": 878
},
{
"epoch": 24.422535211267604,
"grad_norm": 0.2991463243961334,
"learning_rate": 5.664896689179504e-07,
"loss": 0.24169263243675232,
"step": 879
},
{
"epoch": 24.450704225352112,
"grad_norm": 0.3123292028903961,
"learning_rate": 5.657047735161255e-07,
"loss": 0.27330368757247925,
"step": 880
},
{
"epoch": 24.47887323943662,
"grad_norm": 0.3062734305858612,
"learning_rate": 5.649198302748368e-07,
"loss": 0.26652461290359497,
"step": 881
},
{
"epoch": 24.507042253521128,
"grad_norm": 0.2875562906265259,
"learning_rate": 5.641348415851577e-07,
"loss": 0.2717418670654297,
"step": 882
},
{
"epoch": 24.535211267605632,
"grad_norm": 0.30724218487739563,
"learning_rate": 5.633498098382998e-07,
"loss": 0.2761197090148926,
"step": 883
},
{
"epoch": 24.56338028169014,
"grad_norm": 0.30381572246551514,
"learning_rate": 5.625647374256061e-07,
"loss": 0.2838340997695923,
"step": 884
},
{
"epoch": 24.591549295774648,
"grad_norm": 0.30817776918411255,
"learning_rate": 5.617796267385429e-07,
"loss": 0.26739388704299927,
"step": 885
},
{
"epoch": 24.619718309859156,
"grad_norm": 0.31107473373413086,
"learning_rate": 5.60994480168694e-07,
"loss": 0.27139878273010254,
"step": 886
},
{
"epoch": 24.647887323943664,
"grad_norm": 0.29710572957992554,
"learning_rate": 5.602093001077517e-07,
"loss": 0.26788806915283203,
"step": 887
},
{
"epoch": 24.676056338028168,
"grad_norm": 0.31037789583206177,
"learning_rate": 5.594240889475106e-07,
"loss": 0.2767243981361389,
"step": 888
},
{
"epoch": 24.704225352112676,
"grad_norm": 0.30905231833457947,
"learning_rate": 5.586388490798604e-07,
"loss": 0.2679288685321808,
"step": 889
},
{
"epoch": 24.732394366197184,
"grad_norm": 0.30612513422966003,
"learning_rate": 5.578535828967777e-07,
"loss": 0.2660091519355774,
"step": 890
},
{
"epoch": 24.760563380281692,
"grad_norm": 0.29661476612091064,
"learning_rate": 5.570682927903193e-07,
"loss": 0.27202385663986206,
"step": 891
},
{
"epoch": 24.788732394366196,
"grad_norm": 0.31154492497444153,
"learning_rate": 5.562829811526154e-07,
"loss": 0.26965251564979553,
"step": 892
},
{
"epoch": 24.816901408450704,
"grad_norm": 0.29887905716896057,
"learning_rate": 5.554976503758612e-07,
"loss": 0.2663193345069885,
"step": 893
},
{
"epoch": 24.845070422535212,
"grad_norm": 0.3046702444553375,
"learning_rate": 5.547123028523106e-07,
"loss": 0.26517826318740845,
"step": 894
},
{
"epoch": 24.87323943661972,
"grad_norm": 0.29926952719688416,
"learning_rate": 5.539269409742683e-07,
"loss": 0.2689710855484009,
"step": 895
},
{
"epoch": 24.901408450704224,
"grad_norm": 0.31607043743133545,
"learning_rate": 5.531415671340826e-07,
"loss": 0.2774956226348877,
"step": 896
},
{
"epoch": 24.929577464788732,
"grad_norm": 0.313334584236145,
"learning_rate": 5.523561837241387e-07,
"loss": 0.2801990807056427,
"step": 897
},
{
"epoch": 24.95774647887324,
"grad_norm": 0.3167824149131775,
"learning_rate": 5.515707931368507e-07,
"loss": 0.2556470036506653,
"step": 898
},
{
"epoch": 24.985915492957748,
"grad_norm": 0.3055095970630646,
"learning_rate": 5.507853977646543e-07,
"loss": 0.2693515121936798,
"step": 899
},
{
"epoch": 25.0,
"grad_norm": 0.41877350211143494,
"learning_rate": 5.5e-07,
"loss": 0.2642577588558197,
"step": 900
},
{
"epoch": 25.028169014084508,
"grad_norm": 0.3000764548778534,
"learning_rate": 5.492146022353459e-07,
"loss": 0.2616558074951172,
"step": 901
},
{
"epoch": 25.056338028169016,
"grad_norm": 0.30835723876953125,
"learning_rate": 5.484292068631494e-07,
"loss": 0.260206401348114,
"step": 902
},
{
"epoch": 25.08450704225352,
"grad_norm": 0.30945923924446106,
"learning_rate": 5.476438162758611e-07,
"loss": 0.26666033267974854,
"step": 903
},
{
"epoch": 25.112676056338028,
"grad_norm": 0.3131259083747864,
"learning_rate": 5.468584328659172e-07,
"loss": 0.2688153386116028,
"step": 904
},
{
"epoch": 25.140845070422536,
"grad_norm": 0.31281140446662903,
"learning_rate": 5.460730590257317e-07,
"loss": 0.25907081365585327,
"step": 905
},
{
"epoch": 25.169014084507044,
"grad_norm": 0.300714910030365,
"learning_rate": 5.452876971476896e-07,
"loss": 0.2585920989513397,
"step": 906
},
{
"epoch": 25.197183098591548,
"grad_norm": 0.31137779355049133,
"learning_rate": 5.445023496241388e-07,
"loss": 0.2691946029663086,
"step": 907
},
{
"epoch": 25.225352112676056,
"grad_norm": 0.31905803084373474,
"learning_rate": 5.437170188473847e-07,
"loss": 0.25889474153518677,
"step": 908
},
{
"epoch": 25.253521126760564,
"grad_norm": 0.30952438712120056,
"learning_rate": 5.429317072096807e-07,
"loss": 0.26691755652427673,
"step": 909
},
{
"epoch": 25.281690140845072,
"grad_norm": 0.3063667416572571,
"learning_rate": 5.421464171032224e-07,
"loss": 0.2661867141723633,
"step": 910
},
{
"epoch": 25.309859154929576,
"grad_norm": 0.31403201818466187,
"learning_rate": 5.413611509201396e-07,
"loss": 0.26902246475219727,
"step": 911
},
{
"epoch": 25.338028169014084,
"grad_norm": 0.3037600815296173,
"learning_rate": 5.405759110524894e-07,
"loss": 0.26004883646965027,
"step": 912
},
{
"epoch": 25.366197183098592,
"grad_norm": 0.3116777837276459,
"learning_rate": 5.397906998922483e-07,
"loss": 0.27219873666763306,
"step": 913
},
{
"epoch": 25.3943661971831,
"grad_norm": 0.2961476445198059,
"learning_rate": 5.390055198313061e-07,
"loss": 0.26753348112106323,
"step": 914
},
{
"epoch": 25.422535211267604,
"grad_norm": 0.3180798888206482,
"learning_rate": 5.382203732614571e-07,
"loss": 0.2706093192100525,
"step": 915
},
{
"epoch": 25.450704225352112,
"grad_norm": 0.2982124090194702,
"learning_rate": 5.37435262574394e-07,
"loss": 0.2601392865180969,
"step": 916
},
{
"epoch": 25.47887323943662,
"grad_norm": 0.29854777455329895,
"learning_rate": 5.366501901617001e-07,
"loss": 0.2788724899291992,
"step": 917
},
{
"epoch": 25.507042253521128,
"grad_norm": 0.30327802896499634,
"learning_rate": 5.358651584148423e-07,
"loss": 0.26465606689453125,
"step": 918
},
{
"epoch": 25.535211267605632,
"grad_norm": 0.3136656582355499,
"learning_rate": 5.350801697251633e-07,
"loss": 0.2621968984603882,
"step": 919
},
{
"epoch": 25.56338028169014,
"grad_norm": 0.3008262813091278,
"learning_rate": 5.342952264838747e-07,
"loss": 0.2775859236717224,
"step": 920
},
{
"epoch": 25.591549295774648,
"grad_norm": 0.31797295808792114,
"learning_rate": 5.335103310820496e-07,
"loss": 0.2715638279914856,
"step": 921
},
{
"epoch": 25.619718309859156,
"grad_norm": 0.3112519383430481,
"learning_rate": 5.32725485910616e-07,
"loss": 0.26941171288490295,
"step": 922
},
{
"epoch": 25.647887323943664,
"grad_norm": 0.2887360453605652,
"learning_rate": 5.319406933603482e-07,
"loss": 0.26261216402053833,
"step": 923
},
{
"epoch": 25.676056338028168,
"grad_norm": 0.3208933472633362,
"learning_rate": 5.311559558218603e-07,
"loss": 0.26436418294906616,
"step": 924
},
{
"epoch": 25.704225352112676,
"grad_norm": 0.30341023206710815,
"learning_rate": 5.303712756855988e-07,
"loss": 0.2747180461883545,
"step": 925
},
{
"epoch": 25.732394366197184,
"grad_norm": 0.31803277134895325,
"learning_rate": 5.295866553418358e-07,
"loss": 0.2771461606025696,
"step": 926
},
{
"epoch": 25.760563380281692,
"grad_norm": 0.3123302459716797,
"learning_rate": 5.288020971806608e-07,
"loss": 0.26546305418014526,
"step": 927
},
{
"epoch": 25.788732394366196,
"grad_norm": 0.3141644597053528,
"learning_rate": 5.28017603591974e-07,
"loss": 0.27546215057373047,
"step": 928
},
{
"epoch": 25.816901408450704,
"grad_norm": 0.29840072989463806,
"learning_rate": 5.27233176965479e-07,
"loss": 0.25834715366363525,
"step": 929
},
{
"epoch": 25.845070422535212,
"grad_norm": 0.3083305060863495,
"learning_rate": 5.264488196906752e-07,
"loss": 0.2746443748474121,
"step": 930
},
{
"epoch": 25.87323943661972,
"grad_norm": 0.30847135186195374,
"learning_rate": 5.256645341568511e-07,
"loss": 0.2748471200466156,
"step": 931
},
{
"epoch": 25.901408450704224,
"grad_norm": 0.30591723322868347,
"learning_rate": 5.248803227530763e-07,
"loss": 0.26996147632598877,
"step": 932
},
{
"epoch": 25.929577464788732,
"grad_norm": 0.314569354057312,
"learning_rate": 5.240961878681947e-07,
"loss": 0.28236207365989685,
"step": 933
},
{
"epoch": 25.95774647887324,
"grad_norm": 0.32219424843788147,
"learning_rate": 5.233121318908173e-07,
"loss": 0.2674041986465454,
"step": 934
},
{
"epoch": 25.985915492957748,
"grad_norm": 0.3121417760848999,
"learning_rate": 5.225281572093143e-07,
"loss": 0.2723839282989502,
"step": 935
},
{
"epoch": 26.0,
"grad_norm": 0.4469078481197357,
"learning_rate": 5.21744266211809e-07,
"loss": 0.2659713625907898,
"step": 936
},
{
"epoch": 26.028169014084508,
"grad_norm": 0.3079273998737335,
"learning_rate": 5.20960461286169e-07,
"loss": 0.2612949013710022,
"step": 937
},
{
"epoch": 26.056338028169016,
"grad_norm": 0.29670900106430054,
"learning_rate": 5.2017674482e-07,
"loss": 0.26683154702186584,
"step": 938
},
{
"epoch": 26.08450704225352,
"grad_norm": 0.3200303018093109,
"learning_rate": 5.193931192006385e-07,
"loss": 0.2616243362426758,
"step": 939
},
{
"epoch": 26.112676056338028,
"grad_norm": 0.31682220101356506,
"learning_rate": 5.186095868151436e-07,
"loss": 0.27138951420783997,
"step": 940
},
{
"epoch": 26.140845070422536,
"grad_norm": 0.30821120738983154,
"learning_rate": 5.178261500502912e-07,
"loss": 0.26395922899246216,
"step": 941
},
{
"epoch": 26.169014084507044,
"grad_norm": 0.3168351352214813,
"learning_rate": 5.170428112925659e-07,
"loss": 0.2528039813041687,
"step": 942
},
{
"epoch": 26.197183098591548,
"grad_norm": 0.31877174973487854,
"learning_rate": 5.162595729281526e-07,
"loss": 0.268981397151947,
"step": 943
},
{
"epoch": 26.225352112676056,
"grad_norm": 0.30236542224884033,
"learning_rate": 5.154764373429315e-07,
"loss": 0.26689520478248596,
"step": 944
},
{
"epoch": 26.253521126760564,
"grad_norm": 0.31615039706230164,
"learning_rate": 5.146934069224698e-07,
"loss": 0.25211524963378906,
"step": 945
},
{
"epoch": 26.281690140845072,
"grad_norm": 0.304155558347702,
"learning_rate": 5.139104840520135e-07,
"loss": 0.26361894607543945,
"step": 946
},
{
"epoch": 26.309859154929576,
"grad_norm": 0.31038856506347656,
"learning_rate": 5.131276711164815e-07,
"loss": 0.26455777883529663,
"step": 947
},
{
"epoch": 26.338028169014084,
"grad_norm": 0.3139597177505493,
"learning_rate": 5.123449705004581e-07,
"loss": 0.2526125907897949,
"step": 948
},
{
"epoch": 26.366197183098592,
"grad_norm": 0.3288014233112335,
"learning_rate": 5.115623845881847e-07,
"loss": 0.2677180767059326,
"step": 949
},
{
"epoch": 26.3943661971831,
"grad_norm": 0.33518192172050476,
"learning_rate": 5.107799157635538e-07,
"loss": 0.2683093249797821,
"step": 950
},
{
"epoch": 26.422535211267604,
"grad_norm": 0.3219356834888458,
"learning_rate": 5.099975664101014e-07,
"loss": 0.2773933708667755,
"step": 951
},
{
"epoch": 26.450704225352112,
"grad_norm": 0.32385388016700745,
"learning_rate": 5.09215338910999e-07,
"loss": 0.2612137198448181,
"step": 952
},
{
"epoch": 26.47887323943662,
"grad_norm": 0.32834818959236145,
"learning_rate": 5.084332356490472e-07,
"loss": 0.2747904658317566,
"step": 953
},
{
"epoch": 26.507042253521128,
"grad_norm": 0.32953891158103943,
"learning_rate": 5.076512590066685e-07,
"loss": 0.2700774669647217,
"step": 954
},
{
"epoch": 26.535211267605632,
"grad_norm": 0.31470146775245667,
"learning_rate": 5.068694113658992e-07,
"loss": 0.26825615763664246,
"step": 955
},
{
"epoch": 26.56338028169014,
"grad_norm": 0.3184269964694977,
"learning_rate": 5.060876951083828e-07,
"loss": 0.2559502124786377,
"step": 956
},
{
"epoch": 26.591549295774648,
"grad_norm": 0.3205021619796753,
"learning_rate": 5.053061126153624e-07,
"loss": 0.26462531089782715,
"step": 957
},
{
"epoch": 26.619718309859156,
"grad_norm": 0.3158126473426819,
"learning_rate": 5.045246662676741e-07,
"loss": 0.2701690196990967,
"step": 958
},
{
"epoch": 26.647887323943664,
"grad_norm": 0.3104144334793091,
"learning_rate": 5.037433584457389e-07,
"loss": 0.27104830741882324,
"step": 959
},
{
"epoch": 26.676056338028168,
"grad_norm": 0.3229422867298126,
"learning_rate": 5.02962191529556e-07,
"loss": 0.2765110731124878,
"step": 960
},
{
"epoch": 26.704225352112676,
"grad_norm": 0.3127235770225525,
"learning_rate": 5.021811678986951e-07,
"loss": 0.26477351784706116,
"step": 961
},
{
"epoch": 26.732394366197184,
"grad_norm": 0.31363457441329956,
"learning_rate": 5.014002899322896e-07,
"loss": 0.2696647644042969,
"step": 962
},
{
"epoch": 26.760563380281692,
"grad_norm": 0.3330313265323639,
"learning_rate": 5.006195600090296e-07,
"loss": 0.2720947861671448,
"step": 963
},
{
"epoch": 26.788732394366196,
"grad_norm": 0.3137781023979187,
"learning_rate": 4.998389805071536e-07,
"loss": 0.2770814001560211,
"step": 964
},
{
"epoch": 26.816901408450704,
"grad_norm": 0.30663928389549255,
"learning_rate": 4.990585538044419e-07,
"loss": 0.26743337512016296,
"step": 965
},
{
"epoch": 26.845070422535212,
"grad_norm": 0.3439841866493225,
"learning_rate": 4.982782822782101e-07,
"loss": 0.26640748977661133,
"step": 966
},
{
"epoch": 26.87323943661972,
"grad_norm": 0.30016517639160156,
"learning_rate": 4.974981683053001e-07,
"loss": 0.2630905508995056,
"step": 967
},
{
"epoch": 26.901408450704224,
"grad_norm": 0.30313640832901,
"learning_rate": 4.967182142620745e-07,
"loss": 0.26278769969940186,
"step": 968
},
{
"epoch": 26.929577464788732,
"grad_norm": 0.3100942373275757,
"learning_rate": 4.959384225244087e-07,
"loss": 0.25859004259109497,
"step": 969
},
{
"epoch": 26.95774647887324,
"grad_norm": 0.3049146234989166,
"learning_rate": 4.951587954676837e-07,
"loss": 0.2737579941749573,
"step": 970
},
{
"epoch": 26.985915492957748,
"grad_norm": 0.3105259835720062,
"learning_rate": 4.943793354667783e-07,
"loss": 0.2698732018470764,
"step": 971
},
{
"epoch": 27.0,
"grad_norm": 0.43671199679374695,
"learning_rate": 4.93600044896063e-07,
"loss": 0.2851495146751404,
"step": 972
},
{
"epoch": 27.028169014084508,
"grad_norm": 0.3152709901332855,
"learning_rate": 4.928209261293923e-07,
"loss": 0.27372750639915466,
"step": 973
},
{
"epoch": 27.056338028169016,
"grad_norm": 0.3281909227371216,
"learning_rate": 4.920419815400968e-07,
"loss": 0.26317745447158813,
"step": 974
},
{
"epoch": 27.08450704225352,
"grad_norm": 0.30629420280456543,
"learning_rate": 4.912632135009769e-07,
"loss": 0.267042338848114,
"step": 975
},
{
"epoch": 27.112676056338028,
"grad_norm": 0.31097206473350525,
"learning_rate": 4.904846243842949e-07,
"loss": 0.2647910714149475,
"step": 976
},
{
"epoch": 27.140845070422536,
"grad_norm": 0.30723172426223755,
"learning_rate": 4.897062165617686e-07,
"loss": 0.27176767587661743,
"step": 977
},
{
"epoch": 27.169014084507044,
"grad_norm": 0.333957701921463,
"learning_rate": 4.88927992404563e-07,
"loss": 0.26361826062202454,
"step": 978
},
{
"epoch": 27.197183098591548,
"grad_norm": 0.30476778745651245,
"learning_rate": 4.881499542832841e-07,
"loss": 0.2584869861602783,
"step": 979
},
{
"epoch": 27.225352112676056,
"grad_norm": 0.3146997392177582,
"learning_rate": 4.873721045679706e-07,
"loss": 0.2549043893814087,
"step": 980
},
{
"epoch": 27.253521126760564,
"grad_norm": 0.30739930272102356,
"learning_rate": 4.865944456280878e-07,
"loss": 0.2622683644294739,
"step": 981
},
{
"epoch": 27.281690140845072,
"grad_norm": 0.3006227910518646,
"learning_rate": 4.858169798325198e-07,
"loss": 0.27283164858818054,
"step": 982
},
{
"epoch": 27.309859154929576,
"grad_norm": 0.31303322315216064,
"learning_rate": 4.850397095495621e-07,
"loss": 0.2585863471031189,
"step": 983
},
{
"epoch": 27.338028169014084,
"grad_norm": 0.3036518692970276,
"learning_rate": 4.842626371469149e-07,
"loss": 0.2656107246875763,
"step": 984
},
{
"epoch": 27.366197183098592,
"grad_norm": 0.3137490749359131,
"learning_rate": 4.834857649916752e-07,
"loss": 0.25737249851226807,
"step": 985
},
{
"epoch": 27.3943661971831,
"grad_norm": 0.3161812424659729,
"learning_rate": 4.827090954503308e-07,
"loss": 0.2658624053001404,
"step": 986
},
{
"epoch": 27.422535211267604,
"grad_norm": 0.2974465489387512,
"learning_rate": 4.819326308887513e-07,
"loss": 0.2653939425945282,
"step": 987
},
{
"epoch": 27.450704225352112,
"grad_norm": 0.3207877576351166,
"learning_rate": 4.811563736721829e-07,
"loss": 0.2567484378814697,
"step": 988
},
{
"epoch": 27.47887323943662,
"grad_norm": 0.30379563570022583,
"learning_rate": 4.803803261652395e-07,
"loss": 0.2731136083602905,
"step": 989
},
{
"epoch": 27.507042253521128,
"grad_norm": 0.30110257863998413,
"learning_rate": 4.79604490731896e-07,
"loss": 0.2533247172832489,
"step": 990
},
{
"epoch": 27.535211267605632,
"grad_norm": 0.32354485988616943,
"learning_rate": 4.788288697354824e-07,
"loss": 0.2776826024055481,
"step": 991
},
{
"epoch": 27.56338028169014,
"grad_norm": 0.3137172758579254,
"learning_rate": 4.780534655386743e-07,
"loss": 0.2678206264972687,
"step": 992
},
{
"epoch": 27.591549295774648,
"grad_norm": 0.3129335641860962,
"learning_rate": 4.772782805034876e-07,
"loss": 0.27128273248672485,
"step": 993
},
{
"epoch": 27.619718309859156,
"grad_norm": 0.3112099766731262,
"learning_rate": 4.7650331699127013e-07,
"loss": 0.25505757331848145,
"step": 994
},
{
"epoch": 27.647887323943664,
"grad_norm": 0.3214300274848938,
"learning_rate": 4.75728577362695e-07,
"loss": 0.252490371465683,
"step": 995
},
{
"epoch": 27.676056338028168,
"grad_norm": 0.3177250623703003,
"learning_rate": 4.749540639777539e-07,
"loss": 0.2748945355415344,
"step": 996
},
{
"epoch": 27.704225352112676,
"grad_norm": 0.3087361752986908,
"learning_rate": 4.741797791957489e-07,
"loss": 0.26117944717407227,
"step": 997
},
{
"epoch": 27.732394366197184,
"grad_norm": 0.3008691072463989,
"learning_rate": 4.7340572537528547e-07,
"loss": 0.2576630115509033,
"step": 998
},
{
"epoch": 27.760563380281692,
"grad_norm": 0.3111347556114197,
"learning_rate": 4.7263190487426563e-07,
"loss": 0.26800209283828735,
"step": 999
},
{
"epoch": 27.788732394366196,
"grad_norm": 0.2986048758029938,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.2734978497028351,
"step": 1000
},
{
"epoch": 27.816901408450704,
"grad_norm": 0.31797438859939575,
"learning_rate": 4.710849732586059e-07,
"loss": 0.2649095356464386,
"step": 1001
},
{
"epoch": 27.845070422535212,
"grad_norm": 0.3100630044937134,
"learning_rate": 4.703118668561875e-07,
"loss": 0.2550201117992401,
"step": 1002
},
{
"epoch": 27.87323943661972,
"grad_norm": 0.3206699788570404,
"learning_rate": 4.6953900319764274e-07,
"loss": 0.26471948623657227,
"step": 1003
},
{
"epoch": 27.901408450704224,
"grad_norm": 0.3138802945613861,
"learning_rate": 4.68766384637248e-07,
"loss": 0.26174217462539673,
"step": 1004
},
{
"epoch": 27.929577464788732,
"grad_norm": 0.3069911301136017,
"learning_rate": 4.679940135285336e-07,
"loss": 0.26182085275650024,
"step": 1005
},
{
"epoch": 27.95774647887324,
"grad_norm": 0.3080894351005554,
"learning_rate": 4.672218922242759e-07,
"loss": 0.272597074508667,
"step": 1006
},
{
"epoch": 27.985915492957748,
"grad_norm": 0.30975106358528137,
"learning_rate": 4.664500230764903e-07,
"loss": 0.28192490339279175,
"step": 1007
},
{
"epoch": 28.0,
"grad_norm": 0.44492414593696594,
"learning_rate": 4.656784084364238e-07,
"loss": 0.2805609405040741,
"step": 1008
},
{
"epoch": 28.028169014084508,
"grad_norm": 0.3142589330673218,
"learning_rate": 4.6490705065454883e-07,
"loss": 0.2571072280406952,
"step": 1009
},
{
"epoch": 28.056338028169016,
"grad_norm": 0.3059631884098053,
"learning_rate": 4.641359520805548e-07,
"loss": 0.2683190107345581,
"step": 1010
},
{
"epoch": 28.08450704225352,
"grad_norm": 0.32835182547569275,
"learning_rate": 4.6336511506334177e-07,
"loss": 0.2751193344593048,
"step": 1011
},
{
"epoch": 28.112676056338028,
"grad_norm": 0.31909412145614624,
"learning_rate": 4.6259454195101267e-07,
"loss": 0.27306729555130005,
"step": 1012
},
{
"epoch": 28.140845070422536,
"grad_norm": 0.32016029953956604,
"learning_rate": 4.61824235090867e-07,
"loss": 0.2615482211112976,
"step": 1013
},
{
"epoch": 28.169014084507044,
"grad_norm": 0.30900275707244873,
"learning_rate": 4.6105419682939316e-07,
"loss": 0.2553929388523102,
"step": 1014
},
{
"epoch": 28.197183098591548,
"grad_norm": 0.3047516942024231,
"learning_rate": 4.602844295122613e-07,
"loss": 0.26050907373428345,
"step": 1015
},
{
"epoch": 28.225352112676056,
"grad_norm": 0.31619319319725037,
"learning_rate": 4.59514935484316e-07,
"loss": 0.2493715137243271,
"step": 1016
},
{
"epoch": 28.253521126760564,
"grad_norm": 0.31594234704971313,
"learning_rate": 4.5874571708956953e-07,
"loss": 0.26061999797821045,
"step": 1017
},
{
"epoch": 28.281690140845072,
"grad_norm": 0.31763410568237305,
"learning_rate": 4.579767766711944e-07,
"loss": 0.2720048427581787,
"step": 1018
},
{
"epoch": 28.309859154929576,
"grad_norm": 0.3225538432598114,
"learning_rate": 4.572081165715167e-07,
"loss": 0.26587527990341187,
"step": 1019
},
{
"epoch": 28.338028169014084,
"grad_norm": 0.33830496668815613,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.26142361760139465,
"step": 1020
},
{
"epoch": 28.366197183098592,
"grad_norm": 0.30440667271614075,
"learning_rate": 4.556716466932803e-07,
"loss": 0.25490373373031616,
"step": 1021
},
{
"epoch": 28.3943661971831,
"grad_norm": 0.30009451508522034,
"learning_rate": 4.549038415950751e-07,
"loss": 0.258319616317749,
"step": 1022
},
{
"epoch": 28.422535211267604,
"grad_norm": 0.32110437750816345,
"learning_rate": 4.5413632617626054e-07,
"loss": 0.2684330344200134,
"step": 1023
},
{
"epoch": 28.450704225352112,
"grad_norm": 0.3126528561115265,
"learning_rate": 4.5336910277482155e-07,
"loss": 0.2647142708301544,
"step": 1024
},
{
"epoch": 28.47887323943662,
"grad_norm": 0.30162736773490906,
"learning_rate": 4.526021737278537e-07,
"loss": 0.2717491388320923,
"step": 1025
},
{
"epoch": 28.507042253521128,
"grad_norm": 0.32018333673477173,
"learning_rate": 4.51835541371556e-07,
"loss": 0.2770422697067261,
"step": 1026
},
{
"epoch": 28.535211267605632,
"grad_norm": 0.3132731318473816,
"learning_rate": 4.5106920804122304e-07,
"loss": 0.2692522406578064,
"step": 1027
},
{
"epoch": 28.56338028169014,
"grad_norm": 0.30906060338020325,
"learning_rate": 4.503031760712397e-07,
"loss": 0.2523694932460785,
"step": 1028
},
{
"epoch": 28.591549295774648,
"grad_norm": 0.3276032507419586,
"learning_rate": 4.4953744779507197e-07,
"loss": 0.26482313871383667,
"step": 1029
},
{
"epoch": 28.619718309859156,
"grad_norm": 0.33187615871429443,
"learning_rate": 4.4877202554526084e-07,
"loss": 0.2603946924209595,
"step": 1030
},
{
"epoch": 28.647887323943664,
"grad_norm": 0.30181628465652466,
"learning_rate": 4.480069116534151e-07,
"loss": 0.25871700048446655,
"step": 1031
},
{
"epoch": 28.676056338028168,
"grad_norm": 0.3155851662158966,
"learning_rate": 4.4724210845020494e-07,
"loss": 0.2617461681365967,
"step": 1032
},
{
"epoch": 28.704225352112676,
"grad_norm": 0.30370378494262695,
"learning_rate": 4.4647761826535303e-07,
"loss": 0.26235488057136536,
"step": 1033
},
{
"epoch": 28.732394366197184,
"grad_norm": 0.317186564207077,
"learning_rate": 4.457134434276293e-07,
"loss": 0.26761680841445923,
"step": 1034
},
{
"epoch": 28.760563380281692,
"grad_norm": 0.3287314772605896,
"learning_rate": 4.449495862648427e-07,
"loss": 0.261843204498291,
"step": 1035
},
{
"epoch": 28.788732394366196,
"grad_norm": 0.33204883337020874,
"learning_rate": 4.441860491038345e-07,
"loss": 0.2633381485939026,
"step": 1036
},
{
"epoch": 28.816901408450704,
"grad_norm": 0.32268011569976807,
"learning_rate": 4.4342283427047164e-07,
"loss": 0.24900981783866882,
"step": 1037
},
{
"epoch": 28.845070422535212,
"grad_norm": 0.3224244713783264,
"learning_rate": 4.4265994408963867e-07,
"loss": 0.2667103111743927,
"step": 1038
},
{
"epoch": 28.87323943661972,
"grad_norm": 0.3169482350349426,
"learning_rate": 4.418973808852313e-07,
"loss": 0.268291175365448,
"step": 1039
},
{
"epoch": 28.901408450704224,
"grad_norm": 0.33006441593170166,
"learning_rate": 4.4113514698014953e-07,
"loss": 0.27004534006118774,
"step": 1040
},
{
"epoch": 28.929577464788732,
"grad_norm": 0.35179299116134644,
"learning_rate": 4.403732446962899e-07,
"loss": 0.2628635764122009,
"step": 1041
},
{
"epoch": 28.95774647887324,
"grad_norm": 0.3151315748691559,
"learning_rate": 4.3961167635453876e-07,
"loss": 0.2677478492259979,
"step": 1042
},
{
"epoch": 28.985915492957748,
"grad_norm": 0.3185572922229767,
"learning_rate": 4.388504442747657e-07,
"loss": 0.2660791873931885,
"step": 1043
},
{
"epoch": 29.0,
"grad_norm": 0.45902183651924133,
"learning_rate": 4.3808955077581546e-07,
"loss": 0.2720754146575928,
"step": 1044
},
{
"epoch": 29.028169014084508,
"grad_norm": 0.3011077344417572,
"learning_rate": 4.373289981755013e-07,
"loss": 0.25422877073287964,
"step": 1045
},
{
"epoch": 29.056338028169016,
"grad_norm": 0.3089461028575897,
"learning_rate": 4.365687887905988e-07,
"loss": 0.2498088926076889,
"step": 1046
},
{
"epoch": 29.08450704225352,
"grad_norm": 0.32150641083717346,
"learning_rate": 4.358089249368375e-07,
"loss": 0.2662513554096222,
"step": 1047
},
{
"epoch": 29.112676056338028,
"grad_norm": 0.32592031359672546,
"learning_rate": 4.350494089288943e-07,
"loss": 0.2539994418621063,
"step": 1048
},
{
"epoch": 29.140845070422536,
"grad_norm": 0.31924694776535034,
"learning_rate": 4.3429024308038686e-07,
"loss": 0.2557491958141327,
"step": 1049
},
{
"epoch": 29.169014084507044,
"grad_norm": 0.32504960894584656,
"learning_rate": 4.3353142970386557e-07,
"loss": 0.26317501068115234,
"step": 1050
},
{
"epoch": 29.197183098591548,
"grad_norm": 0.3093854784965515,
"learning_rate": 4.327729711108082e-07,
"loss": 0.25340092182159424,
"step": 1051
},
{
"epoch": 29.225352112676056,
"grad_norm": 0.313862144947052,
"learning_rate": 4.3201486961161093e-07,
"loss": 0.2559676766395569,
"step": 1052
},
{
"epoch": 29.253521126760564,
"grad_norm": 0.3301529288291931,
"learning_rate": 4.312571275155823e-07,
"loss": 0.2709015905857086,
"step": 1053
},
{
"epoch": 29.281690140845072,
"grad_norm": 0.32452118396759033,
"learning_rate": 4.304997471309361e-07,
"loss": 0.2698490619659424,
"step": 1054
},
{
"epoch": 29.309859154929576,
"grad_norm": 0.3382558226585388,
"learning_rate": 4.297427307647844e-07,
"loss": 0.2615205645561218,
"step": 1055
},
{
"epoch": 29.338028169014084,
"grad_norm": 0.3098710775375366,
"learning_rate": 4.2898608072313045e-07,
"loss": 0.2664251923561096,
"step": 1056
},
{
"epoch": 29.366197183098592,
"grad_norm": 0.3207705318927765,
"learning_rate": 4.2822979931086144e-07,
"loss": 0.2764906883239746,
"step": 1057
},
{
"epoch": 29.3943661971831,
"grad_norm": 0.3483034372329712,
"learning_rate": 4.2747388883174154e-07,
"loss": 0.2622952163219452,
"step": 1058
},
{
"epoch": 29.422535211267604,
"grad_norm": 0.30950114130973816,
"learning_rate": 4.267183515884054e-07,
"loss": 0.2630128860473633,
"step": 1059
},
{
"epoch": 29.450704225352112,
"grad_norm": 0.32425740361213684,
"learning_rate": 4.2596318988235037e-07,
"loss": 0.25917208194732666,
"step": 1060
},
{
"epoch": 29.47887323943662,
"grad_norm": 0.3382692039012909,
"learning_rate": 4.2520840601392996e-07,
"loss": 0.26483750343322754,
"step": 1061
},
{
"epoch": 29.507042253521128,
"grad_norm": 0.30861786007881165,
"learning_rate": 4.2445400228234687e-07,
"loss": 0.2531127631664276,
"step": 1062
},
{
"epoch": 29.535211267605632,
"grad_norm": 0.33470088243484497,
"learning_rate": 4.2369998098564554e-07,
"loss": 0.263372540473938,
"step": 1063
},
{
"epoch": 29.56338028169014,
"grad_norm": 0.34484177827835083,
"learning_rate": 4.2294634442070553e-07,
"loss": 0.263760507106781,
"step": 1064
},
{
"epoch": 29.591549295774648,
"grad_norm": 0.32152125239372253,
"learning_rate": 4.2219309488323487e-07,
"loss": 0.2630784511566162,
"step": 1065
},
{
"epoch": 29.619718309859156,
"grad_norm": 0.3259511888027191,
"learning_rate": 4.214402346677619e-07,
"loss": 0.26080453395843506,
"step": 1066
},
{
"epoch": 29.647887323943664,
"grad_norm": 0.32442566752433777,
"learning_rate": 4.206877660676297e-07,
"loss": 0.2604103088378906,
"step": 1067
},
{
"epoch": 29.676056338028168,
"grad_norm": 0.3231119215488434,
"learning_rate": 4.1993569137498776e-07,
"loss": 0.26589787006378174,
"step": 1068
},
{
"epoch": 29.704225352112676,
"grad_norm": 0.3275383412837982,
"learning_rate": 4.1918401288078633e-07,
"loss": 0.2476288229227066,
"step": 1069
},
{
"epoch": 29.732394366197184,
"grad_norm": 0.3219151496887207,
"learning_rate": 4.1843273287476854e-07,
"loss": 0.26332658529281616,
"step": 1070
},
{
"epoch": 29.760563380281692,
"grad_norm": 0.31227391958236694,
"learning_rate": 4.1768185364546326e-07,
"loss": 0.2647852301597595,
"step": 1071
},
{
"epoch": 29.788732394366196,
"grad_norm": 0.3090374767780304,
"learning_rate": 4.1693137748017915e-07,
"loss": 0.2562742531299591,
"step": 1072
},
{
"epoch": 29.816901408450704,
"grad_norm": 0.32516875863075256,
"learning_rate": 4.161813066649963e-07,
"loss": 0.27417412400245667,
"step": 1073
},
{
"epoch": 29.845070422535212,
"grad_norm": 0.3393928110599518,
"learning_rate": 4.15431643484761e-07,
"loss": 0.25790080428123474,
"step": 1074
},
{
"epoch": 29.87323943661972,
"grad_norm": 0.3293744623661041,
"learning_rate": 4.146823902230772e-07,
"loss": 0.27599674463272095,
"step": 1075
},
{
"epoch": 29.901408450704224,
"grad_norm": 0.336525022983551,
"learning_rate": 4.1393354916230005e-07,
"loss": 0.2566748261451721,
"step": 1076
},
{
"epoch": 29.929577464788732,
"grad_norm": 0.30744579434394836,
"learning_rate": 4.1318512258352936e-07,
"loss": 0.276886522769928,
"step": 1077
},
{
"epoch": 29.95774647887324,
"grad_norm": 0.3156173527240753,
"learning_rate": 4.124371127666024e-07,
"loss": 0.27484360337257385,
"step": 1078
},
{
"epoch": 29.985915492957748,
"grad_norm": 0.31924012303352356,
"learning_rate": 4.1168952199008677e-07,
"loss": 0.2567445635795593,
"step": 1079
},
{
"epoch": 30.0,
"grad_norm": 0.4623652994632721,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.27351921796798706,
"step": 1080
},
{
"epoch": 30.028169014084508,
"grad_norm": 0.32494813203811646,
"learning_rate": 4.101956066661708e-07,
"loss": 0.26006799936294556,
"step": 1081
},
{
"epoch": 30.056338028169016,
"grad_norm": 0.3355497121810913,
"learning_rate": 4.0944928666949527e-07,
"loss": 0.26071614027023315,
"step": 1082
},
{
"epoch": 30.08450704225352,
"grad_norm": 0.3180653750896454,
"learning_rate": 4.0870339481466774e-07,
"loss": 0.2741304039955139,
"step": 1083
},
{
"epoch": 30.112676056338028,
"grad_norm": 0.31589558720588684,
"learning_rate": 4.079579333738039e-07,
"loss": 0.2640499770641327,
"step": 1084
},
{
"epoch": 30.140845070422536,
"grad_norm": 0.33277377486228943,
"learning_rate": 4.0721290461770863e-07,
"loss": 0.2542555630207062,
"step": 1085
},
{
"epoch": 30.169014084507044,
"grad_norm": 0.31191685795783997,
"learning_rate": 4.064683108158685e-07,
"loss": 0.24946148693561554,
"step": 1086
},
{
"epoch": 30.197183098591548,
"grad_norm": 0.31646913290023804,
"learning_rate": 4.057241542364457e-07,
"loss": 0.2565403878688812,
"step": 1087
},
{
"epoch": 30.225352112676056,
"grad_norm": 0.32091739773750305,
"learning_rate": 4.0498043714627006e-07,
"loss": 0.2608620226383209,
"step": 1088
},
{
"epoch": 30.253521126760564,
"grad_norm": 0.3244355618953705,
"learning_rate": 4.042371618108329e-07,
"loss": 0.25209081172943115,
"step": 1089
},
{
"epoch": 30.281690140845072,
"grad_norm": 0.3262701630592346,
"learning_rate": 4.034943304942796e-07,
"loss": 0.2566452622413635,
"step": 1090
},
{
"epoch": 30.309859154929576,
"grad_norm": 0.35125988721847534,
"learning_rate": 4.027519454594033e-07,
"loss": 0.2646006643772125,
"step": 1091
},
{
"epoch": 30.338028169014084,
"grad_norm": 0.32471081614494324,
"learning_rate": 4.020100089676376e-07,
"loss": 0.2576545178890228,
"step": 1092
},
{
"epoch": 30.366197183098592,
"grad_norm": 0.33542898297309875,
"learning_rate": 4.012685232790497e-07,
"loss": 0.25865480303764343,
"step": 1093
},
{
"epoch": 30.3943661971831,
"grad_norm": 0.31360387802124023,
"learning_rate": 4.005274906523336e-07,
"loss": 0.25481581687927246,
"step": 1094
},
{
"epoch": 30.422535211267604,
"grad_norm": 0.33107563853263855,
"learning_rate": 3.9978691334480306e-07,
"loss": 0.252411812543869,
"step": 1095
},
{
"epoch": 30.450704225352112,
"grad_norm": 0.3281182050704956,
"learning_rate": 3.9904679361238526e-07,
"loss": 0.2586092948913574,
"step": 1096
},
{
"epoch": 30.47887323943662,
"grad_norm": 0.32694414258003235,
"learning_rate": 3.9830713370961313e-07,
"loss": 0.26445192098617554,
"step": 1097
},
{
"epoch": 30.507042253521128,
"grad_norm": 0.318498432636261,
"learning_rate": 3.975679358896189e-07,
"loss": 0.25009143352508545,
"step": 1098
},
{
"epoch": 30.535211267605632,
"grad_norm": 0.3352436423301697,
"learning_rate": 3.968292024041275e-07,
"loss": 0.2770006060600281,
"step": 1099
},
{
"epoch": 30.56338028169014,
"grad_norm": 0.3413051664829254,
"learning_rate": 3.9609093550344907e-07,
"loss": 0.2675744593143463,
"step": 1100
},
{
"epoch": 30.591549295774648,
"grad_norm": 0.33011800050735474,
"learning_rate": 3.953531374364728e-07,
"loss": 0.25982439517974854,
"step": 1101
},
{
"epoch": 30.619718309859156,
"grad_norm": 0.3153058588504791,
"learning_rate": 3.946158104506594e-07,
"loss": 0.26440930366516113,
"step": 1102
},
{
"epoch": 30.647887323943664,
"grad_norm": 0.33693262934684753,
"learning_rate": 3.938789567920349e-07,
"loss": 0.2564413845539093,
"step": 1103
},
{
"epoch": 30.676056338028168,
"grad_norm": 0.3082239031791687,
"learning_rate": 3.931425787051832e-07,
"loss": 0.26095646619796753,
"step": 1104
},
{
"epoch": 30.704225352112676,
"grad_norm": 0.34148088097572327,
"learning_rate": 3.924066784332396e-07,
"loss": 0.27237722277641296,
"step": 1105
},
{
"epoch": 30.732394366197184,
"grad_norm": 0.3161861300468445,
"learning_rate": 3.9167125821788416e-07,
"loss": 0.25798144936561584,
"step": 1106
},
{
"epoch": 30.760563380281692,
"grad_norm": 0.33590832352638245,
"learning_rate": 3.909363202993343e-07,
"loss": 0.2643035650253296,
"step": 1107
},
{
"epoch": 30.788732394366196,
"grad_norm": 0.33959585428237915,
"learning_rate": 3.902018669163384e-07,
"loss": 0.2613189220428467,
"step": 1108
},
{
"epoch": 30.816901408450704,
"grad_norm": 0.31452202796936035,
"learning_rate": 3.894679003061686e-07,
"loss": 0.26554104685783386,
"step": 1109
},
{
"epoch": 30.845070422535212,
"grad_norm": 0.3322625160217285,
"learning_rate": 3.8873442270461485e-07,
"loss": 0.2571873664855957,
"step": 1110
},
{
"epoch": 30.87323943661972,
"grad_norm": 0.33110320568084717,
"learning_rate": 3.88001436345977e-07,
"loss": 0.26796817779541016,
"step": 1111
},
{
"epoch": 30.901408450704224,
"grad_norm": 0.32166630029678345,
"learning_rate": 3.872689434630585e-07,
"loss": 0.25648969411849976,
"step": 1112
},
{
"epoch": 30.929577464788732,
"grad_norm": 0.3449627757072449,
"learning_rate": 3.8653694628715984e-07,
"loss": 0.26782190799713135,
"step": 1113
},
{
"epoch": 30.95774647887324,
"grad_norm": 0.3227315843105316,
"learning_rate": 3.8580544704807117e-07,
"loss": 0.2791867256164551,
"step": 1114
},
{
"epoch": 30.985915492957748,
"grad_norm": 0.3112963140010834,
"learning_rate": 3.850744479740663e-07,
"loss": 0.26565277576446533,
"step": 1115
},
{
"epoch": 31.0,
"grad_norm": 0.4575044810771942,
"learning_rate": 3.843439512918949e-07,
"loss": 0.25405725836753845,
"step": 1116
},
{
"epoch": 31.028169014084508,
"grad_norm": 0.3324749767780304,
"learning_rate": 3.8361395922677687e-07,
"loss": 0.26342666149139404,
"step": 1117
},
{
"epoch": 31.056338028169016,
"grad_norm": 0.3335409164428711,
"learning_rate": 3.8288447400239443e-07,
"loss": 0.27227702736854553,
"step": 1118
},
{
"epoch": 31.08450704225352,
"grad_norm": 0.33716699481010437,
"learning_rate": 3.82155497840886e-07,
"loss": 0.2696995437145233,
"step": 1119
},
{
"epoch": 31.112676056338028,
"grad_norm": 0.33672624826431274,
"learning_rate": 3.8142703296283953e-07,
"loss": 0.2588409185409546,
"step": 1120
},
{
"epoch": 31.140845070422536,
"grad_norm": 0.3224928081035614,
"learning_rate": 3.806990815872855e-07,
"loss": 0.2625422775745392,
"step": 1121
},
{
"epoch": 31.169014084507044,
"grad_norm": 0.32264038920402527,
"learning_rate": 3.7997164593168983e-07,
"loss": 0.251539021730423,
"step": 1122
},
{
"epoch": 31.197183098591548,
"grad_norm": 0.33344459533691406,
"learning_rate": 3.7924472821194765e-07,
"loss": 0.25519099831581116,
"step": 1123
},
{
"epoch": 31.225352112676056,
"grad_norm": 0.3551379442214966,
"learning_rate": 3.785183306423767e-07,
"loss": 0.2584845721721649,
"step": 1124
},
{
"epoch": 31.253521126760564,
"grad_norm": 0.3440611660480499,
"learning_rate": 3.777924554357096e-07,
"loss": 0.2609241008758545,
"step": 1125
},
{
"epoch": 31.281690140845072,
"grad_norm": 0.3400917649269104,
"learning_rate": 3.7706710480308835e-07,
"loss": 0.26181089878082275,
"step": 1126
},
{
"epoch": 31.309859154929576,
"grad_norm": 0.3361797630786896,
"learning_rate": 3.7634228095405673e-07,
"loss": 0.2546064853668213,
"step": 1127
},
{
"epoch": 31.338028169014084,
"grad_norm": 0.3346230387687683,
"learning_rate": 3.7561798609655373e-07,
"loss": 0.26581573486328125,
"step": 1128
},
{
"epoch": 31.366197183098592,
"grad_norm": 0.34457266330718994,
"learning_rate": 3.748942224369073e-07,
"loss": 0.2582035958766937,
"step": 1129
},
{
"epoch": 31.3943661971831,
"grad_norm": 0.3213818073272705,
"learning_rate": 3.7417099217982686e-07,
"loss": 0.25484442710876465,
"step": 1130
},
{
"epoch": 31.422535211267604,
"grad_norm": 0.3486325442790985,
"learning_rate": 3.734482975283975e-07,
"loss": 0.27330318093299866,
"step": 1131
},
{
"epoch": 31.450704225352112,
"grad_norm": 0.3430873453617096,
"learning_rate": 3.72726140684072e-07,
"loss": 0.25915205478668213,
"step": 1132
},
{
"epoch": 31.47887323943662,
"grad_norm": 0.3348333537578583,
"learning_rate": 3.720045238466658e-07,
"loss": 0.2582821846008301,
"step": 1133
},
{
"epoch": 31.507042253521128,
"grad_norm": 0.3174356520175934,
"learning_rate": 3.712834492143487e-07,
"loss": 0.2682039737701416,
"step": 1134
},
{
"epoch": 31.535211267605632,
"grad_norm": 0.3320380449295044,
"learning_rate": 3.7056291898363925e-07,
"loss": 0.2751486003398895,
"step": 1135
},
{
"epoch": 31.56338028169014,
"grad_norm": 0.3412676155567169,
"learning_rate": 3.6984293534939737e-07,
"loss": 0.2540426254272461,
"step": 1136
},
{
"epoch": 31.591549295774648,
"grad_norm": 0.35137638449668884,
"learning_rate": 3.69123500504818e-07,
"loss": 0.2570858895778656,
"step": 1137
},
{
"epoch": 31.619718309859156,
"grad_norm": 0.32933273911476135,
"learning_rate": 3.6840461664142444e-07,
"loss": 0.2535385489463806,
"step": 1138
},
{
"epoch": 31.647887323943664,
"grad_norm": 0.32296112179756165,
"learning_rate": 3.6768628594906193e-07,
"loss": 0.26802340149879456,
"step": 1139
},
{
"epoch": 31.676056338028168,
"grad_norm": 0.33371275663375854,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.26279398798942566,
"step": 1140
},
{
"epoch": 31.704225352112676,
"grad_norm": 0.3587881624698639,
"learning_rate": 3.6625129282837685e-07,
"loss": 0.26237016916275024,
"step": 1141
},
{
"epoch": 31.732394366197184,
"grad_norm": 0.3388115465641022,
"learning_rate": 3.655346347712922e-07,
"loss": 0.2542800307273865,
"step": 1142
},
{
"epoch": 31.760563380281692,
"grad_norm": 0.3145511746406555,
"learning_rate": 3.6481853862770107e-07,
"loss": 0.2536108195781708,
"step": 1143
},
{
"epoch": 31.788732394366196,
"grad_norm": 0.34181296825408936,
"learning_rate": 3.641030065789562e-07,
"loss": 0.2601550817489624,
"step": 1144
},
{
"epoch": 31.816901408450704,
"grad_norm": 0.322862833738327,
"learning_rate": 3.6338804080469253e-07,
"loss": 0.25029903650283813,
"step": 1145
},
{
"epoch": 31.845070422535212,
"grad_norm": 0.3622659146785736,
"learning_rate": 3.6267364348281946e-07,
"loss": 0.26150447130203247,
"step": 1146
},
{
"epoch": 31.87323943661972,
"grad_norm": 0.330181360244751,
"learning_rate": 3.6195981678951535e-07,
"loss": 0.2587708830833435,
"step": 1147
},
{
"epoch": 31.901408450704224,
"grad_norm": 0.3616638779640198,
"learning_rate": 3.612465628992203e-07,
"loss": 0.26097607612609863,
"step": 1148
},
{
"epoch": 31.929577464788732,
"grad_norm": 0.3439587652683258,
"learning_rate": 3.60533883984629e-07,
"loss": 0.2429528385400772,
"step": 1149
},
{
"epoch": 31.95774647887324,
"grad_norm": 0.3390144407749176,
"learning_rate": 3.5982178221668533e-07,
"loss": 0.2673777937889099,
"step": 1150
},
{
"epoch": 31.985915492957748,
"grad_norm": 0.3215203881263733,
"learning_rate": 3.591102597645743e-07,
"loss": 0.25635766983032227,
"step": 1151
},
{
"epoch": 32.0,
"grad_norm": 0.4861057698726654,
"learning_rate": 3.5839931879571725e-07,
"loss": 0.26994332671165466,
"step": 1152
},
{
"epoch": 32.028169014084504,
"grad_norm": 0.3433145582675934,
"learning_rate": 3.5768896147576344e-07,
"loss": 0.2525317072868347,
"step": 1153
},
{
"epoch": 32.056338028169016,
"grad_norm": 0.34238752722740173,
"learning_rate": 3.5697918996858443e-07,
"loss": 0.271589457988739,
"step": 1154
},
{
"epoch": 32.08450704225352,
"grad_norm": 0.33140960335731506,
"learning_rate": 3.5627000643626704e-07,
"loss": 0.2612978219985962,
"step": 1155
},
{
"epoch": 32.11267605633803,
"grad_norm": 0.31951841711997986,
"learning_rate": 3.555614130391079e-07,
"loss": 0.27151286602020264,
"step": 1156
},
{
"epoch": 32.140845070422536,
"grad_norm": 0.3442953824996948,
"learning_rate": 3.5485341193560503e-07,
"loss": 0.2442217469215393,
"step": 1157
},
{
"epoch": 32.16901408450704,
"grad_norm": 0.3276779055595398,
"learning_rate": 3.5414600528245266e-07,
"loss": 0.25613170862197876,
"step": 1158
},
{
"epoch": 32.19718309859155,
"grad_norm": 0.33608436584472656,
"learning_rate": 3.534391952345341e-07,
"loss": 0.2614259123802185,
"step": 1159
},
{
"epoch": 32.225352112676056,
"grad_norm": 0.3303307592868805,
"learning_rate": 3.5273298394491515e-07,
"loss": 0.2672120928764343,
"step": 1160
},
{
"epoch": 32.25352112676056,
"grad_norm": 0.32655128836631775,
"learning_rate": 3.5202737356483816e-07,
"loss": 0.25033846497535706,
"step": 1161
},
{
"epoch": 32.28169014084507,
"grad_norm": 0.3326750099658966,
"learning_rate": 3.513223662437147e-07,
"loss": 0.2697717547416687,
"step": 1162
},
{
"epoch": 32.309859154929576,
"grad_norm": 0.33951663970947266,
"learning_rate": 3.5061796412911913e-07,
"loss": 0.25987690687179565,
"step": 1163
},
{
"epoch": 32.33802816901409,
"grad_norm": 0.3316378891468048,
"learning_rate": 3.4991416936678276e-07,
"loss": 0.26063597202301025,
"step": 1164
},
{
"epoch": 32.36619718309859,
"grad_norm": 0.33838751912117004,
"learning_rate": 3.49210984100586e-07,
"loss": 0.26821669936180115,
"step": 1165
},
{
"epoch": 32.394366197183096,
"grad_norm": 0.3294714689254761,
"learning_rate": 3.4850841047255364e-07,
"loss": 0.2651536464691162,
"step": 1166
},
{
"epoch": 32.42253521126761,
"grad_norm": 0.32624831795692444,
"learning_rate": 3.4780645062284665e-07,
"loss": 0.26797136664390564,
"step": 1167
},
{
"epoch": 32.45070422535211,
"grad_norm": 0.3322686553001404,
"learning_rate": 3.471051066897562e-07,
"loss": 0.2507922649383545,
"step": 1168
},
{
"epoch": 32.478873239436616,
"grad_norm": 0.34128591418266296,
"learning_rate": 3.4640438080969773e-07,
"loss": 0.2541847229003906,
"step": 1169
},
{
"epoch": 32.50704225352113,
"grad_norm": 0.3294316828250885,
"learning_rate": 3.45704275117204e-07,
"loss": 0.26326608657836914,
"step": 1170
},
{
"epoch": 32.53521126760563,
"grad_norm": 0.3293727934360504,
"learning_rate": 3.450047917449181e-07,
"loss": 0.2654852271080017,
"step": 1171
},
{
"epoch": 32.563380281690144,
"grad_norm": 0.32460466027259827,
"learning_rate": 3.4430593282358777e-07,
"loss": 0.25532153248786926,
"step": 1172
},
{
"epoch": 32.59154929577465,
"grad_norm": 0.3373318016529083,
"learning_rate": 3.4360770048205843e-07,
"loss": 0.25554513931274414,
"step": 1173
},
{
"epoch": 32.61971830985915,
"grad_norm": 0.34251123666763306,
"learning_rate": 3.429100968472668e-07,
"loss": 0.26249927282333374,
"step": 1174
},
{
"epoch": 32.647887323943664,
"grad_norm": 0.32484838366508484,
"learning_rate": 3.4221312404423486e-07,
"loss": 0.2562830448150635,
"step": 1175
},
{
"epoch": 32.67605633802817,
"grad_norm": 0.3435952365398407,
"learning_rate": 3.4151678419606233e-07,
"loss": 0.2574070692062378,
"step": 1176
},
{
"epoch": 32.70422535211267,
"grad_norm": 0.33101195096969604,
"learning_rate": 3.4082107942392136e-07,
"loss": 0.257138729095459,
"step": 1177
},
{
"epoch": 32.732394366197184,
"grad_norm": 0.37783390283584595,
"learning_rate": 3.4012601184704904e-07,
"loss": 0.26037871837615967,
"step": 1178
},
{
"epoch": 32.76056338028169,
"grad_norm": 0.33994340896606445,
"learning_rate": 3.3943158358274203e-07,
"loss": 0.27281370759010315,
"step": 1179
},
{
"epoch": 32.7887323943662,
"grad_norm": 0.32044896483421326,
"learning_rate": 3.387377967463493e-07,
"loss": 0.2526357173919678,
"step": 1180
},
{
"epoch": 32.816901408450704,
"grad_norm": 0.3177328109741211,
"learning_rate": 3.3804465345126545e-07,
"loss": 0.24474188685417175,
"step": 1181
},
{
"epoch": 32.84507042253521,
"grad_norm": 0.3454241454601288,
"learning_rate": 3.3735215580892575e-07,
"loss": 0.24287842214107513,
"step": 1182
},
{
"epoch": 32.87323943661972,
"grad_norm": 0.3315359354019165,
"learning_rate": 3.366603059287977e-07,
"loss": 0.26422587037086487,
"step": 1183
},
{
"epoch": 32.901408450704224,
"grad_norm": 0.3329971730709076,
"learning_rate": 3.359691059183761e-07,
"loss": 0.2687873840332031,
"step": 1184
},
{
"epoch": 32.929577464788736,
"grad_norm": 0.32194119691848755,
"learning_rate": 3.3527855788317614e-07,
"loss": 0.2529294788837433,
"step": 1185
},
{
"epoch": 32.95774647887324,
"grad_norm": 0.3383830487728119,
"learning_rate": 3.3458866392672694e-07,
"loss": 0.24743716418743134,
"step": 1186
},
{
"epoch": 32.985915492957744,
"grad_norm": 0.3237183690071106,
"learning_rate": 3.338994261505649e-07,
"loss": 0.2624974250793457,
"step": 1187
},
{
"epoch": 33.0,
"grad_norm": 0.4738941192626953,
"learning_rate": 3.3321084665422803e-07,
"loss": 0.2611575722694397,
"step": 1188
},
{
"epoch": 33.028169014084504,
"grad_norm": 0.3192257285118103,
"learning_rate": 3.325229275352489e-07,
"loss": 0.25964364409446716,
"step": 1189
},
{
"epoch": 33.056338028169016,
"grad_norm": 0.3343312442302704,
"learning_rate": 3.3183567088914833e-07,
"loss": 0.2630879282951355,
"step": 1190
},
{
"epoch": 33.08450704225352,
"grad_norm": 0.32633543014526367,
"learning_rate": 3.3114907880942933e-07,
"loss": 0.2663639783859253,
"step": 1191
},
{
"epoch": 33.11267605633803,
"grad_norm": 0.3315299451351166,
"learning_rate": 3.3046315338757026e-07,
"loss": 0.2600438892841339,
"step": 1192
},
{
"epoch": 33.140845070422536,
"grad_norm": 0.35579875111579895,
"learning_rate": 3.297778967130191e-07,
"loss": 0.2606794834136963,
"step": 1193
},
{
"epoch": 33.16901408450704,
"grad_norm": 0.3733043074607849,
"learning_rate": 3.290933108731866e-07,
"loss": 0.2512716054916382,
"step": 1194
},
{
"epoch": 33.19718309859155,
"grad_norm": 0.345547616481781,
"learning_rate": 3.2840939795343987e-07,
"loss": 0.26478058099746704,
"step": 1195
},
{
"epoch": 33.225352112676056,
"grad_norm": 0.33482369780540466,
"learning_rate": 3.2772616003709616e-07,
"loss": 0.2547541856765747,
"step": 1196
},
{
"epoch": 33.25352112676056,
"grad_norm": 0.3360159695148468,
"learning_rate": 3.270435992054166e-07,
"loss": 0.2729008197784424,
"step": 1197
},
{
"epoch": 33.28169014084507,
"grad_norm": 0.34279924631118774,
"learning_rate": 3.263617175376001e-07,
"loss": 0.253216028213501,
"step": 1198
},
{
"epoch": 33.309859154929576,
"grad_norm": 0.33277833461761475,
"learning_rate": 3.2568051711077636e-07,
"loss": 0.2548581659793854,
"step": 1199
},
{
"epoch": 33.33802816901409,
"grad_norm": 0.3363766074180603,
"learning_rate": 3.250000000000001e-07,
"loss": 0.25859585404396057,
"step": 1200
},
{
"epoch": 33.36619718309859,
"grad_norm": 0.3143514394760132,
"learning_rate": 3.2432016827824414e-07,
"loss": 0.25202757120132446,
"step": 1201
},
{
"epoch": 33.394366197183096,
"grad_norm": 0.3307502567768097,
"learning_rate": 3.2364102401639423e-07,
"loss": 0.2585509717464447,
"step": 1202
},
{
"epoch": 33.42253521126761,
"grad_norm": 0.33466944098472595,
"learning_rate": 3.229625692832414e-07,
"loss": 0.25337138772010803,
"step": 1203
},
{
"epoch": 33.45070422535211,
"grad_norm": 0.31453531980514526,
"learning_rate": 3.222848061454764e-07,
"loss": 0.2618822455406189,
"step": 1204
},
{
"epoch": 33.478873239436616,
"grad_norm": 0.35038280487060547,
"learning_rate": 3.216077366676833e-07,
"loss": 0.26571914553642273,
"step": 1205
},
{
"epoch": 33.50704225352113,
"grad_norm": 0.3479344844818115,
"learning_rate": 3.209313629123329e-07,
"loss": 0.26047736406326294,
"step": 1206
},
{
"epoch": 33.53521126760563,
"grad_norm": 0.339733362197876,
"learning_rate": 3.2025568693977745e-07,
"loss": 0.2580920159816742,
"step": 1207
},
{
"epoch": 33.563380281690144,
"grad_norm": 0.3457892835140228,
"learning_rate": 3.195807108082429e-07,
"loss": 0.25361278653144836,
"step": 1208
},
{
"epoch": 33.59154929577465,
"grad_norm": 0.35116419196128845,
"learning_rate": 3.1890643657382356e-07,
"loss": 0.2517722249031067,
"step": 1209
},
{
"epoch": 33.61971830985915,
"grad_norm": 0.3323304355144501,
"learning_rate": 3.182328662904756e-07,
"loss": 0.25763052701950073,
"step": 1210
},
{
"epoch": 33.647887323943664,
"grad_norm": 0.3180283308029175,
"learning_rate": 3.175600020100112e-07,
"loss": 0.26268666982650757,
"step": 1211
},
{
"epoch": 33.67605633802817,
"grad_norm": 0.32394516468048096,
"learning_rate": 3.168878457820915e-07,
"loss": 0.2540284991264343,
"step": 1212
},
{
"epoch": 33.70422535211267,
"grad_norm": 0.3315521478652954,
"learning_rate": 3.162163996542209e-07,
"loss": 0.26291581988334656,
"step": 1213
},
{
"epoch": 33.732394366197184,
"grad_norm": 0.32950082421302795,
"learning_rate": 3.155456656717408e-07,
"loss": 0.2569209039211273,
"step": 1214
},
{
"epoch": 33.76056338028169,
"grad_norm": 0.3513064384460449,
"learning_rate": 3.14875645877823e-07,
"loss": 0.24890759587287903,
"step": 1215
},
{
"epoch": 33.7887323943662,
"grad_norm": 0.3389022946357727,
"learning_rate": 3.142063423134644e-07,
"loss": 0.2649242579936981,
"step": 1216
},
{
"epoch": 33.816901408450704,
"grad_norm": 0.3270207941532135,
"learning_rate": 3.135377570174796e-07,
"loss": 0.26036375761032104,
"step": 1217
},
{
"epoch": 33.84507042253521,
"grad_norm": 0.35390451550483704,
"learning_rate": 3.1286989202649503e-07,
"loss": 0.25314897298812866,
"step": 1218
},
{
"epoch": 33.87323943661972,
"grad_norm": 0.3263014256954193,
"learning_rate": 3.122027493749438e-07,
"loss": 0.2565680742263794,
"step": 1219
},
{
"epoch": 33.901408450704224,
"grad_norm": 0.3133479654788971,
"learning_rate": 3.115363310950578e-07,
"loss": 0.2629280090332031,
"step": 1220
},
{
"epoch": 33.929577464788736,
"grad_norm": 0.3530975580215454,
"learning_rate": 3.1087063921686263e-07,
"loss": 0.26493778824806213,
"step": 1221
},
{
"epoch": 33.95774647887324,
"grad_norm": 0.3344945013523102,
"learning_rate": 3.102056757681715e-07,
"loss": 0.2550634741783142,
"step": 1222
},
{
"epoch": 33.985915492957744,
"grad_norm": 0.32563889026641846,
"learning_rate": 3.0954144277457817e-07,
"loss": 0.25193893909454346,
"step": 1223
},
{
"epoch": 34.0,
"grad_norm": 0.48929160833358765,
"learning_rate": 3.0887794225945143e-07,
"loss": 0.2488047182559967,
"step": 1224
},
{
"epoch": 34.028169014084504,
"grad_norm": 0.32252368330955505,
"learning_rate": 3.0821517624392925e-07,
"loss": 0.25322937965393066,
"step": 1225
},
{
"epoch": 34.056338028169016,
"grad_norm": 0.3510408401489258,
"learning_rate": 3.075531467469116e-07,
"loss": 0.265546977519989,
"step": 1226
},
{
"epoch": 34.08450704225352,
"grad_norm": 0.33205100893974304,
"learning_rate": 3.0689185578505525e-07,
"loss": 0.2621091902256012,
"step": 1227
},
{
"epoch": 34.11267605633803,
"grad_norm": 0.33356767892837524,
"learning_rate": 3.062313053727671e-07,
"loss": 0.24525871872901917,
"step": 1228
},
{
"epoch": 34.140845070422536,
"grad_norm": 0.32789838314056396,
"learning_rate": 3.055714975221981e-07,
"loss": 0.2655676007270813,
"step": 1229
},
{
"epoch": 34.16901408450704,
"grad_norm": 0.3837502598762512,
"learning_rate": 3.0491243424323783e-07,
"loss": 0.2583563029766083,
"step": 1230
},
{
"epoch": 34.19718309859155,
"grad_norm": 0.32497507333755493,
"learning_rate": 3.0425411754350694e-07,
"loss": 0.25412964820861816,
"step": 1231
},
{
"epoch": 34.225352112676056,
"grad_norm": 0.3423527181148529,
"learning_rate": 3.0359654942835247e-07,
"loss": 0.2603622078895569,
"step": 1232
},
{
"epoch": 34.25352112676056,
"grad_norm": 0.3326815068721771,
"learning_rate": 3.029397319008407e-07,
"loss": 0.2565937638282776,
"step": 1233
},
{
"epoch": 34.28169014084507,
"grad_norm": 0.3410370945930481,
"learning_rate": 3.02283666961752e-07,
"loss": 0.2687773108482361,
"step": 1234
},
{
"epoch": 34.309859154929576,
"grad_norm": 0.33839917182922363,
"learning_rate": 3.016283566095739e-07,
"loss": 0.27057865262031555,
"step": 1235
},
{
"epoch": 34.33802816901409,
"grad_norm": 0.32578834891319275,
"learning_rate": 3.0097380284049523e-07,
"loss": 0.2486121952533722,
"step": 1236
},
{
"epoch": 34.36619718309859,
"grad_norm": 0.34315571188926697,
"learning_rate": 3.003200076484004e-07,
"loss": 0.24546003341674805,
"step": 1237
},
{
"epoch": 34.394366197183096,
"grad_norm": 0.32684844732284546,
"learning_rate": 2.996669730248628e-07,
"loss": 0.2699982523918152,
"step": 1238
},
{
"epoch": 34.42253521126761,
"grad_norm": 0.33143216371536255,
"learning_rate": 2.9901470095913943e-07,
"loss": 0.25373488664627075,
"step": 1239
},
{
"epoch": 34.45070422535211,
"grad_norm": 0.35439276695251465,
"learning_rate": 2.9836319343816397e-07,
"loss": 0.24537047743797302,
"step": 1240
},
{
"epoch": 34.478873239436616,
"grad_norm": 0.33683332800865173,
"learning_rate": 2.977124524465413e-07,
"loss": 0.2581592798233032,
"step": 1241
},
{
"epoch": 34.50704225352113,
"grad_norm": 0.3526037037372589,
"learning_rate": 2.9706247996654134e-07,
"loss": 0.2586764693260193,
"step": 1242
},
{
"epoch": 34.53521126760563,
"grad_norm": 0.3380417823791504,
"learning_rate": 2.964132779780929e-07,
"loss": 0.263625830411911,
"step": 1243
},
{
"epoch": 34.563380281690144,
"grad_norm": 0.3443485200405121,
"learning_rate": 2.9576484845877793e-07,
"loss": 0.2503140866756439,
"step": 1244
},
{
"epoch": 34.59154929577465,
"grad_norm": 0.35234031081199646,
"learning_rate": 2.9511719338382535e-07,
"loss": 0.25954437255859375,
"step": 1245
},
{
"epoch": 34.61971830985915,
"grad_norm": 0.3406411111354828,
"learning_rate": 2.944703147261046e-07,
"loss": 0.2619974613189697,
"step": 1246
},
{
"epoch": 34.647887323943664,
"grad_norm": 0.3347373306751251,
"learning_rate": 2.938242144561201e-07,
"loss": 0.2618395984172821,
"step": 1247
},
{
"epoch": 34.67605633802817,
"grad_norm": 0.33204221725463867,
"learning_rate": 2.931788945420058e-07,
"loss": 0.26617297530174255,
"step": 1248
},
{
"epoch": 34.70422535211267,
"grad_norm": 0.3484657406806946,
"learning_rate": 2.925343569495178e-07,
"loss": 0.2656903564929962,
"step": 1249
},
{
"epoch": 34.732394366197184,
"grad_norm": 0.3254799544811249,
"learning_rate": 2.918906036420294e-07,
"loss": 0.24855300784111023,
"step": 1250
},
{
"epoch": 34.76056338028169,
"grad_norm": 0.33594822883605957,
"learning_rate": 2.9124763658052474e-07,
"loss": 0.2618425786495209,
"step": 1251
},
{
"epoch": 34.7887323943662,
"grad_norm": 0.323949933052063,
"learning_rate": 2.9060545772359305e-07,
"loss": 0.2546170949935913,
"step": 1252
},
{
"epoch": 34.816901408450704,
"grad_norm": 0.3242202699184418,
"learning_rate": 2.8996406902742267e-07,
"loss": 0.24211625754833221,
"step": 1253
},
{
"epoch": 34.84507042253521,
"grad_norm": 0.3353058695793152,
"learning_rate": 2.893234724457946e-07,
"loss": 0.25402140617370605,
"step": 1254
},
{
"epoch": 34.87323943661972,
"grad_norm": 0.33988505601882935,
"learning_rate": 2.886836699300771e-07,
"loss": 0.24861261248588562,
"step": 1255
},
{
"epoch": 34.901408450704224,
"grad_norm": 0.3339218199253082,
"learning_rate": 2.8804466342921987e-07,
"loss": 0.25520533323287964,
"step": 1256
},
{
"epoch": 34.929577464788736,
"grad_norm": 0.3448787033557892,
"learning_rate": 2.874064548897472e-07,
"loss": 0.2663518786430359,
"step": 1257
},
{
"epoch": 34.95774647887324,
"grad_norm": 0.3454734981060028,
"learning_rate": 2.86769046255753e-07,
"loss": 0.25287461280822754,
"step": 1258
},
{
"epoch": 34.985915492957744,
"grad_norm": 0.3322574496269226,
"learning_rate": 2.8613243946889477e-07,
"loss": 0.25937291979789734,
"step": 1259
},
{
"epoch": 35.0,
"grad_norm": 0.47356757521629333,
"learning_rate": 2.854966364683872e-07,
"loss": 0.2588436007499695,
"step": 1260
},
{
"epoch": 35.028169014084504,
"grad_norm": 0.32370901107788086,
"learning_rate": 2.848616391909959e-07,
"loss": 0.2847004234790802,
"step": 1261
},
{
"epoch": 35.056338028169016,
"grad_norm": 0.3340662717819214,
"learning_rate": 2.842274495710335e-07,
"loss": 0.24963748455047607,
"step": 1262
},
{
"epoch": 35.08450704225352,
"grad_norm": 0.3470820188522339,
"learning_rate": 2.835940695403512e-07,
"loss": 0.25704559683799744,
"step": 1263
},
{
"epoch": 35.11267605633803,
"grad_norm": 0.3213740289211273,
"learning_rate": 2.829615010283344e-07,
"loss": 0.24562162160873413,
"step": 1264
},
{
"epoch": 35.140845070422536,
"grad_norm": 0.3323827385902405,
"learning_rate": 2.8232974596189653e-07,
"loss": 0.25376367568969727,
"step": 1265
},
{
"epoch": 35.16901408450704,
"grad_norm": 0.32620102167129517,
"learning_rate": 2.8169880626547283e-07,
"loss": 0.25920748710632324,
"step": 1266
},
{
"epoch": 35.19718309859155,
"grad_norm": 0.34155285358428955,
"learning_rate": 2.8106868386101545e-07,
"loss": 0.2532484233379364,
"step": 1267
},
{
"epoch": 35.225352112676056,
"grad_norm": 0.32295599579811096,
"learning_rate": 2.8043938066798645e-07,
"loss": 0.2596886456012726,
"step": 1268
},
{
"epoch": 35.25352112676056,
"grad_norm": 0.3390556871891022,
"learning_rate": 2.7981089860335225e-07,
"loss": 0.2628597021102905,
"step": 1269
},
{
"epoch": 35.28169014084507,
"grad_norm": 0.3397858738899231,
"learning_rate": 2.791832395815782e-07,
"loss": 0.260450154542923,
"step": 1270
},
{
"epoch": 35.309859154929576,
"grad_norm": 0.3356383442878723,
"learning_rate": 2.7855640551462287e-07,
"loss": 0.24709969758987427,
"step": 1271
},
{
"epoch": 35.33802816901409,
"grad_norm": 0.3386112153530121,
"learning_rate": 2.7793039831193133e-07,
"loss": 0.2554944157600403,
"step": 1272
},
{
"epoch": 35.36619718309859,
"grad_norm": 0.34547311067581177,
"learning_rate": 2.773052198804301e-07,
"loss": 0.2689363658428192,
"step": 1273
},
{
"epoch": 35.394366197183096,
"grad_norm": 0.34119531512260437,
"learning_rate": 2.766808721245211e-07,
"loss": 0.2566688656806946,
"step": 1274
},
{
"epoch": 35.42253521126761,
"grad_norm": 0.3342508375644684,
"learning_rate": 2.760573569460757e-07,
"loss": 0.24888336658477783,
"step": 1275
},
{
"epoch": 35.45070422535211,
"grad_norm": 0.33420711755752563,
"learning_rate": 2.7543467624442956e-07,
"loss": 0.27446046471595764,
"step": 1276
},
{
"epoch": 35.478873239436616,
"grad_norm": 0.3241899907588959,
"learning_rate": 2.7481283191637605e-07,
"loss": 0.24648495018482208,
"step": 1277
},
{
"epoch": 35.50704225352113,
"grad_norm": 0.3267020285129547,
"learning_rate": 2.741918258561607e-07,
"loss": 0.2573559880256653,
"step": 1278
},
{
"epoch": 35.53521126760563,
"grad_norm": 0.3532126247882843,
"learning_rate": 2.7357165995547547e-07,
"loss": 0.2432764172554016,
"step": 1279
},
{
"epoch": 35.563380281690144,
"grad_norm": 0.33826351165771484,
"learning_rate": 2.729523361034538e-07,
"loss": 0.25668877363204956,
"step": 1280
},
{
"epoch": 35.59154929577465,
"grad_norm": 0.338796466588974,
"learning_rate": 2.7233385618666315e-07,
"loss": 0.2522228956222534,
"step": 1281
},
{
"epoch": 35.61971830985915,
"grad_norm": 0.3262656629085541,
"learning_rate": 2.717162220891007e-07,
"loss": 0.2595973312854767,
"step": 1282
},
{
"epoch": 35.647887323943664,
"grad_norm": 0.3441692590713501,
"learning_rate": 2.7109943569218707e-07,
"loss": 0.26480039954185486,
"step": 1283
},
{
"epoch": 35.67605633802817,
"grad_norm": 0.3370777368545532,
"learning_rate": 2.7048349887476037e-07,
"loss": 0.25393831729888916,
"step": 1284
},
{
"epoch": 35.70422535211267,
"grad_norm": 0.34027761220932007,
"learning_rate": 2.698684135130713e-07,
"loss": 0.24741466343402863,
"step": 1285
},
{
"epoch": 35.732394366197184,
"grad_norm": 0.3438904881477356,
"learning_rate": 2.692541814807763e-07,
"loss": 0.2620083689689636,
"step": 1286
},
{
"epoch": 35.76056338028169,
"grad_norm": 0.33286988735198975,
"learning_rate": 2.686408046489328e-07,
"loss": 0.2683720588684082,
"step": 1287
},
{
"epoch": 35.7887323943662,
"grad_norm": 0.3397563397884369,
"learning_rate": 2.6802828488599294e-07,
"loss": 0.25813597440719604,
"step": 1288
},
{
"epoch": 35.816901408450704,
"grad_norm": 0.34016039967536926,
"learning_rate": 2.6741662405779796e-07,
"loss": 0.25924018025398254,
"step": 1289
},
{
"epoch": 35.84507042253521,
"grad_norm": 0.3287438452243805,
"learning_rate": 2.6680582402757324e-07,
"loss": 0.24357835948467255,
"step": 1290
},
{
"epoch": 35.87323943661972,
"grad_norm": 0.3473154306411743,
"learning_rate": 2.661958866559213e-07,
"loss": 0.25433164834976196,
"step": 1291
},
{
"epoch": 35.901408450704224,
"grad_norm": 0.3320452570915222,
"learning_rate": 2.655868138008171e-07,
"loss": 0.2620140016078949,
"step": 1292
},
{
"epoch": 35.929577464788736,
"grad_norm": 0.35027673840522766,
"learning_rate": 2.649786073176025e-07,
"loss": 0.26484349370002747,
"step": 1293
},
{
"epoch": 35.95774647887324,
"grad_norm": 0.34910938143730164,
"learning_rate": 2.6437126905897967e-07,
"loss": 0.24849724769592285,
"step": 1294
},
{
"epoch": 35.985915492957744,
"grad_norm": 0.3321913480758667,
"learning_rate": 2.637648008750062e-07,
"loss": 0.24661482870578766,
"step": 1295
},
{
"epoch": 36.0,
"grad_norm": 0.48746395111083984,
"learning_rate": 2.631592046130896e-07,
"loss": 0.25251615047454834,
"step": 1296
},
{
"epoch": 36.028169014084504,
"grad_norm": 0.3326322138309479,
"learning_rate": 2.6255448211798103e-07,
"loss": 0.2514849603176117,
"step": 1297
},
{
"epoch": 36.056338028169016,
"grad_norm": 0.323958158493042,
"learning_rate": 2.6195063523177e-07,
"loss": 0.2420714795589447,
"step": 1298
},
{
"epoch": 36.08450704225352,
"grad_norm": 0.3715856075286865,
"learning_rate": 2.613476657938789e-07,
"loss": 0.24617412686347961,
"step": 1299
},
{
"epoch": 36.11267605633803,
"grad_norm": 0.34012261033058167,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.26243406534194946,
"step": 1300
},
{
"epoch": 36.140845070422536,
"grad_norm": 0.33578699827194214,
"learning_rate": 2.6014436660737605e-07,
"loss": 0.2461467981338501,
"step": 1301
},
{
"epoch": 36.16901408450704,
"grad_norm": 0.3389386832714081,
"learning_rate": 2.595440405242222e-07,
"loss": 0.2597675025463104,
"step": 1302
},
{
"epoch": 36.19718309859155,
"grad_norm": 0.33628833293914795,
"learning_rate": 2.589445992202931e-07,
"loss": 0.2510983943939209,
"step": 1303
},
{
"epoch": 36.225352112676056,
"grad_norm": 0.3409932851791382,
"learning_rate": 2.583460445215911e-07,
"loss": 0.2607109844684601,
"step": 1304
},
{
"epoch": 36.25352112676056,
"grad_norm": 0.3476935625076294,
"learning_rate": 2.5774837825141736e-07,
"loss": 0.26868295669555664,
"step": 1305
},
{
"epoch": 36.28169014084507,
"grad_norm": 0.3389628231525421,
"learning_rate": 2.571516022303671e-07,
"loss": 0.24396029114723206,
"step": 1306
},
{
"epoch": 36.309859154929576,
"grad_norm": 0.3351360261440277,
"learning_rate": 2.565557182763235e-07,
"loss": 0.2638927102088928,
"step": 1307
},
{
"epoch": 36.33802816901409,
"grad_norm": 0.34508877992630005,
"learning_rate": 2.5596072820445254e-07,
"loss": 0.25982603430747986,
"step": 1308
},
{
"epoch": 36.36619718309859,
"grad_norm": 0.3333590626716614,
"learning_rate": 2.5536663382719713e-07,
"loss": 0.25606241822242737,
"step": 1309
},
{
"epoch": 36.394366197183096,
"grad_norm": 0.33822396397590637,
"learning_rate": 2.547734369542718e-07,
"loss": 0.2518611252307892,
"step": 1310
},
{
"epoch": 36.42253521126761,
"grad_norm": 0.3358154594898224,
"learning_rate": 2.5418113939265686e-07,
"loss": 0.25333690643310547,
"step": 1311
},
{
"epoch": 36.45070422535211,
"grad_norm": 0.33005034923553467,
"learning_rate": 2.5358974294659373e-07,
"loss": 0.24985584616661072,
"step": 1312
},
{
"epoch": 36.478873239436616,
"grad_norm": 0.3343973159790039,
"learning_rate": 2.5299924941757843e-07,
"loss": 0.27109482884407043,
"step": 1313
},
{
"epoch": 36.50704225352113,
"grad_norm": 0.33798739314079285,
"learning_rate": 2.5240966060435674e-07,
"loss": 0.2599262595176697,
"step": 1314
},
{
"epoch": 36.53521126760563,
"grad_norm": 0.33094605803489685,
"learning_rate": 2.5182097830291824e-07,
"loss": 0.24939575791358948,
"step": 1315
},
{
"epoch": 36.563380281690144,
"grad_norm": 0.3303806185722351,
"learning_rate": 2.512332043064913e-07,
"loss": 0.2498035877943039,
"step": 1316
},
{
"epoch": 36.59154929577465,
"grad_norm": 0.3437672555446625,
"learning_rate": 2.5064634040553767e-07,
"loss": 0.26817601919174194,
"step": 1317
},
{
"epoch": 36.61971830985915,
"grad_norm": 0.3672111928462982,
"learning_rate": 2.5006038838774647e-07,
"loss": 0.2572394609451294,
"step": 1318
},
{
"epoch": 36.647887323943664,
"grad_norm": 0.34106817841529846,
"learning_rate": 2.494753500380291e-07,
"loss": 0.25872814655303955,
"step": 1319
},
{
"epoch": 36.67605633802817,
"grad_norm": 0.35012519359588623,
"learning_rate": 2.488912271385139e-07,
"loss": 0.2478848099708557,
"step": 1320
},
{
"epoch": 36.70422535211267,
"grad_norm": 0.3354050815105438,
"learning_rate": 2.483080214685404e-07,
"loss": 0.2592930793762207,
"step": 1321
},
{
"epoch": 36.732394366197184,
"grad_norm": 0.3539486825466156,
"learning_rate": 2.4772573480465445e-07,
"loss": 0.24492186307907104,
"step": 1322
},
{
"epoch": 36.76056338028169,
"grad_norm": 0.34425100684165955,
"learning_rate": 2.471443689206021e-07,
"loss": 0.2586178779602051,
"step": 1323
},
{
"epoch": 36.7887323943662,
"grad_norm": 0.35161006450653076,
"learning_rate": 2.465639255873246e-07,
"loss": 0.2581009268760681,
"step": 1324
},
{
"epoch": 36.816901408450704,
"grad_norm": 0.3478921949863434,
"learning_rate": 2.4598440657295286e-07,
"loss": 0.2674616575241089,
"step": 1325
},
{
"epoch": 36.84507042253521,
"grad_norm": 0.35100990533828735,
"learning_rate": 2.454058136428027e-07,
"loss": 0.27003878355026245,
"step": 1326
},
{
"epoch": 36.87323943661972,
"grad_norm": 0.3363000452518463,
"learning_rate": 2.4482814855936834e-07,
"loss": 0.2609623968601227,
"step": 1327
},
{
"epoch": 36.901408450704224,
"grad_norm": 0.3406379222869873,
"learning_rate": 2.4425141308231765e-07,
"loss": 0.2661615014076233,
"step": 1328
},
{
"epoch": 36.929577464788736,
"grad_norm": 0.331514447927475,
"learning_rate": 2.43675608968487e-07,
"loss": 0.24595093727111816,
"step": 1329
},
{
"epoch": 36.95774647887324,
"grad_norm": 0.33636540174484253,
"learning_rate": 2.4310073797187573e-07,
"loss": 0.2518694996833801,
"step": 1330
},
{
"epoch": 36.985915492957744,
"grad_norm": 0.3203655779361725,
"learning_rate": 2.4252680184364045e-07,
"loss": 0.24997392296791077,
"step": 1331
},
{
"epoch": 37.0,
"grad_norm": 0.47873687744140625,
"learning_rate": 2.4195380233209006e-07,
"loss": 0.24962179362773895,
"step": 1332
}
],
"logging_steps": 1,
"max_steps": 1800,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 1.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.788879174705873e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}