{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8369601606963508,
"eval_steps": 500,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00033478406427854036,
"grad_norm": 2.5951156616210938,
"learning_rate": 5e-06,
"loss": 2.0872,
"step": 1
},
{
"epoch": 0.0006695681285570807,
"grad_norm": 2.011127471923828,
"learning_rate": 1e-05,
"loss": 2.0068,
"step": 2
},
{
"epoch": 0.001004352192835621,
"grad_norm": 2.058666467666626,
"learning_rate": 1.5e-05,
"loss": 2.0258,
"step": 3
},
{
"epoch": 0.0013391362571141614,
"grad_norm": 2.2120566368103027,
"learning_rate": 2e-05,
"loss": 2.0142,
"step": 4
},
{
"epoch": 0.0016739203213927017,
"grad_norm": 2.370628595352173,
"learning_rate": 2.5e-05,
"loss": 2.0344,
"step": 5
},
{
"epoch": 0.002008704385671242,
"grad_norm": 1.437334418296814,
"learning_rate": 3e-05,
"loss": 1.9203,
"step": 6
},
{
"epoch": 0.002343488449949782,
"grad_norm": 1.1889039278030396,
"learning_rate": 3.5e-05,
"loss": 1.9264,
"step": 7
},
{
"epoch": 0.002678272514228323,
"grad_norm": 1.0925624370574951,
"learning_rate": 4e-05,
"loss": 1.9148,
"step": 8
},
{
"epoch": 0.003013056578506863,
"grad_norm": 0.5106806755065918,
"learning_rate": 4.5e-05,
"loss": 1.9098,
"step": 9
},
{
"epoch": 0.0033478406427854034,
"grad_norm": 0.506732702255249,
"learning_rate": 5e-05,
"loss": 1.832,
"step": 10
},
{
"epoch": 0.0036826247070639436,
"grad_norm": 0.47460949420928955,
"learning_rate": 5.500000000000001e-05,
"loss": 1.879,
"step": 11
},
{
"epoch": 0.004017408771342484,
"grad_norm": 0.5833293199539185,
"learning_rate": 6e-05,
"loss": 1.8777,
"step": 12
},
{
"epoch": 0.004352192835621024,
"grad_norm": 0.4383687376976013,
"learning_rate": 6.500000000000001e-05,
"loss": 1.8559,
"step": 13
},
{
"epoch": 0.004686976899899564,
"grad_norm": 0.35056746006011963,
"learning_rate": 7e-05,
"loss": 1.7573,
"step": 14
},
{
"epoch": 0.0050217609641781055,
"grad_norm": 0.5545064210891724,
"learning_rate": 7.500000000000001e-05,
"loss": 1.7541,
"step": 15
},
{
"epoch": 0.005356545028456646,
"grad_norm": 0.3440060019493103,
"learning_rate": 8e-05,
"loss": 1.6876,
"step": 16
},
{
"epoch": 0.005691329092735186,
"grad_norm": 0.36561861634254456,
"learning_rate": 8.5e-05,
"loss": 1.7454,
"step": 17
},
{
"epoch": 0.006026113157013726,
"grad_norm": 0.282402902841568,
"learning_rate": 9e-05,
"loss": 1.8184,
"step": 18
},
{
"epoch": 0.0063608972212922665,
"grad_norm": 0.1981375813484192,
"learning_rate": 9.5e-05,
"loss": 1.7448,
"step": 19
},
{
"epoch": 0.006695681285570807,
"grad_norm": 0.16754242777824402,
"learning_rate": 0.0001,
"loss": 1.7555,
"step": 20
},
{
"epoch": 0.007030465349849347,
"grad_norm": 0.17915141582489014,
"learning_rate": 0.0001,
"loss": 1.7533,
"step": 21
},
{
"epoch": 0.007365249414127887,
"grad_norm": 0.1990516483783722,
"learning_rate": 0.0001,
"loss": 1.6819,
"step": 22
},
{
"epoch": 0.0077000334784064275,
"grad_norm": 0.20808538794517517,
"learning_rate": 0.0001,
"loss": 1.7345,
"step": 23
},
{
"epoch": 0.008034817542684968,
"grad_norm": 0.2500799894332886,
"learning_rate": 0.0001,
"loss": 1.7636,
"step": 24
},
{
"epoch": 0.008369601606963508,
"grad_norm": 0.2141977846622467,
"learning_rate": 0.0001,
"loss": 1.7475,
"step": 25
},
{
"epoch": 0.008704385671242048,
"grad_norm": 0.2018044888973236,
"learning_rate": 0.0001,
"loss": 1.6445,
"step": 26
},
{
"epoch": 0.009039169735520589,
"grad_norm": 0.19822722673416138,
"learning_rate": 0.0001,
"loss": 1.7604,
"step": 27
},
{
"epoch": 0.009373953799799129,
"grad_norm": 0.18675795197486877,
"learning_rate": 0.0001,
"loss": 1.7743,
"step": 28
},
{
"epoch": 0.009708737864077669,
"grad_norm": 0.16032469272613525,
"learning_rate": 0.0001,
"loss": 1.7221,
"step": 29
},
{
"epoch": 0.010043521928356211,
"grad_norm": 0.17107701301574707,
"learning_rate": 0.0001,
"loss": 1.7514,
"step": 30
},
{
"epoch": 0.010378305992634751,
"grad_norm": 0.1589154601097107,
"learning_rate": 0.0001,
"loss": 1.6738,
"step": 31
},
{
"epoch": 0.010713090056913292,
"grad_norm": 0.13346004486083984,
"learning_rate": 0.0001,
"loss": 1.6011,
"step": 32
},
{
"epoch": 0.011047874121191832,
"grad_norm": 0.1687479168176651,
"learning_rate": 0.0001,
"loss": 1.7694,
"step": 33
},
{
"epoch": 0.011382658185470372,
"grad_norm": 0.14785747230052948,
"learning_rate": 0.0001,
"loss": 1.6836,
"step": 34
},
{
"epoch": 0.011717442249748912,
"grad_norm": 0.13441652059555054,
"learning_rate": 0.0001,
"loss": 1.7087,
"step": 35
},
{
"epoch": 0.012052226314027453,
"grad_norm": 0.13479024171829224,
"learning_rate": 0.0001,
"loss": 1.6456,
"step": 36
},
{
"epoch": 0.012387010378305993,
"grad_norm": 0.15816231071949005,
"learning_rate": 0.0001,
"loss": 1.6643,
"step": 37
},
{
"epoch": 0.012721794442584533,
"grad_norm": 0.12814071774482727,
"learning_rate": 0.0001,
"loss": 1.6382,
"step": 38
},
{
"epoch": 0.013056578506863073,
"grad_norm": 0.129450261592865,
"learning_rate": 0.0001,
"loss": 1.7623,
"step": 39
},
{
"epoch": 0.013391362571141614,
"grad_norm": 0.13946504890918732,
"learning_rate": 0.0001,
"loss": 1.8067,
"step": 40
},
{
"epoch": 0.013726146635420154,
"grad_norm": 0.1161293238401413,
"learning_rate": 0.0001,
"loss": 1.688,
"step": 41
},
{
"epoch": 0.014060930699698694,
"grad_norm": 0.11296379566192627,
"learning_rate": 0.0001,
"loss": 1.6035,
"step": 42
},
{
"epoch": 0.014395714763977234,
"grad_norm": 0.12507247924804688,
"learning_rate": 0.0001,
"loss": 1.7287,
"step": 43
},
{
"epoch": 0.014730498828255775,
"grad_norm": 0.11496929079294205,
"learning_rate": 0.0001,
"loss": 1.626,
"step": 44
},
{
"epoch": 0.015065282892534315,
"grad_norm": 0.13881774246692657,
"learning_rate": 0.0001,
"loss": 1.7501,
"step": 45
},
{
"epoch": 0.015400066956812855,
"grad_norm": 0.1255090981721878,
"learning_rate": 0.0001,
"loss": 1.6952,
"step": 46
},
{
"epoch": 0.015734851021091397,
"grad_norm": 0.11783197522163391,
"learning_rate": 0.0001,
"loss": 1.6256,
"step": 47
},
{
"epoch": 0.016069635085369936,
"grad_norm": 0.12152993679046631,
"learning_rate": 0.0001,
"loss": 1.6443,
"step": 48
},
{
"epoch": 0.016404419149648478,
"grad_norm": 0.12172088027000427,
"learning_rate": 0.0001,
"loss": 1.6927,
"step": 49
},
{
"epoch": 0.016739203213927016,
"grad_norm": 0.13490882515907288,
"learning_rate": 0.0001,
"loss": 1.7372,
"step": 50
},
{
"epoch": 0.017073987278205558,
"grad_norm": 0.1124483197927475,
"learning_rate": 0.0001,
"loss": 1.6206,
"step": 51
},
{
"epoch": 0.017408771342484097,
"grad_norm": 0.11569201201200485,
"learning_rate": 0.0001,
"loss": 1.7156,
"step": 52
},
{
"epoch": 0.01774355540676264,
"grad_norm": 0.12394021451473236,
"learning_rate": 0.0001,
"loss": 1.6132,
"step": 53
},
{
"epoch": 0.018078339471041177,
"grad_norm": 0.11930014938116074,
"learning_rate": 0.0001,
"loss": 1.6552,
"step": 54
},
{
"epoch": 0.01841312353531972,
"grad_norm": 0.1183612123131752,
"learning_rate": 0.0001,
"loss": 1.6953,
"step": 55
},
{
"epoch": 0.018747907599598258,
"grad_norm": 0.11677711457014084,
"learning_rate": 0.0001,
"loss": 1.6936,
"step": 56
},
{
"epoch": 0.0190826916638768,
"grad_norm": 0.12049452215433121,
"learning_rate": 0.0001,
"loss": 1.6381,
"step": 57
},
{
"epoch": 0.019417475728155338,
"grad_norm": 0.11653623729944229,
"learning_rate": 0.0001,
"loss": 1.7704,
"step": 58
},
{
"epoch": 0.01975225979243388,
"grad_norm": 0.12089766561985016,
"learning_rate": 0.0001,
"loss": 1.6819,
"step": 59
},
{
"epoch": 0.020087043856712422,
"grad_norm": 0.12823008000850677,
"learning_rate": 0.0001,
"loss": 1.7584,
"step": 60
},
{
"epoch": 0.02042182792099096,
"grad_norm": 0.12439601868391037,
"learning_rate": 0.0001,
"loss": 1.6761,
"step": 61
},
{
"epoch": 0.020756611985269503,
"grad_norm": 0.12000609189271927,
"learning_rate": 0.0001,
"loss": 1.7014,
"step": 62
},
{
"epoch": 0.02109139604954804,
"grad_norm": 0.12034812569618225,
"learning_rate": 0.0001,
"loss": 1.7128,
"step": 63
},
{
"epoch": 0.021426180113826583,
"grad_norm": 0.11534720659255981,
"learning_rate": 0.0001,
"loss": 1.694,
"step": 64
},
{
"epoch": 0.02176096417810512,
"grad_norm": 0.11633310467004776,
"learning_rate": 0.0001,
"loss": 1.6718,
"step": 65
},
{
"epoch": 0.022095748242383664,
"grad_norm": 0.13419900834560394,
"learning_rate": 0.0001,
"loss": 1.707,
"step": 66
},
{
"epoch": 0.022430532306662202,
"grad_norm": 0.11928509920835495,
"learning_rate": 0.0001,
"loss": 1.6935,
"step": 67
},
{
"epoch": 0.022765316370940744,
"grad_norm": 0.11948949843645096,
"learning_rate": 0.0001,
"loss": 1.6304,
"step": 68
},
{
"epoch": 0.023100100435219283,
"grad_norm": 0.12679244577884674,
"learning_rate": 0.0001,
"loss": 1.6605,
"step": 69
},
{
"epoch": 0.023434884499497825,
"grad_norm": 0.10675504058599472,
"learning_rate": 0.0001,
"loss": 1.6785,
"step": 70
},
{
"epoch": 0.023769668563776363,
"grad_norm": 0.12108162045478821,
"learning_rate": 0.0001,
"loss": 1.6695,
"step": 71
},
{
"epoch": 0.024104452628054905,
"grad_norm": 0.11032188683748245,
"learning_rate": 0.0001,
"loss": 1.7293,
"step": 72
},
{
"epoch": 0.024439236692333444,
"grad_norm": 0.11592775583267212,
"learning_rate": 0.0001,
"loss": 1.6726,
"step": 73
},
{
"epoch": 0.024774020756611986,
"grad_norm": 0.11566442996263504,
"learning_rate": 0.0001,
"loss": 1.6396,
"step": 74
},
{
"epoch": 0.025108804820890524,
"grad_norm": 0.11673177778720856,
"learning_rate": 0.0001,
"loss": 1.6223,
"step": 75
},
{
"epoch": 0.025443588885169066,
"grad_norm": 0.1140669733285904,
"learning_rate": 0.0001,
"loss": 1.6886,
"step": 76
},
{
"epoch": 0.025778372949447605,
"grad_norm": 0.11448585987091064,
"learning_rate": 0.0001,
"loss": 1.6765,
"step": 77
},
{
"epoch": 0.026113157013726147,
"grad_norm": 0.11363522708415985,
"learning_rate": 0.0001,
"loss": 1.6241,
"step": 78
},
{
"epoch": 0.02644794107800469,
"grad_norm": 0.10882357507944107,
"learning_rate": 0.0001,
"loss": 1.6495,
"step": 79
},
{
"epoch": 0.026782725142283227,
"grad_norm": 0.11577261239290237,
"learning_rate": 0.0001,
"loss": 1.6941,
"step": 80
},
{
"epoch": 0.02711750920656177,
"grad_norm": 0.12674297392368317,
"learning_rate": 0.0001,
"loss": 1.7615,
"step": 81
},
{
"epoch": 0.027452293270840308,
"grad_norm": 0.11801646649837494,
"learning_rate": 0.0001,
"loss": 1.6414,
"step": 82
},
{
"epoch": 0.02778707733511885,
"grad_norm": 0.11615725606679916,
"learning_rate": 0.0001,
"loss": 1.6586,
"step": 83
},
{
"epoch": 0.028121861399397388,
"grad_norm": 0.1159651130437851,
"learning_rate": 0.0001,
"loss": 1.6371,
"step": 84
},
{
"epoch": 0.02845664546367593,
"grad_norm": 0.12539416551589966,
"learning_rate": 0.0001,
"loss": 1.7152,
"step": 85
},
{
"epoch": 0.02879142952795447,
"grad_norm": 0.10691766440868378,
"learning_rate": 0.0001,
"loss": 1.552,
"step": 86
},
{
"epoch": 0.02912621359223301,
"grad_norm": 0.11859432607889175,
"learning_rate": 0.0001,
"loss": 1.6516,
"step": 87
},
{
"epoch": 0.02946099765651155,
"grad_norm": 0.12362800538539886,
"learning_rate": 0.0001,
"loss": 1.6944,
"step": 88
},
{
"epoch": 0.02979578172079009,
"grad_norm": 0.12135861068964005,
"learning_rate": 0.0001,
"loss": 1.6703,
"step": 89
},
{
"epoch": 0.03013056578506863,
"grad_norm": 0.15077495574951172,
"learning_rate": 0.0001,
"loss": 1.7522,
"step": 90
},
{
"epoch": 0.03046534984934717,
"grad_norm": 0.1137770563364029,
"learning_rate": 0.0001,
"loss": 1.6263,
"step": 91
},
{
"epoch": 0.03080013391362571,
"grad_norm": 0.11616989970207214,
"learning_rate": 0.0001,
"loss": 1.7166,
"step": 92
},
{
"epoch": 0.031134917977904252,
"grad_norm": 0.14210130274295807,
"learning_rate": 0.0001,
"loss": 1.7889,
"step": 93
},
{
"epoch": 0.031469702042182794,
"grad_norm": 0.1261507272720337,
"learning_rate": 0.0001,
"loss": 1.6593,
"step": 94
},
{
"epoch": 0.03180448610646133,
"grad_norm": 0.13197694718837738,
"learning_rate": 0.0001,
"loss": 1.6182,
"step": 95
},
{
"epoch": 0.03213927017073987,
"grad_norm": 0.11830636113882065,
"learning_rate": 0.0001,
"loss": 1.6373,
"step": 96
},
{
"epoch": 0.03247405423501841,
"grad_norm": 0.12643662095069885,
"learning_rate": 0.0001,
"loss": 1.6601,
"step": 97
},
{
"epoch": 0.032808838299296955,
"grad_norm": 0.13787776231765747,
"learning_rate": 0.0001,
"loss": 1.7496,
"step": 98
},
{
"epoch": 0.033143622363575494,
"grad_norm": 0.1096898540854454,
"learning_rate": 0.0001,
"loss": 1.5582,
"step": 99
},
{
"epoch": 0.03347840642785403,
"grad_norm": 0.13948234915733337,
"learning_rate": 0.0001,
"loss": 1.6281,
"step": 100
},
{
"epoch": 0.03381319049213258,
"grad_norm": 0.11294490098953247,
"learning_rate": 0.0001,
"loss": 1.6703,
"step": 101
},
{
"epoch": 0.034147974556411116,
"grad_norm": 0.12141433358192444,
"learning_rate": 0.0001,
"loss": 1.6553,
"step": 102
},
{
"epoch": 0.034482758620689655,
"grad_norm": 0.13332489132881165,
"learning_rate": 0.0001,
"loss": 1.6761,
"step": 103
},
{
"epoch": 0.03481754268496819,
"grad_norm": 0.12173039466142654,
"learning_rate": 0.0001,
"loss": 1.6304,
"step": 104
},
{
"epoch": 0.03515232674924674,
"grad_norm": 0.12168910354375839,
"learning_rate": 0.0001,
"loss": 1.6396,
"step": 105
},
{
"epoch": 0.03548711081352528,
"grad_norm": 0.1244431585073471,
"learning_rate": 0.0001,
"loss": 1.6463,
"step": 106
},
{
"epoch": 0.035821894877803816,
"grad_norm": 0.12028734385967255,
"learning_rate": 0.0001,
"loss": 1.684,
"step": 107
},
{
"epoch": 0.036156678942082354,
"grad_norm": 0.12029126286506653,
"learning_rate": 0.0001,
"loss": 1.6799,
"step": 108
},
{
"epoch": 0.0364914630063609,
"grad_norm": 0.11806860566139221,
"learning_rate": 0.0001,
"loss": 1.7245,
"step": 109
},
{
"epoch": 0.03682624707063944,
"grad_norm": 0.12406452000141144,
"learning_rate": 0.0001,
"loss": 1.6881,
"step": 110
},
{
"epoch": 0.03716103113491798,
"grad_norm": 0.118985615670681,
"learning_rate": 0.0001,
"loss": 1.6675,
"step": 111
},
{
"epoch": 0.037495815199196515,
"grad_norm": 0.12949040532112122,
"learning_rate": 0.0001,
"loss": 1.6871,
"step": 112
},
{
"epoch": 0.03783059926347506,
"grad_norm": 0.12375173717737198,
"learning_rate": 0.0001,
"loss": 1.6234,
"step": 113
},
{
"epoch": 0.0381653833277536,
"grad_norm": 0.11779066920280457,
"learning_rate": 0.0001,
"loss": 1.7399,
"step": 114
},
{
"epoch": 0.03850016739203214,
"grad_norm": 0.1195269301533699,
"learning_rate": 0.0001,
"loss": 1.65,
"step": 115
},
{
"epoch": 0.038834951456310676,
"grad_norm": 0.11929327249526978,
"learning_rate": 0.0001,
"loss": 1.6214,
"step": 116
},
{
"epoch": 0.03916973552058922,
"grad_norm": 0.11532218009233475,
"learning_rate": 0.0001,
"loss": 1.6395,
"step": 117
},
{
"epoch": 0.03950451958486776,
"grad_norm": 0.11126700043678284,
"learning_rate": 0.0001,
"loss": 1.622,
"step": 118
},
{
"epoch": 0.0398393036491463,
"grad_norm": 0.1309433877468109,
"learning_rate": 0.0001,
"loss": 1.5791,
"step": 119
},
{
"epoch": 0.040174087713424844,
"grad_norm": 0.12015924602746964,
"learning_rate": 0.0001,
"loss": 1.655,
"step": 120
},
{
"epoch": 0.04050887177770338,
"grad_norm": 0.12615351378917694,
"learning_rate": 0.0001,
"loss": 1.6215,
"step": 121
},
{
"epoch": 0.04084365584198192,
"grad_norm": 0.1387631893157959,
"learning_rate": 0.0001,
"loss": 1.7451,
"step": 122
},
{
"epoch": 0.04117843990626046,
"grad_norm": 0.1166117936372757,
"learning_rate": 0.0001,
"loss": 1.6537,
"step": 123
},
{
"epoch": 0.041513223970539005,
"grad_norm": 0.1521015763282776,
"learning_rate": 0.0001,
"loss": 1.6545,
"step": 124
},
{
"epoch": 0.041848008034817544,
"grad_norm": 0.1296280473470688,
"learning_rate": 0.0001,
"loss": 1.6355,
"step": 125
},
{
"epoch": 0.04218279209909608,
"grad_norm": 0.13189557194709778,
"learning_rate": 0.0001,
"loss": 1.5868,
"step": 126
},
{
"epoch": 0.04251757616337462,
"grad_norm": 0.1445418745279312,
"learning_rate": 0.0001,
"loss": 1.7444,
"step": 127
},
{
"epoch": 0.042852360227653166,
"grad_norm": 0.11560577899217606,
"learning_rate": 0.0001,
"loss": 1.6468,
"step": 128
},
{
"epoch": 0.043187144291931705,
"grad_norm": 0.16312864422798157,
"learning_rate": 0.0001,
"loss": 1.6734,
"step": 129
},
{
"epoch": 0.04352192835621024,
"grad_norm": 0.1284494251012802,
"learning_rate": 0.0001,
"loss": 1.6643,
"step": 130
},
{
"epoch": 0.04385671242048878,
"grad_norm": 0.11743518710136414,
"learning_rate": 0.0001,
"loss": 1.6273,
"step": 131
},
{
"epoch": 0.04419149648476733,
"grad_norm": 0.17127898335456848,
"learning_rate": 0.0001,
"loss": 1.5955,
"step": 132
},
{
"epoch": 0.044526280549045866,
"grad_norm": 0.1554144024848938,
"learning_rate": 0.0001,
"loss": 1.7738,
"step": 133
},
{
"epoch": 0.044861064613324404,
"grad_norm": 0.13085848093032837,
"learning_rate": 0.0001,
"loss": 1.5957,
"step": 134
},
{
"epoch": 0.04519584867760294,
"grad_norm": 0.1883288025856018,
"learning_rate": 0.0001,
"loss": 1.6159,
"step": 135
},
{
"epoch": 0.04553063274188149,
"grad_norm": 0.11826716363430023,
"learning_rate": 0.0001,
"loss": 1.6284,
"step": 136
},
{
"epoch": 0.04586541680616003,
"grad_norm": 0.15767724812030792,
"learning_rate": 0.0001,
"loss": 1.682,
"step": 137
},
{
"epoch": 0.046200200870438565,
"grad_norm": 0.14300817251205444,
"learning_rate": 0.0001,
"loss": 1.6152,
"step": 138
},
{
"epoch": 0.04653498493471711,
"grad_norm": 0.11646521836519241,
"learning_rate": 0.0001,
"loss": 1.6343,
"step": 139
},
{
"epoch": 0.04686976899899565,
"grad_norm": 0.12624727189540863,
"learning_rate": 0.0001,
"loss": 1.6128,
"step": 140
},
{
"epoch": 0.04720455306327419,
"grad_norm": 0.14111122488975525,
"learning_rate": 0.0001,
"loss": 1.618,
"step": 141
},
{
"epoch": 0.047539337127552726,
"grad_norm": 0.1404058188199997,
"learning_rate": 0.0001,
"loss": 1.66,
"step": 142
},
{
"epoch": 0.04787412119183127,
"grad_norm": 0.12555940449237823,
"learning_rate": 0.0001,
"loss": 1.666,
"step": 143
},
{
"epoch": 0.04820890525610981,
"grad_norm": 0.14494475722312927,
"learning_rate": 0.0001,
"loss": 1.6147,
"step": 144
},
{
"epoch": 0.04854368932038835,
"grad_norm": 0.12508632242679596,
"learning_rate": 0.0001,
"loss": 1.5765,
"step": 145
},
{
"epoch": 0.04887847338466689,
"grad_norm": 0.11790450662374496,
"learning_rate": 0.0001,
"loss": 1.7342,
"step": 146
},
{
"epoch": 0.04921325744894543,
"grad_norm": 0.1416400671005249,
"learning_rate": 0.0001,
"loss": 1.6673,
"step": 147
},
{
"epoch": 0.04954804151322397,
"grad_norm": 0.13537850975990295,
"learning_rate": 0.0001,
"loss": 1.6328,
"step": 148
},
{
"epoch": 0.04988282557750251,
"grad_norm": 0.12219058722257614,
"learning_rate": 0.0001,
"loss": 1.6677,
"step": 149
},
{
"epoch": 0.05021760964178105,
"grad_norm": 0.1398639678955078,
"learning_rate": 0.0001,
"loss": 1.6454,
"step": 150
},
{
"epoch": 0.050552393706059594,
"grad_norm": 0.14572647213935852,
"learning_rate": 0.0001,
"loss": 1.6094,
"step": 151
},
{
"epoch": 0.05088717777033813,
"grad_norm": 0.10937194526195526,
"learning_rate": 0.0001,
"loss": 1.5776,
"step": 152
},
{
"epoch": 0.05122196183461667,
"grad_norm": 0.1404120773077011,
"learning_rate": 0.0001,
"loss": 1.6112,
"step": 153
},
{
"epoch": 0.05155674589889521,
"grad_norm": 0.1480460911989212,
"learning_rate": 0.0001,
"loss": 1.6196,
"step": 154
},
{
"epoch": 0.051891529963173755,
"grad_norm": 0.10971348732709885,
"learning_rate": 0.0001,
"loss": 1.5744,
"step": 155
},
{
"epoch": 0.05222631402745229,
"grad_norm": 0.1468382179737091,
"learning_rate": 0.0001,
"loss": 1.7518,
"step": 156
},
{
"epoch": 0.05256109809173083,
"grad_norm": 0.13429516553878784,
"learning_rate": 0.0001,
"loss": 1.5812,
"step": 157
},
{
"epoch": 0.05289588215600938,
"grad_norm": 0.11399335414171219,
"learning_rate": 0.0001,
"loss": 1.6812,
"step": 158
},
{
"epoch": 0.053230666220287916,
"grad_norm": 0.13944409787654877,
"learning_rate": 0.0001,
"loss": 1.6789,
"step": 159
},
{
"epoch": 0.053565450284566454,
"grad_norm": 0.1390630453824997,
"learning_rate": 0.0001,
"loss": 1.6368,
"step": 160
},
{
"epoch": 0.05390023434884499,
"grad_norm": 0.1098702922463417,
"learning_rate": 0.0001,
"loss": 1.5462,
"step": 161
},
{
"epoch": 0.05423501841312354,
"grad_norm": 0.13710471987724304,
"learning_rate": 0.0001,
"loss": 1.7208,
"step": 162
},
{
"epoch": 0.05456980247740208,
"grad_norm": 0.1283336579799652,
"learning_rate": 0.0001,
"loss": 1.6648,
"step": 163
},
{
"epoch": 0.054904586541680615,
"grad_norm": 0.11550601571798325,
"learning_rate": 0.0001,
"loss": 1.7409,
"step": 164
},
{
"epoch": 0.055239370605959154,
"grad_norm": 0.12028289586305618,
"learning_rate": 0.0001,
"loss": 1.6685,
"step": 165
},
{
"epoch": 0.0555741546702377,
"grad_norm": 0.13237926363945007,
"learning_rate": 0.0001,
"loss": 1.6639,
"step": 166
},
{
"epoch": 0.05590893873451624,
"grad_norm": 0.11385014653205872,
"learning_rate": 0.0001,
"loss": 1.6742,
"step": 167
},
{
"epoch": 0.056243722798794776,
"grad_norm": 0.13613030314445496,
"learning_rate": 0.0001,
"loss": 1.6898,
"step": 168
},
{
"epoch": 0.056578506863073315,
"grad_norm": 0.12617048621177673,
"learning_rate": 0.0001,
"loss": 1.6239,
"step": 169
},
{
"epoch": 0.05691329092735186,
"grad_norm": 0.11637625098228455,
"learning_rate": 0.0001,
"loss": 1.6362,
"step": 170
},
{
"epoch": 0.0572480749916304,
"grad_norm": 0.13217699527740479,
"learning_rate": 0.0001,
"loss": 1.6319,
"step": 171
},
{
"epoch": 0.05758285905590894,
"grad_norm": 0.12088079750537872,
"learning_rate": 0.0001,
"loss": 1.4997,
"step": 172
},
{
"epoch": 0.057917643120187476,
"grad_norm": 0.11359237879514694,
"learning_rate": 0.0001,
"loss": 1.564,
"step": 173
},
{
"epoch": 0.05825242718446602,
"grad_norm": 0.12509793043136597,
"learning_rate": 0.0001,
"loss": 1.6855,
"step": 174
},
{
"epoch": 0.05858721124874456,
"grad_norm": 0.1233699694275856,
"learning_rate": 0.0001,
"loss": 1.665,
"step": 175
},
{
"epoch": 0.0589219953130231,
"grad_norm": 0.11172114312648773,
"learning_rate": 0.0001,
"loss": 1.6242,
"step": 176
},
{
"epoch": 0.059256779377301644,
"grad_norm": 0.12242110818624496,
"learning_rate": 0.0001,
"loss": 1.6736,
"step": 177
},
{
"epoch": 0.05959156344158018,
"grad_norm": 0.12275474518537521,
"learning_rate": 0.0001,
"loss": 1.6373,
"step": 178
},
{
"epoch": 0.05992634750585872,
"grad_norm": 0.11666038632392883,
"learning_rate": 0.0001,
"loss": 1.6957,
"step": 179
},
{
"epoch": 0.06026113157013726,
"grad_norm": 0.1209944486618042,
"learning_rate": 0.0001,
"loss": 1.618,
"step": 180
},
{
"epoch": 0.060595915634415805,
"grad_norm": 0.12028312683105469,
"learning_rate": 0.0001,
"loss": 1.6738,
"step": 181
},
{
"epoch": 0.06093069969869434,
"grad_norm": 0.11835712194442749,
"learning_rate": 0.0001,
"loss": 1.6348,
"step": 182
},
{
"epoch": 0.06126548376297288,
"grad_norm": 0.13166043162345886,
"learning_rate": 0.0001,
"loss": 1.6064,
"step": 183
},
{
"epoch": 0.06160026782725142,
"grad_norm": 0.1366170346736908,
"learning_rate": 0.0001,
"loss": 1.674,
"step": 184
},
{
"epoch": 0.061935051891529966,
"grad_norm": 0.12185468524694443,
"learning_rate": 0.0001,
"loss": 1.5695,
"step": 185
},
{
"epoch": 0.062269835955808504,
"grad_norm": 0.12310407310724258,
"learning_rate": 0.0001,
"loss": 1.6799,
"step": 186
},
{
"epoch": 0.06260462002008704,
"grad_norm": 0.14412462711334229,
"learning_rate": 0.0001,
"loss": 1.5855,
"step": 187
},
{
"epoch": 0.06293940408436559,
"grad_norm": 0.11908841878175735,
"learning_rate": 0.0001,
"loss": 1.5752,
"step": 188
},
{
"epoch": 0.06327418814864412,
"grad_norm": 0.12137061357498169,
"learning_rate": 0.0001,
"loss": 1.6018,
"step": 189
},
{
"epoch": 0.06360897221292267,
"grad_norm": 0.128020778298378,
"learning_rate": 0.0001,
"loss": 1.5894,
"step": 190
},
{
"epoch": 0.06394375627720121,
"grad_norm": 0.13447493314743042,
"learning_rate": 0.0001,
"loss": 1.5884,
"step": 191
},
{
"epoch": 0.06427854034147974,
"grad_norm": 0.11885492503643036,
"learning_rate": 0.0001,
"loss": 1.6245,
"step": 192
},
{
"epoch": 0.06461332440575829,
"grad_norm": 0.13066913187503815,
"learning_rate": 0.0001,
"loss": 1.6807,
"step": 193
},
{
"epoch": 0.06494810847003682,
"grad_norm": 0.12650778889656067,
"learning_rate": 0.0001,
"loss": 1.6498,
"step": 194
},
{
"epoch": 0.06528289253431536,
"grad_norm": 0.116504967212677,
"learning_rate": 0.0001,
"loss": 1.6037,
"step": 195
},
{
"epoch": 0.06561767659859391,
"grad_norm": 0.12200898677110672,
"learning_rate": 0.0001,
"loss": 1.5816,
"step": 196
},
{
"epoch": 0.06595246066287244,
"grad_norm": 0.13350239396095276,
"learning_rate": 0.0001,
"loss": 1.6281,
"step": 197
},
{
"epoch": 0.06628724472715099,
"grad_norm": 0.12119137495756149,
"learning_rate": 0.0001,
"loss": 1.5747,
"step": 198
},
{
"epoch": 0.06662202879142953,
"grad_norm": 0.12292595952749252,
"learning_rate": 0.0001,
"loss": 1.6294,
"step": 199
},
{
"epoch": 0.06695681285570806,
"grad_norm": 0.14958657324314117,
"learning_rate": 0.0001,
"loss": 1.7248,
"step": 200
},
{
"epoch": 0.06729159691998661,
"grad_norm": 0.1206580251455307,
"learning_rate": 0.0001,
"loss": 1.647,
"step": 201
},
{
"epoch": 0.06762638098426516,
"grad_norm": 0.13404549658298492,
"learning_rate": 0.0001,
"loss": 1.6827,
"step": 202
},
{
"epoch": 0.06796116504854369,
"grad_norm": 0.11746184527873993,
"learning_rate": 0.0001,
"loss": 1.5827,
"step": 203
},
{
"epoch": 0.06829594911282223,
"grad_norm": 0.1220933049917221,
"learning_rate": 0.0001,
"loss": 1.6209,
"step": 204
},
{
"epoch": 0.06863073317710076,
"grad_norm": 0.1395500898361206,
"learning_rate": 0.0001,
"loss": 1.6691,
"step": 205
},
{
"epoch": 0.06896551724137931,
"grad_norm": 0.12085775285959244,
"learning_rate": 0.0001,
"loss": 1.6186,
"step": 206
},
{
"epoch": 0.06930030130565785,
"grad_norm": 0.139579176902771,
"learning_rate": 0.0001,
"loss": 1.6357,
"step": 207
},
{
"epoch": 0.06963508536993639,
"grad_norm": 0.12011922895908356,
"learning_rate": 0.0001,
"loss": 1.5418,
"step": 208
},
{
"epoch": 0.06996986943421493,
"grad_norm": 0.11939892917871475,
"learning_rate": 0.0001,
"loss": 1.5816,
"step": 209
},
{
"epoch": 0.07030465349849348,
"grad_norm": 0.12651924788951874,
"learning_rate": 0.0001,
"loss": 1.5286,
"step": 210
},
{
"epoch": 0.07063943756277201,
"grad_norm": 0.13420534133911133,
"learning_rate": 0.0001,
"loss": 1.6213,
"step": 211
},
{
"epoch": 0.07097422162705055,
"grad_norm": 0.11868797987699509,
"learning_rate": 0.0001,
"loss": 1.6367,
"step": 212
},
{
"epoch": 0.07130900569132909,
"grad_norm": 0.11338218301534653,
"learning_rate": 0.0001,
"loss": 1.517,
"step": 213
},
{
"epoch": 0.07164378975560763,
"grad_norm": 0.14230981469154358,
"learning_rate": 0.0001,
"loss": 1.6773,
"step": 214
},
{
"epoch": 0.07197857381988618,
"grad_norm": 0.11315491795539856,
"learning_rate": 0.0001,
"loss": 1.5564,
"step": 215
},
{
"epoch": 0.07231335788416471,
"grad_norm": 0.12009023874998093,
"learning_rate": 0.0001,
"loss": 1.6317,
"step": 216
},
{
"epoch": 0.07264814194844325,
"grad_norm": 0.1332681030035019,
"learning_rate": 0.0001,
"loss": 1.6393,
"step": 217
},
{
"epoch": 0.0729829260127218,
"grad_norm": 0.12581905722618103,
"learning_rate": 0.0001,
"loss": 1.7155,
"step": 218
},
{
"epoch": 0.07331771007700033,
"grad_norm": 0.12259216606616974,
"learning_rate": 0.0001,
"loss": 1.661,
"step": 219
},
{
"epoch": 0.07365249414127888,
"grad_norm": 0.13090763986110687,
"learning_rate": 0.0001,
"loss": 1.6692,
"step": 220
},
{
"epoch": 0.07398727820555742,
"grad_norm": 0.11311494559049606,
"learning_rate": 0.0001,
"loss": 1.6653,
"step": 221
},
{
"epoch": 0.07432206226983595,
"grad_norm": 0.1307578831911087,
"learning_rate": 0.0001,
"loss": 1.5978,
"step": 222
},
{
"epoch": 0.0746568463341145,
"grad_norm": 0.12622885406017303,
"learning_rate": 0.0001,
"loss": 1.7782,
"step": 223
},
{
"epoch": 0.07499163039839303,
"grad_norm": 0.11902297288179398,
"learning_rate": 0.0001,
"loss": 1.5689,
"step": 224
},
{
"epoch": 0.07532641446267158,
"grad_norm": 0.11696305125951767,
"learning_rate": 0.0001,
"loss": 1.6077,
"step": 225
},
{
"epoch": 0.07566119852695012,
"grad_norm": 0.11666855216026306,
"learning_rate": 0.0001,
"loss": 1.5568,
"step": 226
},
{
"epoch": 0.07599598259122865,
"grad_norm": 0.12056950479745865,
"learning_rate": 0.0001,
"loss": 1.6829,
"step": 227
},
{
"epoch": 0.0763307666555072,
"grad_norm": 0.11957021802663803,
"learning_rate": 0.0001,
"loss": 1.7184,
"step": 228
},
{
"epoch": 0.07666555071978574,
"grad_norm": 0.11590487509965897,
"learning_rate": 0.0001,
"loss": 1.6775,
"step": 229
},
{
"epoch": 0.07700033478406428,
"grad_norm": 0.11034328490495682,
"learning_rate": 0.0001,
"loss": 1.5773,
"step": 230
},
{
"epoch": 0.07733511884834282,
"grad_norm": 0.12097325176000595,
"learning_rate": 0.0001,
"loss": 1.5552,
"step": 231
},
{
"epoch": 0.07766990291262135,
"grad_norm": 0.11697199940681458,
"learning_rate": 0.0001,
"loss": 1.6762,
"step": 232
},
{
"epoch": 0.0780046869768999,
"grad_norm": 0.11488549411296844,
"learning_rate": 0.0001,
"loss": 1.6219,
"step": 233
},
{
"epoch": 0.07833947104117844,
"grad_norm": 0.12868645787239075,
"learning_rate": 0.0001,
"loss": 1.6596,
"step": 234
},
{
"epoch": 0.07867425510545697,
"grad_norm": 0.11428504437208176,
"learning_rate": 0.0001,
"loss": 1.5926,
"step": 235
},
{
"epoch": 0.07900903916973552,
"grad_norm": 0.14550745487213135,
"learning_rate": 0.0001,
"loss": 1.6773,
"step": 236
},
{
"epoch": 0.07934382323401407,
"grad_norm": 0.11800127476453781,
"learning_rate": 0.0001,
"loss": 1.7403,
"step": 237
},
{
"epoch": 0.0796786072982926,
"grad_norm": 0.12732075154781342,
"learning_rate": 0.0001,
"loss": 1.6886,
"step": 238
},
{
"epoch": 0.08001339136257114,
"grad_norm": 0.1188284233212471,
"learning_rate": 0.0001,
"loss": 1.6552,
"step": 239
},
{
"epoch": 0.08034817542684969,
"grad_norm": 0.12447573244571686,
"learning_rate": 0.0001,
"loss": 1.668,
"step": 240
},
{
"epoch": 0.08068295949112822,
"grad_norm": 0.129620760679245,
"learning_rate": 0.0001,
"loss": 1.6134,
"step": 241
},
{
"epoch": 0.08101774355540677,
"grad_norm": 0.12539665400981903,
"learning_rate": 0.0001,
"loss": 1.7069,
"step": 242
},
{
"epoch": 0.0813525276196853,
"grad_norm": 0.13554492592811584,
"learning_rate": 0.0001,
"loss": 1.6704,
"step": 243
},
{
"epoch": 0.08168731168396384,
"grad_norm": 0.11758473515510559,
"learning_rate": 0.0001,
"loss": 1.6329,
"step": 244
},
{
"epoch": 0.08202209574824239,
"grad_norm": 0.11309672147035599,
"learning_rate": 0.0001,
"loss": 1.5836,
"step": 245
},
{
"epoch": 0.08235687981252092,
"grad_norm": 0.12910054624080658,
"learning_rate": 0.0001,
"loss": 1.6104,
"step": 246
},
{
"epoch": 0.08269166387679946,
"grad_norm": 0.12267620116472244,
"learning_rate": 0.0001,
"loss": 1.6505,
"step": 247
},
{
"epoch": 0.08302644794107801,
"grad_norm": 0.12700802087783813,
"learning_rate": 0.0001,
"loss": 1.6474,
"step": 248
},
{
"epoch": 0.08336123200535654,
"grad_norm": 0.13106848299503326,
"learning_rate": 0.0001,
"loss": 1.7076,
"step": 249
},
{
"epoch": 0.08369601606963509,
"grad_norm": 0.12598051130771637,
"learning_rate": 0.0001,
"loss": 1.6463,
"step": 250
},
{
"epoch": 0.08403080013391362,
"grad_norm": 0.1270611584186554,
"learning_rate": 0.0001,
"loss": 1.6407,
"step": 251
},
{
"epoch": 0.08436558419819216,
"grad_norm": 0.1215846911072731,
"learning_rate": 0.0001,
"loss": 1.7082,
"step": 252
},
{
"epoch": 0.08470036826247071,
"grad_norm": 0.11944068968296051,
"learning_rate": 0.0001,
"loss": 1.6046,
"step": 253
},
{
"epoch": 0.08503515232674924,
"grad_norm": 0.12395983189344406,
"learning_rate": 0.0001,
"loss": 1.6444,
"step": 254
},
{
"epoch": 0.08536993639102779,
"grad_norm": 0.11616060882806778,
"learning_rate": 0.0001,
"loss": 1.6514,
"step": 255
},
{
"epoch": 0.08570472045530633,
"grad_norm": 0.1274399757385254,
"learning_rate": 0.0001,
"loss": 1.6023,
"step": 256
},
{
"epoch": 0.08603950451958486,
"grad_norm": 0.11419884115457535,
"learning_rate": 0.0001,
"loss": 1.6053,
"step": 257
},
{
"epoch": 0.08637428858386341,
"grad_norm": 0.11922091245651245,
"learning_rate": 0.0001,
"loss": 1.6771,
"step": 258
},
{
"epoch": 0.08670907264814195,
"grad_norm": 0.12727287411689758,
"learning_rate": 0.0001,
"loss": 1.5332,
"step": 259
},
{
"epoch": 0.08704385671242049,
"grad_norm": 0.12368068844079971,
"learning_rate": 0.0001,
"loss": 1.6962,
"step": 260
},
{
"epoch": 0.08737864077669903,
"grad_norm": 0.11546538770198822,
"learning_rate": 0.0001,
"loss": 1.6239,
"step": 261
},
{
"epoch": 0.08771342484097756,
"grad_norm": 0.13736455142498016,
"learning_rate": 0.0001,
"loss": 1.7133,
"step": 262
},
{
"epoch": 0.08804820890525611,
"grad_norm": 0.12773726880550385,
"learning_rate": 0.0001,
"loss": 1.6127,
"step": 263
},
{
"epoch": 0.08838299296953465,
"grad_norm": 0.12833422422409058,
"learning_rate": 0.0001,
"loss": 1.5803,
"step": 264
},
{
"epoch": 0.08871777703381319,
"grad_norm": 0.13427826762199402,
"learning_rate": 0.0001,
"loss": 1.5815,
"step": 265
},
{
"epoch": 0.08905256109809173,
"grad_norm": 0.1173439621925354,
"learning_rate": 0.0001,
"loss": 1.5457,
"step": 266
},
{
"epoch": 0.08938734516237028,
"grad_norm": 0.12156970053911209,
"learning_rate": 0.0001,
"loss": 1.5969,
"step": 267
},
{
"epoch": 0.08972212922664881,
"grad_norm": 0.15133506059646606,
"learning_rate": 0.0001,
"loss": 1.6223,
"step": 268
},
{
"epoch": 0.09005691329092735,
"grad_norm": 0.13353589177131653,
"learning_rate": 0.0001,
"loss": 1.545,
"step": 269
},
{
"epoch": 0.09039169735520589,
"grad_norm": 0.12940257787704468,
"learning_rate": 0.0001,
"loss": 1.6135,
"step": 270
},
{
"epoch": 0.09072648141948443,
"grad_norm": 0.12897267937660217,
"learning_rate": 0.0001,
"loss": 1.6413,
"step": 271
},
{
"epoch": 0.09106126548376298,
"grad_norm": 0.12336087226867676,
"learning_rate": 0.0001,
"loss": 1.702,
"step": 272
},
{
"epoch": 0.09139604954804151,
"grad_norm": 0.11277737468481064,
"learning_rate": 0.0001,
"loss": 1.5743,
"step": 273
},
{
"epoch": 0.09173083361232005,
"grad_norm": 0.11659134924411774,
"learning_rate": 0.0001,
"loss": 1.6456,
"step": 274
},
{
"epoch": 0.0920656176765986,
"grad_norm": 0.11736118793487549,
"learning_rate": 0.0001,
"loss": 1.655,
"step": 275
},
{
"epoch": 0.09240040174087713,
"grad_norm": 0.12133463472127914,
"learning_rate": 0.0001,
"loss": 1.6771,
"step": 276
},
{
"epoch": 0.09273518580515568,
"grad_norm": 0.11516664177179337,
"learning_rate": 0.0001,
"loss": 1.5545,
"step": 277
},
{
"epoch": 0.09306996986943422,
"grad_norm": 0.10916180163621902,
"learning_rate": 0.0001,
"loss": 1.5301,
"step": 278
},
{
"epoch": 0.09340475393371275,
"grad_norm": 0.11232040077447891,
"learning_rate": 0.0001,
"loss": 1.5489,
"step": 279
},
{
"epoch": 0.0937395379979913,
"grad_norm": 0.12515543401241302,
"learning_rate": 0.0001,
"loss": 1.6817,
"step": 280
},
{
"epoch": 0.09407432206226983,
"grad_norm": 0.11998307704925537,
"learning_rate": 0.0001,
"loss": 1.563,
"step": 281
},
{
"epoch": 0.09440910612654838,
"grad_norm": 0.12774354219436646,
"learning_rate": 0.0001,
"loss": 1.622,
"step": 282
},
{
"epoch": 0.09474389019082692,
"grad_norm": 0.12023581564426422,
"learning_rate": 0.0001,
"loss": 1.5367,
"step": 283
},
{
"epoch": 0.09507867425510545,
"grad_norm": 0.12877605855464935,
"learning_rate": 0.0001,
"loss": 1.5806,
"step": 284
},
{
"epoch": 0.095413458319384,
"grad_norm": 0.11994509398937225,
"learning_rate": 0.0001,
"loss": 1.6017,
"step": 285
},
{
"epoch": 0.09574824238366254,
"grad_norm": 0.12522728741168976,
"learning_rate": 0.0001,
"loss": 1.6213,
"step": 286
},
{
"epoch": 0.09608302644794108,
"grad_norm": 0.13130401074886322,
"learning_rate": 0.0001,
"loss": 1.6211,
"step": 287
},
{
"epoch": 0.09641781051221962,
"grad_norm": 0.1242026537656784,
"learning_rate": 0.0001,
"loss": 1.6428,
"step": 288
},
{
"epoch": 0.09675259457649815,
"grad_norm": 0.12561045587062836,
"learning_rate": 0.0001,
"loss": 1.7275,
"step": 289
},
{
"epoch": 0.0970873786407767,
"grad_norm": 0.11756443232297897,
"learning_rate": 0.0001,
"loss": 1.5905,
"step": 290
},
{
"epoch": 0.09742216270505524,
"grad_norm": 0.11787443608045578,
"learning_rate": 0.0001,
"loss": 1.5809,
"step": 291
},
{
"epoch": 0.09775694676933377,
"grad_norm": 0.11708027869462967,
"learning_rate": 0.0001,
"loss": 1.6205,
"step": 292
},
{
"epoch": 0.09809173083361232,
"grad_norm": 0.12011709064245224,
"learning_rate": 0.0001,
"loss": 1.6327,
"step": 293
},
{
"epoch": 0.09842651489789087,
"grad_norm": 0.12868238985538483,
"learning_rate": 0.0001,
"loss": 1.7539,
"step": 294
},
{
"epoch": 0.0987612989621694,
"grad_norm": 0.11626073718070984,
"learning_rate": 0.0001,
"loss": 1.6877,
"step": 295
},
{
"epoch": 0.09909608302644794,
"grad_norm": 0.1279468834400177,
"learning_rate": 0.0001,
"loss": 1.635,
"step": 296
},
{
"epoch": 0.09943086709072649,
"grad_norm": 0.12956663966178894,
"learning_rate": 0.0001,
"loss": 1.5736,
"step": 297
},
{
"epoch": 0.09976565115500502,
"grad_norm": 0.11931903660297394,
"learning_rate": 0.0001,
"loss": 1.6534,
"step": 298
},
{
"epoch": 0.10010043521928357,
"grad_norm": 0.12837816774845123,
"learning_rate": 0.0001,
"loss": 1.5923,
"step": 299
},
{
"epoch": 0.1004352192835621,
"grad_norm": 0.12146858870983124,
"learning_rate": 0.0001,
"loss": 1.6206,
"step": 300
},
{
"epoch": 0.10077000334784064,
"grad_norm": 0.11455334722995758,
"learning_rate": 0.0001,
"loss": 1.5292,
"step": 301
},
{
"epoch": 0.10110478741211919,
"grad_norm": 0.12035822868347168,
"learning_rate": 0.0001,
"loss": 1.576,
"step": 302
},
{
"epoch": 0.10143957147639772,
"grad_norm": 0.12373282760381699,
"learning_rate": 0.0001,
"loss": 1.6688,
"step": 303
},
{
"epoch": 0.10177435554067626,
"grad_norm": 0.13985779881477356,
"learning_rate": 0.0001,
"loss": 1.667,
"step": 304
},
{
"epoch": 0.10210913960495481,
"grad_norm": 0.11246056109666824,
"learning_rate": 0.0001,
"loss": 1.6014,
"step": 305
},
{
"epoch": 0.10244392366923334,
"grad_norm": 0.13154080510139465,
"learning_rate": 0.0001,
"loss": 1.5909,
"step": 306
},
{
"epoch": 0.10277870773351189,
"grad_norm": 0.13235047459602356,
"learning_rate": 0.0001,
"loss": 1.6888,
"step": 307
},
{
"epoch": 0.10311349179779042,
"grad_norm": 0.13294562697410583,
"learning_rate": 0.0001,
"loss": 1.6534,
"step": 308
},
{
"epoch": 0.10344827586206896,
"grad_norm": 0.1274106800556183,
"learning_rate": 0.0001,
"loss": 1.7178,
"step": 309
},
{
"epoch": 0.10378305992634751,
"grad_norm": 0.11676975339651108,
"learning_rate": 0.0001,
"loss": 1.5587,
"step": 310
},
{
"epoch": 0.10411784399062604,
"grad_norm": 0.1180170550942421,
"learning_rate": 0.0001,
"loss": 1.5579,
"step": 311
},
{
"epoch": 0.10445262805490459,
"grad_norm": 0.1267906278371811,
"learning_rate": 0.0001,
"loss": 1.5994,
"step": 312
},
{
"epoch": 0.10478741211918313,
"grad_norm": 0.12398704141378403,
"learning_rate": 0.0001,
"loss": 1.5459,
"step": 313
},
{
"epoch": 0.10512219618346166,
"grad_norm": 0.12039758265018463,
"learning_rate": 0.0001,
"loss": 1.5995,
"step": 314
},
{
"epoch": 0.10545698024774021,
"grad_norm": 0.12191271781921387,
"learning_rate": 0.0001,
"loss": 1.5639,
"step": 315
},
{
"epoch": 0.10579176431201875,
"grad_norm": 0.1351427584886551,
"learning_rate": 0.0001,
"loss": 1.6553,
"step": 316
},
{
"epoch": 0.10612654837629729,
"grad_norm": 0.13542529940605164,
"learning_rate": 0.0001,
"loss": 1.5455,
"step": 317
},
{
"epoch": 0.10646133244057583,
"grad_norm": 0.13739462196826935,
"learning_rate": 0.0001,
"loss": 1.6414,
"step": 318
},
{
"epoch": 0.10679611650485436,
"grad_norm": 0.11810696870088577,
"learning_rate": 0.0001,
"loss": 1.7078,
"step": 319
},
{
"epoch": 0.10713090056913291,
"grad_norm": 0.13632580637931824,
"learning_rate": 0.0001,
"loss": 1.6044,
"step": 320
},
{
"epoch": 0.10746568463341145,
"grad_norm": 0.12454043328762054,
"learning_rate": 0.0001,
"loss": 1.6654,
"step": 321
},
{
"epoch": 0.10780046869768999,
"grad_norm": 0.11818061023950577,
"learning_rate": 0.0001,
"loss": 1.5693,
"step": 322
},
{
"epoch": 0.10813525276196853,
"grad_norm": 0.12229089438915253,
"learning_rate": 0.0001,
"loss": 1.6248,
"step": 323
},
{
"epoch": 0.10847003682624708,
"grad_norm": 0.11546499282121658,
"learning_rate": 0.0001,
"loss": 1.5091,
"step": 324
},
{
"epoch": 0.10880482089052561,
"grad_norm": 0.12005545943975449,
"learning_rate": 0.0001,
"loss": 1.5801,
"step": 325
},
{
"epoch": 0.10913960495480415,
"grad_norm": 0.12114623188972473,
"learning_rate": 0.0001,
"loss": 1.6552,
"step": 326
},
{
"epoch": 0.10947438901908269,
"grad_norm": 0.11608844995498657,
"learning_rate": 0.0001,
"loss": 1.5183,
"step": 327
},
{
"epoch": 0.10980917308336123,
"grad_norm": 0.11119306832551956,
"learning_rate": 0.0001,
"loss": 1.5515,
"step": 328
},
{
"epoch": 0.11014395714763978,
"grad_norm": 0.12586964666843414,
"learning_rate": 0.0001,
"loss": 1.6353,
"step": 329
},
{
"epoch": 0.11047874121191831,
"grad_norm": 0.127826526761055,
"learning_rate": 0.0001,
"loss": 1.7205,
"step": 330
},
{
"epoch": 0.11081352527619685,
"grad_norm": 0.11828092485666275,
"learning_rate": 0.0001,
"loss": 1.6711,
"step": 331
},
{
"epoch": 0.1111483093404754,
"grad_norm": 0.13583530485630035,
"learning_rate": 0.0001,
"loss": 1.6455,
"step": 332
},
{
"epoch": 0.11148309340475393,
"grad_norm": 0.11893647909164429,
"learning_rate": 0.0001,
"loss": 1.5707,
"step": 333
},
{
"epoch": 0.11181787746903248,
"grad_norm": 0.13151027262210846,
"learning_rate": 0.0001,
"loss": 1.6576,
"step": 334
},
{
"epoch": 0.11215266153331102,
"grad_norm": 0.11656352877616882,
"learning_rate": 0.0001,
"loss": 1.6456,
"step": 335
},
{
"epoch": 0.11248744559758955,
"grad_norm": 0.1267959475517273,
"learning_rate": 0.0001,
"loss": 1.5069,
"step": 336
},
{
"epoch": 0.1128222296618681,
"grad_norm": 0.12403184920549393,
"learning_rate": 0.0001,
"loss": 1.5273,
"step": 337
},
{
"epoch": 0.11315701372614663,
"grad_norm": 0.12692154943943024,
"learning_rate": 0.0001,
"loss": 1.6647,
"step": 338
},
{
"epoch": 0.11349179779042518,
"grad_norm": 0.11919606477022171,
"learning_rate": 0.0001,
"loss": 1.6833,
"step": 339
},
{
"epoch": 0.11382658185470372,
"grad_norm": 0.11304503679275513,
"learning_rate": 0.0001,
"loss": 1.5757,
"step": 340
},
{
"epoch": 0.11416136591898225,
"grad_norm": 0.11996794492006302,
"learning_rate": 0.0001,
"loss": 1.6102,
"step": 341
},
{
"epoch": 0.1144961499832608,
"grad_norm": 0.12606146931648254,
"learning_rate": 0.0001,
"loss": 1.59,
"step": 342
},
{
"epoch": 0.11483093404753934,
"grad_norm": 0.12146681547164917,
"learning_rate": 0.0001,
"loss": 1.4989,
"step": 343
},
{
"epoch": 0.11516571811181787,
"grad_norm": 0.13275377452373505,
"learning_rate": 0.0001,
"loss": 1.6152,
"step": 344
},
{
"epoch": 0.11550050217609642,
"grad_norm": 0.12684765458106995,
"learning_rate": 0.0001,
"loss": 1.542,
"step": 345
},
{
"epoch": 0.11583528624037495,
"grad_norm": 0.1186991035938263,
"learning_rate": 0.0001,
"loss": 1.573,
"step": 346
},
{
"epoch": 0.1161700703046535,
"grad_norm": 0.12221034616231918,
"learning_rate": 0.0001,
"loss": 1.6418,
"step": 347
},
{
"epoch": 0.11650485436893204,
"grad_norm": 0.11776617169380188,
"learning_rate": 0.0001,
"loss": 1.5821,
"step": 348
},
{
"epoch": 0.11683963843321057,
"grad_norm": 0.13464072346687317,
"learning_rate": 0.0001,
"loss": 1.6188,
"step": 349
},
{
"epoch": 0.11717442249748912,
"grad_norm": 0.13101482391357422,
"learning_rate": 0.0001,
"loss": 1.5194,
"step": 350
},
{
"epoch": 0.11750920656176767,
"grad_norm": 0.11970439553260803,
"learning_rate": 0.0001,
"loss": 1.5891,
"step": 351
},
{
"epoch": 0.1178439906260462,
"grad_norm": 0.11731956154108047,
"learning_rate": 0.0001,
"loss": 1.6441,
"step": 352
},
{
"epoch": 0.11817877469032474,
"grad_norm": 0.1163954809308052,
"learning_rate": 0.0001,
"loss": 1.5739,
"step": 353
},
{
"epoch": 0.11851355875460329,
"grad_norm": 0.13119016587734222,
"learning_rate": 0.0001,
"loss": 1.6667,
"step": 354
},
{
"epoch": 0.11884834281888182,
"grad_norm": 0.11406403034925461,
"learning_rate": 0.0001,
"loss": 1.5391,
"step": 355
},
{
"epoch": 0.11918312688316036,
"grad_norm": 0.12543243169784546,
"learning_rate": 0.0001,
"loss": 1.6413,
"step": 356
},
{
"epoch": 0.1195179109474389,
"grad_norm": 0.11639681458473206,
"learning_rate": 0.0001,
"loss": 1.5946,
"step": 357
},
{
"epoch": 0.11985269501171744,
"grad_norm": 0.11582693457603455,
"learning_rate": 0.0001,
"loss": 1.5797,
"step": 358
},
{
"epoch": 0.12018747907599599,
"grad_norm": 0.12131619453430176,
"learning_rate": 0.0001,
"loss": 1.5762,
"step": 359
},
{
"epoch": 0.12052226314027452,
"grad_norm": 0.1220826804637909,
"learning_rate": 0.0001,
"loss": 1.4938,
"step": 360
},
{
"epoch": 0.12085704720455306,
"grad_norm": 0.12737631797790527,
"learning_rate": 0.0001,
"loss": 1.5622,
"step": 361
},
{
"epoch": 0.12119183126883161,
"grad_norm": 0.12794937193393707,
"learning_rate": 0.0001,
"loss": 1.5852,
"step": 362
},
{
"epoch": 0.12152661533311014,
"grad_norm": 0.11786255985498428,
"learning_rate": 0.0001,
"loss": 1.6532,
"step": 363
},
{
"epoch": 0.12186139939738869,
"grad_norm": 0.12443582713603973,
"learning_rate": 0.0001,
"loss": 1.5664,
"step": 364
},
{
"epoch": 0.12219618346166722,
"grad_norm": 0.124130979180336,
"learning_rate": 0.0001,
"loss": 1.5809,
"step": 365
},
{
"epoch": 0.12253096752594576,
"grad_norm": 0.11969106644392014,
"learning_rate": 0.0001,
"loss": 1.5073,
"step": 366
},
{
"epoch": 0.12286575159022431,
"grad_norm": 0.12146104872226715,
"learning_rate": 0.0001,
"loss": 1.6322,
"step": 367
},
{
"epoch": 0.12320053565450284,
"grad_norm": 0.11919710785150528,
"learning_rate": 0.0001,
"loss": 1.6405,
"step": 368
},
{
"epoch": 0.12353531971878139,
"grad_norm": 0.12359990924596786,
"learning_rate": 0.0001,
"loss": 1.6564,
"step": 369
},
{
"epoch": 0.12387010378305993,
"grad_norm": 0.12216739356517792,
"learning_rate": 0.0001,
"loss": 1.658,
"step": 370
},
{
"epoch": 0.12420488784733846,
"grad_norm": 0.12388269603252411,
"learning_rate": 0.0001,
"loss": 1.6542,
"step": 371
},
{
"epoch": 0.12453967191161701,
"grad_norm": 0.12631452083587646,
"learning_rate": 0.0001,
"loss": 1.5741,
"step": 372
},
{
"epoch": 0.12487445597589555,
"grad_norm": 0.11718172580003738,
"learning_rate": 0.0001,
"loss": 1.5247,
"step": 373
},
{
"epoch": 0.12520924004017409,
"grad_norm": 0.11787404865026474,
"learning_rate": 0.0001,
"loss": 1.604,
"step": 374
},
{
"epoch": 0.12554402410445262,
"grad_norm": 0.1190713569521904,
"learning_rate": 0.0001,
"loss": 1.5771,
"step": 375
},
{
"epoch": 0.12587880816873118,
"grad_norm": 0.11780121177434921,
"learning_rate": 0.0001,
"loss": 1.6445,
"step": 376
},
{
"epoch": 0.1262135922330097,
"grad_norm": 0.11370184272527695,
"learning_rate": 0.0001,
"loss": 1.4544,
"step": 377
},
{
"epoch": 0.12654837629728824,
"grad_norm": 0.12931419909000397,
"learning_rate": 0.0001,
"loss": 1.5261,
"step": 378
},
{
"epoch": 0.1268831603615668,
"grad_norm": 0.11074584722518921,
"learning_rate": 0.0001,
"loss": 1.5329,
"step": 379
},
{
"epoch": 0.12721794442584533,
"grad_norm": 0.1251228302717209,
"learning_rate": 0.0001,
"loss": 1.6181,
"step": 380
},
{
"epoch": 0.12755272849012386,
"grad_norm": 0.11304245889186859,
"learning_rate": 0.0001,
"loss": 1.5198,
"step": 381
},
{
"epoch": 0.12788751255440242,
"grad_norm": 0.11219135671854019,
"learning_rate": 0.0001,
"loss": 1.494,
"step": 382
},
{
"epoch": 0.12822229661868095,
"grad_norm": 0.13162165880203247,
"learning_rate": 0.0001,
"loss": 1.6073,
"step": 383
},
{
"epoch": 0.12855708068295948,
"grad_norm": 0.11944107711315155,
"learning_rate": 0.0001,
"loss": 1.6021,
"step": 384
},
{
"epoch": 0.12889186474723804,
"grad_norm": 0.11878252029418945,
"learning_rate": 0.0001,
"loss": 1.6051,
"step": 385
},
{
"epoch": 0.12922664881151658,
"grad_norm": 0.1224270910024643,
"learning_rate": 0.0001,
"loss": 1.596,
"step": 386
},
{
"epoch": 0.1295614328757951,
"grad_norm": 0.12815283238887787,
"learning_rate": 0.0001,
"loss": 1.6652,
"step": 387
},
{
"epoch": 0.12989621694007364,
"grad_norm": 0.11265059560537338,
"learning_rate": 0.0001,
"loss": 1.5478,
"step": 388
},
{
"epoch": 0.1302310010043522,
"grad_norm": 0.12850640714168549,
"learning_rate": 0.0001,
"loss": 1.6242,
"step": 389
},
{
"epoch": 0.13056578506863073,
"grad_norm": 0.11487656831741333,
"learning_rate": 0.0001,
"loss": 1.6089,
"step": 390
},
{
"epoch": 0.13090056913290926,
"grad_norm": 0.1160978302359581,
"learning_rate": 0.0001,
"loss": 1.6248,
"step": 391
},
{
"epoch": 0.13123535319718782,
"grad_norm": 0.12001185864210129,
"learning_rate": 0.0001,
"loss": 1.5911,
"step": 392
},
{
"epoch": 0.13157013726146635,
"grad_norm": 0.11623065918684006,
"learning_rate": 0.0001,
"loss": 1.6194,
"step": 393
},
{
"epoch": 0.13190492132574488,
"grad_norm": 0.11913128942251205,
"learning_rate": 0.0001,
"loss": 1.6233,
"step": 394
},
{
"epoch": 0.13223970539002344,
"grad_norm": 0.11658355593681335,
"learning_rate": 0.0001,
"loss": 1.629,
"step": 395
},
{
"epoch": 0.13257448945430197,
"grad_norm": 0.11986858397722244,
"learning_rate": 0.0001,
"loss": 1.7414,
"step": 396
},
{
"epoch": 0.1329092735185805,
"grad_norm": 0.12659533321857452,
"learning_rate": 0.0001,
"loss": 1.6037,
"step": 397
},
{
"epoch": 0.13324405758285907,
"grad_norm": 0.11471698433160782,
"learning_rate": 0.0001,
"loss": 1.5939,
"step": 398
},
{
"epoch": 0.1335788416471376,
"grad_norm": 0.12152232974767685,
"learning_rate": 0.0001,
"loss": 1.5663,
"step": 399
},
{
"epoch": 0.13391362571141613,
"grad_norm": 0.12228668481111526,
"learning_rate": 0.0001,
"loss": 1.6717,
"step": 400
},
{
"epoch": 0.1342484097756947,
"grad_norm": 0.11998744308948517,
"learning_rate": 0.0001,
"loss": 1.6532,
"step": 401
},
{
"epoch": 0.13458319383997322,
"grad_norm": 0.12556074559688568,
"learning_rate": 0.0001,
"loss": 1.6477,
"step": 402
},
{
"epoch": 0.13491797790425175,
"grad_norm": 0.12216352671384811,
"learning_rate": 0.0001,
"loss": 1.6084,
"step": 403
},
{
"epoch": 0.1352527619685303,
"grad_norm": 0.1290225237607956,
"learning_rate": 0.0001,
"loss": 1.6369,
"step": 404
},
{
"epoch": 0.13558754603280884,
"grad_norm": 0.11453018337488174,
"learning_rate": 0.0001,
"loss": 1.5886,
"step": 405
},
{
"epoch": 0.13592233009708737,
"grad_norm": 0.12192509323358536,
"learning_rate": 0.0001,
"loss": 1.5788,
"step": 406
},
{
"epoch": 0.1362571141613659,
"grad_norm": 0.11374159157276154,
"learning_rate": 0.0001,
"loss": 1.5429,
"step": 407
},
{
"epoch": 0.13659189822564446,
"grad_norm": 0.11875942349433899,
"learning_rate": 0.0001,
"loss": 1.6524,
"step": 408
},
{
"epoch": 0.136926682289923,
"grad_norm": 0.12176533043384552,
"learning_rate": 0.0001,
"loss": 1.6572,
"step": 409
},
{
"epoch": 0.13726146635420153,
"grad_norm": 0.12200423330068588,
"learning_rate": 0.0001,
"loss": 1.7139,
"step": 410
},
{
"epoch": 0.1375962504184801,
"grad_norm": 0.11800340563058853,
"learning_rate": 0.0001,
"loss": 1.6276,
"step": 411
},
{
"epoch": 0.13793103448275862,
"grad_norm": 0.12321179360151291,
"learning_rate": 0.0001,
"loss": 1.6849,
"step": 412
},
{
"epoch": 0.13826581854703715,
"grad_norm": 0.12165375053882599,
"learning_rate": 0.0001,
"loss": 1.5823,
"step": 413
},
{
"epoch": 0.1386006026113157,
"grad_norm": 0.12587733566761017,
"learning_rate": 0.0001,
"loss": 1.5712,
"step": 414
},
{
"epoch": 0.13893538667559424,
"grad_norm": 0.11877655982971191,
"learning_rate": 0.0001,
"loss": 1.606,
"step": 415
},
{
"epoch": 0.13927017073987277,
"grad_norm": 0.11970411241054535,
"learning_rate": 0.0001,
"loss": 1.4995,
"step": 416
},
{
"epoch": 0.13960495480415133,
"grad_norm": 0.14770293235778809,
"learning_rate": 0.0001,
"loss": 1.7334,
"step": 417
},
{
"epoch": 0.13993973886842986,
"grad_norm": 0.11904104053974152,
"learning_rate": 0.0001,
"loss": 1.6258,
"step": 418
},
{
"epoch": 0.1402745229327084,
"grad_norm": 0.13043157756328583,
"learning_rate": 0.0001,
"loss": 1.5564,
"step": 419
},
{
"epoch": 0.14060930699698695,
"grad_norm": 0.1354888528585434,
"learning_rate": 0.0001,
"loss": 1.6391,
"step": 420
},
{
"epoch": 0.1409440910612655,
"grad_norm": 0.11834760010242462,
"learning_rate": 0.0001,
"loss": 1.5345,
"step": 421
},
{
"epoch": 0.14127887512554402,
"grad_norm": 0.13029152154922485,
"learning_rate": 0.0001,
"loss": 1.5007,
"step": 422
},
{
"epoch": 0.14161365918982258,
"grad_norm": 0.1352154165506363,
"learning_rate": 0.0001,
"loss": 1.5925,
"step": 423
},
{
"epoch": 0.1419484432541011,
"grad_norm": 0.13768818974494934,
"learning_rate": 0.0001,
"loss": 1.6513,
"step": 424
},
{
"epoch": 0.14228322731837964,
"grad_norm": 0.1345231682062149,
"learning_rate": 0.0001,
"loss": 1.6524,
"step": 425
},
{
"epoch": 0.14261801138265817,
"grad_norm": 0.11808541417121887,
"learning_rate": 0.0001,
"loss": 1.6038,
"step": 426
},
{
"epoch": 0.14295279544693673,
"grad_norm": 0.1403636336326599,
"learning_rate": 0.0001,
"loss": 1.5559,
"step": 427
},
{
"epoch": 0.14328757951121526,
"grad_norm": 0.13042065501213074,
"learning_rate": 0.0001,
"loss": 1.6516,
"step": 428
},
{
"epoch": 0.1436223635754938,
"grad_norm": 0.12809261679649353,
"learning_rate": 0.0001,
"loss": 1.5913,
"step": 429
},
{
"epoch": 0.14395714763977235,
"grad_norm": 0.13735899329185486,
"learning_rate": 0.0001,
"loss": 1.5814,
"step": 430
},
{
"epoch": 0.14429193170405089,
"grad_norm": 0.12458304315805435,
"learning_rate": 0.0001,
"loss": 1.6909,
"step": 431
},
{
"epoch": 0.14462671576832942,
"grad_norm": 0.11777736246585846,
"learning_rate": 0.0001,
"loss": 1.599,
"step": 432
},
{
"epoch": 0.14496149983260798,
"grad_norm": 0.11958497762680054,
"learning_rate": 0.0001,
"loss": 1.6224,
"step": 433
},
{
"epoch": 0.1452962838968865,
"grad_norm": 0.11626480519771576,
"learning_rate": 0.0001,
"loss": 1.6192,
"step": 434
},
{
"epoch": 0.14563106796116504,
"grad_norm": 0.12103210389614105,
"learning_rate": 0.0001,
"loss": 1.5581,
"step": 435
},
{
"epoch": 0.1459658520254436,
"grad_norm": 0.1175006702542305,
"learning_rate": 0.0001,
"loss": 1.6147,
"step": 436
},
{
"epoch": 0.14630063608972213,
"grad_norm": 0.1194823831319809,
"learning_rate": 0.0001,
"loss": 1.4559,
"step": 437
},
{
"epoch": 0.14663542015400066,
"grad_norm": 0.12060422450304031,
"learning_rate": 0.0001,
"loss": 1.706,
"step": 438
},
{
"epoch": 0.14697020421827922,
"grad_norm": 0.12133188545703888,
"learning_rate": 0.0001,
"loss": 1.6583,
"step": 439
},
{
"epoch": 0.14730498828255775,
"grad_norm": 0.11069684475660324,
"learning_rate": 0.0001,
"loss": 1.5626,
"step": 440
},
{
"epoch": 0.14763977234683628,
"grad_norm": 0.11735668778419495,
"learning_rate": 0.0001,
"loss": 1.5014,
"step": 441
},
{
"epoch": 0.14797455641111484,
"grad_norm": 0.11778223514556885,
"learning_rate": 0.0001,
"loss": 1.6483,
"step": 442
},
{
"epoch": 0.14830934047539338,
"grad_norm": 0.11628784239292145,
"learning_rate": 0.0001,
"loss": 1.5629,
"step": 443
},
{
"epoch": 0.1486441245396719,
"grad_norm": 0.12314952164888382,
"learning_rate": 0.0001,
"loss": 1.6362,
"step": 444
},
{
"epoch": 0.14897890860395044,
"grad_norm": 0.11853016167879105,
"learning_rate": 0.0001,
"loss": 1.6355,
"step": 445
},
{
"epoch": 0.149313692668229,
"grad_norm": 0.1322093904018402,
"learning_rate": 0.0001,
"loss": 1.7655,
"step": 446
},
{
"epoch": 0.14964847673250753,
"grad_norm": 0.11611328274011612,
"learning_rate": 0.0001,
"loss": 1.5878,
"step": 447
},
{
"epoch": 0.14998326079678606,
"grad_norm": 0.11989305913448334,
"learning_rate": 0.0001,
"loss": 1.5576,
"step": 448
},
{
"epoch": 0.15031804486106462,
"grad_norm": 0.11867792159318924,
"learning_rate": 0.0001,
"loss": 1.5453,
"step": 449
},
{
"epoch": 0.15065282892534315,
"grad_norm": 0.11955395340919495,
"learning_rate": 0.0001,
"loss": 1.6089,
"step": 450
},
{
"epoch": 0.15098761298962168,
"grad_norm": 0.13159644603729248,
"learning_rate": 0.0001,
"loss": 1.6053,
"step": 451
},
{
"epoch": 0.15132239705390024,
"grad_norm": 0.12264451384544373,
"learning_rate": 0.0001,
"loss": 1.6277,
"step": 452
},
{
"epoch": 0.15165718111817877,
"grad_norm": 0.1267840564250946,
"learning_rate": 0.0001,
"loss": 1.6047,
"step": 453
},
{
"epoch": 0.1519919651824573,
"grad_norm": 0.1316317319869995,
"learning_rate": 0.0001,
"loss": 1.5497,
"step": 454
},
{
"epoch": 0.15232674924673587,
"grad_norm": 0.12278051674365997,
"learning_rate": 0.0001,
"loss": 1.665,
"step": 455
},
{
"epoch": 0.1526615333110144,
"grad_norm": 0.13153740763664246,
"learning_rate": 0.0001,
"loss": 1.6262,
"step": 456
},
{
"epoch": 0.15299631737529293,
"grad_norm": 0.12118583172559738,
"learning_rate": 0.0001,
"loss": 1.5897,
"step": 457
},
{
"epoch": 0.1533311014395715,
"grad_norm": 0.12203945219516754,
"learning_rate": 0.0001,
"loss": 1.5709,
"step": 458
},
{
"epoch": 0.15366588550385002,
"grad_norm": 0.13483074307441711,
"learning_rate": 0.0001,
"loss": 1.662,
"step": 459
},
{
"epoch": 0.15400066956812855,
"grad_norm": 0.12122450023889542,
"learning_rate": 0.0001,
"loss": 1.6289,
"step": 460
},
{
"epoch": 0.1543354536324071,
"grad_norm": 0.1384558528661728,
"learning_rate": 0.0001,
"loss": 1.6274,
"step": 461
},
{
"epoch": 0.15467023769668564,
"grad_norm": 0.1436455249786377,
"learning_rate": 0.0001,
"loss": 1.6007,
"step": 462
},
{
"epoch": 0.15500502176096417,
"grad_norm": 0.12359965592622757,
"learning_rate": 0.0001,
"loss": 1.6757,
"step": 463
},
{
"epoch": 0.1553398058252427,
"grad_norm": 0.13497023284435272,
"learning_rate": 0.0001,
"loss": 1.6328,
"step": 464
},
{
"epoch": 0.15567458988952126,
"grad_norm": 0.12588655948638916,
"learning_rate": 0.0001,
"loss": 1.6066,
"step": 465
},
{
"epoch": 0.1560093739537998,
"grad_norm": 0.11950384825468063,
"learning_rate": 0.0001,
"loss": 1.6388,
"step": 466
},
{
"epoch": 0.15634415801807833,
"grad_norm": 0.13280175626277924,
"learning_rate": 0.0001,
"loss": 1.6097,
"step": 467
},
{
"epoch": 0.1566789420823569,
"grad_norm": 0.11717383563518524,
"learning_rate": 0.0001,
"loss": 1.6519,
"step": 468
},
{
"epoch": 0.15701372614663542,
"grad_norm": 0.12387187778949738,
"learning_rate": 0.0001,
"loss": 1.5661,
"step": 469
},
{
"epoch": 0.15734851021091395,
"grad_norm": 0.12535057961940765,
"learning_rate": 0.0001,
"loss": 1.601,
"step": 470
},
{
"epoch": 0.1576832942751925,
"grad_norm": 0.12057804316282272,
"learning_rate": 0.0001,
"loss": 1.6463,
"step": 471
},
{
"epoch": 0.15801807833947104,
"grad_norm": 0.1360681802034378,
"learning_rate": 0.0001,
"loss": 1.7093,
"step": 472
},
{
"epoch": 0.15835286240374957,
"grad_norm": 0.11986411362886429,
"learning_rate": 0.0001,
"loss": 1.5864,
"step": 473
},
{
"epoch": 0.15868764646802813,
"grad_norm": 0.11335694789886475,
"learning_rate": 0.0001,
"loss": 1.5495,
"step": 474
},
{
"epoch": 0.15902243053230666,
"grad_norm": 0.11684451997280121,
"learning_rate": 0.0001,
"loss": 1.5295,
"step": 475
},
{
"epoch": 0.1593572145965852,
"grad_norm": 0.12882184982299805,
"learning_rate": 0.0001,
"loss": 1.6903,
"step": 476
},
{
"epoch": 0.15969199866086375,
"grad_norm": 0.12175029516220093,
"learning_rate": 0.0001,
"loss": 1.6421,
"step": 477
},
{
"epoch": 0.16002678272514229,
"grad_norm": 0.1330244094133377,
"learning_rate": 0.0001,
"loss": 1.5691,
"step": 478
},
{
"epoch": 0.16036156678942082,
"grad_norm": 0.12204015254974365,
"learning_rate": 0.0001,
"loss": 1.6557,
"step": 479
},
{
"epoch": 0.16069635085369938,
"grad_norm": 0.1265457272529602,
"learning_rate": 0.0001,
"loss": 1.6319,
"step": 480
},
{
"epoch": 0.1610311349179779,
"grad_norm": 0.13419146835803986,
"learning_rate": 0.0001,
"loss": 1.5694,
"step": 481
},
{
"epoch": 0.16136591898225644,
"grad_norm": 0.12663477659225464,
"learning_rate": 0.0001,
"loss": 1.49,
"step": 482
},
{
"epoch": 0.16170070304653497,
"grad_norm": 0.1386338621377945,
"learning_rate": 0.0001,
"loss": 1.5774,
"step": 483
},
{
"epoch": 0.16203548711081353,
"grad_norm": 0.1266423612833023,
"learning_rate": 0.0001,
"loss": 1.568,
"step": 484
},
{
"epoch": 0.16237027117509206,
"grad_norm": 0.11795584112405777,
"learning_rate": 0.0001,
"loss": 1.5952,
"step": 485
},
{
"epoch": 0.1627050552393706,
"grad_norm": 0.13247069716453552,
"learning_rate": 0.0001,
"loss": 1.5486,
"step": 486
},
{
"epoch": 0.16303983930364915,
"grad_norm": 0.12367638945579529,
"learning_rate": 0.0001,
"loss": 1.6618,
"step": 487
},
{
"epoch": 0.16337462336792768,
"grad_norm": 0.11988285183906555,
"learning_rate": 0.0001,
"loss": 1.6338,
"step": 488
},
{
"epoch": 0.16370940743220622,
"grad_norm": 0.12422308325767517,
"learning_rate": 0.0001,
"loss": 1.5753,
"step": 489
},
{
"epoch": 0.16404419149648478,
"grad_norm": 0.12060552090406418,
"learning_rate": 0.0001,
"loss": 1.6158,
"step": 490
},
{
"epoch": 0.1643789755607633,
"grad_norm": 0.1219470277428627,
"learning_rate": 0.0001,
"loss": 1.5057,
"step": 491
},
{
"epoch": 0.16471375962504184,
"grad_norm": 0.12771841883659363,
"learning_rate": 0.0001,
"loss": 1.6627,
"step": 492
},
{
"epoch": 0.1650485436893204,
"grad_norm": 0.11713176220655441,
"learning_rate": 0.0001,
"loss": 1.5697,
"step": 493
},
{
"epoch": 0.16538332775359893,
"grad_norm": 0.1419348567724228,
"learning_rate": 0.0001,
"loss": 1.7253,
"step": 494
},
{
"epoch": 0.16571811181787746,
"grad_norm": 0.1297536939382553,
"learning_rate": 0.0001,
"loss": 1.666,
"step": 495
},
{
"epoch": 0.16605289588215602,
"grad_norm": 0.12997077405452728,
"learning_rate": 0.0001,
"loss": 1.5825,
"step": 496
},
{
"epoch": 0.16638767994643455,
"grad_norm": 0.14354097843170166,
"learning_rate": 0.0001,
"loss": 1.628,
"step": 497
},
{
"epoch": 0.16672246401071308,
"grad_norm": 0.12498887628316879,
"learning_rate": 0.0001,
"loss": 1.7003,
"step": 498
},
{
"epoch": 0.16705724807499164,
"grad_norm": 0.13219912350177765,
"learning_rate": 0.0001,
"loss": 1.6218,
"step": 499
},
{
"epoch": 0.16739203213927017,
"grad_norm": 0.13144424557685852,
"learning_rate": 0.0001,
"loss": 1.5874,
"step": 500
},
{
"epoch": 0.1677268162035487,
"grad_norm": 0.12147901952266693,
"learning_rate": 0.0001,
"loss": 1.6308,
"step": 501
},
{
"epoch": 0.16806160026782724,
"grad_norm": 0.13109005987644196,
"learning_rate": 0.0001,
"loss": 1.7168,
"step": 502
},
{
"epoch": 0.1683963843321058,
"grad_norm": 0.1306311935186386,
"learning_rate": 0.0001,
"loss": 1.5859,
"step": 503
},
{
"epoch": 0.16873116839638433,
"grad_norm": 0.115351103246212,
"learning_rate": 0.0001,
"loss": 1.6124,
"step": 504
},
{
"epoch": 0.16906595246066286,
"grad_norm": 0.12713004648685455,
"learning_rate": 0.0001,
"loss": 1.5558,
"step": 505
},
{
"epoch": 0.16940073652494142,
"grad_norm": 0.1304563283920288,
"learning_rate": 0.0001,
"loss": 1.676,
"step": 506
},
{
"epoch": 0.16973552058921995,
"grad_norm": 0.12284432351589203,
"learning_rate": 0.0001,
"loss": 1.5585,
"step": 507
},
{
"epoch": 0.17007030465349848,
"grad_norm": 0.12343181669712067,
"learning_rate": 0.0001,
"loss": 1.5869,
"step": 508
},
{
"epoch": 0.17040508871777704,
"grad_norm": 0.11459839344024658,
"learning_rate": 0.0001,
"loss": 1.6051,
"step": 509
},
{
"epoch": 0.17073987278205557,
"grad_norm": 0.11883780360221863,
"learning_rate": 0.0001,
"loss": 1.5064,
"step": 510
},
{
"epoch": 0.1710746568463341,
"grad_norm": 0.12307373434305191,
"learning_rate": 0.0001,
"loss": 1.5257,
"step": 511
},
{
"epoch": 0.17140944091061266,
"grad_norm": 0.11666516959667206,
"learning_rate": 0.0001,
"loss": 1.5842,
"step": 512
},
{
"epoch": 0.1717442249748912,
"grad_norm": 0.11493846029043198,
"learning_rate": 0.0001,
"loss": 1.6215,
"step": 513
},
{
"epoch": 0.17207900903916973,
"grad_norm": 0.1198093444108963,
"learning_rate": 0.0001,
"loss": 1.5875,
"step": 514
},
{
"epoch": 0.1724137931034483,
"grad_norm": 0.11997364461421967,
"learning_rate": 0.0001,
"loss": 1.5819,
"step": 515
},
{
"epoch": 0.17274857716772682,
"grad_norm": 0.12003917992115021,
"learning_rate": 0.0001,
"loss": 1.7019,
"step": 516
},
{
"epoch": 0.17308336123200535,
"grad_norm": 0.11761089414358139,
"learning_rate": 0.0001,
"loss": 1.5742,
"step": 517
},
{
"epoch": 0.1734181452962839,
"grad_norm": 0.12004124373197556,
"learning_rate": 0.0001,
"loss": 1.5947,
"step": 518
},
{
"epoch": 0.17375292936056244,
"grad_norm": 0.12139872461557388,
"learning_rate": 0.0001,
"loss": 1.4861,
"step": 519
},
{
"epoch": 0.17408771342484097,
"grad_norm": 0.12214326858520508,
"learning_rate": 0.0001,
"loss": 1.6953,
"step": 520
},
{
"epoch": 0.1744224974891195,
"grad_norm": 0.12239626795053482,
"learning_rate": 0.0001,
"loss": 1.5529,
"step": 521
},
{
"epoch": 0.17475728155339806,
"grad_norm": 0.11888886988162994,
"learning_rate": 0.0001,
"loss": 1.5099,
"step": 522
},
{
"epoch": 0.1750920656176766,
"grad_norm": 0.11585521697998047,
"learning_rate": 0.0001,
"loss": 1.5392,
"step": 523
},
{
"epoch": 0.17542684968195513,
"grad_norm": 0.1300823837518692,
"learning_rate": 0.0001,
"loss": 1.6598,
"step": 524
},
{
"epoch": 0.1757616337462337,
"grad_norm": 0.12741157412528992,
"learning_rate": 0.0001,
"loss": 1.5798,
"step": 525
},
{
"epoch": 0.17609641781051222,
"grad_norm": 0.11614137142896652,
"learning_rate": 0.0001,
"loss": 1.5343,
"step": 526
},
{
"epoch": 0.17643120187479075,
"grad_norm": 0.12221526354551315,
"learning_rate": 0.0001,
"loss": 1.552,
"step": 527
},
{
"epoch": 0.1767659859390693,
"grad_norm": 0.13221661746501923,
"learning_rate": 0.0001,
"loss": 1.6213,
"step": 528
},
{
"epoch": 0.17710077000334784,
"grad_norm": 0.12069322913885117,
"learning_rate": 0.0001,
"loss": 1.6148,
"step": 529
},
{
"epoch": 0.17743555406762637,
"grad_norm": 0.11254309117794037,
"learning_rate": 0.0001,
"loss": 1.5917,
"step": 530
},
{
"epoch": 0.17777033813190493,
"grad_norm": 0.11715224385261536,
"learning_rate": 0.0001,
"loss": 1.6343,
"step": 531
},
{
"epoch": 0.17810512219618346,
"grad_norm": 0.1183256059885025,
"learning_rate": 0.0001,
"loss": 1.4889,
"step": 532
},
{
"epoch": 0.178439906260462,
"grad_norm": 0.12182603031396866,
"learning_rate": 0.0001,
"loss": 1.5487,
"step": 533
},
{
"epoch": 0.17877469032474055,
"grad_norm": 0.1232253909111023,
"learning_rate": 0.0001,
"loss": 1.6754,
"step": 534
},
{
"epoch": 0.17910947438901909,
"grad_norm": 0.11796277016401291,
"learning_rate": 0.0001,
"loss": 1.6396,
"step": 535
},
{
"epoch": 0.17944425845329762,
"grad_norm": 0.13181637227535248,
"learning_rate": 0.0001,
"loss": 1.6505,
"step": 536
},
{
"epoch": 0.17977904251757618,
"grad_norm": 0.11481553316116333,
"learning_rate": 0.0001,
"loss": 1.492,
"step": 537
},
{
"epoch": 0.1801138265818547,
"grad_norm": 0.12842705845832825,
"learning_rate": 0.0001,
"loss": 1.734,
"step": 538
},
{
"epoch": 0.18044861064613324,
"grad_norm": 0.1235375851392746,
"learning_rate": 0.0001,
"loss": 1.6496,
"step": 539
},
{
"epoch": 0.18078339471041177,
"grad_norm": 0.12111697345972061,
"learning_rate": 0.0001,
"loss": 1.5044,
"step": 540
},
{
"epoch": 0.18111817877469033,
"grad_norm": 0.12484171241521835,
"learning_rate": 0.0001,
"loss": 1.6643,
"step": 541
},
{
"epoch": 0.18145296283896886,
"grad_norm": 0.12675760686397552,
"learning_rate": 0.0001,
"loss": 1.6188,
"step": 542
},
{
"epoch": 0.1817877469032474,
"grad_norm": 0.12203079462051392,
"learning_rate": 0.0001,
"loss": 1.507,
"step": 543
},
{
"epoch": 0.18212253096752595,
"grad_norm": 0.12013613432645798,
"learning_rate": 0.0001,
"loss": 1.6247,
"step": 544
},
{
"epoch": 0.18245731503180448,
"grad_norm": 0.12438444793224335,
"learning_rate": 0.0001,
"loss": 1.5849,
"step": 545
},
{
"epoch": 0.18279209909608302,
"grad_norm": 0.13607415556907654,
"learning_rate": 0.0001,
"loss": 1.6562,
"step": 546
},
{
"epoch": 0.18312688316036158,
"grad_norm": 0.1240532174706459,
"learning_rate": 0.0001,
"loss": 1.5205,
"step": 547
},
{
"epoch": 0.1834616672246401,
"grad_norm": 0.1510075032711029,
"learning_rate": 0.0001,
"loss": 1.6608,
"step": 548
},
{
"epoch": 0.18379645128891864,
"grad_norm": 0.11965179443359375,
"learning_rate": 0.0001,
"loss": 1.6391,
"step": 549
},
{
"epoch": 0.1841312353531972,
"grad_norm": 0.14874660968780518,
"learning_rate": 0.0001,
"loss": 1.6156,
"step": 550
},
{
"epoch": 0.18446601941747573,
"grad_norm": 0.1273370385169983,
"learning_rate": 0.0001,
"loss": 1.5117,
"step": 551
},
{
"epoch": 0.18480080348175426,
"grad_norm": 0.1213572546839714,
"learning_rate": 0.0001,
"loss": 1.5124,
"step": 552
},
{
"epoch": 0.18513558754603282,
"grad_norm": 0.1602640151977539,
"learning_rate": 0.0001,
"loss": 1.6318,
"step": 553
},
{
"epoch": 0.18547037161031135,
"grad_norm": 0.12859167158603668,
"learning_rate": 0.0001,
"loss": 1.6562,
"step": 554
},
{
"epoch": 0.18580515567458988,
"grad_norm": 0.13728216290473938,
"learning_rate": 0.0001,
"loss": 1.5873,
"step": 555
},
{
"epoch": 0.18613993973886844,
"grad_norm": 0.12880103290081024,
"learning_rate": 0.0001,
"loss": 1.5121,
"step": 556
},
{
"epoch": 0.18647472380314697,
"grad_norm": 0.1293378323316574,
"learning_rate": 0.0001,
"loss": 1.6275,
"step": 557
},
{
"epoch": 0.1868095078674255,
"grad_norm": 0.1387391984462738,
"learning_rate": 0.0001,
"loss": 1.6486,
"step": 558
},
{
"epoch": 0.18714429193170404,
"grad_norm": 0.14882785081863403,
"learning_rate": 0.0001,
"loss": 1.6422,
"step": 559
},
{
"epoch": 0.1874790759959826,
"grad_norm": 0.11521956324577332,
"learning_rate": 0.0001,
"loss": 1.5032,
"step": 560
},
{
"epoch": 0.18781386006026113,
"grad_norm": 0.12418463081121445,
"learning_rate": 0.0001,
"loss": 1.5422,
"step": 561
},
{
"epoch": 0.18814864412453966,
"grad_norm": 0.13123475015163422,
"learning_rate": 0.0001,
"loss": 1.6459,
"step": 562
},
{
"epoch": 0.18848342818881822,
"grad_norm": 0.12267505377531052,
"learning_rate": 0.0001,
"loss": 1.61,
"step": 563
},
{
"epoch": 0.18881821225309675,
"grad_norm": 0.12172992527484894,
"learning_rate": 0.0001,
"loss": 1.551,
"step": 564
},
{
"epoch": 0.18915299631737528,
"grad_norm": 0.12027712911367416,
"learning_rate": 0.0001,
"loss": 1.6178,
"step": 565
},
{
"epoch": 0.18948778038165384,
"grad_norm": 0.11598297208547592,
"learning_rate": 0.0001,
"loss": 1.5959,
"step": 566
},
{
"epoch": 0.18982256444593237,
"grad_norm": 0.11541326344013214,
"learning_rate": 0.0001,
"loss": 1.5936,
"step": 567
},
{
"epoch": 0.1901573485102109,
"grad_norm": 0.12343809008598328,
"learning_rate": 0.0001,
"loss": 1.6091,
"step": 568
},
{
"epoch": 0.19049213257448946,
"grad_norm": 0.11451027542352676,
"learning_rate": 0.0001,
"loss": 1.6203,
"step": 569
},
{
"epoch": 0.190826916638768,
"grad_norm": 0.1260651797056198,
"learning_rate": 0.0001,
"loss": 1.6105,
"step": 570
},
{
"epoch": 0.19116170070304653,
"grad_norm": 0.1183401346206665,
"learning_rate": 0.0001,
"loss": 1.583,
"step": 571
},
{
"epoch": 0.1914964847673251,
"grad_norm": 0.11767153441905975,
"learning_rate": 0.0001,
"loss": 1.5717,
"step": 572
},
{
"epoch": 0.19183126883160362,
"grad_norm": 0.11693871766328812,
"learning_rate": 0.0001,
"loss": 1.5783,
"step": 573
},
{
"epoch": 0.19216605289588215,
"grad_norm": 0.1267687827348709,
"learning_rate": 0.0001,
"loss": 1.5803,
"step": 574
},
{
"epoch": 0.1925008369601607,
"grad_norm": 0.11946652829647064,
"learning_rate": 0.0001,
"loss": 1.5575,
"step": 575
},
{
"epoch": 0.19283562102443924,
"grad_norm": 0.12602412700653076,
"learning_rate": 0.0001,
"loss": 1.7297,
"step": 576
},
{
"epoch": 0.19317040508871777,
"grad_norm": 0.12529441714286804,
"learning_rate": 0.0001,
"loss": 1.6877,
"step": 577
},
{
"epoch": 0.1935051891529963,
"grad_norm": 0.12578092515468597,
"learning_rate": 0.0001,
"loss": 1.5397,
"step": 578
},
{
"epoch": 0.19383997321727486,
"grad_norm": 0.12697197496891022,
"learning_rate": 0.0001,
"loss": 1.5541,
"step": 579
},
{
"epoch": 0.1941747572815534,
"grad_norm": 0.12927542626857758,
"learning_rate": 0.0001,
"loss": 1.6155,
"step": 580
},
{
"epoch": 0.19450954134583193,
"grad_norm": 0.1361040472984314,
"learning_rate": 0.0001,
"loss": 1.5857,
"step": 581
},
{
"epoch": 0.19484432541011049,
"grad_norm": 0.11877462267875671,
"learning_rate": 0.0001,
"loss": 1.5558,
"step": 582
},
{
"epoch": 0.19517910947438902,
"grad_norm": 0.14642973244190216,
"learning_rate": 0.0001,
"loss": 1.6476,
"step": 583
},
{
"epoch": 0.19551389353866755,
"grad_norm": 0.13428737223148346,
"learning_rate": 0.0001,
"loss": 1.5862,
"step": 584
},
{
"epoch": 0.1958486776029461,
"grad_norm": 0.1275390088558197,
"learning_rate": 0.0001,
"loss": 1.5418,
"step": 585
},
{
"epoch": 0.19618346166722464,
"grad_norm": 0.1398482322692871,
"learning_rate": 0.0001,
"loss": 1.4985,
"step": 586
},
{
"epoch": 0.19651824573150317,
"grad_norm": 0.12443619966506958,
"learning_rate": 0.0001,
"loss": 1.5726,
"step": 587
},
{
"epoch": 0.19685302979578173,
"grad_norm": 0.12923243641853333,
"learning_rate": 0.0001,
"loss": 1.5596,
"step": 588
},
{
"epoch": 0.19718781386006026,
"grad_norm": 0.14045698940753937,
"learning_rate": 0.0001,
"loss": 1.5475,
"step": 589
},
{
"epoch": 0.1975225979243388,
"grad_norm": 0.12687772512435913,
"learning_rate": 0.0001,
"loss": 1.7041,
"step": 590
},
{
"epoch": 0.19785738198861735,
"grad_norm": 0.14536388218402863,
"learning_rate": 0.0001,
"loss": 1.5724,
"step": 591
},
{
"epoch": 0.19819216605289589,
"grad_norm": 0.1331462413072586,
"learning_rate": 0.0001,
"loss": 1.6991,
"step": 592
},
{
"epoch": 0.19852695011717442,
"grad_norm": 0.13363464176654816,
"learning_rate": 0.0001,
"loss": 1.6665,
"step": 593
},
{
"epoch": 0.19886173418145298,
"grad_norm": 0.13291539251804352,
"learning_rate": 0.0001,
"loss": 1.6278,
"step": 594
},
{
"epoch": 0.1991965182457315,
"grad_norm": 0.1261158436536789,
"learning_rate": 0.0001,
"loss": 1.6129,
"step": 595
},
{
"epoch": 0.19953130231001004,
"grad_norm": 0.12324585020542145,
"learning_rate": 0.0001,
"loss": 1.6509,
"step": 596
},
{
"epoch": 0.19986608637428857,
"grad_norm": 0.11849376559257507,
"learning_rate": 0.0001,
"loss": 1.6226,
"step": 597
},
{
"epoch": 0.20020087043856713,
"grad_norm": 0.1167241707444191,
"learning_rate": 0.0001,
"loss": 1.5539,
"step": 598
},
{
"epoch": 0.20053565450284566,
"grad_norm": 0.11860879510641098,
"learning_rate": 0.0001,
"loss": 1.5962,
"step": 599
},
{
"epoch": 0.2008704385671242,
"grad_norm": 0.12385833263397217,
"learning_rate": 0.0001,
"loss": 1.593,
"step": 600
},
{
"epoch": 0.20120522263140275,
"grad_norm": 0.12093829363584518,
"learning_rate": 0.0001,
"loss": 1.6914,
"step": 601
},
{
"epoch": 0.20154000669568128,
"grad_norm": 0.11839880049228668,
"learning_rate": 0.0001,
"loss": 1.5645,
"step": 602
},
{
"epoch": 0.20187479075995982,
"grad_norm": 0.11958955973386765,
"learning_rate": 0.0001,
"loss": 1.6964,
"step": 603
},
{
"epoch": 0.20220957482423838,
"grad_norm": 0.12148015946149826,
"learning_rate": 0.0001,
"loss": 1.6201,
"step": 604
},
{
"epoch": 0.2025443588885169,
"grad_norm": 0.11879414319992065,
"learning_rate": 0.0001,
"loss": 1.5696,
"step": 605
},
{
"epoch": 0.20287914295279544,
"grad_norm": 0.11815709620714188,
"learning_rate": 0.0001,
"loss": 1.5771,
"step": 606
},
{
"epoch": 0.203213927017074,
"grad_norm": 0.12391653656959534,
"learning_rate": 0.0001,
"loss": 1.4984,
"step": 607
},
{
"epoch": 0.20354871108135253,
"grad_norm": 0.12949740886688232,
"learning_rate": 0.0001,
"loss": 1.6746,
"step": 608
},
{
"epoch": 0.20388349514563106,
"grad_norm": 0.12630179524421692,
"learning_rate": 0.0001,
"loss": 1.5984,
"step": 609
},
{
"epoch": 0.20421827920990962,
"grad_norm": 0.13836237788200378,
"learning_rate": 0.0001,
"loss": 1.6562,
"step": 610
},
{
"epoch": 0.20455306327418815,
"grad_norm": 0.12105460464954376,
"learning_rate": 0.0001,
"loss": 1.628,
"step": 611
},
{
"epoch": 0.20488784733846668,
"grad_norm": 0.13807529211044312,
"learning_rate": 0.0001,
"loss": 1.5858,
"step": 612
},
{
"epoch": 0.20522263140274524,
"grad_norm": 0.12660756707191467,
"learning_rate": 0.0001,
"loss": 1.5819,
"step": 613
},
{
"epoch": 0.20555741546702377,
"grad_norm": 0.11513250321149826,
"learning_rate": 0.0001,
"loss": 1.5572,
"step": 614
},
{
"epoch": 0.2058921995313023,
"grad_norm": 0.12499019503593445,
"learning_rate": 0.0001,
"loss": 1.5902,
"step": 615
},
{
"epoch": 0.20622698359558084,
"grad_norm": 0.13060630857944489,
"learning_rate": 0.0001,
"loss": 1.6933,
"step": 616
},
{
"epoch": 0.2065617676598594,
"grad_norm": 0.11751000583171844,
"learning_rate": 0.0001,
"loss": 1.6165,
"step": 617
},
{
"epoch": 0.20689655172413793,
"grad_norm": 0.12362553179264069,
"learning_rate": 0.0001,
"loss": 1.6214,
"step": 618
},
{
"epoch": 0.20723133578841646,
"grad_norm": 0.11933618783950806,
"learning_rate": 0.0001,
"loss": 1.6041,
"step": 619
},
{
"epoch": 0.20756611985269502,
"grad_norm": 0.12560446560382843,
"learning_rate": 0.0001,
"loss": 1.689,
"step": 620
},
{
"epoch": 0.20790090391697355,
"grad_norm": 0.12433163821697235,
"learning_rate": 0.0001,
"loss": 1.6717,
"step": 621
},
{
"epoch": 0.20823568798125208,
"grad_norm": 0.12220048159360886,
"learning_rate": 0.0001,
"loss": 1.6216,
"step": 622
},
{
"epoch": 0.20857047204553064,
"grad_norm": 0.11404889076948166,
"learning_rate": 0.0001,
"loss": 1.5362,
"step": 623
},
{
"epoch": 0.20890525610980917,
"grad_norm": 0.11990871280431747,
"learning_rate": 0.0001,
"loss": 1.5971,
"step": 624
},
{
"epoch": 0.2092400401740877,
"grad_norm": 0.11785005033016205,
"learning_rate": 0.0001,
"loss": 1.5641,
"step": 625
},
{
"epoch": 0.20957482423836626,
"grad_norm": 0.12312883138656616,
"learning_rate": 0.0001,
"loss": 1.617,
"step": 626
},
{
"epoch": 0.2099096083026448,
"grad_norm": 0.11449938267469406,
"learning_rate": 0.0001,
"loss": 1.5396,
"step": 627
},
{
"epoch": 0.21024439236692333,
"grad_norm": 0.1219322681427002,
"learning_rate": 0.0001,
"loss": 1.5951,
"step": 628
},
{
"epoch": 0.2105791764312019,
"grad_norm": 0.12152589112520218,
"learning_rate": 0.0001,
"loss": 1.6017,
"step": 629
},
{
"epoch": 0.21091396049548042,
"grad_norm": 0.11546038091182709,
"learning_rate": 0.0001,
"loss": 1.5969,
"step": 630
},
{
"epoch": 0.21124874455975895,
"grad_norm": 0.1294824779033661,
"learning_rate": 0.0001,
"loss": 1.5983,
"step": 631
},
{
"epoch": 0.2115835286240375,
"grad_norm": 0.12606552243232727,
"learning_rate": 0.0001,
"loss": 1.6026,
"step": 632
},
{
"epoch": 0.21191831268831604,
"grad_norm": 0.12761344015598297,
"learning_rate": 0.0001,
"loss": 1.6561,
"step": 633
},
{
"epoch": 0.21225309675259457,
"grad_norm": 0.11588580161333084,
"learning_rate": 0.0001,
"loss": 1.5967,
"step": 634
},
{
"epoch": 0.2125878808168731,
"grad_norm": 0.11629272252321243,
"learning_rate": 0.0001,
"loss": 1.5894,
"step": 635
},
{
"epoch": 0.21292266488115166,
"grad_norm": 0.1237213984131813,
"learning_rate": 0.0001,
"loss": 1.6113,
"step": 636
},
{
"epoch": 0.2132574489454302,
"grad_norm": 0.12293344736099243,
"learning_rate": 0.0001,
"loss": 1.5972,
"step": 637
},
{
"epoch": 0.21359223300970873,
"grad_norm": 0.1172887459397316,
"learning_rate": 0.0001,
"loss": 1.5765,
"step": 638
},
{
"epoch": 0.21392701707398729,
"grad_norm": 0.12403010576963425,
"learning_rate": 0.0001,
"loss": 1.5639,
"step": 639
},
{
"epoch": 0.21426180113826582,
"grad_norm": 0.12683235108852386,
"learning_rate": 0.0001,
"loss": 1.5197,
"step": 640
},
{
"epoch": 0.21459658520254435,
"grad_norm": 0.11593903601169586,
"learning_rate": 0.0001,
"loss": 1.5158,
"step": 641
},
{
"epoch": 0.2149313692668229,
"grad_norm": 0.1251828819513321,
"learning_rate": 0.0001,
"loss": 1.6396,
"step": 642
},
{
"epoch": 0.21526615333110144,
"grad_norm": 0.12358346581459045,
"learning_rate": 0.0001,
"loss": 1.6012,
"step": 643
},
{
"epoch": 0.21560093739537997,
"grad_norm": 0.11473721265792847,
"learning_rate": 0.0001,
"loss": 1.5365,
"step": 644
},
{
"epoch": 0.21593572145965853,
"grad_norm": 0.1184060201048851,
"learning_rate": 0.0001,
"loss": 1.4507,
"step": 645
},
{
"epoch": 0.21627050552393706,
"grad_norm": 0.12540043890476227,
"learning_rate": 0.0001,
"loss": 1.5854,
"step": 646
},
{
"epoch": 0.2166052895882156,
"grad_norm": 0.12070447206497192,
"learning_rate": 0.0001,
"loss": 1.6097,
"step": 647
},
{
"epoch": 0.21694007365249415,
"grad_norm": 0.11351459473371506,
"learning_rate": 0.0001,
"loss": 1.5937,
"step": 648
},
{
"epoch": 0.21727485771677268,
"grad_norm": 0.1242094486951828,
"learning_rate": 0.0001,
"loss": 1.5222,
"step": 649
},
{
"epoch": 0.21760964178105122,
"grad_norm": 0.12054958194494247,
"learning_rate": 0.0001,
"loss": 1.5285,
"step": 650
},
{
"epoch": 0.21794442584532978,
"grad_norm": 0.12539923191070557,
"learning_rate": 0.0001,
"loss": 1.5001,
"step": 651
},
{
"epoch": 0.2182792099096083,
"grad_norm": 0.12270530313253403,
"learning_rate": 0.0001,
"loss": 1.6769,
"step": 652
},
{
"epoch": 0.21861399397388684,
"grad_norm": 0.12920905649662018,
"learning_rate": 0.0001,
"loss": 1.5999,
"step": 653
},
{
"epoch": 0.21894877803816537,
"grad_norm": 0.13267312943935394,
"learning_rate": 0.0001,
"loss": 1.5382,
"step": 654
},
{
"epoch": 0.21928356210244393,
"grad_norm": 0.11984428763389587,
"learning_rate": 0.0001,
"loss": 1.631,
"step": 655
},
{
"epoch": 0.21961834616672246,
"grad_norm": 0.1474982053041458,
"learning_rate": 0.0001,
"loss": 1.6709,
"step": 656
},
{
"epoch": 0.219953130231001,
"grad_norm": 0.13542193174362183,
"learning_rate": 0.0001,
"loss": 1.6415,
"step": 657
},
{
"epoch": 0.22028791429527955,
"grad_norm": 0.13832658529281616,
"learning_rate": 0.0001,
"loss": 1.6118,
"step": 658
},
{
"epoch": 0.22062269835955808,
"grad_norm": 0.15140588581562042,
"learning_rate": 0.0001,
"loss": 1.6314,
"step": 659
},
{
"epoch": 0.22095748242383662,
"grad_norm": 0.12110920995473862,
"learning_rate": 0.0001,
"loss": 1.5884,
"step": 660
},
{
"epoch": 0.22129226648811517,
"grad_norm": 0.14811581373214722,
"learning_rate": 0.0001,
"loss": 1.6642,
"step": 661
},
{
"epoch": 0.2216270505523937,
"grad_norm": 0.12733857333660126,
"learning_rate": 0.0001,
"loss": 1.5512,
"step": 662
},
{
"epoch": 0.22196183461667224,
"grad_norm": 0.13028332591056824,
"learning_rate": 0.0001,
"loss": 1.5613,
"step": 663
},
{
"epoch": 0.2222966186809508,
"grad_norm": 0.1242808997631073,
"learning_rate": 0.0001,
"loss": 1.5869,
"step": 664
},
{
"epoch": 0.22263140274522933,
"grad_norm": 0.12380847334861755,
"learning_rate": 0.0001,
"loss": 1.5926,
"step": 665
},
{
"epoch": 0.22296618680950786,
"grad_norm": 0.12564754486083984,
"learning_rate": 0.0001,
"loss": 1.5811,
"step": 666
},
{
"epoch": 0.22330097087378642,
"grad_norm": 0.1509399712085724,
"learning_rate": 0.0001,
"loss": 1.7172,
"step": 667
},
{
"epoch": 0.22363575493806495,
"grad_norm": 0.12397512793540955,
"learning_rate": 0.0001,
"loss": 1.642,
"step": 668
},
{
"epoch": 0.22397053900234348,
"grad_norm": 0.13826021552085876,
"learning_rate": 0.0001,
"loss": 1.6395,
"step": 669
},
{
"epoch": 0.22430532306662204,
"grad_norm": 0.1417902112007141,
"learning_rate": 0.0001,
"loss": 1.6169,
"step": 670
},
{
"epoch": 0.22464010713090057,
"grad_norm": 0.12220132350921631,
"learning_rate": 0.0001,
"loss": 1.5686,
"step": 671
},
{
"epoch": 0.2249748911951791,
"grad_norm": 0.13563144207000732,
"learning_rate": 0.0001,
"loss": 1.6556,
"step": 672
},
{
"epoch": 0.22530967525945764,
"grad_norm": 0.13794521987438202,
"learning_rate": 0.0001,
"loss": 1.5187,
"step": 673
},
{
"epoch": 0.2256444593237362,
"grad_norm": 0.12060145288705826,
"learning_rate": 0.0001,
"loss": 1.5901,
"step": 674
},
{
"epoch": 0.22597924338801473,
"grad_norm": 0.13909369707107544,
"learning_rate": 0.0001,
"loss": 1.5101,
"step": 675
},
{
"epoch": 0.22631402745229326,
"grad_norm": 0.13746792078018188,
"learning_rate": 0.0001,
"loss": 1.6084,
"step": 676
},
{
"epoch": 0.22664881151657182,
"grad_norm": 0.11612525582313538,
"learning_rate": 0.0001,
"loss": 1.606,
"step": 677
},
{
"epoch": 0.22698359558085035,
"grad_norm": 0.13988125324249268,
"learning_rate": 0.0001,
"loss": 1.6123,
"step": 678
},
{
"epoch": 0.22731837964512888,
"grad_norm": 0.13023462891578674,
"learning_rate": 0.0001,
"loss": 1.6202,
"step": 679
},
{
"epoch": 0.22765316370940744,
"grad_norm": 0.11764882504940033,
"learning_rate": 0.0001,
"loss": 1.5744,
"step": 680
},
{
"epoch": 0.22798794777368597,
"grad_norm": 0.12987253069877625,
"learning_rate": 0.0001,
"loss": 1.6287,
"step": 681
},
{
"epoch": 0.2283227318379645,
"grad_norm": 0.12687528133392334,
"learning_rate": 0.0001,
"loss": 1.6177,
"step": 682
},
{
"epoch": 0.22865751590224306,
"grad_norm": 0.117088183760643,
"learning_rate": 0.0001,
"loss": 1.5704,
"step": 683
},
{
"epoch": 0.2289922999665216,
"grad_norm": 0.13380305469036102,
"learning_rate": 0.0001,
"loss": 1.5013,
"step": 684
},
{
"epoch": 0.22932708403080013,
"grad_norm": 0.13155803084373474,
"learning_rate": 0.0001,
"loss": 1.6627,
"step": 685
},
{
"epoch": 0.2296618680950787,
"grad_norm": 0.12210634350776672,
"learning_rate": 0.0001,
"loss": 1.491,
"step": 686
},
{
"epoch": 0.22999665215935722,
"grad_norm": 0.12427474558353424,
"learning_rate": 0.0001,
"loss": 1.6381,
"step": 687
},
{
"epoch": 0.23033143622363575,
"grad_norm": 0.12354297190904617,
"learning_rate": 0.0001,
"loss": 1.5804,
"step": 688
},
{
"epoch": 0.2306662202879143,
"grad_norm": 0.11402271687984467,
"learning_rate": 0.0001,
"loss": 1.5562,
"step": 689
},
{
"epoch": 0.23100100435219284,
"grad_norm": 0.12571346759796143,
"learning_rate": 0.0001,
"loss": 1.6974,
"step": 690
},
{
"epoch": 0.23133578841647137,
"grad_norm": 0.12201119214296341,
"learning_rate": 0.0001,
"loss": 1.5866,
"step": 691
},
{
"epoch": 0.2316705724807499,
"grad_norm": 0.13017117977142334,
"learning_rate": 0.0001,
"loss": 1.6493,
"step": 692
},
{
"epoch": 0.23200535654502846,
"grad_norm": 0.11595404893159866,
"learning_rate": 0.0001,
"loss": 1.5047,
"step": 693
},
{
"epoch": 0.232340140609307,
"grad_norm": 0.11953503638505936,
"learning_rate": 0.0001,
"loss": 1.4952,
"step": 694
},
{
"epoch": 0.23267492467358553,
"grad_norm": 0.11844140291213989,
"learning_rate": 0.0001,
"loss": 1.6223,
"step": 695
},
{
"epoch": 0.23300970873786409,
"grad_norm": 0.12358598411083221,
"learning_rate": 0.0001,
"loss": 1.6303,
"step": 696
},
{
"epoch": 0.23334449280214262,
"grad_norm": 0.12384648621082306,
"learning_rate": 0.0001,
"loss": 1.6594,
"step": 697
},
{
"epoch": 0.23367927686642115,
"grad_norm": 0.11835581809282303,
"learning_rate": 0.0001,
"loss": 1.6098,
"step": 698
},
{
"epoch": 0.2340140609306997,
"grad_norm": 0.1138228103518486,
"learning_rate": 0.0001,
"loss": 1.4118,
"step": 699
},
{
"epoch": 0.23434884499497824,
"grad_norm": 0.11459102481603622,
"learning_rate": 0.0001,
"loss": 1.5633,
"step": 700
},
{
"epoch": 0.23468362905925677,
"grad_norm": 0.11587528139352798,
"learning_rate": 0.0001,
"loss": 1.6973,
"step": 701
},
{
"epoch": 0.23501841312353533,
"grad_norm": 0.13280251622200012,
"learning_rate": 0.0001,
"loss": 1.5161,
"step": 702
},
{
"epoch": 0.23535319718781386,
"grad_norm": 0.12264399230480194,
"learning_rate": 0.0001,
"loss": 1.656,
"step": 703
},
{
"epoch": 0.2356879812520924,
"grad_norm": 0.11608457565307617,
"learning_rate": 0.0001,
"loss": 1.5099,
"step": 704
},
{
"epoch": 0.23602276531637095,
"grad_norm": 0.12152610719203949,
"learning_rate": 0.0001,
"loss": 1.5169,
"step": 705
},
{
"epoch": 0.23635754938064948,
"grad_norm": 0.12914855778217316,
"learning_rate": 0.0001,
"loss": 1.5904,
"step": 706
},
{
"epoch": 0.23669233344492802,
"grad_norm": 0.13277898728847504,
"learning_rate": 0.0001,
"loss": 1.656,
"step": 707
},
{
"epoch": 0.23702711750920658,
"grad_norm": 0.12540487945079803,
"learning_rate": 0.0001,
"loss": 1.6178,
"step": 708
},
{
"epoch": 0.2373619015734851,
"grad_norm": 0.11845158785581589,
"learning_rate": 0.0001,
"loss": 1.5014,
"step": 709
},
{
"epoch": 0.23769668563776364,
"grad_norm": 0.11418534815311432,
"learning_rate": 0.0001,
"loss": 1.5292,
"step": 710
},
{
"epoch": 0.23803146970204217,
"grad_norm": 0.1384686678647995,
"learning_rate": 0.0001,
"loss": 1.6188,
"step": 711
},
{
"epoch": 0.23836625376632073,
"grad_norm": 0.12325987964868546,
"learning_rate": 0.0001,
"loss": 1.5636,
"step": 712
},
{
"epoch": 0.23870103783059926,
"grad_norm": 0.11931071430444717,
"learning_rate": 0.0001,
"loss": 1.5655,
"step": 713
},
{
"epoch": 0.2390358218948778,
"grad_norm": 0.12119931727647781,
"learning_rate": 0.0001,
"loss": 1.5289,
"step": 714
},
{
"epoch": 0.23937060595915635,
"grad_norm": 0.12172186374664307,
"learning_rate": 0.0001,
"loss": 1.6467,
"step": 715
},
{
"epoch": 0.23970539002343488,
"grad_norm": 0.12344299256801605,
"learning_rate": 0.0001,
"loss": 1.5616,
"step": 716
},
{
"epoch": 0.24004017408771341,
"grad_norm": 0.12173335254192352,
"learning_rate": 0.0001,
"loss": 1.6135,
"step": 717
},
{
"epoch": 0.24037495815199197,
"grad_norm": 0.1223810538649559,
"learning_rate": 0.0001,
"loss": 1.6239,
"step": 718
},
{
"epoch": 0.2407097422162705,
"grad_norm": 0.11744136363267899,
"learning_rate": 0.0001,
"loss": 1.5704,
"step": 719
},
{
"epoch": 0.24104452628054904,
"grad_norm": 0.12341196089982986,
"learning_rate": 0.0001,
"loss": 1.6704,
"step": 720
},
{
"epoch": 0.2413793103448276,
"grad_norm": 0.12578146159648895,
"learning_rate": 0.0001,
"loss": 1.604,
"step": 721
},
{
"epoch": 0.24171409440910613,
"grad_norm": 0.12708286941051483,
"learning_rate": 0.0001,
"loss": 1.5583,
"step": 722
},
{
"epoch": 0.24204887847338466,
"grad_norm": 0.11757246404886246,
"learning_rate": 0.0001,
"loss": 1.4911,
"step": 723
},
{
"epoch": 0.24238366253766322,
"grad_norm": 0.1309349238872528,
"learning_rate": 0.0001,
"loss": 1.6648,
"step": 724
},
{
"epoch": 0.24271844660194175,
"grad_norm": 0.13289286196231842,
"learning_rate": 0.0001,
"loss": 1.6547,
"step": 725
},
{
"epoch": 0.24305323066622028,
"grad_norm": 0.12044942378997803,
"learning_rate": 0.0001,
"loss": 1.661,
"step": 726
},
{
"epoch": 0.24338801473049884,
"grad_norm": 0.12810328602790833,
"learning_rate": 0.0001,
"loss": 1.6775,
"step": 727
},
{
"epoch": 0.24372279879477737,
"grad_norm": 0.12643273174762726,
"learning_rate": 0.0001,
"loss": 1.4938,
"step": 728
},
{
"epoch": 0.2440575828590559,
"grad_norm": 0.1253504455089569,
"learning_rate": 0.0001,
"loss": 1.6482,
"step": 729
},
{
"epoch": 0.24439236692333444,
"grad_norm": 0.12725912034511566,
"learning_rate": 0.0001,
"loss": 1.4911,
"step": 730
},
{
"epoch": 0.244727150987613,
"grad_norm": 0.13506008684635162,
"learning_rate": 0.0001,
"loss": 1.5739,
"step": 731
},
{
"epoch": 0.24506193505189153,
"grad_norm": 0.12034797668457031,
"learning_rate": 0.0001,
"loss": 1.6477,
"step": 732
},
{
"epoch": 0.24539671911617006,
"grad_norm": 0.12169791758060455,
"learning_rate": 0.0001,
"loss": 1.6398,
"step": 733
},
{
"epoch": 0.24573150318044862,
"grad_norm": 0.1253383755683899,
"learning_rate": 0.0001,
"loss": 1.5921,
"step": 734
},
{
"epoch": 0.24606628724472715,
"grad_norm": 0.11854001134634018,
"learning_rate": 0.0001,
"loss": 1.598,
"step": 735
},
{
"epoch": 0.24640107130900568,
"grad_norm": 0.13825742900371552,
"learning_rate": 0.0001,
"loss": 1.6588,
"step": 736
},
{
"epoch": 0.24673585537328424,
"grad_norm": 0.1235450729727745,
"learning_rate": 0.0001,
"loss": 1.5872,
"step": 737
},
{
"epoch": 0.24707063943756277,
"grad_norm": 0.12598398327827454,
"learning_rate": 0.0001,
"loss": 1.6038,
"step": 738
},
{
"epoch": 0.2474054235018413,
"grad_norm": 0.14527225494384766,
"learning_rate": 0.0001,
"loss": 1.6419,
"step": 739
},
{
"epoch": 0.24774020756611986,
"grad_norm": 0.11842803657054901,
"learning_rate": 0.0001,
"loss": 1.5628,
"step": 740
},
{
"epoch": 0.2480749916303984,
"grad_norm": 0.12376052141189575,
"learning_rate": 0.0001,
"loss": 1.5271,
"step": 741
},
{
"epoch": 0.24840977569467693,
"grad_norm": 0.13634417951107025,
"learning_rate": 0.0001,
"loss": 1.7012,
"step": 742
},
{
"epoch": 0.24874455975895549,
"grad_norm": 0.12457748502492905,
"learning_rate": 0.0001,
"loss": 1.5623,
"step": 743
},
{
"epoch": 0.24907934382323402,
"grad_norm": 0.11860496550798416,
"learning_rate": 0.0001,
"loss": 1.6049,
"step": 744
},
{
"epoch": 0.24941412788751255,
"grad_norm": 0.12447136640548706,
"learning_rate": 0.0001,
"loss": 1.6967,
"step": 745
},
{
"epoch": 0.2497489119517911,
"grad_norm": 0.12220341712236404,
"learning_rate": 0.0001,
"loss": 1.5819,
"step": 746
},
{
"epoch": 0.2500836960160696,
"grad_norm": 0.11865612119436264,
"learning_rate": 0.0001,
"loss": 1.5519,
"step": 747
},
{
"epoch": 0.25041848008034817,
"grad_norm": 0.11847954988479614,
"learning_rate": 0.0001,
"loss": 1.5087,
"step": 748
},
{
"epoch": 0.25075326414462673,
"grad_norm": 0.12107084691524506,
"learning_rate": 0.0001,
"loss": 1.5995,
"step": 749
},
{
"epoch": 0.25108804820890523,
"grad_norm": 0.12188322097063065,
"learning_rate": 0.0001,
"loss": 1.6439,
"step": 750
},
{
"epoch": 0.2514228322731838,
"grad_norm": 0.12144109606742859,
"learning_rate": 0.0001,
"loss": 1.5613,
"step": 751
},
{
"epoch": 0.25175761633746235,
"grad_norm": 0.12133816629648209,
"learning_rate": 0.0001,
"loss": 1.5364,
"step": 752
},
{
"epoch": 0.25209240040174086,
"grad_norm": 0.11708073318004608,
"learning_rate": 0.0001,
"loss": 1.5221,
"step": 753
},
{
"epoch": 0.2524271844660194,
"grad_norm": 0.1203671544790268,
"learning_rate": 0.0001,
"loss": 1.5736,
"step": 754
},
{
"epoch": 0.252761968530298,
"grad_norm": 0.12079092115163803,
"learning_rate": 0.0001,
"loss": 1.5842,
"step": 755
},
{
"epoch": 0.2530967525945765,
"grad_norm": 0.1294735223054886,
"learning_rate": 0.0001,
"loss": 1.5994,
"step": 756
},
{
"epoch": 0.25343153665885504,
"grad_norm": 0.1251528263092041,
"learning_rate": 0.0001,
"loss": 1.6391,
"step": 757
},
{
"epoch": 0.2537663207231336,
"grad_norm": 0.12093610316514969,
"learning_rate": 0.0001,
"loss": 1.6275,
"step": 758
},
{
"epoch": 0.2541011047874121,
"grad_norm": 0.1214980036020279,
"learning_rate": 0.0001,
"loss": 1.5887,
"step": 759
},
{
"epoch": 0.25443588885169066,
"grad_norm": 0.12011279165744781,
"learning_rate": 0.0001,
"loss": 1.5973,
"step": 760
},
{
"epoch": 0.2547706729159692,
"grad_norm": 0.12630945444107056,
"learning_rate": 0.0001,
"loss": 1.6184,
"step": 761
},
{
"epoch": 0.2551054569802477,
"grad_norm": 0.12001120299100876,
"learning_rate": 0.0001,
"loss": 1.5298,
"step": 762
},
{
"epoch": 0.2554402410445263,
"grad_norm": 0.1369365155696869,
"learning_rate": 0.0001,
"loss": 1.5718,
"step": 763
},
{
"epoch": 0.25577502510880484,
"grad_norm": 0.1201329231262207,
"learning_rate": 0.0001,
"loss": 1.5354,
"step": 764
},
{
"epoch": 0.25610980917308335,
"grad_norm": 0.12741532921791077,
"learning_rate": 0.0001,
"loss": 1.6193,
"step": 765
},
{
"epoch": 0.2564445932373619,
"grad_norm": 0.12349703162908554,
"learning_rate": 0.0001,
"loss": 1.6143,
"step": 766
},
{
"epoch": 0.25677937730164047,
"grad_norm": 0.11855439841747284,
"learning_rate": 0.0001,
"loss": 1.6037,
"step": 767
},
{
"epoch": 0.25711416136591897,
"grad_norm": 0.12034845352172852,
"learning_rate": 0.0001,
"loss": 1.5317,
"step": 768
},
{
"epoch": 0.25744894543019753,
"grad_norm": 0.11987943202257156,
"learning_rate": 0.0001,
"loss": 1.535,
"step": 769
},
{
"epoch": 0.2577837294944761,
"grad_norm": 0.12118515372276306,
"learning_rate": 0.0001,
"loss": 1.5974,
"step": 770
},
{
"epoch": 0.2581185135587546,
"grad_norm": 0.12842996418476105,
"learning_rate": 0.0001,
"loss": 1.609,
"step": 771
},
{
"epoch": 0.25845329762303315,
"grad_norm": 0.12420446425676346,
"learning_rate": 0.0001,
"loss": 1.6093,
"step": 772
},
{
"epoch": 0.2587880816873117,
"grad_norm": 0.12443120032548904,
"learning_rate": 0.0001,
"loss": 1.6122,
"step": 773
},
{
"epoch": 0.2591228657515902,
"grad_norm": 0.11912049353122711,
"learning_rate": 0.0001,
"loss": 1.5209,
"step": 774
},
{
"epoch": 0.2594576498158688,
"grad_norm": 0.1273064911365509,
"learning_rate": 0.0001,
"loss": 1.608,
"step": 775
},
{
"epoch": 0.2597924338801473,
"grad_norm": 0.11585114896297455,
"learning_rate": 0.0001,
"loss": 1.3888,
"step": 776
},
{
"epoch": 0.26012721794442584,
"grad_norm": 0.12005290389060974,
"learning_rate": 0.0001,
"loss": 1.4666,
"step": 777
},
{
"epoch": 0.2604620020087044,
"grad_norm": 0.11954595148563385,
"learning_rate": 0.0001,
"loss": 1.5558,
"step": 778
},
{
"epoch": 0.2607967860729829,
"grad_norm": 0.1307271122932434,
"learning_rate": 0.0001,
"loss": 1.6063,
"step": 779
},
{
"epoch": 0.26113157013726146,
"grad_norm": 0.113981693983078,
"learning_rate": 0.0001,
"loss": 1.4857,
"step": 780
},
{
"epoch": 0.26146635420154,
"grad_norm": 0.1225418671965599,
"learning_rate": 0.0001,
"loss": 1.5508,
"step": 781
},
{
"epoch": 0.2618011382658185,
"grad_norm": 0.12919741868972778,
"learning_rate": 0.0001,
"loss": 1.6255,
"step": 782
},
{
"epoch": 0.2621359223300971,
"grad_norm": 0.11552941054105759,
"learning_rate": 0.0001,
"loss": 1.6183,
"step": 783
},
{
"epoch": 0.26247070639437564,
"grad_norm": 0.13457614183425903,
"learning_rate": 0.0001,
"loss": 1.6461,
"step": 784
},
{
"epoch": 0.26280549045865415,
"grad_norm": 0.11841408908367157,
"learning_rate": 0.0001,
"loss": 1.5481,
"step": 785
},
{
"epoch": 0.2631402745229327,
"grad_norm": 0.11701938509941101,
"learning_rate": 0.0001,
"loss": 1.5883,
"step": 786
},
{
"epoch": 0.26347505858721126,
"grad_norm": 0.14221838116645813,
"learning_rate": 0.0001,
"loss": 1.5904,
"step": 787
},
{
"epoch": 0.26380984265148977,
"grad_norm": 0.11813905090093613,
"learning_rate": 0.0001,
"loss": 1.5653,
"step": 788
},
{
"epoch": 0.2641446267157683,
"grad_norm": 0.1315639317035675,
"learning_rate": 0.0001,
"loss": 1.5811,
"step": 789
},
{
"epoch": 0.2644794107800469,
"grad_norm": 0.13400433957576752,
"learning_rate": 0.0001,
"loss": 1.5363,
"step": 790
},
{
"epoch": 0.2648141948443254,
"grad_norm": 0.12116281688213348,
"learning_rate": 0.0001,
"loss": 1.6353,
"step": 791
},
{
"epoch": 0.26514897890860395,
"grad_norm": 0.1382567137479782,
"learning_rate": 0.0001,
"loss": 1.592,
"step": 792
},
{
"epoch": 0.2654837629728825,
"grad_norm": 0.14005912840366364,
"learning_rate": 0.0001,
"loss": 1.6114,
"step": 793
},
{
"epoch": 0.265818547037161,
"grad_norm": 0.13382911682128906,
"learning_rate": 0.0001,
"loss": 1.5942,
"step": 794
},
{
"epoch": 0.26615333110143957,
"grad_norm": 0.12423510104417801,
"learning_rate": 0.0001,
"loss": 1.5378,
"step": 795
},
{
"epoch": 0.26648811516571813,
"grad_norm": 0.12228628993034363,
"learning_rate": 0.0001,
"loss": 1.5704,
"step": 796
},
{
"epoch": 0.26682289922999664,
"grad_norm": 0.1286916881799698,
"learning_rate": 0.0001,
"loss": 1.6037,
"step": 797
},
{
"epoch": 0.2671576832942752,
"grad_norm": 0.12864018976688385,
"learning_rate": 0.0001,
"loss": 1.6522,
"step": 798
},
{
"epoch": 0.26749246735855375,
"grad_norm": 0.12012400478124619,
"learning_rate": 0.0001,
"loss": 1.5275,
"step": 799
},
{
"epoch": 0.26782725142283226,
"grad_norm": 0.12273643165826797,
"learning_rate": 0.0001,
"loss": 1.5848,
"step": 800
},
{
"epoch": 0.2681620354871108,
"grad_norm": 0.13991284370422363,
"learning_rate": 0.0001,
"loss": 1.6271,
"step": 801
},
{
"epoch": 0.2684968195513894,
"grad_norm": 0.1236526146531105,
"learning_rate": 0.0001,
"loss": 1.57,
"step": 802
},
{
"epoch": 0.2688316036156679,
"grad_norm": 0.1302153319120407,
"learning_rate": 0.0001,
"loss": 1.5638,
"step": 803
},
{
"epoch": 0.26916638767994644,
"grad_norm": 0.11963735520839691,
"learning_rate": 0.0001,
"loss": 1.6089,
"step": 804
},
{
"epoch": 0.269501171744225,
"grad_norm": 0.13298673927783966,
"learning_rate": 0.0001,
"loss": 1.6313,
"step": 805
},
{
"epoch": 0.2698359558085035,
"grad_norm": 0.13616934418678284,
"learning_rate": 0.0001,
"loss": 1.653,
"step": 806
},
{
"epoch": 0.27017073987278206,
"grad_norm": 0.12497668713331223,
"learning_rate": 0.0001,
"loss": 1.5514,
"step": 807
},
{
"epoch": 0.2705055239370606,
"grad_norm": 0.11764683574438095,
"learning_rate": 0.0001,
"loss": 1.5878,
"step": 808
},
{
"epoch": 0.2708403080013391,
"grad_norm": 0.12114263325929642,
"learning_rate": 0.0001,
"loss": 1.5628,
"step": 809
},
{
"epoch": 0.2711750920656177,
"grad_norm": 0.1347784847021103,
"learning_rate": 0.0001,
"loss": 1.7159,
"step": 810
},
{
"epoch": 0.27150987612989624,
"grad_norm": 0.12009880691766739,
"learning_rate": 0.0001,
"loss": 1.6043,
"step": 811
},
{
"epoch": 0.27184466019417475,
"grad_norm": 0.1278241127729416,
"learning_rate": 0.0001,
"loss": 1.6309,
"step": 812
},
{
"epoch": 0.2721794442584533,
"grad_norm": 0.1216406300663948,
"learning_rate": 0.0001,
"loss": 1.5867,
"step": 813
},
{
"epoch": 0.2725142283227318,
"grad_norm": 0.11623333394527435,
"learning_rate": 0.0001,
"loss": 1.5272,
"step": 814
},
{
"epoch": 0.27284901238701037,
"grad_norm": 0.11762827634811401,
"learning_rate": 0.0001,
"loss": 1.4148,
"step": 815
},
{
"epoch": 0.27318379645128893,
"grad_norm": 0.12679798901081085,
"learning_rate": 0.0001,
"loss": 1.678,
"step": 816
},
{
"epoch": 0.27351858051556743,
"grad_norm": 0.12463215738534927,
"learning_rate": 0.0001,
"loss": 1.6383,
"step": 817
},
{
"epoch": 0.273853364579846,
"grad_norm": 0.12248417735099792,
"learning_rate": 0.0001,
"loss": 1.5937,
"step": 818
},
{
"epoch": 0.27418814864412455,
"grad_norm": 0.11953899264335632,
"learning_rate": 0.0001,
"loss": 1.5704,
"step": 819
},
{
"epoch": 0.27452293270840306,
"grad_norm": 0.12919148802757263,
"learning_rate": 0.0001,
"loss": 1.6948,
"step": 820
},
{
"epoch": 0.2748577167726816,
"grad_norm": 0.11798353493213654,
"learning_rate": 0.0001,
"loss": 1.4814,
"step": 821
},
{
"epoch": 0.2751925008369602,
"grad_norm": 0.13017946481704712,
"learning_rate": 0.0001,
"loss": 1.5837,
"step": 822
},
{
"epoch": 0.2755272849012387,
"grad_norm": 0.1253434419631958,
"learning_rate": 0.0001,
"loss": 1.5418,
"step": 823
},
{
"epoch": 0.27586206896551724,
"grad_norm": 0.11546894907951355,
"learning_rate": 0.0001,
"loss": 1.5851,
"step": 824
},
{
"epoch": 0.2761968530297958,
"grad_norm": 0.12117631733417511,
"learning_rate": 0.0001,
"loss": 1.6335,
"step": 825
},
{
"epoch": 0.2765316370940743,
"grad_norm": 0.12088704109191895,
"learning_rate": 0.0001,
"loss": 1.571,
"step": 826
},
{
"epoch": 0.27686642115835286,
"grad_norm": 0.12261457741260529,
"learning_rate": 0.0001,
"loss": 1.5143,
"step": 827
},
{
"epoch": 0.2772012052226314,
"grad_norm": 0.12313897162675858,
"learning_rate": 0.0001,
"loss": 1.621,
"step": 828
},
{
"epoch": 0.2775359892869099,
"grad_norm": 0.12563903629779816,
"learning_rate": 0.0001,
"loss": 1.657,
"step": 829
},
{
"epoch": 0.2778707733511885,
"grad_norm": 0.1187531128525734,
"learning_rate": 0.0001,
"loss": 1.5346,
"step": 830
},
{
"epoch": 0.27820555741546704,
"grad_norm": 0.12233595550060272,
"learning_rate": 0.0001,
"loss": 1.5835,
"step": 831
},
{
"epoch": 0.27854034147974555,
"grad_norm": 0.12235147505998611,
"learning_rate": 0.0001,
"loss": 1.6104,
"step": 832
},
{
"epoch": 0.2788751255440241,
"grad_norm": 0.11765027791261673,
"learning_rate": 0.0001,
"loss": 1.5489,
"step": 833
},
{
"epoch": 0.27920990960830266,
"grad_norm": 0.12349414080381393,
"learning_rate": 0.0001,
"loss": 1.6089,
"step": 834
},
{
"epoch": 0.27954469367258117,
"grad_norm": 0.12419009208679199,
"learning_rate": 0.0001,
"loss": 1.6418,
"step": 835
},
{
"epoch": 0.2798794777368597,
"grad_norm": 0.12406160682439804,
"learning_rate": 0.0001,
"loss": 1.5774,
"step": 836
},
{
"epoch": 0.2802142618011383,
"grad_norm": 0.11722970008850098,
"learning_rate": 0.0001,
"loss": 1.5634,
"step": 837
},
{
"epoch": 0.2805490458654168,
"grad_norm": 0.1188267171382904,
"learning_rate": 0.0001,
"loss": 1.5005,
"step": 838
},
{
"epoch": 0.28088382992969535,
"grad_norm": 0.11977598071098328,
"learning_rate": 0.0001,
"loss": 1.5556,
"step": 839
},
{
"epoch": 0.2812186139939739,
"grad_norm": 0.12196852266788483,
"learning_rate": 0.0001,
"loss": 1.604,
"step": 840
},
{
"epoch": 0.2815533980582524,
"grad_norm": 0.12035735696554184,
"learning_rate": 0.0001,
"loss": 1.5304,
"step": 841
},
{
"epoch": 0.281888182122531,
"grad_norm": 0.12375766038894653,
"learning_rate": 0.0001,
"loss": 1.5929,
"step": 842
},
{
"epoch": 0.28222296618680953,
"grad_norm": 0.1304924190044403,
"learning_rate": 0.0001,
"loss": 1.6148,
"step": 843
},
{
"epoch": 0.28255775025108804,
"grad_norm": 0.12864375114440918,
"learning_rate": 0.0001,
"loss": 1.4907,
"step": 844
},
{
"epoch": 0.2828925343153666,
"grad_norm": 0.12013059109449387,
"learning_rate": 0.0001,
"loss": 1.5051,
"step": 845
},
{
"epoch": 0.28322731837964515,
"grad_norm": 0.1277569979429245,
"learning_rate": 0.0001,
"loss": 1.5942,
"step": 846
},
{
"epoch": 0.28356210244392366,
"grad_norm": 0.13474377989768982,
"learning_rate": 0.0001,
"loss": 1.6098,
"step": 847
},
{
"epoch": 0.2838968865082022,
"grad_norm": 0.12635944783687592,
"learning_rate": 0.0001,
"loss": 1.6217,
"step": 848
},
{
"epoch": 0.2842316705724808,
"grad_norm": 0.12218885123729706,
"learning_rate": 0.0001,
"loss": 1.578,
"step": 849
},
{
"epoch": 0.2845664546367593,
"grad_norm": 0.12037128210067749,
"learning_rate": 0.0001,
"loss": 1.5502,
"step": 850
},
{
"epoch": 0.28490123870103784,
"grad_norm": 0.12386499345302582,
"learning_rate": 0.0001,
"loss": 1.6922,
"step": 851
},
{
"epoch": 0.28523602276531634,
"grad_norm": 0.1298052966594696,
"learning_rate": 0.0001,
"loss": 1.6589,
"step": 852
},
{
"epoch": 0.2855708068295949,
"grad_norm": 0.12143804877996445,
"learning_rate": 0.0001,
"loss": 1.5856,
"step": 853
},
{
"epoch": 0.28590559089387346,
"grad_norm": 0.11675681918859482,
"learning_rate": 0.0001,
"loss": 1.4877,
"step": 854
},
{
"epoch": 0.28624037495815197,
"grad_norm": 0.11870943009853363,
"learning_rate": 0.0001,
"loss": 1.5699,
"step": 855
},
{
"epoch": 0.2865751590224305,
"grad_norm": 0.12752340734004974,
"learning_rate": 0.0001,
"loss": 1.5648,
"step": 856
},
{
"epoch": 0.2869099430867091,
"grad_norm": 0.1254730522632599,
"learning_rate": 0.0001,
"loss": 1.5331,
"step": 857
},
{
"epoch": 0.2872447271509876,
"grad_norm": 0.12351144105195999,
"learning_rate": 0.0001,
"loss": 1.5984,
"step": 858
},
{
"epoch": 0.28757951121526615,
"grad_norm": 0.12823925912380219,
"learning_rate": 0.0001,
"loss": 1.4704,
"step": 859
},
{
"epoch": 0.2879142952795447,
"grad_norm": 0.12884090840816498,
"learning_rate": 0.0001,
"loss": 1.5302,
"step": 860
},
{
"epoch": 0.2882490793438232,
"grad_norm": 0.12310319393873215,
"learning_rate": 0.0001,
"loss": 1.5554,
"step": 861
},
{
"epoch": 0.28858386340810177,
"grad_norm": 0.12592901289463043,
"learning_rate": 0.0001,
"loss": 1.573,
"step": 862
},
{
"epoch": 0.28891864747238033,
"grad_norm": 0.12326246500015259,
"learning_rate": 0.0001,
"loss": 1.5408,
"step": 863
},
{
"epoch": 0.28925343153665883,
"grad_norm": 0.12688298523426056,
"learning_rate": 0.0001,
"loss": 1.609,
"step": 864
},
{
"epoch": 0.2895882156009374,
"grad_norm": 0.13284268975257874,
"learning_rate": 0.0001,
"loss": 1.4774,
"step": 865
},
{
"epoch": 0.28992299966521595,
"grad_norm": 0.12346718460321426,
"learning_rate": 0.0001,
"loss": 1.5675,
"step": 866
},
{
"epoch": 0.29025778372949446,
"grad_norm": 0.12501643598079681,
"learning_rate": 0.0001,
"loss": 1.54,
"step": 867
},
{
"epoch": 0.290592567793773,
"grad_norm": 0.14129911363124847,
"learning_rate": 0.0001,
"loss": 1.5707,
"step": 868
},
{
"epoch": 0.2909273518580516,
"grad_norm": 0.11998032033443451,
"learning_rate": 0.0001,
"loss": 1.6025,
"step": 869
},
{
"epoch": 0.2912621359223301,
"grad_norm": 0.14502458274364471,
"learning_rate": 0.0001,
"loss": 1.6066,
"step": 870
},
{
"epoch": 0.29159691998660864,
"grad_norm": 0.13429078459739685,
"learning_rate": 0.0001,
"loss": 1.5773,
"step": 871
},
{
"epoch": 0.2919317040508872,
"grad_norm": 0.12702088057994843,
"learning_rate": 0.0001,
"loss": 1.5331,
"step": 872
},
{
"epoch": 0.2922664881151657,
"grad_norm": 0.1450689435005188,
"learning_rate": 0.0001,
"loss": 1.6426,
"step": 873
},
{
"epoch": 0.29260127217944426,
"grad_norm": 0.12571430206298828,
"learning_rate": 0.0001,
"loss": 1.5702,
"step": 874
},
{
"epoch": 0.2929360562437228,
"grad_norm": 0.15491126477718353,
"learning_rate": 0.0001,
"loss": 1.6229,
"step": 875
},
{
"epoch": 0.2932708403080013,
"grad_norm": 0.1497523933649063,
"learning_rate": 0.0001,
"loss": 1.6073,
"step": 876
},
{
"epoch": 0.2936056243722799,
"grad_norm": 0.12279631197452545,
"learning_rate": 0.0001,
"loss": 1.5836,
"step": 877
},
{
"epoch": 0.29394040843655844,
"grad_norm": 0.16039283573627472,
"learning_rate": 0.0001,
"loss": 1.6125,
"step": 878
},
{
"epoch": 0.29427519250083695,
"grad_norm": 0.1275695562362671,
"learning_rate": 0.0001,
"loss": 1.5279,
"step": 879
},
{
"epoch": 0.2946099765651155,
"grad_norm": 0.12885813415050507,
"learning_rate": 0.0001,
"loss": 1.5662,
"step": 880
},
{
"epoch": 0.29494476062939406,
"grad_norm": 0.1439967006444931,
"learning_rate": 0.0001,
"loss": 1.6408,
"step": 881
},
{
"epoch": 0.29527954469367257,
"grad_norm": 0.12064056098461151,
"learning_rate": 0.0001,
"loss": 1.5292,
"step": 882
},
{
"epoch": 0.29561432875795113,
"grad_norm": 0.12883847951889038,
"learning_rate": 0.0001,
"loss": 1.6024,
"step": 883
},
{
"epoch": 0.2959491128222297,
"grad_norm": 0.12654966115951538,
"learning_rate": 0.0001,
"loss": 1.5838,
"step": 884
},
{
"epoch": 0.2962838968865082,
"grad_norm": 0.13914820551872253,
"learning_rate": 0.0001,
"loss": 1.5345,
"step": 885
},
{
"epoch": 0.29661868095078675,
"grad_norm": 0.12559537589550018,
"learning_rate": 0.0001,
"loss": 1.515,
"step": 886
},
{
"epoch": 0.2969534650150653,
"grad_norm": 0.1451893299818039,
"learning_rate": 0.0001,
"loss": 1.5924,
"step": 887
},
{
"epoch": 0.2972882490793438,
"grad_norm": 0.13416925072669983,
"learning_rate": 0.0001,
"loss": 1.6371,
"step": 888
},
{
"epoch": 0.2976230331436224,
"grad_norm": 0.12274248152971268,
"learning_rate": 0.0001,
"loss": 1.6539,
"step": 889
},
{
"epoch": 0.2979578172079009,
"grad_norm": 0.143101766705513,
"learning_rate": 0.0001,
"loss": 1.5748,
"step": 890
},
{
"epoch": 0.29829260127217944,
"grad_norm": 0.12564097344875336,
"learning_rate": 0.0001,
"loss": 1.5875,
"step": 891
},
{
"epoch": 0.298627385336458,
"grad_norm": 0.12403486669063568,
"learning_rate": 0.0001,
"loss": 1.5765,
"step": 892
},
{
"epoch": 0.2989621694007365,
"grad_norm": 0.13099223375320435,
"learning_rate": 0.0001,
"loss": 1.5656,
"step": 893
},
{
"epoch": 0.29929695346501506,
"grad_norm": 0.12135787308216095,
"learning_rate": 0.0001,
"loss": 1.4958,
"step": 894
},
{
"epoch": 0.2996317375292936,
"grad_norm": 0.12442804127931595,
"learning_rate": 0.0001,
"loss": 1.6222,
"step": 895
},
{
"epoch": 0.2999665215935721,
"grad_norm": 0.12768028676509857,
"learning_rate": 0.0001,
"loss": 1.6719,
"step": 896
},
{
"epoch": 0.3003013056578507,
"grad_norm": 0.1240835040807724,
"learning_rate": 0.0001,
"loss": 1.5114,
"step": 897
},
{
"epoch": 0.30063608972212924,
"grad_norm": 0.12057949602603912,
"learning_rate": 0.0001,
"loss": 1.5864,
"step": 898
},
{
"epoch": 0.30097087378640774,
"grad_norm": 0.1332257241010666,
"learning_rate": 0.0001,
"loss": 1.652,
"step": 899
},
{
"epoch": 0.3013056578506863,
"grad_norm": 0.12191877514123917,
"learning_rate": 0.0001,
"loss": 1.6016,
"step": 900
},
{
"epoch": 0.30164044191496486,
"grad_norm": 0.13481038808822632,
"learning_rate": 0.0001,
"loss": 1.5724,
"step": 901
},
{
"epoch": 0.30197522597924337,
"grad_norm": 0.12434981763362885,
"learning_rate": 0.0001,
"loss": 1.5873,
"step": 902
},
{
"epoch": 0.3023100100435219,
"grad_norm": 0.12398968636989594,
"learning_rate": 0.0001,
"loss": 1.5917,
"step": 903
},
{
"epoch": 0.3026447941078005,
"grad_norm": 0.13455741107463837,
"learning_rate": 0.0001,
"loss": 1.6293,
"step": 904
},
{
"epoch": 0.302979578172079,
"grad_norm": 0.12864330410957336,
"learning_rate": 0.0001,
"loss": 1.6671,
"step": 905
},
{
"epoch": 0.30331436223635755,
"grad_norm": 0.1306915581226349,
"learning_rate": 0.0001,
"loss": 1.5669,
"step": 906
},
{
"epoch": 0.3036491463006361,
"grad_norm": 0.12770214676856995,
"learning_rate": 0.0001,
"loss": 1.515,
"step": 907
},
{
"epoch": 0.3039839303649146,
"grad_norm": 0.12244972586631775,
"learning_rate": 0.0001,
"loss": 1.7102,
"step": 908
},
{
"epoch": 0.30431871442919317,
"grad_norm": 0.12544330954551697,
"learning_rate": 0.0001,
"loss": 1.5809,
"step": 909
},
{
"epoch": 0.30465349849347173,
"grad_norm": 0.12653569877147675,
"learning_rate": 0.0001,
"loss": 1.5504,
"step": 910
},
{
"epoch": 0.30498828255775023,
"grad_norm": 0.1295597404241562,
"learning_rate": 0.0001,
"loss": 1.6077,
"step": 911
},
{
"epoch": 0.3053230666220288,
"grad_norm": 0.13423195481300354,
"learning_rate": 0.0001,
"loss": 1.6433,
"step": 912
},
{
"epoch": 0.30565785068630735,
"grad_norm": 0.12957747280597687,
"learning_rate": 0.0001,
"loss": 1.72,
"step": 913
},
{
"epoch": 0.30599263475058586,
"grad_norm": 0.1274273693561554,
"learning_rate": 0.0001,
"loss": 1.5916,
"step": 914
},
{
"epoch": 0.3063274188148644,
"grad_norm": 0.12693728506565094,
"learning_rate": 0.0001,
"loss": 1.5582,
"step": 915
},
{
"epoch": 0.306662202879143,
"grad_norm": 0.12224942445755005,
"learning_rate": 0.0001,
"loss": 1.6431,
"step": 916
},
{
"epoch": 0.3069969869434215,
"grad_norm": 0.12495341151952744,
"learning_rate": 0.0001,
"loss": 1.6554,
"step": 917
},
{
"epoch": 0.30733177100770004,
"grad_norm": 0.12348316609859467,
"learning_rate": 0.0001,
"loss": 1.5617,
"step": 918
},
{
"epoch": 0.3076665550719786,
"grad_norm": 0.12086449563503265,
"learning_rate": 0.0001,
"loss": 1.5866,
"step": 919
},
{
"epoch": 0.3080013391362571,
"grad_norm": 0.12970371544361115,
"learning_rate": 0.0001,
"loss": 1.6444,
"step": 920
},
{
"epoch": 0.30833612320053566,
"grad_norm": 0.115717314183712,
"learning_rate": 0.0001,
"loss": 1.4493,
"step": 921
},
{
"epoch": 0.3086709072648142,
"grad_norm": 0.1250089704990387,
"learning_rate": 0.0001,
"loss": 1.5889,
"step": 922
},
{
"epoch": 0.3090056913290927,
"grad_norm": 0.11084622144699097,
"learning_rate": 0.0001,
"loss": 1.3815,
"step": 923
},
{
"epoch": 0.3093404753933713,
"grad_norm": 0.12127161026000977,
"learning_rate": 0.0001,
"loss": 1.5558,
"step": 924
},
{
"epoch": 0.30967525945764984,
"grad_norm": 0.12244665622711182,
"learning_rate": 0.0001,
"loss": 1.6409,
"step": 925
},
{
"epoch": 0.31001004352192835,
"grad_norm": 0.12553781270980835,
"learning_rate": 0.0001,
"loss": 1.6205,
"step": 926
},
{
"epoch": 0.3103448275862069,
"grad_norm": 0.12222031503915787,
"learning_rate": 0.0001,
"loss": 1.6323,
"step": 927
},
{
"epoch": 0.3106796116504854,
"grad_norm": 0.1246923953294754,
"learning_rate": 0.0001,
"loss": 1.719,
"step": 928
},
{
"epoch": 0.31101439571476397,
"grad_norm": 0.13237862288951874,
"learning_rate": 0.0001,
"loss": 1.6517,
"step": 929
},
{
"epoch": 0.31134917977904253,
"grad_norm": 0.11562683433294296,
"learning_rate": 0.0001,
"loss": 1.5043,
"step": 930
},
{
"epoch": 0.31168396384332103,
"grad_norm": 0.12860921025276184,
"learning_rate": 0.0001,
"loss": 1.5939,
"step": 931
},
{
"epoch": 0.3120187479075996,
"grad_norm": 0.11789809912443161,
"learning_rate": 0.0001,
"loss": 1.4763,
"step": 932
},
{
"epoch": 0.31235353197187815,
"grad_norm": 0.12612248957157135,
"learning_rate": 0.0001,
"loss": 1.6355,
"step": 933
},
{
"epoch": 0.31268831603615665,
"grad_norm": 0.14561748504638672,
"learning_rate": 0.0001,
"loss": 1.6897,
"step": 934
},
{
"epoch": 0.3130231001004352,
"grad_norm": 0.1276092380285263,
"learning_rate": 0.0001,
"loss": 1.6438,
"step": 935
},
{
"epoch": 0.3133578841647138,
"grad_norm": 0.13539274036884308,
"learning_rate": 0.0001,
"loss": 1.5562,
"step": 936
},
{
"epoch": 0.3136926682289923,
"grad_norm": 0.12490363419055939,
"learning_rate": 0.0001,
"loss": 1.5592,
"step": 937
},
{
"epoch": 0.31402745229327084,
"grad_norm": 0.12392627447843552,
"learning_rate": 0.0001,
"loss": 1.6344,
"step": 938
},
{
"epoch": 0.3143622363575494,
"grad_norm": 0.13469712436199188,
"learning_rate": 0.0001,
"loss": 1.7123,
"step": 939
},
{
"epoch": 0.3146970204218279,
"grad_norm": 0.13380196690559387,
"learning_rate": 0.0001,
"loss": 1.6485,
"step": 940
},
{
"epoch": 0.31503180448610646,
"grad_norm": 0.12370868027210236,
"learning_rate": 0.0001,
"loss": 1.5663,
"step": 941
},
{
"epoch": 0.315366588550385,
"grad_norm": 0.1381116360425949,
"learning_rate": 0.0001,
"loss": 1.5682,
"step": 942
},
{
"epoch": 0.3157013726146635,
"grad_norm": 0.15112708508968353,
"learning_rate": 0.0001,
"loss": 1.6236,
"step": 943
},
{
"epoch": 0.3160361566789421,
"grad_norm": 0.13402314484119415,
"learning_rate": 0.0001,
"loss": 1.67,
"step": 944
},
{
"epoch": 0.31637094074322064,
"grad_norm": 0.13505329191684723,
"learning_rate": 0.0001,
"loss": 1.5149,
"step": 945
},
{
"epoch": 0.31670572480749914,
"grad_norm": 0.1328267902135849,
"learning_rate": 0.0001,
"loss": 1.5129,
"step": 946
},
{
"epoch": 0.3170405088717777,
"grad_norm": 0.12792791426181793,
"learning_rate": 0.0001,
"loss": 1.5868,
"step": 947
},
{
"epoch": 0.31737529293605626,
"grad_norm": 0.11726494878530502,
"learning_rate": 0.0001,
"loss": 1.5581,
"step": 948
},
{
"epoch": 0.31771007700033477,
"grad_norm": 0.12302982062101364,
"learning_rate": 0.0001,
"loss": 1.5296,
"step": 949
},
{
"epoch": 0.3180448610646133,
"grad_norm": 0.1206970065832138,
"learning_rate": 0.0001,
"loss": 1.5066,
"step": 950
},
{
"epoch": 0.3183796451288919,
"grad_norm": 0.1165679469704628,
"learning_rate": 0.0001,
"loss": 1.5486,
"step": 951
},
{
"epoch": 0.3187144291931704,
"grad_norm": 0.12752187252044678,
"learning_rate": 0.0001,
"loss": 1.6441,
"step": 952
},
{
"epoch": 0.31904921325744895,
"grad_norm": 0.12091311067342758,
"learning_rate": 0.0001,
"loss": 1.5482,
"step": 953
},
{
"epoch": 0.3193839973217275,
"grad_norm": 0.12838125228881836,
"learning_rate": 0.0001,
"loss": 1.6027,
"step": 954
},
{
"epoch": 0.319718781386006,
"grad_norm": 0.11839887499809265,
"learning_rate": 0.0001,
"loss": 1.5533,
"step": 955
},
{
"epoch": 0.32005356545028457,
"grad_norm": 0.1277683675289154,
"learning_rate": 0.0001,
"loss": 1.5461,
"step": 956
},
{
"epoch": 0.32038834951456313,
"grad_norm": 0.12134066224098206,
"learning_rate": 0.0001,
"loss": 1.5649,
"step": 957
},
{
"epoch": 0.32072313357884163,
"grad_norm": 0.12735500931739807,
"learning_rate": 0.0001,
"loss": 1.608,
"step": 958
},
{
"epoch": 0.3210579176431202,
"grad_norm": 0.133828267455101,
"learning_rate": 0.0001,
"loss": 1.5675,
"step": 959
},
{
"epoch": 0.32139270170739875,
"grad_norm": 0.12437241524457932,
"learning_rate": 0.0001,
"loss": 1.6325,
"step": 960
},
{
"epoch": 0.32172748577167726,
"grad_norm": 0.12489302456378937,
"learning_rate": 0.0001,
"loss": 1.6441,
"step": 961
},
{
"epoch": 0.3220622698359558,
"grad_norm": 0.12957216799259186,
"learning_rate": 0.0001,
"loss": 1.5328,
"step": 962
},
{
"epoch": 0.3223970539002344,
"grad_norm": 0.1317603886127472,
"learning_rate": 0.0001,
"loss": 1.6061,
"step": 963
},
{
"epoch": 0.3227318379645129,
"grad_norm": 0.12075690180063248,
"learning_rate": 0.0001,
"loss": 1.5508,
"step": 964
},
{
"epoch": 0.32306662202879144,
"grad_norm": 0.11924642324447632,
"learning_rate": 0.0001,
"loss": 1.4772,
"step": 965
},
{
"epoch": 0.32340140609306994,
"grad_norm": 0.12515272200107574,
"learning_rate": 0.0001,
"loss": 1.5748,
"step": 966
},
{
"epoch": 0.3237361901573485,
"grad_norm": 0.11952123045921326,
"learning_rate": 0.0001,
"loss": 1.5852,
"step": 967
},
{
"epoch": 0.32407097422162706,
"grad_norm": 0.125240296125412,
"learning_rate": 0.0001,
"loss": 1.5388,
"step": 968
},
{
"epoch": 0.32440575828590557,
"grad_norm": 0.12284346669912338,
"learning_rate": 0.0001,
"loss": 1.6059,
"step": 969
},
{
"epoch": 0.3247405423501841,
"grad_norm": 0.11825854331254959,
"learning_rate": 0.0001,
"loss": 1.52,
"step": 970
},
{
"epoch": 0.3250753264144627,
"grad_norm": 0.1247822567820549,
"learning_rate": 0.0001,
"loss": 1.6265,
"step": 971
},
{
"epoch": 0.3254101104787412,
"grad_norm": 0.12490460276603699,
"learning_rate": 0.0001,
"loss": 1.6047,
"step": 972
},
{
"epoch": 0.32574489454301975,
"grad_norm": 0.11784359812736511,
"learning_rate": 0.0001,
"loss": 1.451,
"step": 973
},
{
"epoch": 0.3260796786072983,
"grad_norm": 0.12558013200759888,
"learning_rate": 0.0001,
"loss": 1.6244,
"step": 974
},
{
"epoch": 0.3264144626715768,
"grad_norm": 0.12492769211530685,
"learning_rate": 0.0001,
"loss": 1.6821,
"step": 975
},
{
"epoch": 0.32674924673585537,
"grad_norm": 0.11894410103559494,
"learning_rate": 0.0001,
"loss": 1.5476,
"step": 976
},
{
"epoch": 0.32708403080013393,
"grad_norm": 0.12406729906797409,
"learning_rate": 0.0001,
"loss": 1.5954,
"step": 977
},
{
"epoch": 0.32741881486441243,
"grad_norm": 0.12805567681789398,
"learning_rate": 0.0001,
"loss": 1.5216,
"step": 978
},
{
"epoch": 0.327753598928691,
"grad_norm": 0.12648111581802368,
"learning_rate": 0.0001,
"loss": 1.6923,
"step": 979
},
{
"epoch": 0.32808838299296955,
"grad_norm": 0.12503187358379364,
"learning_rate": 0.0001,
"loss": 1.6204,
"step": 980
},
{
"epoch": 0.32842316705724806,
"grad_norm": 0.12180895358324051,
"learning_rate": 0.0001,
"loss": 1.5764,
"step": 981
},
{
"epoch": 0.3287579511215266,
"grad_norm": 0.12118836492300034,
"learning_rate": 0.0001,
"loss": 1.4937,
"step": 982
},
{
"epoch": 0.3290927351858052,
"grad_norm": 0.12758868932724,
"learning_rate": 0.0001,
"loss": 1.6198,
"step": 983
},
{
"epoch": 0.3294275192500837,
"grad_norm": 0.1190565824508667,
"learning_rate": 0.0001,
"loss": 1.587,
"step": 984
},
{
"epoch": 0.32976230331436224,
"grad_norm": 0.12521426379680634,
"learning_rate": 0.0001,
"loss": 1.5403,
"step": 985
},
{
"epoch": 0.3300970873786408,
"grad_norm": 0.1259697824716568,
"learning_rate": 0.0001,
"loss": 1.5356,
"step": 986
},
{
"epoch": 0.3304318714429193,
"grad_norm": 0.12639686465263367,
"learning_rate": 0.0001,
"loss": 1.5941,
"step": 987
},
{
"epoch": 0.33076665550719786,
"grad_norm": 0.12533701956272125,
"learning_rate": 0.0001,
"loss": 1.6826,
"step": 988
},
{
"epoch": 0.3311014395714764,
"grad_norm": 0.1349916309118271,
"learning_rate": 0.0001,
"loss": 1.6818,
"step": 989
},
{
"epoch": 0.3314362236357549,
"grad_norm": 0.12522515654563904,
"learning_rate": 0.0001,
"loss": 1.531,
"step": 990
},
{
"epoch": 0.3317710077000335,
"grad_norm": 0.12278946489095688,
"learning_rate": 0.0001,
"loss": 1.5098,
"step": 991
},
{
"epoch": 0.33210579176431204,
"grad_norm": 0.1286853700876236,
"learning_rate": 0.0001,
"loss": 1.5117,
"step": 992
},
{
"epoch": 0.33244057582859055,
"grad_norm": 0.1212511882185936,
"learning_rate": 0.0001,
"loss": 1.4762,
"step": 993
},
{
"epoch": 0.3327753598928691,
"grad_norm": 0.1347900927066803,
"learning_rate": 0.0001,
"loss": 1.6793,
"step": 994
},
{
"epoch": 0.33311014395714766,
"grad_norm": 0.11994650959968567,
"learning_rate": 0.0001,
"loss": 1.6026,
"step": 995
},
{
"epoch": 0.33344492802142617,
"grad_norm": 0.13167862594127655,
"learning_rate": 0.0001,
"loss": 1.6341,
"step": 996
},
{
"epoch": 0.3337797120857047,
"grad_norm": 0.13315805792808533,
"learning_rate": 0.0001,
"loss": 1.5414,
"step": 997
},
{
"epoch": 0.3341144961499833,
"grad_norm": 0.12088074535131454,
"learning_rate": 0.0001,
"loss": 1.5769,
"step": 998
},
{
"epoch": 0.3344492802142618,
"grad_norm": 0.13783089816570282,
"learning_rate": 0.0001,
"loss": 1.5365,
"step": 999
},
{
"epoch": 0.33478406427854035,
"grad_norm": 0.13187260925769806,
"learning_rate": 0.0001,
"loss": 1.5929,
"step": 1000
},
{
"epoch": 0.3351188483428189,
"grad_norm": 0.13189886510372162,
"learning_rate": 0.0001,
"loss": 1.5591,
"step": 1001
},
{
"epoch": 0.3354536324070974,
"grad_norm": 0.1421831101179123,
"learning_rate": 0.0001,
"loss": 1.5674,
"step": 1002
},
{
"epoch": 0.335788416471376,
"grad_norm": 0.1282414346933365,
"learning_rate": 0.0001,
"loss": 1.5696,
"step": 1003
},
{
"epoch": 0.3361232005356545,
"grad_norm": 0.13641226291656494,
"learning_rate": 0.0001,
"loss": 1.5336,
"step": 1004
},
{
"epoch": 0.33645798459993304,
"grad_norm": 0.14396816492080688,
"learning_rate": 0.0001,
"loss": 1.5648,
"step": 1005
},
{
"epoch": 0.3367927686642116,
"grad_norm": 0.12792754173278809,
"learning_rate": 0.0001,
"loss": 1.631,
"step": 1006
},
{
"epoch": 0.3371275527284901,
"grad_norm": 0.1327052116394043,
"learning_rate": 0.0001,
"loss": 1.5746,
"step": 1007
},
{
"epoch": 0.33746233679276866,
"grad_norm": 0.14353278279304504,
"learning_rate": 0.0001,
"loss": 1.5345,
"step": 1008
},
{
"epoch": 0.3377971208570472,
"grad_norm": 0.137548565864563,
"learning_rate": 0.0001,
"loss": 1.6771,
"step": 1009
},
{
"epoch": 0.3381319049213257,
"grad_norm": 0.13727347552776337,
"learning_rate": 0.0001,
"loss": 1.6451,
"step": 1010
},
{
"epoch": 0.3384666889856043,
"grad_norm": 0.13395574688911438,
"learning_rate": 0.0001,
"loss": 1.5378,
"step": 1011
},
{
"epoch": 0.33880147304988284,
"grad_norm": 0.12692630290985107,
"learning_rate": 0.0001,
"loss": 1.5555,
"step": 1012
},
{
"epoch": 0.33913625711416134,
"grad_norm": 0.12900549173355103,
"learning_rate": 0.0001,
"loss": 1.5451,
"step": 1013
},
{
"epoch": 0.3394710411784399,
"grad_norm": 0.11654023826122284,
"learning_rate": 0.0001,
"loss": 1.5063,
"step": 1014
},
{
"epoch": 0.33980582524271846,
"grad_norm": 0.13518574833869934,
"learning_rate": 0.0001,
"loss": 1.5578,
"step": 1015
},
{
"epoch": 0.34014060930699697,
"grad_norm": 0.126609668135643,
"learning_rate": 0.0001,
"loss": 1.4299,
"step": 1016
},
{
"epoch": 0.3404753933712755,
"grad_norm": 0.12412185966968536,
"learning_rate": 0.0001,
"loss": 1.5083,
"step": 1017
},
{
"epoch": 0.3408101774355541,
"grad_norm": 0.12521536648273468,
"learning_rate": 0.0001,
"loss": 1.5264,
"step": 1018
},
{
"epoch": 0.3411449614998326,
"grad_norm": 0.12396744638681412,
"learning_rate": 0.0001,
"loss": 1.5984,
"step": 1019
},
{
"epoch": 0.34147974556411115,
"grad_norm": 0.12353380024433136,
"learning_rate": 0.0001,
"loss": 1.5615,
"step": 1020
},
{
"epoch": 0.3418145296283897,
"grad_norm": 0.1337115615606308,
"learning_rate": 0.0001,
"loss": 1.5777,
"step": 1021
},
{
"epoch": 0.3421493136926682,
"grad_norm": 0.13354641199111938,
"learning_rate": 0.0001,
"loss": 1.5417,
"step": 1022
},
{
"epoch": 0.34248409775694677,
"grad_norm": 0.12444625794887543,
"learning_rate": 0.0001,
"loss": 1.579,
"step": 1023
},
{
"epoch": 0.34281888182122533,
"grad_norm": 0.12876839935779572,
"learning_rate": 0.0001,
"loss": 1.4921,
"step": 1024
},
{
"epoch": 0.34315366588550383,
"grad_norm": 0.13097478449344635,
"learning_rate": 0.0001,
"loss": 1.5756,
"step": 1025
},
{
"epoch": 0.3434884499497824,
"grad_norm": 0.1257512867450714,
"learning_rate": 0.0001,
"loss": 1.5273,
"step": 1026
},
{
"epoch": 0.34382323401406095,
"grad_norm": 0.13378176093101501,
"learning_rate": 0.0001,
"loss": 1.5484,
"step": 1027
},
{
"epoch": 0.34415801807833946,
"grad_norm": 0.1325940638780594,
"learning_rate": 0.0001,
"loss": 1.6229,
"step": 1028
},
{
"epoch": 0.344492802142618,
"grad_norm": 0.11962547153234482,
"learning_rate": 0.0001,
"loss": 1.4859,
"step": 1029
},
{
"epoch": 0.3448275862068966,
"grad_norm": 0.12927503883838654,
"learning_rate": 0.0001,
"loss": 1.6788,
"step": 1030
},
{
"epoch": 0.3451623702711751,
"grad_norm": 0.13427825272083282,
"learning_rate": 0.0001,
"loss": 1.5514,
"step": 1031
},
{
"epoch": 0.34549715433545364,
"grad_norm": 0.13139428198337555,
"learning_rate": 0.0001,
"loss": 1.6164,
"step": 1032
},
{
"epoch": 0.3458319383997322,
"grad_norm": 0.12266752868890762,
"learning_rate": 0.0001,
"loss": 1.5226,
"step": 1033
},
{
"epoch": 0.3461667224640107,
"grad_norm": 0.14490726590156555,
"learning_rate": 0.0001,
"loss": 1.5562,
"step": 1034
},
{
"epoch": 0.34650150652828926,
"grad_norm": 0.11922143399715424,
"learning_rate": 0.0001,
"loss": 1.465,
"step": 1035
},
{
"epoch": 0.3468362905925678,
"grad_norm": 0.12442134320735931,
"learning_rate": 0.0001,
"loss": 1.5653,
"step": 1036
},
{
"epoch": 0.3471710746568463,
"grad_norm": 0.1383199840784073,
"learning_rate": 0.0001,
"loss": 1.5509,
"step": 1037
},
{
"epoch": 0.3475058587211249,
"grad_norm": 0.12311188876628876,
"learning_rate": 0.0001,
"loss": 1.5429,
"step": 1038
},
{
"epoch": 0.34784064278540344,
"grad_norm": 0.12368562817573547,
"learning_rate": 0.0001,
"loss": 1.6099,
"step": 1039
},
{
"epoch": 0.34817542684968195,
"grad_norm": 0.13235348463058472,
"learning_rate": 0.0001,
"loss": 1.586,
"step": 1040
},
{
"epoch": 0.3485102109139605,
"grad_norm": 0.12543101608753204,
"learning_rate": 0.0001,
"loss": 1.5094,
"step": 1041
},
{
"epoch": 0.348844994978239,
"grad_norm": 0.12461157888174057,
"learning_rate": 0.0001,
"loss": 1.6067,
"step": 1042
},
{
"epoch": 0.34917977904251757,
"grad_norm": 0.12375465035438538,
"learning_rate": 0.0001,
"loss": 1.5953,
"step": 1043
},
{
"epoch": 0.34951456310679613,
"grad_norm": 0.13041523098945618,
"learning_rate": 0.0001,
"loss": 1.6088,
"step": 1044
},
{
"epoch": 0.34984934717107463,
"grad_norm": 0.12022354453802109,
"learning_rate": 0.0001,
"loss": 1.4805,
"step": 1045
},
{
"epoch": 0.3501841312353532,
"grad_norm": 0.1251700222492218,
"learning_rate": 0.0001,
"loss": 1.5457,
"step": 1046
},
{
"epoch": 0.35051891529963175,
"grad_norm": 0.12562930583953857,
"learning_rate": 0.0001,
"loss": 1.501,
"step": 1047
},
{
"epoch": 0.35085369936391025,
"grad_norm": 0.13178695738315582,
"learning_rate": 0.0001,
"loss": 1.6332,
"step": 1048
},
{
"epoch": 0.3511884834281888,
"grad_norm": 0.12346772104501724,
"learning_rate": 0.0001,
"loss": 1.5875,
"step": 1049
},
{
"epoch": 0.3515232674924674,
"grad_norm": 0.12000037729740143,
"learning_rate": 0.0001,
"loss": 1.5166,
"step": 1050
},
{
"epoch": 0.3518580515567459,
"grad_norm": 0.13240620493888855,
"learning_rate": 0.0001,
"loss": 1.5801,
"step": 1051
},
{
"epoch": 0.35219283562102444,
"grad_norm": 0.12688381969928741,
"learning_rate": 0.0001,
"loss": 1.5581,
"step": 1052
},
{
"epoch": 0.352527619685303,
"grad_norm": 0.12421749532222748,
"learning_rate": 0.0001,
"loss": 1.5626,
"step": 1053
},
{
"epoch": 0.3528624037495815,
"grad_norm": 0.12876258790493011,
"learning_rate": 0.0001,
"loss": 1.4921,
"step": 1054
},
{
"epoch": 0.35319718781386006,
"grad_norm": 0.13299116492271423,
"learning_rate": 0.0001,
"loss": 1.5828,
"step": 1055
},
{
"epoch": 0.3535319718781386,
"grad_norm": 0.12605415284633636,
"learning_rate": 0.0001,
"loss": 1.5963,
"step": 1056
},
{
"epoch": 0.3538667559424171,
"grad_norm": 0.13100145757198334,
"learning_rate": 0.0001,
"loss": 1.6035,
"step": 1057
},
{
"epoch": 0.3542015400066957,
"grad_norm": 0.12380324304103851,
"learning_rate": 0.0001,
"loss": 1.5784,
"step": 1058
},
{
"epoch": 0.35453632407097424,
"grad_norm": 0.1288285106420517,
"learning_rate": 0.0001,
"loss": 1.5454,
"step": 1059
},
{
"epoch": 0.35487110813525274,
"grad_norm": 0.12464431673288345,
"learning_rate": 0.0001,
"loss": 1.5622,
"step": 1060
},
{
"epoch": 0.3552058921995313,
"grad_norm": 0.12694504857063293,
"learning_rate": 0.0001,
"loss": 1.5361,
"step": 1061
},
{
"epoch": 0.35554067626380986,
"grad_norm": 0.12736117839813232,
"learning_rate": 0.0001,
"loss": 1.5931,
"step": 1062
},
{
"epoch": 0.35587546032808837,
"grad_norm": 0.12816745042800903,
"learning_rate": 0.0001,
"loss": 1.584,
"step": 1063
},
{
"epoch": 0.3562102443923669,
"grad_norm": 0.12096529453992844,
"learning_rate": 0.0001,
"loss": 1.4851,
"step": 1064
},
{
"epoch": 0.3565450284566455,
"grad_norm": 0.12956807017326355,
"learning_rate": 0.0001,
"loss": 1.5296,
"step": 1065
},
{
"epoch": 0.356879812520924,
"grad_norm": 0.12413816154003143,
"learning_rate": 0.0001,
"loss": 1.5634,
"step": 1066
},
{
"epoch": 0.35721459658520255,
"grad_norm": 0.13675865530967712,
"learning_rate": 0.0001,
"loss": 1.498,
"step": 1067
},
{
"epoch": 0.3575493806494811,
"grad_norm": 0.12694036960601807,
"learning_rate": 0.0001,
"loss": 1.6586,
"step": 1068
},
{
"epoch": 0.3578841647137596,
"grad_norm": 0.13280896842479706,
"learning_rate": 0.0001,
"loss": 1.4662,
"step": 1069
},
{
"epoch": 0.35821894877803817,
"grad_norm": 0.13775292038917542,
"learning_rate": 0.0001,
"loss": 1.5833,
"step": 1070
},
{
"epoch": 0.35855373284231673,
"grad_norm": 0.12691499292850494,
"learning_rate": 0.0001,
"loss": 1.6034,
"step": 1071
},
{
"epoch": 0.35888851690659523,
"grad_norm": 0.13247890770435333,
"learning_rate": 0.0001,
"loss": 1.5617,
"step": 1072
},
{
"epoch": 0.3592233009708738,
"grad_norm": 0.1524164378643036,
"learning_rate": 0.0001,
"loss": 1.7153,
"step": 1073
},
{
"epoch": 0.35955808503515235,
"grad_norm": 0.12795189023017883,
"learning_rate": 0.0001,
"loss": 1.5657,
"step": 1074
},
{
"epoch": 0.35989286909943086,
"grad_norm": 0.12827672064304352,
"learning_rate": 0.0001,
"loss": 1.4345,
"step": 1075
},
{
"epoch": 0.3602276531637094,
"grad_norm": 0.13488048315048218,
"learning_rate": 0.0001,
"loss": 1.5137,
"step": 1076
},
{
"epoch": 0.360562437227988,
"grad_norm": 0.11891927570104599,
"learning_rate": 0.0001,
"loss": 1.508,
"step": 1077
},
{
"epoch": 0.3608972212922665,
"grad_norm": 0.1263907551765442,
"learning_rate": 0.0001,
"loss": 1.5969,
"step": 1078
},
{
"epoch": 0.36123200535654504,
"grad_norm": 0.12749949097633362,
"learning_rate": 0.0001,
"loss": 1.5646,
"step": 1079
},
{
"epoch": 0.36156678942082354,
"grad_norm": 0.12221404910087585,
"learning_rate": 0.0001,
"loss": 1.5279,
"step": 1080
},
{
"epoch": 0.3619015734851021,
"grad_norm": 0.12473400682210922,
"learning_rate": 0.0001,
"loss": 1.507,
"step": 1081
},
{
"epoch": 0.36223635754938066,
"grad_norm": 0.13297304511070251,
"learning_rate": 0.0001,
"loss": 1.5636,
"step": 1082
},
{
"epoch": 0.36257114161365916,
"grad_norm": 0.1260288655757904,
"learning_rate": 0.0001,
"loss": 1.5429,
"step": 1083
},
{
"epoch": 0.3629059256779377,
"grad_norm": 0.12271251529455185,
"learning_rate": 0.0001,
"loss": 1.6139,
"step": 1084
},
{
"epoch": 0.3632407097422163,
"grad_norm": 0.13517338037490845,
"learning_rate": 0.0001,
"loss": 1.59,
"step": 1085
},
{
"epoch": 0.3635754938064948,
"grad_norm": 0.12335921078920364,
"learning_rate": 0.0001,
"loss": 1.5477,
"step": 1086
},
{
"epoch": 0.36391027787077335,
"grad_norm": 0.12416140735149384,
"learning_rate": 0.0001,
"loss": 1.5792,
"step": 1087
},
{
"epoch": 0.3642450619350519,
"grad_norm": 0.1330622136592865,
"learning_rate": 0.0001,
"loss": 1.6416,
"step": 1088
},
{
"epoch": 0.3645798459993304,
"grad_norm": 0.11882945895195007,
"learning_rate": 0.0001,
"loss": 1.5633,
"step": 1089
},
{
"epoch": 0.36491463006360897,
"grad_norm": 0.12056804448366165,
"learning_rate": 0.0001,
"loss": 1.5639,
"step": 1090
},
{
"epoch": 0.36524941412788753,
"grad_norm": 0.12773139774799347,
"learning_rate": 0.0001,
"loss": 1.5221,
"step": 1091
},
{
"epoch": 0.36558419819216603,
"grad_norm": 0.12159121781587601,
"learning_rate": 0.0001,
"loss": 1.5255,
"step": 1092
},
{
"epoch": 0.3659189822564446,
"grad_norm": 0.12454614788293839,
"learning_rate": 0.0001,
"loss": 1.5685,
"step": 1093
},
{
"epoch": 0.36625376632072315,
"grad_norm": 0.1252131462097168,
"learning_rate": 0.0001,
"loss": 1.5721,
"step": 1094
},
{
"epoch": 0.36658855038500165,
"grad_norm": 0.12228623777627945,
"learning_rate": 0.0001,
"loss": 1.5488,
"step": 1095
},
{
"epoch": 0.3669233344492802,
"grad_norm": 0.1220550686120987,
"learning_rate": 0.0001,
"loss": 1.524,
"step": 1096
},
{
"epoch": 0.3672581185135588,
"grad_norm": 0.12096890807151794,
"learning_rate": 0.0001,
"loss": 1.4846,
"step": 1097
},
{
"epoch": 0.3675929025778373,
"grad_norm": 0.12377587705850601,
"learning_rate": 0.0001,
"loss": 1.6305,
"step": 1098
},
{
"epoch": 0.36792768664211584,
"grad_norm": 0.12515562772750854,
"learning_rate": 0.0001,
"loss": 1.6078,
"step": 1099
},
{
"epoch": 0.3682624707063944,
"grad_norm": 0.12402921915054321,
"learning_rate": 0.0001,
"loss": 1.532,
"step": 1100
},
{
"epoch": 0.3685972547706729,
"grad_norm": 0.12373632192611694,
"learning_rate": 0.0001,
"loss": 1.512,
"step": 1101
},
{
"epoch": 0.36893203883495146,
"grad_norm": 0.12751725316047668,
"learning_rate": 0.0001,
"loss": 1.5799,
"step": 1102
},
{
"epoch": 0.36926682289923,
"grad_norm": 0.12221360951662064,
"learning_rate": 0.0001,
"loss": 1.4454,
"step": 1103
},
{
"epoch": 0.3696016069635085,
"grad_norm": 0.12299706041812897,
"learning_rate": 0.0001,
"loss": 1.5994,
"step": 1104
},
{
"epoch": 0.3699363910277871,
"grad_norm": 0.1294013112783432,
"learning_rate": 0.0001,
"loss": 1.6196,
"step": 1105
},
{
"epoch": 0.37027117509206564,
"grad_norm": 0.1240616887807846,
"learning_rate": 0.0001,
"loss": 1.5548,
"step": 1106
},
{
"epoch": 0.37060595915634414,
"grad_norm": 0.12403808534145355,
"learning_rate": 0.0001,
"loss": 1.6311,
"step": 1107
},
{
"epoch": 0.3709407432206227,
"grad_norm": 0.11872854828834534,
"learning_rate": 0.0001,
"loss": 1.4156,
"step": 1108
},
{
"epoch": 0.37127552728490126,
"grad_norm": 0.12752331793308258,
"learning_rate": 0.0001,
"loss": 1.6212,
"step": 1109
},
{
"epoch": 0.37161031134917977,
"grad_norm": 0.12329373508691788,
"learning_rate": 0.0001,
"loss": 1.5868,
"step": 1110
},
{
"epoch": 0.3719450954134583,
"grad_norm": 0.12340104579925537,
"learning_rate": 0.0001,
"loss": 1.5292,
"step": 1111
},
{
"epoch": 0.3722798794777369,
"grad_norm": 0.11669819802045822,
"learning_rate": 0.0001,
"loss": 1.5188,
"step": 1112
},
{
"epoch": 0.3726146635420154,
"grad_norm": 0.11677731573581696,
"learning_rate": 0.0001,
"loss": 1.5151,
"step": 1113
},
{
"epoch": 0.37294944760629395,
"grad_norm": 0.12206505239009857,
"learning_rate": 0.0001,
"loss": 1.6733,
"step": 1114
},
{
"epoch": 0.3732842316705725,
"grad_norm": 0.12234992533922195,
"learning_rate": 0.0001,
"loss": 1.5242,
"step": 1115
},
{
"epoch": 0.373619015734851,
"grad_norm": 0.12357670813798904,
"learning_rate": 0.0001,
"loss": 1.5432,
"step": 1116
},
{
"epoch": 0.37395379979912957,
"grad_norm": 0.12345674633979797,
"learning_rate": 0.0001,
"loss": 1.6483,
"step": 1117
},
{
"epoch": 0.3742885838634081,
"grad_norm": 0.1179901510477066,
"learning_rate": 0.0001,
"loss": 1.5899,
"step": 1118
},
{
"epoch": 0.37462336792768663,
"grad_norm": 0.12135247141122818,
"learning_rate": 0.0001,
"loss": 1.554,
"step": 1119
},
{
"epoch": 0.3749581519919652,
"grad_norm": 0.12836892902851105,
"learning_rate": 0.0001,
"loss": 1.6242,
"step": 1120
},
{
"epoch": 0.3752929360562437,
"grad_norm": 0.12851716578006744,
"learning_rate": 0.0001,
"loss": 1.6372,
"step": 1121
},
{
"epoch": 0.37562772012052226,
"grad_norm": 0.12096036225557327,
"learning_rate": 0.0001,
"loss": 1.5042,
"step": 1122
},
{
"epoch": 0.3759625041848008,
"grad_norm": 0.121758371591568,
"learning_rate": 0.0001,
"loss": 1.5561,
"step": 1123
},
{
"epoch": 0.3762972882490793,
"grad_norm": 0.12547370791435242,
"learning_rate": 0.0001,
"loss": 1.571,
"step": 1124
},
{
"epoch": 0.3766320723133579,
"grad_norm": 0.12488459795713425,
"learning_rate": 0.0001,
"loss": 1.6101,
"step": 1125
},
{
"epoch": 0.37696685637763644,
"grad_norm": 0.12440396845340729,
"learning_rate": 0.0001,
"loss": 1.4978,
"step": 1126
},
{
"epoch": 0.37730164044191494,
"grad_norm": 0.1293293535709381,
"learning_rate": 0.0001,
"loss": 1.6226,
"step": 1127
},
{
"epoch": 0.3776364245061935,
"grad_norm": 0.1270667314529419,
"learning_rate": 0.0001,
"loss": 1.5403,
"step": 1128
},
{
"epoch": 0.37797120857047206,
"grad_norm": 0.13023768365383148,
"learning_rate": 0.0001,
"loss": 1.6641,
"step": 1129
},
{
"epoch": 0.37830599263475057,
"grad_norm": 0.12713496387004852,
"learning_rate": 0.0001,
"loss": 1.5685,
"step": 1130
},
{
"epoch": 0.3786407766990291,
"grad_norm": 0.126458540558815,
"learning_rate": 0.0001,
"loss": 1.5624,
"step": 1131
},
{
"epoch": 0.3789755607633077,
"grad_norm": 0.12100820988416672,
"learning_rate": 0.0001,
"loss": 1.5158,
"step": 1132
},
{
"epoch": 0.3793103448275862,
"grad_norm": 0.13373976945877075,
"learning_rate": 0.0001,
"loss": 1.5151,
"step": 1133
},
{
"epoch": 0.37964512889186475,
"grad_norm": 0.12730540335178375,
"learning_rate": 0.0001,
"loss": 1.5701,
"step": 1134
},
{
"epoch": 0.3799799129561433,
"grad_norm": 0.13641048967838287,
"learning_rate": 0.0001,
"loss": 1.5144,
"step": 1135
},
{
"epoch": 0.3803146970204218,
"grad_norm": 0.13271461427211761,
"learning_rate": 0.0001,
"loss": 1.5884,
"step": 1136
},
{
"epoch": 0.38064948108470037,
"grad_norm": 0.12385160475969315,
"learning_rate": 0.0001,
"loss": 1.5374,
"step": 1137
},
{
"epoch": 0.38098426514897893,
"grad_norm": 0.12949350476264954,
"learning_rate": 0.0001,
"loss": 1.546,
"step": 1138
},
{
"epoch": 0.38131904921325743,
"grad_norm": 0.135132297873497,
"learning_rate": 0.0001,
"loss": 1.5913,
"step": 1139
},
{
"epoch": 0.381653833277536,
"grad_norm": 0.11533955484628677,
"learning_rate": 0.0001,
"loss": 1.3968,
"step": 1140
},
{
"epoch": 0.38198861734181455,
"grad_norm": 0.13532719016075134,
"learning_rate": 0.0001,
"loss": 1.5534,
"step": 1141
},
{
"epoch": 0.38232340140609306,
"grad_norm": 0.14101184904575348,
"learning_rate": 0.0001,
"loss": 1.557,
"step": 1142
},
{
"epoch": 0.3826581854703716,
"grad_norm": 0.12038899213075638,
"learning_rate": 0.0001,
"loss": 1.4831,
"step": 1143
},
{
"epoch": 0.3829929695346502,
"grad_norm": 0.13053514063358307,
"learning_rate": 0.0001,
"loss": 1.5882,
"step": 1144
},
{
"epoch": 0.3833277535989287,
"grad_norm": 0.12372793257236481,
"learning_rate": 0.0001,
"loss": 1.6047,
"step": 1145
},
{
"epoch": 0.38366253766320724,
"grad_norm": 0.12823140621185303,
"learning_rate": 0.0001,
"loss": 1.6126,
"step": 1146
},
{
"epoch": 0.3839973217274858,
"grad_norm": 0.12058600783348083,
"learning_rate": 0.0001,
"loss": 1.4713,
"step": 1147
},
{
"epoch": 0.3843321057917643,
"grad_norm": 0.12674620747566223,
"learning_rate": 0.0001,
"loss": 1.6126,
"step": 1148
},
{
"epoch": 0.38466688985604286,
"grad_norm": 0.1214526891708374,
"learning_rate": 0.0001,
"loss": 1.6317,
"step": 1149
},
{
"epoch": 0.3850016739203214,
"grad_norm": 0.12831653654575348,
"learning_rate": 0.0001,
"loss": 1.5479,
"step": 1150
},
{
"epoch": 0.3853364579845999,
"grad_norm": 0.12079459428787231,
"learning_rate": 0.0001,
"loss": 1.5544,
"step": 1151
},
{
"epoch": 0.3856712420488785,
"grad_norm": 0.12021779268980026,
"learning_rate": 0.0001,
"loss": 1.5536,
"step": 1152
},
{
"epoch": 0.38600602611315704,
"grad_norm": 0.13052217662334442,
"learning_rate": 0.0001,
"loss": 1.5482,
"step": 1153
},
{
"epoch": 0.38634081017743555,
"grad_norm": 0.12613235414028168,
"learning_rate": 0.0001,
"loss": 1.6056,
"step": 1154
},
{
"epoch": 0.3866755942417141,
"grad_norm": 0.12751324474811554,
"learning_rate": 0.0001,
"loss": 1.5513,
"step": 1155
},
{
"epoch": 0.3870103783059926,
"grad_norm": 0.11987000703811646,
"learning_rate": 0.0001,
"loss": 1.4836,
"step": 1156
},
{
"epoch": 0.38734516237027117,
"grad_norm": 0.13999362289905548,
"learning_rate": 0.0001,
"loss": 1.6763,
"step": 1157
},
{
"epoch": 0.3876799464345497,
"grad_norm": 0.128611221909523,
"learning_rate": 0.0001,
"loss": 1.6281,
"step": 1158
},
{
"epoch": 0.38801473049882823,
"grad_norm": 0.1292606145143509,
"learning_rate": 0.0001,
"loss": 1.6846,
"step": 1159
},
{
"epoch": 0.3883495145631068,
"grad_norm": 0.13090923428535461,
"learning_rate": 0.0001,
"loss": 1.628,
"step": 1160
},
{
"epoch": 0.38868429862738535,
"grad_norm": 0.12356492131948471,
"learning_rate": 0.0001,
"loss": 1.5158,
"step": 1161
},
{
"epoch": 0.38901908269166385,
"grad_norm": 0.12005447596311569,
"learning_rate": 0.0001,
"loss": 1.62,
"step": 1162
},
{
"epoch": 0.3893538667559424,
"grad_norm": 0.12113460153341293,
"learning_rate": 0.0001,
"loss": 1.4954,
"step": 1163
},
{
"epoch": 0.38968865082022097,
"grad_norm": 0.11953802406787872,
"learning_rate": 0.0001,
"loss": 1.4891,
"step": 1164
},
{
"epoch": 0.3900234348844995,
"grad_norm": 0.1292644739151001,
"learning_rate": 0.0001,
"loss": 1.555,
"step": 1165
},
{
"epoch": 0.39035821894877804,
"grad_norm": 0.12345704436302185,
"learning_rate": 0.0001,
"loss": 1.4939,
"step": 1166
},
{
"epoch": 0.3906930030130566,
"grad_norm": 0.12334253638982773,
"learning_rate": 0.0001,
"loss": 1.6058,
"step": 1167
},
{
"epoch": 0.3910277870773351,
"grad_norm": 0.13044217228889465,
"learning_rate": 0.0001,
"loss": 1.5349,
"step": 1168
},
{
"epoch": 0.39136257114161366,
"grad_norm": 0.12309286743402481,
"learning_rate": 0.0001,
"loss": 1.5007,
"step": 1169
},
{
"epoch": 0.3916973552058922,
"grad_norm": 0.12565681338310242,
"learning_rate": 0.0001,
"loss": 1.5172,
"step": 1170
},
{
"epoch": 0.3920321392701707,
"grad_norm": 0.13335129618644714,
"learning_rate": 0.0001,
"loss": 1.5666,
"step": 1171
},
{
"epoch": 0.3923669233344493,
"grad_norm": 0.12664766609668732,
"learning_rate": 0.0001,
"loss": 1.5471,
"step": 1172
},
{
"epoch": 0.39270170739872784,
"grad_norm": 0.12703973054885864,
"learning_rate": 0.0001,
"loss": 1.545,
"step": 1173
},
{
"epoch": 0.39303649146300634,
"grad_norm": 0.12242884933948517,
"learning_rate": 0.0001,
"loss": 1.4768,
"step": 1174
},
{
"epoch": 0.3933712755272849,
"grad_norm": 0.13055263459682465,
"learning_rate": 0.0001,
"loss": 1.4782,
"step": 1175
},
{
"epoch": 0.39370605959156346,
"grad_norm": 0.13161849975585938,
"learning_rate": 0.0001,
"loss": 1.621,
"step": 1176
},
{
"epoch": 0.39404084365584197,
"grad_norm": 0.1257203370332718,
"learning_rate": 0.0001,
"loss": 1.5655,
"step": 1177
},
{
"epoch": 0.3943756277201205,
"grad_norm": 0.14164592325687408,
"learning_rate": 0.0001,
"loss": 1.4884,
"step": 1178
},
{
"epoch": 0.3947104117843991,
"grad_norm": 0.12696050107479095,
"learning_rate": 0.0001,
"loss": 1.5829,
"step": 1179
},
{
"epoch": 0.3950451958486776,
"grad_norm": 0.12652398645877838,
"learning_rate": 0.0001,
"loss": 1.6345,
"step": 1180
},
{
"epoch": 0.39537997991295615,
"grad_norm": 0.12333660572767258,
"learning_rate": 0.0001,
"loss": 1.5375,
"step": 1181
},
{
"epoch": 0.3957147639772347,
"grad_norm": 0.13108794391155243,
"learning_rate": 0.0001,
"loss": 1.6441,
"step": 1182
},
{
"epoch": 0.3960495480415132,
"grad_norm": 0.13195887207984924,
"learning_rate": 0.0001,
"loss": 1.5939,
"step": 1183
},
{
"epoch": 0.39638433210579177,
"grad_norm": 0.12931646406650543,
"learning_rate": 0.0001,
"loss": 1.5317,
"step": 1184
},
{
"epoch": 0.39671911617007033,
"grad_norm": 0.12439566105604172,
"learning_rate": 0.0001,
"loss": 1.5391,
"step": 1185
},
{
"epoch": 0.39705390023434883,
"grad_norm": 0.12557551264762878,
"learning_rate": 0.0001,
"loss": 1.5723,
"step": 1186
},
{
"epoch": 0.3973886842986274,
"grad_norm": 0.13013330101966858,
"learning_rate": 0.0001,
"loss": 1.4812,
"step": 1187
},
{
"epoch": 0.39772346836290595,
"grad_norm": 0.12955336272716522,
"learning_rate": 0.0001,
"loss": 1.5799,
"step": 1188
},
{
"epoch": 0.39805825242718446,
"grad_norm": 0.1347295343875885,
"learning_rate": 0.0001,
"loss": 1.6634,
"step": 1189
},
{
"epoch": 0.398393036491463,
"grad_norm": 0.13187319040298462,
"learning_rate": 0.0001,
"loss": 1.5146,
"step": 1190
},
{
"epoch": 0.3987278205557416,
"grad_norm": 0.13010048866271973,
"learning_rate": 0.0001,
"loss": 1.5003,
"step": 1191
},
{
"epoch": 0.3990626046200201,
"grad_norm": 0.12330204248428345,
"learning_rate": 0.0001,
"loss": 1.5765,
"step": 1192
},
{
"epoch": 0.39939738868429864,
"grad_norm": 0.1346241533756256,
"learning_rate": 0.0001,
"loss": 1.5979,
"step": 1193
},
{
"epoch": 0.39973217274857714,
"grad_norm": 0.13725797832012177,
"learning_rate": 0.0001,
"loss": 1.5813,
"step": 1194
},
{
"epoch": 0.4000669568128557,
"grad_norm": 0.12039465457201004,
"learning_rate": 0.0001,
"loss": 1.4363,
"step": 1195
},
{
"epoch": 0.40040174087713426,
"grad_norm": 0.1276928186416626,
"learning_rate": 0.0001,
"loss": 1.6575,
"step": 1196
},
{
"epoch": 0.40073652494141276,
"grad_norm": 0.12903235852718353,
"learning_rate": 0.0001,
"loss": 1.6059,
"step": 1197
},
{
"epoch": 0.4010713090056913,
"grad_norm": 0.12678353488445282,
"learning_rate": 0.0001,
"loss": 1.5624,
"step": 1198
},
{
"epoch": 0.4014060930699699,
"grad_norm": 0.12884308397769928,
"learning_rate": 0.0001,
"loss": 1.5995,
"step": 1199
},
{
"epoch": 0.4017408771342484,
"grad_norm": 0.11986846476793289,
"learning_rate": 0.0001,
"loss": 1.4767,
"step": 1200
},
{
"epoch": 0.40207566119852695,
"grad_norm": 0.12227410078048706,
"learning_rate": 0.0001,
"loss": 1.5056,
"step": 1201
},
{
"epoch": 0.4024104452628055,
"grad_norm": 0.12593914568424225,
"learning_rate": 0.0001,
"loss": 1.5836,
"step": 1202
},
{
"epoch": 0.402745229327084,
"grad_norm": 0.12477041780948639,
"learning_rate": 0.0001,
"loss": 1.5745,
"step": 1203
},
{
"epoch": 0.40308001339136257,
"grad_norm": 0.1216067373752594,
"learning_rate": 0.0001,
"loss": 1.5824,
"step": 1204
},
{
"epoch": 0.4034147974556411,
"grad_norm": 0.13550971448421478,
"learning_rate": 0.0001,
"loss": 1.6635,
"step": 1205
},
{
"epoch": 0.40374958151991963,
"grad_norm": 0.12963739037513733,
"learning_rate": 0.0001,
"loss": 1.6586,
"step": 1206
},
{
"epoch": 0.4040843655841982,
"grad_norm": 0.11887506395578384,
"learning_rate": 0.0001,
"loss": 1.4933,
"step": 1207
},
{
"epoch": 0.40441914964847675,
"grad_norm": 0.13262464106082916,
"learning_rate": 0.0001,
"loss": 1.5759,
"step": 1208
},
{
"epoch": 0.40475393371275525,
"grad_norm": 0.13952501118183136,
"learning_rate": 0.0001,
"loss": 1.6918,
"step": 1209
},
{
"epoch": 0.4050887177770338,
"grad_norm": 0.13401460647583008,
"learning_rate": 0.0001,
"loss": 1.5102,
"step": 1210
},
{
"epoch": 0.4054235018413124,
"grad_norm": 0.14476630091667175,
"learning_rate": 0.0001,
"loss": 1.6817,
"step": 1211
},
{
"epoch": 0.4057582859055909,
"grad_norm": 0.1285640001296997,
"learning_rate": 0.0001,
"loss": 1.653,
"step": 1212
},
{
"epoch": 0.40609306996986944,
"grad_norm": 0.13845203816890717,
"learning_rate": 0.0001,
"loss": 1.5996,
"step": 1213
},
{
"epoch": 0.406427854034148,
"grad_norm": 0.13416174054145813,
"learning_rate": 0.0001,
"loss": 1.6222,
"step": 1214
},
{
"epoch": 0.4067626380984265,
"grad_norm": 0.1267634481191635,
"learning_rate": 0.0001,
"loss": 1.5257,
"step": 1215
},
{
"epoch": 0.40709742216270506,
"grad_norm": 0.13453447818756104,
"learning_rate": 0.0001,
"loss": 1.5745,
"step": 1216
},
{
"epoch": 0.4074322062269836,
"grad_norm": 0.12069771438837051,
"learning_rate": 0.0001,
"loss": 1.5516,
"step": 1217
},
{
"epoch": 0.4077669902912621,
"grad_norm": 0.12483450770378113,
"learning_rate": 0.0001,
"loss": 1.5899,
"step": 1218
},
{
"epoch": 0.4081017743555407,
"grad_norm": 0.14123085141181946,
"learning_rate": 0.0001,
"loss": 1.6334,
"step": 1219
},
{
"epoch": 0.40843655841981924,
"grad_norm": 0.12844936549663544,
"learning_rate": 0.0001,
"loss": 1.4936,
"step": 1220
},
{
"epoch": 0.40877134248409774,
"grad_norm": 0.13094481825828552,
"learning_rate": 0.0001,
"loss": 1.6554,
"step": 1221
},
{
"epoch": 0.4091061265483763,
"grad_norm": 0.12563113868236542,
"learning_rate": 0.0001,
"loss": 1.4708,
"step": 1222
},
{
"epoch": 0.40944091061265486,
"grad_norm": 0.12495769560337067,
"learning_rate": 0.0001,
"loss": 1.5012,
"step": 1223
},
{
"epoch": 0.40977569467693337,
"grad_norm": 0.12314360588788986,
"learning_rate": 0.0001,
"loss": 1.5769,
"step": 1224
},
{
"epoch": 0.4101104787412119,
"grad_norm": 0.1389753818511963,
"learning_rate": 0.0001,
"loss": 1.5978,
"step": 1225
},
{
"epoch": 0.4104452628054905,
"grad_norm": 0.12703324854373932,
"learning_rate": 0.0001,
"loss": 1.5349,
"step": 1226
},
{
"epoch": 0.410780046869769,
"grad_norm": 0.11995337903499603,
"learning_rate": 0.0001,
"loss": 1.5307,
"step": 1227
},
{
"epoch": 0.41111483093404755,
"grad_norm": 0.1330454796552658,
"learning_rate": 0.0001,
"loss": 1.6277,
"step": 1228
},
{
"epoch": 0.41144961499832605,
"grad_norm": 0.12632183730602264,
"learning_rate": 0.0001,
"loss": 1.507,
"step": 1229
},
{
"epoch": 0.4117843990626046,
"grad_norm": 0.13255640864372253,
"learning_rate": 0.0001,
"loss": 1.5797,
"step": 1230
},
{
"epoch": 0.41211918312688317,
"grad_norm": 0.13822025060653687,
"learning_rate": 0.0001,
"loss": 1.5945,
"step": 1231
},
{
"epoch": 0.4124539671911617,
"grad_norm": 0.1303391307592392,
"learning_rate": 0.0001,
"loss": 1.5928,
"step": 1232
},
{
"epoch": 0.41278875125544023,
"grad_norm": 0.12309371680021286,
"learning_rate": 0.0001,
"loss": 1.4794,
"step": 1233
},
{
"epoch": 0.4131235353197188,
"grad_norm": 0.12375032901763916,
"learning_rate": 0.0001,
"loss": 1.5133,
"step": 1234
},
{
"epoch": 0.4134583193839973,
"grad_norm": 0.13613499701023102,
"learning_rate": 0.0001,
"loss": 1.621,
"step": 1235
},
{
"epoch": 0.41379310344827586,
"grad_norm": 0.13198764622211456,
"learning_rate": 0.0001,
"loss": 1.5762,
"step": 1236
},
{
"epoch": 0.4141278875125544,
"grad_norm": 0.1294814646244049,
"learning_rate": 0.0001,
"loss": 1.5836,
"step": 1237
},
{
"epoch": 0.4144626715768329,
"grad_norm": 0.12597797811031342,
"learning_rate": 0.0001,
"loss": 1.5988,
"step": 1238
},
{
"epoch": 0.4147974556411115,
"grad_norm": 0.12371232360601425,
"learning_rate": 0.0001,
"loss": 1.5432,
"step": 1239
},
{
"epoch": 0.41513223970539004,
"grad_norm": 0.12919354438781738,
"learning_rate": 0.0001,
"loss": 1.5507,
"step": 1240
},
{
"epoch": 0.41546702376966854,
"grad_norm": 0.12919418513774872,
"learning_rate": 0.0001,
"loss": 1.7431,
"step": 1241
},
{
"epoch": 0.4158018078339471,
"grad_norm": 0.12314452975988388,
"learning_rate": 0.0001,
"loss": 1.5407,
"step": 1242
},
{
"epoch": 0.41613659189822566,
"grad_norm": 0.1360636204481125,
"learning_rate": 0.0001,
"loss": 1.5872,
"step": 1243
},
{
"epoch": 0.41647137596250416,
"grad_norm": 0.12739785015583038,
"learning_rate": 0.0001,
"loss": 1.4998,
"step": 1244
},
{
"epoch": 0.4168061600267827,
"grad_norm": 0.12558461725711823,
"learning_rate": 0.0001,
"loss": 1.6422,
"step": 1245
},
{
"epoch": 0.4171409440910613,
"grad_norm": 0.130743145942688,
"learning_rate": 0.0001,
"loss": 1.6537,
"step": 1246
},
{
"epoch": 0.4174757281553398,
"grad_norm": 0.12714166939258575,
"learning_rate": 0.0001,
"loss": 1.4309,
"step": 1247
},
{
"epoch": 0.41781051221961835,
"grad_norm": 0.12849892675876617,
"learning_rate": 0.0001,
"loss": 1.514,
"step": 1248
},
{
"epoch": 0.4181452962838969,
"grad_norm": 0.1366477757692337,
"learning_rate": 0.0001,
"loss": 1.6397,
"step": 1249
},
{
"epoch": 0.4184800803481754,
"grad_norm": 0.1324029415845871,
"learning_rate": 0.0001,
"loss": 1.5647,
"step": 1250
},
{
"epoch": 0.41881486441245397,
"grad_norm": 0.1272830069065094,
"learning_rate": 0.0001,
"loss": 1.633,
"step": 1251
},
{
"epoch": 0.41914964847673253,
"grad_norm": 0.12891270220279694,
"learning_rate": 0.0001,
"loss": 1.5571,
"step": 1252
},
{
"epoch": 0.41948443254101103,
"grad_norm": 0.1334099918603897,
"learning_rate": 0.0001,
"loss": 1.4905,
"step": 1253
},
{
"epoch": 0.4198192166052896,
"grad_norm": 0.12439723312854767,
"learning_rate": 0.0001,
"loss": 1.5859,
"step": 1254
},
{
"epoch": 0.42015400066956815,
"grad_norm": 0.13870543241500854,
"learning_rate": 0.0001,
"loss": 1.6226,
"step": 1255
},
{
"epoch": 0.42048878473384665,
"grad_norm": 0.13232079148292542,
"learning_rate": 0.0001,
"loss": 1.6566,
"step": 1256
},
{
"epoch": 0.4208235687981252,
"grad_norm": 0.12575885653495789,
"learning_rate": 0.0001,
"loss": 1.5629,
"step": 1257
},
{
"epoch": 0.4211583528624038,
"grad_norm": 0.12995895743370056,
"learning_rate": 0.0001,
"loss": 1.5703,
"step": 1258
},
{
"epoch": 0.4214931369266823,
"grad_norm": 0.12801054120063782,
"learning_rate": 0.0001,
"loss": 1.6326,
"step": 1259
},
{
"epoch": 0.42182792099096084,
"grad_norm": 0.12584693729877472,
"learning_rate": 0.0001,
"loss": 1.6329,
"step": 1260
},
{
"epoch": 0.4221627050552394,
"grad_norm": 0.13142889738082886,
"learning_rate": 0.0001,
"loss": 1.7081,
"step": 1261
},
{
"epoch": 0.4224974891195179,
"grad_norm": 0.12793239951133728,
"learning_rate": 0.0001,
"loss": 1.6032,
"step": 1262
},
{
"epoch": 0.42283227318379646,
"grad_norm": 0.12368165701627731,
"learning_rate": 0.0001,
"loss": 1.5468,
"step": 1263
},
{
"epoch": 0.423167057248075,
"grad_norm": 0.13081911206245422,
"learning_rate": 0.0001,
"loss": 1.6175,
"step": 1264
},
{
"epoch": 0.4235018413123535,
"grad_norm": 0.12801037728786469,
"learning_rate": 0.0001,
"loss": 1.537,
"step": 1265
},
{
"epoch": 0.4238366253766321,
"grad_norm": 0.1274782121181488,
"learning_rate": 0.0001,
"loss": 1.5277,
"step": 1266
},
{
"epoch": 0.4241714094409106,
"grad_norm": 0.1194332018494606,
"learning_rate": 0.0001,
"loss": 1.496,
"step": 1267
},
{
"epoch": 0.42450619350518914,
"grad_norm": 0.13174927234649658,
"learning_rate": 0.0001,
"loss": 1.5975,
"step": 1268
},
{
"epoch": 0.4248409775694677,
"grad_norm": 0.1254556030035019,
"learning_rate": 0.0001,
"loss": 1.6119,
"step": 1269
},
{
"epoch": 0.4251757616337462,
"grad_norm": 0.13203318417072296,
"learning_rate": 0.0001,
"loss": 1.5564,
"step": 1270
},
{
"epoch": 0.42551054569802477,
"grad_norm": 0.12941622734069824,
"learning_rate": 0.0001,
"loss": 1.6285,
"step": 1271
},
{
"epoch": 0.4258453297623033,
"grad_norm": 0.12527894973754883,
"learning_rate": 0.0001,
"loss": 1.5703,
"step": 1272
},
{
"epoch": 0.42618011382658183,
"grad_norm": 0.12617714703083038,
"learning_rate": 0.0001,
"loss": 1.6523,
"step": 1273
},
{
"epoch": 0.4265148978908604,
"grad_norm": 0.12326870858669281,
"learning_rate": 0.0001,
"loss": 1.5533,
"step": 1274
},
{
"epoch": 0.42684968195513895,
"grad_norm": 0.1295124888420105,
"learning_rate": 0.0001,
"loss": 1.5587,
"step": 1275
},
{
"epoch": 0.42718446601941745,
"grad_norm": 0.12248773872852325,
"learning_rate": 0.0001,
"loss": 1.5762,
"step": 1276
},
{
"epoch": 0.427519250083696,
"grad_norm": 0.12932232022285461,
"learning_rate": 0.0001,
"loss": 1.6162,
"step": 1277
},
{
"epoch": 0.42785403414797457,
"grad_norm": 0.1178537905216217,
"learning_rate": 0.0001,
"loss": 1.472,
"step": 1278
},
{
"epoch": 0.4281888182122531,
"grad_norm": 0.1269647628068924,
"learning_rate": 0.0001,
"loss": 1.5551,
"step": 1279
},
{
"epoch": 0.42852360227653163,
"grad_norm": 0.120000459253788,
"learning_rate": 0.0001,
"loss": 1.509,
"step": 1280
},
{
"epoch": 0.4288583863408102,
"grad_norm": 0.12708665430545807,
"learning_rate": 0.0001,
"loss": 1.5293,
"step": 1281
},
{
"epoch": 0.4291931704050887,
"grad_norm": 0.13209426403045654,
"learning_rate": 0.0001,
"loss": 1.6311,
"step": 1282
},
{
"epoch": 0.42952795446936726,
"grad_norm": 0.1305491328239441,
"learning_rate": 0.0001,
"loss": 1.5505,
"step": 1283
},
{
"epoch": 0.4298627385336458,
"grad_norm": 0.1237809956073761,
"learning_rate": 0.0001,
"loss": 1.5457,
"step": 1284
},
{
"epoch": 0.4301975225979243,
"grad_norm": 0.13375982642173767,
"learning_rate": 0.0001,
"loss": 1.5321,
"step": 1285
},
{
"epoch": 0.4305323066622029,
"grad_norm": 0.13597902655601501,
"learning_rate": 0.0001,
"loss": 1.6229,
"step": 1286
},
{
"epoch": 0.43086709072648144,
"grad_norm": 0.12488207966089249,
"learning_rate": 0.0001,
"loss": 1.5231,
"step": 1287
},
{
"epoch": 0.43120187479075994,
"grad_norm": 0.12950995564460754,
"learning_rate": 0.0001,
"loss": 1.7162,
"step": 1288
},
{
"epoch": 0.4315366588550385,
"grad_norm": 0.12734153866767883,
"learning_rate": 0.0001,
"loss": 1.5735,
"step": 1289
},
{
"epoch": 0.43187144291931706,
"grad_norm": 0.13684290647506714,
"learning_rate": 0.0001,
"loss": 1.5866,
"step": 1290
},
{
"epoch": 0.43220622698359557,
"grad_norm": 0.12665408849716187,
"learning_rate": 0.0001,
"loss": 1.5236,
"step": 1291
},
{
"epoch": 0.4325410110478741,
"grad_norm": 0.12092933058738708,
"learning_rate": 0.0001,
"loss": 1.4859,
"step": 1292
},
{
"epoch": 0.4328757951121527,
"grad_norm": 0.14012545347213745,
"learning_rate": 0.0001,
"loss": 1.6158,
"step": 1293
},
{
"epoch": 0.4332105791764312,
"grad_norm": 0.12820059061050415,
"learning_rate": 0.0001,
"loss": 1.5108,
"step": 1294
},
{
"epoch": 0.43354536324070975,
"grad_norm": 0.13247036933898926,
"learning_rate": 0.0001,
"loss": 1.6031,
"step": 1295
},
{
"epoch": 0.4338801473049883,
"grad_norm": 0.12412893772125244,
"learning_rate": 0.0001,
"loss": 1.5829,
"step": 1296
},
{
"epoch": 0.4342149313692668,
"grad_norm": 0.12657597661018372,
"learning_rate": 0.0001,
"loss": 1.5139,
"step": 1297
},
{
"epoch": 0.43454971543354537,
"grad_norm": 0.13494263589382172,
"learning_rate": 0.0001,
"loss": 1.6264,
"step": 1298
},
{
"epoch": 0.43488449949782393,
"grad_norm": 0.12553179264068604,
"learning_rate": 0.0001,
"loss": 1.5587,
"step": 1299
},
{
"epoch": 0.43521928356210243,
"grad_norm": 0.12029055505990982,
"learning_rate": 0.0001,
"loss": 1.5177,
"step": 1300
},
{
"epoch": 0.435554067626381,
"grad_norm": 0.12742608785629272,
"learning_rate": 0.0001,
"loss": 1.6345,
"step": 1301
},
{
"epoch": 0.43588885169065955,
"grad_norm": 0.12749677896499634,
"learning_rate": 0.0001,
"loss": 1.5183,
"step": 1302
},
{
"epoch": 0.43622363575493805,
"grad_norm": 0.13716910779476166,
"learning_rate": 0.0001,
"loss": 1.6064,
"step": 1303
},
{
"epoch": 0.4365584198192166,
"grad_norm": 0.11626800149679184,
"learning_rate": 0.0001,
"loss": 1.461,
"step": 1304
},
{
"epoch": 0.4368932038834951,
"grad_norm": 0.12892816960811615,
"learning_rate": 0.0001,
"loss": 1.5856,
"step": 1305
},
{
"epoch": 0.4372279879477737,
"grad_norm": 0.12171407043933868,
"learning_rate": 0.0001,
"loss": 1.5669,
"step": 1306
},
{
"epoch": 0.43756277201205224,
"grad_norm": 0.12705732882022858,
"learning_rate": 0.0001,
"loss": 1.5392,
"step": 1307
},
{
"epoch": 0.43789755607633074,
"grad_norm": 0.12489151209592819,
"learning_rate": 0.0001,
"loss": 1.5621,
"step": 1308
},
{
"epoch": 0.4382323401406093,
"grad_norm": 0.1306968778371811,
"learning_rate": 0.0001,
"loss": 1.5601,
"step": 1309
},
{
"epoch": 0.43856712420488786,
"grad_norm": 0.12457779794931412,
"learning_rate": 0.0001,
"loss": 1.5292,
"step": 1310
},
{
"epoch": 0.43890190826916636,
"grad_norm": 0.1351223587989807,
"learning_rate": 0.0001,
"loss": 1.6364,
"step": 1311
},
{
"epoch": 0.4392366923334449,
"grad_norm": 0.16403745114803314,
"learning_rate": 0.0001,
"loss": 1.6135,
"step": 1312
},
{
"epoch": 0.4395714763977235,
"grad_norm": 0.1373598426580429,
"learning_rate": 0.0001,
"loss": 1.6102,
"step": 1313
},
{
"epoch": 0.439906260462002,
"grad_norm": 0.12474294006824493,
"learning_rate": 0.0001,
"loss": 1.4732,
"step": 1314
},
{
"epoch": 0.44024104452628054,
"grad_norm": 0.13775482773780823,
"learning_rate": 0.0001,
"loss": 1.4623,
"step": 1315
},
{
"epoch": 0.4405758285905591,
"grad_norm": 0.12874817848205566,
"learning_rate": 0.0001,
"loss": 1.5885,
"step": 1316
},
{
"epoch": 0.4409106126548376,
"grad_norm": 0.13382995128631592,
"learning_rate": 0.0001,
"loss": 1.4458,
"step": 1317
},
{
"epoch": 0.44124539671911617,
"grad_norm": 0.1267126202583313,
"learning_rate": 0.0001,
"loss": 1.5709,
"step": 1318
},
{
"epoch": 0.4415801807833947,
"grad_norm": 0.12839357554912567,
"learning_rate": 0.0001,
"loss": 1.5377,
"step": 1319
},
{
"epoch": 0.44191496484767323,
"grad_norm": 0.13176332414150238,
"learning_rate": 0.0001,
"loss": 1.4342,
"step": 1320
},
{
"epoch": 0.4422497489119518,
"grad_norm": 0.13202795386314392,
"learning_rate": 0.0001,
"loss": 1.5997,
"step": 1321
},
{
"epoch": 0.44258453297623035,
"grad_norm": 0.12316932529211044,
"learning_rate": 0.0001,
"loss": 1.4323,
"step": 1322
},
{
"epoch": 0.44291931704050885,
"grad_norm": 0.1301979273557663,
"learning_rate": 0.0001,
"loss": 1.5882,
"step": 1323
},
{
"epoch": 0.4432541011047874,
"grad_norm": 0.1263076364994049,
"learning_rate": 0.0001,
"loss": 1.4469,
"step": 1324
},
{
"epoch": 0.44358888516906597,
"grad_norm": 0.12310474365949631,
"learning_rate": 0.0001,
"loss": 1.4898,
"step": 1325
},
{
"epoch": 0.4439236692333445,
"grad_norm": 0.12039102613925934,
"learning_rate": 0.0001,
"loss": 1.5324,
"step": 1326
},
{
"epoch": 0.44425845329762303,
"grad_norm": 0.12545818090438843,
"learning_rate": 0.0001,
"loss": 1.6171,
"step": 1327
},
{
"epoch": 0.4445932373619016,
"grad_norm": 0.1259836107492447,
"learning_rate": 0.0001,
"loss": 1.5059,
"step": 1328
},
{
"epoch": 0.4449280214261801,
"grad_norm": 0.12518031895160675,
"learning_rate": 0.0001,
"loss": 1.5958,
"step": 1329
},
{
"epoch": 0.44526280549045866,
"grad_norm": 0.12583878636360168,
"learning_rate": 0.0001,
"loss": 1.4837,
"step": 1330
},
{
"epoch": 0.4455975895547372,
"grad_norm": 0.12569929659366608,
"learning_rate": 0.0001,
"loss": 1.536,
"step": 1331
},
{
"epoch": 0.4459323736190157,
"grad_norm": 0.1288549304008484,
"learning_rate": 0.0001,
"loss": 1.5525,
"step": 1332
},
{
"epoch": 0.4462671576832943,
"grad_norm": 0.13198384642601013,
"learning_rate": 0.0001,
"loss": 1.542,
"step": 1333
},
{
"epoch": 0.44660194174757284,
"grad_norm": 0.1238170713186264,
"learning_rate": 0.0001,
"loss": 1.4021,
"step": 1334
},
{
"epoch": 0.44693672581185134,
"grad_norm": 0.13295157253742218,
"learning_rate": 0.0001,
"loss": 1.5553,
"step": 1335
},
{
"epoch": 0.4472715098761299,
"grad_norm": 0.13403776288032532,
"learning_rate": 0.0001,
"loss": 1.4761,
"step": 1336
},
{
"epoch": 0.44760629394040846,
"grad_norm": 0.13343052566051483,
"learning_rate": 0.0001,
"loss": 1.573,
"step": 1337
},
{
"epoch": 0.44794107800468697,
"grad_norm": 0.125327467918396,
"learning_rate": 0.0001,
"loss": 1.5682,
"step": 1338
},
{
"epoch": 0.4482758620689655,
"grad_norm": 0.12958160042762756,
"learning_rate": 0.0001,
"loss": 1.5294,
"step": 1339
},
{
"epoch": 0.4486106461332441,
"grad_norm": 0.1384599506855011,
"learning_rate": 0.0001,
"loss": 1.5791,
"step": 1340
},
{
"epoch": 0.4489454301975226,
"grad_norm": 0.1257963478565216,
"learning_rate": 0.0001,
"loss": 1.5732,
"step": 1341
},
{
"epoch": 0.44928021426180115,
"grad_norm": 0.12630927562713623,
"learning_rate": 0.0001,
"loss": 1.5558,
"step": 1342
},
{
"epoch": 0.44961499832607965,
"grad_norm": 0.1268066167831421,
"learning_rate": 0.0001,
"loss": 1.5958,
"step": 1343
},
{
"epoch": 0.4499497823903582,
"grad_norm": 0.12455032020807266,
"learning_rate": 0.0001,
"loss": 1.5607,
"step": 1344
},
{
"epoch": 0.45028456645463677,
"grad_norm": 0.12265735119581223,
"learning_rate": 0.0001,
"loss": 1.5197,
"step": 1345
},
{
"epoch": 0.4506193505189153,
"grad_norm": 0.1307050883769989,
"learning_rate": 0.0001,
"loss": 1.6407,
"step": 1346
},
{
"epoch": 0.45095413458319383,
"grad_norm": 0.13128429651260376,
"learning_rate": 0.0001,
"loss": 1.5559,
"step": 1347
},
{
"epoch": 0.4512889186474724,
"grad_norm": 0.13010568916797638,
"learning_rate": 0.0001,
"loss": 1.5332,
"step": 1348
},
{
"epoch": 0.4516237027117509,
"grad_norm": 0.12650929391384125,
"learning_rate": 0.0001,
"loss": 1.6047,
"step": 1349
},
{
"epoch": 0.45195848677602946,
"grad_norm": 0.12306904792785645,
"learning_rate": 0.0001,
"loss": 1.5499,
"step": 1350
},
{
"epoch": 0.452293270840308,
"grad_norm": 0.13351021707057953,
"learning_rate": 0.0001,
"loss": 1.4737,
"step": 1351
},
{
"epoch": 0.4526280549045865,
"grad_norm": 0.12178155779838562,
"learning_rate": 0.0001,
"loss": 1.4775,
"step": 1352
},
{
"epoch": 0.4529628389688651,
"grad_norm": 0.13516512513160706,
"learning_rate": 0.0001,
"loss": 1.6391,
"step": 1353
},
{
"epoch": 0.45329762303314364,
"grad_norm": 0.12909267842769623,
"learning_rate": 0.0001,
"loss": 1.4684,
"step": 1354
},
{
"epoch": 0.45363240709742214,
"grad_norm": 0.12209142744541168,
"learning_rate": 0.0001,
"loss": 1.5198,
"step": 1355
},
{
"epoch": 0.4539671911617007,
"grad_norm": 0.1269826740026474,
"learning_rate": 0.0001,
"loss": 1.5294,
"step": 1356
},
{
"epoch": 0.45430197522597926,
"grad_norm": 0.13762542605400085,
"learning_rate": 0.0001,
"loss": 1.5567,
"step": 1357
},
{
"epoch": 0.45463675929025776,
"grad_norm": 0.1306358128786087,
"learning_rate": 0.0001,
"loss": 1.5829,
"step": 1358
},
{
"epoch": 0.4549715433545363,
"grad_norm": 0.1383924037218094,
"learning_rate": 0.0001,
"loss": 1.6382,
"step": 1359
},
{
"epoch": 0.4553063274188149,
"grad_norm": 0.13577204942703247,
"learning_rate": 0.0001,
"loss": 1.6067,
"step": 1360
},
{
"epoch": 0.4556411114830934,
"grad_norm": 0.12534180283546448,
"learning_rate": 0.0001,
"loss": 1.574,
"step": 1361
},
{
"epoch": 0.45597589554737195,
"grad_norm": 0.12367561459541321,
"learning_rate": 0.0001,
"loss": 1.5089,
"step": 1362
},
{
"epoch": 0.4563106796116505,
"grad_norm": 0.14012429118156433,
"learning_rate": 0.0001,
"loss": 1.6044,
"step": 1363
},
{
"epoch": 0.456645463675929,
"grad_norm": 0.13164697587490082,
"learning_rate": 0.0001,
"loss": 1.6058,
"step": 1364
},
{
"epoch": 0.45698024774020757,
"grad_norm": 0.14275015890598297,
"learning_rate": 0.0001,
"loss": 1.6945,
"step": 1365
},
{
"epoch": 0.4573150318044861,
"grad_norm": 0.1312190294265747,
"learning_rate": 0.0001,
"loss": 1.5595,
"step": 1366
},
{
"epoch": 0.45764981586876463,
"grad_norm": 0.1276426464319229,
"learning_rate": 0.0001,
"loss": 1.5639,
"step": 1367
},
{
"epoch": 0.4579845999330432,
"grad_norm": 0.12928691506385803,
"learning_rate": 0.0001,
"loss": 1.6555,
"step": 1368
},
{
"epoch": 0.45831938399732175,
"grad_norm": 0.12562155723571777,
"learning_rate": 0.0001,
"loss": 1.5017,
"step": 1369
},
{
"epoch": 0.45865416806160025,
"grad_norm": 0.12555162608623505,
"learning_rate": 0.0001,
"loss": 1.5133,
"step": 1370
},
{
"epoch": 0.4589889521258788,
"grad_norm": 0.13354945182800293,
"learning_rate": 0.0001,
"loss": 1.5802,
"step": 1371
},
{
"epoch": 0.4593237361901574,
"grad_norm": 0.13059929013252258,
"learning_rate": 0.0001,
"loss": 1.5152,
"step": 1372
},
{
"epoch": 0.4596585202544359,
"grad_norm": 0.1313420981168747,
"learning_rate": 0.0001,
"loss": 1.5411,
"step": 1373
},
{
"epoch": 0.45999330431871444,
"grad_norm": 0.13619214296340942,
"learning_rate": 0.0001,
"loss": 1.5348,
"step": 1374
},
{
"epoch": 0.460328088382993,
"grad_norm": 0.12227842211723328,
"learning_rate": 0.0001,
"loss": 1.5258,
"step": 1375
},
{
"epoch": 0.4606628724472715,
"grad_norm": 0.12962037324905396,
"learning_rate": 0.0001,
"loss": 1.6469,
"step": 1376
},
{
"epoch": 0.46099765651155006,
"grad_norm": 0.128581240773201,
"learning_rate": 0.0001,
"loss": 1.6151,
"step": 1377
},
{
"epoch": 0.4613324405758286,
"grad_norm": 0.12887564301490784,
"learning_rate": 0.0001,
"loss": 1.5741,
"step": 1378
},
{
"epoch": 0.4616672246401071,
"grad_norm": 0.12684863805770874,
"learning_rate": 0.0001,
"loss": 1.6168,
"step": 1379
},
{
"epoch": 0.4620020087043857,
"grad_norm": 0.11986137181520462,
"learning_rate": 0.0001,
"loss": 1.5278,
"step": 1380
},
{
"epoch": 0.4623367927686642,
"grad_norm": 0.12904709577560425,
"learning_rate": 0.0001,
"loss": 1.5247,
"step": 1381
},
{
"epoch": 0.46267157683294274,
"grad_norm": 0.12737007439136505,
"learning_rate": 0.0001,
"loss": 1.6354,
"step": 1382
},
{
"epoch": 0.4630063608972213,
"grad_norm": 0.13845406472682953,
"learning_rate": 0.0001,
"loss": 1.5696,
"step": 1383
},
{
"epoch": 0.4633411449614998,
"grad_norm": 0.1215730682015419,
"learning_rate": 0.0001,
"loss": 1.5277,
"step": 1384
},
{
"epoch": 0.46367592902577837,
"grad_norm": 0.12643855810165405,
"learning_rate": 0.0001,
"loss": 1.5691,
"step": 1385
},
{
"epoch": 0.4640107130900569,
"grad_norm": 0.12575271725654602,
"learning_rate": 0.0001,
"loss": 1.5075,
"step": 1386
},
{
"epoch": 0.46434549715433543,
"grad_norm": 0.13134850561618805,
"learning_rate": 0.0001,
"loss": 1.6195,
"step": 1387
},
{
"epoch": 0.464680281218614,
"grad_norm": 0.12751908600330353,
"learning_rate": 0.0001,
"loss": 1.5396,
"step": 1388
},
{
"epoch": 0.46501506528289255,
"grad_norm": 0.1260857880115509,
"learning_rate": 0.0001,
"loss": 1.581,
"step": 1389
},
{
"epoch": 0.46534984934717105,
"grad_norm": 0.13056620955467224,
"learning_rate": 0.0001,
"loss": 1.5604,
"step": 1390
},
{
"epoch": 0.4656846334114496,
"grad_norm": 0.12854252755641937,
"learning_rate": 0.0001,
"loss": 1.5729,
"step": 1391
},
{
"epoch": 0.46601941747572817,
"grad_norm": 0.12587207555770874,
"learning_rate": 0.0001,
"loss": 1.5685,
"step": 1392
},
{
"epoch": 0.4663542015400067,
"grad_norm": 0.13984687626361847,
"learning_rate": 0.0001,
"loss": 1.5327,
"step": 1393
},
{
"epoch": 0.46668898560428523,
"grad_norm": 0.1340693235397339,
"learning_rate": 0.0001,
"loss": 1.5047,
"step": 1394
},
{
"epoch": 0.4670237696685638,
"grad_norm": 0.12426851689815521,
"learning_rate": 0.0001,
"loss": 1.5614,
"step": 1395
},
{
"epoch": 0.4673585537328423,
"grad_norm": 0.14335423707962036,
"learning_rate": 0.0001,
"loss": 1.5968,
"step": 1396
},
{
"epoch": 0.46769333779712086,
"grad_norm": 0.1285167783498764,
"learning_rate": 0.0001,
"loss": 1.4816,
"step": 1397
},
{
"epoch": 0.4680281218613994,
"grad_norm": 0.12221338599920273,
"learning_rate": 0.0001,
"loss": 1.5412,
"step": 1398
},
{
"epoch": 0.4683629059256779,
"grad_norm": 0.13749419152736664,
"learning_rate": 0.0001,
"loss": 1.6426,
"step": 1399
},
{
"epoch": 0.4686976899899565,
"grad_norm": 0.1292765736579895,
"learning_rate": 0.0001,
"loss": 1.4826,
"step": 1400
},
{
"epoch": 0.46903247405423504,
"grad_norm": 0.12175814807415009,
"learning_rate": 0.0001,
"loss": 1.4674,
"step": 1401
},
{
"epoch": 0.46936725811851354,
"grad_norm": 0.13381820917129517,
"learning_rate": 0.0001,
"loss": 1.515,
"step": 1402
},
{
"epoch": 0.4697020421827921,
"grad_norm": 0.13659454882144928,
"learning_rate": 0.0001,
"loss": 1.5513,
"step": 1403
},
{
"epoch": 0.47003682624707066,
"grad_norm": 0.12511052191257477,
"learning_rate": 0.0001,
"loss": 1.5457,
"step": 1404
},
{
"epoch": 0.47037161031134916,
"grad_norm": 0.13325883448123932,
"learning_rate": 0.0001,
"loss": 1.5893,
"step": 1405
},
{
"epoch": 0.4707063943756277,
"grad_norm": 0.12582562863826752,
"learning_rate": 0.0001,
"loss": 1.5285,
"step": 1406
},
{
"epoch": 0.4710411784399063,
"grad_norm": 0.13141517341136932,
"learning_rate": 0.0001,
"loss": 1.5865,
"step": 1407
},
{
"epoch": 0.4713759625041848,
"grad_norm": 0.13099296391010284,
"learning_rate": 0.0001,
"loss": 1.5322,
"step": 1408
},
{
"epoch": 0.47171074656846335,
"grad_norm": 0.146238312125206,
"learning_rate": 0.0001,
"loss": 1.6397,
"step": 1409
},
{
"epoch": 0.4720455306327419,
"grad_norm": 0.12129180878400803,
"learning_rate": 0.0001,
"loss": 1.5033,
"step": 1410
},
{
"epoch": 0.4723803146970204,
"grad_norm": 0.125573992729187,
"learning_rate": 0.0001,
"loss": 1.571,
"step": 1411
},
{
"epoch": 0.47271509876129897,
"grad_norm": 0.14334800839424133,
"learning_rate": 0.0001,
"loss": 1.5323,
"step": 1412
},
{
"epoch": 0.47304988282557753,
"grad_norm": 0.1354663372039795,
"learning_rate": 0.0001,
"loss": 1.5733,
"step": 1413
},
{
"epoch": 0.47338466688985603,
"grad_norm": 0.13040928542613983,
"learning_rate": 0.0001,
"loss": 1.4702,
"step": 1414
},
{
"epoch": 0.4737194509541346,
"grad_norm": 0.12931925058364868,
"learning_rate": 0.0001,
"loss": 1.6017,
"step": 1415
},
{
"epoch": 0.47405423501841315,
"grad_norm": 0.13492871820926666,
"learning_rate": 0.0001,
"loss": 1.5827,
"step": 1416
},
{
"epoch": 0.47438901908269165,
"grad_norm": 0.12549789249897003,
"learning_rate": 0.0001,
"loss": 1.5856,
"step": 1417
},
{
"epoch": 0.4747238031469702,
"grad_norm": 0.13328687846660614,
"learning_rate": 0.0001,
"loss": 1.6163,
"step": 1418
},
{
"epoch": 0.4750585872112487,
"grad_norm": 0.13430629670619965,
"learning_rate": 0.0001,
"loss": 1.5663,
"step": 1419
},
{
"epoch": 0.4753933712755273,
"grad_norm": 0.12909024953842163,
"learning_rate": 0.0001,
"loss": 1.6085,
"step": 1420
},
{
"epoch": 0.47572815533980584,
"grad_norm": 0.13095097243785858,
"learning_rate": 0.0001,
"loss": 1.585,
"step": 1421
},
{
"epoch": 0.47606293940408434,
"grad_norm": 0.1313266009092331,
"learning_rate": 0.0001,
"loss": 1.5279,
"step": 1422
},
{
"epoch": 0.4763977234683629,
"grad_norm": 0.12739764153957367,
"learning_rate": 0.0001,
"loss": 1.6473,
"step": 1423
},
{
"epoch": 0.47673250753264146,
"grad_norm": 0.12780874967575073,
"learning_rate": 0.0001,
"loss": 1.5566,
"step": 1424
},
{
"epoch": 0.47706729159691996,
"grad_norm": 0.12299945950508118,
"learning_rate": 0.0001,
"loss": 1.5632,
"step": 1425
},
{
"epoch": 0.4774020756611985,
"grad_norm": 0.12845619022846222,
"learning_rate": 0.0001,
"loss": 1.5799,
"step": 1426
},
{
"epoch": 0.4777368597254771,
"grad_norm": 0.12429885566234589,
"learning_rate": 0.0001,
"loss": 1.565,
"step": 1427
},
{
"epoch": 0.4780716437897556,
"grad_norm": 0.12623021006584167,
"learning_rate": 0.0001,
"loss": 1.5579,
"step": 1428
},
{
"epoch": 0.47840642785403414,
"grad_norm": 0.121118925511837,
"learning_rate": 0.0001,
"loss": 1.5044,
"step": 1429
},
{
"epoch": 0.4787412119183127,
"grad_norm": 0.13029584288597107,
"learning_rate": 0.0001,
"loss": 1.5945,
"step": 1430
},
{
"epoch": 0.4790759959825912,
"grad_norm": 0.1309075504541397,
"learning_rate": 0.0001,
"loss": 1.5638,
"step": 1431
},
{
"epoch": 0.47941078004686977,
"grad_norm": 0.12302339822053909,
"learning_rate": 0.0001,
"loss": 1.553,
"step": 1432
},
{
"epoch": 0.4797455641111483,
"grad_norm": 0.13640674948692322,
"learning_rate": 0.0001,
"loss": 1.6299,
"step": 1433
},
{
"epoch": 0.48008034817542683,
"grad_norm": 0.12669233977794647,
"learning_rate": 0.0001,
"loss": 1.5603,
"step": 1434
},
{
"epoch": 0.4804151322397054,
"grad_norm": 0.14192534983158112,
"learning_rate": 0.0001,
"loss": 1.5648,
"step": 1435
},
{
"epoch": 0.48074991630398395,
"grad_norm": 0.12855654954910278,
"learning_rate": 0.0001,
"loss": 1.5782,
"step": 1436
},
{
"epoch": 0.48108470036826245,
"grad_norm": 0.13193868100643158,
"learning_rate": 0.0001,
"loss": 1.4815,
"step": 1437
},
{
"epoch": 0.481419484432541,
"grad_norm": 0.1313331574201584,
"learning_rate": 0.0001,
"loss": 1.597,
"step": 1438
},
{
"epoch": 0.48175426849681957,
"grad_norm": 0.14010664820671082,
"learning_rate": 0.0001,
"loss": 1.5911,
"step": 1439
},
{
"epoch": 0.4820890525610981,
"grad_norm": 0.12899306416511536,
"learning_rate": 0.0001,
"loss": 1.5346,
"step": 1440
},
{
"epoch": 0.48242383662537663,
"grad_norm": 0.14157001674175262,
"learning_rate": 0.0001,
"loss": 1.4947,
"step": 1441
},
{
"epoch": 0.4827586206896552,
"grad_norm": 0.12598420679569244,
"learning_rate": 0.0001,
"loss": 1.5713,
"step": 1442
},
{
"epoch": 0.4830934047539337,
"grad_norm": 0.12368304282426834,
"learning_rate": 0.0001,
"loss": 1.4691,
"step": 1443
},
{
"epoch": 0.48342818881821226,
"grad_norm": 0.15252211689949036,
"learning_rate": 0.0001,
"loss": 1.5298,
"step": 1444
},
{
"epoch": 0.4837629728824908,
"grad_norm": 0.12461958080530167,
"learning_rate": 0.0001,
"loss": 1.5377,
"step": 1445
},
{
"epoch": 0.4840977569467693,
"grad_norm": 0.13883721828460693,
"learning_rate": 0.0001,
"loss": 1.5754,
"step": 1446
},
{
"epoch": 0.4844325410110479,
"grad_norm": 0.14833161234855652,
"learning_rate": 0.0001,
"loss": 1.514,
"step": 1447
},
{
"epoch": 0.48476732507532644,
"grad_norm": 0.12511619925498962,
"learning_rate": 0.0001,
"loss": 1.5765,
"step": 1448
},
{
"epoch": 0.48510210913960494,
"grad_norm": 0.1352238804101944,
"learning_rate": 0.0001,
"loss": 1.5231,
"step": 1449
},
{
"epoch": 0.4854368932038835,
"grad_norm": 0.14310289919376373,
"learning_rate": 0.0001,
"loss": 1.5516,
"step": 1450
},
{
"epoch": 0.48577167726816206,
"grad_norm": 0.1293793022632599,
"learning_rate": 0.0001,
"loss": 1.6124,
"step": 1451
},
{
"epoch": 0.48610646133244056,
"grad_norm": 0.1351606398820877,
"learning_rate": 0.0001,
"loss": 1.5535,
"step": 1452
},
{
"epoch": 0.4864412453967191,
"grad_norm": 0.1305823028087616,
"learning_rate": 0.0001,
"loss": 1.505,
"step": 1453
},
{
"epoch": 0.4867760294609977,
"grad_norm": 0.12973332405090332,
"learning_rate": 0.0001,
"loss": 1.6027,
"step": 1454
},
{
"epoch": 0.4871108135252762,
"grad_norm": 0.1279638260602951,
"learning_rate": 0.0001,
"loss": 1.5664,
"step": 1455
},
{
"epoch": 0.48744559758955475,
"grad_norm": 0.1322777271270752,
"learning_rate": 0.0001,
"loss": 1.605,
"step": 1456
},
{
"epoch": 0.48778038165383325,
"grad_norm": 0.14680039882659912,
"learning_rate": 0.0001,
"loss": 1.5243,
"step": 1457
},
{
"epoch": 0.4881151657181118,
"grad_norm": 0.12435714155435562,
"learning_rate": 0.0001,
"loss": 1.4835,
"step": 1458
},
{
"epoch": 0.48844994978239037,
"grad_norm": 0.13253144919872284,
"learning_rate": 0.0001,
"loss": 1.5797,
"step": 1459
},
{
"epoch": 0.4887847338466689,
"grad_norm": 0.14123192429542542,
"learning_rate": 0.0001,
"loss": 1.5795,
"step": 1460
},
{
"epoch": 0.48911951791094743,
"grad_norm": 0.1254579871892929,
"learning_rate": 0.0001,
"loss": 1.4829,
"step": 1461
},
{
"epoch": 0.489454301975226,
"grad_norm": 0.1407458633184433,
"learning_rate": 0.0001,
"loss": 1.5746,
"step": 1462
},
{
"epoch": 0.4897890860395045,
"grad_norm": 0.13967539370059967,
"learning_rate": 0.0001,
"loss": 1.611,
"step": 1463
},
{
"epoch": 0.49012387010378305,
"grad_norm": 0.13044650852680206,
"learning_rate": 0.0001,
"loss": 1.5614,
"step": 1464
},
{
"epoch": 0.4904586541680616,
"grad_norm": 0.13819964230060577,
"learning_rate": 0.0001,
"loss": 1.5579,
"step": 1465
},
{
"epoch": 0.4907934382323401,
"grad_norm": 0.12795104086399078,
"learning_rate": 0.0001,
"loss": 1.5373,
"step": 1466
},
{
"epoch": 0.4911282222966187,
"grad_norm": 0.13034126162528992,
"learning_rate": 0.0001,
"loss": 1.5077,
"step": 1467
},
{
"epoch": 0.49146300636089724,
"grad_norm": 0.1358436644077301,
"learning_rate": 0.0001,
"loss": 1.6376,
"step": 1468
},
{
"epoch": 0.49179779042517574,
"grad_norm": 0.12750184535980225,
"learning_rate": 0.0001,
"loss": 1.5638,
"step": 1469
},
{
"epoch": 0.4921325744894543,
"grad_norm": 0.13034793734550476,
"learning_rate": 0.0001,
"loss": 1.5053,
"step": 1470
},
{
"epoch": 0.49246735855373286,
"grad_norm": 0.1303941309452057,
"learning_rate": 0.0001,
"loss": 1.5342,
"step": 1471
},
{
"epoch": 0.49280214261801136,
"grad_norm": 0.12955164909362793,
"learning_rate": 0.0001,
"loss": 1.5396,
"step": 1472
},
{
"epoch": 0.4931369266822899,
"grad_norm": 0.12884975969791412,
"learning_rate": 0.0001,
"loss": 1.5389,
"step": 1473
},
{
"epoch": 0.4934717107465685,
"grad_norm": 0.1278049647808075,
"learning_rate": 0.0001,
"loss": 1.5937,
"step": 1474
},
{
"epoch": 0.493806494810847,
"grad_norm": 0.12420760840177536,
"learning_rate": 0.0001,
"loss": 1.4753,
"step": 1475
},
{
"epoch": 0.49414127887512554,
"grad_norm": 0.12760096788406372,
"learning_rate": 0.0001,
"loss": 1.647,
"step": 1476
},
{
"epoch": 0.4944760629394041,
"grad_norm": 0.1320486068725586,
"learning_rate": 0.0001,
"loss": 1.5758,
"step": 1477
},
{
"epoch": 0.4948108470036826,
"grad_norm": 0.13898344337940216,
"learning_rate": 0.0001,
"loss": 1.6265,
"step": 1478
},
{
"epoch": 0.49514563106796117,
"grad_norm": 0.12908297777175903,
"learning_rate": 0.0001,
"loss": 1.6294,
"step": 1479
},
{
"epoch": 0.4954804151322397,
"grad_norm": 0.13149291276931763,
"learning_rate": 0.0001,
"loss": 1.5297,
"step": 1480
},
{
"epoch": 0.49581519919651823,
"grad_norm": 0.13526497781276703,
"learning_rate": 0.0001,
"loss": 1.5374,
"step": 1481
},
{
"epoch": 0.4961499832607968,
"grad_norm": 0.12223420292139053,
"learning_rate": 0.0001,
"loss": 1.5424,
"step": 1482
},
{
"epoch": 0.49648476732507535,
"grad_norm": 0.1266697198152542,
"learning_rate": 0.0001,
"loss": 1.5847,
"step": 1483
},
{
"epoch": 0.49681955138935385,
"grad_norm": 0.14440171420574188,
"learning_rate": 0.0001,
"loss": 1.5362,
"step": 1484
},
{
"epoch": 0.4971543354536324,
"grad_norm": 0.12831640243530273,
"learning_rate": 0.0001,
"loss": 1.5803,
"step": 1485
},
{
"epoch": 0.49748911951791097,
"grad_norm": 0.13665077090263367,
"learning_rate": 0.0001,
"loss": 1.5741,
"step": 1486
},
{
"epoch": 0.4978239035821895,
"grad_norm": 0.13725218176841736,
"learning_rate": 0.0001,
"loss": 1.6207,
"step": 1487
},
{
"epoch": 0.49815868764646803,
"grad_norm": 0.1271527111530304,
"learning_rate": 0.0001,
"loss": 1.6129,
"step": 1488
},
{
"epoch": 0.4984934717107466,
"grad_norm": 0.15319159626960754,
"learning_rate": 0.0001,
"loss": 1.6247,
"step": 1489
},
{
"epoch": 0.4988282557750251,
"grad_norm": 0.12440894544124603,
"learning_rate": 0.0001,
"loss": 1.4354,
"step": 1490
},
{
"epoch": 0.49916303983930366,
"grad_norm": 0.1261643022298813,
"learning_rate": 0.0001,
"loss": 1.609,
"step": 1491
},
{
"epoch": 0.4994978239035822,
"grad_norm": 0.14216668903827667,
"learning_rate": 0.0001,
"loss": 1.5599,
"step": 1492
},
{
"epoch": 0.4998326079678607,
"grad_norm": 0.13173174858093262,
"learning_rate": 0.0001,
"loss": 1.5056,
"step": 1493
},
{
"epoch": 0.5001673920321392,
"grad_norm": 0.12335377931594849,
"learning_rate": 0.0001,
"loss": 1.5544,
"step": 1494
},
{
"epoch": 0.5005021760964178,
"grad_norm": 0.13367588818073273,
"learning_rate": 0.0001,
"loss": 1.4908,
"step": 1495
},
{
"epoch": 0.5008369601606963,
"grad_norm": 0.13830317556858063,
"learning_rate": 0.0001,
"loss": 1.6147,
"step": 1496
},
{
"epoch": 0.5011717442249749,
"grad_norm": 0.13441935181617737,
"learning_rate": 0.0001,
"loss": 1.6855,
"step": 1497
},
{
"epoch": 0.5015065282892535,
"grad_norm": 0.14937585592269897,
"learning_rate": 0.0001,
"loss": 1.6021,
"step": 1498
},
{
"epoch": 0.501841312353532,
"grad_norm": 0.1289912909269333,
"learning_rate": 0.0001,
"loss": 1.5516,
"step": 1499
},
{
"epoch": 0.5021760964178105,
"grad_norm": 0.12371324002742767,
"learning_rate": 0.0001,
"loss": 1.5842,
"step": 1500
},
{
"epoch": 0.502510880482089,
"grad_norm": 0.12764602899551392,
"learning_rate": 0.0001,
"loss": 1.5836,
"step": 1501
},
{
"epoch": 0.5028456645463676,
"grad_norm": 0.12929953634738922,
"learning_rate": 0.0001,
"loss": 1.5656,
"step": 1502
},
{
"epoch": 0.5031804486106461,
"grad_norm": 0.1252906322479248,
"learning_rate": 0.0001,
"loss": 1.4856,
"step": 1503
},
{
"epoch": 0.5035152326749247,
"grad_norm": 0.13477809727191925,
"learning_rate": 0.0001,
"loss": 1.6185,
"step": 1504
},
{
"epoch": 0.5038500167392033,
"grad_norm": 0.12459214776754379,
"learning_rate": 0.0001,
"loss": 1.5323,
"step": 1505
},
{
"epoch": 0.5041848008034817,
"grad_norm": 0.12989842891693115,
"learning_rate": 0.0001,
"loss": 1.5325,
"step": 1506
},
{
"epoch": 0.5045195848677603,
"grad_norm": 0.12878334522247314,
"learning_rate": 0.0001,
"loss": 1.6504,
"step": 1507
},
{
"epoch": 0.5048543689320388,
"grad_norm": 0.14765828847885132,
"learning_rate": 0.0001,
"loss": 1.5978,
"step": 1508
},
{
"epoch": 0.5051891529963174,
"grad_norm": 0.1294100284576416,
"learning_rate": 0.0001,
"loss": 1.6909,
"step": 1509
},
{
"epoch": 0.505523937060596,
"grad_norm": 0.1304991990327835,
"learning_rate": 0.0001,
"loss": 1.6513,
"step": 1510
},
{
"epoch": 0.5058587211248745,
"grad_norm": 0.1318545788526535,
"learning_rate": 0.0001,
"loss": 1.5489,
"step": 1511
},
{
"epoch": 0.506193505189153,
"grad_norm": 0.13185527920722961,
"learning_rate": 0.0001,
"loss": 1.6317,
"step": 1512
},
{
"epoch": 0.5065282892534315,
"grad_norm": 0.13133597373962402,
"learning_rate": 0.0001,
"loss": 1.5853,
"step": 1513
},
{
"epoch": 0.5068630733177101,
"grad_norm": 0.14132916927337646,
"learning_rate": 0.0001,
"loss": 1.6844,
"step": 1514
},
{
"epoch": 0.5071978573819886,
"grad_norm": 0.12680397927761078,
"learning_rate": 0.0001,
"loss": 1.6048,
"step": 1515
},
{
"epoch": 0.5075326414462672,
"grad_norm": 0.125723198056221,
"learning_rate": 0.0001,
"loss": 1.5296,
"step": 1516
},
{
"epoch": 0.5078674255105456,
"grad_norm": 0.135573148727417,
"learning_rate": 0.0001,
"loss": 1.6619,
"step": 1517
},
{
"epoch": 0.5082022095748242,
"grad_norm": 0.12755006551742554,
"learning_rate": 0.0001,
"loss": 1.5376,
"step": 1518
},
{
"epoch": 0.5085369936391028,
"grad_norm": 0.1527450680732727,
"learning_rate": 0.0001,
"loss": 1.4984,
"step": 1519
},
{
"epoch": 0.5088717777033813,
"grad_norm": 0.12978217005729675,
"learning_rate": 0.0001,
"loss": 1.514,
"step": 1520
},
{
"epoch": 0.5092065617676599,
"grad_norm": 0.13393737375736237,
"learning_rate": 0.0001,
"loss": 1.5267,
"step": 1521
},
{
"epoch": 0.5095413458319384,
"grad_norm": 0.13406458497047424,
"learning_rate": 0.0001,
"loss": 1.4858,
"step": 1522
},
{
"epoch": 0.5098761298962169,
"grad_norm": 0.13214215636253357,
"learning_rate": 0.0001,
"loss": 1.5391,
"step": 1523
},
{
"epoch": 0.5102109139604954,
"grad_norm": 0.13335101306438446,
"learning_rate": 0.0001,
"loss": 1.5791,
"step": 1524
},
{
"epoch": 0.510545698024774,
"grad_norm": 0.12885718047618866,
"learning_rate": 0.0001,
"loss": 1.532,
"step": 1525
},
{
"epoch": 0.5108804820890526,
"grad_norm": 0.12838226556777954,
"learning_rate": 0.0001,
"loss": 1.5186,
"step": 1526
},
{
"epoch": 0.5112152661533311,
"grad_norm": 0.13160903751850128,
"learning_rate": 0.0001,
"loss": 1.5792,
"step": 1527
},
{
"epoch": 0.5115500502176097,
"grad_norm": 0.1264614462852478,
"learning_rate": 0.0001,
"loss": 1.6005,
"step": 1528
},
{
"epoch": 0.5118848342818881,
"grad_norm": 0.13425403833389282,
"learning_rate": 0.0001,
"loss": 1.5413,
"step": 1529
},
{
"epoch": 0.5122196183461667,
"grad_norm": 0.12175809592008591,
"learning_rate": 0.0001,
"loss": 1.5128,
"step": 1530
},
{
"epoch": 0.5125544024104453,
"grad_norm": 0.1299484223127365,
"learning_rate": 0.0001,
"loss": 1.4981,
"step": 1531
},
{
"epoch": 0.5128891864747238,
"grad_norm": 0.12358542531728745,
"learning_rate": 0.0001,
"loss": 1.4794,
"step": 1532
},
{
"epoch": 0.5132239705390024,
"grad_norm": 0.12457676231861115,
"learning_rate": 0.0001,
"loss": 1.462,
"step": 1533
},
{
"epoch": 0.5135587546032809,
"grad_norm": 0.12775678932666779,
"learning_rate": 0.0001,
"loss": 1.4993,
"step": 1534
},
{
"epoch": 0.5138935386675594,
"grad_norm": 0.12386265397071838,
"learning_rate": 0.0001,
"loss": 1.504,
"step": 1535
},
{
"epoch": 0.5142283227318379,
"grad_norm": 0.13995805382728577,
"learning_rate": 0.0001,
"loss": 1.5912,
"step": 1536
},
{
"epoch": 0.5145631067961165,
"grad_norm": 0.1274706870317459,
"learning_rate": 0.0001,
"loss": 1.6514,
"step": 1537
},
{
"epoch": 0.5148978908603951,
"grad_norm": 0.12781144678592682,
"learning_rate": 0.0001,
"loss": 1.5379,
"step": 1538
},
{
"epoch": 0.5152326749246736,
"grad_norm": 0.12408823519945145,
"learning_rate": 0.0001,
"loss": 1.4709,
"step": 1539
},
{
"epoch": 0.5155674589889522,
"grad_norm": 0.12711866199970245,
"learning_rate": 0.0001,
"loss": 1.5529,
"step": 1540
},
{
"epoch": 0.5159022430532306,
"grad_norm": 0.12433881312608719,
"learning_rate": 0.0001,
"loss": 1.4641,
"step": 1541
},
{
"epoch": 0.5162370271175092,
"grad_norm": 0.13031256198883057,
"learning_rate": 0.0001,
"loss": 1.6042,
"step": 1542
},
{
"epoch": 0.5165718111817877,
"grad_norm": 0.1294173002243042,
"learning_rate": 0.0001,
"loss": 1.5269,
"step": 1543
},
{
"epoch": 0.5169065952460663,
"grad_norm": 0.1273273229598999,
"learning_rate": 0.0001,
"loss": 1.5984,
"step": 1544
},
{
"epoch": 0.5172413793103449,
"grad_norm": 0.13191919028759003,
"learning_rate": 0.0001,
"loss": 1.5684,
"step": 1545
},
{
"epoch": 0.5175761633746234,
"grad_norm": 0.13768093287944794,
"learning_rate": 0.0001,
"loss": 1.555,
"step": 1546
},
{
"epoch": 0.5179109474389019,
"grad_norm": 0.12926150858402252,
"learning_rate": 0.0001,
"loss": 1.4731,
"step": 1547
},
{
"epoch": 0.5182457315031804,
"grad_norm": 0.12586715817451477,
"learning_rate": 0.0001,
"loss": 1.4794,
"step": 1548
},
{
"epoch": 0.518580515567459,
"grad_norm": 0.12548579275608063,
"learning_rate": 0.0001,
"loss": 1.5266,
"step": 1549
},
{
"epoch": 0.5189152996317375,
"grad_norm": 0.12171539664268494,
"learning_rate": 0.0001,
"loss": 1.4205,
"step": 1550
},
{
"epoch": 0.5192500836960161,
"grad_norm": 0.13130709528923035,
"learning_rate": 0.0001,
"loss": 1.5927,
"step": 1551
},
{
"epoch": 0.5195848677602946,
"grad_norm": 0.1342555582523346,
"learning_rate": 0.0001,
"loss": 1.5756,
"step": 1552
},
{
"epoch": 0.5199196518245731,
"grad_norm": 0.12991021573543549,
"learning_rate": 0.0001,
"loss": 1.646,
"step": 1553
},
{
"epoch": 0.5202544358888517,
"grad_norm": 0.13074184954166412,
"learning_rate": 0.0001,
"loss": 1.4619,
"step": 1554
},
{
"epoch": 0.5205892199531302,
"grad_norm": 0.12969058752059937,
"learning_rate": 0.0001,
"loss": 1.5048,
"step": 1555
},
{
"epoch": 0.5209240040174088,
"grad_norm": 0.12283259630203247,
"learning_rate": 0.0001,
"loss": 1.4968,
"step": 1556
},
{
"epoch": 0.5212587880816874,
"grad_norm": 0.14244720339775085,
"learning_rate": 0.0001,
"loss": 1.5984,
"step": 1557
},
{
"epoch": 0.5215935721459658,
"grad_norm": 0.12856322526931763,
"learning_rate": 0.0001,
"loss": 1.5382,
"step": 1558
},
{
"epoch": 0.5219283562102444,
"grad_norm": 0.1262657344341278,
"learning_rate": 0.0001,
"loss": 1.5191,
"step": 1559
},
{
"epoch": 0.5222631402745229,
"grad_norm": 0.1350589543581009,
"learning_rate": 0.0001,
"loss": 1.5812,
"step": 1560
},
{
"epoch": 0.5225979243388015,
"grad_norm": 0.13602742552757263,
"learning_rate": 0.0001,
"loss": 1.6252,
"step": 1561
},
{
"epoch": 0.52293270840308,
"grad_norm": 0.1273350566625595,
"learning_rate": 0.0001,
"loss": 1.5607,
"step": 1562
},
{
"epoch": 0.5232674924673586,
"grad_norm": 0.1261235773563385,
"learning_rate": 0.0001,
"loss": 1.4537,
"step": 1563
},
{
"epoch": 0.523602276531637,
"grad_norm": 0.123395174741745,
"learning_rate": 0.0001,
"loss": 1.4839,
"step": 1564
},
{
"epoch": 0.5239370605959156,
"grad_norm": 0.12707623839378357,
"learning_rate": 0.0001,
"loss": 1.5671,
"step": 1565
},
{
"epoch": 0.5242718446601942,
"grad_norm": 0.119587741792202,
"learning_rate": 0.0001,
"loss": 1.4637,
"step": 1566
},
{
"epoch": 0.5246066287244727,
"grad_norm": 0.12568604946136475,
"learning_rate": 0.0001,
"loss": 1.5196,
"step": 1567
},
{
"epoch": 0.5249414127887513,
"grad_norm": 0.13292740285396576,
"learning_rate": 0.0001,
"loss": 1.5909,
"step": 1568
},
{
"epoch": 0.5252761968530298,
"grad_norm": 0.13198155164718628,
"learning_rate": 0.0001,
"loss": 1.6039,
"step": 1569
},
{
"epoch": 0.5256109809173083,
"grad_norm": 0.12587766349315643,
"learning_rate": 0.0001,
"loss": 1.5418,
"step": 1570
},
{
"epoch": 0.5259457649815868,
"grad_norm": 0.12726300954818726,
"learning_rate": 0.0001,
"loss": 1.5366,
"step": 1571
},
{
"epoch": 0.5262805490458654,
"grad_norm": 0.12479355186223984,
"learning_rate": 0.0001,
"loss": 1.5486,
"step": 1572
},
{
"epoch": 0.526615333110144,
"grad_norm": 0.1242307722568512,
"learning_rate": 0.0001,
"loss": 1.4547,
"step": 1573
},
{
"epoch": 0.5269501171744225,
"grad_norm": 0.12753188610076904,
"learning_rate": 0.0001,
"loss": 1.6649,
"step": 1574
},
{
"epoch": 0.5272849012387011,
"grad_norm": 0.12815521657466888,
"learning_rate": 0.0001,
"loss": 1.4489,
"step": 1575
},
{
"epoch": 0.5276196853029795,
"grad_norm": 0.1192578375339508,
"learning_rate": 0.0001,
"loss": 1.4078,
"step": 1576
},
{
"epoch": 0.5279544693672581,
"grad_norm": 0.12596169114112854,
"learning_rate": 0.0001,
"loss": 1.5369,
"step": 1577
},
{
"epoch": 0.5282892534315367,
"grad_norm": 0.13193419575691223,
"learning_rate": 0.0001,
"loss": 1.5601,
"step": 1578
},
{
"epoch": 0.5286240374958152,
"grad_norm": 0.1277266889810562,
"learning_rate": 0.0001,
"loss": 1.5336,
"step": 1579
},
{
"epoch": 0.5289588215600938,
"grad_norm": 0.12819704413414001,
"learning_rate": 0.0001,
"loss": 1.4713,
"step": 1580
},
{
"epoch": 0.5292936056243723,
"grad_norm": 0.1399090439081192,
"learning_rate": 0.0001,
"loss": 1.5978,
"step": 1581
},
{
"epoch": 0.5296283896886508,
"grad_norm": 0.1373160183429718,
"learning_rate": 0.0001,
"loss": 1.6614,
"step": 1582
},
{
"epoch": 0.5299631737529293,
"grad_norm": 0.1253012716770172,
"learning_rate": 0.0001,
"loss": 1.5317,
"step": 1583
},
{
"epoch": 0.5302979578172079,
"grad_norm": 0.124544158577919,
"learning_rate": 0.0001,
"loss": 1.4947,
"step": 1584
},
{
"epoch": 0.5306327418814865,
"grad_norm": 0.13060353696346283,
"learning_rate": 0.0001,
"loss": 1.5342,
"step": 1585
},
{
"epoch": 0.530967525945765,
"grad_norm": 0.12680500745773315,
"learning_rate": 0.0001,
"loss": 1.4597,
"step": 1586
},
{
"epoch": 0.5313023100100436,
"grad_norm": 0.13112664222717285,
"learning_rate": 0.0001,
"loss": 1.5978,
"step": 1587
},
{
"epoch": 0.531637094074322,
"grad_norm": 0.13016077876091003,
"learning_rate": 0.0001,
"loss": 1.5575,
"step": 1588
},
{
"epoch": 0.5319718781386006,
"grad_norm": 0.1273767054080963,
"learning_rate": 0.0001,
"loss": 1.607,
"step": 1589
},
{
"epoch": 0.5323066622028791,
"grad_norm": 0.1310475915670395,
"learning_rate": 0.0001,
"loss": 1.5066,
"step": 1590
},
{
"epoch": 0.5326414462671577,
"grad_norm": 0.12938565015792847,
"learning_rate": 0.0001,
"loss": 1.4933,
"step": 1591
},
{
"epoch": 0.5329762303314363,
"grad_norm": 0.12316200882196426,
"learning_rate": 0.0001,
"loss": 1.4752,
"step": 1592
},
{
"epoch": 0.5333110143957147,
"grad_norm": 0.13205035030841827,
"learning_rate": 0.0001,
"loss": 1.5061,
"step": 1593
},
{
"epoch": 0.5336457984599933,
"grad_norm": 0.12517520785331726,
"learning_rate": 0.0001,
"loss": 1.5237,
"step": 1594
},
{
"epoch": 0.5339805825242718,
"grad_norm": 0.1309306025505066,
"learning_rate": 0.0001,
"loss": 1.5975,
"step": 1595
},
{
"epoch": 0.5343153665885504,
"grad_norm": 0.13565212488174438,
"learning_rate": 0.0001,
"loss": 1.6888,
"step": 1596
},
{
"epoch": 0.534650150652829,
"grad_norm": 0.13044795393943787,
"learning_rate": 0.0001,
"loss": 1.547,
"step": 1597
},
{
"epoch": 0.5349849347171075,
"grad_norm": 0.12757791578769684,
"learning_rate": 0.0001,
"loss": 1.5788,
"step": 1598
},
{
"epoch": 0.535319718781386,
"grad_norm": 0.12625539302825928,
"learning_rate": 0.0001,
"loss": 1.6271,
"step": 1599
},
{
"epoch": 0.5356545028456645,
"grad_norm": 0.12980274856090546,
"learning_rate": 0.0001,
"loss": 1.4808,
"step": 1600
},
{
"epoch": 0.5359892869099431,
"grad_norm": 0.1339329481124878,
"learning_rate": 0.0001,
"loss": 1.5838,
"step": 1601
},
{
"epoch": 0.5363240709742216,
"grad_norm": 0.13570533692836761,
"learning_rate": 0.0001,
"loss": 1.5526,
"step": 1602
},
{
"epoch": 0.5366588550385002,
"grad_norm": 0.13043223321437836,
"learning_rate": 0.0001,
"loss": 1.5046,
"step": 1603
},
{
"epoch": 0.5369936391027788,
"grad_norm": 0.1268492341041565,
"learning_rate": 0.0001,
"loss": 1.4846,
"step": 1604
},
{
"epoch": 0.5373284231670572,
"grad_norm": 0.12844318151474,
"learning_rate": 0.0001,
"loss": 1.622,
"step": 1605
},
{
"epoch": 0.5376632072313358,
"grad_norm": 0.12543794512748718,
"learning_rate": 0.0001,
"loss": 1.4895,
"step": 1606
},
{
"epoch": 0.5379979912956143,
"grad_norm": 0.13247263431549072,
"learning_rate": 0.0001,
"loss": 1.5431,
"step": 1607
},
{
"epoch": 0.5383327753598929,
"grad_norm": 0.12495877593755722,
"learning_rate": 0.0001,
"loss": 1.5534,
"step": 1608
},
{
"epoch": 0.5386675594241714,
"grad_norm": 0.12770773470401764,
"learning_rate": 0.0001,
"loss": 1.5296,
"step": 1609
},
{
"epoch": 0.53900234348845,
"grad_norm": 0.1249793991446495,
"learning_rate": 0.0001,
"loss": 1.549,
"step": 1610
},
{
"epoch": 0.5393371275527284,
"grad_norm": 0.13602420687675476,
"learning_rate": 0.0001,
"loss": 1.6911,
"step": 1611
},
{
"epoch": 0.539671911617007,
"grad_norm": 0.1260257512331009,
"learning_rate": 0.0001,
"loss": 1.6155,
"step": 1612
},
{
"epoch": 0.5400066956812856,
"grad_norm": 0.13716067373752594,
"learning_rate": 0.0001,
"loss": 1.5017,
"step": 1613
},
{
"epoch": 0.5403414797455641,
"grad_norm": 0.12322457879781723,
"learning_rate": 0.0001,
"loss": 1.4567,
"step": 1614
},
{
"epoch": 0.5406762638098427,
"grad_norm": 0.1295168548822403,
"learning_rate": 0.0001,
"loss": 1.5388,
"step": 1615
},
{
"epoch": 0.5410110478741212,
"grad_norm": 0.13598200678825378,
"learning_rate": 0.0001,
"loss": 1.6189,
"step": 1616
},
{
"epoch": 0.5413458319383997,
"grad_norm": 0.12514351308345795,
"learning_rate": 0.0001,
"loss": 1.5957,
"step": 1617
},
{
"epoch": 0.5416806160026783,
"grad_norm": 0.13243642449378967,
"learning_rate": 0.0001,
"loss": 1.5211,
"step": 1618
},
{
"epoch": 0.5420154000669568,
"grad_norm": 0.14331547915935516,
"learning_rate": 0.0001,
"loss": 1.628,
"step": 1619
},
{
"epoch": 0.5423501841312354,
"grad_norm": 0.13204847276210785,
"learning_rate": 0.0001,
"loss": 1.6131,
"step": 1620
},
{
"epoch": 0.5426849681955139,
"grad_norm": 0.13828937709331512,
"learning_rate": 0.0001,
"loss": 1.6206,
"step": 1621
},
{
"epoch": 0.5430197522597925,
"grad_norm": 0.13166444003582,
"learning_rate": 0.0001,
"loss": 1.556,
"step": 1622
},
{
"epoch": 0.5433545363240709,
"grad_norm": 0.131551131606102,
"learning_rate": 0.0001,
"loss": 1.5884,
"step": 1623
},
{
"epoch": 0.5436893203883495,
"grad_norm": 0.1386868953704834,
"learning_rate": 0.0001,
"loss": 1.626,
"step": 1624
},
{
"epoch": 0.544024104452628,
"grad_norm": 0.12754793465137482,
"learning_rate": 0.0001,
"loss": 1.5419,
"step": 1625
},
{
"epoch": 0.5443588885169066,
"grad_norm": 0.13059911131858826,
"learning_rate": 0.0001,
"loss": 1.5886,
"step": 1626
},
{
"epoch": 0.5446936725811852,
"grad_norm": 0.13056625425815582,
"learning_rate": 0.0001,
"loss": 1.5093,
"step": 1627
},
{
"epoch": 0.5450284566454636,
"grad_norm": 0.12965354323387146,
"learning_rate": 0.0001,
"loss": 1.5766,
"step": 1628
},
{
"epoch": 0.5453632407097422,
"grad_norm": 0.12052886188030243,
"learning_rate": 0.0001,
"loss": 1.5315,
"step": 1629
},
{
"epoch": 0.5456980247740207,
"grad_norm": 0.12897798418998718,
"learning_rate": 0.0001,
"loss": 1.6129,
"step": 1630
},
{
"epoch": 0.5460328088382993,
"grad_norm": 0.12880270183086395,
"learning_rate": 0.0001,
"loss": 1.6111,
"step": 1631
},
{
"epoch": 0.5463675929025779,
"grad_norm": 0.13251414895057678,
"learning_rate": 0.0001,
"loss": 1.5786,
"step": 1632
},
{
"epoch": 0.5467023769668564,
"grad_norm": 0.13067522644996643,
"learning_rate": 0.0001,
"loss": 1.5724,
"step": 1633
},
{
"epoch": 0.5470371610311349,
"grad_norm": 0.127615824341774,
"learning_rate": 0.0001,
"loss": 1.4672,
"step": 1634
},
{
"epoch": 0.5473719450954134,
"grad_norm": 0.12785358726978302,
"learning_rate": 0.0001,
"loss": 1.4379,
"step": 1635
},
{
"epoch": 0.547706729159692,
"grad_norm": 0.1336808055639267,
"learning_rate": 0.0001,
"loss": 1.5894,
"step": 1636
},
{
"epoch": 0.5480415132239705,
"grad_norm": 0.12709666788578033,
"learning_rate": 0.0001,
"loss": 1.5646,
"step": 1637
},
{
"epoch": 0.5483762972882491,
"grad_norm": 0.1278083175420761,
"learning_rate": 0.0001,
"loss": 1.5481,
"step": 1638
},
{
"epoch": 0.5487110813525277,
"grad_norm": 0.1273607462644577,
"learning_rate": 0.0001,
"loss": 1.6099,
"step": 1639
},
{
"epoch": 0.5490458654168061,
"grad_norm": 0.13073420524597168,
"learning_rate": 0.0001,
"loss": 1.6554,
"step": 1640
},
{
"epoch": 0.5493806494810847,
"grad_norm": 0.12339271605014801,
"learning_rate": 0.0001,
"loss": 1.4866,
"step": 1641
},
{
"epoch": 0.5497154335453632,
"grad_norm": 0.12296874821186066,
"learning_rate": 0.0001,
"loss": 1.4542,
"step": 1642
},
{
"epoch": 0.5500502176096418,
"grad_norm": 0.12228816747665405,
"learning_rate": 0.0001,
"loss": 1.5008,
"step": 1643
},
{
"epoch": 0.5503850016739203,
"grad_norm": 0.12167999148368835,
"learning_rate": 0.0001,
"loss": 1.4793,
"step": 1644
},
{
"epoch": 0.5507197857381989,
"grad_norm": 0.1323646754026413,
"learning_rate": 0.0001,
"loss": 1.6053,
"step": 1645
},
{
"epoch": 0.5510545698024774,
"grad_norm": 0.13682882487773895,
"learning_rate": 0.0001,
"loss": 1.5962,
"step": 1646
},
{
"epoch": 0.5513893538667559,
"grad_norm": 0.13337336480617523,
"learning_rate": 0.0001,
"loss": 1.6422,
"step": 1647
},
{
"epoch": 0.5517241379310345,
"grad_norm": 0.12662284076213837,
"learning_rate": 0.0001,
"loss": 1.4729,
"step": 1648
},
{
"epoch": 0.552058921995313,
"grad_norm": 0.13070893287658691,
"learning_rate": 0.0001,
"loss": 1.5548,
"step": 1649
},
{
"epoch": 0.5523937060595916,
"grad_norm": 0.1237405389547348,
"learning_rate": 0.0001,
"loss": 1.5731,
"step": 1650
},
{
"epoch": 0.5527284901238702,
"grad_norm": 0.12684407830238342,
"learning_rate": 0.0001,
"loss": 1.5927,
"step": 1651
},
{
"epoch": 0.5530632741881486,
"grad_norm": 0.13257922232151031,
"learning_rate": 0.0001,
"loss": 1.6194,
"step": 1652
},
{
"epoch": 0.5533980582524272,
"grad_norm": 0.12506547570228577,
"learning_rate": 0.0001,
"loss": 1.4954,
"step": 1653
},
{
"epoch": 0.5537328423167057,
"grad_norm": 0.13652825355529785,
"learning_rate": 0.0001,
"loss": 1.5936,
"step": 1654
},
{
"epoch": 0.5540676263809843,
"grad_norm": 0.1281632035970688,
"learning_rate": 0.0001,
"loss": 1.5239,
"step": 1655
},
{
"epoch": 0.5544024104452628,
"grad_norm": 0.1302935630083084,
"learning_rate": 0.0001,
"loss": 1.5731,
"step": 1656
},
{
"epoch": 0.5547371945095414,
"grad_norm": 0.13843512535095215,
"learning_rate": 0.0001,
"loss": 1.6028,
"step": 1657
},
{
"epoch": 0.5550719785738198,
"grad_norm": 0.13132615387439728,
"learning_rate": 0.0001,
"loss": 1.5167,
"step": 1658
},
{
"epoch": 0.5554067626380984,
"grad_norm": 0.1269274204969406,
"learning_rate": 0.0001,
"loss": 1.3276,
"step": 1659
},
{
"epoch": 0.555741546702377,
"grad_norm": 0.14026238024234772,
"learning_rate": 0.0001,
"loss": 1.5699,
"step": 1660
},
{
"epoch": 0.5560763307666555,
"grad_norm": 0.13259948790073395,
"learning_rate": 0.0001,
"loss": 1.5627,
"step": 1661
},
{
"epoch": 0.5564111148309341,
"grad_norm": 0.1282505840063095,
"learning_rate": 0.0001,
"loss": 1.601,
"step": 1662
},
{
"epoch": 0.5567458988952126,
"grad_norm": 0.14385761320590973,
"learning_rate": 0.0001,
"loss": 1.5731,
"step": 1663
},
{
"epoch": 0.5570806829594911,
"grad_norm": 0.12249067425727844,
"learning_rate": 0.0001,
"loss": 1.5416,
"step": 1664
},
{
"epoch": 0.5574154670237697,
"grad_norm": 0.13182908296585083,
"learning_rate": 0.0001,
"loss": 1.5313,
"step": 1665
},
{
"epoch": 0.5577502510880482,
"grad_norm": 0.14085689187049866,
"learning_rate": 0.0001,
"loss": 1.5736,
"step": 1666
},
{
"epoch": 0.5580850351523268,
"grad_norm": 0.14808295667171478,
"learning_rate": 0.0001,
"loss": 1.6265,
"step": 1667
},
{
"epoch": 0.5584198192166053,
"grad_norm": 0.13931553065776825,
"learning_rate": 0.0001,
"loss": 1.5729,
"step": 1668
},
{
"epoch": 0.5587546032808838,
"grad_norm": 0.14633771777153015,
"learning_rate": 0.0001,
"loss": 1.5433,
"step": 1669
},
{
"epoch": 0.5590893873451623,
"grad_norm": 0.1228380873799324,
"learning_rate": 0.0001,
"loss": 1.544,
"step": 1670
},
{
"epoch": 0.5594241714094409,
"grad_norm": 0.12809088826179504,
"learning_rate": 0.0001,
"loss": 1.5724,
"step": 1671
},
{
"epoch": 0.5597589554737195,
"grad_norm": 0.13453969359397888,
"learning_rate": 0.0001,
"loss": 1.5062,
"step": 1672
},
{
"epoch": 0.560093739537998,
"grad_norm": 0.13969993591308594,
"learning_rate": 0.0001,
"loss": 1.6302,
"step": 1673
},
{
"epoch": 0.5604285236022766,
"grad_norm": 0.13022400438785553,
"learning_rate": 0.0001,
"loss": 1.6323,
"step": 1674
},
{
"epoch": 0.560763307666555,
"grad_norm": 0.13372890651226044,
"learning_rate": 0.0001,
"loss": 1.6017,
"step": 1675
},
{
"epoch": 0.5610980917308336,
"grad_norm": 0.1426994502544403,
"learning_rate": 0.0001,
"loss": 1.5737,
"step": 1676
},
{
"epoch": 0.5614328757951121,
"grad_norm": 0.1358005702495575,
"learning_rate": 0.0001,
"loss": 1.5812,
"step": 1677
},
{
"epoch": 0.5617676598593907,
"grad_norm": 0.1320638507604599,
"learning_rate": 0.0001,
"loss": 1.5414,
"step": 1678
},
{
"epoch": 0.5621024439236693,
"grad_norm": 0.13449324667453766,
"learning_rate": 0.0001,
"loss": 1.4752,
"step": 1679
},
{
"epoch": 0.5624372279879478,
"grad_norm": 0.13063769042491913,
"learning_rate": 0.0001,
"loss": 1.5002,
"step": 1680
},
{
"epoch": 0.5627720120522263,
"grad_norm": 0.12591435015201569,
"learning_rate": 0.0001,
"loss": 1.5331,
"step": 1681
},
{
"epoch": 0.5631067961165048,
"grad_norm": 0.144126296043396,
"learning_rate": 0.0001,
"loss": 1.6207,
"step": 1682
},
{
"epoch": 0.5634415801807834,
"grad_norm": 0.13355223834514618,
"learning_rate": 0.0001,
"loss": 1.546,
"step": 1683
},
{
"epoch": 0.563776364245062,
"grad_norm": 0.12519478797912598,
"learning_rate": 0.0001,
"loss": 1.5836,
"step": 1684
},
{
"epoch": 0.5641111483093405,
"grad_norm": 0.1350811868906021,
"learning_rate": 0.0001,
"loss": 1.577,
"step": 1685
},
{
"epoch": 0.5644459323736191,
"grad_norm": 0.14059753715991974,
"learning_rate": 0.0001,
"loss": 1.5457,
"step": 1686
},
{
"epoch": 0.5647807164378975,
"grad_norm": 0.13620074093341827,
"learning_rate": 0.0001,
"loss": 1.5318,
"step": 1687
},
{
"epoch": 0.5651155005021761,
"grad_norm": 0.13117417693138123,
"learning_rate": 0.0001,
"loss": 1.5413,
"step": 1688
},
{
"epoch": 0.5654502845664546,
"grad_norm": 0.14555278420448303,
"learning_rate": 0.0001,
"loss": 1.5775,
"step": 1689
},
{
"epoch": 0.5657850686307332,
"grad_norm": 0.12660092115402222,
"learning_rate": 0.0001,
"loss": 1.5034,
"step": 1690
},
{
"epoch": 0.5661198526950117,
"grad_norm": 0.12967108190059662,
"learning_rate": 0.0001,
"loss": 1.5755,
"step": 1691
},
{
"epoch": 0.5664546367592903,
"grad_norm": 0.13999544084072113,
"learning_rate": 0.0001,
"loss": 1.4471,
"step": 1692
},
{
"epoch": 0.5667894208235688,
"grad_norm": 0.13235735893249512,
"learning_rate": 0.0001,
"loss": 1.4967,
"step": 1693
},
{
"epoch": 0.5671242048878473,
"grad_norm": 0.1373562067747116,
"learning_rate": 0.0001,
"loss": 1.6267,
"step": 1694
},
{
"epoch": 0.5674589889521259,
"grad_norm": 0.1320851445198059,
"learning_rate": 0.0001,
"loss": 1.5259,
"step": 1695
},
{
"epoch": 0.5677937730164044,
"grad_norm": 0.13309001922607422,
"learning_rate": 0.0001,
"loss": 1.5604,
"step": 1696
},
{
"epoch": 0.568128557080683,
"grad_norm": 0.12666000425815582,
"learning_rate": 0.0001,
"loss": 1.5352,
"step": 1697
},
{
"epoch": 0.5684633411449616,
"grad_norm": 0.12397143244743347,
"learning_rate": 0.0001,
"loss": 1.5474,
"step": 1698
},
{
"epoch": 0.56879812520924,
"grad_norm": 0.1286936104297638,
"learning_rate": 0.0001,
"loss": 1.5125,
"step": 1699
},
{
"epoch": 0.5691329092735186,
"grad_norm": 0.12525172531604767,
"learning_rate": 0.0001,
"loss": 1.4172,
"step": 1700
},
{
"epoch": 0.5694676933377971,
"grad_norm": 0.13234922289848328,
"learning_rate": 0.0001,
"loss": 1.5374,
"step": 1701
},
{
"epoch": 0.5698024774020757,
"grad_norm": 0.13341423869132996,
"learning_rate": 0.0001,
"loss": 1.5615,
"step": 1702
},
{
"epoch": 0.5701372614663542,
"grad_norm": 0.12672466039657593,
"learning_rate": 0.0001,
"loss": 1.4147,
"step": 1703
},
{
"epoch": 0.5704720455306327,
"grad_norm": 0.13073183596134186,
"learning_rate": 0.0001,
"loss": 1.5237,
"step": 1704
},
{
"epoch": 0.5708068295949112,
"grad_norm": 0.13044412434101105,
"learning_rate": 0.0001,
"loss": 1.6044,
"step": 1705
},
{
"epoch": 0.5711416136591898,
"grad_norm": 0.13865146040916443,
"learning_rate": 0.0001,
"loss": 1.5648,
"step": 1706
},
{
"epoch": 0.5714763977234684,
"grad_norm": 0.13418787717819214,
"learning_rate": 0.0001,
"loss": 1.5948,
"step": 1707
},
{
"epoch": 0.5718111817877469,
"grad_norm": 0.1279216855764389,
"learning_rate": 0.0001,
"loss": 1.5465,
"step": 1708
},
{
"epoch": 0.5721459658520255,
"grad_norm": 0.13305789232254028,
"learning_rate": 0.0001,
"loss": 1.5768,
"step": 1709
},
{
"epoch": 0.5724807499163039,
"grad_norm": 0.12358289957046509,
"learning_rate": 0.0001,
"loss": 1.4377,
"step": 1710
},
{
"epoch": 0.5728155339805825,
"grad_norm": 0.128280371427536,
"learning_rate": 0.0001,
"loss": 1.5684,
"step": 1711
},
{
"epoch": 0.573150318044861,
"grad_norm": 0.1336420327425003,
"learning_rate": 0.0001,
"loss": 1.5438,
"step": 1712
},
{
"epoch": 0.5734851021091396,
"grad_norm": 0.13142135739326477,
"learning_rate": 0.0001,
"loss": 1.5821,
"step": 1713
},
{
"epoch": 0.5738198861734182,
"grad_norm": 0.1367759257555008,
"learning_rate": 0.0001,
"loss": 1.5294,
"step": 1714
},
{
"epoch": 0.5741546702376967,
"grad_norm": 0.1364768147468567,
"learning_rate": 0.0001,
"loss": 1.4889,
"step": 1715
},
{
"epoch": 0.5744894543019752,
"grad_norm": 0.12675487995147705,
"learning_rate": 0.0001,
"loss": 1.5789,
"step": 1716
},
{
"epoch": 0.5748242383662537,
"grad_norm": 0.13054460287094116,
"learning_rate": 0.0001,
"loss": 1.5653,
"step": 1717
},
{
"epoch": 0.5751590224305323,
"grad_norm": 0.14481523633003235,
"learning_rate": 0.0001,
"loss": 1.6135,
"step": 1718
},
{
"epoch": 0.5754938064948109,
"grad_norm": 0.1317768394947052,
"learning_rate": 0.0001,
"loss": 1.5015,
"step": 1719
},
{
"epoch": 0.5758285905590894,
"grad_norm": 0.13205017149448395,
"learning_rate": 0.0001,
"loss": 1.5667,
"step": 1720
},
{
"epoch": 0.576163374623368,
"grad_norm": 0.13702328503131866,
"learning_rate": 0.0001,
"loss": 1.5487,
"step": 1721
},
{
"epoch": 0.5764981586876464,
"grad_norm": 0.13435296714305878,
"learning_rate": 0.0001,
"loss": 1.6059,
"step": 1722
},
{
"epoch": 0.576832942751925,
"grad_norm": 0.13013921678066254,
"learning_rate": 0.0001,
"loss": 1.5948,
"step": 1723
},
{
"epoch": 0.5771677268162035,
"grad_norm": 0.12254009395837784,
"learning_rate": 0.0001,
"loss": 1.485,
"step": 1724
},
{
"epoch": 0.5775025108804821,
"grad_norm": 0.13023540377616882,
"learning_rate": 0.0001,
"loss": 1.6237,
"step": 1725
},
{
"epoch": 0.5778372949447607,
"grad_norm": 0.1339290589094162,
"learning_rate": 0.0001,
"loss": 1.5983,
"step": 1726
},
{
"epoch": 0.5781720790090392,
"grad_norm": 0.13126787543296814,
"learning_rate": 0.0001,
"loss": 1.5947,
"step": 1727
},
{
"epoch": 0.5785068630733177,
"grad_norm": 0.12525591254234314,
"learning_rate": 0.0001,
"loss": 1.4519,
"step": 1728
},
{
"epoch": 0.5788416471375962,
"grad_norm": 0.12789173424243927,
"learning_rate": 0.0001,
"loss": 1.5293,
"step": 1729
},
{
"epoch": 0.5791764312018748,
"grad_norm": 0.12775948643684387,
"learning_rate": 0.0001,
"loss": 1.5971,
"step": 1730
},
{
"epoch": 0.5795112152661533,
"grad_norm": 0.13437266647815704,
"learning_rate": 0.0001,
"loss": 1.595,
"step": 1731
},
{
"epoch": 0.5798459993304319,
"grad_norm": 0.13249057531356812,
"learning_rate": 0.0001,
"loss": 1.5524,
"step": 1732
},
{
"epoch": 0.5801807833947105,
"grad_norm": 0.12838158011436462,
"learning_rate": 0.0001,
"loss": 1.4641,
"step": 1733
},
{
"epoch": 0.5805155674589889,
"grad_norm": 0.1311095654964447,
"learning_rate": 0.0001,
"loss": 1.5964,
"step": 1734
},
{
"epoch": 0.5808503515232675,
"grad_norm": 0.12928825616836548,
"learning_rate": 0.0001,
"loss": 1.5153,
"step": 1735
},
{
"epoch": 0.581185135587546,
"grad_norm": 0.1317373663187027,
"learning_rate": 0.0001,
"loss": 1.5805,
"step": 1736
},
{
"epoch": 0.5815199196518246,
"grad_norm": 0.1291595846414566,
"learning_rate": 0.0001,
"loss": 1.4974,
"step": 1737
},
{
"epoch": 0.5818547037161031,
"grad_norm": 0.12890678644180298,
"learning_rate": 0.0001,
"loss": 1.5778,
"step": 1738
},
{
"epoch": 0.5821894877803817,
"grad_norm": 0.13605663180351257,
"learning_rate": 0.0001,
"loss": 1.5206,
"step": 1739
},
{
"epoch": 0.5825242718446602,
"grad_norm": 0.12535326182842255,
"learning_rate": 0.0001,
"loss": 1.4989,
"step": 1740
},
{
"epoch": 0.5828590559089387,
"grad_norm": 0.13682806491851807,
"learning_rate": 0.0001,
"loss": 1.5558,
"step": 1741
},
{
"epoch": 0.5831938399732173,
"grad_norm": 0.12900637090206146,
"learning_rate": 0.0001,
"loss": 1.5687,
"step": 1742
},
{
"epoch": 0.5835286240374958,
"grad_norm": 0.1287071406841278,
"learning_rate": 0.0001,
"loss": 1.5349,
"step": 1743
},
{
"epoch": 0.5838634081017744,
"grad_norm": 0.12810088694095612,
"learning_rate": 0.0001,
"loss": 1.5363,
"step": 1744
},
{
"epoch": 0.5841981921660528,
"grad_norm": 0.13105565309524536,
"learning_rate": 0.0001,
"loss": 1.5633,
"step": 1745
},
{
"epoch": 0.5845329762303314,
"grad_norm": 0.13414978981018066,
"learning_rate": 0.0001,
"loss": 1.5965,
"step": 1746
},
{
"epoch": 0.58486776029461,
"grad_norm": 0.12767766416072845,
"learning_rate": 0.0001,
"loss": 1.517,
"step": 1747
},
{
"epoch": 0.5852025443588885,
"grad_norm": 0.12798413634300232,
"learning_rate": 0.0001,
"loss": 1.4184,
"step": 1748
},
{
"epoch": 0.5855373284231671,
"grad_norm": 0.13183465600013733,
"learning_rate": 0.0001,
"loss": 1.4812,
"step": 1749
},
{
"epoch": 0.5858721124874456,
"grad_norm": 0.12950639426708221,
"learning_rate": 0.0001,
"loss": 1.4371,
"step": 1750
},
{
"epoch": 0.5862068965517241,
"grad_norm": 0.1397038698196411,
"learning_rate": 0.0001,
"loss": 1.5023,
"step": 1751
},
{
"epoch": 0.5865416806160026,
"grad_norm": 0.1396951824426651,
"learning_rate": 0.0001,
"loss": 1.5174,
"step": 1752
},
{
"epoch": 0.5868764646802812,
"grad_norm": 0.13188160955905914,
"learning_rate": 0.0001,
"loss": 1.511,
"step": 1753
},
{
"epoch": 0.5872112487445598,
"grad_norm": 0.13433519005775452,
"learning_rate": 0.0001,
"loss": 1.5214,
"step": 1754
},
{
"epoch": 0.5875460328088383,
"grad_norm": 0.13022519648075104,
"learning_rate": 0.0001,
"loss": 1.5629,
"step": 1755
},
{
"epoch": 0.5878808168731169,
"grad_norm": 0.12651024758815765,
"learning_rate": 0.0001,
"loss": 1.4469,
"step": 1756
},
{
"epoch": 0.5882156009373953,
"grad_norm": 0.13489894568920135,
"learning_rate": 0.0001,
"loss": 1.5363,
"step": 1757
},
{
"epoch": 0.5885503850016739,
"grad_norm": 0.13707391917705536,
"learning_rate": 0.0001,
"loss": 1.6495,
"step": 1758
},
{
"epoch": 0.5888851690659525,
"grad_norm": 0.12528660893440247,
"learning_rate": 0.0001,
"loss": 1.5296,
"step": 1759
},
{
"epoch": 0.589219953130231,
"grad_norm": 0.14160814881324768,
"learning_rate": 0.0001,
"loss": 1.5977,
"step": 1760
},
{
"epoch": 0.5895547371945096,
"grad_norm": 0.12557724118232727,
"learning_rate": 0.0001,
"loss": 1.4915,
"step": 1761
},
{
"epoch": 0.5898895212587881,
"grad_norm": 0.12706881761550903,
"learning_rate": 0.0001,
"loss": 1.5775,
"step": 1762
},
{
"epoch": 0.5902243053230666,
"grad_norm": 0.13343869149684906,
"learning_rate": 0.0001,
"loss": 1.6033,
"step": 1763
},
{
"epoch": 0.5905590893873451,
"grad_norm": 0.1284165382385254,
"learning_rate": 0.0001,
"loss": 1.5255,
"step": 1764
},
{
"epoch": 0.5908938734516237,
"grad_norm": 0.12860101461410522,
"learning_rate": 0.0001,
"loss": 1.4694,
"step": 1765
},
{
"epoch": 0.5912286575159023,
"grad_norm": 0.12808945775032043,
"learning_rate": 0.0001,
"loss": 1.6068,
"step": 1766
},
{
"epoch": 0.5915634415801808,
"grad_norm": 0.13219839334487915,
"learning_rate": 0.0001,
"loss": 1.5519,
"step": 1767
},
{
"epoch": 0.5918982256444594,
"grad_norm": 0.12471086531877518,
"learning_rate": 0.0001,
"loss": 1.4465,
"step": 1768
},
{
"epoch": 0.5922330097087378,
"grad_norm": 0.13721035420894623,
"learning_rate": 0.0001,
"loss": 1.5656,
"step": 1769
},
{
"epoch": 0.5925677937730164,
"grad_norm": 0.1299833208322525,
"learning_rate": 0.0001,
"loss": 1.4767,
"step": 1770
},
{
"epoch": 0.5929025778372949,
"grad_norm": 0.13570041954517365,
"learning_rate": 0.0001,
"loss": 1.5929,
"step": 1771
},
{
"epoch": 0.5932373619015735,
"grad_norm": 0.12360662966966629,
"learning_rate": 0.0001,
"loss": 1.4179,
"step": 1772
},
{
"epoch": 0.5935721459658521,
"grad_norm": 0.138414204120636,
"learning_rate": 0.0001,
"loss": 1.6123,
"step": 1773
},
{
"epoch": 0.5939069300301306,
"grad_norm": 0.1347961127758026,
"learning_rate": 0.0001,
"loss": 1.6135,
"step": 1774
},
{
"epoch": 0.5942417140944091,
"grad_norm": 0.1333123743534088,
"learning_rate": 0.0001,
"loss": 1.3935,
"step": 1775
},
{
"epoch": 0.5945764981586876,
"grad_norm": 0.13112439215183258,
"learning_rate": 0.0001,
"loss": 1.5531,
"step": 1776
},
{
"epoch": 0.5949112822229662,
"grad_norm": 0.1356613039970398,
"learning_rate": 0.0001,
"loss": 1.5338,
"step": 1777
},
{
"epoch": 0.5952460662872447,
"grad_norm": 0.13762056827545166,
"learning_rate": 0.0001,
"loss": 1.5684,
"step": 1778
},
{
"epoch": 0.5955808503515233,
"grad_norm": 0.13242678344249725,
"learning_rate": 0.0001,
"loss": 1.5946,
"step": 1779
},
{
"epoch": 0.5959156344158018,
"grad_norm": 0.1304038166999817,
"learning_rate": 0.0001,
"loss": 1.5634,
"step": 1780
},
{
"epoch": 0.5962504184800803,
"grad_norm": 0.13004854321479797,
"learning_rate": 0.0001,
"loss": 1.5612,
"step": 1781
},
{
"epoch": 0.5965852025443589,
"grad_norm": 0.13909399509429932,
"learning_rate": 0.0001,
"loss": 1.5613,
"step": 1782
},
{
"epoch": 0.5969199866086374,
"grad_norm": 0.13109537959098816,
"learning_rate": 0.0001,
"loss": 1.5769,
"step": 1783
},
{
"epoch": 0.597254770672916,
"grad_norm": 0.13889670372009277,
"learning_rate": 0.0001,
"loss": 1.5788,
"step": 1784
},
{
"epoch": 0.5975895547371946,
"grad_norm": 0.12981747090816498,
"learning_rate": 0.0001,
"loss": 1.5294,
"step": 1785
},
{
"epoch": 0.597924338801473,
"grad_norm": 0.12865106761455536,
"learning_rate": 0.0001,
"loss": 1.5907,
"step": 1786
},
{
"epoch": 0.5982591228657516,
"grad_norm": 0.13081815838813782,
"learning_rate": 0.0001,
"loss": 1.6513,
"step": 1787
},
{
"epoch": 0.5985939069300301,
"grad_norm": 0.1357847899198532,
"learning_rate": 0.0001,
"loss": 1.6925,
"step": 1788
},
{
"epoch": 0.5989286909943087,
"grad_norm": 0.1296125054359436,
"learning_rate": 0.0001,
"loss": 1.5362,
"step": 1789
},
{
"epoch": 0.5992634750585872,
"grad_norm": 0.13272371888160706,
"learning_rate": 0.0001,
"loss": 1.669,
"step": 1790
},
{
"epoch": 0.5995982591228658,
"grad_norm": 0.1340399980545044,
"learning_rate": 0.0001,
"loss": 1.5674,
"step": 1791
},
{
"epoch": 0.5999330431871442,
"grad_norm": 0.12497217208147049,
"learning_rate": 0.0001,
"loss": 1.4629,
"step": 1792
},
{
"epoch": 0.6002678272514228,
"grad_norm": 0.14285002648830414,
"learning_rate": 0.0001,
"loss": 1.5278,
"step": 1793
},
{
"epoch": 0.6006026113157014,
"grad_norm": 0.1328384429216385,
"learning_rate": 0.0001,
"loss": 1.5532,
"step": 1794
},
{
"epoch": 0.6009373953799799,
"grad_norm": 0.13168397545814514,
"learning_rate": 0.0001,
"loss": 1.6406,
"step": 1795
},
{
"epoch": 0.6012721794442585,
"grad_norm": 0.12567539513111115,
"learning_rate": 0.0001,
"loss": 1.5389,
"step": 1796
},
{
"epoch": 0.601606963508537,
"grad_norm": 0.13105528056621552,
"learning_rate": 0.0001,
"loss": 1.5754,
"step": 1797
},
{
"epoch": 0.6019417475728155,
"grad_norm": 0.1292327493429184,
"learning_rate": 0.0001,
"loss": 1.4713,
"step": 1798
},
{
"epoch": 0.602276531637094,
"grad_norm": 0.12788547575473785,
"learning_rate": 0.0001,
"loss": 1.5787,
"step": 1799
},
{
"epoch": 0.6026113157013726,
"grad_norm": 0.1307074874639511,
"learning_rate": 0.0001,
"loss": 1.6191,
"step": 1800
},
{
"epoch": 0.6029460997656512,
"grad_norm": 0.136485293507576,
"learning_rate": 0.0001,
"loss": 1.6063,
"step": 1801
},
{
"epoch": 0.6032808838299297,
"grad_norm": 0.12938566505908966,
"learning_rate": 0.0001,
"loss": 1.5466,
"step": 1802
},
{
"epoch": 0.6036156678942083,
"grad_norm": 0.12429405003786087,
"learning_rate": 0.0001,
"loss": 1.4672,
"step": 1803
},
{
"epoch": 0.6039504519584867,
"grad_norm": 0.12657684087753296,
"learning_rate": 0.0001,
"loss": 1.5159,
"step": 1804
},
{
"epoch": 0.6042852360227653,
"grad_norm": 0.13287223875522614,
"learning_rate": 0.0001,
"loss": 1.5838,
"step": 1805
},
{
"epoch": 0.6046200200870439,
"grad_norm": 0.13268281519412994,
"learning_rate": 0.0001,
"loss": 1.5282,
"step": 1806
},
{
"epoch": 0.6049548041513224,
"grad_norm": 0.1264685094356537,
"learning_rate": 0.0001,
"loss": 1.5795,
"step": 1807
},
{
"epoch": 0.605289588215601,
"grad_norm": 0.1276138424873352,
"learning_rate": 0.0001,
"loss": 1.4648,
"step": 1808
},
{
"epoch": 0.6056243722798795,
"grad_norm": 0.13063056766986847,
"learning_rate": 0.0001,
"loss": 1.5692,
"step": 1809
},
{
"epoch": 0.605959156344158,
"grad_norm": 0.12172877043485641,
"learning_rate": 0.0001,
"loss": 1.4785,
"step": 1810
},
{
"epoch": 0.6062939404084365,
"grad_norm": 0.13516037166118622,
"learning_rate": 0.0001,
"loss": 1.5316,
"step": 1811
},
{
"epoch": 0.6066287244727151,
"grad_norm": 0.12978719174861908,
"learning_rate": 0.0001,
"loss": 1.5103,
"step": 1812
},
{
"epoch": 0.6069635085369937,
"grad_norm": 0.1354977786540985,
"learning_rate": 0.0001,
"loss": 1.5368,
"step": 1813
},
{
"epoch": 0.6072982926012722,
"grad_norm": 0.12445911020040512,
"learning_rate": 0.0001,
"loss": 1.4966,
"step": 1814
},
{
"epoch": 0.6076330766655507,
"grad_norm": 0.13546685874462128,
"learning_rate": 0.0001,
"loss": 1.62,
"step": 1815
},
{
"epoch": 0.6079678607298292,
"grad_norm": 0.12861642241477966,
"learning_rate": 0.0001,
"loss": 1.5895,
"step": 1816
},
{
"epoch": 0.6083026447941078,
"grad_norm": 0.13455091416835785,
"learning_rate": 0.0001,
"loss": 1.5217,
"step": 1817
},
{
"epoch": 0.6086374288583863,
"grad_norm": 0.13514240086078644,
"learning_rate": 0.0001,
"loss": 1.5947,
"step": 1818
},
{
"epoch": 0.6089722129226649,
"grad_norm": 0.12753477692604065,
"learning_rate": 0.0001,
"loss": 1.492,
"step": 1819
},
{
"epoch": 0.6093069969869435,
"grad_norm": 0.1335463970899582,
"learning_rate": 0.0001,
"loss": 1.5806,
"step": 1820
},
{
"epoch": 0.6096417810512219,
"grad_norm": 0.14587751030921936,
"learning_rate": 0.0001,
"loss": 1.5679,
"step": 1821
},
{
"epoch": 0.6099765651155005,
"grad_norm": 0.13787920773029327,
"learning_rate": 0.0001,
"loss": 1.4759,
"step": 1822
},
{
"epoch": 0.610311349179779,
"grad_norm": 0.135360449552536,
"learning_rate": 0.0001,
"loss": 1.4968,
"step": 1823
},
{
"epoch": 0.6106461332440576,
"grad_norm": 0.13543657958507538,
"learning_rate": 0.0001,
"loss": 1.5321,
"step": 1824
},
{
"epoch": 0.6109809173083361,
"grad_norm": 0.127221018075943,
"learning_rate": 0.0001,
"loss": 1.5239,
"step": 1825
},
{
"epoch": 0.6113157013726147,
"grad_norm": 0.1439230740070343,
"learning_rate": 0.0001,
"loss": 1.6458,
"step": 1826
},
{
"epoch": 0.6116504854368932,
"grad_norm": 0.13141925632953644,
"learning_rate": 0.0001,
"loss": 1.504,
"step": 1827
},
{
"epoch": 0.6119852695011717,
"grad_norm": 0.12811610102653503,
"learning_rate": 0.0001,
"loss": 1.6137,
"step": 1828
},
{
"epoch": 0.6123200535654503,
"grad_norm": 0.13353578746318817,
"learning_rate": 0.0001,
"loss": 1.5209,
"step": 1829
},
{
"epoch": 0.6126548376297288,
"grad_norm": 0.13006985187530518,
"learning_rate": 0.0001,
"loss": 1.4776,
"step": 1830
},
{
"epoch": 0.6129896216940074,
"grad_norm": 0.1350172609090805,
"learning_rate": 0.0001,
"loss": 1.5994,
"step": 1831
},
{
"epoch": 0.613324405758286,
"grad_norm": 0.13640815019607544,
"learning_rate": 0.0001,
"loss": 1.6383,
"step": 1832
},
{
"epoch": 0.6136591898225644,
"grad_norm": 0.14161550998687744,
"learning_rate": 0.0001,
"loss": 1.5486,
"step": 1833
},
{
"epoch": 0.613993973886843,
"grad_norm": 0.12927186489105225,
"learning_rate": 0.0001,
"loss": 1.5166,
"step": 1834
},
{
"epoch": 0.6143287579511215,
"grad_norm": 0.1287536919116974,
"learning_rate": 0.0001,
"loss": 1.496,
"step": 1835
},
{
"epoch": 0.6146635420154001,
"grad_norm": 0.13734175264835358,
"learning_rate": 0.0001,
"loss": 1.5638,
"step": 1836
},
{
"epoch": 0.6149983260796786,
"grad_norm": 0.13784490525722504,
"learning_rate": 0.0001,
"loss": 1.593,
"step": 1837
},
{
"epoch": 0.6153331101439572,
"grad_norm": 0.1259312480688095,
"learning_rate": 0.0001,
"loss": 1.5208,
"step": 1838
},
{
"epoch": 0.6156678942082356,
"grad_norm": 0.15089771151542664,
"learning_rate": 0.0001,
"loss": 1.5251,
"step": 1839
},
{
"epoch": 0.6160026782725142,
"grad_norm": 0.14801523089408875,
"learning_rate": 0.0001,
"loss": 1.5706,
"step": 1840
},
{
"epoch": 0.6163374623367928,
"grad_norm": 0.1345253735780716,
"learning_rate": 0.0001,
"loss": 1.5695,
"step": 1841
},
{
"epoch": 0.6166722464010713,
"grad_norm": 0.15094773471355438,
"learning_rate": 0.0001,
"loss": 1.5744,
"step": 1842
},
{
"epoch": 0.6170070304653499,
"grad_norm": 0.13193759322166443,
"learning_rate": 0.0001,
"loss": 1.5345,
"step": 1843
},
{
"epoch": 0.6173418145296284,
"grad_norm": 0.12728765606880188,
"learning_rate": 0.0001,
"loss": 1.5026,
"step": 1844
},
{
"epoch": 0.6176765985939069,
"grad_norm": 0.14725570380687714,
"learning_rate": 0.0001,
"loss": 1.581,
"step": 1845
},
{
"epoch": 0.6180113826581854,
"grad_norm": 0.13824598491191864,
"learning_rate": 0.0001,
"loss": 1.5359,
"step": 1846
},
{
"epoch": 0.618346166722464,
"grad_norm": 0.12178414314985275,
"learning_rate": 0.0001,
"loss": 1.4936,
"step": 1847
},
{
"epoch": 0.6186809507867426,
"grad_norm": 0.156047984957695,
"learning_rate": 0.0001,
"loss": 1.5737,
"step": 1848
},
{
"epoch": 0.6190157348510211,
"grad_norm": 0.15707126259803772,
"learning_rate": 0.0001,
"loss": 1.6287,
"step": 1849
},
{
"epoch": 0.6193505189152997,
"grad_norm": 0.1378837376832962,
"learning_rate": 0.0001,
"loss": 1.616,
"step": 1850
},
{
"epoch": 0.6196853029795781,
"grad_norm": 0.1423729658126831,
"learning_rate": 0.0001,
"loss": 1.5409,
"step": 1851
},
{
"epoch": 0.6200200870438567,
"grad_norm": 0.16630493104457855,
"learning_rate": 0.0001,
"loss": 1.6264,
"step": 1852
},
{
"epoch": 0.6203548711081353,
"grad_norm": 0.13753686845302582,
"learning_rate": 0.0001,
"loss": 1.6104,
"step": 1853
},
{
"epoch": 0.6206896551724138,
"grad_norm": 0.13337332010269165,
"learning_rate": 0.0001,
"loss": 1.5104,
"step": 1854
},
{
"epoch": 0.6210244392366924,
"grad_norm": 0.14229977130889893,
"learning_rate": 0.0001,
"loss": 1.4228,
"step": 1855
},
{
"epoch": 0.6213592233009708,
"grad_norm": 0.1403966248035431,
"learning_rate": 0.0001,
"loss": 1.5623,
"step": 1856
},
{
"epoch": 0.6216940073652494,
"grad_norm": 0.12786665558815002,
"learning_rate": 0.0001,
"loss": 1.5058,
"step": 1857
},
{
"epoch": 0.6220287914295279,
"grad_norm": 0.14748771488666534,
"learning_rate": 0.0001,
"loss": 1.5004,
"step": 1858
},
{
"epoch": 0.6223635754938065,
"grad_norm": 0.14041772484779358,
"learning_rate": 0.0001,
"loss": 1.6154,
"step": 1859
},
{
"epoch": 0.6226983595580851,
"grad_norm": 0.1256851702928543,
"learning_rate": 0.0001,
"loss": 1.4634,
"step": 1860
},
{
"epoch": 0.6230331436223636,
"grad_norm": 0.12676502764225006,
"learning_rate": 0.0001,
"loss": 1.5163,
"step": 1861
},
{
"epoch": 0.6233679276866421,
"grad_norm": 0.14927968382835388,
"learning_rate": 0.0001,
"loss": 1.5686,
"step": 1862
},
{
"epoch": 0.6237027117509206,
"grad_norm": 0.1308298408985138,
"learning_rate": 0.0001,
"loss": 1.5032,
"step": 1863
},
{
"epoch": 0.6240374958151992,
"grad_norm": 0.13208165764808655,
"learning_rate": 0.0001,
"loss": 1.5519,
"step": 1864
},
{
"epoch": 0.6243722798794777,
"grad_norm": 0.13822416961193085,
"learning_rate": 0.0001,
"loss": 1.5664,
"step": 1865
},
{
"epoch": 0.6247070639437563,
"grad_norm": 0.13646993041038513,
"learning_rate": 0.0001,
"loss": 1.5361,
"step": 1866
},
{
"epoch": 0.6250418480080349,
"grad_norm": 0.1273556500673294,
"learning_rate": 0.0001,
"loss": 1.546,
"step": 1867
},
{
"epoch": 0.6253766320723133,
"grad_norm": 0.13555049896240234,
"learning_rate": 0.0001,
"loss": 1.5288,
"step": 1868
},
{
"epoch": 0.6257114161365919,
"grad_norm": 0.13126762211322784,
"learning_rate": 0.0001,
"loss": 1.4659,
"step": 1869
},
{
"epoch": 0.6260462002008704,
"grad_norm": 0.1348927766084671,
"learning_rate": 0.0001,
"loss": 1.5812,
"step": 1870
},
{
"epoch": 0.626380984265149,
"grad_norm": 0.1363980621099472,
"learning_rate": 0.0001,
"loss": 1.6506,
"step": 1871
},
{
"epoch": 0.6267157683294275,
"grad_norm": 0.13422980904579163,
"learning_rate": 0.0001,
"loss": 1.5298,
"step": 1872
},
{
"epoch": 0.6270505523937061,
"grad_norm": 0.12745925784111023,
"learning_rate": 0.0001,
"loss": 1.4898,
"step": 1873
},
{
"epoch": 0.6273853364579846,
"grad_norm": 0.1292264759540558,
"learning_rate": 0.0001,
"loss": 1.548,
"step": 1874
},
{
"epoch": 0.6277201205222631,
"grad_norm": 0.1412927806377411,
"learning_rate": 0.0001,
"loss": 1.5228,
"step": 1875
},
{
"epoch": 0.6280549045865417,
"grad_norm": 0.1328163594007492,
"learning_rate": 0.0001,
"loss": 1.5521,
"step": 1876
},
{
"epoch": 0.6283896886508202,
"grad_norm": 0.1258804351091385,
"learning_rate": 0.0001,
"loss": 1.4781,
"step": 1877
},
{
"epoch": 0.6287244727150988,
"grad_norm": 0.128944993019104,
"learning_rate": 0.0001,
"loss": 1.5123,
"step": 1878
},
{
"epoch": 0.6290592567793774,
"grad_norm": 0.1244087815284729,
"learning_rate": 0.0001,
"loss": 1.4386,
"step": 1879
},
{
"epoch": 0.6293940408436558,
"grad_norm": 0.12890097498893738,
"learning_rate": 0.0001,
"loss": 1.5266,
"step": 1880
},
{
"epoch": 0.6297288249079344,
"grad_norm": 0.1312391459941864,
"learning_rate": 0.0001,
"loss": 1.5395,
"step": 1881
},
{
"epoch": 0.6300636089722129,
"grad_norm": 0.13363149762153625,
"learning_rate": 0.0001,
"loss": 1.5721,
"step": 1882
},
{
"epoch": 0.6303983930364915,
"grad_norm": 0.13130998611450195,
"learning_rate": 0.0001,
"loss": 1.5542,
"step": 1883
},
{
"epoch": 0.63073317710077,
"grad_norm": 0.13050179183483124,
"learning_rate": 0.0001,
"loss": 1.5422,
"step": 1884
},
{
"epoch": 0.6310679611650486,
"grad_norm": 0.13548725843429565,
"learning_rate": 0.0001,
"loss": 1.5597,
"step": 1885
},
{
"epoch": 0.631402745229327,
"grad_norm": 0.13810521364212036,
"learning_rate": 0.0001,
"loss": 1.6428,
"step": 1886
},
{
"epoch": 0.6317375292936056,
"grad_norm": 0.12898769974708557,
"learning_rate": 0.0001,
"loss": 1.5091,
"step": 1887
},
{
"epoch": 0.6320723133578842,
"grad_norm": 0.13874949514865875,
"learning_rate": 0.0001,
"loss": 1.473,
"step": 1888
},
{
"epoch": 0.6324070974221627,
"grad_norm": 0.1275644749403,
"learning_rate": 0.0001,
"loss": 1.5844,
"step": 1889
},
{
"epoch": 0.6327418814864413,
"grad_norm": 0.13245896995067596,
"learning_rate": 0.0001,
"loss": 1.602,
"step": 1890
},
{
"epoch": 0.6330766655507197,
"grad_norm": 0.13937050104141235,
"learning_rate": 0.0001,
"loss": 1.6106,
"step": 1891
},
{
"epoch": 0.6334114496149983,
"grad_norm": 0.13569729030132294,
"learning_rate": 0.0001,
"loss": 1.523,
"step": 1892
},
{
"epoch": 0.6337462336792768,
"grad_norm": 0.1360468864440918,
"learning_rate": 0.0001,
"loss": 1.5032,
"step": 1893
},
{
"epoch": 0.6340810177435554,
"grad_norm": 0.12757538259029388,
"learning_rate": 0.0001,
"loss": 1.487,
"step": 1894
},
{
"epoch": 0.634415801807834,
"grad_norm": 0.13325755298137665,
"learning_rate": 0.0001,
"loss": 1.5386,
"step": 1895
},
{
"epoch": 0.6347505858721125,
"grad_norm": 0.1348341703414917,
"learning_rate": 0.0001,
"loss": 1.6195,
"step": 1896
},
{
"epoch": 0.635085369936391,
"grad_norm": 0.14284925162792206,
"learning_rate": 0.0001,
"loss": 1.636,
"step": 1897
},
{
"epoch": 0.6354201540006695,
"grad_norm": 0.12641146779060364,
"learning_rate": 0.0001,
"loss": 1.5172,
"step": 1898
},
{
"epoch": 0.6357549380649481,
"grad_norm": 0.1327671855688095,
"learning_rate": 0.0001,
"loss": 1.6519,
"step": 1899
},
{
"epoch": 0.6360897221292267,
"grad_norm": 0.13408274948596954,
"learning_rate": 0.0001,
"loss": 1.4722,
"step": 1900
},
{
"epoch": 0.6364245061935052,
"grad_norm": 0.13136939704418182,
"learning_rate": 0.0001,
"loss": 1.56,
"step": 1901
},
{
"epoch": 0.6367592902577838,
"grad_norm": 0.13018733263015747,
"learning_rate": 0.0001,
"loss": 1.5499,
"step": 1902
},
{
"epoch": 0.6370940743220622,
"grad_norm": 0.137217178940773,
"learning_rate": 0.0001,
"loss": 1.6224,
"step": 1903
},
{
"epoch": 0.6374288583863408,
"grad_norm": 0.12886135280132294,
"learning_rate": 0.0001,
"loss": 1.5993,
"step": 1904
},
{
"epoch": 0.6377636424506193,
"grad_norm": 0.12878277897834778,
"learning_rate": 0.0001,
"loss": 1.4407,
"step": 1905
},
{
"epoch": 0.6380984265148979,
"grad_norm": 0.12817195057868958,
"learning_rate": 0.0001,
"loss": 1.5113,
"step": 1906
},
{
"epoch": 0.6384332105791765,
"grad_norm": 0.12779603898525238,
"learning_rate": 0.0001,
"loss": 1.573,
"step": 1907
},
{
"epoch": 0.638767994643455,
"grad_norm": 0.13575701415538788,
"learning_rate": 0.0001,
"loss": 1.5689,
"step": 1908
},
{
"epoch": 0.6391027787077335,
"grad_norm": 0.1292586326599121,
"learning_rate": 0.0001,
"loss": 1.5853,
"step": 1909
},
{
"epoch": 0.639437562772012,
"grad_norm": 0.13209429383277893,
"learning_rate": 0.0001,
"loss": 1.5374,
"step": 1910
},
{
"epoch": 0.6397723468362906,
"grad_norm": 0.13795161247253418,
"learning_rate": 0.0001,
"loss": 1.5752,
"step": 1911
},
{
"epoch": 0.6401071309005691,
"grad_norm": 0.13106195628643036,
"learning_rate": 0.0001,
"loss": 1.5074,
"step": 1912
},
{
"epoch": 0.6404419149648477,
"grad_norm": 0.1364029496908188,
"learning_rate": 0.0001,
"loss": 1.4415,
"step": 1913
},
{
"epoch": 0.6407766990291263,
"grad_norm": 0.13437704741954803,
"learning_rate": 0.0001,
"loss": 1.5179,
"step": 1914
},
{
"epoch": 0.6411114830934047,
"grad_norm": 0.12899838387966156,
"learning_rate": 0.0001,
"loss": 1.4437,
"step": 1915
},
{
"epoch": 0.6414462671576833,
"grad_norm": 0.1336640864610672,
"learning_rate": 0.0001,
"loss": 1.4988,
"step": 1916
},
{
"epoch": 0.6417810512219618,
"grad_norm": 0.13116469979286194,
"learning_rate": 0.0001,
"loss": 1.5944,
"step": 1917
},
{
"epoch": 0.6421158352862404,
"grad_norm": 0.1323315054178238,
"learning_rate": 0.0001,
"loss": 1.6378,
"step": 1918
},
{
"epoch": 0.642450619350519,
"grad_norm": 0.13012604415416718,
"learning_rate": 0.0001,
"loss": 1.591,
"step": 1919
},
{
"epoch": 0.6427854034147975,
"grad_norm": 0.13358043134212494,
"learning_rate": 0.0001,
"loss": 1.4948,
"step": 1920
},
{
"epoch": 0.643120187479076,
"grad_norm": 0.13027198612689972,
"learning_rate": 0.0001,
"loss": 1.5749,
"step": 1921
},
{
"epoch": 0.6434549715433545,
"grad_norm": 0.11880921572446823,
"learning_rate": 0.0001,
"loss": 1.434,
"step": 1922
},
{
"epoch": 0.6437897556076331,
"grad_norm": 0.1275249421596527,
"learning_rate": 0.0001,
"loss": 1.5074,
"step": 1923
},
{
"epoch": 0.6441245396719116,
"grad_norm": 0.13402846455574036,
"learning_rate": 0.0001,
"loss": 1.6019,
"step": 1924
},
{
"epoch": 0.6444593237361902,
"grad_norm": 0.1263839304447174,
"learning_rate": 0.0001,
"loss": 1.494,
"step": 1925
},
{
"epoch": 0.6447941078004688,
"grad_norm": 0.12889358401298523,
"learning_rate": 0.0001,
"loss": 1.4811,
"step": 1926
},
{
"epoch": 0.6451288918647472,
"grad_norm": 0.13030682504177094,
"learning_rate": 0.0001,
"loss": 1.5573,
"step": 1927
},
{
"epoch": 0.6454636759290258,
"grad_norm": 0.12815749645233154,
"learning_rate": 0.0001,
"loss": 1.5839,
"step": 1928
},
{
"epoch": 0.6457984599933043,
"grad_norm": 0.13763943314552307,
"learning_rate": 0.0001,
"loss": 1.4967,
"step": 1929
},
{
"epoch": 0.6461332440575829,
"grad_norm": 0.12890425324440002,
"learning_rate": 0.0001,
"loss": 1.4861,
"step": 1930
},
{
"epoch": 0.6464680281218614,
"grad_norm": 0.13768140971660614,
"learning_rate": 0.0001,
"loss": 1.5095,
"step": 1931
},
{
"epoch": 0.6468028121861399,
"grad_norm": 0.1268666833639145,
"learning_rate": 0.0001,
"loss": 1.5237,
"step": 1932
},
{
"epoch": 0.6471375962504184,
"grad_norm": 0.13325713574886322,
"learning_rate": 0.0001,
"loss": 1.593,
"step": 1933
},
{
"epoch": 0.647472380314697,
"grad_norm": 0.13848131895065308,
"learning_rate": 0.0001,
"loss": 1.4935,
"step": 1934
},
{
"epoch": 0.6478071643789756,
"grad_norm": 0.1393735706806183,
"learning_rate": 0.0001,
"loss": 1.6234,
"step": 1935
},
{
"epoch": 0.6481419484432541,
"grad_norm": 0.1441955864429474,
"learning_rate": 0.0001,
"loss": 1.6218,
"step": 1936
},
{
"epoch": 0.6484767325075327,
"grad_norm": 0.13111312687397003,
"learning_rate": 0.0001,
"loss": 1.5639,
"step": 1937
},
{
"epoch": 0.6488115165718111,
"grad_norm": 0.12940305471420288,
"learning_rate": 0.0001,
"loss": 1.5864,
"step": 1938
},
{
"epoch": 0.6491463006360897,
"grad_norm": 0.13657227158546448,
"learning_rate": 0.0001,
"loss": 1.5125,
"step": 1939
},
{
"epoch": 0.6494810847003682,
"grad_norm": 0.12390992790460587,
"learning_rate": 0.0001,
"loss": 1.4631,
"step": 1940
},
{
"epoch": 0.6498158687646468,
"grad_norm": 0.1316480040550232,
"learning_rate": 0.0001,
"loss": 1.5343,
"step": 1941
},
{
"epoch": 0.6501506528289254,
"grad_norm": 0.13427673280239105,
"learning_rate": 0.0001,
"loss": 1.5456,
"step": 1942
},
{
"epoch": 0.6504854368932039,
"grad_norm": 0.1284562200307846,
"learning_rate": 0.0001,
"loss": 1.5017,
"step": 1943
},
{
"epoch": 0.6508202209574824,
"grad_norm": 0.13431181013584137,
"learning_rate": 0.0001,
"loss": 1.45,
"step": 1944
},
{
"epoch": 0.6511550050217609,
"grad_norm": 0.13080428540706635,
"learning_rate": 0.0001,
"loss": 1.5035,
"step": 1945
},
{
"epoch": 0.6514897890860395,
"grad_norm": 0.13691136240959167,
"learning_rate": 0.0001,
"loss": 1.5145,
"step": 1946
},
{
"epoch": 0.651824573150318,
"grad_norm": 0.12990237772464752,
"learning_rate": 0.0001,
"loss": 1.5393,
"step": 1947
},
{
"epoch": 0.6521593572145966,
"grad_norm": 0.12529443204402924,
"learning_rate": 0.0001,
"loss": 1.468,
"step": 1948
},
{
"epoch": 0.6524941412788752,
"grad_norm": 0.13029485940933228,
"learning_rate": 0.0001,
"loss": 1.5229,
"step": 1949
},
{
"epoch": 0.6528289253431536,
"grad_norm": 0.13873140513896942,
"learning_rate": 0.0001,
"loss": 1.5667,
"step": 1950
},
{
"epoch": 0.6531637094074322,
"grad_norm": 0.13176368176937103,
"learning_rate": 0.0001,
"loss": 1.4231,
"step": 1951
},
{
"epoch": 0.6534984934717107,
"grad_norm": 0.13046538829803467,
"learning_rate": 0.0001,
"loss": 1.5151,
"step": 1952
},
{
"epoch": 0.6538332775359893,
"grad_norm": 0.1290617287158966,
"learning_rate": 0.0001,
"loss": 1.6184,
"step": 1953
},
{
"epoch": 0.6541680616002679,
"grad_norm": 0.13826888799667358,
"learning_rate": 0.0001,
"loss": 1.5597,
"step": 1954
},
{
"epoch": 0.6545028456645464,
"grad_norm": 0.1341448426246643,
"learning_rate": 0.0001,
"loss": 1.5763,
"step": 1955
},
{
"epoch": 0.6548376297288249,
"grad_norm": 0.1293526589870453,
"learning_rate": 0.0001,
"loss": 1.4475,
"step": 1956
},
{
"epoch": 0.6551724137931034,
"grad_norm": 0.12727828323841095,
"learning_rate": 0.0001,
"loss": 1.5195,
"step": 1957
},
{
"epoch": 0.655507197857382,
"grad_norm": 0.13981108367443085,
"learning_rate": 0.0001,
"loss": 1.6515,
"step": 1958
},
{
"epoch": 0.6558419819216605,
"grad_norm": 0.1339573711156845,
"learning_rate": 0.0001,
"loss": 1.4506,
"step": 1959
},
{
"epoch": 0.6561767659859391,
"grad_norm": 0.13203227519989014,
"learning_rate": 0.0001,
"loss": 1.5553,
"step": 1960
},
{
"epoch": 0.6565115500502177,
"grad_norm": 0.1276148110628128,
"learning_rate": 0.0001,
"loss": 1.5442,
"step": 1961
},
{
"epoch": 0.6568463341144961,
"grad_norm": 0.13206414878368378,
"learning_rate": 0.0001,
"loss": 1.4193,
"step": 1962
},
{
"epoch": 0.6571811181787747,
"grad_norm": 0.14616969227790833,
"learning_rate": 0.0001,
"loss": 1.6147,
"step": 1963
},
{
"epoch": 0.6575159022430532,
"grad_norm": 0.13604846596717834,
"learning_rate": 0.0001,
"loss": 1.5652,
"step": 1964
},
{
"epoch": 0.6578506863073318,
"grad_norm": 0.13196608424186707,
"learning_rate": 0.0001,
"loss": 1.565,
"step": 1965
},
{
"epoch": 0.6581854703716103,
"grad_norm": 0.14214178919792175,
"learning_rate": 0.0001,
"loss": 1.5692,
"step": 1966
},
{
"epoch": 0.6585202544358888,
"grad_norm": 0.1290048062801361,
"learning_rate": 0.0001,
"loss": 1.5004,
"step": 1967
},
{
"epoch": 0.6588550385001674,
"grad_norm": 0.13306178152561188,
"learning_rate": 0.0001,
"loss": 1.5913,
"step": 1968
},
{
"epoch": 0.6591898225644459,
"grad_norm": 0.1337195485830307,
"learning_rate": 0.0001,
"loss": 1.5888,
"step": 1969
},
{
"epoch": 0.6595246066287245,
"grad_norm": 0.1345224380493164,
"learning_rate": 0.0001,
"loss": 1.5513,
"step": 1970
},
{
"epoch": 0.659859390693003,
"grad_norm": 0.12885946035385132,
"learning_rate": 0.0001,
"loss": 1.4686,
"step": 1971
},
{
"epoch": 0.6601941747572816,
"grad_norm": 0.1352531760931015,
"learning_rate": 0.0001,
"loss": 1.5958,
"step": 1972
},
{
"epoch": 0.66052895882156,
"grad_norm": 0.12501929700374603,
"learning_rate": 0.0001,
"loss": 1.4162,
"step": 1973
},
{
"epoch": 0.6608637428858386,
"grad_norm": 0.1291869580745697,
"learning_rate": 0.0001,
"loss": 1.4463,
"step": 1974
},
{
"epoch": 0.6611985269501172,
"grad_norm": 0.14670369029045105,
"learning_rate": 0.0001,
"loss": 1.4661,
"step": 1975
},
{
"epoch": 0.6615333110143957,
"grad_norm": 0.13643884658813477,
"learning_rate": 0.0001,
"loss": 1.5677,
"step": 1976
},
{
"epoch": 0.6618680950786743,
"grad_norm": 0.13746634125709534,
"learning_rate": 0.0001,
"loss": 1.4903,
"step": 1977
},
{
"epoch": 0.6622028791429528,
"grad_norm": 0.14677157998085022,
"learning_rate": 0.0001,
"loss": 1.5492,
"step": 1978
},
{
"epoch": 0.6625376632072313,
"grad_norm": 0.1345069259405136,
"learning_rate": 0.0001,
"loss": 1.6059,
"step": 1979
},
{
"epoch": 0.6628724472715098,
"grad_norm": 0.13783417642116547,
"learning_rate": 0.0001,
"loss": 1.5546,
"step": 1980
},
{
"epoch": 0.6632072313357884,
"grad_norm": 0.13266097009181976,
"learning_rate": 0.0001,
"loss": 1.4469,
"step": 1981
},
{
"epoch": 0.663542015400067,
"grad_norm": 0.13931085169315338,
"learning_rate": 0.0001,
"loss": 1.5797,
"step": 1982
},
{
"epoch": 0.6638767994643455,
"grad_norm": 0.13039837777614594,
"learning_rate": 0.0001,
"loss": 1.4508,
"step": 1983
},
{
"epoch": 0.6642115835286241,
"grad_norm": 0.13921616971492767,
"learning_rate": 0.0001,
"loss": 1.6177,
"step": 1984
},
{
"epoch": 0.6645463675929025,
"grad_norm": 0.1381753534078598,
"learning_rate": 0.0001,
"loss": 1.6578,
"step": 1985
},
{
"epoch": 0.6648811516571811,
"grad_norm": 0.1361846625804901,
"learning_rate": 0.0001,
"loss": 1.5422,
"step": 1986
},
{
"epoch": 0.6652159357214597,
"grad_norm": 0.14170324802398682,
"learning_rate": 0.0001,
"loss": 1.6339,
"step": 1987
},
{
"epoch": 0.6655507197857382,
"grad_norm": 0.13164804875850677,
"learning_rate": 0.0001,
"loss": 1.5623,
"step": 1988
},
{
"epoch": 0.6658855038500168,
"grad_norm": 0.13766439259052277,
"learning_rate": 0.0001,
"loss": 1.5661,
"step": 1989
},
{
"epoch": 0.6662202879142953,
"grad_norm": 0.1340639889240265,
"learning_rate": 0.0001,
"loss": 1.6035,
"step": 1990
},
{
"epoch": 0.6665550719785738,
"grad_norm": 0.132024347782135,
"learning_rate": 0.0001,
"loss": 1.6319,
"step": 1991
},
{
"epoch": 0.6668898560428523,
"grad_norm": 0.13272161781787872,
"learning_rate": 0.0001,
"loss": 1.4522,
"step": 1992
},
{
"epoch": 0.6672246401071309,
"grad_norm": 0.14372223615646362,
"learning_rate": 0.0001,
"loss": 1.581,
"step": 1993
},
{
"epoch": 0.6675594241714095,
"grad_norm": 0.13869139552116394,
"learning_rate": 0.0001,
"loss": 1.6178,
"step": 1994
},
{
"epoch": 0.667894208235688,
"grad_norm": 0.12776124477386475,
"learning_rate": 0.0001,
"loss": 1.51,
"step": 1995
},
{
"epoch": 0.6682289922999666,
"grad_norm": 0.13583005964756012,
"learning_rate": 0.0001,
"loss": 1.5771,
"step": 1996
},
{
"epoch": 0.668563776364245,
"grad_norm": 0.13394635915756226,
"learning_rate": 0.0001,
"loss": 1.5605,
"step": 1997
},
{
"epoch": 0.6688985604285236,
"grad_norm": 0.13842739164829254,
"learning_rate": 0.0001,
"loss": 1.5541,
"step": 1998
},
{
"epoch": 0.6692333444928021,
"grad_norm": 0.13265378773212433,
"learning_rate": 0.0001,
"loss": 1.5772,
"step": 1999
},
{
"epoch": 0.6695681285570807,
"grad_norm": 0.13662943243980408,
"learning_rate": 0.0001,
"loss": 1.591,
"step": 2000
},
{
"epoch": 0.6699029126213593,
"grad_norm": 0.12512929737567902,
"learning_rate": 0.0001,
"loss": 1.4162,
"step": 2001
},
{
"epoch": 0.6702376966856378,
"grad_norm": 0.1327543556690216,
"learning_rate": 0.0001,
"loss": 1.4978,
"step": 2002
},
{
"epoch": 0.6705724807499163,
"grad_norm": 0.13269194960594177,
"learning_rate": 0.0001,
"loss": 1.5998,
"step": 2003
},
{
"epoch": 0.6709072648141948,
"grad_norm": 0.14017336070537567,
"learning_rate": 0.0001,
"loss": 1.5785,
"step": 2004
},
{
"epoch": 0.6712420488784734,
"grad_norm": 0.1304367482662201,
"learning_rate": 0.0001,
"loss": 1.4781,
"step": 2005
},
{
"epoch": 0.671576832942752,
"grad_norm": 0.13442495465278625,
"learning_rate": 0.0001,
"loss": 1.5358,
"step": 2006
},
{
"epoch": 0.6719116170070305,
"grad_norm": 0.13490137457847595,
"learning_rate": 0.0001,
"loss": 1.6273,
"step": 2007
},
{
"epoch": 0.672246401071309,
"grad_norm": 0.1324394941329956,
"learning_rate": 0.0001,
"loss": 1.5884,
"step": 2008
},
{
"epoch": 0.6725811851355875,
"grad_norm": 0.12797103822231293,
"learning_rate": 0.0001,
"loss": 1.551,
"step": 2009
},
{
"epoch": 0.6729159691998661,
"grad_norm": 0.13374999165534973,
"learning_rate": 0.0001,
"loss": 1.5571,
"step": 2010
},
{
"epoch": 0.6732507532641446,
"grad_norm": 0.13020572066307068,
"learning_rate": 0.0001,
"loss": 1.4756,
"step": 2011
},
{
"epoch": 0.6735855373284232,
"grad_norm": 0.12501733005046844,
"learning_rate": 0.0001,
"loss": 1.5073,
"step": 2012
},
{
"epoch": 0.6739203213927017,
"grad_norm": 0.12433689087629318,
"learning_rate": 0.0001,
"loss": 1.4574,
"step": 2013
},
{
"epoch": 0.6742551054569802,
"grad_norm": 0.14026397466659546,
"learning_rate": 0.0001,
"loss": 1.5513,
"step": 2014
},
{
"epoch": 0.6745898895212588,
"grad_norm": 0.1340554803609848,
"learning_rate": 0.0001,
"loss": 1.5686,
"step": 2015
},
{
"epoch": 0.6749246735855373,
"grad_norm": 0.12796646356582642,
"learning_rate": 0.0001,
"loss": 1.4842,
"step": 2016
},
{
"epoch": 0.6752594576498159,
"grad_norm": 0.1362949162721634,
"learning_rate": 0.0001,
"loss": 1.5763,
"step": 2017
},
{
"epoch": 0.6755942417140944,
"grad_norm": 0.1347300410270691,
"learning_rate": 0.0001,
"loss": 1.5975,
"step": 2018
},
{
"epoch": 0.675929025778373,
"grad_norm": 0.13647662103176117,
"learning_rate": 0.0001,
"loss": 1.5395,
"step": 2019
},
{
"epoch": 0.6762638098426514,
"grad_norm": 0.13441947102546692,
"learning_rate": 0.0001,
"loss": 1.5726,
"step": 2020
},
{
"epoch": 0.67659859390693,
"grad_norm": 0.13435856997966766,
"learning_rate": 0.0001,
"loss": 1.6806,
"step": 2021
},
{
"epoch": 0.6769333779712086,
"grad_norm": 0.1239754781126976,
"learning_rate": 0.0001,
"loss": 1.4045,
"step": 2022
},
{
"epoch": 0.6772681620354871,
"grad_norm": 0.13493669033050537,
"learning_rate": 0.0001,
"loss": 1.5606,
"step": 2023
},
{
"epoch": 0.6776029460997657,
"grad_norm": 0.12938407063484192,
"learning_rate": 0.0001,
"loss": 1.5201,
"step": 2024
},
{
"epoch": 0.6779377301640442,
"grad_norm": 0.12213901430368423,
"learning_rate": 0.0001,
"loss": 1.4436,
"step": 2025
},
{
"epoch": 0.6782725142283227,
"grad_norm": 0.14107517898082733,
"learning_rate": 0.0001,
"loss": 1.5584,
"step": 2026
},
{
"epoch": 0.6786072982926012,
"grad_norm": 0.13082027435302734,
"learning_rate": 0.0001,
"loss": 1.5278,
"step": 2027
},
{
"epoch": 0.6789420823568798,
"grad_norm": 0.14623381197452545,
"learning_rate": 0.0001,
"loss": 1.668,
"step": 2028
},
{
"epoch": 0.6792768664211584,
"grad_norm": 0.12862159311771393,
"learning_rate": 0.0001,
"loss": 1.5534,
"step": 2029
},
{
"epoch": 0.6796116504854369,
"grad_norm": 0.13177117705345154,
"learning_rate": 0.0001,
"loss": 1.5564,
"step": 2030
},
{
"epoch": 0.6799464345497155,
"grad_norm": 0.12835298478603363,
"learning_rate": 0.0001,
"loss": 1.479,
"step": 2031
},
{
"epoch": 0.6802812186139939,
"grad_norm": 0.14096349477767944,
"learning_rate": 0.0001,
"loss": 1.6175,
"step": 2032
},
{
"epoch": 0.6806160026782725,
"grad_norm": 0.12646090984344482,
"learning_rate": 0.0001,
"loss": 1.4861,
"step": 2033
},
{
"epoch": 0.680950786742551,
"grad_norm": 0.137931689620018,
"learning_rate": 0.0001,
"loss": 1.5051,
"step": 2034
},
{
"epoch": 0.6812855708068296,
"grad_norm": 0.13240592181682587,
"learning_rate": 0.0001,
"loss": 1.5868,
"step": 2035
},
{
"epoch": 0.6816203548711082,
"grad_norm": 0.1362670511007309,
"learning_rate": 0.0001,
"loss": 1.5899,
"step": 2036
},
{
"epoch": 0.6819551389353867,
"grad_norm": 0.13148629665374756,
"learning_rate": 0.0001,
"loss": 1.521,
"step": 2037
},
{
"epoch": 0.6822899229996652,
"grad_norm": 0.13285885751247406,
"learning_rate": 0.0001,
"loss": 1.5122,
"step": 2038
},
{
"epoch": 0.6826247070639437,
"grad_norm": 0.1264655739068985,
"learning_rate": 0.0001,
"loss": 1.4886,
"step": 2039
},
{
"epoch": 0.6829594911282223,
"grad_norm": 0.12677529454231262,
"learning_rate": 0.0001,
"loss": 1.5068,
"step": 2040
},
{
"epoch": 0.6832942751925009,
"grad_norm": 0.13277101516723633,
"learning_rate": 0.0001,
"loss": 1.6065,
"step": 2041
},
{
"epoch": 0.6836290592567794,
"grad_norm": 0.13291488587856293,
"learning_rate": 0.0001,
"loss": 1.5755,
"step": 2042
},
{
"epoch": 0.6839638433210579,
"grad_norm": 0.13058260083198547,
"learning_rate": 0.0001,
"loss": 1.5286,
"step": 2043
},
{
"epoch": 0.6842986273853364,
"grad_norm": 0.13059435784816742,
"learning_rate": 0.0001,
"loss": 1.5803,
"step": 2044
},
{
"epoch": 0.684633411449615,
"grad_norm": 0.12917304039001465,
"learning_rate": 0.0001,
"loss": 1.576,
"step": 2045
},
{
"epoch": 0.6849681955138935,
"grad_norm": 0.12822791934013367,
"learning_rate": 0.0001,
"loss": 1.5201,
"step": 2046
},
{
"epoch": 0.6853029795781721,
"grad_norm": 0.14006927609443665,
"learning_rate": 0.0001,
"loss": 1.5445,
"step": 2047
},
{
"epoch": 0.6856377636424507,
"grad_norm": 0.13502942025661469,
"learning_rate": 0.0001,
"loss": 1.543,
"step": 2048
},
{
"epoch": 0.6859725477067291,
"grad_norm": 0.1351221352815628,
"learning_rate": 0.0001,
"loss": 1.5594,
"step": 2049
},
{
"epoch": 0.6863073317710077,
"grad_norm": 0.13474461436271667,
"learning_rate": 0.0001,
"loss": 1.5984,
"step": 2050
},
{
"epoch": 0.6866421158352862,
"grad_norm": 0.1317591369152069,
"learning_rate": 0.0001,
"loss": 1.5681,
"step": 2051
},
{
"epoch": 0.6869768998995648,
"grad_norm": 0.1300475299358368,
"learning_rate": 0.0001,
"loss": 1.5426,
"step": 2052
},
{
"epoch": 0.6873116839638433,
"grad_norm": 0.1308741718530655,
"learning_rate": 0.0001,
"loss": 1.5649,
"step": 2053
},
{
"epoch": 0.6876464680281219,
"grad_norm": 0.1339602768421173,
"learning_rate": 0.0001,
"loss": 1.5422,
"step": 2054
},
{
"epoch": 0.6879812520924004,
"grad_norm": 0.12556122243404388,
"learning_rate": 0.0001,
"loss": 1.3939,
"step": 2055
},
{
"epoch": 0.6883160361566789,
"grad_norm": 0.1331097036600113,
"learning_rate": 0.0001,
"loss": 1.5725,
"step": 2056
},
{
"epoch": 0.6886508202209575,
"grad_norm": 0.12769033014774323,
"learning_rate": 0.0001,
"loss": 1.5133,
"step": 2057
},
{
"epoch": 0.688985604285236,
"grad_norm": 0.13246020674705505,
"learning_rate": 0.0001,
"loss": 1.5533,
"step": 2058
},
{
"epoch": 0.6893203883495146,
"grad_norm": 0.13371361792087555,
"learning_rate": 0.0001,
"loss": 1.6253,
"step": 2059
},
{
"epoch": 0.6896551724137931,
"grad_norm": 0.1314792037010193,
"learning_rate": 0.0001,
"loss": 1.4943,
"step": 2060
},
{
"epoch": 0.6899899564780716,
"grad_norm": 0.13194666802883148,
"learning_rate": 0.0001,
"loss": 1.5983,
"step": 2061
},
{
"epoch": 0.6903247405423502,
"grad_norm": 0.13631388545036316,
"learning_rate": 0.0001,
"loss": 1.4932,
"step": 2062
},
{
"epoch": 0.6906595246066287,
"grad_norm": 0.1319463849067688,
"learning_rate": 0.0001,
"loss": 1.5848,
"step": 2063
},
{
"epoch": 0.6909943086709073,
"grad_norm": 0.14124637842178345,
"learning_rate": 0.0001,
"loss": 1.6066,
"step": 2064
},
{
"epoch": 0.6913290927351858,
"grad_norm": 0.12954577803611755,
"learning_rate": 0.0001,
"loss": 1.4153,
"step": 2065
},
{
"epoch": 0.6916638767994644,
"grad_norm": 0.1325748711824417,
"learning_rate": 0.0001,
"loss": 1.5766,
"step": 2066
},
{
"epoch": 0.6919986608637428,
"grad_norm": 0.13064290583133698,
"learning_rate": 0.0001,
"loss": 1.4995,
"step": 2067
},
{
"epoch": 0.6923334449280214,
"grad_norm": 0.1248745545744896,
"learning_rate": 0.0001,
"loss": 1.5077,
"step": 2068
},
{
"epoch": 0.6926682289923,
"grad_norm": 0.1278417706489563,
"learning_rate": 0.0001,
"loss": 1.5449,
"step": 2069
},
{
"epoch": 0.6930030130565785,
"grad_norm": 0.13311515748500824,
"learning_rate": 0.0001,
"loss": 1.5251,
"step": 2070
},
{
"epoch": 0.6933377971208571,
"grad_norm": 0.13218218088150024,
"learning_rate": 0.0001,
"loss": 1.5359,
"step": 2071
},
{
"epoch": 0.6936725811851356,
"grad_norm": 0.13042452931404114,
"learning_rate": 0.0001,
"loss": 1.5534,
"step": 2072
},
{
"epoch": 0.6940073652494141,
"grad_norm": 0.1393493264913559,
"learning_rate": 0.0001,
"loss": 1.594,
"step": 2073
},
{
"epoch": 0.6943421493136926,
"grad_norm": 0.1298573911190033,
"learning_rate": 0.0001,
"loss": 1.518,
"step": 2074
},
{
"epoch": 0.6946769333779712,
"grad_norm": 0.13325051963329315,
"learning_rate": 0.0001,
"loss": 1.5068,
"step": 2075
},
{
"epoch": 0.6950117174422498,
"grad_norm": 0.1269649714231491,
"learning_rate": 0.0001,
"loss": 1.4805,
"step": 2076
},
{
"epoch": 0.6953465015065283,
"grad_norm": 0.12699490785598755,
"learning_rate": 0.0001,
"loss": 1.4228,
"step": 2077
},
{
"epoch": 0.6956812855708069,
"grad_norm": 0.1379399597644806,
"learning_rate": 0.0001,
"loss": 1.4138,
"step": 2078
},
{
"epoch": 0.6960160696350853,
"grad_norm": 0.13343951106071472,
"learning_rate": 0.0001,
"loss": 1.5947,
"step": 2079
},
{
"epoch": 0.6963508536993639,
"grad_norm": 0.13461847603321075,
"learning_rate": 0.0001,
"loss": 1.5333,
"step": 2080
},
{
"epoch": 0.6966856377636425,
"grad_norm": 0.1299065202474594,
"learning_rate": 0.0001,
"loss": 1.5415,
"step": 2081
},
{
"epoch": 0.697020421827921,
"grad_norm": 0.1272873431444168,
"learning_rate": 0.0001,
"loss": 1.4443,
"step": 2082
},
{
"epoch": 0.6973552058921996,
"grad_norm": 0.136282280087471,
"learning_rate": 0.0001,
"loss": 1.4996,
"step": 2083
},
{
"epoch": 0.697689989956478,
"grad_norm": 0.12842769920825958,
"learning_rate": 0.0001,
"loss": 1.5574,
"step": 2084
},
{
"epoch": 0.6980247740207566,
"grad_norm": 0.12897315621376038,
"learning_rate": 0.0001,
"loss": 1.6162,
"step": 2085
},
{
"epoch": 0.6983595580850351,
"grad_norm": 0.13097885251045227,
"learning_rate": 0.0001,
"loss": 1.4949,
"step": 2086
},
{
"epoch": 0.6986943421493137,
"grad_norm": 0.13251438736915588,
"learning_rate": 0.0001,
"loss": 1.5041,
"step": 2087
},
{
"epoch": 0.6990291262135923,
"grad_norm": 0.1319066435098648,
"learning_rate": 0.0001,
"loss": 1.5499,
"step": 2088
},
{
"epoch": 0.6993639102778708,
"grad_norm": 0.13142657279968262,
"learning_rate": 0.0001,
"loss": 1.452,
"step": 2089
},
{
"epoch": 0.6996986943421493,
"grad_norm": 0.13348999619483948,
"learning_rate": 0.0001,
"loss": 1.4905,
"step": 2090
},
{
"epoch": 0.7000334784064278,
"grad_norm": 0.13037413358688354,
"learning_rate": 0.0001,
"loss": 1.4949,
"step": 2091
},
{
"epoch": 0.7003682624707064,
"grad_norm": 0.15308037400245667,
"learning_rate": 0.0001,
"loss": 1.6023,
"step": 2092
},
{
"epoch": 0.7007030465349849,
"grad_norm": 0.128286212682724,
"learning_rate": 0.0001,
"loss": 1.5298,
"step": 2093
},
{
"epoch": 0.7010378305992635,
"grad_norm": 0.13967067003250122,
"learning_rate": 0.0001,
"loss": 1.5577,
"step": 2094
},
{
"epoch": 0.7013726146635421,
"grad_norm": 0.13320837914943695,
"learning_rate": 0.0001,
"loss": 1.5923,
"step": 2095
},
{
"epoch": 0.7017073987278205,
"grad_norm": 0.12857401371002197,
"learning_rate": 0.0001,
"loss": 1.4623,
"step": 2096
},
{
"epoch": 0.7020421827920991,
"grad_norm": 0.12525291740894318,
"learning_rate": 0.0001,
"loss": 1.5126,
"step": 2097
},
{
"epoch": 0.7023769668563776,
"grad_norm": 0.1316770762205124,
"learning_rate": 0.0001,
"loss": 1.5433,
"step": 2098
},
{
"epoch": 0.7027117509206562,
"grad_norm": 0.1343490481376648,
"learning_rate": 0.0001,
"loss": 1.5085,
"step": 2099
},
{
"epoch": 0.7030465349849347,
"grad_norm": 0.12864871323108673,
"learning_rate": 0.0001,
"loss": 1.46,
"step": 2100
},
{
"epoch": 0.7033813190492133,
"grad_norm": 0.13915804028511047,
"learning_rate": 0.0001,
"loss": 1.6961,
"step": 2101
},
{
"epoch": 0.7037161031134918,
"grad_norm": 0.12709419429302216,
"learning_rate": 0.0001,
"loss": 1.4931,
"step": 2102
},
{
"epoch": 0.7040508871777703,
"grad_norm": 0.1383008360862732,
"learning_rate": 0.0001,
"loss": 1.5925,
"step": 2103
},
{
"epoch": 0.7043856712420489,
"grad_norm": 0.1338641494512558,
"learning_rate": 0.0001,
"loss": 1.4715,
"step": 2104
},
{
"epoch": 0.7047204553063274,
"grad_norm": 0.12291635572910309,
"learning_rate": 0.0001,
"loss": 1.3746,
"step": 2105
},
{
"epoch": 0.705055239370606,
"grad_norm": 0.13391555845737457,
"learning_rate": 0.0001,
"loss": 1.627,
"step": 2106
},
{
"epoch": 0.7053900234348845,
"grad_norm": 0.13259120285511017,
"learning_rate": 0.0001,
"loss": 1.6069,
"step": 2107
},
{
"epoch": 0.705724807499163,
"grad_norm": 0.13009488582611084,
"learning_rate": 0.0001,
"loss": 1.534,
"step": 2108
},
{
"epoch": 0.7060595915634416,
"grad_norm": 0.12612484395503998,
"learning_rate": 0.0001,
"loss": 1.4612,
"step": 2109
},
{
"epoch": 0.7063943756277201,
"grad_norm": 0.12470883876085281,
"learning_rate": 0.0001,
"loss": 1.4388,
"step": 2110
},
{
"epoch": 0.7067291596919987,
"grad_norm": 0.13072682917118073,
"learning_rate": 0.0001,
"loss": 1.5083,
"step": 2111
},
{
"epoch": 0.7070639437562772,
"grad_norm": 0.13037820160388947,
"learning_rate": 0.0001,
"loss": 1.4514,
"step": 2112
},
{
"epoch": 0.7073987278205558,
"grad_norm": 0.1304703801870346,
"learning_rate": 0.0001,
"loss": 1.4644,
"step": 2113
},
{
"epoch": 0.7077335118848342,
"grad_norm": 0.1345730423927307,
"learning_rate": 0.0001,
"loss": 1.4849,
"step": 2114
},
{
"epoch": 0.7080682959491128,
"grad_norm": 0.14024527370929718,
"learning_rate": 0.0001,
"loss": 1.5851,
"step": 2115
},
{
"epoch": 0.7084030800133914,
"grad_norm": 0.13666972517967224,
"learning_rate": 0.0001,
"loss": 1.4858,
"step": 2116
},
{
"epoch": 0.7087378640776699,
"grad_norm": 0.13574914634227753,
"learning_rate": 0.0001,
"loss": 1.5258,
"step": 2117
},
{
"epoch": 0.7090726481419485,
"grad_norm": 0.1362755447626114,
"learning_rate": 0.0001,
"loss": 1.5592,
"step": 2118
},
{
"epoch": 0.7094074322062269,
"grad_norm": 0.12771886587142944,
"learning_rate": 0.0001,
"loss": 1.459,
"step": 2119
},
{
"epoch": 0.7097422162705055,
"grad_norm": 0.13762152194976807,
"learning_rate": 0.0001,
"loss": 1.5934,
"step": 2120
},
{
"epoch": 0.710077000334784,
"grad_norm": 0.13554149866104126,
"learning_rate": 0.0001,
"loss": 1.5728,
"step": 2121
},
{
"epoch": 0.7104117843990626,
"grad_norm": 0.1313951313495636,
"learning_rate": 0.0001,
"loss": 1.517,
"step": 2122
},
{
"epoch": 0.7107465684633412,
"grad_norm": 0.12920212745666504,
"learning_rate": 0.0001,
"loss": 1.4647,
"step": 2123
},
{
"epoch": 0.7110813525276197,
"grad_norm": 0.13671697676181793,
"learning_rate": 0.0001,
"loss": 1.4987,
"step": 2124
},
{
"epoch": 0.7114161365918982,
"grad_norm": 0.12860006093978882,
"learning_rate": 0.0001,
"loss": 1.5304,
"step": 2125
},
{
"epoch": 0.7117509206561767,
"grad_norm": 0.12372934073209763,
"learning_rate": 0.0001,
"loss": 1.4964,
"step": 2126
},
{
"epoch": 0.7120857047204553,
"grad_norm": 0.13640989363193512,
"learning_rate": 0.0001,
"loss": 1.521,
"step": 2127
},
{
"epoch": 0.7124204887847339,
"grad_norm": 0.13121746480464935,
"learning_rate": 0.0001,
"loss": 1.5118,
"step": 2128
},
{
"epoch": 0.7127552728490124,
"grad_norm": 0.1307837963104248,
"learning_rate": 0.0001,
"loss": 1.5688,
"step": 2129
},
{
"epoch": 0.713090056913291,
"grad_norm": 0.13141870498657227,
"learning_rate": 0.0001,
"loss": 1.5435,
"step": 2130
},
{
"epoch": 0.7134248409775694,
"grad_norm": 0.13490049540996552,
"learning_rate": 0.0001,
"loss": 1.5421,
"step": 2131
},
{
"epoch": 0.713759625041848,
"grad_norm": 0.13801416754722595,
"learning_rate": 0.0001,
"loss": 1.6097,
"step": 2132
},
{
"epoch": 0.7140944091061265,
"grad_norm": 0.13066011667251587,
"learning_rate": 0.0001,
"loss": 1.4629,
"step": 2133
},
{
"epoch": 0.7144291931704051,
"grad_norm": 0.13355465233325958,
"learning_rate": 0.0001,
"loss": 1.6363,
"step": 2134
},
{
"epoch": 0.7147639772346837,
"grad_norm": 0.12968328595161438,
"learning_rate": 0.0001,
"loss": 1.4454,
"step": 2135
},
{
"epoch": 0.7150987612989622,
"grad_norm": 0.14093713462352753,
"learning_rate": 0.0001,
"loss": 1.6115,
"step": 2136
},
{
"epoch": 0.7154335453632407,
"grad_norm": 0.13097916543483734,
"learning_rate": 0.0001,
"loss": 1.5531,
"step": 2137
},
{
"epoch": 0.7157683294275192,
"grad_norm": 0.1295294314622879,
"learning_rate": 0.0001,
"loss": 1.5923,
"step": 2138
},
{
"epoch": 0.7161031134917978,
"grad_norm": 0.13776849210262299,
"learning_rate": 0.0001,
"loss": 1.5992,
"step": 2139
},
{
"epoch": 0.7164378975560763,
"grad_norm": 0.13502860069274902,
"learning_rate": 0.0001,
"loss": 1.4677,
"step": 2140
},
{
"epoch": 0.7167726816203549,
"grad_norm": 0.13480490446090698,
"learning_rate": 0.0001,
"loss": 1.6244,
"step": 2141
},
{
"epoch": 0.7171074656846335,
"grad_norm": 0.13483154773712158,
"learning_rate": 0.0001,
"loss": 1.616,
"step": 2142
},
{
"epoch": 0.7174422497489119,
"grad_norm": 0.14340271055698395,
"learning_rate": 0.0001,
"loss": 1.6287,
"step": 2143
},
{
"epoch": 0.7177770338131905,
"grad_norm": 0.13620589673519135,
"learning_rate": 0.0001,
"loss": 1.5193,
"step": 2144
},
{
"epoch": 0.718111817877469,
"grad_norm": 0.13150522112846375,
"learning_rate": 0.0001,
"loss": 1.5038,
"step": 2145
},
{
"epoch": 0.7184466019417476,
"grad_norm": 0.13259613513946533,
"learning_rate": 0.0001,
"loss": 1.5666,
"step": 2146
},
{
"epoch": 0.7187813860060261,
"grad_norm": 0.1307973563671112,
"learning_rate": 0.0001,
"loss": 1.5762,
"step": 2147
},
{
"epoch": 0.7191161700703047,
"grad_norm": 0.13372613489627838,
"learning_rate": 0.0001,
"loss": 1.5352,
"step": 2148
},
{
"epoch": 0.7194509541345832,
"grad_norm": 0.13534867763519287,
"learning_rate": 0.0001,
"loss": 1.4652,
"step": 2149
},
{
"epoch": 0.7197857381988617,
"grad_norm": 0.1332571804523468,
"learning_rate": 0.0001,
"loss": 1.5532,
"step": 2150
},
{
"epoch": 0.7201205222631403,
"grad_norm": 0.13172098994255066,
"learning_rate": 0.0001,
"loss": 1.4728,
"step": 2151
},
{
"epoch": 0.7204553063274188,
"grad_norm": 0.12765897810459137,
"learning_rate": 0.0001,
"loss": 1.4597,
"step": 2152
},
{
"epoch": 0.7207900903916974,
"grad_norm": 0.13026951253414154,
"learning_rate": 0.0001,
"loss": 1.4877,
"step": 2153
},
{
"epoch": 0.721124874455976,
"grad_norm": 0.1389724761247635,
"learning_rate": 0.0001,
"loss": 1.5332,
"step": 2154
},
{
"epoch": 0.7214596585202544,
"grad_norm": 0.13382194936275482,
"learning_rate": 0.0001,
"loss": 1.5179,
"step": 2155
},
{
"epoch": 0.721794442584533,
"grad_norm": 0.12780801951885223,
"learning_rate": 0.0001,
"loss": 1.4393,
"step": 2156
},
{
"epoch": 0.7221292266488115,
"grad_norm": 0.1323569118976593,
"learning_rate": 0.0001,
"loss": 1.5528,
"step": 2157
},
{
"epoch": 0.7224640107130901,
"grad_norm": 0.1358579397201538,
"learning_rate": 0.0001,
"loss": 1.4996,
"step": 2158
},
{
"epoch": 0.7227987947773686,
"grad_norm": 0.13905704021453857,
"learning_rate": 0.0001,
"loss": 1.5979,
"step": 2159
},
{
"epoch": 0.7231335788416471,
"grad_norm": 0.1356305480003357,
"learning_rate": 0.0001,
"loss": 1.5851,
"step": 2160
},
{
"epoch": 0.7234683629059256,
"grad_norm": 0.13545480370521545,
"learning_rate": 0.0001,
"loss": 1.5622,
"step": 2161
},
{
"epoch": 0.7238031469702042,
"grad_norm": 0.13289092481136322,
"learning_rate": 0.0001,
"loss": 1.5253,
"step": 2162
},
{
"epoch": 0.7241379310344828,
"grad_norm": 0.130274698138237,
"learning_rate": 0.0001,
"loss": 1.4498,
"step": 2163
},
{
"epoch": 0.7244727150987613,
"grad_norm": 0.13009384274482727,
"learning_rate": 0.0001,
"loss": 1.5593,
"step": 2164
},
{
"epoch": 0.7248074991630399,
"grad_norm": 0.13778330385684967,
"learning_rate": 0.0001,
"loss": 1.4054,
"step": 2165
},
{
"epoch": 0.7251422832273183,
"grad_norm": 0.14639288187026978,
"learning_rate": 0.0001,
"loss": 1.5563,
"step": 2166
},
{
"epoch": 0.7254770672915969,
"grad_norm": 0.14019513130187988,
"learning_rate": 0.0001,
"loss": 1.6143,
"step": 2167
},
{
"epoch": 0.7258118513558754,
"grad_norm": 0.15255634486675262,
"learning_rate": 0.0001,
"loss": 1.4999,
"step": 2168
},
{
"epoch": 0.726146635420154,
"grad_norm": 0.133973628282547,
"learning_rate": 0.0001,
"loss": 1.5648,
"step": 2169
},
{
"epoch": 0.7264814194844326,
"grad_norm": 0.14227105677127838,
"learning_rate": 0.0001,
"loss": 1.5372,
"step": 2170
},
{
"epoch": 0.7268162035487111,
"grad_norm": 0.13694263994693756,
"learning_rate": 0.0001,
"loss": 1.5454,
"step": 2171
},
{
"epoch": 0.7271509876129896,
"grad_norm": 0.1395786851644516,
"learning_rate": 0.0001,
"loss": 1.6018,
"step": 2172
},
{
"epoch": 0.7274857716772681,
"grad_norm": 0.13695751130580902,
"learning_rate": 0.0001,
"loss": 1.5542,
"step": 2173
},
{
"epoch": 0.7278205557415467,
"grad_norm": 0.14114227890968323,
"learning_rate": 0.0001,
"loss": 1.4742,
"step": 2174
},
{
"epoch": 0.7281553398058253,
"grad_norm": 0.14633609354496002,
"learning_rate": 0.0001,
"loss": 1.5335,
"step": 2175
},
{
"epoch": 0.7284901238701038,
"grad_norm": 0.12929964065551758,
"learning_rate": 0.0001,
"loss": 1.4759,
"step": 2176
},
{
"epoch": 0.7288249079343824,
"grad_norm": 0.14383701980113983,
"learning_rate": 0.0001,
"loss": 1.5744,
"step": 2177
},
{
"epoch": 0.7291596919986608,
"grad_norm": 0.14609093964099884,
"learning_rate": 0.0001,
"loss": 1.4927,
"step": 2178
},
{
"epoch": 0.7294944760629394,
"grad_norm": 0.13813704252243042,
"learning_rate": 0.0001,
"loss": 1.535,
"step": 2179
},
{
"epoch": 0.7298292601272179,
"grad_norm": 0.13343721628189087,
"learning_rate": 0.0001,
"loss": 1.5239,
"step": 2180
},
{
"epoch": 0.7301640441914965,
"grad_norm": 0.13793961703777313,
"learning_rate": 0.0001,
"loss": 1.4959,
"step": 2181
},
{
"epoch": 0.7304988282557751,
"grad_norm": 0.14635740220546722,
"learning_rate": 0.0001,
"loss": 1.5759,
"step": 2182
},
{
"epoch": 0.7308336123200536,
"grad_norm": 0.13331273198127747,
"learning_rate": 0.0001,
"loss": 1.5169,
"step": 2183
},
{
"epoch": 0.7311683963843321,
"grad_norm": 0.13492250442504883,
"learning_rate": 0.0001,
"loss": 1.4711,
"step": 2184
},
{
"epoch": 0.7315031804486106,
"grad_norm": 0.14489556849002838,
"learning_rate": 0.0001,
"loss": 1.584,
"step": 2185
},
{
"epoch": 0.7318379645128892,
"grad_norm": 0.13701508939266205,
"learning_rate": 0.0001,
"loss": 1.5844,
"step": 2186
},
{
"epoch": 0.7321727485771677,
"grad_norm": 0.1370009034872055,
"learning_rate": 0.0001,
"loss": 1.5287,
"step": 2187
},
{
"epoch": 0.7325075326414463,
"grad_norm": 0.14577260613441467,
"learning_rate": 0.0001,
"loss": 1.4752,
"step": 2188
},
{
"epoch": 0.7328423167057249,
"grad_norm": 0.1377391368150711,
"learning_rate": 0.0001,
"loss": 1.5484,
"step": 2189
},
{
"epoch": 0.7331771007700033,
"grad_norm": 0.1396346390247345,
"learning_rate": 0.0001,
"loss": 1.5405,
"step": 2190
},
{
"epoch": 0.7335118848342819,
"grad_norm": 0.1492149382829666,
"learning_rate": 0.0001,
"loss": 1.5028,
"step": 2191
},
{
"epoch": 0.7338466688985604,
"grad_norm": 0.13928255438804626,
"learning_rate": 0.0001,
"loss": 1.6229,
"step": 2192
},
{
"epoch": 0.734181452962839,
"grad_norm": 0.13838155567646027,
"learning_rate": 0.0001,
"loss": 1.5661,
"step": 2193
},
{
"epoch": 0.7345162370271175,
"grad_norm": 0.1435183733701706,
"learning_rate": 0.0001,
"loss": 1.6133,
"step": 2194
},
{
"epoch": 0.734851021091396,
"grad_norm": 0.13500259816646576,
"learning_rate": 0.0001,
"loss": 1.5728,
"step": 2195
},
{
"epoch": 0.7351858051556746,
"grad_norm": 0.13238045573234558,
"learning_rate": 0.0001,
"loss": 1.5435,
"step": 2196
},
{
"epoch": 0.7355205892199531,
"grad_norm": 0.13493601977825165,
"learning_rate": 0.0001,
"loss": 1.5117,
"step": 2197
},
{
"epoch": 0.7358553732842317,
"grad_norm": 0.1433602273464203,
"learning_rate": 0.0001,
"loss": 1.5921,
"step": 2198
},
{
"epoch": 0.7361901573485102,
"grad_norm": 0.13165898621082306,
"learning_rate": 0.0001,
"loss": 1.5648,
"step": 2199
},
{
"epoch": 0.7365249414127888,
"grad_norm": 0.1355050653219223,
"learning_rate": 0.0001,
"loss": 1.5998,
"step": 2200
},
{
"epoch": 0.7368597254770672,
"grad_norm": 0.1296299695968628,
"learning_rate": 0.0001,
"loss": 1.3903,
"step": 2201
},
{
"epoch": 0.7371945095413458,
"grad_norm": 0.13563255965709686,
"learning_rate": 0.0001,
"loss": 1.5462,
"step": 2202
},
{
"epoch": 0.7375292936056244,
"grad_norm": 0.13449116051197052,
"learning_rate": 0.0001,
"loss": 1.5344,
"step": 2203
},
{
"epoch": 0.7378640776699029,
"grad_norm": 0.12928107380867004,
"learning_rate": 0.0001,
"loss": 1.5212,
"step": 2204
},
{
"epoch": 0.7381988617341815,
"grad_norm": 0.13199785351753235,
"learning_rate": 0.0001,
"loss": 1.5408,
"step": 2205
},
{
"epoch": 0.73853364579846,
"grad_norm": 0.13608896732330322,
"learning_rate": 0.0001,
"loss": 1.6036,
"step": 2206
},
{
"epoch": 0.7388684298627385,
"grad_norm": 0.1248575821518898,
"learning_rate": 0.0001,
"loss": 1.4513,
"step": 2207
},
{
"epoch": 0.739203213927017,
"grad_norm": 0.1319798231124878,
"learning_rate": 0.0001,
"loss": 1.5231,
"step": 2208
},
{
"epoch": 0.7395379979912956,
"grad_norm": 0.1297694742679596,
"learning_rate": 0.0001,
"loss": 1.492,
"step": 2209
},
{
"epoch": 0.7398727820555742,
"grad_norm": 0.13263830542564392,
"learning_rate": 0.0001,
"loss": 1.5746,
"step": 2210
},
{
"epoch": 0.7402075661198527,
"grad_norm": 0.1352548599243164,
"learning_rate": 0.0001,
"loss": 1.567,
"step": 2211
},
{
"epoch": 0.7405423501841313,
"grad_norm": 0.13107185065746307,
"learning_rate": 0.0001,
"loss": 1.5053,
"step": 2212
},
{
"epoch": 0.7408771342484097,
"grad_norm": 0.13326485455036163,
"learning_rate": 0.0001,
"loss": 1.5838,
"step": 2213
},
{
"epoch": 0.7412119183126883,
"grad_norm": 0.14211507141590118,
"learning_rate": 0.0001,
"loss": 1.5694,
"step": 2214
},
{
"epoch": 0.7415467023769668,
"grad_norm": 0.13121196627616882,
"learning_rate": 0.0001,
"loss": 1.4977,
"step": 2215
},
{
"epoch": 0.7418814864412454,
"grad_norm": 0.13140466809272766,
"learning_rate": 0.0001,
"loss": 1.568,
"step": 2216
},
{
"epoch": 0.742216270505524,
"grad_norm": 0.1365407258272171,
"learning_rate": 0.0001,
"loss": 1.6667,
"step": 2217
},
{
"epoch": 0.7425510545698025,
"grad_norm": 0.13460293412208557,
"learning_rate": 0.0001,
"loss": 1.5813,
"step": 2218
},
{
"epoch": 0.742885838634081,
"grad_norm": 0.13729612529277802,
"learning_rate": 0.0001,
"loss": 1.5491,
"step": 2219
},
{
"epoch": 0.7432206226983595,
"grad_norm": 0.13383755087852478,
"learning_rate": 0.0001,
"loss": 1.5678,
"step": 2220
},
{
"epoch": 0.7435554067626381,
"grad_norm": 0.13744328916072845,
"learning_rate": 0.0001,
"loss": 1.5336,
"step": 2221
},
{
"epoch": 0.7438901908269167,
"grad_norm": 0.12934266030788422,
"learning_rate": 0.0001,
"loss": 1.5429,
"step": 2222
},
{
"epoch": 0.7442249748911952,
"grad_norm": 0.1308993250131607,
"learning_rate": 0.0001,
"loss": 1.5449,
"step": 2223
},
{
"epoch": 0.7445597589554738,
"grad_norm": 0.1382169872522354,
"learning_rate": 0.0001,
"loss": 1.6019,
"step": 2224
},
{
"epoch": 0.7448945430197522,
"grad_norm": 0.13184891641139984,
"learning_rate": 0.0001,
"loss": 1.5357,
"step": 2225
},
{
"epoch": 0.7452293270840308,
"grad_norm": 0.1404266655445099,
"learning_rate": 0.0001,
"loss": 1.5935,
"step": 2226
},
{
"epoch": 0.7455641111483093,
"grad_norm": 0.13625003397464752,
"learning_rate": 0.0001,
"loss": 1.5588,
"step": 2227
},
{
"epoch": 0.7458988952125879,
"grad_norm": 0.1287645548582077,
"learning_rate": 0.0001,
"loss": 1.435,
"step": 2228
},
{
"epoch": 0.7462336792768665,
"grad_norm": 0.13726918399333954,
"learning_rate": 0.0001,
"loss": 1.5453,
"step": 2229
},
{
"epoch": 0.746568463341145,
"grad_norm": 0.13299064338207245,
"learning_rate": 0.0001,
"loss": 1.4996,
"step": 2230
},
{
"epoch": 0.7469032474054235,
"grad_norm": 0.13553793728351593,
"learning_rate": 0.0001,
"loss": 1.5395,
"step": 2231
},
{
"epoch": 0.747238031469702,
"grad_norm": 0.13683359324932098,
"learning_rate": 0.0001,
"loss": 1.661,
"step": 2232
},
{
"epoch": 0.7475728155339806,
"grad_norm": 0.13002213835716248,
"learning_rate": 0.0001,
"loss": 1.5245,
"step": 2233
},
{
"epoch": 0.7479075995982591,
"grad_norm": 0.13479109108448029,
"learning_rate": 0.0001,
"loss": 1.5724,
"step": 2234
},
{
"epoch": 0.7482423836625377,
"grad_norm": 0.13677366077899933,
"learning_rate": 0.0001,
"loss": 1.6276,
"step": 2235
},
{
"epoch": 0.7485771677268162,
"grad_norm": 0.14970214664936066,
"learning_rate": 0.0001,
"loss": 1.6145,
"step": 2236
},
{
"epoch": 0.7489119517910947,
"grad_norm": 0.1285363882780075,
"learning_rate": 0.0001,
"loss": 1.4591,
"step": 2237
},
{
"epoch": 0.7492467358553733,
"grad_norm": 0.14044371247291565,
"learning_rate": 0.0001,
"loss": 1.511,
"step": 2238
},
{
"epoch": 0.7495815199196518,
"grad_norm": 0.13310682773590088,
"learning_rate": 0.0001,
"loss": 1.5777,
"step": 2239
},
{
"epoch": 0.7499163039839304,
"grad_norm": 0.14290130138397217,
"learning_rate": 0.0001,
"loss": 1.5075,
"step": 2240
},
{
"epoch": 0.750251088048209,
"grad_norm": 0.1509731411933899,
"learning_rate": 0.0001,
"loss": 1.6198,
"step": 2241
},
{
"epoch": 0.7505858721124874,
"grad_norm": 0.13322798907756805,
"learning_rate": 0.0001,
"loss": 1.5722,
"step": 2242
},
{
"epoch": 0.750920656176766,
"grad_norm": 0.1355818659067154,
"learning_rate": 0.0001,
"loss": 1.4922,
"step": 2243
},
{
"epoch": 0.7512554402410445,
"grad_norm": 0.14394080638885498,
"learning_rate": 0.0001,
"loss": 1.5976,
"step": 2244
},
{
"epoch": 0.7515902243053231,
"grad_norm": 0.135832279920578,
"learning_rate": 0.0001,
"loss": 1.5138,
"step": 2245
},
{
"epoch": 0.7519250083696016,
"grad_norm": 0.13906393945217133,
"learning_rate": 0.0001,
"loss": 1.5351,
"step": 2246
},
{
"epoch": 0.7522597924338802,
"grad_norm": 0.13090325891971588,
"learning_rate": 0.0001,
"loss": 1.4505,
"step": 2247
},
{
"epoch": 0.7525945764981586,
"grad_norm": 0.13537496328353882,
"learning_rate": 0.0001,
"loss": 1.4955,
"step": 2248
},
{
"epoch": 0.7529293605624372,
"grad_norm": 0.1373416930437088,
"learning_rate": 0.0001,
"loss": 1.541,
"step": 2249
},
{
"epoch": 0.7532641446267158,
"grad_norm": 0.1294248253107071,
"learning_rate": 0.0001,
"loss": 1.4943,
"step": 2250
},
{
"epoch": 0.7535989286909943,
"grad_norm": 0.12977437674999237,
"learning_rate": 0.0001,
"loss": 1.5315,
"step": 2251
},
{
"epoch": 0.7539337127552729,
"grad_norm": 0.13353915512561798,
"learning_rate": 0.0001,
"loss": 1.4855,
"step": 2252
},
{
"epoch": 0.7542684968195514,
"grad_norm": 0.1338808536529541,
"learning_rate": 0.0001,
"loss": 1.5483,
"step": 2253
},
{
"epoch": 0.7546032808838299,
"grad_norm": 0.13082879781723022,
"learning_rate": 0.0001,
"loss": 1.5276,
"step": 2254
},
{
"epoch": 0.7549380649481084,
"grad_norm": 0.12903323769569397,
"learning_rate": 0.0001,
"loss": 1.5506,
"step": 2255
},
{
"epoch": 0.755272849012387,
"grad_norm": 0.1312693953514099,
"learning_rate": 0.0001,
"loss": 1.4347,
"step": 2256
},
{
"epoch": 0.7556076330766656,
"grad_norm": 0.13503922522068024,
"learning_rate": 0.0001,
"loss": 1.5089,
"step": 2257
},
{
"epoch": 0.7559424171409441,
"grad_norm": 0.13478560745716095,
"learning_rate": 0.0001,
"loss": 1.4717,
"step": 2258
},
{
"epoch": 0.7562772012052227,
"grad_norm": 0.14111362397670746,
"learning_rate": 0.0001,
"loss": 1.4982,
"step": 2259
},
{
"epoch": 0.7566119852695011,
"grad_norm": 0.13715283572673798,
"learning_rate": 0.0001,
"loss": 1.5166,
"step": 2260
},
{
"epoch": 0.7569467693337797,
"grad_norm": 0.14457426965236664,
"learning_rate": 0.0001,
"loss": 1.6322,
"step": 2261
},
{
"epoch": 0.7572815533980582,
"grad_norm": 0.13212622702121735,
"learning_rate": 0.0001,
"loss": 1.4653,
"step": 2262
},
{
"epoch": 0.7576163374623368,
"grad_norm": 0.136484295129776,
"learning_rate": 0.0001,
"loss": 1.4416,
"step": 2263
},
{
"epoch": 0.7579511215266154,
"grad_norm": 0.13701216876506805,
"learning_rate": 0.0001,
"loss": 1.5158,
"step": 2264
},
{
"epoch": 0.7582859055908939,
"grad_norm": 0.13045822083950043,
"learning_rate": 0.0001,
"loss": 1.4805,
"step": 2265
},
{
"epoch": 0.7586206896551724,
"grad_norm": 0.13484729826450348,
"learning_rate": 0.0001,
"loss": 1.4919,
"step": 2266
},
{
"epoch": 0.7589554737194509,
"grad_norm": 0.1352708488702774,
"learning_rate": 0.0001,
"loss": 1.5632,
"step": 2267
},
{
"epoch": 0.7592902577837295,
"grad_norm": 0.13968177139759064,
"learning_rate": 0.0001,
"loss": 1.5983,
"step": 2268
},
{
"epoch": 0.759625041848008,
"grad_norm": 0.13527031242847443,
"learning_rate": 0.0001,
"loss": 1.5361,
"step": 2269
},
{
"epoch": 0.7599598259122866,
"grad_norm": 0.13342413306236267,
"learning_rate": 0.0001,
"loss": 1.5487,
"step": 2270
},
{
"epoch": 0.7602946099765651,
"grad_norm": 0.13037632405757904,
"learning_rate": 0.0001,
"loss": 1.4433,
"step": 2271
},
{
"epoch": 0.7606293940408436,
"grad_norm": 0.12888109683990479,
"learning_rate": 0.0001,
"loss": 1.5565,
"step": 2272
},
{
"epoch": 0.7609641781051222,
"grad_norm": 0.13160650432109833,
"learning_rate": 0.0001,
"loss": 1.6344,
"step": 2273
},
{
"epoch": 0.7612989621694007,
"grad_norm": 0.13456179201602936,
"learning_rate": 0.0001,
"loss": 1.5983,
"step": 2274
},
{
"epoch": 0.7616337462336793,
"grad_norm": 0.12624886631965637,
"learning_rate": 0.0001,
"loss": 1.4877,
"step": 2275
},
{
"epoch": 0.7619685302979579,
"grad_norm": 0.13493984937667847,
"learning_rate": 0.0001,
"loss": 1.6083,
"step": 2276
},
{
"epoch": 0.7623033143622363,
"grad_norm": 0.13616621494293213,
"learning_rate": 0.0001,
"loss": 1.59,
"step": 2277
},
{
"epoch": 0.7626380984265149,
"grad_norm": 0.1309913843870163,
"learning_rate": 0.0001,
"loss": 1.5356,
"step": 2278
},
{
"epoch": 0.7629728824907934,
"grad_norm": 0.1269841343164444,
"learning_rate": 0.0001,
"loss": 1.442,
"step": 2279
},
{
"epoch": 0.763307666555072,
"grad_norm": 0.13083530962467194,
"learning_rate": 0.0001,
"loss": 1.4919,
"step": 2280
},
{
"epoch": 0.7636424506193505,
"grad_norm": 0.13288795948028564,
"learning_rate": 0.0001,
"loss": 1.5919,
"step": 2281
},
{
"epoch": 0.7639772346836291,
"grad_norm": 0.1334894597530365,
"learning_rate": 0.0001,
"loss": 1.5203,
"step": 2282
},
{
"epoch": 0.7643120187479076,
"grad_norm": 0.1322222203016281,
"learning_rate": 0.0001,
"loss": 1.4987,
"step": 2283
},
{
"epoch": 0.7646468028121861,
"grad_norm": 0.13740068674087524,
"learning_rate": 0.0001,
"loss": 1.5966,
"step": 2284
},
{
"epoch": 0.7649815868764647,
"grad_norm": 0.13021446764469147,
"learning_rate": 0.0001,
"loss": 1.5163,
"step": 2285
},
{
"epoch": 0.7653163709407432,
"grad_norm": 0.13992641866207123,
"learning_rate": 0.0001,
"loss": 1.5116,
"step": 2286
},
{
"epoch": 0.7656511550050218,
"grad_norm": 0.13332848250865936,
"learning_rate": 0.0001,
"loss": 1.5066,
"step": 2287
},
{
"epoch": 0.7659859390693003,
"grad_norm": 0.12683235108852386,
"learning_rate": 0.0001,
"loss": 1.4933,
"step": 2288
},
{
"epoch": 0.7663207231335788,
"grad_norm": 0.13610418140888214,
"learning_rate": 0.0001,
"loss": 1.5115,
"step": 2289
},
{
"epoch": 0.7666555071978574,
"grad_norm": 0.13530276715755463,
"learning_rate": 0.0001,
"loss": 1.5899,
"step": 2290
},
{
"epoch": 0.7669902912621359,
"grad_norm": 0.13067664206027985,
"learning_rate": 0.0001,
"loss": 1.4806,
"step": 2291
},
{
"epoch": 0.7673250753264145,
"grad_norm": 0.12956401705741882,
"learning_rate": 0.0001,
"loss": 1.4432,
"step": 2292
},
{
"epoch": 0.767659859390693,
"grad_norm": 0.1368110626935959,
"learning_rate": 0.0001,
"loss": 1.5858,
"step": 2293
},
{
"epoch": 0.7679946434549716,
"grad_norm": 0.13342629373073578,
"learning_rate": 0.0001,
"loss": 1.4773,
"step": 2294
},
{
"epoch": 0.76832942751925,
"grad_norm": 0.13525448739528656,
"learning_rate": 0.0001,
"loss": 1.5574,
"step": 2295
},
{
"epoch": 0.7686642115835286,
"grad_norm": 0.14219002425670624,
"learning_rate": 0.0001,
"loss": 1.6207,
"step": 2296
},
{
"epoch": 0.7689989956478072,
"grad_norm": 0.13410523533821106,
"learning_rate": 0.0001,
"loss": 1.5414,
"step": 2297
},
{
"epoch": 0.7693337797120857,
"grad_norm": 0.1366255283355713,
"learning_rate": 0.0001,
"loss": 1.5588,
"step": 2298
},
{
"epoch": 0.7696685637763643,
"grad_norm": 0.14335733652114868,
"learning_rate": 0.0001,
"loss": 1.4797,
"step": 2299
},
{
"epoch": 0.7700033478406428,
"grad_norm": 0.13368913531303406,
"learning_rate": 0.0001,
"loss": 1.5068,
"step": 2300
},
{
"epoch": 0.7703381319049213,
"grad_norm": 0.14045390486717224,
"learning_rate": 0.0001,
"loss": 1.5532,
"step": 2301
},
{
"epoch": 0.7706729159691998,
"grad_norm": 0.13820236921310425,
"learning_rate": 0.0001,
"loss": 1.4334,
"step": 2302
},
{
"epoch": 0.7710077000334784,
"grad_norm": 0.13486477732658386,
"learning_rate": 0.0001,
"loss": 1.6277,
"step": 2303
},
{
"epoch": 0.771342484097757,
"grad_norm": 0.1374381184577942,
"learning_rate": 0.0001,
"loss": 1.4995,
"step": 2304
},
{
"epoch": 0.7716772681620355,
"grad_norm": 0.14841946959495544,
"learning_rate": 0.0001,
"loss": 1.6044,
"step": 2305
},
{
"epoch": 0.7720120522263141,
"grad_norm": 0.13106206059455872,
"learning_rate": 0.0001,
"loss": 1.5009,
"step": 2306
},
{
"epoch": 0.7723468362905925,
"grad_norm": 0.13768276572227478,
"learning_rate": 0.0001,
"loss": 1.5289,
"step": 2307
},
{
"epoch": 0.7726816203548711,
"grad_norm": 0.14987289905548096,
"learning_rate": 0.0001,
"loss": 1.5654,
"step": 2308
},
{
"epoch": 0.7730164044191496,
"grad_norm": 0.13422365486621857,
"learning_rate": 0.0001,
"loss": 1.5781,
"step": 2309
},
{
"epoch": 0.7733511884834282,
"grad_norm": 0.14007548987865448,
"learning_rate": 0.0001,
"loss": 1.468,
"step": 2310
},
{
"epoch": 0.7736859725477068,
"grad_norm": 0.140237495303154,
"learning_rate": 0.0001,
"loss": 1.4408,
"step": 2311
},
{
"epoch": 0.7740207566119852,
"grad_norm": 0.1331593543291092,
"learning_rate": 0.0001,
"loss": 1.5213,
"step": 2312
},
{
"epoch": 0.7743555406762638,
"grad_norm": 0.13670580089092255,
"learning_rate": 0.0001,
"loss": 1.5034,
"step": 2313
},
{
"epoch": 0.7746903247405423,
"grad_norm": 0.13198411464691162,
"learning_rate": 0.0001,
"loss": 1.4633,
"step": 2314
},
{
"epoch": 0.7750251088048209,
"grad_norm": 0.14384810626506805,
"learning_rate": 0.0001,
"loss": 1.6254,
"step": 2315
},
{
"epoch": 0.7753598928690995,
"grad_norm": 0.12808088958263397,
"learning_rate": 0.0001,
"loss": 1.4751,
"step": 2316
},
{
"epoch": 0.775694676933378,
"grad_norm": 0.14130346477031708,
"learning_rate": 0.0001,
"loss": 1.5306,
"step": 2317
},
{
"epoch": 0.7760294609976565,
"grad_norm": 0.13153797388076782,
"learning_rate": 0.0001,
"loss": 1.5046,
"step": 2318
},
{
"epoch": 0.776364245061935,
"grad_norm": 0.13447383046150208,
"learning_rate": 0.0001,
"loss": 1.5288,
"step": 2319
},
{
"epoch": 0.7766990291262136,
"grad_norm": 0.13588428497314453,
"learning_rate": 0.0001,
"loss": 1.5792,
"step": 2320
},
{
"epoch": 0.7770338131904921,
"grad_norm": 0.1414654701948166,
"learning_rate": 0.0001,
"loss": 1.6252,
"step": 2321
},
{
"epoch": 0.7773685972547707,
"grad_norm": 0.14798319339752197,
"learning_rate": 0.0001,
"loss": 1.5182,
"step": 2322
},
{
"epoch": 0.7777033813190493,
"grad_norm": 0.13594651222229004,
"learning_rate": 0.0001,
"loss": 1.59,
"step": 2323
},
{
"epoch": 0.7780381653833277,
"grad_norm": 0.13689537346363068,
"learning_rate": 0.0001,
"loss": 1.5312,
"step": 2324
},
{
"epoch": 0.7783729494476063,
"grad_norm": 0.13842853903770447,
"learning_rate": 0.0001,
"loss": 1.5453,
"step": 2325
},
{
"epoch": 0.7787077335118848,
"grad_norm": 0.14006944000720978,
"learning_rate": 0.0001,
"loss": 1.5789,
"step": 2326
},
{
"epoch": 0.7790425175761634,
"grad_norm": 0.1328335702419281,
"learning_rate": 0.0001,
"loss": 1.5183,
"step": 2327
},
{
"epoch": 0.7793773016404419,
"grad_norm": 0.1366383582353592,
"learning_rate": 0.0001,
"loss": 1.5861,
"step": 2328
},
{
"epoch": 0.7797120857047205,
"grad_norm": 0.1384078413248062,
"learning_rate": 0.0001,
"loss": 1.4768,
"step": 2329
},
{
"epoch": 0.780046869768999,
"grad_norm": 0.13138563930988312,
"learning_rate": 0.0001,
"loss": 1.5415,
"step": 2330
},
{
"epoch": 0.7803816538332775,
"grad_norm": 0.13533802330493927,
"learning_rate": 0.0001,
"loss": 1.5351,
"step": 2331
},
{
"epoch": 0.7807164378975561,
"grad_norm": 0.12634359300136566,
"learning_rate": 0.0001,
"loss": 1.4854,
"step": 2332
},
{
"epoch": 0.7810512219618346,
"grad_norm": 0.14045196771621704,
"learning_rate": 0.0001,
"loss": 1.5979,
"step": 2333
},
{
"epoch": 0.7813860060261132,
"grad_norm": 0.12970393896102905,
"learning_rate": 0.0001,
"loss": 1.4883,
"step": 2334
},
{
"epoch": 0.7817207900903917,
"grad_norm": 0.13416926562786102,
"learning_rate": 0.0001,
"loss": 1.538,
"step": 2335
},
{
"epoch": 0.7820555741546702,
"grad_norm": 0.12993508577346802,
"learning_rate": 0.0001,
"loss": 1.3861,
"step": 2336
},
{
"epoch": 0.7823903582189488,
"grad_norm": 0.1441780924797058,
"learning_rate": 0.0001,
"loss": 1.5082,
"step": 2337
},
{
"epoch": 0.7827251422832273,
"grad_norm": 0.1340634673833847,
"learning_rate": 0.0001,
"loss": 1.5308,
"step": 2338
},
{
"epoch": 0.7830599263475059,
"grad_norm": 0.1375696063041687,
"learning_rate": 0.0001,
"loss": 1.4726,
"step": 2339
},
{
"epoch": 0.7833947104117844,
"grad_norm": 0.13143296539783478,
"learning_rate": 0.0001,
"loss": 1.5403,
"step": 2340
},
{
"epoch": 0.783729494476063,
"grad_norm": 0.14144007861614227,
"learning_rate": 0.0001,
"loss": 1.5596,
"step": 2341
},
{
"epoch": 0.7840642785403414,
"grad_norm": 0.1288491189479828,
"learning_rate": 0.0001,
"loss": 1.4793,
"step": 2342
},
{
"epoch": 0.78439906260462,
"grad_norm": 0.13762634992599487,
"learning_rate": 0.0001,
"loss": 1.5224,
"step": 2343
},
{
"epoch": 0.7847338466688986,
"grad_norm": 0.1369268000125885,
"learning_rate": 0.0001,
"loss": 1.5678,
"step": 2344
},
{
"epoch": 0.7850686307331771,
"grad_norm": 0.1348867565393448,
"learning_rate": 0.0001,
"loss": 1.5764,
"step": 2345
},
{
"epoch": 0.7854034147974557,
"grad_norm": 0.13499613106250763,
"learning_rate": 0.0001,
"loss": 1.5317,
"step": 2346
},
{
"epoch": 0.7857381988617341,
"grad_norm": 0.136494979262352,
"learning_rate": 0.0001,
"loss": 1.6178,
"step": 2347
},
{
"epoch": 0.7860729829260127,
"grad_norm": 0.13742174208164215,
"learning_rate": 0.0001,
"loss": 1.5524,
"step": 2348
},
{
"epoch": 0.7864077669902912,
"grad_norm": 0.1315702348947525,
"learning_rate": 0.0001,
"loss": 1.5199,
"step": 2349
},
{
"epoch": 0.7867425510545698,
"grad_norm": 0.1344085931777954,
"learning_rate": 0.0001,
"loss": 1.5222,
"step": 2350
},
{
"epoch": 0.7870773351188484,
"grad_norm": 0.1331881582736969,
"learning_rate": 0.0001,
"loss": 1.4746,
"step": 2351
},
{
"epoch": 0.7874121191831269,
"grad_norm": 0.13880756497383118,
"learning_rate": 0.0001,
"loss": 1.5027,
"step": 2352
},
{
"epoch": 0.7877469032474054,
"grad_norm": 0.1315576285123825,
"learning_rate": 0.0001,
"loss": 1.5833,
"step": 2353
},
{
"epoch": 0.7880816873116839,
"grad_norm": 0.1278029829263687,
"learning_rate": 0.0001,
"loss": 1.4475,
"step": 2354
},
{
"epoch": 0.7884164713759625,
"grad_norm": 0.14114075899124146,
"learning_rate": 0.0001,
"loss": 1.4451,
"step": 2355
},
{
"epoch": 0.788751255440241,
"grad_norm": 0.1352827101945877,
"learning_rate": 0.0001,
"loss": 1.4816,
"step": 2356
},
{
"epoch": 0.7890860395045196,
"grad_norm": 0.1316574364900589,
"learning_rate": 0.0001,
"loss": 1.4572,
"step": 2357
},
{
"epoch": 0.7894208235687982,
"grad_norm": 0.13792237639427185,
"learning_rate": 0.0001,
"loss": 1.6108,
"step": 2358
},
{
"epoch": 0.7897556076330766,
"grad_norm": 0.1365162879228592,
"learning_rate": 0.0001,
"loss": 1.5303,
"step": 2359
},
{
"epoch": 0.7900903916973552,
"grad_norm": 0.13918493688106537,
"learning_rate": 0.0001,
"loss": 1.6387,
"step": 2360
},
{
"epoch": 0.7904251757616337,
"grad_norm": 0.1277536302804947,
"learning_rate": 0.0001,
"loss": 1.5365,
"step": 2361
},
{
"epoch": 0.7907599598259123,
"grad_norm": 0.13407327234745026,
"learning_rate": 0.0001,
"loss": 1.4571,
"step": 2362
},
{
"epoch": 0.7910947438901909,
"grad_norm": 0.1346539407968521,
"learning_rate": 0.0001,
"loss": 1.4506,
"step": 2363
},
{
"epoch": 0.7914295279544694,
"grad_norm": 0.13160093128681183,
"learning_rate": 0.0001,
"loss": 1.4457,
"step": 2364
},
{
"epoch": 0.7917643120187479,
"grad_norm": 0.13025003671646118,
"learning_rate": 0.0001,
"loss": 1.56,
"step": 2365
},
{
"epoch": 0.7920990960830264,
"grad_norm": 0.14476409554481506,
"learning_rate": 0.0001,
"loss": 1.5876,
"step": 2366
},
{
"epoch": 0.792433880147305,
"grad_norm": 0.13053929805755615,
"learning_rate": 0.0001,
"loss": 1.4338,
"step": 2367
},
{
"epoch": 0.7927686642115835,
"grad_norm": 0.13872520625591278,
"learning_rate": 0.0001,
"loss": 1.6427,
"step": 2368
},
{
"epoch": 0.7931034482758621,
"grad_norm": 0.14061668515205383,
"learning_rate": 0.0001,
"loss": 1.4886,
"step": 2369
},
{
"epoch": 0.7934382323401407,
"grad_norm": 0.130232036113739,
"learning_rate": 0.0001,
"loss": 1.4023,
"step": 2370
},
{
"epoch": 0.7937730164044191,
"grad_norm": 0.23358748853206635,
"learning_rate": 0.0001,
"loss": 1.457,
"step": 2371
},
{
"epoch": 0.7941078004686977,
"grad_norm": 0.13233914971351624,
"learning_rate": 0.0001,
"loss": 1.4307,
"step": 2372
},
{
"epoch": 0.7944425845329762,
"grad_norm": 0.13504283130168915,
"learning_rate": 0.0001,
"loss": 1.5976,
"step": 2373
},
{
"epoch": 0.7947773685972548,
"grad_norm": 0.13976161181926727,
"learning_rate": 0.0001,
"loss": 1.6455,
"step": 2374
},
{
"epoch": 0.7951121526615333,
"grad_norm": 0.1336098313331604,
"learning_rate": 0.0001,
"loss": 1.4469,
"step": 2375
},
{
"epoch": 0.7954469367258119,
"grad_norm": 0.13648861646652222,
"learning_rate": 0.0001,
"loss": 1.4964,
"step": 2376
},
{
"epoch": 0.7957817207900904,
"grad_norm": 0.13627798855304718,
"learning_rate": 0.0001,
"loss": 1.5834,
"step": 2377
},
{
"epoch": 0.7961165048543689,
"grad_norm": 0.14114542305469513,
"learning_rate": 0.0001,
"loss": 1.5566,
"step": 2378
},
{
"epoch": 0.7964512889186475,
"grad_norm": 0.13499446213245392,
"learning_rate": 0.0001,
"loss": 1.5174,
"step": 2379
},
{
"epoch": 0.796786072982926,
"grad_norm": 0.14620280265808105,
"learning_rate": 0.0001,
"loss": 1.6778,
"step": 2380
},
{
"epoch": 0.7971208570472046,
"grad_norm": 0.13239939510822296,
"learning_rate": 0.0001,
"loss": 1.5274,
"step": 2381
},
{
"epoch": 0.7974556411114831,
"grad_norm": 0.13517913222312927,
"learning_rate": 0.0001,
"loss": 1.5291,
"step": 2382
},
{
"epoch": 0.7977904251757616,
"grad_norm": 0.1352391242980957,
"learning_rate": 0.0001,
"loss": 1.5285,
"step": 2383
},
{
"epoch": 0.7981252092400402,
"grad_norm": 0.14000670611858368,
"learning_rate": 0.0001,
"loss": 1.6194,
"step": 2384
},
{
"epoch": 0.7984599933043187,
"grad_norm": 0.1349296271800995,
"learning_rate": 0.0001,
"loss": 1.5001,
"step": 2385
},
{
"epoch": 0.7987947773685973,
"grad_norm": 0.1352308988571167,
"learning_rate": 0.0001,
"loss": 1.6213,
"step": 2386
},
{
"epoch": 0.7991295614328758,
"grad_norm": 0.1368694305419922,
"learning_rate": 0.0001,
"loss": 1.5861,
"step": 2387
},
{
"epoch": 0.7994643454971543,
"grad_norm": 0.1355554759502411,
"learning_rate": 0.0001,
"loss": 1.5377,
"step": 2388
},
{
"epoch": 0.7997991295614328,
"grad_norm": 0.13328254222869873,
"learning_rate": 0.0001,
"loss": 1.5517,
"step": 2389
},
{
"epoch": 0.8001339136257114,
"grad_norm": 0.13724930584430695,
"learning_rate": 0.0001,
"loss": 1.5987,
"step": 2390
},
{
"epoch": 0.80046869768999,
"grad_norm": 0.13542616367340088,
"learning_rate": 0.0001,
"loss": 1.6654,
"step": 2391
},
{
"epoch": 0.8008034817542685,
"grad_norm": 0.1366943120956421,
"learning_rate": 0.0001,
"loss": 1.6196,
"step": 2392
},
{
"epoch": 0.8011382658185471,
"grad_norm": 0.13868063688278198,
"learning_rate": 0.0001,
"loss": 1.587,
"step": 2393
},
{
"epoch": 0.8014730498828255,
"grad_norm": 0.1393207311630249,
"learning_rate": 0.0001,
"loss": 1.5559,
"step": 2394
},
{
"epoch": 0.8018078339471041,
"grad_norm": 0.13909262418746948,
"learning_rate": 0.0001,
"loss": 1.5007,
"step": 2395
},
{
"epoch": 0.8021426180113826,
"grad_norm": 0.12949267029762268,
"learning_rate": 0.0001,
"loss": 1.5108,
"step": 2396
},
{
"epoch": 0.8024774020756612,
"grad_norm": 0.12755730748176575,
"learning_rate": 0.0001,
"loss": 1.5008,
"step": 2397
},
{
"epoch": 0.8028121861399398,
"grad_norm": 0.12899887561798096,
"learning_rate": 0.0001,
"loss": 1.3877,
"step": 2398
},
{
"epoch": 0.8031469702042183,
"grad_norm": 0.1423116773366928,
"learning_rate": 0.0001,
"loss": 1.4996,
"step": 2399
},
{
"epoch": 0.8034817542684968,
"grad_norm": 0.13548225164413452,
"learning_rate": 0.0001,
"loss": 1.5214,
"step": 2400
},
{
"epoch": 0.8038165383327753,
"grad_norm": 0.13150808215141296,
"learning_rate": 0.0001,
"loss": 1.4772,
"step": 2401
},
{
"epoch": 0.8041513223970539,
"grad_norm": 0.13790038228034973,
"learning_rate": 0.0001,
"loss": 1.5704,
"step": 2402
},
{
"epoch": 0.8044861064613325,
"grad_norm": 0.13106264173984528,
"learning_rate": 0.0001,
"loss": 1.5073,
"step": 2403
},
{
"epoch": 0.804820890525611,
"grad_norm": 0.13568797707557678,
"learning_rate": 0.0001,
"loss": 1.6371,
"step": 2404
},
{
"epoch": 0.8051556745898896,
"grad_norm": 0.13882842659950256,
"learning_rate": 0.0001,
"loss": 1.5571,
"step": 2405
},
{
"epoch": 0.805490458654168,
"grad_norm": 0.1312180459499359,
"learning_rate": 0.0001,
"loss": 1.5625,
"step": 2406
},
{
"epoch": 0.8058252427184466,
"grad_norm": 0.12823453545570374,
"learning_rate": 0.0001,
"loss": 1.5046,
"step": 2407
},
{
"epoch": 0.8061600267827251,
"grad_norm": 0.13207179307937622,
"learning_rate": 0.0001,
"loss": 1.5031,
"step": 2408
},
{
"epoch": 0.8064948108470037,
"grad_norm": 0.1277305632829666,
"learning_rate": 0.0001,
"loss": 1.4867,
"step": 2409
},
{
"epoch": 0.8068295949112823,
"grad_norm": 0.13227322697639465,
"learning_rate": 0.0001,
"loss": 1.5019,
"step": 2410
},
{
"epoch": 0.8071643789755608,
"grad_norm": 0.1336304098367691,
"learning_rate": 0.0001,
"loss": 1.4424,
"step": 2411
},
{
"epoch": 0.8074991630398393,
"grad_norm": 0.13859078288078308,
"learning_rate": 0.0001,
"loss": 1.5301,
"step": 2412
},
{
"epoch": 0.8078339471041178,
"grad_norm": 0.1342136412858963,
"learning_rate": 0.0001,
"loss": 1.485,
"step": 2413
},
{
"epoch": 0.8081687311683964,
"grad_norm": 0.14003999531269073,
"learning_rate": 0.0001,
"loss": 1.5313,
"step": 2414
},
{
"epoch": 0.8085035152326749,
"grad_norm": 0.13216662406921387,
"learning_rate": 0.0001,
"loss": 1.52,
"step": 2415
},
{
"epoch": 0.8088382992969535,
"grad_norm": 0.1373407393693924,
"learning_rate": 0.0001,
"loss": 1.5157,
"step": 2416
},
{
"epoch": 0.8091730833612321,
"grad_norm": 0.13850343227386475,
"learning_rate": 0.0001,
"loss": 1.4971,
"step": 2417
},
{
"epoch": 0.8095078674255105,
"grad_norm": 0.1334608793258667,
"learning_rate": 0.0001,
"loss": 1.5237,
"step": 2418
},
{
"epoch": 0.8098426514897891,
"grad_norm": 0.13133668899536133,
"learning_rate": 0.0001,
"loss": 1.5053,
"step": 2419
},
{
"epoch": 0.8101774355540676,
"grad_norm": 0.13715368509292603,
"learning_rate": 0.0001,
"loss": 1.6357,
"step": 2420
},
{
"epoch": 0.8105122196183462,
"grad_norm": 0.14129430055618286,
"learning_rate": 0.0001,
"loss": 1.5736,
"step": 2421
},
{
"epoch": 0.8108470036826247,
"grad_norm": 0.133287250995636,
"learning_rate": 0.0001,
"loss": 1.4701,
"step": 2422
},
{
"epoch": 0.8111817877469032,
"grad_norm": 0.137081116437912,
"learning_rate": 0.0001,
"loss": 1.4562,
"step": 2423
},
{
"epoch": 0.8115165718111818,
"grad_norm": 0.13136571645736694,
"learning_rate": 0.0001,
"loss": 1.5014,
"step": 2424
},
{
"epoch": 0.8118513558754603,
"grad_norm": 0.13660964369773865,
"learning_rate": 0.0001,
"loss": 1.5533,
"step": 2425
},
{
"epoch": 0.8121861399397389,
"grad_norm": 0.145840123295784,
"learning_rate": 0.0001,
"loss": 1.6406,
"step": 2426
},
{
"epoch": 0.8125209240040174,
"grad_norm": 0.13612517714500427,
"learning_rate": 0.0001,
"loss": 1.4968,
"step": 2427
},
{
"epoch": 0.812855708068296,
"grad_norm": 0.14182846248149872,
"learning_rate": 0.0001,
"loss": 1.5507,
"step": 2428
},
{
"epoch": 0.8131904921325744,
"grad_norm": 0.13697752356529236,
"learning_rate": 0.0001,
"loss": 1.5241,
"step": 2429
},
{
"epoch": 0.813525276196853,
"grad_norm": 0.14000248908996582,
"learning_rate": 0.0001,
"loss": 1.6002,
"step": 2430
},
{
"epoch": 0.8138600602611316,
"grad_norm": 0.13774293661117554,
"learning_rate": 0.0001,
"loss": 1.5198,
"step": 2431
},
{
"epoch": 0.8141948443254101,
"grad_norm": 0.13524143397808075,
"learning_rate": 0.0001,
"loss": 1.5326,
"step": 2432
},
{
"epoch": 0.8145296283896887,
"grad_norm": 0.13584178686141968,
"learning_rate": 0.0001,
"loss": 1.5313,
"step": 2433
},
{
"epoch": 0.8148644124539672,
"grad_norm": 0.13589173555374146,
"learning_rate": 0.0001,
"loss": 1.5097,
"step": 2434
},
{
"epoch": 0.8151991965182457,
"grad_norm": 0.1420723795890808,
"learning_rate": 0.0001,
"loss": 1.593,
"step": 2435
},
{
"epoch": 0.8155339805825242,
"grad_norm": 0.13078542053699493,
"learning_rate": 0.0001,
"loss": 1.4239,
"step": 2436
},
{
"epoch": 0.8158687646468028,
"grad_norm": 0.14007273316383362,
"learning_rate": 0.0001,
"loss": 1.5912,
"step": 2437
},
{
"epoch": 0.8162035487110814,
"grad_norm": 0.13472947478294373,
"learning_rate": 0.0001,
"loss": 1.5146,
"step": 2438
},
{
"epoch": 0.8165383327753599,
"grad_norm": 0.13456539809703827,
"learning_rate": 0.0001,
"loss": 1.5277,
"step": 2439
},
{
"epoch": 0.8168731168396385,
"grad_norm": 0.13376279175281525,
"learning_rate": 0.0001,
"loss": 1.4554,
"step": 2440
},
{
"epoch": 0.8172079009039169,
"grad_norm": 0.13720721006393433,
"learning_rate": 0.0001,
"loss": 1.5463,
"step": 2441
},
{
"epoch": 0.8175426849681955,
"grad_norm": 0.1363624483346939,
"learning_rate": 0.0001,
"loss": 1.537,
"step": 2442
},
{
"epoch": 0.817877469032474,
"grad_norm": 0.13379956781864166,
"learning_rate": 0.0001,
"loss": 1.5831,
"step": 2443
},
{
"epoch": 0.8182122530967526,
"grad_norm": 0.13432839512825012,
"learning_rate": 0.0001,
"loss": 1.5511,
"step": 2444
},
{
"epoch": 0.8185470371610312,
"grad_norm": 0.1365717500448227,
"learning_rate": 0.0001,
"loss": 1.4519,
"step": 2445
},
{
"epoch": 0.8188818212253097,
"grad_norm": 0.13430190086364746,
"learning_rate": 0.0001,
"loss": 1.4878,
"step": 2446
},
{
"epoch": 0.8192166052895882,
"grad_norm": 0.13606110215187073,
"learning_rate": 0.0001,
"loss": 1.5585,
"step": 2447
},
{
"epoch": 0.8195513893538667,
"grad_norm": 0.13404667377471924,
"learning_rate": 0.0001,
"loss": 1.5156,
"step": 2448
},
{
"epoch": 0.8198861734181453,
"grad_norm": 0.14223212003707886,
"learning_rate": 0.0001,
"loss": 1.5904,
"step": 2449
},
{
"epoch": 0.8202209574824239,
"grad_norm": 0.13209384679794312,
"learning_rate": 0.0001,
"loss": 1.551,
"step": 2450
},
{
"epoch": 0.8205557415467024,
"grad_norm": 0.13522854447364807,
"learning_rate": 0.0001,
"loss": 1.5325,
"step": 2451
},
{
"epoch": 0.820890525610981,
"grad_norm": 0.13555531203746796,
"learning_rate": 0.0001,
"loss": 1.5327,
"step": 2452
},
{
"epoch": 0.8212253096752594,
"grad_norm": 0.13121196627616882,
"learning_rate": 0.0001,
"loss": 1.5208,
"step": 2453
},
{
"epoch": 0.821560093739538,
"grad_norm": 0.13988123834133148,
"learning_rate": 0.0001,
"loss": 1.6188,
"step": 2454
},
{
"epoch": 0.8218948778038165,
"grad_norm": 0.1347675770521164,
"learning_rate": 0.0001,
"loss": 1.5212,
"step": 2455
},
{
"epoch": 0.8222296618680951,
"grad_norm": 0.13975632190704346,
"learning_rate": 0.0001,
"loss": 1.6152,
"step": 2456
},
{
"epoch": 0.8225644459323737,
"grad_norm": 0.1271917223930359,
"learning_rate": 0.0001,
"loss": 1.4209,
"step": 2457
},
{
"epoch": 0.8228992299966521,
"grad_norm": 0.13226144015789032,
"learning_rate": 0.0001,
"loss": 1.5397,
"step": 2458
},
{
"epoch": 0.8232340140609307,
"grad_norm": 0.1391698569059372,
"learning_rate": 0.0001,
"loss": 1.5394,
"step": 2459
},
{
"epoch": 0.8235687981252092,
"grad_norm": 0.13757720589637756,
"learning_rate": 0.0001,
"loss": 1.5465,
"step": 2460
},
{
"epoch": 0.8239035821894878,
"grad_norm": 0.13116374611854553,
"learning_rate": 0.0001,
"loss": 1.5072,
"step": 2461
},
{
"epoch": 0.8242383662537663,
"grad_norm": 0.13408921658992767,
"learning_rate": 0.0001,
"loss": 1.5398,
"step": 2462
},
{
"epoch": 0.8245731503180449,
"grad_norm": 0.13682673871517181,
"learning_rate": 0.0001,
"loss": 1.574,
"step": 2463
},
{
"epoch": 0.8249079343823233,
"grad_norm": 0.12918630242347717,
"learning_rate": 0.0001,
"loss": 1.4619,
"step": 2464
},
{
"epoch": 0.8252427184466019,
"grad_norm": 0.14337001740932465,
"learning_rate": 0.0001,
"loss": 1.5494,
"step": 2465
},
{
"epoch": 0.8255775025108805,
"grad_norm": 0.13083745539188385,
"learning_rate": 0.0001,
"loss": 1.4594,
"step": 2466
},
{
"epoch": 0.825912286575159,
"grad_norm": 0.13452093303203583,
"learning_rate": 0.0001,
"loss": 1.5114,
"step": 2467
},
{
"epoch": 0.8262470706394376,
"grad_norm": 0.1375538408756256,
"learning_rate": 0.0001,
"loss": 1.5472,
"step": 2468
},
{
"epoch": 0.8265818547037161,
"grad_norm": 0.13618512451648712,
"learning_rate": 0.0001,
"loss": 1.5067,
"step": 2469
},
{
"epoch": 0.8269166387679946,
"grad_norm": 0.13334475457668304,
"learning_rate": 0.0001,
"loss": 1.5626,
"step": 2470
},
{
"epoch": 0.8272514228322732,
"grad_norm": 0.12935003638267517,
"learning_rate": 0.0001,
"loss": 1.4524,
"step": 2471
},
{
"epoch": 0.8275862068965517,
"grad_norm": 0.1333768367767334,
"learning_rate": 0.0001,
"loss": 1.4809,
"step": 2472
},
{
"epoch": 0.8279209909608303,
"grad_norm": 0.139461949467659,
"learning_rate": 0.0001,
"loss": 1.5265,
"step": 2473
},
{
"epoch": 0.8282557750251088,
"grad_norm": 0.14345921576023102,
"learning_rate": 0.0001,
"loss": 1.5911,
"step": 2474
},
{
"epoch": 0.8285905590893874,
"grad_norm": 0.12835142016410828,
"learning_rate": 0.0001,
"loss": 1.4934,
"step": 2475
},
{
"epoch": 0.8289253431536658,
"grad_norm": 0.13207587599754333,
"learning_rate": 0.0001,
"loss": 1.5013,
"step": 2476
},
{
"epoch": 0.8292601272179444,
"grad_norm": 0.14216424524784088,
"learning_rate": 0.0001,
"loss": 1.5783,
"step": 2477
},
{
"epoch": 0.829594911282223,
"grad_norm": 0.1372382938861847,
"learning_rate": 0.0001,
"loss": 1.5487,
"step": 2478
},
{
"epoch": 0.8299296953465015,
"grad_norm": 0.14100505411624908,
"learning_rate": 0.0001,
"loss": 1.5893,
"step": 2479
},
{
"epoch": 0.8302644794107801,
"grad_norm": 0.13831539452075958,
"learning_rate": 0.0001,
"loss": 1.5308,
"step": 2480
},
{
"epoch": 0.8305992634750586,
"grad_norm": 0.13254091143608093,
"learning_rate": 0.0001,
"loss": 1.509,
"step": 2481
},
{
"epoch": 0.8309340475393371,
"grad_norm": 0.13434451818466187,
"learning_rate": 0.0001,
"loss": 1.4544,
"step": 2482
},
{
"epoch": 0.8312688316036156,
"grad_norm": 0.13452693819999695,
"learning_rate": 0.0001,
"loss": 1.4875,
"step": 2483
},
{
"epoch": 0.8316036156678942,
"grad_norm": 0.13497060537338257,
"learning_rate": 0.0001,
"loss": 1.4973,
"step": 2484
},
{
"epoch": 0.8319383997321728,
"grad_norm": 0.13919363915920258,
"learning_rate": 0.0001,
"loss": 1.4425,
"step": 2485
},
{
"epoch": 0.8322731837964513,
"grad_norm": 0.14376235008239746,
"learning_rate": 0.0001,
"loss": 1.5438,
"step": 2486
},
{
"epoch": 0.8326079678607299,
"grad_norm": 0.13027647137641907,
"learning_rate": 0.0001,
"loss": 1.4899,
"step": 2487
},
{
"epoch": 0.8329427519250083,
"grad_norm": 0.1342213749885559,
"learning_rate": 0.0001,
"loss": 1.4716,
"step": 2488
},
{
"epoch": 0.8332775359892869,
"grad_norm": 0.1298682540655136,
"learning_rate": 0.0001,
"loss": 1.4359,
"step": 2489
},
{
"epoch": 0.8336123200535654,
"grad_norm": 0.13764667510986328,
"learning_rate": 0.0001,
"loss": 1.6205,
"step": 2490
},
{
"epoch": 0.833947104117844,
"grad_norm": 0.13023105263710022,
"learning_rate": 0.0001,
"loss": 1.4276,
"step": 2491
},
{
"epoch": 0.8342818881821226,
"grad_norm": 0.1355689913034439,
"learning_rate": 0.0001,
"loss": 1.4635,
"step": 2492
},
{
"epoch": 0.8346166722464011,
"grad_norm": 0.13397172093391418,
"learning_rate": 0.0001,
"loss": 1.5855,
"step": 2493
},
{
"epoch": 0.8349514563106796,
"grad_norm": 0.13192683458328247,
"learning_rate": 0.0001,
"loss": 1.5209,
"step": 2494
},
{
"epoch": 0.8352862403749581,
"grad_norm": 0.13405252993106842,
"learning_rate": 0.0001,
"loss": 1.5144,
"step": 2495
},
{
"epoch": 0.8356210244392367,
"grad_norm": 0.13375818729400635,
"learning_rate": 0.0001,
"loss": 1.4467,
"step": 2496
},
{
"epoch": 0.8359558085035153,
"grad_norm": 0.12543916702270508,
"learning_rate": 0.0001,
"loss": 1.3992,
"step": 2497
},
{
"epoch": 0.8362905925677938,
"grad_norm": 0.13587196171283722,
"learning_rate": 0.0001,
"loss": 1.487,
"step": 2498
},
{
"epoch": 0.8366253766320723,
"grad_norm": 0.13462427258491516,
"learning_rate": 0.0001,
"loss": 1.5455,
"step": 2499
},
{
"epoch": 0.8369601606963508,
"grad_norm": 0.13338516652584076,
"learning_rate": 0.0001,
"loss": 1.5612,
"step": 2500
}
],
"logging_steps": 1,
"max_steps": 2987,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.419234898870272e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}