27b-1-lora / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
3a8c17e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1638,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003663003663003663,
"grad_norm": 2.875383138656616,
"learning_rate": 2.0000000000000002e-07,
"loss": 2.6876986026763916,
"step": 2
},
{
"epoch": 0.007326007326007326,
"grad_norm": 0.7562136650085449,
"learning_rate": 6.000000000000001e-07,
"loss": 1.6656783819198608,
"step": 4
},
{
"epoch": 0.01098901098901099,
"grad_norm": 0.39378538727760315,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.8813486099243164,
"step": 6
},
{
"epoch": 0.014652014652014652,
"grad_norm": 0.1678856462240219,
"learning_rate": 1.4000000000000001e-06,
"loss": 2.073314666748047,
"step": 8
},
{
"epoch": 0.018315018315018316,
"grad_norm": 0.16630569100379944,
"learning_rate": 1.8000000000000001e-06,
"loss": 2.2603981494903564,
"step": 10
},
{
"epoch": 0.02197802197802198,
"grad_norm": 1.2313497066497803,
"learning_rate": 2.2e-06,
"loss": 2.0936238765716553,
"step": 12
},
{
"epoch": 0.02564102564102564,
"grad_norm": 0.6229072213172913,
"learning_rate": 2.6e-06,
"loss": 1.786569595336914,
"step": 14
},
{
"epoch": 0.029304029304029304,
"grad_norm": 0.21894435584545135,
"learning_rate": 3e-06,
"loss": 1.9302886724472046,
"step": 16
},
{
"epoch": 0.03296703296703297,
"grad_norm": 0.8146782517433167,
"learning_rate": 3.4000000000000005e-06,
"loss": 1.9158211946487427,
"step": 18
},
{
"epoch": 0.03663003663003663,
"grad_norm": 0.139973446726799,
"learning_rate": 3.8000000000000005e-06,
"loss": 1.801032543182373,
"step": 20
},
{
"epoch": 0.040293040293040296,
"grad_norm": 0.9421126842498779,
"learning_rate": 4.2000000000000004e-06,
"loss": 1.4370536804199219,
"step": 22
},
{
"epoch": 0.04395604395604396,
"grad_norm": 0.2687402665615082,
"learning_rate": 4.600000000000001e-06,
"loss": 1.680647611618042,
"step": 24
},
{
"epoch": 0.047619047619047616,
"grad_norm": 0.15750955045223236,
"learning_rate": 5e-06,
"loss": 1.6444307565689087,
"step": 26
},
{
"epoch": 0.05128205128205128,
"grad_norm": 0.3872029483318329,
"learning_rate": 5.400000000000001e-06,
"loss": 1.9668141603469849,
"step": 28
},
{
"epoch": 0.054945054945054944,
"grad_norm": 0.7753072381019592,
"learning_rate": 5.8e-06,
"loss": 1.345158576965332,
"step": 30
},
{
"epoch": 0.05860805860805861,
"grad_norm": 4.0286335945129395,
"learning_rate": 6.200000000000001e-06,
"loss": 1.2935595512390137,
"step": 32
},
{
"epoch": 0.06227106227106227,
"grad_norm": 0.11804597079753876,
"learning_rate": 6.600000000000001e-06,
"loss": 1.310804843902588,
"step": 34
},
{
"epoch": 0.06593406593406594,
"grad_norm": 0.3004632592201233,
"learning_rate": 7e-06,
"loss": 1.6871081590652466,
"step": 36
},
{
"epoch": 0.0695970695970696,
"grad_norm": 0.3273477554321289,
"learning_rate": 7.4e-06,
"loss": 1.4714224338531494,
"step": 38
},
{
"epoch": 0.07326007326007326,
"grad_norm": 0.38304704427719116,
"learning_rate": 7.800000000000002e-06,
"loss": 1.5327234268188477,
"step": 40
},
{
"epoch": 0.07692307692307693,
"grad_norm": 0.17035049200057983,
"learning_rate": 8.2e-06,
"loss": 1.2890535593032837,
"step": 42
},
{
"epoch": 0.08058608058608059,
"grad_norm": 0.18172013759613037,
"learning_rate": 8.6e-06,
"loss": 1.5969315767288208,
"step": 44
},
{
"epoch": 0.08424908424908426,
"grad_norm": 0.1807372272014618,
"learning_rate": 9e-06,
"loss": 1.6807602643966675,
"step": 46
},
{
"epoch": 0.08791208791208792,
"grad_norm": 0.2631019353866577,
"learning_rate": 9.4e-06,
"loss": 1.1396199464797974,
"step": 48
},
{
"epoch": 0.09157509157509157,
"grad_norm": 0.10772737115621567,
"learning_rate": 9.800000000000001e-06,
"loss": 1.576991319656372,
"step": 50
},
{
"epoch": 0.09523809523809523,
"grad_norm": 0.1471075862646103,
"learning_rate": 9.999991193950434e-06,
"loss": 0.8256194591522217,
"step": 52
},
{
"epoch": 0.0989010989010989,
"grad_norm": 2.466968059539795,
"learning_rate": 9.999920745760685e-06,
"loss": 1.1205874681472778,
"step": 54
},
{
"epoch": 0.10256410256410256,
"grad_norm": 0.4292626678943634,
"learning_rate": 9.99977985048407e-06,
"loss": 1.397847294807434,
"step": 56
},
{
"epoch": 0.10622710622710622,
"grad_norm": 0.11925622820854187,
"learning_rate": 9.999568510326332e-06,
"loss": 1.2620929479599,
"step": 58
},
{
"epoch": 0.10989010989010989,
"grad_norm": 0.08886076509952545,
"learning_rate": 9.999286728596034e-06,
"loss": 1.502614140510559,
"step": 60
},
{
"epoch": 0.11355311355311355,
"grad_norm": 0.31627047061920166,
"learning_rate": 9.998934509704524e-06,
"loss": 1.5195817947387695,
"step": 62
},
{
"epoch": 0.11721611721611722,
"grad_norm": 0.12464763224124908,
"learning_rate": 9.998511859165853e-06,
"loss": 1.4700745344161987,
"step": 64
},
{
"epoch": 0.12087912087912088,
"grad_norm": 0.2327689528465271,
"learning_rate": 9.998018783596694e-06,
"loss": 1.4286034107208252,
"step": 66
},
{
"epoch": 0.12454212454212454,
"grad_norm": 0.14609983563423157,
"learning_rate": 9.997455290716233e-06,
"loss": 1.3966401815414429,
"step": 68
},
{
"epoch": 0.1282051282051282,
"grad_norm": 0.4661511182785034,
"learning_rate": 9.996821389346058e-06,
"loss": 1.2351371049880981,
"step": 70
},
{
"epoch": 0.13186813186813187,
"grad_norm": 0.15036970376968384,
"learning_rate": 9.99611708941001e-06,
"loss": 1.5122816562652588,
"step": 72
},
{
"epoch": 0.13553113553113552,
"grad_norm": 0.1798364669084549,
"learning_rate": 9.995342401934034e-06,
"loss": 1.6327002048492432,
"step": 74
},
{
"epoch": 0.1391941391941392,
"grad_norm": 0.10958249866962433,
"learning_rate": 9.994497339046004e-06,
"loss": 1.124930739402771,
"step": 76
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.09284580498933792,
"learning_rate": 9.993581913975538e-06,
"loss": 1.516735553741455,
"step": 78
},
{
"epoch": 0.14652014652014653,
"grad_norm": 0.3077796399593353,
"learning_rate": 9.99259614105378e-06,
"loss": 1.620766043663025,
"step": 80
},
{
"epoch": 0.15018315018315018,
"grad_norm": 0.1259474903345108,
"learning_rate": 9.99154003571319e-06,
"loss": 1.2517590522766113,
"step": 82
},
{
"epoch": 0.15384615384615385,
"grad_norm": 0.10728312283754349,
"learning_rate": 9.990413614487288e-06,
"loss": 1.4343254566192627,
"step": 84
},
{
"epoch": 0.1575091575091575,
"grad_norm": 0.3186304569244385,
"learning_rate": 9.989216895010406e-06,
"loss": 1.5559083223342896,
"step": 86
},
{
"epoch": 0.16117216117216118,
"grad_norm": 0.47098618745803833,
"learning_rate": 9.987949896017412e-06,
"loss": 0.7234257459640503,
"step": 88
},
{
"epoch": 0.16483516483516483,
"grad_norm": 0.07171591371297836,
"learning_rate": 9.986612637343402e-06,
"loss": 0.920280396938324,
"step": 90
},
{
"epoch": 0.1684981684981685,
"grad_norm": 0.15488727390766144,
"learning_rate": 9.985205139923408e-06,
"loss": 1.374828577041626,
"step": 92
},
{
"epoch": 0.17216117216117216,
"grad_norm": 0.17309579253196716,
"learning_rate": 9.983727425792066e-06,
"loss": 1.40683114528656,
"step": 94
},
{
"epoch": 0.17582417582417584,
"grad_norm": 0.47913244366645813,
"learning_rate": 9.982179518083255e-06,
"loss": 1.296237826347351,
"step": 96
},
{
"epoch": 0.1794871794871795,
"grad_norm": 0.2404627501964569,
"learning_rate": 9.980561441029761e-06,
"loss": 1.3930805921554565,
"step": 98
},
{
"epoch": 0.18315018315018314,
"grad_norm": 0.273111492395401,
"learning_rate": 9.978873219962874e-06,
"loss": 1.218533992767334,
"step": 100
},
{
"epoch": 0.18681318681318682,
"grad_norm": 0.524663507938385,
"learning_rate": 9.977114881312008e-06,
"loss": 1.3098607063293457,
"step": 102
},
{
"epoch": 0.19047619047619047,
"grad_norm": 0.15053874254226685,
"learning_rate": 9.975286452604275e-06,
"loss": 1.0048173666000366,
"step": 104
},
{
"epoch": 0.19413919413919414,
"grad_norm": 0.3083300292491913,
"learning_rate": 9.973387962464066e-06,
"loss": 1.1155184507369995,
"step": 106
},
{
"epoch": 0.1978021978021978,
"grad_norm": 0.2675297260284424,
"learning_rate": 9.971419440612591e-06,
"loss": 1.3713957071304321,
"step": 108
},
{
"epoch": 0.20146520146520147,
"grad_norm": 0.12364854663610458,
"learning_rate": 9.969380917867421e-06,
"loss": 1.2191200256347656,
"step": 110
},
{
"epoch": 0.20512820512820512,
"grad_norm": 0.09928097575902939,
"learning_rate": 9.967272426142007e-06,
"loss": 1.275339961051941,
"step": 112
},
{
"epoch": 0.2087912087912088,
"grad_norm": 0.24085743725299835,
"learning_rate": 9.965093998445174e-06,
"loss": 0.6748014092445374,
"step": 114
},
{
"epoch": 0.21245421245421245,
"grad_norm": 0.3158385157585144,
"learning_rate": 9.962845668880606e-06,
"loss": 1.343500018119812,
"step": 116
},
{
"epoch": 0.21611721611721613,
"grad_norm": 0.18101683259010315,
"learning_rate": 9.96052747264632e-06,
"loss": 1.3496334552764893,
"step": 118
},
{
"epoch": 0.21978021978021978,
"grad_norm": 0.22877711057662964,
"learning_rate": 9.9581394460341e-06,
"loss": 1.399277925491333,
"step": 120
},
{
"epoch": 0.22344322344322345,
"grad_norm": 0.1900375783443451,
"learning_rate": 9.955681626428944e-06,
"loss": 1.351351261138916,
"step": 122
},
{
"epoch": 0.2271062271062271,
"grad_norm": 0.2790425419807434,
"learning_rate": 9.95315405230847e-06,
"loss": 1.0575695037841797,
"step": 124
},
{
"epoch": 0.23076923076923078,
"grad_norm": 0.1452687829732895,
"learning_rate": 9.950556763242316e-06,
"loss": 0.9282295107841492,
"step": 126
},
{
"epoch": 0.23443223443223443,
"grad_norm": 0.4646700322628021,
"learning_rate": 9.947889799891517e-06,
"loss": 1.2313032150268555,
"step": 128
},
{
"epoch": 0.23809523809523808,
"grad_norm": 0.2979331612586975,
"learning_rate": 9.94515320400788e-06,
"loss": 1.3399178981781006,
"step": 130
},
{
"epoch": 0.24175824175824176,
"grad_norm": 0.13976508378982544,
"learning_rate": 9.942347018433312e-06,
"loss": 1.439223289489746,
"step": 132
},
{
"epoch": 0.2454212454212454,
"grad_norm": 0.18163283169269562,
"learning_rate": 9.939471287099167e-06,
"loss": 1.3410747051239014,
"step": 134
},
{
"epoch": 0.2490842490842491,
"grad_norm": 0.62637859582901,
"learning_rate": 9.936526055025547e-06,
"loss": 1.3174734115600586,
"step": 136
},
{
"epoch": 0.25274725274725274,
"grad_norm": 0.33270207047462463,
"learning_rate": 9.933511368320602e-06,
"loss": 1.1196776628494263,
"step": 138
},
{
"epoch": 0.2564102564102564,
"grad_norm": 0.10809160023927689,
"learning_rate": 9.930427274179808e-06,
"loss": 1.3199713230133057,
"step": 140
},
{
"epoch": 0.2600732600732601,
"grad_norm": 0.31561923027038574,
"learning_rate": 9.927273820885223e-06,
"loss": 1.3330121040344238,
"step": 142
},
{
"epoch": 0.26373626373626374,
"grad_norm": 0.15577837824821472,
"learning_rate": 9.924051057804742e-06,
"loss": 1.2973798513412476,
"step": 144
},
{
"epoch": 0.2673992673992674,
"grad_norm": 0.2457117736339569,
"learning_rate": 9.920759035391308e-06,
"loss": 1.454565405845642,
"step": 146
},
{
"epoch": 0.27106227106227104,
"grad_norm": 0.8564723134040833,
"learning_rate": 9.917397805182145e-06,
"loss": 1.354149580001831,
"step": 148
},
{
"epoch": 0.27472527472527475,
"grad_norm": 0.11519961059093475,
"learning_rate": 9.913967419797924e-06,
"loss": 0.9727800488471985,
"step": 150
},
{
"epoch": 0.2783882783882784,
"grad_norm": 0.253650039434433,
"learning_rate": 9.910467932941962e-06,
"loss": 0.6865445375442505,
"step": 152
},
{
"epoch": 0.28205128205128205,
"grad_norm": 0.2849763333797455,
"learning_rate": 9.90689939939937e-06,
"loss": 1.126017451286316,
"step": 154
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.17028306424617767,
"learning_rate": 9.903261875036192e-06,
"loss": 1.3710747957229614,
"step": 156
},
{
"epoch": 0.2893772893772894,
"grad_norm": 0.13653086125850677,
"learning_rate": 9.899555416798546e-06,
"loss": 1.3111331462860107,
"step": 158
},
{
"epoch": 0.29304029304029305,
"grad_norm": 0.5475143790245056,
"learning_rate": 9.895780082711717e-06,
"loss": 0.9597386717796326,
"step": 160
},
{
"epoch": 0.2967032967032967,
"grad_norm": 0.3660220503807068,
"learning_rate": 9.891935931879252e-06,
"loss": 1.5575504302978516,
"step": 162
},
{
"epoch": 0.30036630036630035,
"grad_norm": 0.12948466837406158,
"learning_rate": 9.888023024482041e-06,
"loss": 1.0819988250732422,
"step": 164
},
{
"epoch": 0.304029304029304,
"grad_norm": 0.5461969375610352,
"learning_rate": 9.884041421777369e-06,
"loss": 1.4256272315979004,
"step": 166
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.16308943927288055,
"learning_rate": 9.879991186097959e-06,
"loss": 1.1550545692443848,
"step": 168
},
{
"epoch": 0.31135531135531136,
"grad_norm": 0.23025067150592804,
"learning_rate": 9.875872380850992e-06,
"loss": 1.2838108539581299,
"step": 170
},
{
"epoch": 0.315018315018315,
"grad_norm": 1.0842785835266113,
"learning_rate": 9.871685070517124e-06,
"loss": 1.027992606163025,
"step": 172
},
{
"epoch": 0.31868131868131866,
"grad_norm": 0.8840537071228027,
"learning_rate": 9.86742932064947e-06,
"loss": 0.8895283341407776,
"step": 174
},
{
"epoch": 0.32234432234432236,
"grad_norm": 0.4256756901741028,
"learning_rate": 9.863105197872574e-06,
"loss": 1.4210491180419922,
"step": 176
},
{
"epoch": 0.326007326007326,
"grad_norm": 0.32473987340927124,
"learning_rate": 9.858712769881375e-06,
"loss": 0.940653920173645,
"step": 178
},
{
"epoch": 0.32967032967032966,
"grad_norm": 0.1946435272693634,
"learning_rate": 9.854252105440142e-06,
"loss": 1.523209810256958,
"step": 180
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.12392517179250717,
"learning_rate": 9.849723274381395e-06,
"loss": 0.9991880059242249,
"step": 182
},
{
"epoch": 0.336996336996337,
"grad_norm": 0.18956027925014496,
"learning_rate": 9.845126347604818e-06,
"loss": 1.2698228359222412,
"step": 184
},
{
"epoch": 0.34065934065934067,
"grad_norm": 0.31590884923934937,
"learning_rate": 9.840461397076147e-06,
"loss": 1.3860504627227783,
"step": 186
},
{
"epoch": 0.3443223443223443,
"grad_norm": 0.11410943418741226,
"learning_rate": 9.835728495826036e-06,
"loss": 1.1887812614440918,
"step": 188
},
{
"epoch": 0.34798534798534797,
"grad_norm": 0.29438552260398865,
"learning_rate": 9.830927717948929e-06,
"loss": 1.294023036956787,
"step": 190
},
{
"epoch": 0.3516483516483517,
"grad_norm": 1.1163926124572754,
"learning_rate": 9.826059138601883e-06,
"loss": 1.124396800994873,
"step": 192
},
{
"epoch": 0.3553113553113553,
"grad_norm": 0.09197133034467697,
"learning_rate": 9.821122834003407e-06,
"loss": 1.27751624584198,
"step": 194
},
{
"epoch": 0.358974358974359,
"grad_norm": 0.23845773935317993,
"learning_rate": 9.816118881432255e-06,
"loss": 1.2824617624282837,
"step": 196
},
{
"epoch": 0.3626373626373626,
"grad_norm": 0.16290828585624695,
"learning_rate": 9.811047359226224e-06,
"loss": 0.8826183080673218,
"step": 198
},
{
"epoch": 0.3663003663003663,
"grad_norm": 0.24791596829891205,
"learning_rate": 9.805908346780929e-06,
"loss": 1.044391393661499,
"step": 200
},
{
"epoch": 0.36996336996337,
"grad_norm": 0.2740170955657959,
"learning_rate": 9.80070192454855e-06,
"loss": 1.4561749696731567,
"step": 202
},
{
"epoch": 0.37362637362637363,
"grad_norm": 0.33053258061408997,
"learning_rate": 9.795428174036591e-06,
"loss": 1.2278764247894287,
"step": 204
},
{
"epoch": 0.3772893772893773,
"grad_norm": 0.7583060264587402,
"learning_rate": 9.790087177806584e-06,
"loss": 0.7968496084213257,
"step": 206
},
{
"epoch": 0.38095238095238093,
"grad_norm": 0.2668805718421936,
"learning_rate": 9.784679019472809e-06,
"loss": 1.1589165925979614,
"step": 208
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.21432484686374664,
"learning_rate": 9.779203783700972e-06,
"loss": 1.4328304529190063,
"step": 210
},
{
"epoch": 0.3882783882783883,
"grad_norm": 0.191499263048172,
"learning_rate": 9.773661556206903e-06,
"loss": 1.0945113897323608,
"step": 212
},
{
"epoch": 0.39194139194139194,
"grad_norm": 0.15214745700359344,
"learning_rate": 9.768052423755192e-06,
"loss": 1.1581294536590576,
"step": 214
},
{
"epoch": 0.3956043956043956,
"grad_norm": 0.23848576843738556,
"learning_rate": 9.762376474157839e-06,
"loss": 1.2475342750549316,
"step": 216
},
{
"epoch": 0.3992673992673993,
"grad_norm": 0.2269514501094818,
"learning_rate": 9.756633796272876e-06,
"loss": 1.2841179370880127,
"step": 218
},
{
"epoch": 0.40293040293040294,
"grad_norm": 0.07938987016677856,
"learning_rate": 9.750824480002982e-06,
"loss": 0.623121976852417,
"step": 220
},
{
"epoch": 0.4065934065934066,
"grad_norm": 0.3509514331817627,
"learning_rate": 9.744948616294074e-06,
"loss": 1.364533543586731,
"step": 222
},
{
"epoch": 0.41025641025641024,
"grad_norm": 0.20469792187213898,
"learning_rate": 9.739006297133878e-06,
"loss": 1.0975794792175293,
"step": 224
},
{
"epoch": 0.4139194139194139,
"grad_norm": 0.2600097358226776,
"learning_rate": 9.732997615550495e-06,
"loss": 1.2632966041564941,
"step": 226
},
{
"epoch": 0.4175824175824176,
"grad_norm": 0.15840594470500946,
"learning_rate": 9.726922665610935e-06,
"loss": 1.3373838663101196,
"step": 228
},
{
"epoch": 0.42124542124542125,
"grad_norm": 0.43822696805000305,
"learning_rate": 9.720781542419662e-06,
"loss": 1.2531630992889404,
"step": 230
},
{
"epoch": 0.4249084249084249,
"grad_norm": 0.5942100286483765,
"learning_rate": 9.714574342117086e-06,
"loss": 1.0207842588424683,
"step": 232
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.23664861917495728,
"learning_rate": 9.70830116187807e-06,
"loss": 1.5113677978515625,
"step": 234
},
{
"epoch": 0.43223443223443225,
"grad_norm": 0.3284321427345276,
"learning_rate": 9.701962099910407e-06,
"loss": 1.0360337495803833,
"step": 236
},
{
"epoch": 0.4358974358974359,
"grad_norm": 0.2513348460197449,
"learning_rate": 9.695557255453273e-06,
"loss": 1.0973368883132935,
"step": 238
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.45316001772880554,
"learning_rate": 9.68908672877569e-06,
"loss": 0.9152914881706238,
"step": 240
},
{
"epoch": 0.4432234432234432,
"grad_norm": 0.2768547236919403,
"learning_rate": 9.682550621174942e-06,
"loss": 0.8826823830604553,
"step": 242
},
{
"epoch": 0.4468864468864469,
"grad_norm": 0.14853699505329132,
"learning_rate": 9.675949034974992e-06,
"loss": 0.5798932313919067,
"step": 244
},
{
"epoch": 0.45054945054945056,
"grad_norm": 0.1571403294801712,
"learning_rate": 9.669282073524892e-06,
"loss": 1.2800544500350952,
"step": 246
},
{
"epoch": 0.4542124542124542,
"grad_norm": 0.20789006352424622,
"learning_rate": 9.662549841197148e-06,
"loss": 0.893764853477478,
"step": 248
},
{
"epoch": 0.45787545787545786,
"grad_norm": 0.7506678104400635,
"learning_rate": 9.655752443386092e-06,
"loss": 1.2865655422210693,
"step": 250
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.39902183413505554,
"learning_rate": 9.64888998650624e-06,
"loss": 1.1993688344955444,
"step": 252
},
{
"epoch": 0.4652014652014652,
"grad_norm": 0.3465142846107483,
"learning_rate": 9.641962577990614e-06,
"loss": 0.9851580262184143,
"step": 254
},
{
"epoch": 0.46886446886446886,
"grad_norm": 0.18256494402885437,
"learning_rate": 9.634970326289071e-06,
"loss": 1.2847747802734375,
"step": 256
},
{
"epoch": 0.4725274725274725,
"grad_norm": 0.24586841464042664,
"learning_rate": 9.627913340866597e-06,
"loss": 1.3066174983978271,
"step": 258
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.11027955263853073,
"learning_rate": 9.620791732201595e-06,
"loss": 0.8039655685424805,
"step": 260
},
{
"epoch": 0.47985347985347987,
"grad_norm": 0.15749269723892212,
"learning_rate": 9.613605611784158e-06,
"loss": 1.1634037494659424,
"step": 262
},
{
"epoch": 0.4835164835164835,
"grad_norm": 0.23077067732810974,
"learning_rate": 9.606355092114327e-06,
"loss": 1.2528202533721924,
"step": 264
},
{
"epoch": 0.48717948717948717,
"grad_norm": 0.18674089014530182,
"learning_rate": 9.599040286700317e-06,
"loss": 1.5212275981903076,
"step": 266
},
{
"epoch": 0.4908424908424908,
"grad_norm": 0.4802699089050293,
"learning_rate": 9.591661310056753e-06,
"loss": 0.8288567662239075,
"step": 268
},
{
"epoch": 0.4945054945054945,
"grad_norm": 0.1448894888162613,
"learning_rate": 9.58421827770287e-06,
"loss": 1.2230876684188843,
"step": 270
},
{
"epoch": 0.4981684981684982,
"grad_norm": 0.19190412759780884,
"learning_rate": 9.57671130616071e-06,
"loss": 0.9024039506912231,
"step": 272
},
{
"epoch": 0.5018315018315018,
"grad_norm": 0.3073454797267914,
"learning_rate": 9.569140512953296e-06,
"loss": 1.2714391946792603,
"step": 274
},
{
"epoch": 0.5054945054945055,
"grad_norm": 0.3199959993362427,
"learning_rate": 9.561506016602782e-06,
"loss": 0.8202919363975525,
"step": 276
},
{
"epoch": 0.5091575091575091,
"grad_norm": 0.09401345998048782,
"learning_rate": 9.553807936628617e-06,
"loss": 0.8935064673423767,
"step": 278
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.21345993876457214,
"learning_rate": 9.546046393545655e-06,
"loss": 1.2741483449935913,
"step": 280
},
{
"epoch": 0.5164835164835165,
"grad_norm": 0.23345427215099335,
"learning_rate": 9.538221508862284e-06,
"loss": 1.2695109844207764,
"step": 282
},
{
"epoch": 0.5201465201465202,
"grad_norm": 0.16931022703647614,
"learning_rate": 9.530333405078512e-06,
"loss": 1.274514079093933,
"step": 284
},
{
"epoch": 0.5238095238095238,
"grad_norm": 0.33658501505851746,
"learning_rate": 9.522382205684053e-06,
"loss": 1.0144422054290771,
"step": 286
},
{
"epoch": 0.5274725274725275,
"grad_norm": 0.21759743988513947,
"learning_rate": 9.514368035156398e-06,
"loss": 1.2731945514678955,
"step": 288
},
{
"epoch": 0.5311355311355311,
"grad_norm": 0.17717669904232025,
"learning_rate": 9.506291018958857e-06,
"loss": 1.2374247312545776,
"step": 290
},
{
"epoch": 0.5347985347985348,
"grad_norm": 0.337706983089447,
"learning_rate": 9.498151283538608e-06,
"loss": 0.7559359669685364,
"step": 292
},
{
"epoch": 0.5384615384615384,
"grad_norm": 0.035663675516843796,
"learning_rate": 9.489948956324706e-06,
"loss": 0.9581714868545532,
"step": 294
},
{
"epoch": 0.5421245421245421,
"grad_norm": 0.12138810753822327,
"learning_rate": 9.481684165726086e-06,
"loss": 1.0345128774642944,
"step": 296
},
{
"epoch": 0.5457875457875457,
"grad_norm": 0.39733827114105225,
"learning_rate": 9.473357041129572e-06,
"loss": 1.3242045640945435,
"step": 298
},
{
"epoch": 0.5494505494505495,
"grad_norm": 0.16901174187660217,
"learning_rate": 9.464967712897828e-06,
"loss": 1.2276860475540161,
"step": 300
},
{
"epoch": 0.5531135531135531,
"grad_norm": 0.5484493374824524,
"learning_rate": 9.456516312367328e-06,
"loss": 1.2076282501220703,
"step": 302
},
{
"epoch": 0.5567765567765568,
"grad_norm": 0.17032906413078308,
"learning_rate": 9.448002971846307e-06,
"loss": 0.9942311644554138,
"step": 304
},
{
"epoch": 0.5604395604395604,
"grad_norm": 0.24507595598697662,
"learning_rate": 9.439427824612673e-06,
"loss": 0.9752069115638733,
"step": 306
},
{
"epoch": 0.5641025641025641,
"grad_norm": 0.40566012263298035,
"learning_rate": 9.430791004911934e-06,
"loss": 1.4564454555511475,
"step": 308
},
{
"epoch": 0.5677655677655677,
"grad_norm": 0.1568066030740738,
"learning_rate": 9.42209264795509e-06,
"loss": 1.0061030387878418,
"step": 310
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.16984346508979797,
"learning_rate": 9.41333288991652e-06,
"loss": 1.2216694355010986,
"step": 312
},
{
"epoch": 0.575091575091575,
"grad_norm": 0.09158849716186523,
"learning_rate": 9.404511867931847e-06,
"loss": 1.1522339582443237,
"step": 314
},
{
"epoch": 0.5787545787545788,
"grad_norm": 0.16296543180942535,
"learning_rate": 9.39562972009579e-06,
"loss": 1.293960452079773,
"step": 316
},
{
"epoch": 0.5824175824175825,
"grad_norm": 0.24195973575115204,
"learning_rate": 9.386686585460011e-06,
"loss": 1.1431677341461182,
"step": 318
},
{
"epoch": 0.5860805860805861,
"grad_norm": 0.1092909500002861,
"learning_rate": 9.377682604030925e-06,
"loss": 1.3567752838134766,
"step": 320
},
{
"epoch": 0.5897435897435898,
"grad_norm": 0.1672687828540802,
"learning_rate": 9.368617916767517e-06,
"loss": 1.5480321645736694,
"step": 322
},
{
"epoch": 0.5934065934065934,
"grad_norm": 0.18804782629013062,
"learning_rate": 9.359492665579136e-06,
"loss": 1.2884105443954468,
"step": 324
},
{
"epoch": 0.5970695970695971,
"grad_norm": 0.2078697383403778,
"learning_rate": 9.350306993323265e-06,
"loss": 1.3802863359451294,
"step": 326
},
{
"epoch": 0.6007326007326007,
"grad_norm": 0.16467250883579254,
"learning_rate": 9.34106104380329e-06,
"loss": 1.2509921789169312,
"step": 328
},
{
"epoch": 0.6043956043956044,
"grad_norm": 0.46313583850860596,
"learning_rate": 9.331754961766257e-06,
"loss": 1.140839695930481,
"step": 330
},
{
"epoch": 0.608058608058608,
"grad_norm": 0.14376887679100037,
"learning_rate": 9.322388892900587e-06,
"loss": 1.201643943786621,
"step": 332
},
{
"epoch": 0.6117216117216118,
"grad_norm": 0.1362253874540329,
"learning_rate": 9.312962983833815e-06,
"loss": 1.3028783798217773,
"step": 334
},
{
"epoch": 0.6153846153846154,
"grad_norm": 3.4290378093719482,
"learning_rate": 9.303477382130278e-06,
"loss": 0.973407506942749,
"step": 336
},
{
"epoch": 0.6190476190476191,
"grad_norm": 0.16140861809253693,
"learning_rate": 9.293932236288816e-06,
"loss": 1.2559469938278198,
"step": 338
},
{
"epoch": 0.6227106227106227,
"grad_norm": 0.1613743007183075,
"learning_rate": 9.284327695740441e-06,
"loss": 1.256553292274475,
"step": 340
},
{
"epoch": 0.6263736263736264,
"grad_norm": 0.34570202231407166,
"learning_rate": 9.274663910846004e-06,
"loss": 0.5801024436950684,
"step": 342
},
{
"epoch": 0.63003663003663,
"grad_norm": 0.28319358825683594,
"learning_rate": 9.264941032893836e-06,
"loss": 1.4648103713989258,
"step": 344
},
{
"epoch": 0.6336996336996337,
"grad_norm": 16.52604866027832,
"learning_rate": 9.255159214097374e-06,
"loss": 0.6978890895843506,
"step": 346
},
{
"epoch": 0.6373626373626373,
"grad_norm": 0.19958341121673584,
"learning_rate": 9.245318607592795e-06,
"loss": 1.1675150394439697,
"step": 348
},
{
"epoch": 0.6410256410256411,
"grad_norm": 0.24133825302124023,
"learning_rate": 9.235419367436602e-06,
"loss": 0.8993176221847534,
"step": 350
},
{
"epoch": 0.6446886446886447,
"grad_norm": 0.20524722337722778,
"learning_rate": 9.225461648603223e-06,
"loss": 0.9288710951805115,
"step": 352
},
{
"epoch": 0.6483516483516484,
"grad_norm": 0.391886830329895,
"learning_rate": 9.215445606982573e-06,
"loss": 0.9668469429016113,
"step": 354
},
{
"epoch": 0.652014652014652,
"grad_norm": 0.2540344297885895,
"learning_rate": 9.205371399377628e-06,
"loss": 1.1877306699752808,
"step": 356
},
{
"epoch": 0.6556776556776557,
"grad_norm": 0.21765393018722534,
"learning_rate": 9.195239183501961e-06,
"loss": 1.1672714948654175,
"step": 358
},
{
"epoch": 0.6593406593406593,
"grad_norm": 0.19967345893383026,
"learning_rate": 9.185049117977276e-06,
"loss": 0.7011613845825195,
"step": 360
},
{
"epoch": 0.663003663003663,
"grad_norm": 0.7372376322746277,
"learning_rate": 9.17480136233092e-06,
"loss": 0.9566145539283752,
"step": 362
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.20093770325183868,
"learning_rate": 9.164496076993395e-06,
"loss": 0.946535587310791,
"step": 364
},
{
"epoch": 0.6703296703296703,
"grad_norm": 0.2989659607410431,
"learning_rate": 9.154133423295836e-06,
"loss": 1.203826904296875,
"step": 366
},
{
"epoch": 0.673992673992674,
"grad_norm": 0.25106337666511536,
"learning_rate": 9.143713563467495e-06,
"loss": 1.0666961669921875,
"step": 368
},
{
"epoch": 0.6776556776556777,
"grad_norm": 0.11923722177743912,
"learning_rate": 9.133236660633192e-06,
"loss": 1.097327709197998,
"step": 370
},
{
"epoch": 0.6813186813186813,
"grad_norm": 0.3943967819213867,
"learning_rate": 9.12270287881077e-06,
"loss": 1.2011562585830688,
"step": 372
},
{
"epoch": 0.684981684981685,
"grad_norm": 0.1692187637090683,
"learning_rate": 9.112112382908516e-06,
"loss": 1.2239218950271606,
"step": 374
},
{
"epoch": 0.6886446886446886,
"grad_norm": 0.10792715102434158,
"learning_rate": 9.101465338722596e-06,
"loss": 0.9010005593299866,
"step": 376
},
{
"epoch": 0.6923076923076923,
"grad_norm": 0.1825140416622162,
"learning_rate": 9.090761912934441e-06,
"loss": 0.8389140367507935,
"step": 378
},
{
"epoch": 0.6959706959706959,
"grad_norm": 0.28178316354751587,
"learning_rate": 9.080002273108155e-06,
"loss": 1.0628230571746826,
"step": 380
},
{
"epoch": 0.6996336996336996,
"grad_norm": 0.11631765961647034,
"learning_rate": 9.069186587687872e-06,
"loss": 0.9880151152610779,
"step": 382
},
{
"epoch": 0.7032967032967034,
"grad_norm": 0.1353641152381897,
"learning_rate": 9.058315025995142e-06,
"loss": 1.2020447254180908,
"step": 384
},
{
"epoch": 0.706959706959707,
"grad_norm": 0.6966851949691772,
"learning_rate": 9.047387758226261e-06,
"loss": 1.1148114204406738,
"step": 386
},
{
"epoch": 0.7106227106227107,
"grad_norm": 0.08536599576473236,
"learning_rate": 9.036404955449615e-06,
"loss": 0.8987938165664673,
"step": 388
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.164885014295578,
"learning_rate": 9.025366789603002e-06,
"loss": 1.0990866422653198,
"step": 390
},
{
"epoch": 0.717948717948718,
"grad_norm": 0.1607430875301361,
"learning_rate": 9.014273433490938e-06,
"loss": 1.1975574493408203,
"step": 392
},
{
"epoch": 0.7216117216117216,
"grad_norm": 0.3020445704460144,
"learning_rate": 9.003125060781951e-06,
"loss": 1.1362345218658447,
"step": 394
},
{
"epoch": 0.7252747252747253,
"grad_norm": 0.23285327851772308,
"learning_rate": 8.99192184600587e-06,
"loss": 1.264463186264038,
"step": 396
},
{
"epoch": 0.7289377289377289,
"grad_norm": 0.1471405327320099,
"learning_rate": 8.98066396455108e-06,
"loss": 0.9352213740348816,
"step": 398
},
{
"epoch": 0.7326007326007326,
"grad_norm": 0.39496904611587524,
"learning_rate": 8.969351592661787e-06,
"loss": 0.8601157665252686,
"step": 400
},
{
"epoch": 0.7362637362637363,
"grad_norm": 0.10789740085601807,
"learning_rate": 8.957984907435254e-06,
"loss": 1.2675104141235352,
"step": 402
},
{
"epoch": 0.73992673992674,
"grad_norm": 0.13899658620357513,
"learning_rate": 8.946564086819025e-06,
"loss": 0.8569284081459045,
"step": 404
},
{
"epoch": 0.7435897435897436,
"grad_norm": 0.2528247833251953,
"learning_rate": 8.935089309608152e-06,
"loss": 1.0413234233856201,
"step": 406
},
{
"epoch": 0.7472527472527473,
"grad_norm": 0.14703120291233063,
"learning_rate": 8.92356075544238e-06,
"loss": 1.0387818813323975,
"step": 408
},
{
"epoch": 0.7509157509157509,
"grad_norm": 0.39029836654663086,
"learning_rate": 8.911978604803346e-06,
"loss": 0.8937767744064331,
"step": 410
},
{
"epoch": 0.7545787545787546,
"grad_norm": 0.23668618500232697,
"learning_rate": 8.900343039011745e-06,
"loss": 1.1923093795776367,
"step": 412
},
{
"epoch": 0.7582417582417582,
"grad_norm": 0.4645112454891205,
"learning_rate": 8.888654240224503e-06,
"loss": 1.0234112739562988,
"step": 414
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.35793423652648926,
"learning_rate": 8.876912391431913e-06,
"loss": 1.2955764532089233,
"step": 416
},
{
"epoch": 0.7655677655677655,
"grad_norm": 0.09080661088228226,
"learning_rate": 8.86511767645478e-06,
"loss": 0.6778948903083801,
"step": 418
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.5035378932952881,
"learning_rate": 8.853270279941533e-06,
"loss": 1.2608743906021118,
"step": 420
},
{
"epoch": 0.7728937728937729,
"grad_norm": 0.48378103971481323,
"learning_rate": 8.841370387365344e-06,
"loss": 1.015937328338623,
"step": 422
},
{
"epoch": 0.7765567765567766,
"grad_norm": 0.18112313747406006,
"learning_rate": 8.829418185021221e-06,
"loss": 0.5042012929916382,
"step": 424
},
{
"epoch": 0.7802197802197802,
"grad_norm": 0.2163010537624359,
"learning_rate": 8.817413860023089e-06,
"loss": 0.8268504738807678,
"step": 426
},
{
"epoch": 0.7838827838827839,
"grad_norm": 0.2586314082145691,
"learning_rate": 8.805357600300863e-06,
"loss": 1.0975161790847778,
"step": 428
},
{
"epoch": 0.7875457875457875,
"grad_norm": 0.32366475462913513,
"learning_rate": 8.793249594597508e-06,
"loss": 1.2304267883300781,
"step": 430
},
{
"epoch": 0.7912087912087912,
"grad_norm": 0.18010596930980682,
"learning_rate": 8.781090032466079e-06,
"loss": 1.3180345296859741,
"step": 432
},
{
"epoch": 0.7948717948717948,
"grad_norm": 0.15381111204624176,
"learning_rate": 8.768879104266758e-06,
"loss": 0.894809901714325,
"step": 434
},
{
"epoch": 0.7985347985347986,
"grad_norm": 0.3722904920578003,
"learning_rate": 8.756617001163869e-06,
"loss": 0.9750258326530457,
"step": 436
},
{
"epoch": 0.8021978021978022,
"grad_norm": 0.1223578080534935,
"learning_rate": 8.744303915122895e-06,
"loss": 0.8995143175125122,
"step": 438
},
{
"epoch": 0.8058608058608059,
"grad_norm": 0.17111073434352875,
"learning_rate": 8.73194003890746e-06,
"loss": 1.3294800519943237,
"step": 440
},
{
"epoch": 0.8095238095238095,
"grad_norm": 0.8301756978034973,
"learning_rate": 8.719525566076322e-06,
"loss": 1.2234307527542114,
"step": 442
},
{
"epoch": 0.8131868131868132,
"grad_norm": 0.26485782861709595,
"learning_rate": 8.707060690980334e-06,
"loss": 1.2229658365249634,
"step": 444
},
{
"epoch": 0.8168498168498168,
"grad_norm": 0.4809357225894928,
"learning_rate": 8.69454560875941e-06,
"loss": 1.2412751913070679,
"step": 446
},
{
"epoch": 0.8205128205128205,
"grad_norm": 0.19155052304267883,
"learning_rate": 8.681980515339464e-06,
"loss": 1.2396912574768066,
"step": 448
},
{
"epoch": 0.8241758241758241,
"grad_norm": 0.11245301365852356,
"learning_rate": 8.669365607429344e-06,
"loss": 1.467288851737976,
"step": 450
},
{
"epoch": 0.8278388278388278,
"grad_norm": 0.203065425157547,
"learning_rate": 8.656701082517752e-06,
"loss": 1.008663296699524,
"step": 452
},
{
"epoch": 0.8315018315018315,
"grad_norm": 0.0740152969956398,
"learning_rate": 8.643987138870156e-06,
"loss": 0.5013046860694885,
"step": 454
},
{
"epoch": 0.8351648351648352,
"grad_norm": 0.06140409782528877,
"learning_rate": 8.631223975525683e-06,
"loss": 0.9132590293884277,
"step": 456
},
{
"epoch": 0.8388278388278388,
"grad_norm": 0.7447589635848999,
"learning_rate": 8.618411792293997e-06,
"loss": 0.8399595618247986,
"step": 458
},
{
"epoch": 0.8424908424908425,
"grad_norm": 0.43025752902030945,
"learning_rate": 8.605550789752191e-06,
"loss": 1.0485363006591797,
"step": 460
},
{
"epoch": 0.8461538461538461,
"grad_norm": 0.1829681098461151,
"learning_rate": 8.592641169241622e-06,
"loss": 1.2453057765960693,
"step": 462
},
{
"epoch": 0.8498168498168498,
"grad_norm": 0.24352803826332092,
"learning_rate": 8.579683132864769e-06,
"loss": 1.193666696548462,
"step": 464
},
{
"epoch": 0.8534798534798534,
"grad_norm": 0.08289831876754761,
"learning_rate": 8.56667688348208e-06,
"loss": 1.2128486633300781,
"step": 466
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.13429510593414307,
"learning_rate": 8.553622624708778e-06,
"loss": 0.8921034932136536,
"step": 468
},
{
"epoch": 0.8608058608058609,
"grad_norm": 0.2065141648054123,
"learning_rate": 8.540520560911688e-06,
"loss": 0.9356565475463867,
"step": 470
},
{
"epoch": 0.8644688644688645,
"grad_norm": 0.8355104923248291,
"learning_rate": 8.527370897206024e-06,
"loss": 1.1900638341903687,
"step": 472
},
{
"epoch": 0.8681318681318682,
"grad_norm": 0.7807270884513855,
"learning_rate": 8.514173839452194e-06,
"loss": 0.9948893189430237,
"step": 474
},
{
"epoch": 0.8717948717948718,
"grad_norm": 0.13567706942558289,
"learning_rate": 8.50092959425256e-06,
"loss": 1.1165426969528198,
"step": 476
},
{
"epoch": 0.8754578754578755,
"grad_norm": 0.2913936376571655,
"learning_rate": 8.487638368948221e-06,
"loss": 1.0576797723770142,
"step": 478
},
{
"epoch": 0.8791208791208791,
"grad_norm": 0.502364993095398,
"learning_rate": 8.47430037161575e-06,
"loss": 1.0835438966751099,
"step": 480
},
{
"epoch": 0.8827838827838828,
"grad_norm": 0.7570469379425049,
"learning_rate": 8.460915811063952e-06,
"loss": 1.204832673072815,
"step": 482
},
{
"epoch": 0.8864468864468864,
"grad_norm": 0.20631951093673706,
"learning_rate": 8.447484896830581e-06,
"loss": 1.2826550006866455,
"step": 484
},
{
"epoch": 0.8901098901098901,
"grad_norm": 0.18968936800956726,
"learning_rate": 8.43400783917907e-06,
"loss": 0.9170786142349243,
"step": 486
},
{
"epoch": 0.8937728937728938,
"grad_norm": 0.357719749212265,
"learning_rate": 8.420484849095233e-06,
"loss": 1.1806507110595703,
"step": 488
},
{
"epoch": 0.8974358974358975,
"grad_norm": 0.10344009846448898,
"learning_rate": 8.406916138283971e-06,
"loss": 1.1227405071258545,
"step": 490
},
{
"epoch": 0.9010989010989011,
"grad_norm": 0.18624161183834076,
"learning_rate": 8.393301919165947e-06,
"loss": 1.067802906036377,
"step": 492
},
{
"epoch": 0.9047619047619048,
"grad_norm": 0.23537158966064453,
"learning_rate": 8.379642404874261e-06,
"loss": 0.5906503796577454,
"step": 494
},
{
"epoch": 0.9084249084249084,
"grad_norm": 0.28865331411361694,
"learning_rate": 8.365937809251124e-06,
"loss": 1.2992898225784302,
"step": 496
},
{
"epoch": 0.9120879120879121,
"grad_norm": 0.25028523802757263,
"learning_rate": 8.352188346844501e-06,
"loss": 1.1510648727416992,
"step": 498
},
{
"epoch": 0.9157509157509157,
"grad_norm": 0.16311851143836975,
"learning_rate": 8.338394232904753e-06,
"loss": 0.8221940398216248,
"step": 500
},
{
"epoch": 0.9194139194139194,
"grad_norm": 0.040686819702386856,
"learning_rate": 8.324555683381276e-06,
"loss": 0.8739909529685974,
"step": 502
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.22888082265853882,
"learning_rate": 8.3106729149191e-06,
"loss": 0.954814076423645,
"step": 504
},
{
"epoch": 0.9267399267399268,
"grad_norm": 0.1041514128446579,
"learning_rate": 8.296746144855525e-06,
"loss": 0.8583929538726807,
"step": 506
},
{
"epoch": 0.9304029304029304,
"grad_norm": 0.1827676147222519,
"learning_rate": 8.282775591216691e-06,
"loss": 1.1817222833633423,
"step": 508
},
{
"epoch": 0.9340659340659341,
"grad_norm": 0.21790483593940735,
"learning_rate": 8.268761472714193e-06,
"loss": 1.2396169900894165,
"step": 510
},
{
"epoch": 0.9377289377289377,
"grad_norm": 0.16166400909423828,
"learning_rate": 8.254704008741629e-06,
"loss": 1.1866990327835083,
"step": 512
},
{
"epoch": 0.9413919413919414,
"grad_norm": 0.1379297822713852,
"learning_rate": 8.240603419371181e-06,
"loss": 0.9622292518615723,
"step": 514
},
{
"epoch": 0.945054945054945,
"grad_norm": 0.12670904397964478,
"learning_rate": 8.22645992535017e-06,
"loss": 1.1956757307052612,
"step": 516
},
{
"epoch": 0.9487179487179487,
"grad_norm": 0.09928123652935028,
"learning_rate": 8.2122737480976e-06,
"loss": 1.2315433025360107,
"step": 518
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.3714980185031891,
"learning_rate": 8.19804510970068e-06,
"loss": 1.2325382232666016,
"step": 520
},
{
"epoch": 0.9560439560439561,
"grad_norm": 0.1642584353685379,
"learning_rate": 8.183774232911362e-06,
"loss": 0.8959419131278992,
"step": 522
},
{
"epoch": 0.9597069597069597,
"grad_norm": 0.30108144879341125,
"learning_rate": 8.169461341142848e-06,
"loss": 1.0453133583068848,
"step": 524
},
{
"epoch": 0.9633699633699634,
"grad_norm": 0.2879306972026825,
"learning_rate": 8.155106658466094e-06,
"loss": 0.9845118522644043,
"step": 526
},
{
"epoch": 0.967032967032967,
"grad_norm": 0.32583609223365784,
"learning_rate": 8.140710409606289e-06,
"loss": 0.657010018825531,
"step": 528
},
{
"epoch": 0.9706959706959707,
"grad_norm": 0.19444482028484344,
"learning_rate": 8.126272819939364e-06,
"loss": 0.9151591062545776,
"step": 530
},
{
"epoch": 0.9743589743589743,
"grad_norm": 1.4497026205062866,
"learning_rate": 8.111794115488437e-06,
"loss": 1.3719483613967896,
"step": 532
},
{
"epoch": 0.978021978021978,
"grad_norm": 0.6636638641357422,
"learning_rate": 8.097274522920291e-06,
"loss": 0.6158185601234436,
"step": 534
},
{
"epoch": 0.9816849816849816,
"grad_norm": 0.23120753467082977,
"learning_rate": 8.082714269541814e-06,
"loss": 0.8738659620285034,
"step": 536
},
{
"epoch": 0.9853479853479854,
"grad_norm": 0.21588478982448578,
"learning_rate": 8.068113583296456e-06,
"loss": 0.8082484602928162,
"step": 538
},
{
"epoch": 0.989010989010989,
"grad_norm": 0.37794405221939087,
"learning_rate": 8.053472692760643e-06,
"loss": 1.1449978351593018,
"step": 540
},
{
"epoch": 0.9926739926739927,
"grad_norm": 0.21002766489982605,
"learning_rate": 8.038791827140208e-06,
"loss": 1.1861510276794434,
"step": 542
},
{
"epoch": 0.9963369963369964,
"grad_norm": 0.2881741523742676,
"learning_rate": 8.0240712162668e-06,
"loss": 1.2074682712554932,
"step": 544
},
{
"epoch": 1.0,
"grad_norm": 0.14409518241882324,
"learning_rate": 8.009311090594297e-06,
"loss": 1.3737009763717651,
"step": 546
},
{
"epoch": 1.0036630036630036,
"grad_norm": 0.3364971876144409,
"learning_rate": 7.994511681195175e-06,
"loss": 1.010398268699646,
"step": 548
},
{
"epoch": 1.0073260073260073,
"grad_norm": 0.20106230676174164,
"learning_rate": 7.97967321975691e-06,
"loss": 1.2091031074523926,
"step": 550
},
{
"epoch": 1.010989010989011,
"grad_norm": 0.3865948021411896,
"learning_rate": 7.964795938578347e-06,
"loss": 0.7033045887947083,
"step": 552
},
{
"epoch": 1.0146520146520146,
"grad_norm": 0.1542910784482956,
"learning_rate": 7.949880070566058e-06,
"loss": 1.161207914352417,
"step": 554
},
{
"epoch": 1.0183150183150182,
"grad_norm": 0.17765195667743683,
"learning_rate": 7.9349258492307e-06,
"loss": 1.050184726715088,
"step": 556
},
{
"epoch": 1.021978021978022,
"grad_norm": 0.13882611691951752,
"learning_rate": 7.91993350868336e-06,
"loss": 1.2430739402770996,
"step": 558
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.16586051881313324,
"learning_rate": 7.904903283631884e-06,
"loss": 0.9440419673919678,
"step": 560
},
{
"epoch": 1.0293040293040292,
"grad_norm": 0.19672122597694397,
"learning_rate": 7.88983540937721e-06,
"loss": 1.1903315782546997,
"step": 562
},
{
"epoch": 1.032967032967033,
"grad_norm": 0.23967498540878296,
"learning_rate": 7.87473012180968e-06,
"loss": 1.1820085048675537,
"step": 564
},
{
"epoch": 1.0366300366300367,
"grad_norm": 0.17814430594444275,
"learning_rate": 7.859587657405353e-06,
"loss": 1.196739912033081,
"step": 566
},
{
"epoch": 1.0402930402930404,
"grad_norm": 0.1445707082748413,
"learning_rate": 7.84440825322229e-06,
"loss": 1.084715723991394,
"step": 568
},
{
"epoch": 1.043956043956044,
"grad_norm": 0.18223969638347626,
"learning_rate": 7.829192146896854e-06,
"loss": 1.1364811658859253,
"step": 570
},
{
"epoch": 1.0476190476190477,
"grad_norm": 0.1871526837348938,
"learning_rate": 7.813939576639993e-06,
"loss": 1.2730778455734253,
"step": 572
},
{
"epoch": 1.0512820512820513,
"grad_norm": 0.48518720269203186,
"learning_rate": 7.798650781233495e-06,
"loss": 1.1072925329208374,
"step": 574
},
{
"epoch": 1.054945054945055,
"grad_norm": 0.2393021285533905,
"learning_rate": 7.783326000026266e-06,
"loss": 1.2872074842453003,
"step": 576
},
{
"epoch": 1.0586080586080586,
"grad_norm": 0.17235060036182404,
"learning_rate": 7.767965472930575e-06,
"loss": 1.2461888790130615,
"step": 578
},
{
"epoch": 1.0622710622710623,
"grad_norm": 0.501559853553772,
"learning_rate": 7.752569440418297e-06,
"loss": 1.2427866458892822,
"step": 580
},
{
"epoch": 1.065934065934066,
"grad_norm": 0.2508564889431,
"learning_rate": 7.737138143517153e-06,
"loss": 1.0125867128372192,
"step": 582
},
{
"epoch": 1.0695970695970696,
"grad_norm": 0.34929409623146057,
"learning_rate": 7.721671823806934e-06,
"loss": 1.1760741472244263,
"step": 584
},
{
"epoch": 1.0732600732600732,
"grad_norm": 0.11314375698566437,
"learning_rate": 7.70617072341572e-06,
"loss": 1.127938151359558,
"step": 586
},
{
"epoch": 1.0769230769230769,
"grad_norm": 0.1307147890329361,
"learning_rate": 7.690635085016087e-06,
"loss": 0.8474472165107727,
"step": 588
},
{
"epoch": 1.0805860805860805,
"grad_norm": 0.19595587253570557,
"learning_rate": 7.675065151821313e-06,
"loss": 1.2290217876434326,
"step": 590
},
{
"epoch": 1.0842490842490842,
"grad_norm": 0.05816657096147537,
"learning_rate": 7.659461167581564e-06,
"loss": 1.065525770187378,
"step": 592
},
{
"epoch": 1.0879120879120878,
"grad_norm": 0.1757960468530655,
"learning_rate": 7.643823376580087e-06,
"loss": 1.0110828876495361,
"step": 594
},
{
"epoch": 1.0915750915750915,
"grad_norm": 0.4253121614456177,
"learning_rate": 7.628152023629369e-06,
"loss": 1.0302798748016357,
"step": 596
},
{
"epoch": 1.0952380952380953,
"grad_norm": 0.17801064252853394,
"learning_rate": 7.61244735406733e-06,
"loss": 1.1227004528045654,
"step": 598
},
{
"epoch": 1.098901098901099,
"grad_norm": 0.24273711442947388,
"learning_rate": 7.596709613753457e-06,
"loss": 1.1816527843475342,
"step": 600
},
{
"epoch": 1.1025641025641026,
"grad_norm": 1.4160579442977905,
"learning_rate": 7.5809390490649685e-06,
"loss": 0.8195367455482483,
"step": 602
},
{
"epoch": 1.1062271062271063,
"grad_norm": 0.25498881936073303,
"learning_rate": 7.565135906892954e-06,
"loss": 0.9860736727714539,
"step": 604
},
{
"epoch": 1.10989010989011,
"grad_norm": 0.2312251329421997,
"learning_rate": 7.549300434638515e-06,
"loss": 0.9298585057258606,
"step": 606
},
{
"epoch": 1.1135531135531136,
"grad_norm": 0.40971049666404724,
"learning_rate": 7.533432880208879e-06,
"loss": 1.3407394886016846,
"step": 608
},
{
"epoch": 1.1172161172161172,
"grad_norm": 0.26371797919273376,
"learning_rate": 7.517533492013527e-06,
"loss": 0.7484307289123535,
"step": 610
},
{
"epoch": 1.120879120879121,
"grad_norm": 0.18549509346485138,
"learning_rate": 7.501602518960308e-06,
"loss": 1.2191801071166992,
"step": 612
},
{
"epoch": 1.1245421245421245,
"grad_norm": 0.2595532238483429,
"learning_rate": 7.485640210451535e-06,
"loss": 1.0103733539581299,
"step": 614
},
{
"epoch": 1.1282051282051282,
"grad_norm": 0.1361607164144516,
"learning_rate": 7.469646816380085e-06,
"loss": 1.1822372674942017,
"step": 616
},
{
"epoch": 1.1318681318681318,
"grad_norm": 0.36064237356185913,
"learning_rate": 7.453622587125479e-06,
"loss": 0.42253974080085754,
"step": 618
},
{
"epoch": 1.1355311355311355,
"grad_norm": 0.25480979681015015,
"learning_rate": 7.437567773549976e-06,
"loss": 1.1068378686904907,
"step": 620
},
{
"epoch": 1.1391941391941391,
"grad_norm": 0.4690706133842468,
"learning_rate": 7.421482626994635e-06,
"loss": 0.7852658629417419,
"step": 622
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.2576858401298523,
"learning_rate": 7.405367399275384e-06,
"loss": 1.1447125673294067,
"step": 624
},
{
"epoch": 1.1465201465201464,
"grad_norm": 0.1483878493309021,
"learning_rate": 7.389222342679073e-06,
"loss": 1.4121488332748413,
"step": 626
},
{
"epoch": 1.15018315018315,
"grad_norm": 0.4962548017501831,
"learning_rate": 7.373047709959537e-06,
"loss": 0.4587477743625641,
"step": 628
},
{
"epoch": 1.1538461538461537,
"grad_norm": 0.05175771191716194,
"learning_rate": 7.356843754333626e-06,
"loss": 1.1379830837249756,
"step": 630
},
{
"epoch": 1.1575091575091574,
"grad_norm": 0.8406626582145691,
"learning_rate": 7.340610729477242e-06,
"loss": 1.1201821565628052,
"step": 632
},
{
"epoch": 1.1611721611721613,
"grad_norm": 0.25670814514160156,
"learning_rate": 7.324348889521377e-06,
"loss": 0.8717086315155029,
"step": 634
},
{
"epoch": 1.164835164835165,
"grad_norm": 0.15090753138065338,
"learning_rate": 7.308058489048125e-06,
"loss": 1.0203039646148682,
"step": 636
},
{
"epoch": 1.1684981684981686,
"grad_norm": 0.34527626633644104,
"learning_rate": 7.291739783086701e-06,
"loss": 1.2124344110488892,
"step": 638
},
{
"epoch": 1.1721611721611722,
"grad_norm": 0.05906078591942787,
"learning_rate": 7.275393027109451e-06,
"loss": 0.761792778968811,
"step": 640
},
{
"epoch": 1.1758241758241759,
"grad_norm": 0.22481679916381836,
"learning_rate": 7.259018477027842e-06,
"loss": 1.1472866535186768,
"step": 642
},
{
"epoch": 1.1794871794871795,
"grad_norm": 0.1917589157819748,
"learning_rate": 7.242616389188472e-06,
"loss": 1.1815375089645386,
"step": 644
},
{
"epoch": 1.1831501831501832,
"grad_norm": 0.1334036886692047,
"learning_rate": 7.226187020369039e-06,
"loss": 0.7848197817802429,
"step": 646
},
{
"epoch": 1.1868131868131868,
"grad_norm": 0.6945109367370605,
"learning_rate": 7.209730627774333e-06,
"loss": 0.93724524974823,
"step": 648
},
{
"epoch": 1.1904761904761905,
"grad_norm": 0.22903983294963837,
"learning_rate": 7.193247469032209e-06,
"loss": 1.1586498022079468,
"step": 650
},
{
"epoch": 1.1941391941391941,
"grad_norm": 0.1996290683746338,
"learning_rate": 7.1767378021895464e-06,
"loss": 0.6816765666007996,
"step": 652
},
{
"epoch": 1.1978021978021978,
"grad_norm": 0.3227924108505249,
"learning_rate": 7.160201885708219e-06,
"loss": 1.3321443796157837,
"step": 654
},
{
"epoch": 1.2014652014652014,
"grad_norm": 0.1688106805086136,
"learning_rate": 7.143639978461038e-06,
"loss": 0.8470932841300964,
"step": 656
},
{
"epoch": 1.205128205128205,
"grad_norm": 1.263344645500183,
"learning_rate": 7.127052339727708e-06,
"loss": 0.9178895950317383,
"step": 658
},
{
"epoch": 1.2087912087912087,
"grad_norm": 0.17517027258872986,
"learning_rate": 7.110439229190762e-06,
"loss": 1.1735132932662964,
"step": 660
},
{
"epoch": 1.2124542124542124,
"grad_norm": 0.36022061109542847,
"learning_rate": 7.093800906931505e-06,
"loss": 1.0736725330352783,
"step": 662
},
{
"epoch": 1.2161172161172162,
"grad_norm": 0.14516492187976837,
"learning_rate": 7.077137633425928e-06,
"loss": 0.9138533473014832,
"step": 664
},
{
"epoch": 1.2197802197802199,
"grad_norm": 0.18955856561660767,
"learning_rate": 7.060449669540646e-06,
"loss": 0.8576375842094421,
"step": 666
},
{
"epoch": 1.2234432234432235,
"grad_norm": 1.3548957109451294,
"learning_rate": 7.043737276528799e-06,
"loss": 0.9260948896408081,
"step": 668
},
{
"epoch": 1.2271062271062272,
"grad_norm": 0.4962684214115143,
"learning_rate": 7.027000716025975e-06,
"loss": 0.710183322429657,
"step": 670
},
{
"epoch": 1.2307692307692308,
"grad_norm": 0.1915455311536789,
"learning_rate": 7.010240250046109e-06,
"loss": 1.2020713090896606,
"step": 672
},
{
"epoch": 1.2344322344322345,
"grad_norm": 0.2438817024230957,
"learning_rate": 6.9934561409773724e-06,
"loss": 1.1766732931137085,
"step": 674
},
{
"epoch": 1.2380952380952381,
"grad_norm": 0.1694273203611374,
"learning_rate": 6.976648651578087e-06,
"loss": 1.1996291875839233,
"step": 676
},
{
"epoch": 1.2417582417582418,
"grad_norm": 0.16501298546791077,
"learning_rate": 6.959818044972585e-06,
"loss": 0.7851068377494812,
"step": 678
},
{
"epoch": 1.2454212454212454,
"grad_norm": 0.13900168240070343,
"learning_rate": 6.942964584647109e-06,
"loss": 0.8421606421470642,
"step": 680
},
{
"epoch": 1.249084249084249,
"grad_norm": 0.9977712035179138,
"learning_rate": 6.926088534445682e-06,
"loss": 1.2277159690856934,
"step": 682
},
{
"epoch": 1.2527472527472527,
"grad_norm": 0.13499097526073456,
"learning_rate": 6.909190158565973e-06,
"loss": 1.1799771785736084,
"step": 684
},
{
"epoch": 1.2564102564102564,
"grad_norm": 0.11800684034824371,
"learning_rate": 6.892269721555161e-06,
"loss": 0.8362367153167725,
"step": 686
},
{
"epoch": 1.26007326007326,
"grad_norm": 0.23727478086948395,
"learning_rate": 6.875327488305805e-06,
"loss": 1.1368072032928467,
"step": 688
},
{
"epoch": 1.2637362637362637,
"grad_norm": 0.6073961853981018,
"learning_rate": 6.858363724051678e-06,
"loss": 1.1791174411773682,
"step": 690
},
{
"epoch": 1.2673992673992673,
"grad_norm": 0.23184753954410553,
"learning_rate": 6.841378694363631e-06,
"loss": 1.2035536766052246,
"step": 692
},
{
"epoch": 1.271062271062271,
"grad_norm": 0.17120184004306793,
"learning_rate": 6.824372665145424e-06,
"loss": 0.9986141324043274,
"step": 694
},
{
"epoch": 1.2747252747252746,
"grad_norm": 0.279720276594162,
"learning_rate": 6.80734590262958e-06,
"loss": 1.0483033657073975,
"step": 696
},
{
"epoch": 1.2783882783882783,
"grad_norm": 0.16889701783657074,
"learning_rate": 6.79029867337319e-06,
"loss": 0.9515765905380249,
"step": 698
},
{
"epoch": 1.282051282051282,
"grad_norm": 0.06983523815870285,
"learning_rate": 6.773231244253766e-06,
"loss": 0.6616621017456055,
"step": 700
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.26745325326919556,
"learning_rate": 6.756143882465051e-06,
"loss": 0.552936851978302,
"step": 702
},
{
"epoch": 1.2893772893772895,
"grad_norm": 1.2814058065414429,
"learning_rate": 6.739036855512835e-06,
"loss": 1.3208832740783691,
"step": 704
},
{
"epoch": 1.293040293040293,
"grad_norm": 0.24780850112438202,
"learning_rate": 6.721910431210771e-06,
"loss": 1.0253862142562866,
"step": 706
},
{
"epoch": 1.2967032967032968,
"grad_norm": 0.07103469967842102,
"learning_rate": 6.704764877676181e-06,
"loss": 0.9762220978736877,
"step": 708
},
{
"epoch": 1.3003663003663004,
"grad_norm": 0.1995328813791275,
"learning_rate": 6.687600463325859e-06,
"loss": 0.5912091732025146,
"step": 710
},
{
"epoch": 1.304029304029304,
"grad_norm": 0.3986748456954956,
"learning_rate": 6.670417456871871e-06,
"loss": 0.9025965929031372,
"step": 712
},
{
"epoch": 1.3076923076923077,
"grad_norm": 0.12167935073375702,
"learning_rate": 6.653216127317338e-06,
"loss": 1.2370021343231201,
"step": 714
},
{
"epoch": 1.3113553113553114,
"grad_norm": 1.2277730703353882,
"learning_rate": 6.635996743952242e-06,
"loss": 1.1707024574279785,
"step": 716
},
{
"epoch": 1.315018315018315,
"grad_norm": 0.1129893958568573,
"learning_rate": 6.618759576349196e-06,
"loss": 0.9717994928359985,
"step": 718
},
{
"epoch": 1.3186813186813187,
"grad_norm": 0.11017405986785889,
"learning_rate": 6.601504894359227e-06,
"loss": 0.943675696849823,
"step": 720
},
{
"epoch": 1.3223443223443223,
"grad_norm": 0.1213424950838089,
"learning_rate": 6.584232968107557e-06,
"loss": 0.9619688987731934,
"step": 722
},
{
"epoch": 1.326007326007326,
"grad_norm": 0.46485310792922974,
"learning_rate": 6.566944067989366e-06,
"loss": 1.2262362241744995,
"step": 724
},
{
"epoch": 1.3296703296703296,
"grad_norm": 0.16346322000026703,
"learning_rate": 6.549638464665566e-06,
"loss": 1.1035256385803223,
"step": 726
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.8575840592384338,
"learning_rate": 6.532316429058562e-06,
"loss": 1.1614726781845093,
"step": 728
},
{
"epoch": 1.3369963369963371,
"grad_norm": 0.2909935712814331,
"learning_rate": 6.514978232348003e-06,
"loss": 0.765929639339447,
"step": 730
},
{
"epoch": 1.3406593406593408,
"grad_norm": 0.27958768606185913,
"learning_rate": 6.497624145966549e-06,
"loss": 0.8523128032684326,
"step": 732
},
{
"epoch": 1.3443223443223444,
"grad_norm": 0.05668744817376137,
"learning_rate": 6.480254441595615e-06,
"loss": 0.635466456413269,
"step": 734
},
{
"epoch": 1.347985347985348,
"grad_norm": 0.243035688996315,
"learning_rate": 6.462869391161116e-06,
"loss": 1.1623685359954834,
"step": 736
},
{
"epoch": 1.3516483516483517,
"grad_norm": 0.18739135563373566,
"learning_rate": 6.445469266829214e-06,
"loss": 0.7601761817932129,
"step": 738
},
{
"epoch": 1.3553113553113554,
"grad_norm": 0.18719348311424255,
"learning_rate": 6.428054341002058e-06,
"loss": 1.0253567695617676,
"step": 740
},
{
"epoch": 1.358974358974359,
"grad_norm": 0.44151896238327026,
"learning_rate": 6.41062488631351e-06,
"loss": 0.6381222605705261,
"step": 742
},
{
"epoch": 1.3626373626373627,
"grad_norm": 0.15360459685325623,
"learning_rate": 6.393181175624893e-06,
"loss": 1.2534339427947998,
"step": 744
},
{
"epoch": 1.3663003663003663,
"grad_norm": 0.1375494748353958,
"learning_rate": 6.375723482020702e-06,
"loss": 1.0938211679458618,
"step": 746
},
{
"epoch": 1.36996336996337,
"grad_norm": 0.2471369057893753,
"learning_rate": 6.3582520788043465e-06,
"loss": 1.0957386493682861,
"step": 748
},
{
"epoch": 1.3736263736263736,
"grad_norm": 0.4160969853401184,
"learning_rate": 6.340767239493851e-06,
"loss": 0.8028813600540161,
"step": 750
},
{
"epoch": 1.3772893772893773,
"grad_norm": 0.1555003523826599,
"learning_rate": 6.323269237817595e-06,
"loss": 1.1485873460769653,
"step": 752
},
{
"epoch": 1.380952380952381,
"grad_norm": 0.15514326095581055,
"learning_rate": 6.3057583477100114e-06,
"loss": 0.9761220812797546,
"step": 754
},
{
"epoch": 1.3846153846153846,
"grad_norm": 0.2655651867389679,
"learning_rate": 6.288234843307304e-06,
"loss": 1.3567599058151245,
"step": 756
},
{
"epoch": 1.3882783882783882,
"grad_norm": 0.22227928042411804,
"learning_rate": 6.270698998943158e-06,
"loss": 1.2285981178283691,
"step": 758
},
{
"epoch": 1.3919413919413919,
"grad_norm": 0.08404932916164398,
"learning_rate": 6.253151089144443e-06,
"loss": 1.1724284887313843,
"step": 760
},
{
"epoch": 1.3956043956043955,
"grad_norm": 0.270246684551239,
"learning_rate": 6.235591388626916e-06,
"loss": 1.1640665531158447,
"step": 762
},
{
"epoch": 1.3992673992673992,
"grad_norm": 0.22832772135734558,
"learning_rate": 6.218020172290912e-06,
"loss": 0.7001198530197144,
"step": 764
},
{
"epoch": 1.4029304029304028,
"grad_norm": 0.12530791759490967,
"learning_rate": 6.2004377152170595e-06,
"loss": 0.9129507541656494,
"step": 766
},
{
"epoch": 1.4065934065934065,
"grad_norm": 1.337783932685852,
"learning_rate": 6.182844292661955e-06,
"loss": 0.947498619556427,
"step": 768
},
{
"epoch": 1.4102564102564101,
"grad_norm": 0.20773783326148987,
"learning_rate": 6.165240180053864e-06,
"loss": 1.2057294845581055,
"step": 770
},
{
"epoch": 1.4139194139194138,
"grad_norm": 0.21221360564231873,
"learning_rate": 6.147625652988409e-06,
"loss": 1.2334250211715698,
"step": 772
},
{
"epoch": 1.4175824175824177,
"grad_norm": 0.2252884805202484,
"learning_rate": 6.130000987224252e-06,
"loss": 1.0532145500183105,
"step": 774
},
{
"epoch": 1.4212454212454213,
"grad_norm": 0.1621122807264328,
"learning_rate": 6.11236645867877e-06,
"loss": 1.1797196865081787,
"step": 776
},
{
"epoch": 1.424908424908425,
"grad_norm": 0.4637870192527771,
"learning_rate": 6.09472234342376e-06,
"loss": 0.8449276685714722,
"step": 778
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.16811135411262512,
"learning_rate": 6.077068917681085e-06,
"loss": 1.383507490158081,
"step": 780
},
{
"epoch": 1.4322344322344323,
"grad_norm": 0.5571224093437195,
"learning_rate": 6.059406457818372e-06,
"loss": 1.2903873920440674,
"step": 782
},
{
"epoch": 1.435897435897436,
"grad_norm": 0.19635361433029175,
"learning_rate": 6.0417352403446815e-06,
"loss": 1.178612232208252,
"step": 784
},
{
"epoch": 1.4395604395604396,
"grad_norm": 0.5237776637077332,
"learning_rate": 6.024055541906171e-06,
"loss": 1.0071418285369873,
"step": 786
},
{
"epoch": 1.4432234432234432,
"grad_norm": 0.22411468625068665,
"learning_rate": 6.006367639281773e-06,
"loss": 1.157625436782837,
"step": 788
},
{
"epoch": 1.4468864468864469,
"grad_norm": 0.17982420325279236,
"learning_rate": 5.988671809378851e-06,
"loss": 0.7583225965499878,
"step": 790
},
{
"epoch": 1.4505494505494505,
"grad_norm": 0.16371974349021912,
"learning_rate": 5.970968329228884e-06,
"loss": 0.9400377869606018,
"step": 792
},
{
"epoch": 1.4542124542124542,
"grad_norm": 0.27952075004577637,
"learning_rate": 5.953257475983104e-06,
"loss": 0.818259060382843,
"step": 794
},
{
"epoch": 1.4578754578754578,
"grad_norm": 0.10578649491071701,
"learning_rate": 5.935539526908178e-06,
"loss": 1.1710015535354614,
"step": 796
},
{
"epoch": 1.4615384615384617,
"grad_norm": 0.18775279819965363,
"learning_rate": 5.917814759381857e-06,
"loss": 0.811826765537262,
"step": 798
},
{
"epoch": 1.4652014652014653,
"grad_norm": 0.6075829267501831,
"learning_rate": 5.900083450888636e-06,
"loss": 1.0531878471374512,
"step": 800
},
{
"epoch": 1.468864468864469,
"grad_norm": 0.47543665766716003,
"learning_rate": 5.882345879015412e-06,
"loss": 1.5944072008132935,
"step": 802
},
{
"epoch": 1.4725274725274726,
"grad_norm": 0.19731402397155762,
"learning_rate": 5.864602321447133e-06,
"loss": 1.0520607233047485,
"step": 804
},
{
"epoch": 1.4761904761904763,
"grad_norm": 0.7726924419403076,
"learning_rate": 5.846853055962456e-06,
"loss": 1.2063556909561157,
"step": 806
},
{
"epoch": 1.47985347985348,
"grad_norm": 0.15811191499233246,
"learning_rate": 5.829098360429397e-06,
"loss": 1.1925911903381348,
"step": 808
},
{
"epoch": 1.4835164835164836,
"grad_norm": 0.4315653145313263,
"learning_rate": 5.811338512800983e-06,
"loss": 1.0306977033615112,
"step": 810
},
{
"epoch": 1.4871794871794872,
"grad_norm": 0.16353972256183624,
"learning_rate": 5.793573791110888e-06,
"loss": 0.8834646940231323,
"step": 812
},
{
"epoch": 1.4908424908424909,
"grad_norm": 0.13097569346427917,
"learning_rate": 5.775804473469104e-06,
"loss": 1.2225075960159302,
"step": 814
},
{
"epoch": 1.4945054945054945,
"grad_norm": 0.16530992090702057,
"learning_rate": 5.758030838057562e-06,
"loss": 1.1620936393737793,
"step": 816
},
{
"epoch": 1.4981684981684982,
"grad_norm": 0.06754113733768463,
"learning_rate": 5.7402531631257975e-06,
"loss": 0.43427881598472595,
"step": 818
},
{
"epoch": 1.5018315018315018,
"grad_norm": 0.3140871226787567,
"learning_rate": 5.722471726986577e-06,
"loss": 1.1740379333496094,
"step": 820
},
{
"epoch": 1.5054945054945055,
"grad_norm": 1.0577014684677124,
"learning_rate": 5.7046868080115554e-06,
"loss": 0.9447726011276245,
"step": 822
},
{
"epoch": 1.5091575091575091,
"grad_norm": 0.18314354121685028,
"learning_rate": 5.686898684626909e-06,
"loss": 0.8410064578056335,
"step": 824
},
{
"epoch": 1.5128205128205128,
"grad_norm": 0.41597187519073486,
"learning_rate": 5.6691076353089836e-06,
"loss": 1.0812100172042847,
"step": 826
},
{
"epoch": 1.5164835164835164,
"grad_norm": 0.6020085215568542,
"learning_rate": 5.651313938579925e-06,
"loss": 1.0242727994918823,
"step": 828
},
{
"epoch": 1.52014652014652,
"grad_norm": 0.23912610113620758,
"learning_rate": 5.633517873003329e-06,
"loss": 1.0295336246490479,
"step": 830
},
{
"epoch": 1.5238095238095237,
"grad_norm": 0.18377065658569336,
"learning_rate": 5.615719717179877e-06,
"loss": 1.2712222337722778,
"step": 832
},
{
"epoch": 1.5274725274725274,
"grad_norm": 0.41018202900886536,
"learning_rate": 5.59791974974297e-06,
"loss": 0.825635552406311,
"step": 834
},
{
"epoch": 1.531135531135531,
"grad_norm": 0.17661726474761963,
"learning_rate": 5.580118249354371e-06,
"loss": 1.1780991554260254,
"step": 836
},
{
"epoch": 1.5347985347985347,
"grad_norm": 0.2789783477783203,
"learning_rate": 5.562315494699845e-06,
"loss": 1.231903314590454,
"step": 838
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.38799595832824707,
"learning_rate": 5.544511764484788e-06,
"loss": 1.0772476196289062,
"step": 840
},
{
"epoch": 1.542124542124542,
"grad_norm": 0.12171674519777298,
"learning_rate": 5.526707337429871e-06,
"loss": 0.896016001701355,
"step": 842
},
{
"epoch": 1.5457875457875456,
"grad_norm": 2.428612232208252,
"learning_rate": 5.508902492266676e-06,
"loss": 1.067119836807251,
"step": 844
},
{
"epoch": 1.5494505494505495,
"grad_norm": 0.12386277318000793,
"learning_rate": 5.491097507733326e-06,
"loss": 1.2031583786010742,
"step": 846
},
{
"epoch": 1.5531135531135531,
"grad_norm": 0.6815460920333862,
"learning_rate": 5.473292662570131e-06,
"loss": 0.5977136492729187,
"step": 848
},
{
"epoch": 1.5567765567765568,
"grad_norm": 0.2659470736980438,
"learning_rate": 5.455488235515214e-06,
"loss": 1.0548949241638184,
"step": 850
},
{
"epoch": 1.5604395604395604,
"grad_norm": 0.14139074087142944,
"learning_rate": 5.4376845053001585e-06,
"loss": 1.2025469541549683,
"step": 852
},
{
"epoch": 1.564102564102564,
"grad_norm": 0.3239622414112091,
"learning_rate": 5.41988175064563e-06,
"loss": 0.8870663642883301,
"step": 854
},
{
"epoch": 1.5677655677655677,
"grad_norm": 0.1715698391199112,
"learning_rate": 5.402080250257031e-06,
"loss": 0.8264884948730469,
"step": 856
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.3756884038448334,
"learning_rate": 5.384280282820126e-06,
"loss": 0.974433422088623,
"step": 858
},
{
"epoch": 1.575091575091575,
"grad_norm": 0.21274326741695404,
"learning_rate": 5.3664821269966714e-06,
"loss": 0.7866367101669312,
"step": 860
},
{
"epoch": 1.578754578754579,
"grad_norm": 0.16765311360359192,
"learning_rate": 5.348686061420078e-06,
"loss": 1.1772246360778809,
"step": 862
},
{
"epoch": 1.5824175824175826,
"grad_norm": 0.31508204340934753,
"learning_rate": 5.330892364691018e-06,
"loss": 0.9374992251396179,
"step": 864
},
{
"epoch": 1.5860805860805862,
"grad_norm": 0.19520021975040436,
"learning_rate": 5.3131013153730916e-06,
"loss": 1.2052057981491089,
"step": 866
},
{
"epoch": 1.5897435897435899,
"grad_norm": 0.13115696609020233,
"learning_rate": 5.295313191988447e-06,
"loss": 1.2084887027740479,
"step": 868
},
{
"epoch": 1.5934065934065935,
"grad_norm": 0.7018041014671326,
"learning_rate": 5.277528273013425e-06,
"loss": 0.609009325504303,
"step": 870
},
{
"epoch": 1.5970695970695972,
"grad_norm": 0.16342367231845856,
"learning_rate": 5.259746836874203e-06,
"loss": 1.2070071697235107,
"step": 872
},
{
"epoch": 1.6007326007326008,
"grad_norm": 0.14944781363010406,
"learning_rate": 5.2419691619424396e-06,
"loss": 1.091475486755371,
"step": 874
},
{
"epoch": 1.6043956043956045,
"grad_norm": 0.3255991041660309,
"learning_rate": 5.224195526530897e-06,
"loss": 0.8270645141601562,
"step": 876
},
{
"epoch": 1.6080586080586081,
"grad_norm": 0.3492541015148163,
"learning_rate": 5.206426208889113e-06,
"loss": 1.054788589477539,
"step": 878
},
{
"epoch": 1.6117216117216118,
"grad_norm": 0.1763620376586914,
"learning_rate": 5.18866148719902e-06,
"loss": 0.9270405769348145,
"step": 880
},
{
"epoch": 1.6153846153846154,
"grad_norm": 0.37243205308914185,
"learning_rate": 5.170901639570605e-06,
"loss": 1.163893222808838,
"step": 882
},
{
"epoch": 1.619047619047619,
"grad_norm": 0.20206376910209656,
"learning_rate": 5.153146944037545e-06,
"loss": 0.978087842464447,
"step": 884
},
{
"epoch": 1.6227106227106227,
"grad_norm": 0.1183437779545784,
"learning_rate": 5.135397678552869e-06,
"loss": 1.1707783937454224,
"step": 886
},
{
"epoch": 1.6263736263736264,
"grad_norm": 0.19354073703289032,
"learning_rate": 5.11765412098459e-06,
"loss": 0.8640797138214111,
"step": 888
},
{
"epoch": 1.63003663003663,
"grad_norm": 0.13133522868156433,
"learning_rate": 5.099916549111365e-06,
"loss": 1.2608891725540161,
"step": 890
},
{
"epoch": 1.6336996336996337,
"grad_norm": 0.12352308630943298,
"learning_rate": 5.082185240618146e-06,
"loss": 0.9242100715637207,
"step": 892
},
{
"epoch": 1.6373626373626373,
"grad_norm": 0.20042523741722107,
"learning_rate": 5.064460473091823e-06,
"loss": 1.2130396366119385,
"step": 894
},
{
"epoch": 1.641025641025641,
"grad_norm": 0.5343178510665894,
"learning_rate": 5.046742524016899e-06,
"loss": 0.7868685722351074,
"step": 896
},
{
"epoch": 1.6446886446886446,
"grad_norm": 0.22285579144954681,
"learning_rate": 5.029031670771119e-06,
"loss": 1.1629694700241089,
"step": 898
},
{
"epoch": 1.6483516483516483,
"grad_norm": 0.05358535423874855,
"learning_rate": 5.0113281906211485e-06,
"loss": 0.8543750643730164,
"step": 900
},
{
"epoch": 1.652014652014652,
"grad_norm": 0.51787930727005,
"learning_rate": 4.99363236071823e-06,
"loss": 0.9102002382278442,
"step": 902
},
{
"epoch": 1.6556776556776556,
"grad_norm": 0.15900075435638428,
"learning_rate": 4.975944458093831e-06,
"loss": 1.042647361755371,
"step": 904
},
{
"epoch": 1.6593406593406592,
"grad_norm": 0.7878313064575195,
"learning_rate": 4.958264759655319e-06,
"loss": 1.166403889656067,
"step": 906
},
{
"epoch": 1.6630036630036629,
"grad_norm": 0.25159403681755066,
"learning_rate": 4.940593542181629e-06,
"loss": 0.7760780453681946,
"step": 908
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.43353399634361267,
"learning_rate": 4.922931082318917e-06,
"loss": 1.2228432893753052,
"step": 910
},
{
"epoch": 1.6703296703296702,
"grad_norm": 0.13905330002307892,
"learning_rate": 4.905277656576243e-06,
"loss": 1.241356611251831,
"step": 912
},
{
"epoch": 1.673992673992674,
"grad_norm": 0.14692410826683044,
"learning_rate": 4.8876335413212305e-06,
"loss": 1.1272119283676147,
"step": 914
},
{
"epoch": 1.6776556776556777,
"grad_norm": 0.2749308943748474,
"learning_rate": 4.86999901277575e-06,
"loss": 1.240549921989441,
"step": 916
},
{
"epoch": 1.6813186813186813,
"grad_norm": 0.2792946994304657,
"learning_rate": 4.852374347011591e-06,
"loss": 0.8525235056877136,
"step": 918
},
{
"epoch": 1.684981684981685,
"grad_norm": 0.10312946885824203,
"learning_rate": 4.834759819946137e-06,
"loss": 1.2491165399551392,
"step": 920
},
{
"epoch": 1.6886446886446886,
"grad_norm": 0.18224607408046722,
"learning_rate": 4.817155707338048e-06,
"loss": 1.0514307022094727,
"step": 922
},
{
"epoch": 1.6923076923076923,
"grad_norm": 0.1929437667131424,
"learning_rate": 4.799562284782944e-06,
"loss": 0.8574016690254211,
"step": 924
},
{
"epoch": 1.695970695970696,
"grad_norm": 3.134303092956543,
"learning_rate": 4.78197982770909e-06,
"loss": 1.052782654762268,
"step": 926
},
{
"epoch": 1.6996336996336996,
"grad_norm": 0.31613659858703613,
"learning_rate": 4.7644086113730855e-06,
"loss": 0.8344395756721497,
"step": 928
},
{
"epoch": 1.7032967032967035,
"grad_norm": 0.03118388168513775,
"learning_rate": 4.746848910855558e-06,
"loss": 0.9363417625427246,
"step": 930
},
{
"epoch": 1.7069597069597071,
"grad_norm": 0.22982323169708252,
"learning_rate": 4.729301001056842e-06,
"loss": 0.959007978439331,
"step": 932
},
{
"epoch": 1.7106227106227108,
"grad_norm": 0.20650729537010193,
"learning_rate": 4.711765156692697e-06,
"loss": 1.4183884859085083,
"step": 934
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.29698464274406433,
"learning_rate": 4.694241652289992e-06,
"loss": 1.221863865852356,
"step": 936
},
{
"epoch": 1.717948717948718,
"grad_norm": 0.23697128891944885,
"learning_rate": 4.676730762182407e-06,
"loss": 0.47039785981178284,
"step": 938
},
{
"epoch": 1.7216117216117217,
"grad_norm": 0.16988730430603027,
"learning_rate": 4.659232760506149e-06,
"loss": 0.6852482557296753,
"step": 940
},
{
"epoch": 1.7252747252747254,
"grad_norm": 0.28183671832084656,
"learning_rate": 4.641747921195657e-06,
"loss": 1.152092456817627,
"step": 942
},
{
"epoch": 1.728937728937729,
"grad_norm": 0.614911675453186,
"learning_rate": 4.624276517979298e-06,
"loss": 0.9434917569160461,
"step": 944
},
{
"epoch": 1.7326007326007327,
"grad_norm": 0.19769613444805145,
"learning_rate": 4.606818824375109e-06,
"loss": 1.1814640760421753,
"step": 946
},
{
"epoch": 1.7362637362637363,
"grad_norm": 0.2297975867986679,
"learning_rate": 4.589375113686492e-06,
"loss": 1.1364959478378296,
"step": 948
},
{
"epoch": 1.73992673992674,
"grad_norm": 0.14440670609474182,
"learning_rate": 4.571945658997944e-06,
"loss": 1.0138092041015625,
"step": 950
},
{
"epoch": 1.7435897435897436,
"grad_norm": 0.0975588783621788,
"learning_rate": 4.554530733170788e-06,
"loss": 0.9809228181838989,
"step": 952
},
{
"epoch": 1.7472527472527473,
"grad_norm": 0.14117704331874847,
"learning_rate": 4.5371306088388856e-06,
"loss": 1.239669680595398,
"step": 954
},
{
"epoch": 1.750915750915751,
"grad_norm": 0.228751540184021,
"learning_rate": 4.519745558404387e-06,
"loss": 0.8224953413009644,
"step": 956
},
{
"epoch": 1.7545787545787546,
"grad_norm": 0.7675461173057556,
"learning_rate": 4.502375854033453e-06,
"loss": 1.2402071952819824,
"step": 958
},
{
"epoch": 1.7582417582417582,
"grad_norm": 0.11008962988853455,
"learning_rate": 4.4850217676519995e-06,
"loss": 0.5849726796150208,
"step": 960
},
{
"epoch": 1.7619047619047619,
"grad_norm": 0.3784470558166504,
"learning_rate": 4.46768357094144e-06,
"loss": 1.023523211479187,
"step": 962
},
{
"epoch": 1.7655677655677655,
"grad_norm": 0.16590407490730286,
"learning_rate": 4.4503615353344346e-06,
"loss": 1.091076135635376,
"step": 964
},
{
"epoch": 1.7692307692307692,
"grad_norm": 0.16390341520309448,
"learning_rate": 4.433055932010635e-06,
"loss": 1.2073513269424438,
"step": 966
},
{
"epoch": 1.7728937728937728,
"grad_norm": 1.0820486545562744,
"learning_rate": 4.4157670318924454e-06,
"loss": 0.5969150066375732,
"step": 968
},
{
"epoch": 1.7765567765567765,
"grad_norm": 0.16822820901870728,
"learning_rate": 4.398495105640774e-06,
"loss": 0.8644286394119263,
"step": 970
},
{
"epoch": 1.7802197802197801,
"grad_norm": 0.2925519645214081,
"learning_rate": 4.381240423650805e-06,
"loss": 0.9442048072814941,
"step": 972
},
{
"epoch": 1.7838827838827838,
"grad_norm": 0.4801477789878845,
"learning_rate": 4.364003256047758e-06,
"loss": 1.279288649559021,
"step": 974
},
{
"epoch": 1.7875457875457874,
"grad_norm": 0.1608477234840393,
"learning_rate": 4.346783872682662e-06,
"loss": 1.2263715267181396,
"step": 976
},
{
"epoch": 1.791208791208791,
"grad_norm": 0.13645397126674652,
"learning_rate": 4.329582543128131e-06,
"loss": 0.9317041635513306,
"step": 978
},
{
"epoch": 1.7948717948717947,
"grad_norm": 0.258089154958725,
"learning_rate": 4.312399536674141e-06,
"loss": 0.9728096723556519,
"step": 980
},
{
"epoch": 1.7985347985347986,
"grad_norm": 2.324678421020508,
"learning_rate": 4.295235122323822e-06,
"loss": 1.1650446653366089,
"step": 982
},
{
"epoch": 1.8021978021978022,
"grad_norm": 0.2941892147064209,
"learning_rate": 4.278089568789231e-06,
"loss": 1.1338319778442383,
"step": 984
},
{
"epoch": 1.8058608058608059,
"grad_norm": 0.08391306549310684,
"learning_rate": 4.260963144487168e-06,
"loss": 0.6776608824729919,
"step": 986
},
{
"epoch": 1.8095238095238095,
"grad_norm": 0.4403177797794342,
"learning_rate": 4.2438561175349505e-06,
"loss": 0.8319576382637024,
"step": 988
},
{
"epoch": 1.8131868131868132,
"grad_norm": 0.34780237078666687,
"learning_rate": 4.2267687557462345e-06,
"loss": 0.7826079726219177,
"step": 990
},
{
"epoch": 1.8168498168498168,
"grad_norm": 0.11686074733734131,
"learning_rate": 4.209701326626812e-06,
"loss": 0.795200526714325,
"step": 992
},
{
"epoch": 1.8205128205128205,
"grad_norm": 0.12649790942668915,
"learning_rate": 4.192654097370423e-06,
"loss": 0.8667728900909424,
"step": 994
},
{
"epoch": 1.8241758241758241,
"grad_norm": 0.5255754590034485,
"learning_rate": 4.175627334854575e-06,
"loss": 1.1568585634231567,
"step": 996
},
{
"epoch": 1.8278388278388278,
"grad_norm": 0.10009193420410156,
"learning_rate": 4.1586213056363724e-06,
"loss": 0.8747377991676331,
"step": 998
},
{
"epoch": 1.8315018315018317,
"grad_norm": 0.17984943091869354,
"learning_rate": 4.141636275948324e-06,
"loss": 1.1325833797454834,
"step": 1000
},
{
"epoch": 1.8351648351648353,
"grad_norm": 0.17336614429950714,
"learning_rate": 4.1246725116941964e-06,
"loss": 1.166914463043213,
"step": 1002
},
{
"epoch": 1.838827838827839,
"grad_norm": 0.20862817764282227,
"learning_rate": 4.10773027844484e-06,
"loss": 1.1623833179473877,
"step": 1004
},
{
"epoch": 1.8424908424908426,
"grad_norm": 0.15748779475688934,
"learning_rate": 4.090809841434029e-06,
"loss": 1.164290428161621,
"step": 1006
},
{
"epoch": 1.8461538461538463,
"grad_norm": 0.31245389580726624,
"learning_rate": 4.073911465554319e-06,
"loss": 0.8208089470863342,
"step": 1008
},
{
"epoch": 1.84981684981685,
"grad_norm": 0.18492500483989716,
"learning_rate": 4.057035415352892e-06,
"loss": 1.1237512826919556,
"step": 1010
},
{
"epoch": 1.8534798534798536,
"grad_norm": 0.14317144453525543,
"learning_rate": 4.0401819550274165e-06,
"loss": 0.7784026861190796,
"step": 1012
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.1404157131910324,
"learning_rate": 4.023351348421915e-06,
"loss": 1.176824688911438,
"step": 1014
},
{
"epoch": 1.8608058608058609,
"grad_norm": 0.14713755249977112,
"learning_rate": 4.006543859022628e-06,
"loss": 1.1646744012832642,
"step": 1016
},
{
"epoch": 1.8644688644688645,
"grad_norm": 0.19819317758083344,
"learning_rate": 3.989759749953893e-06,
"loss": 1.256286382675171,
"step": 1018
},
{
"epoch": 1.8681318681318682,
"grad_norm": 0.034005679190158844,
"learning_rate": 3.972999283974026e-06,
"loss": 0.9847078323364258,
"step": 1020
},
{
"epoch": 1.8717948717948718,
"grad_norm": 0.17176663875579834,
"learning_rate": 3.956262723471203e-06,
"loss": 1.1373211145401,
"step": 1022
},
{
"epoch": 1.8754578754578755,
"grad_norm": 0.3484453558921814,
"learning_rate": 3.9395503304593565e-06,
"loss": 0.5924882292747498,
"step": 1024
},
{
"epoch": 1.879120879120879,
"grad_norm": 0.16725610196590424,
"learning_rate": 3.922862366574074e-06,
"loss": 1.1780312061309814,
"step": 1026
},
{
"epoch": 1.8827838827838828,
"grad_norm": 0.1592869609594345,
"learning_rate": 3.906199093068497e-06,
"loss": 0.9455581903457642,
"step": 1028
},
{
"epoch": 1.8864468864468864,
"grad_norm": 0.4676535129547119,
"learning_rate": 3.889560770809239e-06,
"loss": 1.1824193000793457,
"step": 1030
},
{
"epoch": 1.89010989010989,
"grad_norm": 0.13471902906894684,
"learning_rate": 3.872947660272295e-06,
"loss": 0.9769763350486755,
"step": 1032
},
{
"epoch": 1.8937728937728937,
"grad_norm": 0.24125701189041138,
"learning_rate": 3.856360021538964e-06,
"loss": 0.8109256029129028,
"step": 1034
},
{
"epoch": 1.8974358974358974,
"grad_norm": 0.27469104528427124,
"learning_rate": 3.8397981142917815e-06,
"loss": 1.2156492471694946,
"step": 1036
},
{
"epoch": 1.901098901098901,
"grad_norm": 0.16270951926708221,
"learning_rate": 3.823262197810454e-06,
"loss": 1.183699369430542,
"step": 1038
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.37082114815711975,
"learning_rate": 3.806752530967792e-06,
"loss": 1.2584105730056763,
"step": 1040
},
{
"epoch": 1.9084249084249083,
"grad_norm": 0.9490067362785339,
"learning_rate": 3.790269372225668e-06,
"loss": 0.6401211023330688,
"step": 1042
},
{
"epoch": 1.912087912087912,
"grad_norm": 0.15817776322364807,
"learning_rate": 3.773812979630964e-06,
"loss": 0.9084805250167847,
"step": 1044
},
{
"epoch": 1.9157509157509156,
"grad_norm": 0.1630954146385193,
"learning_rate": 3.7573836108115303e-06,
"loss": 1.1366910934448242,
"step": 1046
},
{
"epoch": 1.9194139194139193,
"grad_norm": 0.3042643964290619,
"learning_rate": 3.740981522972159e-06,
"loss": 0.514860987663269,
"step": 1048
},
{
"epoch": 1.9230769230769231,
"grad_norm": 0.25363633036613464,
"learning_rate": 3.724606972890551e-06,
"loss": 0.9003884792327881,
"step": 1050
},
{
"epoch": 1.9267399267399268,
"grad_norm": 0.22043615579605103,
"learning_rate": 3.7082602169132995e-06,
"loss": 0.8399287462234497,
"step": 1052
},
{
"epoch": 1.9304029304029304,
"grad_norm": 0.1377377063035965,
"learning_rate": 3.6919415109518776e-06,
"loss": 1.1453593969345093,
"step": 1054
},
{
"epoch": 1.934065934065934,
"grad_norm": 0.22288501262664795,
"learning_rate": 3.6756511104786254e-06,
"loss": 0.770913302898407,
"step": 1056
},
{
"epoch": 1.9377289377289377,
"grad_norm": 0.16052567958831787,
"learning_rate": 3.6593892705227586e-06,
"loss": 1.003678321838379,
"step": 1058
},
{
"epoch": 1.9413919413919414,
"grad_norm": 0.3310210704803467,
"learning_rate": 3.643156245666377e-06,
"loss": 1.1094727516174316,
"step": 1060
},
{
"epoch": 1.945054945054945,
"grad_norm": 0.18297162652015686,
"learning_rate": 3.626952290040463e-06,
"loss": 0.8664683103561401,
"step": 1062
},
{
"epoch": 1.9487179487179487,
"grad_norm": 0.1476193070411682,
"learning_rate": 3.6107776573209263e-06,
"loss": 0.8188486099243164,
"step": 1064
},
{
"epoch": 1.9523809523809523,
"grad_norm": 0.20653630793094635,
"learning_rate": 3.59463260072462e-06,
"loss": 1.1757713556289673,
"step": 1066
},
{
"epoch": 1.9560439560439562,
"grad_norm": 0.12281164526939392,
"learning_rate": 3.5785173730053667e-06,
"loss": 1.2063580751419067,
"step": 1068
},
{
"epoch": 1.9597069597069599,
"grad_norm": 0.07409633696079254,
"learning_rate": 3.5624322264500246e-06,
"loss": 0.7450681328773499,
"step": 1070
},
{
"epoch": 1.9633699633699635,
"grad_norm": 0.14261014759540558,
"learning_rate": 3.5463774128745232e-06,
"loss": 0.881243884563446,
"step": 1072
},
{
"epoch": 1.9670329670329672,
"grad_norm": 0.12503007054328918,
"learning_rate": 3.530353183619918e-06,
"loss": 1.161426067352295,
"step": 1074
},
{
"epoch": 1.9706959706959708,
"grad_norm": 0.11768339574337006,
"learning_rate": 3.514359789548466e-06,
"loss": 1.1456844806671143,
"step": 1076
},
{
"epoch": 1.9743589743589745,
"grad_norm": 0.23258298635482788,
"learning_rate": 3.4983974810396927e-06,
"loss": 1.0247056484222412,
"step": 1078
},
{
"epoch": 1.978021978021978,
"grad_norm": 0.28044548630714417,
"learning_rate": 3.4824665079864735e-06,
"loss": 1.1190541982650757,
"step": 1080
},
{
"epoch": 1.9816849816849818,
"grad_norm": 0.12241950631141663,
"learning_rate": 3.466567119791123e-06,
"loss": 1.126396656036377,
"step": 1082
},
{
"epoch": 1.9853479853479854,
"grad_norm": 0.05192271247506142,
"learning_rate": 3.4506995653614873e-06,
"loss": 0.7499899864196777,
"step": 1084
},
{
"epoch": 1.989010989010989,
"grad_norm": 0.5542802214622498,
"learning_rate": 3.4348640931070463e-06,
"loss": 0.981029748916626,
"step": 1086
},
{
"epoch": 1.9926739926739927,
"grad_norm": 0.24883772432804108,
"learning_rate": 3.4190609509350338e-06,
"loss": 1.0121923685073853,
"step": 1088
},
{
"epoch": 1.9963369963369964,
"grad_norm": 0.1047605574131012,
"learning_rate": 3.403290386246544e-06,
"loss": 0.9460771679878235,
"step": 1090
},
{
"epoch": 2.0,
"grad_norm": 0.14438582956790924,
"learning_rate": 3.3875526459326714e-06,
"loss": 1.0866570472717285,
"step": 1092
},
{
"epoch": 2.0036630036630036,
"grad_norm": 0.13476252555847168,
"learning_rate": 3.3718479763706324e-06,
"loss": 1.140030860900879,
"step": 1094
},
{
"epoch": 2.0073260073260073,
"grad_norm": 0.39099064469337463,
"learning_rate": 3.356176623419915e-06,
"loss": 1.1750749349594116,
"step": 1096
},
{
"epoch": 2.010989010989011,
"grad_norm": 0.15205055475234985,
"learning_rate": 3.340538832418436e-06,
"loss": 1.1374648809432983,
"step": 1098
},
{
"epoch": 2.0146520146520146,
"grad_norm": 0.4141819179058075,
"learning_rate": 3.3249348481786904e-06,
"loss": 1.2270292043685913,
"step": 1100
},
{
"epoch": 2.0183150183150182,
"grad_norm": 0.2034660130739212,
"learning_rate": 3.3093649149839148e-06,
"loss": 0.8838691711425781,
"step": 1102
},
{
"epoch": 2.021978021978022,
"grad_norm": 0.12057554721832275,
"learning_rate": 3.2938292765842817e-06,
"loss": 1.1789038181304932,
"step": 1104
},
{
"epoch": 2.0256410256410255,
"grad_norm": 0.2732870578765869,
"learning_rate": 3.2783281761930673e-06,
"loss": 0.8000632524490356,
"step": 1106
},
{
"epoch": 2.029304029304029,
"grad_norm": 0.10481557995080948,
"learning_rate": 3.262861856482849e-06,
"loss": 1.2116031646728516,
"step": 1108
},
{
"epoch": 2.032967032967033,
"grad_norm": 0.19622857868671417,
"learning_rate": 3.247430559581706e-06,
"loss": 0.9533130526542664,
"step": 1110
},
{
"epoch": 2.0366300366300365,
"grad_norm": 0.15817232429981232,
"learning_rate": 3.2320345270694263e-06,
"loss": 0.6461672186851501,
"step": 1112
},
{
"epoch": 2.04029304029304,
"grad_norm": 0.30624663829803467,
"learning_rate": 3.216673999973734e-06,
"loss": 0.893692672252655,
"step": 1114
},
{
"epoch": 2.043956043956044,
"grad_norm": 0.20368322730064392,
"learning_rate": 3.201349218766506e-06,
"loss": 1.2045972347259521,
"step": 1116
},
{
"epoch": 2.0476190476190474,
"grad_norm": 0.3576587736606598,
"learning_rate": 3.186060423360009e-06,
"loss": 1.1595624685287476,
"step": 1118
},
{
"epoch": 2.051282051282051,
"grad_norm": 0.15144126117229462,
"learning_rate": 3.170807853103146e-06,
"loss": 0.8582723736763,
"step": 1120
},
{
"epoch": 2.0549450549450547,
"grad_norm": 0.3102099895477295,
"learning_rate": 3.155591746777713e-06,
"loss": 1.3617991209030151,
"step": 1122
},
{
"epoch": 2.0586080586080584,
"grad_norm": 0.13681809604167938,
"learning_rate": 3.140412342594648e-06,
"loss": 1.1718530654907227,
"step": 1124
},
{
"epoch": 2.062271062271062,
"grad_norm": 0.14834214746952057,
"learning_rate": 3.12526987819032e-06,
"loss": 0.8169500827789307,
"step": 1126
},
{
"epoch": 2.065934065934066,
"grad_norm": 0.3267018795013428,
"learning_rate": 3.1101645906227924e-06,
"loss": 1.1410131454467773,
"step": 1128
},
{
"epoch": 2.06959706959707,
"grad_norm": 0.2117215096950531,
"learning_rate": 3.0950967163681177e-06,
"loss": 1.1394081115722656,
"step": 1130
},
{
"epoch": 2.0732600732600734,
"grad_norm": 0.18249589204788208,
"learning_rate": 3.08006649131664e-06,
"loss": 1.1663340330123901,
"step": 1132
},
{
"epoch": 2.076923076923077,
"grad_norm": 0.15602019429206848,
"learning_rate": 3.0650741507693004e-06,
"loss": 1.1466034650802612,
"step": 1134
},
{
"epoch": 2.0805860805860807,
"grad_norm": 0.3648541271686554,
"learning_rate": 3.0501199294339435e-06,
"loss": 0.8573122620582581,
"step": 1136
},
{
"epoch": 2.0842490842490844,
"grad_norm": 0.1585971862077713,
"learning_rate": 3.0352040614216555e-06,
"loss": 1.1506117582321167,
"step": 1138
},
{
"epoch": 2.087912087912088,
"grad_norm": 0.17453494668006897,
"learning_rate": 3.0203267802430915e-06,
"loss": 1.0754824876785278,
"step": 1140
},
{
"epoch": 2.0915750915750917,
"grad_norm": 0.1620694249868393,
"learning_rate": 3.0054883188048266e-06,
"loss": 1.1398316621780396,
"step": 1142
},
{
"epoch": 2.0952380952380953,
"grad_norm": 0.06643152236938477,
"learning_rate": 2.9906889094057062e-06,
"loss": 0.4219062924385071,
"step": 1144
},
{
"epoch": 2.098901098901099,
"grad_norm": 0.7500718235969543,
"learning_rate": 2.9759287837332007e-06,
"loss": 0.9941345453262329,
"step": 1146
},
{
"epoch": 2.1025641025641026,
"grad_norm": 0.17045439779758453,
"learning_rate": 2.961208172859794e-06,
"loss": 0.84036785364151,
"step": 1148
},
{
"epoch": 2.1062271062271063,
"grad_norm": 0.2622012197971344,
"learning_rate": 2.946527307239359e-06,
"loss": 0.8539763689041138,
"step": 1150
},
{
"epoch": 2.10989010989011,
"grad_norm": 0.42088064551353455,
"learning_rate": 2.9318864167035452e-06,
"loss": 0.985520601272583,
"step": 1152
},
{
"epoch": 2.1135531135531136,
"grad_norm": 0.3410366475582123,
"learning_rate": 2.9172857304581857e-06,
"loss": 0.900378942489624,
"step": 1154
},
{
"epoch": 2.1172161172161172,
"grad_norm": 0.3229033052921295,
"learning_rate": 2.902725477079711e-06,
"loss": 1.1304961442947388,
"step": 1156
},
{
"epoch": 2.120879120879121,
"grad_norm": 0.4168906807899475,
"learning_rate": 2.8882058845115633e-06,
"loss": 1.0916647911071777,
"step": 1158
},
{
"epoch": 2.1245421245421245,
"grad_norm": 0.34625813364982605,
"learning_rate": 2.873727180060637e-06,
"loss": 0.909528374671936,
"step": 1160
},
{
"epoch": 2.128205128205128,
"grad_norm": 0.2843841016292572,
"learning_rate": 2.8592895903937124e-06,
"loss": 0.8306626677513123,
"step": 1162
},
{
"epoch": 2.131868131868132,
"grad_norm": 0.23222553730010986,
"learning_rate": 2.8448933415339085e-06,
"loss": 0.9491928815841675,
"step": 1164
},
{
"epoch": 2.1355311355311355,
"grad_norm": 0.1373523473739624,
"learning_rate": 2.8305386588571517e-06,
"loss": 0.45827817916870117,
"step": 1166
},
{
"epoch": 2.139194139194139,
"grad_norm": 0.1498524397611618,
"learning_rate": 2.816225767088638e-06,
"loss": 0.4394649267196655,
"step": 1168
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.203684002161026,
"learning_rate": 2.801954890299322e-06,
"loss": 0.9728699922561646,
"step": 1170
},
{
"epoch": 2.1465201465201464,
"grad_norm": 0.4509437382221222,
"learning_rate": 2.7877262519024027e-06,
"loss": 1.19068443775177,
"step": 1172
},
{
"epoch": 2.15018315018315,
"grad_norm": 0.37610286474227905,
"learning_rate": 2.7735400746498302e-06,
"loss": 1.306997299194336,
"step": 1174
},
{
"epoch": 2.1538461538461537,
"grad_norm": 1.0810068845748901,
"learning_rate": 2.7593965806288204e-06,
"loss": 0.8269945979118347,
"step": 1176
},
{
"epoch": 2.1575091575091574,
"grad_norm": 0.11437740176916122,
"learning_rate": 2.7452959912583744e-06,
"loss": 1.174338698387146,
"step": 1178
},
{
"epoch": 2.161172161172161,
"grad_norm": 0.1378648579120636,
"learning_rate": 2.7312385272858087e-06,
"loss": 1.1485635042190552,
"step": 1180
},
{
"epoch": 2.1648351648351647,
"grad_norm": 0.1592278927564621,
"learning_rate": 2.7172244087833077e-06,
"loss": 1.208397388458252,
"step": 1182
},
{
"epoch": 2.1684981684981683,
"grad_norm": 0.1117565929889679,
"learning_rate": 2.7032538551444776e-06,
"loss": 1.175192952156067,
"step": 1184
},
{
"epoch": 2.172161172161172,
"grad_norm": 0.12953658401966095,
"learning_rate": 2.6893270850809024e-06,
"loss": 0.3722214698791504,
"step": 1186
},
{
"epoch": 2.1758241758241756,
"grad_norm": 0.27550461888313293,
"learning_rate": 2.6754443166187267e-06,
"loss": 1.2698341608047485,
"step": 1188
},
{
"epoch": 2.1794871794871793,
"grad_norm": 0.3155595660209656,
"learning_rate": 2.661605767095248e-06,
"loss": 0.8203377723693848,
"step": 1190
},
{
"epoch": 2.183150183150183,
"grad_norm": 0.17399396002292633,
"learning_rate": 2.6478116531554997e-06,
"loss": 1.01655912399292,
"step": 1192
},
{
"epoch": 2.186813186813187,
"grad_norm": 0.1317910999059677,
"learning_rate": 2.6340621907488777e-06,
"loss": 0.8621305823326111,
"step": 1194
},
{
"epoch": 2.1904761904761907,
"grad_norm": 0.2792171835899353,
"learning_rate": 2.620357595125742e-06,
"loss": 0.9206136465072632,
"step": 1196
},
{
"epoch": 2.1941391941391943,
"grad_norm": 0.15142017602920532,
"learning_rate": 2.6066980808340553e-06,
"loss": 1.1463533639907837,
"step": 1198
},
{
"epoch": 2.197802197802198,
"grad_norm": 0.2487816959619522,
"learning_rate": 2.5930838617160304e-06,
"loss": 0.8177496790885925,
"step": 1200
},
{
"epoch": 2.2014652014652016,
"grad_norm": 0.176479309797287,
"learning_rate": 2.579515150904767e-06,
"loss": 1.2105001211166382,
"step": 1202
},
{
"epoch": 2.2051282051282053,
"grad_norm": 0.2774566113948822,
"learning_rate": 2.5659921608209325e-06,
"loss": 1.165309190750122,
"step": 1204
},
{
"epoch": 2.208791208791209,
"grad_norm": 0.18029530346393585,
"learning_rate": 2.5525151031694214e-06,
"loss": 0.5955395102500916,
"step": 1206
},
{
"epoch": 2.2124542124542126,
"grad_norm": 0.44882774353027344,
"learning_rate": 2.5390841889360483e-06,
"loss": 0.7616056203842163,
"step": 1208
},
{
"epoch": 2.2161172161172162,
"grad_norm": 1.0745935440063477,
"learning_rate": 2.525699628384249e-06,
"loss": 0.6935135722160339,
"step": 1210
},
{
"epoch": 2.21978021978022,
"grad_norm": 0.17599527537822723,
"learning_rate": 2.5123616310517797e-06,
"loss": 1.1335649490356445,
"step": 1212
},
{
"epoch": 2.2234432234432235,
"grad_norm": 0.38573309779167175,
"learning_rate": 2.4990704057474405e-06,
"loss": 0.8549797534942627,
"step": 1214
},
{
"epoch": 2.227106227106227,
"grad_norm": 0.5317236185073853,
"learning_rate": 2.485826160547807e-06,
"loss": 0.9798864126205444,
"step": 1216
},
{
"epoch": 2.230769230769231,
"grad_norm": 0.12072915583848953,
"learning_rate": 2.4726291027939775e-06,
"loss": 1.137038230895996,
"step": 1218
},
{
"epoch": 2.2344322344322345,
"grad_norm": 0.13340038061141968,
"learning_rate": 2.459479439088314e-06,
"loss": 1.1505991220474243,
"step": 1220
},
{
"epoch": 2.238095238095238,
"grad_norm": 0.19366510212421417,
"learning_rate": 2.4463773752912232e-06,
"loss": 1.1624219417572021,
"step": 1222
},
{
"epoch": 2.241758241758242,
"grad_norm": 0.2833138108253479,
"learning_rate": 2.4333231165179226e-06,
"loss": 0.5617607831954956,
"step": 1224
},
{
"epoch": 2.2454212454212454,
"grad_norm": 0.14608268439769745,
"learning_rate": 2.420316867135232e-06,
"loss": 1.1109657287597656,
"step": 1226
},
{
"epoch": 2.249084249084249,
"grad_norm": 2.9962241649627686,
"learning_rate": 2.407358830758381e-06,
"loss": 0.6706120371818542,
"step": 1228
},
{
"epoch": 2.2527472527472527,
"grad_norm": 0.044207386672496796,
"learning_rate": 2.394449210247811e-06,
"loss": 0.6224187016487122,
"step": 1230
},
{
"epoch": 2.2564102564102564,
"grad_norm": 0.20471802353858948,
"learning_rate": 2.381588207706003e-06,
"loss": 0.6815849542617798,
"step": 1232
},
{
"epoch": 2.26007326007326,
"grad_norm": 0.3602707087993622,
"learning_rate": 2.3687760244743198e-06,
"loss": 1.157220482826233,
"step": 1234
},
{
"epoch": 2.2637362637362637,
"grad_norm": 0.8389260172843933,
"learning_rate": 2.356012861129845e-06,
"loss": 0.7905306220054626,
"step": 1236
},
{
"epoch": 2.2673992673992673,
"grad_norm": 0.12152452766895294,
"learning_rate": 2.3432989174822496e-06,
"loss": 0.998111367225647,
"step": 1238
},
{
"epoch": 2.271062271062271,
"grad_norm": 0.15299645066261292,
"learning_rate": 2.330634392570658e-06,
"loss": 0.9482631683349609,
"step": 1240
},
{
"epoch": 2.2747252747252746,
"grad_norm": 0.22156605124473572,
"learning_rate": 2.3180194846605367e-06,
"loss": 0.9491860866546631,
"step": 1242
},
{
"epoch": 2.2783882783882783,
"grad_norm": 0.1533634215593338,
"learning_rate": 2.3054543912405896e-06,
"loss": 1.1562466621398926,
"step": 1244
},
{
"epoch": 2.282051282051282,
"grad_norm": 0.12872643768787384,
"learning_rate": 2.2929393090196663e-06,
"loss": 0.7593182921409607,
"step": 1246
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.25250881910324097,
"learning_rate": 2.2804744339236796e-06,
"loss": 0.7431901097297668,
"step": 1248
},
{
"epoch": 2.2893772893772892,
"grad_norm": 0.1763988882303238,
"learning_rate": 2.268059961092541e-06,
"loss": 1.127759575843811,
"step": 1250
},
{
"epoch": 2.293040293040293,
"grad_norm": 0.2666459381580353,
"learning_rate": 2.255696084877107e-06,
"loss": 0.8839851021766663,
"step": 1252
},
{
"epoch": 2.2967032967032965,
"grad_norm": 0.18553560972213745,
"learning_rate": 2.2433829988361316e-06,
"loss": 1.2005871534347534,
"step": 1254
},
{
"epoch": 2.3003663003663,
"grad_norm": 0.20974372327327728,
"learning_rate": 2.231120895733245e-06,
"loss": 1.2160831689834595,
"step": 1256
},
{
"epoch": 2.304029304029304,
"grad_norm": 0.27016669511795044,
"learning_rate": 2.2189099675339233e-06,
"loss": 0.8103601336479187,
"step": 1258
},
{
"epoch": 2.3076923076923075,
"grad_norm": 0.2763507664203644,
"learning_rate": 2.206750405402493e-06,
"loss": 1.232648491859436,
"step": 1260
},
{
"epoch": 2.311355311355311,
"grad_norm": 0.20276162028312683,
"learning_rate": 2.194642399699138e-06,
"loss": 1.0822112560272217,
"step": 1262
},
{
"epoch": 2.315018315018315,
"grad_norm": 0.1820443570613861,
"learning_rate": 2.1825861399769126e-06,
"loss": 0.9380193948745728,
"step": 1264
},
{
"epoch": 2.3186813186813184,
"grad_norm": 0.20645156502723694,
"learning_rate": 2.17058181497878e-06,
"loss": 0.8565780520439148,
"step": 1266
},
{
"epoch": 2.3223443223443225,
"grad_norm": 0.30140256881713867,
"learning_rate": 2.1586296126346566e-06,
"loss": 0.8535648584365845,
"step": 1268
},
{
"epoch": 2.326007326007326,
"grad_norm": 0.27577510476112366,
"learning_rate": 2.1467297200584677e-06,
"loss": 1.2173646688461304,
"step": 1270
},
{
"epoch": 2.32967032967033,
"grad_norm": 0.1859835982322693,
"learning_rate": 2.134882323545221e-06,
"loss": 1.0475445985794067,
"step": 1272
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.5028762817382812,
"learning_rate": 2.123087608568088e-06,
"loss": 0.7030253410339355,
"step": 1274
},
{
"epoch": 2.336996336996337,
"grad_norm": 0.17414085566997528,
"learning_rate": 2.1113457597754977e-06,
"loss": 1.058994174003601,
"step": 1276
},
{
"epoch": 2.340659340659341,
"grad_norm": 0.195421501994133,
"learning_rate": 2.0996569609882555e-06,
"loss": 0.8695497512817383,
"step": 1278
},
{
"epoch": 2.3443223443223444,
"grad_norm": 0.1678563356399536,
"learning_rate": 2.0880213951966564e-06,
"loss": 0.7928240299224854,
"step": 1280
},
{
"epoch": 2.347985347985348,
"grad_norm": 0.15970492362976074,
"learning_rate": 2.076439244557622e-06,
"loss": 0.6427817344665527,
"step": 1282
},
{
"epoch": 2.3516483516483517,
"grad_norm": 0.15121600031852722,
"learning_rate": 2.064910690391849e-06,
"loss": 1.1278434991836548,
"step": 1284
},
{
"epoch": 2.3553113553113554,
"grad_norm": 2.746044397354126,
"learning_rate": 2.053435913180976e-06,
"loss": 0.6882444024085999,
"step": 1286
},
{
"epoch": 2.358974358974359,
"grad_norm": 0.14493419229984283,
"learning_rate": 2.0420150925647476e-06,
"loss": 0.9737670421600342,
"step": 1288
},
{
"epoch": 2.3626373626373627,
"grad_norm": 0.1830594837665558,
"learning_rate": 2.0306484073382144e-06,
"loss": 0.9390268325805664,
"step": 1290
},
{
"epoch": 2.3663003663003663,
"grad_norm": 0.17552392184734344,
"learning_rate": 2.019336035448922e-06,
"loss": 0.8130999207496643,
"step": 1292
},
{
"epoch": 2.36996336996337,
"grad_norm": 0.4816751182079315,
"learning_rate": 2.008078153994131e-06,
"loss": 0.9279530644416809,
"step": 1294
},
{
"epoch": 2.3736263736263736,
"grad_norm": 0.2578529119491577,
"learning_rate": 1.99687493921805e-06,
"loss": 1.3056340217590332,
"step": 1296
},
{
"epoch": 2.3772893772893773,
"grad_norm": 0.24960176646709442,
"learning_rate": 1.9857265665090637e-06,
"loss": 1.138514757156372,
"step": 1298
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.20973335206508636,
"learning_rate": 1.9746332103969994e-06,
"loss": 1.196106195449829,
"step": 1300
},
{
"epoch": 2.3846153846153846,
"grad_norm": 0.4483489990234375,
"learning_rate": 1.9635950445503867e-06,
"loss": 0.952997624874115,
"step": 1302
},
{
"epoch": 2.3882783882783882,
"grad_norm": 0.9477534890174866,
"learning_rate": 1.9526122417737396e-06,
"loss": 0.5085421204566956,
"step": 1304
},
{
"epoch": 2.391941391941392,
"grad_norm": 0.17980064451694489,
"learning_rate": 1.941684974004857e-06,
"loss": 0.9798279404640198,
"step": 1306
},
{
"epoch": 2.3956043956043955,
"grad_norm": 0.21208752691745758,
"learning_rate": 1.930813412312129e-06,
"loss": 1.1446267366409302,
"step": 1308
},
{
"epoch": 2.399267399267399,
"grad_norm": 0.14319059252738953,
"learning_rate": 1.919997726891847e-06,
"loss": 0.5433471202850342,
"step": 1310
},
{
"epoch": 2.402930402930403,
"grad_norm": 0.25561878085136414,
"learning_rate": 1.909238087065559e-06,
"loss": 1.1503570079803467,
"step": 1312
},
{
"epoch": 2.4065934065934065,
"grad_norm": 0.13398070633411407,
"learning_rate": 1.8985346612774058e-06,
"loss": 0.8720892667770386,
"step": 1314
},
{
"epoch": 2.41025641025641,
"grad_norm": 0.1498894989490509,
"learning_rate": 1.8878876170914862e-06,
"loss": 1.14559006690979,
"step": 1316
},
{
"epoch": 2.413919413919414,
"grad_norm": 0.3363962769508362,
"learning_rate": 1.877297121189233e-06,
"loss": 0.8333287239074707,
"step": 1318
},
{
"epoch": 2.4175824175824174,
"grad_norm": 0.15025848150253296,
"learning_rate": 1.8667633393668097e-06,
"loss": 0.8138965368270874,
"step": 1320
},
{
"epoch": 2.421245421245421,
"grad_norm": 0.21664276719093323,
"learning_rate": 1.856286436532506e-06,
"loss": 0.689363420009613,
"step": 1322
},
{
"epoch": 2.4249084249084247,
"grad_norm": 1.3246759176254272,
"learning_rate": 1.845866576704165e-06,
"loss": 0.7871432900428772,
"step": 1324
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.38431447744369507,
"learning_rate": 1.8355039230066068e-06,
"loss": 0.7976049184799194,
"step": 1326
},
{
"epoch": 2.4322344322344325,
"grad_norm": 0.07272528856992722,
"learning_rate": 1.8251986376690806e-06,
"loss": 0.734397292137146,
"step": 1328
},
{
"epoch": 2.435897435897436,
"grad_norm": 0.23010677099227905,
"learning_rate": 1.8149508820227258e-06,
"loss": 0.8264967799186707,
"step": 1330
},
{
"epoch": 2.4395604395604398,
"grad_norm": 0.2325713038444519,
"learning_rate": 1.8047608164980393e-06,
"loss": 1.1099257469177246,
"step": 1332
},
{
"epoch": 2.4432234432234434,
"grad_norm": 0.2141243815422058,
"learning_rate": 1.7946286006223728e-06,
"loss": 0.7992602586746216,
"step": 1334
},
{
"epoch": 2.446886446886447,
"grad_norm": 0.3261476755142212,
"learning_rate": 1.7845543930174288e-06,
"loss": 0.7330154776573181,
"step": 1336
},
{
"epoch": 2.4505494505494507,
"grad_norm": 0.19834889471530914,
"learning_rate": 1.7745383513967784e-06,
"loss": 1.0567998886108398,
"step": 1338
},
{
"epoch": 2.4542124542124544,
"grad_norm": 0.13338837027549744,
"learning_rate": 1.7645806325633975e-06,
"loss": 0.9307959675788879,
"step": 1340
},
{
"epoch": 2.457875457875458,
"grad_norm": 0.0941123366355896,
"learning_rate": 1.7546813924072064e-06,
"loss": 0.7225639820098877,
"step": 1342
},
{
"epoch": 2.4615384615384617,
"grad_norm": 0.11015522480010986,
"learning_rate": 1.7448407859026267e-06,
"loss": 0.8351444602012634,
"step": 1344
},
{
"epoch": 2.4652014652014653,
"grad_norm": 0.16074956953525543,
"learning_rate": 1.7350589671061657e-06,
"loss": 1.1353893280029297,
"step": 1346
},
{
"epoch": 2.468864468864469,
"grad_norm": 0.21541282534599304,
"learning_rate": 1.7253360891539963e-06,
"loss": 1.1350133419036865,
"step": 1348
},
{
"epoch": 2.4725274725274726,
"grad_norm": 0.18318095803260803,
"learning_rate": 1.7156723042595602e-06,
"loss": 0.7882329821586609,
"step": 1350
},
{
"epoch": 2.4761904761904763,
"grad_norm": 0.20827817916870117,
"learning_rate": 1.7060677637111863e-06,
"loss": 0.9048058390617371,
"step": 1352
},
{
"epoch": 2.47985347985348,
"grad_norm": 0.3399142622947693,
"learning_rate": 1.6965226178697237e-06,
"loss": 0.988274335861206,
"step": 1354
},
{
"epoch": 2.4835164835164836,
"grad_norm": 0.03409822657704353,
"learning_rate": 1.6870370161661852e-06,
"loss": 0.9388930201530457,
"step": 1356
},
{
"epoch": 2.4871794871794872,
"grad_norm": 0.11549941450357437,
"learning_rate": 1.6776111070994129e-06,
"loss": 1.1141780614852905,
"step": 1358
},
{
"epoch": 2.490842490842491,
"grad_norm": 0.21529677510261536,
"learning_rate": 1.6682450382337445e-06,
"loss": 0.9177558422088623,
"step": 1360
},
{
"epoch": 2.4945054945054945,
"grad_norm": 0.21112927794456482,
"learning_rate": 1.65893895619671e-06,
"loss": 0.8482896685600281,
"step": 1362
},
{
"epoch": 2.498168498168498,
"grad_norm": 0.3684331476688385,
"learning_rate": 1.6496930066767381e-06,
"loss": 0.8899385333061218,
"step": 1364
},
{
"epoch": 2.501831501831502,
"grad_norm": 0.5180490016937256,
"learning_rate": 1.6405073344208652e-06,
"loss": 1.1375821828842163,
"step": 1366
},
{
"epoch": 2.5054945054945055,
"grad_norm": 0.14490839838981628,
"learning_rate": 1.6313820832324833e-06,
"loss": 0.8489875793457031,
"step": 1368
},
{
"epoch": 2.509157509157509,
"grad_norm": 0.26114216446876526,
"learning_rate": 1.6223173959690766e-06,
"loss": 1.0175533294677734,
"step": 1370
},
{
"epoch": 2.5128205128205128,
"grad_norm": 0.07394483685493469,
"learning_rate": 1.6133134145399895e-06,
"loss": 0.679277777671814,
"step": 1372
},
{
"epoch": 2.5164835164835164,
"grad_norm": 0.22844818234443665,
"learning_rate": 1.6043702799042097e-06,
"loss": 0.8118609189987183,
"step": 1374
},
{
"epoch": 2.52014652014652,
"grad_norm": 0.946811854839325,
"learning_rate": 1.5954881320681541e-06,
"loss": 0.9923216700553894,
"step": 1376
},
{
"epoch": 2.5238095238095237,
"grad_norm": 0.46443161368370056,
"learning_rate": 1.586667110083481e-06,
"loss": 0.8106738924980164,
"step": 1378
},
{
"epoch": 2.5274725274725274,
"grad_norm": 0.1713973730802536,
"learning_rate": 1.5779073520449115e-06,
"loss": 0.9600465893745422,
"step": 1380
},
{
"epoch": 2.531135531135531,
"grad_norm": 0.023757750168442726,
"learning_rate": 1.5692089950880671e-06,
"loss": 0.9061873555183411,
"step": 1382
},
{
"epoch": 2.5347985347985347,
"grad_norm": 0.13470512628555298,
"learning_rate": 1.5605721753873273e-06,
"loss": 0.8136062622070312,
"step": 1384
},
{
"epoch": 2.5384615384615383,
"grad_norm": 0.6172438859939575,
"learning_rate": 1.5519970281536947e-06,
"loss": 1.1290100812911987,
"step": 1386
},
{
"epoch": 2.542124542124542,
"grad_norm": 0.229129359126091,
"learning_rate": 1.5434836876326723e-06,
"loss": 0.7960153222084045,
"step": 1388
},
{
"epoch": 2.5457875457875456,
"grad_norm": 0.23978465795516968,
"learning_rate": 1.5350322871021738e-06,
"loss": 0.8506826162338257,
"step": 1390
},
{
"epoch": 2.5494505494505493,
"grad_norm": 0.4824867844581604,
"learning_rate": 1.5266429588704294e-06,
"loss": 1.025938868522644,
"step": 1392
},
{
"epoch": 2.553113553113553,
"grad_norm": 0.18570579588413239,
"learning_rate": 1.518315834273915e-06,
"loss": 0.7308077216148376,
"step": 1394
},
{
"epoch": 2.5567765567765566,
"grad_norm": 0.11341089010238647,
"learning_rate": 1.510051043675297e-06,
"loss": 0.37588629126548767,
"step": 1396
},
{
"epoch": 2.5604395604395602,
"grad_norm": 0.19933566451072693,
"learning_rate": 1.5018487164613931e-06,
"loss": 1.1432240009307861,
"step": 1398
},
{
"epoch": 2.564102564102564,
"grad_norm": 0.2133670151233673,
"learning_rate": 1.4937089810411428e-06,
"loss": 1.141809105873108,
"step": 1400
},
{
"epoch": 2.5677655677655675,
"grad_norm": 0.3012371361255646,
"learning_rate": 1.4856319648436034e-06,
"loss": 0.9912227988243103,
"step": 1402
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.16756081581115723,
"learning_rate": 1.4776177943159484e-06,
"loss": 1.1359539031982422,
"step": 1404
},
{
"epoch": 2.575091575091575,
"grad_norm": 0.9049347043037415,
"learning_rate": 1.4696665949214889e-06,
"loss": 0.5541988611221313,
"step": 1406
},
{
"epoch": 2.578754578754579,
"grad_norm": 0.1514206826686859,
"learning_rate": 1.4617784911377158e-06,
"loss": 1.2034826278686523,
"step": 1408
},
{
"epoch": 2.5824175824175826,
"grad_norm": 0.5452237129211426,
"learning_rate": 1.4539536064543453e-06,
"loss": 0.9588869214057922,
"step": 1410
},
{
"epoch": 2.586080586080586,
"grad_norm": 0.43010222911834717,
"learning_rate": 1.446192063371385e-06,
"loss": 0.90684974193573,
"step": 1412
},
{
"epoch": 2.58974358974359,
"grad_norm": 0.2458840310573578,
"learning_rate": 1.4384939833972197e-06,
"loss": 1.0172938108444214,
"step": 1414
},
{
"epoch": 2.5934065934065935,
"grad_norm": 0.1467057466506958,
"learning_rate": 1.4308594870467056e-06,
"loss": 1.1102759838104248,
"step": 1416
},
{
"epoch": 2.597069597069597,
"grad_norm": 0.6239453554153442,
"learning_rate": 1.4232886938392893e-06,
"loss": 0.8101827502250671,
"step": 1418
},
{
"epoch": 2.600732600732601,
"grad_norm": 0.18740800023078918,
"learning_rate": 1.4157817222971312e-06,
"loss": 1.1065106391906738,
"step": 1420
},
{
"epoch": 2.6043956043956045,
"grad_norm": 0.5177209377288818,
"learning_rate": 1.4083386899432489e-06,
"loss": 1.074950933456421,
"step": 1422
},
{
"epoch": 2.608058608058608,
"grad_norm": 0.18076905608177185,
"learning_rate": 1.4009597132996842e-06,
"loss": 1.2177599668502808,
"step": 1424
},
{
"epoch": 2.6117216117216118,
"grad_norm": 0.7345294952392578,
"learning_rate": 1.393644907885674e-06,
"loss": 1.3366779088974,
"step": 1426
},
{
"epoch": 2.6153846153846154,
"grad_norm": 0.14318975806236267,
"learning_rate": 1.3863943882158417e-06,
"loss": 1.1753196716308594,
"step": 1428
},
{
"epoch": 2.619047619047619,
"grad_norm": 0.1478182077407837,
"learning_rate": 1.379208267798406e-06,
"loss": 1.1063532829284668,
"step": 1430
},
{
"epoch": 2.6227106227106227,
"grad_norm": 0.274620920419693,
"learning_rate": 1.3720866591334045e-06,
"loss": 1.0099287033081055,
"step": 1432
},
{
"epoch": 2.6263736263736264,
"grad_norm": 0.365405797958374,
"learning_rate": 1.3650296737109292e-06,
"loss": 1.0578190088272095,
"step": 1434
},
{
"epoch": 2.63003663003663,
"grad_norm": 0.21154209971427917,
"learning_rate": 1.3580374220093868e-06,
"loss": 1.346867322921753,
"step": 1436
},
{
"epoch": 2.6336996336996337,
"grad_norm": 0.19875630736351013,
"learning_rate": 1.3511100134937625e-06,
"loss": 1.1731492280960083,
"step": 1438
},
{
"epoch": 2.6373626373626373,
"grad_norm": 0.3672538101673126,
"learning_rate": 1.3442475566139093e-06,
"loss": 1.13294517993927,
"step": 1440
},
{
"epoch": 2.641025641025641,
"grad_norm": 0.3166159689426422,
"learning_rate": 1.3374501588028546e-06,
"loss": 1.1464821100234985,
"step": 1442
},
{
"epoch": 2.6446886446886446,
"grad_norm": 0.28593555092811584,
"learning_rate": 1.3307179264751082e-06,
"loss": 1.1436622142791748,
"step": 1444
},
{
"epoch": 2.6483516483516483,
"grad_norm": 0.3862296938896179,
"learning_rate": 1.3240509650250083e-06,
"loss": 0.8166991472244263,
"step": 1446
},
{
"epoch": 2.652014652014652,
"grad_norm": 0.17282630503177643,
"learning_rate": 1.3174493788250605e-06,
"loss": 0.8451816439628601,
"step": 1448
},
{
"epoch": 2.6556776556776556,
"grad_norm": 0.22388476133346558,
"learning_rate": 1.3109132712243117e-06,
"loss": 1.1225379705429077,
"step": 1450
},
{
"epoch": 2.659340659340659,
"grad_norm": 0.15126027166843414,
"learning_rate": 1.3044427445467276e-06,
"loss": 0.7850918769836426,
"step": 1452
},
{
"epoch": 2.663003663003663,
"grad_norm": 0.05748463794589043,
"learning_rate": 1.2980379000895946e-06,
"loss": 0.7346314191818237,
"step": 1454
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.4359929859638214,
"learning_rate": 1.2916988381219303e-06,
"loss": 1.1165975332260132,
"step": 1456
},
{
"epoch": 2.67032967032967,
"grad_norm": 0.20032697916030884,
"learning_rate": 1.2854256578829148e-06,
"loss": 0.7857989072799683,
"step": 1458
},
{
"epoch": 2.6739926739926743,
"grad_norm": 0.06527489423751831,
"learning_rate": 1.2792184575803392e-06,
"loss": 0.8251097798347473,
"step": 1460
},
{
"epoch": 2.677655677655678,
"grad_norm": 0.28165748715400696,
"learning_rate": 1.2730773343890662e-06,
"loss": 0.8670933842658997,
"step": 1462
},
{
"epoch": 2.6813186813186816,
"grad_norm": 0.3216964602470398,
"learning_rate": 1.2670023844495071e-06,
"loss": 1.1086490154266357,
"step": 1464
},
{
"epoch": 2.684981684981685,
"grad_norm": 0.1745329648256302,
"learning_rate": 1.2609937028661226e-06,
"loss": 0.908940315246582,
"step": 1466
},
{
"epoch": 2.688644688644689,
"grad_norm": 0.12318509072065353,
"learning_rate": 1.2550513837059261e-06,
"loss": 1.0815136432647705,
"step": 1468
},
{
"epoch": 2.6923076923076925,
"grad_norm": 0.37340617179870605,
"learning_rate": 1.2491755199970188e-06,
"loss": 0.6923399567604065,
"step": 1470
},
{
"epoch": 2.695970695970696,
"grad_norm": 0.20242176949977875,
"learning_rate": 1.2433662037271263e-06,
"loss": 0.8187569379806519,
"step": 1472
},
{
"epoch": 2.6996336996337,
"grad_norm": 0.6501942873001099,
"learning_rate": 1.2376235258421628e-06,
"loss": 0.4654901623725891,
"step": 1474
},
{
"epoch": 2.7032967032967035,
"grad_norm": 0.21495883166790009,
"learning_rate": 1.2319475762448084e-06,
"loss": 1.17780339717865,
"step": 1476
},
{
"epoch": 2.706959706959707,
"grad_norm": 0.34030434489250183,
"learning_rate": 1.2263384437930969e-06,
"loss": 0.7136227488517761,
"step": 1478
},
{
"epoch": 2.7106227106227108,
"grad_norm": 0.8260899782180786,
"learning_rate": 1.2207962162990287e-06,
"loss": 1.1193125247955322,
"step": 1480
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.217088520526886,
"learning_rate": 1.2153209805271943e-06,
"loss": 1.132580280303955,
"step": 1482
},
{
"epoch": 2.717948717948718,
"grad_norm": 0.6372915506362915,
"learning_rate": 1.2099128221934164e-06,
"loss": 1.0393377542495728,
"step": 1484
},
{
"epoch": 2.7216117216117217,
"grad_norm": 0.14800269901752472,
"learning_rate": 1.2045718259634083e-06,
"loss": 1.1727163791656494,
"step": 1486
},
{
"epoch": 2.7252747252747254,
"grad_norm": 0.1804278939962387,
"learning_rate": 1.1992980754514497e-06,
"loss": 1.1531107425689697,
"step": 1488
},
{
"epoch": 2.728937728937729,
"grad_norm": 0.4734005331993103,
"learning_rate": 1.1940916532190739e-06,
"loss": 0.5333794951438904,
"step": 1490
},
{
"epoch": 2.7326007326007327,
"grad_norm": 0.24780096113681793,
"learning_rate": 1.1889526407737776e-06,
"loss": 1.1573615074157715,
"step": 1492
},
{
"epoch": 2.7362637362637363,
"grad_norm": 0.2443196028470993,
"learning_rate": 1.1838811185677466e-06,
"loss": 0.6827471256256104,
"step": 1494
},
{
"epoch": 2.73992673992674,
"grad_norm": 0.5809857249259949,
"learning_rate": 1.1788771659965935e-06,
"loss": 1.2393468618392944,
"step": 1496
},
{
"epoch": 2.7435897435897436,
"grad_norm": 0.4661528170108795,
"learning_rate": 1.173940861398117e-06,
"loss": 1.1121079921722412,
"step": 1498
},
{
"epoch": 2.7472527472527473,
"grad_norm": 0.27154994010925293,
"learning_rate": 1.1690722820510723e-06,
"loss": 0.7914168834686279,
"step": 1500
},
{
"epoch": 2.750915750915751,
"grad_norm": 0.18499144911766052,
"learning_rate": 1.164271504173964e-06,
"loss": 1.0800108909606934,
"step": 1502
},
{
"epoch": 2.7545787545787546,
"grad_norm": 0.37535104155540466,
"learning_rate": 1.159538602923855e-06,
"loss": 1.1396592855453491,
"step": 1504
},
{
"epoch": 2.758241758241758,
"grad_norm": 0.31983864307403564,
"learning_rate": 1.1548736523951822e-06,
"loss": 1.1717373132705688,
"step": 1506
},
{
"epoch": 2.761904761904762,
"grad_norm": 0.04418055713176727,
"learning_rate": 1.1502767256186053e-06,
"loss": 0.9536030292510986,
"step": 1508
},
{
"epoch": 2.7655677655677655,
"grad_norm": 0.13261856138706207,
"learning_rate": 1.1457478945598591e-06,
"loss": 1.0200964212417603,
"step": 1510
},
{
"epoch": 2.769230769230769,
"grad_norm": 0.29484888911247253,
"learning_rate": 1.1412872301186253e-06,
"loss": 0.9747733473777771,
"step": 1512
},
{
"epoch": 2.772893772893773,
"grad_norm": 0.1159660741686821,
"learning_rate": 1.1368948021274269e-06,
"loss": 1.116559624671936,
"step": 1514
},
{
"epoch": 2.7765567765567765,
"grad_norm": 0.3250535726547241,
"learning_rate": 1.1325706793505317e-06,
"loss": 1.064975380897522,
"step": 1516
},
{
"epoch": 2.78021978021978,
"grad_norm": 0.18949034810066223,
"learning_rate": 1.1283149294828773e-06,
"loss": 1.0048205852508545,
"step": 1518
},
{
"epoch": 2.7838827838827838,
"grad_norm": 0.39678439497947693,
"learning_rate": 1.1241276191490097e-06,
"loss": 0.5427751541137695,
"step": 1520
},
{
"epoch": 2.7875457875457874,
"grad_norm": 1.3462748527526855,
"learning_rate": 1.120008813902044e-06,
"loss": 0.7995284199714661,
"step": 1522
},
{
"epoch": 2.791208791208791,
"grad_norm": 0.15732638537883759,
"learning_rate": 1.1159585782226325e-06,
"loss": 0.8446041345596313,
"step": 1524
},
{
"epoch": 2.7948717948717947,
"grad_norm": 0.2313084453344345,
"learning_rate": 1.1119769755179595e-06,
"loss": 1.1773189306259155,
"step": 1526
},
{
"epoch": 2.7985347985347984,
"grad_norm": 0.14309756457805634,
"learning_rate": 1.1080640681207485e-06,
"loss": 1.1459267139434814,
"step": 1528
},
{
"epoch": 2.802197802197802,
"grad_norm": 0.1798963099718094,
"learning_rate": 1.104219917288284e-06,
"loss": 1.1224641799926758,
"step": 1530
},
{
"epoch": 2.8058608058608057,
"grad_norm": 0.24613995850086212,
"learning_rate": 1.100444583201454e-06,
"loss": 1.016000509262085,
"step": 1532
},
{
"epoch": 2.8095238095238093,
"grad_norm": 0.178895965218544,
"learning_rate": 1.0967381249638085e-06,
"loss": 0.7900265455245972,
"step": 1534
},
{
"epoch": 2.813186813186813,
"grad_norm": 0.2273297756910324,
"learning_rate": 1.0931006006006324e-06,
"loss": 1.347412109375,
"step": 1536
},
{
"epoch": 2.8168498168498166,
"grad_norm": 0.21277707815170288,
"learning_rate": 1.089532067058039e-06,
"loss": 0.9508707523345947,
"step": 1538
},
{
"epoch": 2.8205128205128203,
"grad_norm": 0.19118960201740265,
"learning_rate": 1.0860325802020772e-06,
"loss": 0.8098848462104797,
"step": 1540
},
{
"epoch": 2.824175824175824,
"grad_norm": 0.14161139726638794,
"learning_rate": 1.0826021948178566e-06,
"loss": 0.9036679863929749,
"step": 1542
},
{
"epoch": 2.8278388278388276,
"grad_norm": 0.1456916779279709,
"learning_rate": 1.0792409646086922e-06,
"loss": 1.1096038818359375,
"step": 1544
},
{
"epoch": 2.8315018315018317,
"grad_norm": 0.5517901182174683,
"learning_rate": 1.0759489421952602e-06,
"loss": 1.1584891080856323,
"step": 1546
},
{
"epoch": 2.8351648351648353,
"grad_norm": 0.11882911622524261,
"learning_rate": 1.0727261791147784e-06,
"loss": 1.2254421710968018,
"step": 1548
},
{
"epoch": 2.838827838827839,
"grad_norm": 0.23024114966392517,
"learning_rate": 1.0695727258201938e-06,
"loss": 0.8998859524726868,
"step": 1550
},
{
"epoch": 2.8424908424908426,
"grad_norm": 0.07301481068134308,
"learning_rate": 1.0664886316793988e-06,
"loss": 0.6015828847885132,
"step": 1552
},
{
"epoch": 2.8461538461538463,
"grad_norm": 0.12812356650829315,
"learning_rate": 1.0634739449744534e-06,
"loss": 1.156007170677185,
"step": 1554
},
{
"epoch": 2.84981684981685,
"grad_norm": 0.1785007119178772,
"learning_rate": 1.0605287129008337e-06,
"loss": 1.0002185106277466,
"step": 1556
},
{
"epoch": 2.8534798534798536,
"grad_norm": 0.5185611844062805,
"learning_rate": 1.0576529815666892e-06,
"loss": 1.142732858657837,
"step": 1558
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.2965824007987976,
"learning_rate": 1.0548467959921217e-06,
"loss": 0.6404973864555359,
"step": 1560
},
{
"epoch": 2.860805860805861,
"grad_norm": 0.1833876669406891,
"learning_rate": 1.0521102001084835e-06,
"loss": 0.7605476975440979,
"step": 1562
},
{
"epoch": 2.8644688644688645,
"grad_norm": 0.5239128470420837,
"learning_rate": 1.0494432367576862e-06,
"loss": 0.9357516169548035,
"step": 1564
},
{
"epoch": 2.868131868131868,
"grad_norm": 0.3669067621231079,
"learning_rate": 1.0468459476915317e-06,
"loss": 0.7723519206047058,
"step": 1566
},
{
"epoch": 2.871794871794872,
"grad_norm": 0.40440791845321655,
"learning_rate": 1.044318373571057e-06,
"loss": 0.735063374042511,
"step": 1568
},
{
"epoch": 2.8754578754578755,
"grad_norm": 0.09417320787906647,
"learning_rate": 1.0418605539659014e-06,
"loss": 0.7597877979278564,
"step": 1570
},
{
"epoch": 2.879120879120879,
"grad_norm": 0.40769991278648376,
"learning_rate": 1.0394725273536817e-06,
"loss": 0.8062982559204102,
"step": 1572
},
{
"epoch": 2.8827838827838828,
"grad_norm": 0.2923339307308197,
"learning_rate": 1.0371543311193944e-06,
"loss": 1.0577229261398315,
"step": 1574
},
{
"epoch": 2.8864468864468864,
"grad_norm": 0.3852575719356537,
"learning_rate": 1.034906001554827e-06,
"loss": 1.0765886306762695,
"step": 1576
},
{
"epoch": 2.89010989010989,
"grad_norm": 0.12469828873872757,
"learning_rate": 1.0327275738579934e-06,
"loss": 0.9185457229614258,
"step": 1578
},
{
"epoch": 2.8937728937728937,
"grad_norm": 0.3026789724826813,
"learning_rate": 1.0306190821325792e-06,
"loss": 1.2011407613754272,
"step": 1580
},
{
"epoch": 2.8974358974358974,
"grad_norm": 0.2271515429019928,
"learning_rate": 1.0285805593874105e-06,
"loss": 0.8856844305992126,
"step": 1582
},
{
"epoch": 2.901098901098901,
"grad_norm": 0.11647852510213852,
"learning_rate": 1.026612037535935e-06,
"loss": 0.8170561790466309,
"step": 1584
},
{
"epoch": 2.9047619047619047,
"grad_norm": 0.12595300376415253,
"learning_rate": 1.0247135473957253e-06,
"loss": 0.8300210237503052,
"step": 1586
},
{
"epoch": 2.9084249084249083,
"grad_norm": 0.21114195883274078,
"learning_rate": 1.0228851186879932e-06,
"loss": 1.1618390083312988,
"step": 1588
},
{
"epoch": 2.912087912087912,
"grad_norm": 0.2155926376581192,
"learning_rate": 1.0211267800371263e-06,
"loss": 1.2564477920532227,
"step": 1590
},
{
"epoch": 2.9157509157509156,
"grad_norm": 0.40559911727905273,
"learning_rate": 1.01943855897024e-06,
"loss": 0.628135085105896,
"step": 1592
},
{
"epoch": 2.9194139194139193,
"grad_norm": 0.22793929278850555,
"learning_rate": 1.0178204819167451e-06,
"loss": 1.1933345794677734,
"step": 1594
},
{
"epoch": 2.9230769230769234,
"grad_norm": 0.30155590176582336,
"learning_rate": 1.0162725742079355e-06,
"loss": 0.804075300693512,
"step": 1596
},
{
"epoch": 2.926739926739927,
"grad_norm": 0.4184918999671936,
"learning_rate": 1.0147948600765919e-06,
"loss": 1.19660484790802,
"step": 1598
},
{
"epoch": 2.9304029304029307,
"grad_norm": 0.1114964559674263,
"learning_rate": 1.0133873626565994e-06,
"loss": 0.8411705493927002,
"step": 1600
},
{
"epoch": 2.9340659340659343,
"grad_norm": 0.19453909993171692,
"learning_rate": 1.0120501039825902e-06,
"loss": 1.1576671600341797,
"step": 1602
},
{
"epoch": 2.937728937728938,
"grad_norm": 0.06379074603319168,
"learning_rate": 1.0107831049895937e-06,
"loss": 0.9160769581794739,
"step": 1604
},
{
"epoch": 2.9413919413919416,
"grad_norm": 0.19216328859329224,
"learning_rate": 1.009586385512713e-06,
"loss": 1.1501901149749756,
"step": 1606
},
{
"epoch": 2.9450549450549453,
"grad_norm": 1.06039297580719,
"learning_rate": 1.0084599642868117e-06,
"loss": 0.7293557524681091,
"step": 1608
},
{
"epoch": 2.948717948717949,
"grad_norm": 0.30301105976104736,
"learning_rate": 1.0074038589462206e-06,
"loss": 1.062568187713623,
"step": 1610
},
{
"epoch": 2.9523809523809526,
"grad_norm": 0.22648635506629944,
"learning_rate": 1.0064180860244631e-06,
"loss": 0.9736372828483582,
"step": 1612
},
{
"epoch": 2.956043956043956,
"grad_norm": 0.6904452443122864,
"learning_rate": 1.0055026609539963e-06,
"loss": 0.7423111796379089,
"step": 1614
},
{
"epoch": 2.95970695970696,
"grad_norm": 0.1181085854768753,
"learning_rate": 1.004657598065967e-06,
"loss": 0.8746036291122437,
"step": 1616
},
{
"epoch": 2.9633699633699635,
"grad_norm": 0.25594648718833923,
"learning_rate": 1.0038829105899911e-06,
"loss": 1.4269702434539795,
"step": 1618
},
{
"epoch": 2.967032967032967,
"grad_norm": 0.6465041041374207,
"learning_rate": 1.0031786106539428e-06,
"loss": 1.2628575563430786,
"step": 1620
},
{
"epoch": 2.970695970695971,
"grad_norm": 0.17348702251911163,
"learning_rate": 1.0025447092837677e-06,
"loss": 0.964820921421051,
"step": 1622
},
{
"epoch": 2.9743589743589745,
"grad_norm": 3.296407699584961,
"learning_rate": 1.0019812164033077e-06,
"loss": 0.7985995411872864,
"step": 1624
},
{
"epoch": 2.978021978021978,
"grad_norm": 0.11664870381355286,
"learning_rate": 1.0014881408341481e-06,
"loss": 0.9173464775085449,
"step": 1626
},
{
"epoch": 2.9816849816849818,
"grad_norm": 0.10260229557752609,
"learning_rate": 1.0010654902954773e-06,
"loss": 0.9848383069038391,
"step": 1628
},
{
"epoch": 2.9853479853479854,
"grad_norm": 0.200631782412529,
"learning_rate": 1.0007132714039676e-06,
"loss": 1.4417872428894043,
"step": 1630
},
{
"epoch": 2.989010989010989,
"grad_norm": 0.32539039850234985,
"learning_rate": 1.0004314896736694e-06,
"loss": 1.0627717971801758,
"step": 1632
},
{
"epoch": 2.9926739926739927,
"grad_norm": 0.17502747476100922,
"learning_rate": 1.0002201495159287e-06,
"loss": 0.8705639839172363,
"step": 1634
},
{
"epoch": 2.9963369963369964,
"grad_norm": 0.19851884245872498,
"learning_rate": 1.0000792542393144e-06,
"loss": 1.3285937309265137,
"step": 1636
},
{
"epoch": 3.0,
"grad_norm": 0.10878675431013107,
"learning_rate": 1.0000088060495672e-06,
"loss": 1.0932306051254272,
"step": 1638
},
{
"epoch": 3.0,
"step": 1638,
"total_flos": 8.4482141520606e+18,
"train_loss": 1.0599846049178943,
"train_runtime": 55254.3839,
"train_samples_per_second": 0.711,
"train_steps_per_second": 0.03
}
],
"logging_steps": 2,
"max_steps": 1638,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 99999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.4482141520606e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}