{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 0,
"global_step": 1042,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009596928982725527,
"grad_norm": 0.068359375,
"learning_rate": 9.990403071017275e-06,
"loss": 1.3835,
"step": 1
},
{
"epoch": 0.0019193857965451055,
"grad_norm": 0.0673828125,
"learning_rate": 9.98080614203455e-06,
"loss": 1.3302,
"step": 2
},
{
"epoch": 0.0028790786948176585,
"grad_norm": 0.0703125,
"learning_rate": 9.971209213051824e-06,
"loss": 1.3785,
"step": 3
},
{
"epoch": 0.003838771593090211,
"grad_norm": 0.07080078125,
"learning_rate": 9.961612284069098e-06,
"loss": 1.3093,
"step": 4
},
{
"epoch": 0.0047984644913627635,
"grad_norm": 0.07373046875,
"learning_rate": 9.952015355086372e-06,
"loss": 1.4606,
"step": 5
},
{
"epoch": 0.005758157389635317,
"grad_norm": 0.07958984375,
"learning_rate": 9.942418426103647e-06,
"loss": 1.4186,
"step": 6
},
{
"epoch": 0.0067178502879078695,
"grad_norm": 0.08544921875,
"learning_rate": 9.932821497120923e-06,
"loss": 1.4227,
"step": 7
},
{
"epoch": 0.007677543186180422,
"grad_norm": 0.07763671875,
"learning_rate": 9.923224568138197e-06,
"loss": 1.2789,
"step": 8
},
{
"epoch": 0.008637236084452975,
"grad_norm": 0.0888671875,
"learning_rate": 9.913627639155471e-06,
"loss": 1.4264,
"step": 9
},
{
"epoch": 0.009596928982725527,
"grad_norm": 0.08544921875,
"learning_rate": 9.904030710172746e-06,
"loss": 1.3867,
"step": 10
},
{
"epoch": 0.01055662188099808,
"grad_norm": 0.08837890625,
"learning_rate": 9.89443378119002e-06,
"loss": 1.3162,
"step": 11
},
{
"epoch": 0.011516314779270634,
"grad_norm": 0.091796875,
"learning_rate": 9.884836852207294e-06,
"loss": 1.351,
"step": 12
},
{
"epoch": 0.012476007677543186,
"grad_norm": 0.09765625,
"learning_rate": 9.875239923224569e-06,
"loss": 1.3704,
"step": 13
},
{
"epoch": 0.013435700575815739,
"grad_norm": 0.0986328125,
"learning_rate": 9.865642994241843e-06,
"loss": 1.3264,
"step": 14
},
{
"epoch": 0.014395393474088292,
"grad_norm": 0.1025390625,
"learning_rate": 9.856046065259119e-06,
"loss": 1.3761,
"step": 15
},
{
"epoch": 0.015355086372360844,
"grad_norm": 0.10546875,
"learning_rate": 9.846449136276392e-06,
"loss": 1.3366,
"step": 16
},
{
"epoch": 0.016314779270633396,
"grad_norm": 0.10009765625,
"learning_rate": 9.836852207293666e-06,
"loss": 1.2195,
"step": 17
},
{
"epoch": 0.01727447216890595,
"grad_norm": 0.10888671875,
"learning_rate": 9.82725527831094e-06,
"loss": 1.3383,
"step": 18
},
{
"epoch": 0.018234165067178502,
"grad_norm": 0.1142578125,
"learning_rate": 9.817658349328216e-06,
"loss": 1.4293,
"step": 19
},
{
"epoch": 0.019193857965451054,
"grad_norm": 0.12158203125,
"learning_rate": 9.80806142034549e-06,
"loss": 1.3755,
"step": 20
},
{
"epoch": 0.02015355086372361,
"grad_norm": 0.119140625,
"learning_rate": 9.798464491362765e-06,
"loss": 1.4084,
"step": 21
},
{
"epoch": 0.02111324376199616,
"grad_norm": 0.1259765625,
"learning_rate": 9.78886756238004e-06,
"loss": 1.2938,
"step": 22
},
{
"epoch": 0.022072936660268713,
"grad_norm": 0.1328125,
"learning_rate": 9.779270633397314e-06,
"loss": 1.3799,
"step": 23
},
{
"epoch": 0.023032629558541268,
"grad_norm": 0.1328125,
"learning_rate": 9.769673704414588e-06,
"loss": 1.3184,
"step": 24
},
{
"epoch": 0.02399232245681382,
"grad_norm": 0.13671875,
"learning_rate": 9.760076775431862e-06,
"loss": 1.367,
"step": 25
},
{
"epoch": 0.02495201535508637,
"grad_norm": 0.1884765625,
"learning_rate": 9.750479846449137e-06,
"loss": 1.4573,
"step": 26
},
{
"epoch": 0.025911708253358926,
"grad_norm": 0.169921875,
"learning_rate": 9.740882917466411e-06,
"loss": 1.5804,
"step": 27
},
{
"epoch": 0.026871401151631478,
"grad_norm": 0.1669921875,
"learning_rate": 9.731285988483687e-06,
"loss": 1.4974,
"step": 28
},
{
"epoch": 0.02783109404990403,
"grad_norm": 0.1455078125,
"learning_rate": 9.721689059500961e-06,
"loss": 1.4201,
"step": 29
},
{
"epoch": 0.028790786948176585,
"grad_norm": 0.15625,
"learning_rate": 9.712092130518234e-06,
"loss": 1.3526,
"step": 30
},
{
"epoch": 0.029750479846449136,
"grad_norm": 0.15234375,
"learning_rate": 9.702495201535508e-06,
"loss": 1.3501,
"step": 31
},
{
"epoch": 0.030710172744721688,
"grad_norm": 0.1591796875,
"learning_rate": 9.692898272552784e-06,
"loss": 1.3327,
"step": 32
},
{
"epoch": 0.03166986564299424,
"grad_norm": 0.162109375,
"learning_rate": 9.683301343570059e-06,
"loss": 1.3058,
"step": 33
},
{
"epoch": 0.03262955854126679,
"grad_norm": 0.1669921875,
"learning_rate": 9.673704414587333e-06,
"loss": 1.3515,
"step": 34
},
{
"epoch": 0.03358925143953935,
"grad_norm": 0.177734375,
"learning_rate": 9.664107485604607e-06,
"loss": 1.4229,
"step": 35
},
{
"epoch": 0.0345489443378119,
"grad_norm": 0.158203125,
"learning_rate": 9.654510556621882e-06,
"loss": 1.2888,
"step": 36
},
{
"epoch": 0.03550863723608445,
"grad_norm": 0.189453125,
"learning_rate": 9.644913627639156e-06,
"loss": 1.4405,
"step": 37
},
{
"epoch": 0.036468330134357005,
"grad_norm": 0.169921875,
"learning_rate": 9.63531669865643e-06,
"loss": 1.3298,
"step": 38
},
{
"epoch": 0.03742802303262956,
"grad_norm": 0.1806640625,
"learning_rate": 9.625719769673705e-06,
"loss": 1.3527,
"step": 39
},
{
"epoch": 0.03838771593090211,
"grad_norm": 0.193359375,
"learning_rate": 9.61612284069098e-06,
"loss": 1.3846,
"step": 40
},
{
"epoch": 0.03934740882917467,
"grad_norm": 0.181640625,
"learning_rate": 9.606525911708255e-06,
"loss": 1.4143,
"step": 41
},
{
"epoch": 0.04030710172744722,
"grad_norm": 0.197265625,
"learning_rate": 9.59692898272553e-06,
"loss": 1.3999,
"step": 42
},
{
"epoch": 0.04126679462571977,
"grad_norm": 0.1865234375,
"learning_rate": 9.587332053742802e-06,
"loss": 1.3248,
"step": 43
},
{
"epoch": 0.04222648752399232,
"grad_norm": 0.1796875,
"learning_rate": 9.577735124760078e-06,
"loss": 1.2913,
"step": 44
},
{
"epoch": 0.04318618042226487,
"grad_norm": 0.1884765625,
"learning_rate": 9.568138195777352e-06,
"loss": 1.3192,
"step": 45
},
{
"epoch": 0.044145873320537425,
"grad_norm": 0.1923828125,
"learning_rate": 9.558541266794627e-06,
"loss": 1.3543,
"step": 46
},
{
"epoch": 0.045105566218809984,
"grad_norm": 0.19140625,
"learning_rate": 9.548944337811901e-06,
"loss": 1.2851,
"step": 47
},
{
"epoch": 0.046065259117082535,
"grad_norm": 0.201171875,
"learning_rate": 9.539347408829175e-06,
"loss": 1.3718,
"step": 48
},
{
"epoch": 0.04702495201535509,
"grad_norm": 0.2021484375,
"learning_rate": 9.52975047984645e-06,
"loss": 1.4037,
"step": 49
},
{
"epoch": 0.04798464491362764,
"grad_norm": 0.1923828125,
"learning_rate": 9.520153550863724e-06,
"loss": 1.2992,
"step": 50
},
{
"epoch": 0.04894433781190019,
"grad_norm": 0.1943359375,
"learning_rate": 9.510556621880998e-06,
"loss": 1.3151,
"step": 51
},
{
"epoch": 0.04990403071017274,
"grad_norm": 0.1923828125,
"learning_rate": 9.500959692898273e-06,
"loss": 1.2815,
"step": 52
},
{
"epoch": 0.0508637236084453,
"grad_norm": 0.19921875,
"learning_rate": 9.491362763915549e-06,
"loss": 1.3421,
"step": 53
},
{
"epoch": 0.05182341650671785,
"grad_norm": 0.1923828125,
"learning_rate": 9.481765834932823e-06,
"loss": 1.3319,
"step": 54
},
{
"epoch": 0.052783109404990404,
"grad_norm": 0.1787109375,
"learning_rate": 9.472168905950097e-06,
"loss": 1.2811,
"step": 55
},
{
"epoch": 0.053742802303262956,
"grad_norm": 0.1982421875,
"learning_rate": 9.46257197696737e-06,
"loss": 1.3407,
"step": 56
},
{
"epoch": 0.05470249520153551,
"grad_norm": 0.181640625,
"learning_rate": 9.452975047984646e-06,
"loss": 1.1884,
"step": 57
},
{
"epoch": 0.05566218809980806,
"grad_norm": 0.1904296875,
"learning_rate": 9.44337811900192e-06,
"loss": 1.2598,
"step": 58
},
{
"epoch": 0.05662188099808062,
"grad_norm": 0.193359375,
"learning_rate": 9.433781190019195e-06,
"loss": 1.3258,
"step": 59
},
{
"epoch": 0.05758157389635317,
"grad_norm": 0.2060546875,
"learning_rate": 9.424184261036469e-06,
"loss": 1.3675,
"step": 60
},
{
"epoch": 0.05854126679462572,
"grad_norm": 0.1884765625,
"learning_rate": 9.414587332053743e-06,
"loss": 1.2389,
"step": 61
},
{
"epoch": 0.05950095969289827,
"grad_norm": 0.1826171875,
"learning_rate": 9.404990403071018e-06,
"loss": 1.281,
"step": 62
},
{
"epoch": 0.060460652591170824,
"grad_norm": 0.1904296875,
"learning_rate": 9.395393474088292e-06,
"loss": 1.3178,
"step": 63
},
{
"epoch": 0.061420345489443376,
"grad_norm": 0.1708984375,
"learning_rate": 9.385796545105566e-06,
"loss": 1.215,
"step": 64
},
{
"epoch": 0.06238003838771593,
"grad_norm": 0.1669921875,
"learning_rate": 9.376199616122842e-06,
"loss": 1.1806,
"step": 65
},
{
"epoch": 0.06333973128598848,
"grad_norm": 0.1767578125,
"learning_rate": 9.366602687140117e-06,
"loss": 1.1896,
"step": 66
},
{
"epoch": 0.06429942418426103,
"grad_norm": 0.1728515625,
"learning_rate": 9.357005758157391e-06,
"loss": 1.1949,
"step": 67
},
{
"epoch": 0.06525911708253358,
"grad_norm": 0.21484375,
"learning_rate": 9.347408829174665e-06,
"loss": 1.3085,
"step": 68
},
{
"epoch": 0.06621880998080615,
"grad_norm": 0.173828125,
"learning_rate": 9.33781190019194e-06,
"loss": 1.1966,
"step": 69
},
{
"epoch": 0.0671785028790787,
"grad_norm": 0.1787109375,
"learning_rate": 9.328214971209214e-06,
"loss": 1.2213,
"step": 70
},
{
"epoch": 0.06813819577735125,
"grad_norm": 0.17578125,
"learning_rate": 9.318618042226488e-06,
"loss": 1.2269,
"step": 71
},
{
"epoch": 0.0690978886756238,
"grad_norm": 0.17578125,
"learning_rate": 9.309021113243763e-06,
"loss": 1.2402,
"step": 72
},
{
"epoch": 0.07005758157389635,
"grad_norm": 0.1787109375,
"learning_rate": 9.299424184261039e-06,
"loss": 1.2418,
"step": 73
},
{
"epoch": 0.0710172744721689,
"grad_norm": 0.166015625,
"learning_rate": 9.289827255278311e-06,
"loss": 1.1873,
"step": 74
},
{
"epoch": 0.07197696737044146,
"grad_norm": 0.185546875,
"learning_rate": 9.280230326295585e-06,
"loss": 1.2642,
"step": 75
},
{
"epoch": 0.07293666026871401,
"grad_norm": 0.177734375,
"learning_rate": 9.27063339731286e-06,
"loss": 1.2479,
"step": 76
},
{
"epoch": 0.07389635316698656,
"grad_norm": 0.16015625,
"learning_rate": 9.261036468330134e-06,
"loss": 1.178,
"step": 77
},
{
"epoch": 0.07485604606525911,
"grad_norm": 0.1552734375,
"learning_rate": 9.25143953934741e-06,
"loss": 1.1754,
"step": 78
},
{
"epoch": 0.07581573896353166,
"grad_norm": 0.1611328125,
"learning_rate": 9.241842610364684e-06,
"loss": 1.2165,
"step": 79
},
{
"epoch": 0.07677543186180422,
"grad_norm": 0.16796875,
"learning_rate": 9.232245681381959e-06,
"loss": 1.1865,
"step": 80
},
{
"epoch": 0.07773512476007678,
"grad_norm": 0.1748046875,
"learning_rate": 9.222648752399233e-06,
"loss": 1.2512,
"step": 81
},
{
"epoch": 0.07869481765834933,
"grad_norm": 0.240234375,
"learning_rate": 9.213051823416507e-06,
"loss": 1.4117,
"step": 82
},
{
"epoch": 0.07965451055662189,
"grad_norm": 0.1455078125,
"learning_rate": 9.203454894433782e-06,
"loss": 1.1267,
"step": 83
},
{
"epoch": 0.08061420345489444,
"grad_norm": 0.166015625,
"learning_rate": 9.193857965451056e-06,
"loss": 1.2428,
"step": 84
},
{
"epoch": 0.08157389635316699,
"grad_norm": 0.16796875,
"learning_rate": 9.18426103646833e-06,
"loss": 1.1984,
"step": 85
},
{
"epoch": 0.08253358925143954,
"grad_norm": 0.150390625,
"learning_rate": 9.174664107485606e-06,
"loss": 1.1621,
"step": 86
},
{
"epoch": 0.08349328214971209,
"grad_norm": 0.14453125,
"learning_rate": 9.16506717850288e-06,
"loss": 1.1578,
"step": 87
},
{
"epoch": 0.08445297504798464,
"grad_norm": 0.146484375,
"learning_rate": 9.155470249520153e-06,
"loss": 1.1536,
"step": 88
},
{
"epoch": 0.0854126679462572,
"grad_norm": 0.173828125,
"learning_rate": 9.145873320537428e-06,
"loss": 1.2703,
"step": 89
},
{
"epoch": 0.08637236084452975,
"grad_norm": 0.158203125,
"learning_rate": 9.136276391554704e-06,
"loss": 1.1967,
"step": 90
},
{
"epoch": 0.0873320537428023,
"grad_norm": 0.23828125,
"learning_rate": 9.126679462571978e-06,
"loss": 1.4335,
"step": 91
},
{
"epoch": 0.08829174664107485,
"grad_norm": 0.1513671875,
"learning_rate": 9.117082533589252e-06,
"loss": 1.1648,
"step": 92
},
{
"epoch": 0.0892514395393474,
"grad_norm": 0.140625,
"learning_rate": 9.107485604606527e-06,
"loss": 1.1267,
"step": 93
},
{
"epoch": 0.09021113243761997,
"grad_norm": 0.1259765625,
"learning_rate": 9.097888675623801e-06,
"loss": 1.1272,
"step": 94
},
{
"epoch": 0.09117082533589252,
"grad_norm": 0.138671875,
"learning_rate": 9.088291746641075e-06,
"loss": 1.158,
"step": 95
},
{
"epoch": 0.09213051823416507,
"grad_norm": 0.1484375,
"learning_rate": 9.07869481765835e-06,
"loss": 1.2036,
"step": 96
},
{
"epoch": 0.09309021113243762,
"grad_norm": 0.1279296875,
"learning_rate": 9.069097888675624e-06,
"loss": 1.1793,
"step": 97
},
{
"epoch": 0.09404990403071017,
"grad_norm": 0.1357421875,
"learning_rate": 9.0595009596929e-06,
"loss": 1.1281,
"step": 98
},
{
"epoch": 0.09500959692898273,
"grad_norm": 0.1376953125,
"learning_rate": 9.049904030710174e-06,
"loss": 1.145,
"step": 99
},
{
"epoch": 0.09596928982725528,
"grad_norm": 0.12451171875,
"learning_rate": 9.040307101727449e-06,
"loss": 1.0348,
"step": 100
},
{
"epoch": 0.09692898272552783,
"grad_norm": 0.13671875,
"learning_rate": 9.030710172744721e-06,
"loss": 1.1187,
"step": 101
},
{
"epoch": 0.09788867562380038,
"grad_norm": 0.150390625,
"learning_rate": 9.021113243761996e-06,
"loss": 1.2216,
"step": 102
},
{
"epoch": 0.09884836852207293,
"grad_norm": 0.2060546875,
"learning_rate": 9.011516314779272e-06,
"loss": 1.136,
"step": 103
},
{
"epoch": 0.09980806142034548,
"grad_norm": 0.1337890625,
"learning_rate": 9.001919385796546e-06,
"loss": 1.0826,
"step": 104
},
{
"epoch": 0.10076775431861804,
"grad_norm": 0.1435546875,
"learning_rate": 8.99232245681382e-06,
"loss": 1.1926,
"step": 105
},
{
"epoch": 0.1017274472168906,
"grad_norm": 0.1396484375,
"learning_rate": 8.982725527831095e-06,
"loss": 1.1645,
"step": 106
},
{
"epoch": 0.10268714011516315,
"grad_norm": 0.1298828125,
"learning_rate": 8.973128598848369e-06,
"loss": 1.1451,
"step": 107
},
{
"epoch": 0.1036468330134357,
"grad_norm": 0.12890625,
"learning_rate": 8.963531669865643e-06,
"loss": 1.1533,
"step": 108
},
{
"epoch": 0.10460652591170826,
"grad_norm": 0.11474609375,
"learning_rate": 8.953934740882918e-06,
"loss": 1.1095,
"step": 109
},
{
"epoch": 0.10556621880998081,
"grad_norm": 0.1484375,
"learning_rate": 8.944337811900192e-06,
"loss": 1.0983,
"step": 110
},
{
"epoch": 0.10652591170825336,
"grad_norm": 0.1162109375,
"learning_rate": 8.934740882917468e-06,
"loss": 1.0713,
"step": 111
},
{
"epoch": 0.10748560460652591,
"grad_norm": 0.1337890625,
"learning_rate": 8.925143953934742e-06,
"loss": 1.1187,
"step": 112
},
{
"epoch": 0.10844529750479846,
"grad_norm": 0.11767578125,
"learning_rate": 8.915547024952017e-06,
"loss": 1.0156,
"step": 113
},
{
"epoch": 0.10940499040307101,
"grad_norm": 0.12060546875,
"learning_rate": 8.905950095969291e-06,
"loss": 1.1281,
"step": 114
},
{
"epoch": 0.11036468330134357,
"grad_norm": 0.12451171875,
"learning_rate": 8.896353166986565e-06,
"loss": 1.1185,
"step": 115
},
{
"epoch": 0.11132437619961612,
"grad_norm": 0.1552734375,
"learning_rate": 8.88675623800384e-06,
"loss": 1.1278,
"step": 116
},
{
"epoch": 0.11228406909788867,
"grad_norm": 0.1279296875,
"learning_rate": 8.877159309021114e-06,
"loss": 1.0986,
"step": 117
},
{
"epoch": 0.11324376199616124,
"grad_norm": 0.10693359375,
"learning_rate": 8.867562380038388e-06,
"loss": 1.0399,
"step": 118
},
{
"epoch": 0.11420345489443379,
"grad_norm": 0.1220703125,
"learning_rate": 8.857965451055663e-06,
"loss": 1.1677,
"step": 119
},
{
"epoch": 0.11516314779270634,
"grad_norm": 0.1259765625,
"learning_rate": 8.848368522072937e-06,
"loss": 1.1291,
"step": 120
},
{
"epoch": 0.11612284069097889,
"grad_norm": 0.111328125,
"learning_rate": 8.838771593090211e-06,
"loss": 1.0135,
"step": 121
},
{
"epoch": 0.11708253358925144,
"grad_norm": 0.1103515625,
"learning_rate": 8.829174664107486e-06,
"loss": 1.1206,
"step": 122
},
{
"epoch": 0.118042226487524,
"grad_norm": 0.1240234375,
"learning_rate": 8.819577735124762e-06,
"loss": 1.0963,
"step": 123
},
{
"epoch": 0.11900191938579655,
"grad_norm": 0.119140625,
"learning_rate": 8.809980806142036e-06,
"loss": 1.1072,
"step": 124
},
{
"epoch": 0.1199616122840691,
"grad_norm": 0.12060546875,
"learning_rate": 8.80038387715931e-06,
"loss": 1.1213,
"step": 125
},
{
"epoch": 0.12092130518234165,
"grad_norm": 0.10400390625,
"learning_rate": 8.790786948176585e-06,
"loss": 1.0651,
"step": 126
},
{
"epoch": 0.1218809980806142,
"grad_norm": 0.11572265625,
"learning_rate": 8.781190019193859e-06,
"loss": 1.056,
"step": 127
},
{
"epoch": 0.12284069097888675,
"grad_norm": 0.11767578125,
"learning_rate": 8.771593090211133e-06,
"loss": 1.082,
"step": 128
},
{
"epoch": 0.1238003838771593,
"grad_norm": 0.1533203125,
"learning_rate": 8.761996161228408e-06,
"loss": 1.2184,
"step": 129
},
{
"epoch": 0.12476007677543186,
"grad_norm": 0.193359375,
"learning_rate": 8.752399232245682e-06,
"loss": 1.0084,
"step": 130
},
{
"epoch": 0.1257197696737044,
"grad_norm": 0.1298828125,
"learning_rate": 8.742802303262956e-06,
"loss": 1.1232,
"step": 131
},
{
"epoch": 0.12667946257197696,
"grad_norm": 0.1044921875,
"learning_rate": 8.73320537428023e-06,
"loss": 1.0711,
"step": 132
},
{
"epoch": 0.1276391554702495,
"grad_norm": 0.1513671875,
"learning_rate": 8.723608445297505e-06,
"loss": 1.2007,
"step": 133
},
{
"epoch": 0.12859884836852206,
"grad_norm": 0.11767578125,
"learning_rate": 8.71401151631478e-06,
"loss": 1.1375,
"step": 134
},
{
"epoch": 0.1295585412667946,
"grad_norm": 0.12109375,
"learning_rate": 8.704414587332054e-06,
"loss": 1.0473,
"step": 135
},
{
"epoch": 0.13051823416506717,
"grad_norm": 0.1240234375,
"learning_rate": 8.69481765834933e-06,
"loss": 1.1487,
"step": 136
},
{
"epoch": 0.13147792706333974,
"grad_norm": 0.099609375,
"learning_rate": 8.685220729366604e-06,
"loss": 1.072,
"step": 137
},
{
"epoch": 0.1324376199616123,
"grad_norm": 0.107421875,
"learning_rate": 8.675623800383878e-06,
"loss": 1.0678,
"step": 138
},
{
"epoch": 0.13339731285988485,
"grad_norm": 0.10205078125,
"learning_rate": 8.666026871401153e-06,
"loss": 1.1066,
"step": 139
},
{
"epoch": 0.1343570057581574,
"grad_norm": 0.1220703125,
"learning_rate": 8.656429942418427e-06,
"loss": 1.1308,
"step": 140
},
{
"epoch": 0.13531669865642995,
"grad_norm": 0.1220703125,
"learning_rate": 8.646833013435701e-06,
"loss": 1.0877,
"step": 141
},
{
"epoch": 0.1362763915547025,
"grad_norm": 0.1083984375,
"learning_rate": 8.637236084452976e-06,
"loss": 1.0643,
"step": 142
},
{
"epoch": 0.13723608445297505,
"grad_norm": 0.1015625,
"learning_rate": 8.62763915547025e-06,
"loss": 0.9934,
"step": 143
},
{
"epoch": 0.1381957773512476,
"grad_norm": 0.12109375,
"learning_rate": 8.618042226487526e-06,
"loss": 1.1454,
"step": 144
},
{
"epoch": 0.13915547024952016,
"grad_norm": 0.10595703125,
"learning_rate": 8.6084452975048e-06,
"loss": 1.0282,
"step": 145
},
{
"epoch": 0.1401151631477927,
"grad_norm": 0.1865234375,
"learning_rate": 8.598848368522073e-06,
"loss": 1.2197,
"step": 146
},
{
"epoch": 0.14107485604606526,
"grad_norm": 0.11328125,
"learning_rate": 8.589251439539347e-06,
"loss": 1.0509,
"step": 147
},
{
"epoch": 0.1420345489443378,
"grad_norm": 0.10693359375,
"learning_rate": 8.579654510556623e-06,
"loss": 1.0721,
"step": 148
},
{
"epoch": 0.14299424184261036,
"grad_norm": 0.119140625,
"learning_rate": 8.570057581573898e-06,
"loss": 1.1034,
"step": 149
},
{
"epoch": 0.14395393474088292,
"grad_norm": 0.1162109375,
"learning_rate": 8.560460652591172e-06,
"loss": 1.0192,
"step": 150
},
{
"epoch": 0.14491362763915547,
"grad_norm": 0.11767578125,
"learning_rate": 8.550863723608446e-06,
"loss": 1.0542,
"step": 151
},
{
"epoch": 0.14587332053742802,
"grad_norm": 0.107421875,
"learning_rate": 8.54126679462572e-06,
"loss": 1.0556,
"step": 152
},
{
"epoch": 0.14683301343570057,
"grad_norm": 0.10595703125,
"learning_rate": 8.531669865642995e-06,
"loss": 1.0191,
"step": 153
},
{
"epoch": 0.14779270633397312,
"grad_norm": 0.1064453125,
"learning_rate": 8.522072936660269e-06,
"loss": 0.9925,
"step": 154
},
{
"epoch": 0.14875239923224567,
"grad_norm": 0.1591796875,
"learning_rate": 8.512476007677543e-06,
"loss": 1.1719,
"step": 155
},
{
"epoch": 0.14971209213051823,
"grad_norm": 0.11181640625,
"learning_rate": 8.502879078694818e-06,
"loss": 1.1286,
"step": 156
},
{
"epoch": 0.15067178502879078,
"grad_norm": 0.12255859375,
"learning_rate": 8.493282149712094e-06,
"loss": 1.1383,
"step": 157
},
{
"epoch": 0.15163147792706333,
"grad_norm": 0.1611328125,
"learning_rate": 8.483685220729368e-06,
"loss": 1.2015,
"step": 158
},
{
"epoch": 0.15259117082533588,
"grad_norm": 0.11767578125,
"learning_rate": 8.47408829174664e-06,
"loss": 1.0849,
"step": 159
},
{
"epoch": 0.15355086372360843,
"grad_norm": 0.09375,
"learning_rate": 8.464491362763915e-06,
"loss": 0.944,
"step": 160
},
{
"epoch": 0.15451055662188098,
"grad_norm": 0.12353515625,
"learning_rate": 8.454894433781191e-06,
"loss": 1.0966,
"step": 161
},
{
"epoch": 0.15547024952015356,
"grad_norm": 0.10498046875,
"learning_rate": 8.445297504798465e-06,
"loss": 0.9607,
"step": 162
},
{
"epoch": 0.15642994241842612,
"grad_norm": 0.11669921875,
"learning_rate": 8.43570057581574e-06,
"loss": 1.1095,
"step": 163
},
{
"epoch": 0.15738963531669867,
"grad_norm": 0.10205078125,
"learning_rate": 8.426103646833014e-06,
"loss": 1.0191,
"step": 164
},
{
"epoch": 0.15834932821497122,
"grad_norm": 0.11083984375,
"learning_rate": 8.416506717850288e-06,
"loss": 1.0218,
"step": 165
},
{
"epoch": 0.15930902111324377,
"grad_norm": 0.0966796875,
"learning_rate": 8.406909788867563e-06,
"loss": 0.9931,
"step": 166
},
{
"epoch": 0.16026871401151632,
"grad_norm": 0.091796875,
"learning_rate": 8.397312859884837e-06,
"loss": 0.9998,
"step": 167
},
{
"epoch": 0.16122840690978887,
"grad_norm": 0.10107421875,
"learning_rate": 8.387715930902111e-06,
"loss": 1.0155,
"step": 168
},
{
"epoch": 0.16218809980806143,
"grad_norm": 0.115234375,
"learning_rate": 8.378119001919387e-06,
"loss": 0.9811,
"step": 169
},
{
"epoch": 0.16314779270633398,
"grad_norm": 0.11376953125,
"learning_rate": 8.368522072936662e-06,
"loss": 0.9923,
"step": 170
},
{
"epoch": 0.16410748560460653,
"grad_norm": 0.11083984375,
"learning_rate": 8.358925143953936e-06,
"loss": 1.0559,
"step": 171
},
{
"epoch": 0.16506717850287908,
"grad_norm": 0.1083984375,
"learning_rate": 8.34932821497121e-06,
"loss": 1.0016,
"step": 172
},
{
"epoch": 0.16602687140115163,
"grad_norm": 0.1025390625,
"learning_rate": 8.339731285988485e-06,
"loss": 0.9755,
"step": 173
},
{
"epoch": 0.16698656429942418,
"grad_norm": 0.09423828125,
"learning_rate": 8.330134357005759e-06,
"loss": 1.0032,
"step": 174
},
{
"epoch": 0.16794625719769674,
"grad_norm": 0.1318359375,
"learning_rate": 8.320537428023033e-06,
"loss": 1.1055,
"step": 175
},
{
"epoch": 0.1689059500959693,
"grad_norm": 0.11865234375,
"learning_rate": 8.310940499040308e-06,
"loss": 1.0221,
"step": 176
},
{
"epoch": 0.16986564299424184,
"grad_norm": 0.10986328125,
"learning_rate": 8.301343570057582e-06,
"loss": 1.0432,
"step": 177
},
{
"epoch": 0.1708253358925144,
"grad_norm": 0.1552734375,
"learning_rate": 8.291746641074856e-06,
"loss": 1.0867,
"step": 178
},
{
"epoch": 0.17178502879078694,
"grad_norm": 0.1083984375,
"learning_rate": 8.28214971209213e-06,
"loss": 1.0279,
"step": 179
},
{
"epoch": 0.1727447216890595,
"grad_norm": 0.10888671875,
"learning_rate": 8.272552783109405e-06,
"loss": 1.0319,
"step": 180
},
{
"epoch": 0.17370441458733205,
"grad_norm": 0.0966796875,
"learning_rate": 8.26295585412668e-06,
"loss": 1.0082,
"step": 181
},
{
"epoch": 0.1746641074856046,
"grad_norm": 0.1025390625,
"learning_rate": 8.253358925143955e-06,
"loss": 0.9761,
"step": 182
},
{
"epoch": 0.17562380038387715,
"grad_norm": 0.09765625,
"learning_rate": 8.24376199616123e-06,
"loss": 0.9492,
"step": 183
},
{
"epoch": 0.1765834932821497,
"grad_norm": 0.1142578125,
"learning_rate": 8.234165067178504e-06,
"loss": 1.0052,
"step": 184
},
{
"epoch": 0.17754318618042225,
"grad_norm": 0.1083984375,
"learning_rate": 8.224568138195778e-06,
"loss": 0.9921,
"step": 185
},
{
"epoch": 0.1785028790786948,
"grad_norm": 0.1328125,
"learning_rate": 8.214971209213053e-06,
"loss": 1.0567,
"step": 186
},
{
"epoch": 0.17946257197696738,
"grad_norm": 0.1181640625,
"learning_rate": 8.205374280230327e-06,
"loss": 0.9777,
"step": 187
},
{
"epoch": 0.18042226487523993,
"grad_norm": 0.0966796875,
"learning_rate": 8.195777351247601e-06,
"loss": 1.0348,
"step": 188
},
{
"epoch": 0.1813819577735125,
"grad_norm": 0.0966796875,
"learning_rate": 8.186180422264876e-06,
"loss": 0.9495,
"step": 189
},
{
"epoch": 0.18234165067178504,
"grad_norm": 0.10595703125,
"learning_rate": 8.176583493282152e-06,
"loss": 1.0123,
"step": 190
},
{
"epoch": 0.1833013435700576,
"grad_norm": 0.14453125,
"learning_rate": 8.166986564299424e-06,
"loss": 1.1257,
"step": 191
},
{
"epoch": 0.18426103646833014,
"grad_norm": 0.166015625,
"learning_rate": 8.157389635316699e-06,
"loss": 1.1254,
"step": 192
},
{
"epoch": 0.1852207293666027,
"grad_norm": 0.10205078125,
"learning_rate": 8.147792706333973e-06,
"loss": 1.0055,
"step": 193
},
{
"epoch": 0.18618042226487524,
"grad_norm": 0.1064453125,
"learning_rate": 8.138195777351249e-06,
"loss": 1.0149,
"step": 194
},
{
"epoch": 0.1871401151631478,
"grad_norm": 0.09619140625,
"learning_rate": 8.128598848368523e-06,
"loss": 0.9892,
"step": 195
},
{
"epoch": 0.18809980806142035,
"grad_norm": 0.09130859375,
"learning_rate": 8.119001919385798e-06,
"loss": 0.9941,
"step": 196
},
{
"epoch": 0.1890595009596929,
"grad_norm": 0.1298828125,
"learning_rate": 8.109404990403072e-06,
"loss": 1.0089,
"step": 197
},
{
"epoch": 0.19001919385796545,
"grad_norm": 0.09619140625,
"learning_rate": 8.099808061420346e-06,
"loss": 0.9446,
"step": 198
},
{
"epoch": 0.190978886756238,
"grad_norm": 0.1015625,
"learning_rate": 8.09021113243762e-06,
"loss": 0.9734,
"step": 199
},
{
"epoch": 0.19193857965451055,
"grad_norm": 0.11279296875,
"learning_rate": 8.080614203454895e-06,
"loss": 1.0315,
"step": 200
},
{
"epoch": 0.1928982725527831,
"grad_norm": 0.12890625,
"learning_rate": 8.07101727447217e-06,
"loss": 1.0699,
"step": 201
},
{
"epoch": 0.19385796545105566,
"grad_norm": 0.1494140625,
"learning_rate": 8.061420345489444e-06,
"loss": 1.0639,
"step": 202
},
{
"epoch": 0.1948176583493282,
"grad_norm": 0.10498046875,
"learning_rate": 8.05182341650672e-06,
"loss": 1.0076,
"step": 203
},
{
"epoch": 0.19577735124760076,
"grad_norm": 0.08984375,
"learning_rate": 8.042226487523992e-06,
"loss": 0.9557,
"step": 204
},
{
"epoch": 0.1967370441458733,
"grad_norm": 0.103515625,
"learning_rate": 8.032629558541267e-06,
"loss": 1.0174,
"step": 205
},
{
"epoch": 0.19769673704414586,
"grad_norm": 0.10986328125,
"learning_rate": 8.023032629558541e-06,
"loss": 1.0005,
"step": 206
},
{
"epoch": 0.19865642994241842,
"grad_norm": 0.126953125,
"learning_rate": 8.013435700575817e-06,
"loss": 1.0216,
"step": 207
},
{
"epoch": 0.19961612284069097,
"grad_norm": 0.1064453125,
"learning_rate": 8.003838771593091e-06,
"loss": 1.0296,
"step": 208
},
{
"epoch": 0.20057581573896352,
"grad_norm": 0.09619140625,
"learning_rate": 7.994241842610366e-06,
"loss": 0.9604,
"step": 209
},
{
"epoch": 0.20153550863723607,
"grad_norm": 0.0947265625,
"learning_rate": 7.98464491362764e-06,
"loss": 0.9319,
"step": 210
},
{
"epoch": 0.20249520153550865,
"grad_norm": 0.09423828125,
"learning_rate": 7.975047984644914e-06,
"loss": 0.959,
"step": 211
},
{
"epoch": 0.2034548944337812,
"grad_norm": 0.1259765625,
"learning_rate": 7.965451055662189e-06,
"loss": 1.0316,
"step": 212
},
{
"epoch": 0.20441458733205375,
"grad_norm": 0.11279296875,
"learning_rate": 7.955854126679463e-06,
"loss": 0.9858,
"step": 213
},
{
"epoch": 0.2053742802303263,
"grad_norm": 0.1025390625,
"learning_rate": 7.946257197696737e-06,
"loss": 0.9937,
"step": 214
},
{
"epoch": 0.20633397312859886,
"grad_norm": 0.09521484375,
"learning_rate": 7.936660268714013e-06,
"loss": 1.0125,
"step": 215
},
{
"epoch": 0.2072936660268714,
"grad_norm": 0.111328125,
"learning_rate": 7.927063339731288e-06,
"loss": 1.0136,
"step": 216
},
{
"epoch": 0.20825335892514396,
"grad_norm": 0.0888671875,
"learning_rate": 7.91746641074856e-06,
"loss": 0.9332,
"step": 217
},
{
"epoch": 0.2092130518234165,
"grad_norm": 0.0908203125,
"learning_rate": 7.907869481765835e-06,
"loss": 0.9504,
"step": 218
},
{
"epoch": 0.21017274472168906,
"grad_norm": 0.099609375,
"learning_rate": 7.89827255278311e-06,
"loss": 1.0072,
"step": 219
},
{
"epoch": 0.21113243761996162,
"grad_norm": 0.0849609375,
"learning_rate": 7.888675623800385e-06,
"loss": 0.9329,
"step": 220
},
{
"epoch": 0.21209213051823417,
"grad_norm": 0.11328125,
"learning_rate": 7.87907869481766e-06,
"loss": 0.9958,
"step": 221
},
{
"epoch": 0.21305182341650672,
"grad_norm": 0.10009765625,
"learning_rate": 7.869481765834934e-06,
"loss": 1.0125,
"step": 222
},
{
"epoch": 0.21401151631477927,
"grad_norm": 0.1015625,
"learning_rate": 7.859884836852208e-06,
"loss": 0.9808,
"step": 223
},
{
"epoch": 0.21497120921305182,
"grad_norm": 0.1083984375,
"learning_rate": 7.850287907869482e-06,
"loss": 0.9976,
"step": 224
},
{
"epoch": 0.21593090211132437,
"grad_norm": 0.11376953125,
"learning_rate": 7.840690978886757e-06,
"loss": 1.0249,
"step": 225
},
{
"epoch": 0.21689059500959693,
"grad_norm": 0.1025390625,
"learning_rate": 7.83109404990403e-06,
"loss": 0.9899,
"step": 226
},
{
"epoch": 0.21785028790786948,
"grad_norm": 0.1298828125,
"learning_rate": 7.821497120921305e-06,
"loss": 1.1065,
"step": 227
},
{
"epoch": 0.21880998080614203,
"grad_norm": 0.1083984375,
"learning_rate": 7.811900191938581e-06,
"loss": 1.0697,
"step": 228
},
{
"epoch": 0.21976967370441458,
"grad_norm": 0.1337890625,
"learning_rate": 7.802303262955856e-06,
"loss": 0.9401,
"step": 229
},
{
"epoch": 0.22072936660268713,
"grad_norm": 0.119140625,
"learning_rate": 7.79270633397313e-06,
"loss": 1.052,
"step": 230
},
{
"epoch": 0.22168905950095968,
"grad_norm": 0.1591796875,
"learning_rate": 7.783109404990402e-06,
"loss": 1.154,
"step": 231
},
{
"epoch": 0.22264875239923224,
"grad_norm": 0.1376953125,
"learning_rate": 7.773512476007678e-06,
"loss": 1.0706,
"step": 232
},
{
"epoch": 0.2236084452975048,
"grad_norm": 0.09375,
"learning_rate": 7.763915547024953e-06,
"loss": 0.9314,
"step": 233
},
{
"epoch": 0.22456813819577734,
"grad_norm": 0.08056640625,
"learning_rate": 7.754318618042227e-06,
"loss": 0.9448,
"step": 234
},
{
"epoch": 0.2255278310940499,
"grad_norm": 0.10107421875,
"learning_rate": 7.744721689059501e-06,
"loss": 1.0048,
"step": 235
},
{
"epoch": 0.22648752399232247,
"grad_norm": 0.1103515625,
"learning_rate": 7.735124760076776e-06,
"loss": 1.0153,
"step": 236
},
{
"epoch": 0.22744721689059502,
"grad_norm": 0.10302734375,
"learning_rate": 7.72552783109405e-06,
"loss": 0.921,
"step": 237
},
{
"epoch": 0.22840690978886757,
"grad_norm": 0.08984375,
"learning_rate": 7.715930902111324e-06,
"loss": 0.8776,
"step": 238
},
{
"epoch": 0.22936660268714013,
"grad_norm": 0.08544921875,
"learning_rate": 7.706333973128599e-06,
"loss": 0.9054,
"step": 239
},
{
"epoch": 0.23032629558541268,
"grad_norm": 0.111328125,
"learning_rate": 7.696737044145875e-06,
"loss": 0.9415,
"step": 240
},
{
"epoch": 0.23128598848368523,
"grad_norm": 0.1357421875,
"learning_rate": 7.687140115163149e-06,
"loss": 1.0546,
"step": 241
},
{
"epoch": 0.23224568138195778,
"grad_norm": 0.1064453125,
"learning_rate": 7.677543186180423e-06,
"loss": 0.989,
"step": 242
},
{
"epoch": 0.23320537428023033,
"grad_norm": 0.091796875,
"learning_rate": 7.667946257197698e-06,
"loss": 0.9687,
"step": 243
},
{
"epoch": 0.23416506717850288,
"grad_norm": 0.09228515625,
"learning_rate": 7.658349328214972e-06,
"loss": 0.9352,
"step": 244
},
{
"epoch": 0.23512476007677544,
"grad_norm": 0.0908203125,
"learning_rate": 7.648752399232246e-06,
"loss": 0.9592,
"step": 245
},
{
"epoch": 0.236084452975048,
"grad_norm": 0.1025390625,
"learning_rate": 7.63915547024952e-06,
"loss": 1.0028,
"step": 246
},
{
"epoch": 0.23704414587332054,
"grad_norm": 0.09375,
"learning_rate": 7.629558541266795e-06,
"loss": 0.9823,
"step": 247
},
{
"epoch": 0.2380038387715931,
"grad_norm": 0.0966796875,
"learning_rate": 7.61996161228407e-06,
"loss": 0.9399,
"step": 248
},
{
"epoch": 0.23896353166986564,
"grad_norm": 0.10546875,
"learning_rate": 7.610364683301345e-06,
"loss": 0.9947,
"step": 249
},
{
"epoch": 0.2399232245681382,
"grad_norm": 0.09326171875,
"learning_rate": 7.600767754318619e-06,
"loss": 1.0125,
"step": 250
},
{
"epoch": 0.24088291746641075,
"grad_norm": 0.0927734375,
"learning_rate": 7.591170825335893e-06,
"loss": 0.8915,
"step": 251
},
{
"epoch": 0.2418426103646833,
"grad_norm": 0.08544921875,
"learning_rate": 7.581573896353167e-06,
"loss": 0.9549,
"step": 252
},
{
"epoch": 0.24280230326295585,
"grad_norm": 0.099609375,
"learning_rate": 7.571976967370443e-06,
"loss": 0.9493,
"step": 253
},
{
"epoch": 0.2437619961612284,
"grad_norm": 0.11279296875,
"learning_rate": 7.562380038387716e-06,
"loss": 0.9959,
"step": 254
},
{
"epoch": 0.24472168905950095,
"grad_norm": 0.1044921875,
"learning_rate": 7.5527831094049905e-06,
"loss": 0.9799,
"step": 255
},
{
"epoch": 0.2456813819577735,
"grad_norm": 0.1025390625,
"learning_rate": 7.543186180422265e-06,
"loss": 0.9498,
"step": 256
},
{
"epoch": 0.24664107485604606,
"grad_norm": 0.10546875,
"learning_rate": 7.53358925143954e-06,
"loss": 0.8909,
"step": 257
},
{
"epoch": 0.2476007677543186,
"grad_norm": 0.10302734375,
"learning_rate": 7.523992322456814e-06,
"loss": 0.9247,
"step": 258
},
{
"epoch": 0.24856046065259116,
"grad_norm": 0.10400390625,
"learning_rate": 7.514395393474089e-06,
"loss": 0.9861,
"step": 259
},
{
"epoch": 0.2495201535508637,
"grad_norm": 0.0888671875,
"learning_rate": 7.504798464491363e-06,
"loss": 0.949,
"step": 260
},
{
"epoch": 0.2504798464491363,
"grad_norm": 0.1025390625,
"learning_rate": 7.495201535508638e-06,
"loss": 0.973,
"step": 261
},
{
"epoch": 0.2514395393474088,
"grad_norm": 0.103515625,
"learning_rate": 7.4856046065259125e-06,
"loss": 0.9665,
"step": 262
},
{
"epoch": 0.2523992322456814,
"grad_norm": 0.10400390625,
"learning_rate": 7.476007677543187e-06,
"loss": 0.9601,
"step": 263
},
{
"epoch": 0.2533589251439539,
"grad_norm": 0.126953125,
"learning_rate": 7.466410748560461e-06,
"loss": 1.0331,
"step": 264
},
{
"epoch": 0.2543186180422265,
"grad_norm": 0.109375,
"learning_rate": 7.456813819577736e-06,
"loss": 0.9773,
"step": 265
},
{
"epoch": 0.255278310940499,
"grad_norm": 0.0927734375,
"learning_rate": 7.447216890595011e-06,
"loss": 0.9897,
"step": 266
},
{
"epoch": 0.2562380038387716,
"grad_norm": 0.09814453125,
"learning_rate": 7.437619961612285e-06,
"loss": 0.8944,
"step": 267
},
{
"epoch": 0.2571976967370441,
"grad_norm": 0.10791015625,
"learning_rate": 7.4280230326295585e-06,
"loss": 1.031,
"step": 268
},
{
"epoch": 0.2581573896353167,
"grad_norm": 0.10888671875,
"learning_rate": 7.4184261036468345e-06,
"loss": 0.9817,
"step": 269
},
{
"epoch": 0.2591170825335892,
"grad_norm": 0.107421875,
"learning_rate": 7.408829174664108e-06,
"loss": 0.999,
"step": 270
},
{
"epoch": 0.2600767754318618,
"grad_norm": 0.095703125,
"learning_rate": 7.399232245681382e-06,
"loss": 0.9768,
"step": 271
},
{
"epoch": 0.26103646833013433,
"grad_norm": 0.11376953125,
"learning_rate": 7.389635316698657e-06,
"loss": 1.0208,
"step": 272
},
{
"epoch": 0.2619961612284069,
"grad_norm": 0.107421875,
"learning_rate": 7.380038387715931e-06,
"loss": 0.9654,
"step": 273
},
{
"epoch": 0.2629558541266795,
"grad_norm": 0.1025390625,
"learning_rate": 7.370441458733206e-06,
"loss": 0.9188,
"step": 274
},
{
"epoch": 0.263915547024952,
"grad_norm": 0.09912109375,
"learning_rate": 7.3608445297504805e-06,
"loss": 0.98,
"step": 275
},
{
"epoch": 0.2648752399232246,
"grad_norm": 0.10595703125,
"learning_rate": 7.351247600767755e-06,
"loss": 0.9927,
"step": 276
},
{
"epoch": 0.2658349328214971,
"grad_norm": 0.123046875,
"learning_rate": 7.341650671785029e-06,
"loss": 1.0048,
"step": 277
},
{
"epoch": 0.2667946257197697,
"grad_norm": 0.10791015625,
"learning_rate": 7.332053742802304e-06,
"loss": 0.9771,
"step": 278
},
{
"epoch": 0.2677543186180422,
"grad_norm": 0.115234375,
"learning_rate": 7.322456813819579e-06,
"loss": 1.0225,
"step": 279
},
{
"epoch": 0.2687140115163148,
"grad_norm": 0.11767578125,
"learning_rate": 7.312859884836853e-06,
"loss": 0.9999,
"step": 280
},
{
"epoch": 0.2696737044145873,
"grad_norm": 0.09521484375,
"learning_rate": 7.3032629558541264e-06,
"loss": 0.9115,
"step": 281
},
{
"epoch": 0.2706333973128599,
"grad_norm": 0.0859375,
"learning_rate": 7.2936660268714024e-06,
"loss": 0.9408,
"step": 282
},
{
"epoch": 0.2715930902111324,
"grad_norm": 0.095703125,
"learning_rate": 7.284069097888676e-06,
"loss": 0.8829,
"step": 283
},
{
"epoch": 0.272552783109405,
"grad_norm": 0.1357421875,
"learning_rate": 7.27447216890595e-06,
"loss": 0.9513,
"step": 284
},
{
"epoch": 0.27351247600767753,
"grad_norm": 0.09423828125,
"learning_rate": 7.264875239923225e-06,
"loss": 0.8901,
"step": 285
},
{
"epoch": 0.2744721689059501,
"grad_norm": 0.09375,
"learning_rate": 7.2552783109405e-06,
"loss": 0.9344,
"step": 286
},
{
"epoch": 0.27543186180422263,
"grad_norm": 0.09521484375,
"learning_rate": 7.245681381957774e-06,
"loss": 0.9692,
"step": 287
},
{
"epoch": 0.2763915547024952,
"grad_norm": 0.09814453125,
"learning_rate": 7.236084452975048e-06,
"loss": 0.954,
"step": 288
},
{
"epoch": 0.27735124760076774,
"grad_norm": 0.1181640625,
"learning_rate": 7.226487523992323e-06,
"loss": 1.0483,
"step": 289
},
{
"epoch": 0.2783109404990403,
"grad_norm": 0.1083984375,
"learning_rate": 7.216890595009598e-06,
"loss": 1.0348,
"step": 290
},
{
"epoch": 0.27927063339731284,
"grad_norm": 0.0966796875,
"learning_rate": 7.207293666026872e-06,
"loss": 0.9232,
"step": 291
},
{
"epoch": 0.2802303262955854,
"grad_norm": 0.09228515625,
"learning_rate": 7.1976967370441466e-06,
"loss": 0.9508,
"step": 292
},
{
"epoch": 0.28119001919385794,
"grad_norm": 0.10986328125,
"learning_rate": 7.188099808061421e-06,
"loss": 0.9915,
"step": 293
},
{
"epoch": 0.2821497120921305,
"grad_norm": 0.0859375,
"learning_rate": 7.178502879078696e-06,
"loss": 0.8884,
"step": 294
},
{
"epoch": 0.28310940499040305,
"grad_norm": 0.09326171875,
"learning_rate": 7.16890595009597e-06,
"loss": 0.9609,
"step": 295
},
{
"epoch": 0.2840690978886756,
"grad_norm": 0.10205078125,
"learning_rate": 7.159309021113245e-06,
"loss": 0.9797,
"step": 296
},
{
"epoch": 0.28502879078694815,
"grad_norm": 0.09521484375,
"learning_rate": 7.149712092130518e-06,
"loss": 0.9791,
"step": 297
},
{
"epoch": 0.28598848368522073,
"grad_norm": 0.1064453125,
"learning_rate": 7.1401151631477925e-06,
"loss": 0.8977,
"step": 298
},
{
"epoch": 0.2869481765834933,
"grad_norm": 0.10302734375,
"learning_rate": 7.130518234165068e-06,
"loss": 0.8998,
"step": 299
},
{
"epoch": 0.28790786948176583,
"grad_norm": 0.1103515625,
"learning_rate": 7.120921305182342e-06,
"loss": 0.9845,
"step": 300
},
{
"epoch": 0.2888675623800384,
"grad_norm": 0.107421875,
"learning_rate": 7.111324376199616e-06,
"loss": 0.967,
"step": 301
},
{
"epoch": 0.28982725527831094,
"grad_norm": 0.11181640625,
"learning_rate": 7.101727447216891e-06,
"loss": 0.9695,
"step": 302
},
{
"epoch": 0.2907869481765835,
"grad_norm": 0.109375,
"learning_rate": 7.092130518234166e-06,
"loss": 0.9427,
"step": 303
},
{
"epoch": 0.29174664107485604,
"grad_norm": 0.109375,
"learning_rate": 7.08253358925144e-06,
"loss": 0.9487,
"step": 304
},
{
"epoch": 0.2927063339731286,
"grad_norm": 0.1396484375,
"learning_rate": 7.0729366602687145e-06,
"loss": 0.9567,
"step": 305
},
{
"epoch": 0.29366602687140114,
"grad_norm": 0.1494140625,
"learning_rate": 7.063339731285989e-06,
"loss": 1.039,
"step": 306
},
{
"epoch": 0.2946257197696737,
"grad_norm": 0.103515625,
"learning_rate": 7.053742802303264e-06,
"loss": 0.9293,
"step": 307
},
{
"epoch": 0.29558541266794625,
"grad_norm": 0.103515625,
"learning_rate": 7.044145873320538e-06,
"loss": 0.9293,
"step": 308
},
{
"epoch": 0.2965451055662188,
"grad_norm": 0.11572265625,
"learning_rate": 7.034548944337813e-06,
"loss": 1.0326,
"step": 309
},
{
"epoch": 0.29750479846449135,
"grad_norm": 0.09619140625,
"learning_rate": 7.024952015355086e-06,
"loss": 0.9228,
"step": 310
},
{
"epoch": 0.29846449136276393,
"grad_norm": 0.1171875,
"learning_rate": 7.015355086372362e-06,
"loss": 1.0134,
"step": 311
},
{
"epoch": 0.29942418426103645,
"grad_norm": 0.09521484375,
"learning_rate": 7.005758157389636e-06,
"loss": 0.8992,
"step": 312
},
{
"epoch": 0.30038387715930903,
"grad_norm": 0.10400390625,
"learning_rate": 6.99616122840691e-06,
"loss": 0.9474,
"step": 313
},
{
"epoch": 0.30134357005758156,
"grad_norm": 0.11767578125,
"learning_rate": 6.986564299424184e-06,
"loss": 0.9854,
"step": 314
},
{
"epoch": 0.30230326295585414,
"grad_norm": 0.087890625,
"learning_rate": 6.9769673704414595e-06,
"loss": 0.9244,
"step": 315
},
{
"epoch": 0.30326295585412666,
"grad_norm": 0.08740234375,
"learning_rate": 6.967370441458734e-06,
"loss": 0.9114,
"step": 316
},
{
"epoch": 0.30422264875239924,
"grad_norm": 0.095703125,
"learning_rate": 6.957773512476008e-06,
"loss": 0.9245,
"step": 317
},
{
"epoch": 0.30518234165067176,
"grad_norm": 0.09228515625,
"learning_rate": 6.9481765834932824e-06,
"loss": 0.9606,
"step": 318
},
{
"epoch": 0.30614203454894434,
"grad_norm": 0.1318359375,
"learning_rate": 6.938579654510558e-06,
"loss": 1.056,
"step": 319
},
{
"epoch": 0.30710172744721687,
"grad_norm": 0.11083984375,
"learning_rate": 6.928982725527832e-06,
"loss": 0.9928,
"step": 320
},
{
"epoch": 0.30806142034548945,
"grad_norm": 0.09765625,
"learning_rate": 6.919385796545106e-06,
"loss": 0.9356,
"step": 321
},
{
"epoch": 0.30902111324376197,
"grad_norm": 0.08837890625,
"learning_rate": 6.909788867562381e-06,
"loss": 0.8933,
"step": 322
},
{
"epoch": 0.30998080614203455,
"grad_norm": 0.0927734375,
"learning_rate": 6.900191938579655e-06,
"loss": 0.9043,
"step": 323
},
{
"epoch": 0.31094049904030713,
"grad_norm": 0.09765625,
"learning_rate": 6.89059500959693e-06,
"loss": 0.9646,
"step": 324
},
{
"epoch": 0.31190019193857965,
"grad_norm": 0.10400390625,
"learning_rate": 6.8809980806142044e-06,
"loss": 0.9541,
"step": 325
},
{
"epoch": 0.31285988483685223,
"grad_norm": 0.0908203125,
"learning_rate": 6.871401151631478e-06,
"loss": 0.901,
"step": 326
},
{
"epoch": 0.31381957773512476,
"grad_norm": 0.103515625,
"learning_rate": 6.861804222648752e-06,
"loss": 0.9409,
"step": 327
},
{
"epoch": 0.31477927063339733,
"grad_norm": 0.1259765625,
"learning_rate": 6.852207293666027e-06,
"loss": 1.0008,
"step": 328
},
{
"epoch": 0.31573896353166986,
"grad_norm": 0.09423828125,
"learning_rate": 6.842610364683302e-06,
"loss": 0.9406,
"step": 329
},
{
"epoch": 0.31669865642994244,
"grad_norm": 0.09912109375,
"learning_rate": 6.833013435700576e-06,
"loss": 0.9532,
"step": 330
},
{
"epoch": 0.31765834932821496,
"grad_norm": 0.1083984375,
"learning_rate": 6.82341650671785e-06,
"loss": 0.9948,
"step": 331
},
{
"epoch": 0.31861804222648754,
"grad_norm": 0.1220703125,
"learning_rate": 6.8138195777351256e-06,
"loss": 0.9763,
"step": 332
},
{
"epoch": 0.31957773512476007,
"grad_norm": 0.09228515625,
"learning_rate": 6.8042226487524e-06,
"loss": 0.8708,
"step": 333
},
{
"epoch": 0.32053742802303264,
"grad_norm": 0.0859375,
"learning_rate": 6.794625719769674e-06,
"loss": 0.922,
"step": 334
},
{
"epoch": 0.32149712092130517,
"grad_norm": 0.0810546875,
"learning_rate": 6.7850287907869485e-06,
"loss": 0.9056,
"step": 335
},
{
"epoch": 0.32245681381957775,
"grad_norm": 0.09716796875,
"learning_rate": 6.775431861804224e-06,
"loss": 0.9891,
"step": 336
},
{
"epoch": 0.32341650671785027,
"grad_norm": 0.10400390625,
"learning_rate": 6.765834932821498e-06,
"loss": 0.9317,
"step": 337
},
{
"epoch": 0.32437619961612285,
"grad_norm": 0.1123046875,
"learning_rate": 6.756238003838772e-06,
"loss": 1.0119,
"step": 338
},
{
"epoch": 0.3253358925143954,
"grad_norm": 0.10498046875,
"learning_rate": 6.746641074856046e-06,
"loss": 0.9928,
"step": 339
},
{
"epoch": 0.32629558541266795,
"grad_norm": 0.10986328125,
"learning_rate": 6.737044145873322e-06,
"loss": 1.0083,
"step": 340
},
{
"epoch": 0.3272552783109405,
"grad_norm": 0.0966796875,
"learning_rate": 6.727447216890595e-06,
"loss": 0.9153,
"step": 341
},
{
"epoch": 0.32821497120921306,
"grad_norm": 0.1025390625,
"learning_rate": 6.71785028790787e-06,
"loss": 0.9593,
"step": 342
},
{
"epoch": 0.3291746641074856,
"grad_norm": 0.1064453125,
"learning_rate": 6.708253358925144e-06,
"loss": 0.9167,
"step": 343
},
{
"epoch": 0.33013435700575816,
"grad_norm": 0.080078125,
"learning_rate": 6.698656429942419e-06,
"loss": 0.8708,
"step": 344
},
{
"epoch": 0.3310940499040307,
"grad_norm": 0.0947265625,
"learning_rate": 6.6890595009596935e-06,
"loss": 0.9276,
"step": 345
},
{
"epoch": 0.33205374280230326,
"grad_norm": 0.107421875,
"learning_rate": 6.679462571976968e-06,
"loss": 1.0077,
"step": 346
},
{
"epoch": 0.3330134357005758,
"grad_norm": 0.11376953125,
"learning_rate": 6.669865642994242e-06,
"loss": 1.0066,
"step": 347
},
{
"epoch": 0.33397312859884837,
"grad_norm": 0.10595703125,
"learning_rate": 6.6602687140115165e-06,
"loss": 0.8746,
"step": 348
},
{
"epoch": 0.33493282149712095,
"grad_norm": 0.1044921875,
"learning_rate": 6.650671785028792e-06,
"loss": 0.9443,
"step": 349
},
{
"epoch": 0.33589251439539347,
"grad_norm": 0.0888671875,
"learning_rate": 6.641074856046066e-06,
"loss": 0.9567,
"step": 350
},
{
"epoch": 0.33685220729366605,
"grad_norm": 0.1376953125,
"learning_rate": 6.63147792706334e-06,
"loss": 1.1195,
"step": 351
},
{
"epoch": 0.3378119001919386,
"grad_norm": 0.0830078125,
"learning_rate": 6.621880998080615e-06,
"loss": 0.9184,
"step": 352
},
{
"epoch": 0.33877159309021115,
"grad_norm": 0.0888671875,
"learning_rate": 6.61228406909789e-06,
"loss": 0.9047,
"step": 353
},
{
"epoch": 0.3397312859884837,
"grad_norm": 0.0703125,
"learning_rate": 6.602687140115164e-06,
"loss": 0.8174,
"step": 354
},
{
"epoch": 0.34069097888675626,
"grad_norm": 0.0927734375,
"learning_rate": 6.593090211132438e-06,
"loss": 0.894,
"step": 355
},
{
"epoch": 0.3416506717850288,
"grad_norm": 0.09521484375,
"learning_rate": 6.583493282149712e-06,
"loss": 0.9048,
"step": 356
},
{
"epoch": 0.34261036468330136,
"grad_norm": 0.1015625,
"learning_rate": 6.573896353166987e-06,
"loss": 0.9497,
"step": 357
},
{
"epoch": 0.3435700575815739,
"grad_norm": 0.103515625,
"learning_rate": 6.5642994241842614e-06,
"loss": 0.9741,
"step": 358
},
{
"epoch": 0.34452975047984646,
"grad_norm": 0.109375,
"learning_rate": 6.554702495201536e-06,
"loss": 0.9669,
"step": 359
},
{
"epoch": 0.345489443378119,
"grad_norm": 0.107421875,
"learning_rate": 6.54510556621881e-06,
"loss": 0.9659,
"step": 360
},
{
"epoch": 0.34644913627639157,
"grad_norm": 0.076171875,
"learning_rate": 6.535508637236085e-06,
"loss": 0.8867,
"step": 361
},
{
"epoch": 0.3474088291746641,
"grad_norm": 0.08984375,
"learning_rate": 6.52591170825336e-06,
"loss": 0.9019,
"step": 362
},
{
"epoch": 0.34836852207293667,
"grad_norm": 0.103515625,
"learning_rate": 6.516314779270634e-06,
"loss": 0.8726,
"step": 363
},
{
"epoch": 0.3493282149712092,
"grad_norm": 0.08740234375,
"learning_rate": 6.506717850287908e-06,
"loss": 0.895,
"step": 364
},
{
"epoch": 0.3502879078694818,
"grad_norm": 0.103515625,
"learning_rate": 6.497120921305183e-06,
"loss": 0.8961,
"step": 365
},
{
"epoch": 0.3512476007677543,
"grad_norm": 0.08203125,
"learning_rate": 6.487523992322458e-06,
"loss": 0.889,
"step": 366
},
{
"epoch": 0.3522072936660269,
"grad_norm": 0.0908203125,
"learning_rate": 6.477927063339732e-06,
"loss": 0.8605,
"step": 367
},
{
"epoch": 0.3531669865642994,
"grad_norm": 0.11865234375,
"learning_rate": 6.4683301343570056e-06,
"loss": 0.9347,
"step": 368
},
{
"epoch": 0.354126679462572,
"grad_norm": 0.1455078125,
"learning_rate": 6.4587332053742816e-06,
"loss": 1.0203,
"step": 369
},
{
"epoch": 0.3550863723608445,
"grad_norm": 0.115234375,
"learning_rate": 6.449136276391556e-06,
"loss": 1.0568,
"step": 370
},
{
"epoch": 0.3560460652591171,
"grad_norm": 0.12890625,
"learning_rate": 6.439539347408829e-06,
"loss": 1.0349,
"step": 371
},
{
"epoch": 0.3570057581573896,
"grad_norm": 0.08251953125,
"learning_rate": 6.429942418426104e-06,
"loss": 0.8929,
"step": 372
},
{
"epoch": 0.3579654510556622,
"grad_norm": 0.1572265625,
"learning_rate": 6.420345489443378e-06,
"loss": 1.0427,
"step": 373
},
{
"epoch": 0.35892514395393477,
"grad_norm": 0.09521484375,
"learning_rate": 6.410748560460653e-06,
"loss": 0.8672,
"step": 374
},
{
"epoch": 0.3598848368522073,
"grad_norm": 0.12255859375,
"learning_rate": 6.4011516314779275e-06,
"loss": 0.8907,
"step": 375
},
{
"epoch": 0.36084452975047987,
"grad_norm": 0.09033203125,
"learning_rate": 6.391554702495202e-06,
"loss": 0.9077,
"step": 376
},
{
"epoch": 0.3618042226487524,
"grad_norm": 0.0791015625,
"learning_rate": 6.381957773512476e-06,
"loss": 0.8758,
"step": 377
},
{
"epoch": 0.362763915547025,
"grad_norm": 0.1005859375,
"learning_rate": 6.372360844529751e-06,
"loss": 0.8745,
"step": 378
},
{
"epoch": 0.3637236084452975,
"grad_norm": 0.0986328125,
"learning_rate": 6.362763915547026e-06,
"loss": 0.8887,
"step": 379
},
{
"epoch": 0.3646833013435701,
"grad_norm": 0.08837890625,
"learning_rate": 6.3531669865643e-06,
"loss": 0.8776,
"step": 380
},
{
"epoch": 0.3656429942418426,
"grad_norm": 0.08544921875,
"learning_rate": 6.343570057581574e-06,
"loss": 0.9301,
"step": 381
},
{
"epoch": 0.3666026871401152,
"grad_norm": 0.08740234375,
"learning_rate": 6.3339731285988495e-06,
"loss": 0.8878,
"step": 382
},
{
"epoch": 0.3675623800383877,
"grad_norm": 0.1279296875,
"learning_rate": 6.324376199616124e-06,
"loss": 0.8949,
"step": 383
},
{
"epoch": 0.3685220729366603,
"grad_norm": 0.09375,
"learning_rate": 6.314779270633397e-06,
"loss": 0.9057,
"step": 384
},
{
"epoch": 0.3694817658349328,
"grad_norm": 0.08935546875,
"learning_rate": 6.305182341650672e-06,
"loss": 0.8729,
"step": 385
},
{
"epoch": 0.3704414587332054,
"grad_norm": 0.0869140625,
"learning_rate": 6.295585412667947e-06,
"loss": 0.9009,
"step": 386
},
{
"epoch": 0.3714011516314779,
"grad_norm": 0.0869140625,
"learning_rate": 6.285988483685221e-06,
"loss": 0.8487,
"step": 387
},
{
"epoch": 0.3723608445297505,
"grad_norm": 0.095703125,
"learning_rate": 6.2763915547024955e-06,
"loss": 0.862,
"step": 388
},
{
"epoch": 0.373320537428023,
"grad_norm": 0.10205078125,
"learning_rate": 6.26679462571977e-06,
"loss": 0.936,
"step": 389
},
{
"epoch": 0.3742802303262956,
"grad_norm": 0.0830078125,
"learning_rate": 6.257197696737045e-06,
"loss": 0.9083,
"step": 390
},
{
"epoch": 0.3752399232245681,
"grad_norm": 0.10009765625,
"learning_rate": 6.247600767754319e-06,
"loss": 0.9089,
"step": 391
},
{
"epoch": 0.3761996161228407,
"grad_norm": 0.16015625,
"learning_rate": 6.238003838771594e-06,
"loss": 1.1005,
"step": 392
},
{
"epoch": 0.3771593090211132,
"grad_norm": 0.08935546875,
"learning_rate": 6.228406909788868e-06,
"loss": 0.8175,
"step": 393
},
{
"epoch": 0.3781190019193858,
"grad_norm": 0.08740234375,
"learning_rate": 6.218809980806143e-06,
"loss": 0.8441,
"step": 394
},
{
"epoch": 0.3790786948176583,
"grad_norm": 0.0947265625,
"learning_rate": 6.2092130518234175e-06,
"loss": 0.9371,
"step": 395
},
{
"epoch": 0.3800383877159309,
"grad_norm": 0.10400390625,
"learning_rate": 6.199616122840692e-06,
"loss": 1.0151,
"step": 396
},
{
"epoch": 0.3809980806142035,
"grad_norm": 0.130859375,
"learning_rate": 6.190019193857965e-06,
"loss": 0.9341,
"step": 397
},
{
"epoch": 0.381957773512476,
"grad_norm": 0.0966796875,
"learning_rate": 6.18042226487524e-06,
"loss": 0.9144,
"step": 398
},
{
"epoch": 0.3829174664107486,
"grad_norm": 0.0947265625,
"learning_rate": 6.170825335892516e-06,
"loss": 0.9452,
"step": 399
},
{
"epoch": 0.3838771593090211,
"grad_norm": 0.1103515625,
"learning_rate": 6.161228406909789e-06,
"loss": 1.0361,
"step": 400
},
{
"epoch": 0.3848368522072937,
"grad_norm": 0.10400390625,
"learning_rate": 6.151631477927063e-06,
"loss": 0.9116,
"step": 401
},
{
"epoch": 0.3857965451055662,
"grad_norm": 0.11328125,
"learning_rate": 6.142034548944338e-06,
"loss": 0.8891,
"step": 402
},
{
"epoch": 0.3867562380038388,
"grad_norm": 0.08447265625,
"learning_rate": 6.132437619961613e-06,
"loss": 0.8676,
"step": 403
},
{
"epoch": 0.3877159309021113,
"grad_norm": 0.11083984375,
"learning_rate": 6.122840690978887e-06,
"loss": 0.9296,
"step": 404
},
{
"epoch": 0.3886756238003839,
"grad_norm": 0.119140625,
"learning_rate": 6.1132437619961616e-06,
"loss": 0.9911,
"step": 405
},
{
"epoch": 0.3896353166986564,
"grad_norm": 0.09423828125,
"learning_rate": 6.103646833013436e-06,
"loss": 0.9256,
"step": 406
},
{
"epoch": 0.390595009596929,
"grad_norm": 0.083984375,
"learning_rate": 6.094049904030711e-06,
"loss": 0.8496,
"step": 407
},
{
"epoch": 0.3915547024952015,
"grad_norm": 0.08642578125,
"learning_rate": 6.084452975047985e-06,
"loss": 0.8917,
"step": 408
},
{
"epoch": 0.3925143953934741,
"grad_norm": 0.1005859375,
"learning_rate": 6.07485604606526e-06,
"loss": 0.9327,
"step": 409
},
{
"epoch": 0.3934740882917466,
"grad_norm": 0.091796875,
"learning_rate": 6.065259117082534e-06,
"loss": 0.8882,
"step": 410
},
{
"epoch": 0.3944337811900192,
"grad_norm": 0.09423828125,
"learning_rate": 6.055662188099809e-06,
"loss": 0.9488,
"step": 411
},
{
"epoch": 0.39539347408829173,
"grad_norm": 0.09765625,
"learning_rate": 6.0460652591170836e-06,
"loss": 0.9024,
"step": 412
},
{
"epoch": 0.3963531669865643,
"grad_norm": 0.1015625,
"learning_rate": 6.036468330134357e-06,
"loss": 0.9393,
"step": 413
},
{
"epoch": 0.39731285988483683,
"grad_norm": 0.103515625,
"learning_rate": 6.026871401151631e-06,
"loss": 0.8741,
"step": 414
},
{
"epoch": 0.3982725527831094,
"grad_norm": 0.103515625,
"learning_rate": 6.0172744721689065e-06,
"loss": 0.9609,
"step": 415
},
{
"epoch": 0.39923224568138194,
"grad_norm": 0.0947265625,
"learning_rate": 6.007677543186181e-06,
"loss": 0.9116,
"step": 416
},
{
"epoch": 0.4001919385796545,
"grad_norm": 0.109375,
"learning_rate": 5.998080614203455e-06,
"loss": 0.9607,
"step": 417
},
{
"epoch": 0.40115163147792704,
"grad_norm": 0.10400390625,
"learning_rate": 5.9884836852207295e-06,
"loss": 0.9153,
"step": 418
},
{
"epoch": 0.4021113243761996,
"grad_norm": 0.08837890625,
"learning_rate": 5.978886756238005e-06,
"loss": 0.8719,
"step": 419
},
{
"epoch": 0.40307101727447214,
"grad_norm": 0.1015625,
"learning_rate": 5.969289827255279e-06,
"loss": 0.9437,
"step": 420
},
{
"epoch": 0.4040307101727447,
"grad_norm": 0.0908203125,
"learning_rate": 5.959692898272553e-06,
"loss": 0.9401,
"step": 421
},
{
"epoch": 0.4049904030710173,
"grad_norm": 0.115234375,
"learning_rate": 5.950095969289828e-06,
"loss": 0.9458,
"step": 422
},
{
"epoch": 0.4059500959692898,
"grad_norm": 0.1552734375,
"learning_rate": 5.940499040307102e-06,
"loss": 1.0006,
"step": 423
},
{
"epoch": 0.4069097888675624,
"grad_norm": 0.0888671875,
"learning_rate": 5.930902111324377e-06,
"loss": 0.8507,
"step": 424
},
{
"epoch": 0.40786948176583493,
"grad_norm": 0.09423828125,
"learning_rate": 5.9213051823416515e-06,
"loss": 0.9471,
"step": 425
},
{
"epoch": 0.4088291746641075,
"grad_norm": 0.10107421875,
"learning_rate": 5.911708253358925e-06,
"loss": 0.9143,
"step": 426
},
{
"epoch": 0.40978886756238003,
"grad_norm": 0.08984375,
"learning_rate": 5.902111324376199e-06,
"loss": 0.9904,
"step": 427
},
{
"epoch": 0.4107485604606526,
"grad_norm": 0.1015625,
"learning_rate": 5.892514395393475e-06,
"loss": 0.9509,
"step": 428
},
{
"epoch": 0.41170825335892514,
"grad_norm": 0.10791015625,
"learning_rate": 5.882917466410749e-06,
"loss": 0.9433,
"step": 429
},
{
"epoch": 0.4126679462571977,
"grad_norm": 0.095703125,
"learning_rate": 5.873320537428023e-06,
"loss": 0.9041,
"step": 430
},
{
"epoch": 0.41362763915547024,
"grad_norm": 0.09423828125,
"learning_rate": 5.8637236084452975e-06,
"loss": 0.8894,
"step": 431
},
{
"epoch": 0.4145873320537428,
"grad_norm": 0.11572265625,
"learning_rate": 5.854126679462573e-06,
"loss": 0.9713,
"step": 432
},
{
"epoch": 0.41554702495201534,
"grad_norm": 0.0830078125,
"learning_rate": 5.844529750479847e-06,
"loss": 0.8676,
"step": 433
},
{
"epoch": 0.4165067178502879,
"grad_norm": 0.08203125,
"learning_rate": 5.834932821497121e-06,
"loss": 0.9207,
"step": 434
},
{
"epoch": 0.41746641074856045,
"grad_norm": 0.10205078125,
"learning_rate": 5.825335892514396e-06,
"loss": 1.011,
"step": 435
},
{
"epoch": 0.418426103646833,
"grad_norm": 0.1318359375,
"learning_rate": 5.815738963531671e-06,
"loss": 0.9231,
"step": 436
},
{
"epoch": 0.41938579654510555,
"grad_norm": 0.09619140625,
"learning_rate": 5.806142034548945e-06,
"loss": 0.941,
"step": 437
},
{
"epoch": 0.42034548944337813,
"grad_norm": 0.2001953125,
"learning_rate": 5.7965451055662194e-06,
"loss": 1.0724,
"step": 438
},
{
"epoch": 0.42130518234165065,
"grad_norm": 0.0888671875,
"learning_rate": 5.786948176583494e-06,
"loss": 0.9221,
"step": 439
},
{
"epoch": 0.42226487523992323,
"grad_norm": 0.12060546875,
"learning_rate": 5.777351247600769e-06,
"loss": 1.0565,
"step": 440
},
{
"epoch": 0.42322456813819576,
"grad_norm": 0.1083984375,
"learning_rate": 5.767754318618043e-06,
"loss": 0.984,
"step": 441
},
{
"epoch": 0.42418426103646834,
"grad_norm": 0.09912109375,
"learning_rate": 5.758157389635317e-06,
"loss": 0.9393,
"step": 442
},
{
"epoch": 0.42514395393474086,
"grad_norm": 0.111328125,
"learning_rate": 5.748560460652591e-06,
"loss": 0.9928,
"step": 443
},
{
"epoch": 0.42610364683301344,
"grad_norm": 0.11376953125,
"learning_rate": 5.738963531669866e-06,
"loss": 1.0521,
"step": 444
},
{
"epoch": 0.42706333973128596,
"grad_norm": 0.107421875,
"learning_rate": 5.7293666026871406e-06,
"loss": 0.9167,
"step": 445
},
{
"epoch": 0.42802303262955854,
"grad_norm": 0.109375,
"learning_rate": 5.719769673704415e-06,
"loss": 0.959,
"step": 446
},
{
"epoch": 0.4289827255278311,
"grad_norm": 0.10400390625,
"learning_rate": 5.710172744721689e-06,
"loss": 0.8617,
"step": 447
},
{
"epoch": 0.42994241842610365,
"grad_norm": 0.0966796875,
"learning_rate": 5.7005758157389635e-06,
"loss": 0.9925,
"step": 448
},
{
"epoch": 0.4309021113243762,
"grad_norm": 0.1259765625,
"learning_rate": 5.690978886756239e-06,
"loss": 1.026,
"step": 449
},
{
"epoch": 0.43186180422264875,
"grad_norm": 0.08349609375,
"learning_rate": 5.681381957773513e-06,
"loss": 0.8937,
"step": 450
},
{
"epoch": 0.43282149712092133,
"grad_norm": 0.09619140625,
"learning_rate": 5.671785028790787e-06,
"loss": 0.8959,
"step": 451
},
{
"epoch": 0.43378119001919385,
"grad_norm": 0.119140625,
"learning_rate": 5.662188099808062e-06,
"loss": 0.9824,
"step": 452
},
{
"epoch": 0.43474088291746643,
"grad_norm": 0.0986328125,
"learning_rate": 5.652591170825337e-06,
"loss": 0.912,
"step": 453
},
{
"epoch": 0.43570057581573896,
"grad_norm": 0.10009765625,
"learning_rate": 5.642994241842611e-06,
"loss": 0.8847,
"step": 454
},
{
"epoch": 0.43666026871401153,
"grad_norm": 0.09228515625,
"learning_rate": 5.6333973128598855e-06,
"loss": 0.8793,
"step": 455
},
{
"epoch": 0.43761996161228406,
"grad_norm": 0.09326171875,
"learning_rate": 5.623800383877159e-06,
"loss": 0.8788,
"step": 456
},
{
"epoch": 0.43857965451055664,
"grad_norm": 0.08544921875,
"learning_rate": 5.614203454894435e-06,
"loss": 0.9116,
"step": 457
},
{
"epoch": 0.43953934740882916,
"grad_norm": 0.103515625,
"learning_rate": 5.6046065259117085e-06,
"loss": 0.8685,
"step": 458
},
{
"epoch": 0.44049904030710174,
"grad_norm": 0.0859375,
"learning_rate": 5.595009596928983e-06,
"loss": 0.8572,
"step": 459
},
{
"epoch": 0.44145873320537427,
"grad_norm": 0.09716796875,
"learning_rate": 5.585412667946257e-06,
"loss": 0.8928,
"step": 460
},
{
"epoch": 0.44241842610364684,
"grad_norm": 0.1943359375,
"learning_rate": 5.575815738963532e-06,
"loss": 0.8916,
"step": 461
},
{
"epoch": 0.44337811900191937,
"grad_norm": 0.0869140625,
"learning_rate": 5.566218809980807e-06,
"loss": 0.8626,
"step": 462
},
{
"epoch": 0.44433781190019195,
"grad_norm": 0.10693359375,
"learning_rate": 5.556621880998081e-06,
"loss": 0.9158,
"step": 463
},
{
"epoch": 0.44529750479846447,
"grad_norm": 0.10888671875,
"learning_rate": 5.547024952015355e-06,
"loss": 0.8666,
"step": 464
},
{
"epoch": 0.44625719769673705,
"grad_norm": 0.0830078125,
"learning_rate": 5.5374280230326305e-06,
"loss": 0.9601,
"step": 465
},
{
"epoch": 0.4472168905950096,
"grad_norm": 0.09765625,
"learning_rate": 5.527831094049905e-06,
"loss": 0.9191,
"step": 466
},
{
"epoch": 0.44817658349328215,
"grad_norm": 0.11181640625,
"learning_rate": 5.518234165067179e-06,
"loss": 1.1148,
"step": 467
},
{
"epoch": 0.4491362763915547,
"grad_norm": 0.0908203125,
"learning_rate": 5.5086372360844535e-06,
"loss": 0.8935,
"step": 468
},
{
"epoch": 0.45009596928982726,
"grad_norm": 0.1005859375,
"learning_rate": 5.499040307101729e-06,
"loss": 0.9715,
"step": 469
},
{
"epoch": 0.4510556621880998,
"grad_norm": 0.0927734375,
"learning_rate": 5.489443378119003e-06,
"loss": 0.9087,
"step": 470
},
{
"epoch": 0.45201535508637236,
"grad_norm": 0.09228515625,
"learning_rate": 5.4798464491362765e-06,
"loss": 0.912,
"step": 471
},
{
"epoch": 0.45297504798464494,
"grad_norm": 0.1015625,
"learning_rate": 5.470249520153551e-06,
"loss": 0.9182,
"step": 472
},
{
"epoch": 0.45393474088291746,
"grad_norm": 0.0830078125,
"learning_rate": 5.460652591170825e-06,
"loss": 0.8728,
"step": 473
},
{
"epoch": 0.45489443378119004,
"grad_norm": 0.0908203125,
"learning_rate": 5.4510556621881e-06,
"loss": 0.9494,
"step": 474
},
{
"epoch": 0.45585412667946257,
"grad_norm": 0.08544921875,
"learning_rate": 5.441458733205375e-06,
"loss": 0.8172,
"step": 475
},
{
"epoch": 0.45681381957773515,
"grad_norm": 0.09228515625,
"learning_rate": 5.431861804222649e-06,
"loss": 0.9106,
"step": 476
},
{
"epoch": 0.45777351247600767,
"grad_norm": 0.1015625,
"learning_rate": 5.422264875239923e-06,
"loss": 0.958,
"step": 477
},
{
"epoch": 0.45873320537428025,
"grad_norm": 0.08642578125,
"learning_rate": 5.4126679462571984e-06,
"loss": 0.8861,
"step": 478
},
{
"epoch": 0.4596928982725528,
"grad_norm": 0.0908203125,
"learning_rate": 5.403071017274473e-06,
"loss": 0.8868,
"step": 479
},
{
"epoch": 0.46065259117082535,
"grad_norm": 0.10986328125,
"learning_rate": 5.393474088291747e-06,
"loss": 0.954,
"step": 480
},
{
"epoch": 0.4616122840690979,
"grad_norm": 0.08642578125,
"learning_rate": 5.383877159309021e-06,
"loss": 0.9156,
"step": 481
},
{
"epoch": 0.46257197696737046,
"grad_norm": 0.09228515625,
"learning_rate": 5.374280230326297e-06,
"loss": 0.8546,
"step": 482
},
{
"epoch": 0.463531669865643,
"grad_norm": 0.10546875,
"learning_rate": 5.364683301343571e-06,
"loss": 0.8904,
"step": 483
},
{
"epoch": 0.46449136276391556,
"grad_norm": 0.1064453125,
"learning_rate": 5.355086372360845e-06,
"loss": 0.92,
"step": 484
},
{
"epoch": 0.4654510556621881,
"grad_norm": 0.0888671875,
"learning_rate": 5.345489443378119e-06,
"loss": 0.9414,
"step": 485
},
{
"epoch": 0.46641074856046066,
"grad_norm": 0.08984375,
"learning_rate": 5.335892514395395e-06,
"loss": 0.852,
"step": 486
},
{
"epoch": 0.4673704414587332,
"grad_norm": 0.1005859375,
"learning_rate": 5.326295585412668e-06,
"loss": 0.8742,
"step": 487
},
{
"epoch": 0.46833013435700577,
"grad_norm": 0.1142578125,
"learning_rate": 5.3166986564299425e-06,
"loss": 0.9099,
"step": 488
},
{
"epoch": 0.4692898272552783,
"grad_norm": 0.08984375,
"learning_rate": 5.307101727447217e-06,
"loss": 0.8932,
"step": 489
},
{
"epoch": 0.47024952015355087,
"grad_norm": 0.09423828125,
"learning_rate": 5.297504798464492e-06,
"loss": 0.9031,
"step": 490
},
{
"epoch": 0.4712092130518234,
"grad_norm": 0.1083984375,
"learning_rate": 5.287907869481766e-06,
"loss": 0.9625,
"step": 491
},
{
"epoch": 0.472168905950096,
"grad_norm": 0.09912109375,
"learning_rate": 5.278310940499041e-06,
"loss": 0.8983,
"step": 492
},
{
"epoch": 0.4731285988483685,
"grad_norm": 0.09375,
"learning_rate": 5.268714011516315e-06,
"loss": 0.8959,
"step": 493
},
{
"epoch": 0.4740882917466411,
"grad_norm": 0.0947265625,
"learning_rate": 5.25911708253359e-06,
"loss": 0.8927,
"step": 494
},
{
"epoch": 0.4750479846449136,
"grad_norm": 0.09326171875,
"learning_rate": 5.2495201535508645e-06,
"loss": 0.9237,
"step": 495
},
{
"epoch": 0.4760076775431862,
"grad_norm": 0.10791015625,
"learning_rate": 5.239923224568139e-06,
"loss": 0.8485,
"step": 496
},
{
"epoch": 0.47696737044145876,
"grad_norm": 0.08837890625,
"learning_rate": 5.230326295585413e-06,
"loss": 0.8559,
"step": 497
},
{
"epoch": 0.4779270633397313,
"grad_norm": 0.1005859375,
"learning_rate": 5.220729366602687e-06,
"loss": 0.9235,
"step": 498
},
{
"epoch": 0.47888675623800386,
"grad_norm": 0.08740234375,
"learning_rate": 5.211132437619963e-06,
"loss": 0.8661,
"step": 499
},
{
"epoch": 0.4798464491362764,
"grad_norm": 0.09326171875,
"learning_rate": 5.201535508637236e-06,
"loss": 0.8836,
"step": 500
},
{
"epoch": 0.48080614203454897,
"grad_norm": 0.0859375,
"learning_rate": 5.1919385796545105e-06,
"loss": 0.8703,
"step": 501
},
{
"epoch": 0.4817658349328215,
"grad_norm": 0.0830078125,
"learning_rate": 5.182341650671785e-06,
"loss": 0.8234,
"step": 502
},
{
"epoch": 0.48272552783109407,
"grad_norm": 0.091796875,
"learning_rate": 5.17274472168906e-06,
"loss": 0.9394,
"step": 503
},
{
"epoch": 0.4836852207293666,
"grad_norm": 0.103515625,
"learning_rate": 5.163147792706334e-06,
"loss": 0.9818,
"step": 504
},
{
"epoch": 0.4846449136276392,
"grad_norm": 0.08203125,
"learning_rate": 5.153550863723609e-06,
"loss": 0.8652,
"step": 505
},
{
"epoch": 0.4856046065259117,
"grad_norm": 0.10107421875,
"learning_rate": 5.143953934740883e-06,
"loss": 0.8937,
"step": 506
},
{
"epoch": 0.4865642994241843,
"grad_norm": 0.099609375,
"learning_rate": 5.134357005758158e-06,
"loss": 0.8381,
"step": 507
},
{
"epoch": 0.4875239923224568,
"grad_norm": 0.10009765625,
"learning_rate": 5.1247600767754325e-06,
"loss": 0.9158,
"step": 508
},
{
"epoch": 0.4884836852207294,
"grad_norm": 0.095703125,
"learning_rate": 5.115163147792707e-06,
"loss": 0.897,
"step": 509
},
{
"epoch": 0.4894433781190019,
"grad_norm": 0.1015625,
"learning_rate": 5.105566218809981e-06,
"loss": 0.8864,
"step": 510
},
{
"epoch": 0.4904030710172745,
"grad_norm": 0.099609375,
"learning_rate": 5.095969289827256e-06,
"loss": 0.8694,
"step": 511
},
{
"epoch": 0.491362763915547,
"grad_norm": 0.119140625,
"learning_rate": 5.086372360844531e-06,
"loss": 1.0052,
"step": 512
},
{
"epoch": 0.4923224568138196,
"grad_norm": 0.083984375,
"learning_rate": 5.076775431861805e-06,
"loss": 0.8901,
"step": 513
},
{
"epoch": 0.4932821497120921,
"grad_norm": 0.09375,
"learning_rate": 5.0671785028790784e-06,
"loss": 0.8759,
"step": 514
},
{
"epoch": 0.4942418426103647,
"grad_norm": 0.11865234375,
"learning_rate": 5.0575815738963544e-06,
"loss": 0.9608,
"step": 515
},
{
"epoch": 0.4952015355086372,
"grad_norm": 0.08056640625,
"learning_rate": 5.047984644913628e-06,
"loss": 0.8765,
"step": 516
},
{
"epoch": 0.4961612284069098,
"grad_norm": 0.08740234375,
"learning_rate": 5.038387715930902e-06,
"loss": 0.819,
"step": 517
},
{
"epoch": 0.4971209213051823,
"grad_norm": 0.119140625,
"learning_rate": 5.028790786948177e-06,
"loss": 0.9235,
"step": 518
},
{
"epoch": 0.4980806142034549,
"grad_norm": 0.09423828125,
"learning_rate": 5.019193857965452e-06,
"loss": 0.898,
"step": 519
},
{
"epoch": 0.4990403071017274,
"grad_norm": 0.10009765625,
"learning_rate": 5.009596928982726e-06,
"loss": 0.8986,
"step": 520
},
{
"epoch": 0.5,
"grad_norm": 0.10009765625,
"learning_rate": 5e-06,
"loss": 0.888,
"step": 521
},
{
"epoch": 0.5009596928982726,
"grad_norm": 0.099609375,
"learning_rate": 4.990403071017275e-06,
"loss": 0.8961,
"step": 522
},
{
"epoch": 0.5019193857965452,
"grad_norm": 0.10888671875,
"learning_rate": 4.980806142034549e-06,
"loss": 0.8657,
"step": 523
},
{
"epoch": 0.5028790786948176,
"grad_norm": 0.09326171875,
"learning_rate": 4.971209213051823e-06,
"loss": 0.9533,
"step": 524
},
{
"epoch": 0.5038387715930902,
"grad_norm": 0.1259765625,
"learning_rate": 4.9616122840690986e-06,
"loss": 0.8836,
"step": 525
},
{
"epoch": 0.5047984644913628,
"grad_norm": 0.09765625,
"learning_rate": 4.952015355086373e-06,
"loss": 0.9135,
"step": 526
},
{
"epoch": 0.5057581573896354,
"grad_norm": 0.142578125,
"learning_rate": 4.942418426103647e-06,
"loss": 0.9559,
"step": 527
},
{
"epoch": 0.5067178502879078,
"grad_norm": 0.1025390625,
"learning_rate": 4.9328214971209215e-06,
"loss": 0.8754,
"step": 528
},
{
"epoch": 0.5076775431861804,
"grad_norm": 0.09814453125,
"learning_rate": 4.923224568138196e-06,
"loss": 0.8919,
"step": 529
},
{
"epoch": 0.508637236084453,
"grad_norm": 0.11572265625,
"learning_rate": 4.91362763915547e-06,
"loss": 1.0779,
"step": 530
},
{
"epoch": 0.5095969289827256,
"grad_norm": 0.08935546875,
"learning_rate": 4.904030710172745e-06,
"loss": 0.9135,
"step": 531
},
{
"epoch": 0.510556621880998,
"grad_norm": 0.078125,
"learning_rate": 4.89443378119002e-06,
"loss": 0.8455,
"step": 532
},
{
"epoch": 0.5115163147792706,
"grad_norm": 0.087890625,
"learning_rate": 4.884836852207294e-06,
"loss": 0.8528,
"step": 533
},
{
"epoch": 0.5124760076775432,
"grad_norm": 0.0908203125,
"learning_rate": 4.875239923224568e-06,
"loss": 0.932,
"step": 534
},
{
"epoch": 0.5134357005758158,
"grad_norm": 0.09375,
"learning_rate": 4.8656429942418435e-06,
"loss": 0.8596,
"step": 535
},
{
"epoch": 0.5143953934740882,
"grad_norm": 0.10400390625,
"learning_rate": 4.856046065259117e-06,
"loss": 0.9044,
"step": 536
},
{
"epoch": 0.5153550863723608,
"grad_norm": 0.1484375,
"learning_rate": 4.846449136276392e-06,
"loss": 0.8278,
"step": 537
},
{
"epoch": 0.5163147792706334,
"grad_norm": 0.09375,
"learning_rate": 4.8368522072936665e-06,
"loss": 0.8511,
"step": 538
},
{
"epoch": 0.517274472168906,
"grad_norm": 0.11181640625,
"learning_rate": 4.827255278310941e-06,
"loss": 0.8579,
"step": 539
},
{
"epoch": 0.5182341650671785,
"grad_norm": 0.09814453125,
"learning_rate": 4.817658349328215e-06,
"loss": 0.9334,
"step": 540
},
{
"epoch": 0.519193857965451,
"grad_norm": 0.0986328125,
"learning_rate": 4.80806142034549e-06,
"loss": 0.897,
"step": 541
},
{
"epoch": 0.5201535508637236,
"grad_norm": 0.0927734375,
"learning_rate": 4.798464491362765e-06,
"loss": 0.8945,
"step": 542
},
{
"epoch": 0.5211132437619962,
"grad_norm": 0.09765625,
"learning_rate": 4.788867562380039e-06,
"loss": 0.938,
"step": 543
},
{
"epoch": 0.5220729366602687,
"grad_norm": 0.08642578125,
"learning_rate": 4.779270633397313e-06,
"loss": 0.879,
"step": 544
},
{
"epoch": 0.5230326295585412,
"grad_norm": 0.11328125,
"learning_rate": 4.769673704414588e-06,
"loss": 0.921,
"step": 545
},
{
"epoch": 0.5239923224568138,
"grad_norm": 0.09033203125,
"learning_rate": 4.760076775431862e-06,
"loss": 0.9038,
"step": 546
},
{
"epoch": 0.5249520153550864,
"grad_norm": 0.09814453125,
"learning_rate": 4.750479846449136e-06,
"loss": 0.8522,
"step": 547
},
{
"epoch": 0.525911708253359,
"grad_norm": 0.1064453125,
"learning_rate": 4.7408829174664115e-06,
"loss": 0.8692,
"step": 548
},
{
"epoch": 0.5268714011516314,
"grad_norm": 0.09375,
"learning_rate": 4.731285988483685e-06,
"loss": 0.9022,
"step": 549
},
{
"epoch": 0.527831094049904,
"grad_norm": 0.09521484375,
"learning_rate": 4.72168905950096e-06,
"loss": 0.9225,
"step": 550
},
{
"epoch": 0.5287907869481766,
"grad_norm": 0.09912109375,
"learning_rate": 4.7120921305182344e-06,
"loss": 0.9088,
"step": 551
},
{
"epoch": 0.5297504798464492,
"grad_norm": 0.0927734375,
"learning_rate": 4.702495201535509e-06,
"loss": 0.8757,
"step": 552
},
{
"epoch": 0.5307101727447217,
"grad_norm": 0.12890625,
"learning_rate": 4.692898272552783e-06,
"loss": 0.9915,
"step": 553
},
{
"epoch": 0.5316698656429942,
"grad_norm": 0.1376953125,
"learning_rate": 4.683301343570058e-06,
"loss": 0.9778,
"step": 554
},
{
"epoch": 0.5326295585412668,
"grad_norm": 0.1064453125,
"learning_rate": 4.673704414587333e-06,
"loss": 0.9743,
"step": 555
},
{
"epoch": 0.5335892514395394,
"grad_norm": 0.10400390625,
"learning_rate": 4.664107485604607e-06,
"loss": 0.9756,
"step": 556
},
{
"epoch": 0.5345489443378119,
"grad_norm": 0.146484375,
"learning_rate": 4.654510556621881e-06,
"loss": 0.9832,
"step": 557
},
{
"epoch": 0.5355086372360844,
"grad_norm": 0.1064453125,
"learning_rate": 4.644913627639156e-06,
"loss": 0.9348,
"step": 558
},
{
"epoch": 0.536468330134357,
"grad_norm": 0.09716796875,
"learning_rate": 4.63531669865643e-06,
"loss": 0.8675,
"step": 559
},
{
"epoch": 0.5374280230326296,
"grad_norm": 0.099609375,
"learning_rate": 4.625719769673705e-06,
"loss": 0.9037,
"step": 560
},
{
"epoch": 0.5383877159309021,
"grad_norm": 0.0927734375,
"learning_rate": 4.616122840690979e-06,
"loss": 0.9621,
"step": 561
},
{
"epoch": 0.5393474088291746,
"grad_norm": 0.10400390625,
"learning_rate": 4.606525911708254e-06,
"loss": 0.9018,
"step": 562
},
{
"epoch": 0.5403071017274472,
"grad_norm": 0.09912109375,
"learning_rate": 4.596928982725528e-06,
"loss": 0.8998,
"step": 563
},
{
"epoch": 0.5412667946257198,
"grad_norm": 0.103515625,
"learning_rate": 4.587332053742803e-06,
"loss": 0.8812,
"step": 564
},
{
"epoch": 0.5422264875239923,
"grad_norm": 0.08935546875,
"learning_rate": 4.577735124760077e-06,
"loss": 0.8627,
"step": 565
},
{
"epoch": 0.5431861804222649,
"grad_norm": 0.0986328125,
"learning_rate": 4.568138195777352e-06,
"loss": 0.9064,
"step": 566
},
{
"epoch": 0.5441458733205374,
"grad_norm": 0.09912109375,
"learning_rate": 4.558541266794626e-06,
"loss": 0.85,
"step": 567
},
{
"epoch": 0.54510556621881,
"grad_norm": 0.10498046875,
"learning_rate": 4.5489443378119005e-06,
"loss": 0.9558,
"step": 568
},
{
"epoch": 0.5460652591170825,
"grad_norm": 0.10302734375,
"learning_rate": 4.539347408829175e-06,
"loss": 0.9183,
"step": 569
},
{
"epoch": 0.5470249520153551,
"grad_norm": 0.091796875,
"learning_rate": 4.52975047984645e-06,
"loss": 0.9178,
"step": 570
},
{
"epoch": 0.5479846449136276,
"grad_norm": 0.08740234375,
"learning_rate": 4.520153550863724e-06,
"loss": 0.9022,
"step": 571
},
{
"epoch": 0.5489443378119002,
"grad_norm": 0.0927734375,
"learning_rate": 4.510556621880998e-06,
"loss": 0.858,
"step": 572
},
{
"epoch": 0.5499040307101728,
"grad_norm": 0.0908203125,
"learning_rate": 4.500959692898273e-06,
"loss": 0.8855,
"step": 573
},
{
"epoch": 0.5508637236084453,
"grad_norm": 0.1181640625,
"learning_rate": 4.491362763915547e-06,
"loss": 1.0215,
"step": 574
},
{
"epoch": 0.5518234165067178,
"grad_norm": 0.1689453125,
"learning_rate": 4.481765834932822e-06,
"loss": 1.0886,
"step": 575
},
{
"epoch": 0.5527831094049904,
"grad_norm": 0.10498046875,
"learning_rate": 4.472168905950096e-06,
"loss": 0.9371,
"step": 576
},
{
"epoch": 0.553742802303263,
"grad_norm": 0.09765625,
"learning_rate": 4.462571976967371e-06,
"loss": 0.9896,
"step": 577
},
{
"epoch": 0.5547024952015355,
"grad_norm": 0.1435546875,
"learning_rate": 4.4529750479846455e-06,
"loss": 0.9882,
"step": 578
},
{
"epoch": 0.555662188099808,
"grad_norm": 0.08984375,
"learning_rate": 4.44337811900192e-06,
"loss": 0.839,
"step": 579
},
{
"epoch": 0.5566218809980806,
"grad_norm": 0.0908203125,
"learning_rate": 4.433781190019194e-06,
"loss": 0.8676,
"step": 580
},
{
"epoch": 0.5575815738963532,
"grad_norm": 0.1005859375,
"learning_rate": 4.4241842610364685e-06,
"loss": 0.9514,
"step": 581
},
{
"epoch": 0.5585412667946257,
"grad_norm": 0.12158203125,
"learning_rate": 4.414587332053743e-06,
"loss": 0.9222,
"step": 582
},
{
"epoch": 0.5595009596928983,
"grad_norm": 0.087890625,
"learning_rate": 4.404990403071018e-06,
"loss": 0.8855,
"step": 583
},
{
"epoch": 0.5604606525911708,
"grad_norm": 0.09912109375,
"learning_rate": 4.395393474088292e-06,
"loss": 0.8979,
"step": 584
},
{
"epoch": 0.5614203454894434,
"grad_norm": 0.09765625,
"learning_rate": 4.385796545105567e-06,
"loss": 0.8857,
"step": 585
},
{
"epoch": 0.5623800383877159,
"grad_norm": 0.091796875,
"learning_rate": 4.376199616122841e-06,
"loss": 0.8976,
"step": 586
},
{
"epoch": 0.5633397312859885,
"grad_norm": 0.1025390625,
"learning_rate": 4.366602687140115e-06,
"loss": 0.8973,
"step": 587
},
{
"epoch": 0.564299424184261,
"grad_norm": 0.10205078125,
"learning_rate": 4.35700575815739e-06,
"loss": 0.9081,
"step": 588
},
{
"epoch": 0.5652591170825336,
"grad_norm": 0.0927734375,
"learning_rate": 4.347408829174665e-06,
"loss": 0.9515,
"step": 589
},
{
"epoch": 0.5662188099808061,
"grad_norm": 0.1083984375,
"learning_rate": 4.337811900191939e-06,
"loss": 0.9527,
"step": 590
},
{
"epoch": 0.5671785028790787,
"grad_norm": 0.1650390625,
"learning_rate": 4.3282149712092134e-06,
"loss": 1.024,
"step": 591
},
{
"epoch": 0.5681381957773513,
"grad_norm": 0.10205078125,
"learning_rate": 4.318618042226488e-06,
"loss": 0.9031,
"step": 592
},
{
"epoch": 0.5690978886756238,
"grad_norm": 0.1455078125,
"learning_rate": 4.309021113243763e-06,
"loss": 0.9823,
"step": 593
},
{
"epoch": 0.5700575815738963,
"grad_norm": 0.091796875,
"learning_rate": 4.299424184261036e-06,
"loss": 0.8912,
"step": 594
},
{
"epoch": 0.5710172744721689,
"grad_norm": 0.1025390625,
"learning_rate": 4.289827255278312e-06,
"loss": 0.8746,
"step": 595
},
{
"epoch": 0.5719769673704415,
"grad_norm": 0.08935546875,
"learning_rate": 4.280230326295586e-06,
"loss": 0.8366,
"step": 596
},
{
"epoch": 0.572936660268714,
"grad_norm": 0.107421875,
"learning_rate": 4.27063339731286e-06,
"loss": 0.9169,
"step": 597
},
{
"epoch": 0.5738963531669866,
"grad_norm": 0.091796875,
"learning_rate": 4.2610364683301346e-06,
"loss": 0.8725,
"step": 598
},
{
"epoch": 0.5748560460652591,
"grad_norm": 0.09765625,
"learning_rate": 4.251439539347409e-06,
"loss": 0.8822,
"step": 599
},
{
"epoch": 0.5758157389635317,
"grad_norm": 0.09716796875,
"learning_rate": 4.241842610364684e-06,
"loss": 0.8658,
"step": 600
},
{
"epoch": 0.5767754318618042,
"grad_norm": 0.11181640625,
"learning_rate": 4.2322456813819576e-06,
"loss": 0.9315,
"step": 601
},
{
"epoch": 0.5777351247600768,
"grad_norm": 0.09765625,
"learning_rate": 4.222648752399233e-06,
"loss": 0.915,
"step": 602
},
{
"epoch": 0.5786948176583493,
"grad_norm": 0.09521484375,
"learning_rate": 4.213051823416507e-06,
"loss": 0.8608,
"step": 603
},
{
"epoch": 0.5796545105566219,
"grad_norm": 0.1416015625,
"learning_rate": 4.203454894433781e-06,
"loss": 0.9974,
"step": 604
},
{
"epoch": 0.5806142034548945,
"grad_norm": 0.10302734375,
"learning_rate": 4.193857965451056e-06,
"loss": 0.8526,
"step": 605
},
{
"epoch": 0.581573896353167,
"grad_norm": 0.09521484375,
"learning_rate": 4.184261036468331e-06,
"loss": 0.8498,
"step": 606
},
{
"epoch": 0.5825335892514395,
"grad_norm": 0.16796875,
"learning_rate": 4.174664107485605e-06,
"loss": 1.011,
"step": 607
},
{
"epoch": 0.5834932821497121,
"grad_norm": 0.0869140625,
"learning_rate": 4.1650671785028795e-06,
"loss": 0.8683,
"step": 608
},
{
"epoch": 0.5844529750479847,
"grad_norm": 0.0869140625,
"learning_rate": 4.155470249520154e-06,
"loss": 0.8471,
"step": 609
},
{
"epoch": 0.5854126679462572,
"grad_norm": 0.130859375,
"learning_rate": 4.145873320537428e-06,
"loss": 0.9454,
"step": 610
},
{
"epoch": 0.5863723608445297,
"grad_norm": 0.09912109375,
"learning_rate": 4.1362763915547025e-06,
"loss": 0.9189,
"step": 611
},
{
"epoch": 0.5873320537428023,
"grad_norm": 0.0908203125,
"learning_rate": 4.126679462571978e-06,
"loss": 0.8792,
"step": 612
},
{
"epoch": 0.5882917466410749,
"grad_norm": 0.1337890625,
"learning_rate": 4.117082533589252e-06,
"loss": 1.0256,
"step": 613
},
{
"epoch": 0.5892514395393474,
"grad_norm": 0.1064453125,
"learning_rate": 4.107485604606526e-06,
"loss": 0.8726,
"step": 614
},
{
"epoch": 0.5902111324376199,
"grad_norm": 0.080078125,
"learning_rate": 4.097888675623801e-06,
"loss": 0.8566,
"step": 615
},
{
"epoch": 0.5911708253358925,
"grad_norm": 0.099609375,
"learning_rate": 4.088291746641076e-06,
"loss": 0.8521,
"step": 616
},
{
"epoch": 0.5921305182341651,
"grad_norm": 0.08935546875,
"learning_rate": 4.078694817658349e-06,
"loss": 0.8877,
"step": 617
},
{
"epoch": 0.5930902111324377,
"grad_norm": 0.0830078125,
"learning_rate": 4.0690978886756245e-06,
"loss": 0.8607,
"step": 618
},
{
"epoch": 0.5940499040307101,
"grad_norm": 0.166015625,
"learning_rate": 4.059500959692899e-06,
"loss": 1.0081,
"step": 619
},
{
"epoch": 0.5950095969289827,
"grad_norm": 0.08984375,
"learning_rate": 4.049904030710173e-06,
"loss": 0.8838,
"step": 620
},
{
"epoch": 0.5959692898272553,
"grad_norm": 0.1416015625,
"learning_rate": 4.0403071017274475e-06,
"loss": 0.9923,
"step": 621
},
{
"epoch": 0.5969289827255279,
"grad_norm": 0.08740234375,
"learning_rate": 4.030710172744722e-06,
"loss": 0.8822,
"step": 622
},
{
"epoch": 0.5978886756238004,
"grad_norm": 0.0947265625,
"learning_rate": 4.021113243761996e-06,
"loss": 0.8951,
"step": 623
},
{
"epoch": 0.5988483685220729,
"grad_norm": 0.09228515625,
"learning_rate": 4.0115163147792705e-06,
"loss": 0.8574,
"step": 624
},
{
"epoch": 0.5998080614203455,
"grad_norm": 0.0947265625,
"learning_rate": 4.001919385796546e-06,
"loss": 0.9205,
"step": 625
},
{
"epoch": 0.6007677543186181,
"grad_norm": 0.10986328125,
"learning_rate": 3.99232245681382e-06,
"loss": 0.8943,
"step": 626
},
{
"epoch": 0.6017274472168906,
"grad_norm": 0.09716796875,
"learning_rate": 3.982725527831094e-06,
"loss": 0.8803,
"step": 627
},
{
"epoch": 0.6026871401151631,
"grad_norm": 0.1181640625,
"learning_rate": 3.973128598848369e-06,
"loss": 0.9062,
"step": 628
},
{
"epoch": 0.6036468330134357,
"grad_norm": 0.08447265625,
"learning_rate": 3.963531669865644e-06,
"loss": 0.8578,
"step": 629
},
{
"epoch": 0.6046065259117083,
"grad_norm": 0.09375,
"learning_rate": 3.953934740882917e-06,
"loss": 0.8856,
"step": 630
},
{
"epoch": 0.6055662188099808,
"grad_norm": 0.087890625,
"learning_rate": 3.9443378119001924e-06,
"loss": 0.8826,
"step": 631
},
{
"epoch": 0.6065259117082533,
"grad_norm": 0.09326171875,
"learning_rate": 3.934740882917467e-06,
"loss": 0.8445,
"step": 632
},
{
"epoch": 0.6074856046065259,
"grad_norm": 0.087890625,
"learning_rate": 3.925143953934741e-06,
"loss": 0.8739,
"step": 633
},
{
"epoch": 0.6084452975047985,
"grad_norm": 0.11181640625,
"learning_rate": 3.915547024952015e-06,
"loss": 0.9497,
"step": 634
},
{
"epoch": 0.6094049904030711,
"grad_norm": 0.08154296875,
"learning_rate": 3.905950095969291e-06,
"loss": 0.8306,
"step": 635
},
{
"epoch": 0.6103646833013435,
"grad_norm": 0.130859375,
"learning_rate": 3.896353166986565e-06,
"loss": 0.9456,
"step": 636
},
{
"epoch": 0.6113243761996161,
"grad_norm": 0.11181640625,
"learning_rate": 3.886756238003839e-06,
"loss": 0.9411,
"step": 637
},
{
"epoch": 0.6122840690978887,
"grad_norm": 0.08984375,
"learning_rate": 3.8771593090211136e-06,
"loss": 0.9256,
"step": 638
},
{
"epoch": 0.6132437619961613,
"grad_norm": 0.08935546875,
"learning_rate": 3.867562380038388e-06,
"loss": 0.8409,
"step": 639
},
{
"epoch": 0.6142034548944337,
"grad_norm": 0.09716796875,
"learning_rate": 3.857965451055662e-06,
"loss": 0.9718,
"step": 640
},
{
"epoch": 0.6151631477927063,
"grad_norm": 0.0869140625,
"learning_rate": 3.848368522072937e-06,
"loss": 0.9014,
"step": 641
},
{
"epoch": 0.6161228406909789,
"grad_norm": 0.0966796875,
"learning_rate": 3.838771593090212e-06,
"loss": 0.9303,
"step": 642
},
{
"epoch": 0.6170825335892515,
"grad_norm": 0.10107421875,
"learning_rate": 3.829174664107486e-06,
"loss": 0.918,
"step": 643
},
{
"epoch": 0.6180422264875239,
"grad_norm": 0.138671875,
"learning_rate": 3.81957773512476e-06,
"loss": 0.8488,
"step": 644
},
{
"epoch": 0.6190019193857965,
"grad_norm": 0.0849609375,
"learning_rate": 3.809980806142035e-06,
"loss": 0.892,
"step": 645
},
{
"epoch": 0.6199616122840691,
"grad_norm": 0.10595703125,
"learning_rate": 3.8003838771593095e-06,
"loss": 0.865,
"step": 646
},
{
"epoch": 0.6209213051823417,
"grad_norm": 0.087890625,
"learning_rate": 3.7907869481765834e-06,
"loss": 0.8873,
"step": 647
},
{
"epoch": 0.6218809980806143,
"grad_norm": 0.09619140625,
"learning_rate": 3.781190019193858e-06,
"loss": 0.8258,
"step": 648
},
{
"epoch": 0.6228406909788867,
"grad_norm": 0.08984375,
"learning_rate": 3.7715930902111324e-06,
"loss": 0.8526,
"step": 649
},
{
"epoch": 0.6238003838771593,
"grad_norm": 0.09716796875,
"learning_rate": 3.761996161228407e-06,
"loss": 0.8948,
"step": 650
},
{
"epoch": 0.6247600767754319,
"grad_norm": 0.0966796875,
"learning_rate": 3.7523992322456815e-06,
"loss": 0.9332,
"step": 651
},
{
"epoch": 0.6257197696737045,
"grad_norm": 0.10107421875,
"learning_rate": 3.7428023032629563e-06,
"loss": 0.8915,
"step": 652
},
{
"epoch": 0.6266794625719769,
"grad_norm": 0.09423828125,
"learning_rate": 3.7332053742802306e-06,
"loss": 0.8898,
"step": 653
},
{
"epoch": 0.6276391554702495,
"grad_norm": 0.09765625,
"learning_rate": 3.7236084452975053e-06,
"loss": 0.9066,
"step": 654
},
{
"epoch": 0.6285988483685221,
"grad_norm": 0.08837890625,
"learning_rate": 3.7140115163147792e-06,
"loss": 0.8427,
"step": 655
},
{
"epoch": 0.6295585412667947,
"grad_norm": 0.10302734375,
"learning_rate": 3.704414587332054e-06,
"loss": 0.8633,
"step": 656
},
{
"epoch": 0.6305182341650671,
"grad_norm": 0.115234375,
"learning_rate": 3.6948176583493283e-06,
"loss": 0.9561,
"step": 657
},
{
"epoch": 0.6314779270633397,
"grad_norm": 0.1083984375,
"learning_rate": 3.685220729366603e-06,
"loss": 0.8768,
"step": 658
},
{
"epoch": 0.6324376199616123,
"grad_norm": 0.0869140625,
"learning_rate": 3.6756238003838774e-06,
"loss": 0.8322,
"step": 659
},
{
"epoch": 0.6333973128598849,
"grad_norm": 0.0986328125,
"learning_rate": 3.666026871401152e-06,
"loss": 0.9367,
"step": 660
},
{
"epoch": 0.6343570057581573,
"grad_norm": 0.10302734375,
"learning_rate": 3.6564299424184265e-06,
"loss": 0.9112,
"step": 661
},
{
"epoch": 0.6353166986564299,
"grad_norm": 0.10888671875,
"learning_rate": 3.6468330134357012e-06,
"loss": 0.9364,
"step": 662
},
{
"epoch": 0.6362763915547025,
"grad_norm": 0.1083984375,
"learning_rate": 3.637236084452975e-06,
"loss": 0.8825,
"step": 663
},
{
"epoch": 0.6372360844529751,
"grad_norm": 0.10693359375,
"learning_rate": 3.62763915547025e-06,
"loss": 0.927,
"step": 664
},
{
"epoch": 0.6381957773512476,
"grad_norm": 0.08447265625,
"learning_rate": 3.618042226487524e-06,
"loss": 0.9676,
"step": 665
},
{
"epoch": 0.6391554702495201,
"grad_norm": 0.103515625,
"learning_rate": 3.608445297504799e-06,
"loss": 0.7565,
"step": 666
},
{
"epoch": 0.6401151631477927,
"grad_norm": 0.08154296875,
"learning_rate": 3.5988483685220733e-06,
"loss": 0.8226,
"step": 667
},
{
"epoch": 0.6410748560460653,
"grad_norm": 0.10205078125,
"learning_rate": 3.589251439539348e-06,
"loss": 0.9409,
"step": 668
},
{
"epoch": 0.6420345489443378,
"grad_norm": 0.1015625,
"learning_rate": 3.5796545105566224e-06,
"loss": 0.8866,
"step": 669
},
{
"epoch": 0.6429942418426103,
"grad_norm": 0.0859375,
"learning_rate": 3.5700575815738963e-06,
"loss": 0.8802,
"step": 670
},
{
"epoch": 0.6439539347408829,
"grad_norm": 0.0849609375,
"learning_rate": 3.560460652591171e-06,
"loss": 0.8443,
"step": 671
},
{
"epoch": 0.6449136276391555,
"grad_norm": 0.10302734375,
"learning_rate": 3.5508637236084453e-06,
"loss": 0.9877,
"step": 672
},
{
"epoch": 0.6458733205374281,
"grad_norm": 0.1005859375,
"learning_rate": 3.54126679462572e-06,
"loss": 0.8619,
"step": 673
},
{
"epoch": 0.6468330134357005,
"grad_norm": 0.10205078125,
"learning_rate": 3.5316698656429944e-06,
"loss": 0.9202,
"step": 674
},
{
"epoch": 0.6477927063339731,
"grad_norm": 0.08935546875,
"learning_rate": 3.522072936660269e-06,
"loss": 0.8944,
"step": 675
},
{
"epoch": 0.6487523992322457,
"grad_norm": 0.0849609375,
"learning_rate": 3.512476007677543e-06,
"loss": 0.8625,
"step": 676
},
{
"epoch": 0.6497120921305183,
"grad_norm": 0.10498046875,
"learning_rate": 3.502879078694818e-06,
"loss": 0.8948,
"step": 677
},
{
"epoch": 0.6506717850287908,
"grad_norm": 0.1171875,
"learning_rate": 3.493282149712092e-06,
"loss": 0.9505,
"step": 678
},
{
"epoch": 0.6516314779270633,
"grad_norm": 0.08935546875,
"learning_rate": 3.483685220729367e-06,
"loss": 0.8514,
"step": 679
},
{
"epoch": 0.6525911708253359,
"grad_norm": 0.09814453125,
"learning_rate": 3.4740882917466412e-06,
"loss": 0.9246,
"step": 680
},
{
"epoch": 0.6535508637236085,
"grad_norm": 0.09326171875,
"learning_rate": 3.464491362763916e-06,
"loss": 0.8808,
"step": 681
},
{
"epoch": 0.654510556621881,
"grad_norm": 0.1220703125,
"learning_rate": 3.4548944337811903e-06,
"loss": 0.9993,
"step": 682
},
{
"epoch": 0.6554702495201535,
"grad_norm": 0.08203125,
"learning_rate": 3.445297504798465e-06,
"loss": 0.8682,
"step": 683
},
{
"epoch": 0.6564299424184261,
"grad_norm": 0.0947265625,
"learning_rate": 3.435700575815739e-06,
"loss": 0.8945,
"step": 684
},
{
"epoch": 0.6573896353166987,
"grad_norm": 0.08447265625,
"learning_rate": 3.4261036468330137e-06,
"loss": 0.8457,
"step": 685
},
{
"epoch": 0.6583493282149712,
"grad_norm": 0.09375,
"learning_rate": 3.416506717850288e-06,
"loss": 0.8959,
"step": 686
},
{
"epoch": 0.6593090211132437,
"grad_norm": 0.08740234375,
"learning_rate": 3.4069097888675628e-06,
"loss": 0.8913,
"step": 687
},
{
"epoch": 0.6602687140115163,
"grad_norm": 0.09765625,
"learning_rate": 3.397312859884837e-06,
"loss": 0.9114,
"step": 688
},
{
"epoch": 0.6612284069097889,
"grad_norm": 0.0869140625,
"learning_rate": 3.387715930902112e-06,
"loss": 0.8474,
"step": 689
},
{
"epoch": 0.6621880998080614,
"grad_norm": 0.0947265625,
"learning_rate": 3.378119001919386e-06,
"loss": 0.877,
"step": 690
},
{
"epoch": 0.663147792706334,
"grad_norm": 0.09375,
"learning_rate": 3.368522072936661e-06,
"loss": 0.878,
"step": 691
},
{
"epoch": 0.6641074856046065,
"grad_norm": 0.1435546875,
"learning_rate": 3.358925143953935e-06,
"loss": 1.0548,
"step": 692
},
{
"epoch": 0.6650671785028791,
"grad_norm": 0.08349609375,
"learning_rate": 3.3493282149712096e-06,
"loss": 0.8348,
"step": 693
},
{
"epoch": 0.6660268714011516,
"grad_norm": 0.103515625,
"learning_rate": 3.339731285988484e-06,
"loss": 0.9428,
"step": 694
},
{
"epoch": 0.6669865642994242,
"grad_norm": 0.10400390625,
"learning_rate": 3.3301343570057582e-06,
"loss": 0.8574,
"step": 695
},
{
"epoch": 0.6679462571976967,
"grad_norm": 0.1474609375,
"learning_rate": 3.320537428023033e-06,
"loss": 1.0041,
"step": 696
},
{
"epoch": 0.6689059500959693,
"grad_norm": 0.083984375,
"learning_rate": 3.3109404990403073e-06,
"loss": 0.9344,
"step": 697
},
{
"epoch": 0.6698656429942419,
"grad_norm": 0.0927734375,
"learning_rate": 3.301343570057582e-06,
"loss": 0.8579,
"step": 698
},
{
"epoch": 0.6708253358925144,
"grad_norm": 0.09130859375,
"learning_rate": 3.291746641074856e-06,
"loss": 0.8861,
"step": 699
},
{
"epoch": 0.6717850287907869,
"grad_norm": 0.08447265625,
"learning_rate": 3.2821497120921307e-06,
"loss": 0.841,
"step": 700
},
{
"epoch": 0.6727447216890595,
"grad_norm": 0.1142578125,
"learning_rate": 3.272552783109405e-06,
"loss": 0.9662,
"step": 701
},
{
"epoch": 0.6737044145873321,
"grad_norm": 0.0986328125,
"learning_rate": 3.26295585412668e-06,
"loss": 0.9112,
"step": 702
},
{
"epoch": 0.6746641074856046,
"grad_norm": 0.1298828125,
"learning_rate": 3.253358925143954e-06,
"loss": 1.0551,
"step": 703
},
{
"epoch": 0.6756238003838771,
"grad_norm": 0.1171875,
"learning_rate": 3.243761996161229e-06,
"loss": 0.8602,
"step": 704
},
{
"epoch": 0.6765834932821497,
"grad_norm": 0.099609375,
"learning_rate": 3.2341650671785028e-06,
"loss": 0.9373,
"step": 705
},
{
"epoch": 0.6775431861804223,
"grad_norm": 0.1357421875,
"learning_rate": 3.224568138195778e-06,
"loss": 0.9555,
"step": 706
},
{
"epoch": 0.6785028790786948,
"grad_norm": 0.142578125,
"learning_rate": 3.214971209213052e-06,
"loss": 0.97,
"step": 707
},
{
"epoch": 0.6794625719769674,
"grad_norm": 0.0859375,
"learning_rate": 3.2053742802303266e-06,
"loss": 0.833,
"step": 708
},
{
"epoch": 0.6804222648752399,
"grad_norm": 0.107421875,
"learning_rate": 3.195777351247601e-06,
"loss": 0.8036,
"step": 709
},
{
"epoch": 0.6813819577735125,
"grad_norm": 0.11328125,
"learning_rate": 3.1861804222648757e-06,
"loss": 0.9721,
"step": 710
},
{
"epoch": 0.682341650671785,
"grad_norm": 0.0869140625,
"learning_rate": 3.17658349328215e-06,
"loss": 0.8539,
"step": 711
},
{
"epoch": 0.6833013435700576,
"grad_norm": 0.0869140625,
"learning_rate": 3.1669865642994248e-06,
"loss": 0.8504,
"step": 712
},
{
"epoch": 0.6842610364683301,
"grad_norm": 0.1044921875,
"learning_rate": 3.1573896353166987e-06,
"loss": 0.878,
"step": 713
},
{
"epoch": 0.6852207293666027,
"grad_norm": 0.1025390625,
"learning_rate": 3.1477927063339734e-06,
"loss": 0.8945,
"step": 714
},
{
"epoch": 0.6861804222648752,
"grad_norm": 0.099609375,
"learning_rate": 3.1381957773512477e-06,
"loss": 0.9088,
"step": 715
},
{
"epoch": 0.6871401151631478,
"grad_norm": 0.1005859375,
"learning_rate": 3.1285988483685225e-06,
"loss": 0.9088,
"step": 716
},
{
"epoch": 0.6880998080614203,
"grad_norm": 0.09619140625,
"learning_rate": 3.119001919385797e-06,
"loss": 0.8658,
"step": 717
},
{
"epoch": 0.6890595009596929,
"grad_norm": 0.11865234375,
"learning_rate": 3.1094049904030716e-06,
"loss": 0.8723,
"step": 718
},
{
"epoch": 0.6900191938579654,
"grad_norm": 0.154296875,
"learning_rate": 3.099808061420346e-06,
"loss": 0.9998,
"step": 719
},
{
"epoch": 0.690978886756238,
"grad_norm": 0.09765625,
"learning_rate": 3.09021113243762e-06,
"loss": 0.8432,
"step": 720
},
{
"epoch": 0.6919385796545106,
"grad_norm": 0.1005859375,
"learning_rate": 3.0806142034548945e-06,
"loss": 0.8783,
"step": 721
},
{
"epoch": 0.6928982725527831,
"grad_norm": 0.09423828125,
"learning_rate": 3.071017274472169e-06,
"loss": 0.8529,
"step": 722
},
{
"epoch": 0.6938579654510557,
"grad_norm": 0.09716796875,
"learning_rate": 3.0614203454894436e-06,
"loss": 0.8756,
"step": 723
},
{
"epoch": 0.6948176583493282,
"grad_norm": 0.109375,
"learning_rate": 3.051823416506718e-06,
"loss": 0.9314,
"step": 724
},
{
"epoch": 0.6957773512476008,
"grad_norm": 0.09716796875,
"learning_rate": 3.0422264875239927e-06,
"loss": 0.8719,
"step": 725
},
{
"epoch": 0.6967370441458733,
"grad_norm": 0.09716796875,
"learning_rate": 3.032629558541267e-06,
"loss": 0.881,
"step": 726
},
{
"epoch": 0.6976967370441459,
"grad_norm": 0.1552734375,
"learning_rate": 3.0230326295585418e-06,
"loss": 0.97,
"step": 727
},
{
"epoch": 0.6986564299424184,
"grad_norm": 0.1015625,
"learning_rate": 3.0134357005758157e-06,
"loss": 0.8983,
"step": 728
},
{
"epoch": 0.699616122840691,
"grad_norm": 0.08740234375,
"learning_rate": 3.0038387715930904e-06,
"loss": 0.8501,
"step": 729
},
{
"epoch": 0.7005758157389635,
"grad_norm": 0.103515625,
"learning_rate": 2.9942418426103648e-06,
"loss": 0.8754,
"step": 730
},
{
"epoch": 0.7015355086372361,
"grad_norm": 0.1083984375,
"learning_rate": 2.9846449136276395e-06,
"loss": 0.9421,
"step": 731
},
{
"epoch": 0.7024952015355086,
"grad_norm": 0.1328125,
"learning_rate": 2.975047984644914e-06,
"loss": 0.9579,
"step": 732
},
{
"epoch": 0.7034548944337812,
"grad_norm": 0.095703125,
"learning_rate": 2.9654510556621886e-06,
"loss": 0.8518,
"step": 733
},
{
"epoch": 0.7044145873320538,
"grad_norm": 0.11962890625,
"learning_rate": 2.9558541266794625e-06,
"loss": 0.9811,
"step": 734
},
{
"epoch": 0.7053742802303263,
"grad_norm": 0.09326171875,
"learning_rate": 2.9462571976967377e-06,
"loss": 0.9039,
"step": 735
},
{
"epoch": 0.7063339731285988,
"grad_norm": 0.11083984375,
"learning_rate": 2.9366602687140116e-06,
"loss": 0.914,
"step": 736
},
{
"epoch": 0.7072936660268714,
"grad_norm": 0.08935546875,
"learning_rate": 2.9270633397312863e-06,
"loss": 0.8289,
"step": 737
},
{
"epoch": 0.708253358925144,
"grad_norm": 0.08251953125,
"learning_rate": 2.9174664107485606e-06,
"loss": 0.8639,
"step": 738
},
{
"epoch": 0.7092130518234165,
"grad_norm": 0.09423828125,
"learning_rate": 2.9078694817658354e-06,
"loss": 0.9277,
"step": 739
},
{
"epoch": 0.710172744721689,
"grad_norm": 0.08984375,
"learning_rate": 2.8982725527831097e-06,
"loss": 0.8812,
"step": 740
},
{
"epoch": 0.7111324376199616,
"grad_norm": 0.09765625,
"learning_rate": 2.8886756238003845e-06,
"loss": 0.9588,
"step": 741
},
{
"epoch": 0.7120921305182342,
"grad_norm": 0.1103515625,
"learning_rate": 2.8790786948176584e-06,
"loss": 0.9299,
"step": 742
},
{
"epoch": 0.7130518234165067,
"grad_norm": 0.095703125,
"learning_rate": 2.869481765834933e-06,
"loss": 0.9102,
"step": 743
},
{
"epoch": 0.7140115163147792,
"grad_norm": 0.0986328125,
"learning_rate": 2.8598848368522074e-06,
"loss": 0.9518,
"step": 744
},
{
"epoch": 0.7149712092130518,
"grad_norm": 0.08349609375,
"learning_rate": 2.8502879078694818e-06,
"loss": 0.8438,
"step": 745
},
{
"epoch": 0.7159309021113244,
"grad_norm": 0.11865234375,
"learning_rate": 2.8406909788867565e-06,
"loss": 0.9486,
"step": 746
},
{
"epoch": 0.716890595009597,
"grad_norm": 0.091796875,
"learning_rate": 2.831094049904031e-06,
"loss": 0.9467,
"step": 747
},
{
"epoch": 0.7178502879078695,
"grad_norm": 0.099609375,
"learning_rate": 2.8214971209213056e-06,
"loss": 0.897,
"step": 748
},
{
"epoch": 0.718809980806142,
"grad_norm": 0.14453125,
"learning_rate": 2.8119001919385795e-06,
"loss": 0.9883,
"step": 749
},
{
"epoch": 0.7197696737044146,
"grad_norm": 0.08544921875,
"learning_rate": 2.8023032629558543e-06,
"loss": 0.8568,
"step": 750
},
{
"epoch": 0.7207293666026872,
"grad_norm": 0.10107421875,
"learning_rate": 2.7927063339731286e-06,
"loss": 0.9299,
"step": 751
},
{
"epoch": 0.7216890595009597,
"grad_norm": 0.1005859375,
"learning_rate": 2.7831094049904033e-06,
"loss": 0.9514,
"step": 752
},
{
"epoch": 0.7226487523992322,
"grad_norm": 0.10302734375,
"learning_rate": 2.7735124760076777e-06,
"loss": 0.8542,
"step": 753
},
{
"epoch": 0.7236084452975048,
"grad_norm": 0.08642578125,
"learning_rate": 2.7639155470249524e-06,
"loss": 0.9055,
"step": 754
},
{
"epoch": 0.7245681381957774,
"grad_norm": 0.1015625,
"learning_rate": 2.7543186180422267e-06,
"loss": 0.975,
"step": 755
},
{
"epoch": 0.72552783109405,
"grad_norm": 0.11572265625,
"learning_rate": 2.7447216890595015e-06,
"loss": 0.9118,
"step": 756
},
{
"epoch": 0.7264875239923224,
"grad_norm": 0.09619140625,
"learning_rate": 2.7351247600767754e-06,
"loss": 0.8825,
"step": 757
},
{
"epoch": 0.727447216890595,
"grad_norm": 0.16015625,
"learning_rate": 2.72552783109405e-06,
"loss": 0.8799,
"step": 758
},
{
"epoch": 0.7284069097888676,
"grad_norm": 0.09033203125,
"learning_rate": 2.7159309021113245e-06,
"loss": 0.8458,
"step": 759
},
{
"epoch": 0.7293666026871402,
"grad_norm": 0.10888671875,
"learning_rate": 2.7063339731285992e-06,
"loss": 0.9211,
"step": 760
},
{
"epoch": 0.7303262955854126,
"grad_norm": 0.0927734375,
"learning_rate": 2.6967370441458735e-06,
"loss": 0.9039,
"step": 761
},
{
"epoch": 0.7312859884836852,
"grad_norm": 0.0966796875,
"learning_rate": 2.6871401151631483e-06,
"loss": 0.8405,
"step": 762
},
{
"epoch": 0.7322456813819578,
"grad_norm": 0.10498046875,
"learning_rate": 2.6775431861804226e-06,
"loss": 0.9307,
"step": 763
},
{
"epoch": 0.7332053742802304,
"grad_norm": 0.08984375,
"learning_rate": 2.6679462571976974e-06,
"loss": 0.8627,
"step": 764
},
{
"epoch": 0.7341650671785028,
"grad_norm": 0.10009765625,
"learning_rate": 2.6583493282149713e-06,
"loss": 0.9403,
"step": 765
},
{
"epoch": 0.7351247600767754,
"grad_norm": 0.08349609375,
"learning_rate": 2.648752399232246e-06,
"loss": 0.8884,
"step": 766
},
{
"epoch": 0.736084452975048,
"grad_norm": 0.203125,
"learning_rate": 2.6391554702495203e-06,
"loss": 1.0263,
"step": 767
},
{
"epoch": 0.7370441458733206,
"grad_norm": 0.169921875,
"learning_rate": 2.629558541266795e-06,
"loss": 0.9245,
"step": 768
},
{
"epoch": 0.738003838771593,
"grad_norm": 0.09765625,
"learning_rate": 2.6199616122840694e-06,
"loss": 0.91,
"step": 769
},
{
"epoch": 0.7389635316698656,
"grad_norm": 0.1259765625,
"learning_rate": 2.6103646833013433e-06,
"loss": 0.9534,
"step": 770
},
{
"epoch": 0.7399232245681382,
"grad_norm": 0.12353515625,
"learning_rate": 2.600767754318618e-06,
"loss": 0.8804,
"step": 771
},
{
"epoch": 0.7408829174664108,
"grad_norm": 0.1005859375,
"learning_rate": 2.5911708253358924e-06,
"loss": 0.9345,
"step": 772
},
{
"epoch": 0.7418426103646834,
"grad_norm": 0.10400390625,
"learning_rate": 2.581573896353167e-06,
"loss": 0.8712,
"step": 773
},
{
"epoch": 0.7428023032629558,
"grad_norm": 0.09423828125,
"learning_rate": 2.5719769673704415e-06,
"loss": 0.9098,
"step": 774
},
{
"epoch": 0.7437619961612284,
"grad_norm": 0.08935546875,
"learning_rate": 2.5623800383877162e-06,
"loss": 0.9252,
"step": 775
},
{
"epoch": 0.744721689059501,
"grad_norm": 0.10546875,
"learning_rate": 2.5527831094049906e-06,
"loss": 0.8881,
"step": 776
},
{
"epoch": 0.7456813819577736,
"grad_norm": 0.095703125,
"learning_rate": 2.5431861804222653e-06,
"loss": 0.9383,
"step": 777
},
{
"epoch": 0.746641074856046,
"grad_norm": 0.11474609375,
"learning_rate": 2.5335892514395392e-06,
"loss": 0.8981,
"step": 778
},
{
"epoch": 0.7476007677543186,
"grad_norm": 0.107421875,
"learning_rate": 2.523992322456814e-06,
"loss": 0.9791,
"step": 779
},
{
"epoch": 0.7485604606525912,
"grad_norm": 0.078125,
"learning_rate": 2.5143953934740883e-06,
"loss": 0.8725,
"step": 780
},
{
"epoch": 0.7495201535508638,
"grad_norm": 0.095703125,
"learning_rate": 2.504798464491363e-06,
"loss": 0.9092,
"step": 781
},
{
"epoch": 0.7504798464491362,
"grad_norm": 0.1044921875,
"learning_rate": 2.4952015355086374e-06,
"loss": 0.8934,
"step": 782
},
{
"epoch": 0.7514395393474088,
"grad_norm": 0.12158203125,
"learning_rate": 2.4856046065259117e-06,
"loss": 0.9178,
"step": 783
},
{
"epoch": 0.7523992322456814,
"grad_norm": 0.0986328125,
"learning_rate": 2.4760076775431864e-06,
"loss": 0.8888,
"step": 784
},
{
"epoch": 0.753358925143954,
"grad_norm": 0.1044921875,
"learning_rate": 2.4664107485604608e-06,
"loss": 0.9234,
"step": 785
},
{
"epoch": 0.7543186180422264,
"grad_norm": 0.09033203125,
"learning_rate": 2.456813819577735e-06,
"loss": 0.8478,
"step": 786
},
{
"epoch": 0.755278310940499,
"grad_norm": 0.103515625,
"learning_rate": 2.44721689059501e-06,
"loss": 0.9564,
"step": 787
},
{
"epoch": 0.7562380038387716,
"grad_norm": 0.09521484375,
"learning_rate": 2.437619961612284e-06,
"loss": 0.8473,
"step": 788
},
{
"epoch": 0.7571976967370442,
"grad_norm": 0.10400390625,
"learning_rate": 2.4280230326295585e-06,
"loss": 0.9126,
"step": 789
},
{
"epoch": 0.7581573896353166,
"grad_norm": 0.10009765625,
"learning_rate": 2.4184261036468333e-06,
"loss": 0.9022,
"step": 790
},
{
"epoch": 0.7591170825335892,
"grad_norm": 0.08251953125,
"learning_rate": 2.4088291746641076e-06,
"loss": 0.8685,
"step": 791
},
{
"epoch": 0.7600767754318618,
"grad_norm": 0.1640625,
"learning_rate": 2.3992322456813823e-06,
"loss": 1.0382,
"step": 792
},
{
"epoch": 0.7610364683301344,
"grad_norm": 0.103515625,
"learning_rate": 2.3896353166986567e-06,
"loss": 0.888,
"step": 793
},
{
"epoch": 0.761996161228407,
"grad_norm": 0.099609375,
"learning_rate": 2.380038387715931e-06,
"loss": 0.8546,
"step": 794
},
{
"epoch": 0.7629558541266794,
"grad_norm": 0.09814453125,
"learning_rate": 2.3704414587332057e-06,
"loss": 0.8766,
"step": 795
},
{
"epoch": 0.763915547024952,
"grad_norm": 0.0927734375,
"learning_rate": 2.36084452975048e-06,
"loss": 0.8559,
"step": 796
},
{
"epoch": 0.7648752399232246,
"grad_norm": 0.09521484375,
"learning_rate": 2.3512476007677544e-06,
"loss": 0.8919,
"step": 797
},
{
"epoch": 0.7658349328214972,
"grad_norm": 0.1572265625,
"learning_rate": 2.341650671785029e-06,
"loss": 1.005,
"step": 798
},
{
"epoch": 0.7667946257197696,
"grad_norm": 0.1318359375,
"learning_rate": 2.3320537428023035e-06,
"loss": 0.8963,
"step": 799
},
{
"epoch": 0.7677543186180422,
"grad_norm": 0.109375,
"learning_rate": 2.322456813819578e-06,
"loss": 0.9342,
"step": 800
},
{
"epoch": 0.7687140115163148,
"grad_norm": 0.099609375,
"learning_rate": 2.3128598848368525e-06,
"loss": 0.904,
"step": 801
},
{
"epoch": 0.7696737044145874,
"grad_norm": 0.08251953125,
"learning_rate": 2.303262955854127e-06,
"loss": 0.8932,
"step": 802
},
{
"epoch": 0.7706333973128598,
"grad_norm": 0.08740234375,
"learning_rate": 2.2936660268714016e-06,
"loss": 0.8518,
"step": 803
},
{
"epoch": 0.7715930902111324,
"grad_norm": 0.09765625,
"learning_rate": 2.284069097888676e-06,
"loss": 0.8464,
"step": 804
},
{
"epoch": 0.772552783109405,
"grad_norm": 0.0888671875,
"learning_rate": 2.2744721689059503e-06,
"loss": 0.8479,
"step": 805
},
{
"epoch": 0.7735124760076776,
"grad_norm": 0.08837890625,
"learning_rate": 2.264875239923225e-06,
"loss": 0.8904,
"step": 806
},
{
"epoch": 0.77447216890595,
"grad_norm": 0.0986328125,
"learning_rate": 2.255278310940499e-06,
"loss": 0.8472,
"step": 807
},
{
"epoch": 0.7754318618042226,
"grad_norm": 0.09814453125,
"learning_rate": 2.2456813819577737e-06,
"loss": 0.9097,
"step": 808
},
{
"epoch": 0.7763915547024952,
"grad_norm": 0.11328125,
"learning_rate": 2.236084452975048e-06,
"loss": 0.9732,
"step": 809
},
{
"epoch": 0.7773512476007678,
"grad_norm": 0.1025390625,
"learning_rate": 2.2264875239923228e-06,
"loss": 0.8784,
"step": 810
},
{
"epoch": 0.7783109404990403,
"grad_norm": 0.11572265625,
"learning_rate": 2.216890595009597e-06,
"loss": 0.9363,
"step": 811
},
{
"epoch": 0.7792706333973128,
"grad_norm": 0.12890625,
"learning_rate": 2.2072936660268714e-06,
"loss": 0.9103,
"step": 812
},
{
"epoch": 0.7802303262955854,
"grad_norm": 0.0966796875,
"learning_rate": 2.197696737044146e-06,
"loss": 0.9041,
"step": 813
},
{
"epoch": 0.781190019193858,
"grad_norm": 0.1484375,
"learning_rate": 2.1880998080614205e-06,
"loss": 1.0047,
"step": 814
},
{
"epoch": 0.7821497120921305,
"grad_norm": 0.10107421875,
"learning_rate": 2.178502879078695e-06,
"loss": 0.8674,
"step": 815
},
{
"epoch": 0.783109404990403,
"grad_norm": 0.10546875,
"learning_rate": 2.1689059500959696e-06,
"loss": 0.945,
"step": 816
},
{
"epoch": 0.7840690978886756,
"grad_norm": 0.0947265625,
"learning_rate": 2.159309021113244e-06,
"loss": 0.9308,
"step": 817
},
{
"epoch": 0.7850287907869482,
"grad_norm": 0.087890625,
"learning_rate": 2.149712092130518e-06,
"loss": 0.8592,
"step": 818
},
{
"epoch": 0.7859884836852208,
"grad_norm": 0.10693359375,
"learning_rate": 2.140115163147793e-06,
"loss": 0.8601,
"step": 819
},
{
"epoch": 0.7869481765834933,
"grad_norm": 0.1025390625,
"learning_rate": 2.1305182341650673e-06,
"loss": 0.908,
"step": 820
},
{
"epoch": 0.7879078694817658,
"grad_norm": 0.10546875,
"learning_rate": 2.120921305182342e-06,
"loss": 0.8875,
"step": 821
},
{
"epoch": 0.7888675623800384,
"grad_norm": 0.10595703125,
"learning_rate": 2.1113243761996164e-06,
"loss": 0.9093,
"step": 822
},
{
"epoch": 0.789827255278311,
"grad_norm": 0.091796875,
"learning_rate": 2.1017274472168907e-06,
"loss": 0.8605,
"step": 823
},
{
"epoch": 0.7907869481765835,
"grad_norm": 0.08349609375,
"learning_rate": 2.0921305182341654e-06,
"loss": 0.839,
"step": 824
},
{
"epoch": 0.791746641074856,
"grad_norm": 0.11181640625,
"learning_rate": 2.0825335892514398e-06,
"loss": 0.9268,
"step": 825
},
{
"epoch": 0.7927063339731286,
"grad_norm": 0.0830078125,
"learning_rate": 2.072936660268714e-06,
"loss": 0.8654,
"step": 826
},
{
"epoch": 0.7936660268714012,
"grad_norm": 0.09375,
"learning_rate": 2.063339731285989e-06,
"loss": 0.8853,
"step": 827
},
{
"epoch": 0.7946257197696737,
"grad_norm": 0.08642578125,
"learning_rate": 2.053742802303263e-06,
"loss": 0.9269,
"step": 828
},
{
"epoch": 0.7955854126679462,
"grad_norm": 0.08447265625,
"learning_rate": 2.044145873320538e-06,
"loss": 0.8849,
"step": 829
},
{
"epoch": 0.7965451055662188,
"grad_norm": 0.1044921875,
"learning_rate": 2.0345489443378122e-06,
"loss": 0.8988,
"step": 830
},
{
"epoch": 0.7975047984644914,
"grad_norm": 0.1484375,
"learning_rate": 2.0249520153550866e-06,
"loss": 0.9887,
"step": 831
},
{
"epoch": 0.7984644913627639,
"grad_norm": 0.08642578125,
"learning_rate": 2.015355086372361e-06,
"loss": 0.818,
"step": 832
},
{
"epoch": 0.7994241842610365,
"grad_norm": 0.1259765625,
"learning_rate": 2.0057581573896352e-06,
"loss": 0.9224,
"step": 833
},
{
"epoch": 0.800383877159309,
"grad_norm": 0.1318359375,
"learning_rate": 1.99616122840691e-06,
"loss": 0.9942,
"step": 834
},
{
"epoch": 0.8013435700575816,
"grad_norm": 0.1005859375,
"learning_rate": 1.9865642994241843e-06,
"loss": 0.8478,
"step": 835
},
{
"epoch": 0.8023032629558541,
"grad_norm": 0.1728515625,
"learning_rate": 1.9769673704414586e-06,
"loss": 0.8404,
"step": 836
},
{
"epoch": 0.8032629558541267,
"grad_norm": 0.09375,
"learning_rate": 1.9673704414587334e-06,
"loss": 0.8551,
"step": 837
},
{
"epoch": 0.8042226487523992,
"grad_norm": 0.09814453125,
"learning_rate": 1.9577735124760077e-06,
"loss": 0.9121,
"step": 838
},
{
"epoch": 0.8051823416506718,
"grad_norm": 0.10400390625,
"learning_rate": 1.9481765834932825e-06,
"loss": 0.9308,
"step": 839
},
{
"epoch": 0.8061420345489443,
"grad_norm": 0.11865234375,
"learning_rate": 1.9385796545105568e-06,
"loss": 0.9405,
"step": 840
},
{
"epoch": 0.8071017274472169,
"grad_norm": 0.0908203125,
"learning_rate": 1.928982725527831e-06,
"loss": 0.8425,
"step": 841
},
{
"epoch": 0.8080614203454894,
"grad_norm": 0.10205078125,
"learning_rate": 1.919385796545106e-06,
"loss": 0.9141,
"step": 842
},
{
"epoch": 0.809021113243762,
"grad_norm": 0.09326171875,
"learning_rate": 1.90978886756238e-06,
"loss": 0.8844,
"step": 843
},
{
"epoch": 0.8099808061420346,
"grad_norm": 0.1015625,
"learning_rate": 1.9001919385796547e-06,
"loss": 0.8686,
"step": 844
},
{
"epoch": 0.8109404990403071,
"grad_norm": 0.11181640625,
"learning_rate": 1.890595009596929e-06,
"loss": 0.9215,
"step": 845
},
{
"epoch": 0.8119001919385797,
"grad_norm": 0.09228515625,
"learning_rate": 1.8809980806142036e-06,
"loss": 0.873,
"step": 846
},
{
"epoch": 0.8128598848368522,
"grad_norm": 0.0908203125,
"learning_rate": 1.8714011516314781e-06,
"loss": 0.8606,
"step": 847
},
{
"epoch": 0.8138195777351248,
"grad_norm": 0.1015625,
"learning_rate": 1.8618042226487527e-06,
"loss": 1.0169,
"step": 848
},
{
"epoch": 0.8147792706333973,
"grad_norm": 0.111328125,
"learning_rate": 1.852207293666027e-06,
"loss": 0.9263,
"step": 849
},
{
"epoch": 0.8157389635316699,
"grad_norm": 0.0947265625,
"learning_rate": 1.8426103646833015e-06,
"loss": 0.9282,
"step": 850
},
{
"epoch": 0.8166986564299424,
"grad_norm": 0.09326171875,
"learning_rate": 1.833013435700576e-06,
"loss": 0.8512,
"step": 851
},
{
"epoch": 0.817658349328215,
"grad_norm": 0.15625,
"learning_rate": 1.8234165067178506e-06,
"loss": 0.9415,
"step": 852
},
{
"epoch": 0.8186180422264875,
"grad_norm": 0.09521484375,
"learning_rate": 1.813819577735125e-06,
"loss": 0.8646,
"step": 853
},
{
"epoch": 0.8195777351247601,
"grad_norm": 0.09912109375,
"learning_rate": 1.8042226487523995e-06,
"loss": 0.9663,
"step": 854
},
{
"epoch": 0.8205374280230326,
"grad_norm": 0.09521484375,
"learning_rate": 1.794625719769674e-06,
"loss": 0.855,
"step": 855
},
{
"epoch": 0.8214971209213052,
"grad_norm": 0.10595703125,
"learning_rate": 1.7850287907869481e-06,
"loss": 0.8448,
"step": 856
},
{
"epoch": 0.8224568138195777,
"grad_norm": 0.111328125,
"learning_rate": 1.7754318618042227e-06,
"loss": 0.9227,
"step": 857
},
{
"epoch": 0.8234165067178503,
"grad_norm": 0.095703125,
"learning_rate": 1.7658349328214972e-06,
"loss": 0.8926,
"step": 858
},
{
"epoch": 0.8243761996161229,
"grad_norm": 0.09619140625,
"learning_rate": 1.7562380038387715e-06,
"loss": 0.8505,
"step": 859
},
{
"epoch": 0.8253358925143954,
"grad_norm": 0.09814453125,
"learning_rate": 1.746641074856046e-06,
"loss": 0.9039,
"step": 860
},
{
"epoch": 0.8262955854126679,
"grad_norm": 0.10205078125,
"learning_rate": 1.7370441458733206e-06,
"loss": 0.9247,
"step": 861
},
{
"epoch": 0.8272552783109405,
"grad_norm": 0.09912109375,
"learning_rate": 1.7274472168905951e-06,
"loss": 0.8928,
"step": 862
},
{
"epoch": 0.8282149712092131,
"grad_norm": 0.09228515625,
"learning_rate": 1.7178502879078695e-06,
"loss": 0.9062,
"step": 863
},
{
"epoch": 0.8291746641074856,
"grad_norm": 0.08984375,
"learning_rate": 1.708253358925144e-06,
"loss": 0.8303,
"step": 864
},
{
"epoch": 0.8301343570057581,
"grad_norm": 0.0859375,
"learning_rate": 1.6986564299424186e-06,
"loss": 0.8619,
"step": 865
},
{
"epoch": 0.8310940499040307,
"grad_norm": 0.1025390625,
"learning_rate": 1.689059500959693e-06,
"loss": 0.8517,
"step": 866
},
{
"epoch": 0.8320537428023033,
"grad_norm": 0.1181640625,
"learning_rate": 1.6794625719769674e-06,
"loss": 1.0451,
"step": 867
},
{
"epoch": 0.8330134357005758,
"grad_norm": 0.10107421875,
"learning_rate": 1.669865642994242e-06,
"loss": 0.8912,
"step": 868
},
{
"epoch": 0.8339731285988484,
"grad_norm": 0.11572265625,
"learning_rate": 1.6602687140115165e-06,
"loss": 0.9978,
"step": 869
},
{
"epoch": 0.8349328214971209,
"grad_norm": 0.11279296875,
"learning_rate": 1.650671785028791e-06,
"loss": 0.948,
"step": 870
},
{
"epoch": 0.8358925143953935,
"grad_norm": 0.10693359375,
"learning_rate": 1.6410748560460654e-06,
"loss": 0.9118,
"step": 871
},
{
"epoch": 0.836852207293666,
"grad_norm": 0.08984375,
"learning_rate": 1.63147792706334e-06,
"loss": 0.9054,
"step": 872
},
{
"epoch": 0.8378119001919386,
"grad_norm": 0.091796875,
"learning_rate": 1.6218809980806144e-06,
"loss": 0.8954,
"step": 873
},
{
"epoch": 0.8387715930902111,
"grad_norm": 0.09423828125,
"learning_rate": 1.612284069097889e-06,
"loss": 0.8979,
"step": 874
},
{
"epoch": 0.8397312859884837,
"grad_norm": 0.08642578125,
"learning_rate": 1.6026871401151633e-06,
"loss": 0.9078,
"step": 875
},
{
"epoch": 0.8406909788867563,
"grad_norm": 0.0888671875,
"learning_rate": 1.5930902111324378e-06,
"loss": 0.8511,
"step": 876
},
{
"epoch": 0.8416506717850288,
"grad_norm": 0.09521484375,
"learning_rate": 1.5834932821497124e-06,
"loss": 0.8885,
"step": 877
},
{
"epoch": 0.8426103646833013,
"grad_norm": 0.12451171875,
"learning_rate": 1.5738963531669867e-06,
"loss": 0.9479,
"step": 878
},
{
"epoch": 0.8435700575815739,
"grad_norm": 0.08642578125,
"learning_rate": 1.5642994241842612e-06,
"loss": 0.8318,
"step": 879
},
{
"epoch": 0.8445297504798465,
"grad_norm": 0.08837890625,
"learning_rate": 1.5547024952015358e-06,
"loss": 0.9085,
"step": 880
},
{
"epoch": 0.845489443378119,
"grad_norm": 0.09033203125,
"learning_rate": 1.54510556621881e-06,
"loss": 0.8787,
"step": 881
},
{
"epoch": 0.8464491362763915,
"grad_norm": 0.08935546875,
"learning_rate": 1.5355086372360844e-06,
"loss": 0.875,
"step": 882
},
{
"epoch": 0.8474088291746641,
"grad_norm": 0.09521484375,
"learning_rate": 1.525911708253359e-06,
"loss": 0.8904,
"step": 883
},
{
"epoch": 0.8483685220729367,
"grad_norm": 0.15625,
"learning_rate": 1.5163147792706335e-06,
"loss": 0.9297,
"step": 884
},
{
"epoch": 0.8493282149712092,
"grad_norm": 0.10498046875,
"learning_rate": 1.5067178502879078e-06,
"loss": 0.8951,
"step": 885
},
{
"epoch": 0.8502879078694817,
"grad_norm": 0.0908203125,
"learning_rate": 1.4971209213051824e-06,
"loss": 0.8508,
"step": 886
},
{
"epoch": 0.8512476007677543,
"grad_norm": 0.10302734375,
"learning_rate": 1.487523992322457e-06,
"loss": 0.924,
"step": 887
},
{
"epoch": 0.8522072936660269,
"grad_norm": 0.08935546875,
"learning_rate": 1.4779270633397312e-06,
"loss": 0.9006,
"step": 888
},
{
"epoch": 0.8531669865642995,
"grad_norm": 0.08203125,
"learning_rate": 1.4683301343570058e-06,
"loss": 0.9105,
"step": 889
},
{
"epoch": 0.8541266794625719,
"grad_norm": 0.09423828125,
"learning_rate": 1.4587332053742803e-06,
"loss": 0.9097,
"step": 890
},
{
"epoch": 0.8550863723608445,
"grad_norm": 0.08935546875,
"learning_rate": 1.4491362763915549e-06,
"loss": 0.8719,
"step": 891
},
{
"epoch": 0.8560460652591171,
"grad_norm": 0.10888671875,
"learning_rate": 1.4395393474088292e-06,
"loss": 0.9129,
"step": 892
},
{
"epoch": 0.8570057581573897,
"grad_norm": 0.08642578125,
"learning_rate": 1.4299424184261037e-06,
"loss": 0.818,
"step": 893
},
{
"epoch": 0.8579654510556622,
"grad_norm": 0.09521484375,
"learning_rate": 1.4203454894433783e-06,
"loss": 0.8259,
"step": 894
},
{
"epoch": 0.8589251439539347,
"grad_norm": 0.09912109375,
"learning_rate": 1.4107485604606528e-06,
"loss": 0.9523,
"step": 895
},
{
"epoch": 0.8598848368522073,
"grad_norm": 0.09912109375,
"learning_rate": 1.4011516314779271e-06,
"loss": 0.8779,
"step": 896
},
{
"epoch": 0.8608445297504799,
"grad_norm": 0.09228515625,
"learning_rate": 1.3915547024952017e-06,
"loss": 0.8471,
"step": 897
},
{
"epoch": 0.8618042226487524,
"grad_norm": 0.09716796875,
"learning_rate": 1.3819577735124762e-06,
"loss": 0.8541,
"step": 898
},
{
"epoch": 0.8627639155470249,
"grad_norm": 0.08935546875,
"learning_rate": 1.3723608445297507e-06,
"loss": 0.8406,
"step": 899
},
{
"epoch": 0.8637236084452975,
"grad_norm": 0.09228515625,
"learning_rate": 1.362763915547025e-06,
"loss": 0.9321,
"step": 900
},
{
"epoch": 0.8646833013435701,
"grad_norm": 0.10009765625,
"learning_rate": 1.3531669865642996e-06,
"loss": 0.8372,
"step": 901
},
{
"epoch": 0.8656429942418427,
"grad_norm": 0.1201171875,
"learning_rate": 1.3435700575815741e-06,
"loss": 0.9387,
"step": 902
},
{
"epoch": 0.8666026871401151,
"grad_norm": 0.09033203125,
"learning_rate": 1.3339731285988487e-06,
"loss": 0.8453,
"step": 903
},
{
"epoch": 0.8675623800383877,
"grad_norm": 0.10107421875,
"learning_rate": 1.324376199616123e-06,
"loss": 0.8814,
"step": 904
},
{
"epoch": 0.8685220729366603,
"grad_norm": 0.142578125,
"learning_rate": 1.3147792706333976e-06,
"loss": 0.8979,
"step": 905
},
{
"epoch": 0.8694817658349329,
"grad_norm": 0.09130859375,
"learning_rate": 1.3051823416506717e-06,
"loss": 0.9195,
"step": 906
},
{
"epoch": 0.8704414587332053,
"grad_norm": 0.0927734375,
"learning_rate": 1.2955854126679462e-06,
"loss": 0.8919,
"step": 907
},
{
"epoch": 0.8714011516314779,
"grad_norm": 0.09716796875,
"learning_rate": 1.2859884836852207e-06,
"loss": 0.8825,
"step": 908
},
{
"epoch": 0.8723608445297505,
"grad_norm": 0.0908203125,
"learning_rate": 1.2763915547024953e-06,
"loss": 0.7967,
"step": 909
},
{
"epoch": 0.8733205374280231,
"grad_norm": 0.0986328125,
"learning_rate": 1.2667946257197696e-06,
"loss": 0.8685,
"step": 910
},
{
"epoch": 0.8742802303262955,
"grad_norm": 0.10986328125,
"learning_rate": 1.2571976967370441e-06,
"loss": 0.8319,
"step": 911
},
{
"epoch": 0.8752399232245681,
"grad_norm": 0.15625,
"learning_rate": 1.2476007677543187e-06,
"loss": 1.0421,
"step": 912
},
{
"epoch": 0.8761996161228407,
"grad_norm": 0.125,
"learning_rate": 1.2380038387715932e-06,
"loss": 1.02,
"step": 913
},
{
"epoch": 0.8771593090211133,
"grad_norm": 0.09228515625,
"learning_rate": 1.2284069097888675e-06,
"loss": 0.8858,
"step": 914
},
{
"epoch": 0.8781190019193857,
"grad_norm": 0.10205078125,
"learning_rate": 1.218809980806142e-06,
"loss": 0.903,
"step": 915
},
{
"epoch": 0.8790786948176583,
"grad_norm": 0.1298828125,
"learning_rate": 1.2092130518234166e-06,
"loss": 0.9122,
"step": 916
},
{
"epoch": 0.8800383877159309,
"grad_norm": 0.0927734375,
"learning_rate": 1.1996161228406912e-06,
"loss": 0.8541,
"step": 917
},
{
"epoch": 0.8809980806142035,
"grad_norm": 0.1025390625,
"learning_rate": 1.1900191938579655e-06,
"loss": 0.8816,
"step": 918
},
{
"epoch": 0.8819577735124761,
"grad_norm": 0.1064453125,
"learning_rate": 1.18042226487524e-06,
"loss": 0.8834,
"step": 919
},
{
"epoch": 0.8829174664107485,
"grad_norm": 0.10302734375,
"learning_rate": 1.1708253358925146e-06,
"loss": 0.9132,
"step": 920
},
{
"epoch": 0.8838771593090211,
"grad_norm": 0.09814453125,
"learning_rate": 1.161228406909789e-06,
"loss": 0.9044,
"step": 921
},
{
"epoch": 0.8848368522072937,
"grad_norm": 0.0986328125,
"learning_rate": 1.1516314779270634e-06,
"loss": 0.8791,
"step": 922
},
{
"epoch": 0.8857965451055663,
"grad_norm": 0.150390625,
"learning_rate": 1.142034548944338e-06,
"loss": 0.8169,
"step": 923
},
{
"epoch": 0.8867562380038387,
"grad_norm": 0.08935546875,
"learning_rate": 1.1324376199616125e-06,
"loss": 0.8696,
"step": 924
},
{
"epoch": 0.8877159309021113,
"grad_norm": 0.1025390625,
"learning_rate": 1.1228406909788868e-06,
"loss": 0.908,
"step": 925
},
{
"epoch": 0.8886756238003839,
"grad_norm": 0.09716796875,
"learning_rate": 1.1132437619961614e-06,
"loss": 0.8748,
"step": 926
},
{
"epoch": 0.8896353166986565,
"grad_norm": 0.08251953125,
"learning_rate": 1.1036468330134357e-06,
"loss": 0.8716,
"step": 927
},
{
"epoch": 0.8905950095969289,
"grad_norm": 0.09814453125,
"learning_rate": 1.0940499040307102e-06,
"loss": 0.8613,
"step": 928
},
{
"epoch": 0.8915547024952015,
"grad_norm": 0.091796875,
"learning_rate": 1.0844529750479848e-06,
"loss": 0.8703,
"step": 929
},
{
"epoch": 0.8925143953934741,
"grad_norm": 0.08740234375,
"learning_rate": 1.074856046065259e-06,
"loss": 0.8743,
"step": 930
},
{
"epoch": 0.8934740882917467,
"grad_norm": 0.10400390625,
"learning_rate": 1.0652591170825336e-06,
"loss": 0.8926,
"step": 931
},
{
"epoch": 0.8944337811900192,
"grad_norm": 0.08642578125,
"learning_rate": 1.0556621880998082e-06,
"loss": 0.8598,
"step": 932
},
{
"epoch": 0.8953934740882917,
"grad_norm": 0.1005859375,
"learning_rate": 1.0460652591170827e-06,
"loss": 0.9332,
"step": 933
},
{
"epoch": 0.8963531669865643,
"grad_norm": 0.10693359375,
"learning_rate": 1.036468330134357e-06,
"loss": 0.9341,
"step": 934
},
{
"epoch": 0.8973128598848369,
"grad_norm": 0.12890625,
"learning_rate": 1.0268714011516316e-06,
"loss": 0.9819,
"step": 935
},
{
"epoch": 0.8982725527831094,
"grad_norm": 0.1005859375,
"learning_rate": 1.0172744721689061e-06,
"loss": 0.8789,
"step": 936
},
{
"epoch": 0.8992322456813819,
"grad_norm": 0.1513671875,
"learning_rate": 1.0076775431861805e-06,
"loss": 1.0342,
"step": 937
},
{
"epoch": 0.9001919385796545,
"grad_norm": 0.10498046875,
"learning_rate": 9.98080614203455e-07,
"loss": 0.8745,
"step": 938
},
{
"epoch": 0.9011516314779271,
"grad_norm": 0.1123046875,
"learning_rate": 9.884836852207293e-07,
"loss": 0.8975,
"step": 939
},
{
"epoch": 0.9021113243761996,
"grad_norm": 0.08447265625,
"learning_rate": 9.788867562380039e-07,
"loss": 0.9254,
"step": 940
},
{
"epoch": 0.9030710172744721,
"grad_norm": 0.09326171875,
"learning_rate": 9.692898272552784e-07,
"loss": 0.8642,
"step": 941
},
{
"epoch": 0.9040307101727447,
"grad_norm": 0.1591796875,
"learning_rate": 9.59692898272553e-07,
"loss": 1.0205,
"step": 942
},
{
"epoch": 0.9049904030710173,
"grad_norm": 0.09521484375,
"learning_rate": 9.500959692898274e-07,
"loss": 0.8844,
"step": 943
},
{
"epoch": 0.9059500959692899,
"grad_norm": 0.107421875,
"learning_rate": 9.404990403071018e-07,
"loss": 0.8562,
"step": 944
},
{
"epoch": 0.9069097888675623,
"grad_norm": 0.09375,
"learning_rate": 9.309021113243763e-07,
"loss": 0.8775,
"step": 945
},
{
"epoch": 0.9078694817658349,
"grad_norm": 0.0869140625,
"learning_rate": 9.213051823416508e-07,
"loss": 0.8285,
"step": 946
},
{
"epoch": 0.9088291746641075,
"grad_norm": 0.09521484375,
"learning_rate": 9.117082533589253e-07,
"loss": 0.8177,
"step": 947
},
{
"epoch": 0.9097888675623801,
"grad_norm": 0.11279296875,
"learning_rate": 9.021113243761997e-07,
"loss": 0.939,
"step": 948
},
{
"epoch": 0.9107485604606526,
"grad_norm": 0.08447265625,
"learning_rate": 8.925143953934741e-07,
"loss": 0.8413,
"step": 949
},
{
"epoch": 0.9117082533589251,
"grad_norm": 0.099609375,
"learning_rate": 8.829174664107486e-07,
"loss": 0.9704,
"step": 950
},
{
"epoch": 0.9126679462571977,
"grad_norm": 0.158203125,
"learning_rate": 8.73320537428023e-07,
"loss": 0.9006,
"step": 951
},
{
"epoch": 0.9136276391554703,
"grad_norm": 0.11279296875,
"learning_rate": 8.637236084452976e-07,
"loss": 0.8903,
"step": 952
},
{
"epoch": 0.9145873320537428,
"grad_norm": 0.1298828125,
"learning_rate": 8.54126679462572e-07,
"loss": 0.9654,
"step": 953
},
{
"epoch": 0.9155470249520153,
"grad_norm": 0.0966796875,
"learning_rate": 8.445297504798465e-07,
"loss": 0.9326,
"step": 954
},
{
"epoch": 0.9165067178502879,
"grad_norm": 0.09814453125,
"learning_rate": 8.34932821497121e-07,
"loss": 0.9402,
"step": 955
},
{
"epoch": 0.9174664107485605,
"grad_norm": 0.11376953125,
"learning_rate": 8.253358925143955e-07,
"loss": 0.9127,
"step": 956
},
{
"epoch": 0.918426103646833,
"grad_norm": 0.10986328125,
"learning_rate": 8.1573896353167e-07,
"loss": 0.8601,
"step": 957
},
{
"epoch": 0.9193857965451055,
"grad_norm": 0.1015625,
"learning_rate": 8.061420345489445e-07,
"loss": 0.8914,
"step": 958
},
{
"epoch": 0.9203454894433781,
"grad_norm": 0.271484375,
"learning_rate": 7.965451055662189e-07,
"loss": 0.8644,
"step": 959
},
{
"epoch": 0.9213051823416507,
"grad_norm": 0.0908203125,
"learning_rate": 7.869481765834934e-07,
"loss": 0.876,
"step": 960
},
{
"epoch": 0.9222648752399232,
"grad_norm": 0.11328125,
"learning_rate": 7.773512476007679e-07,
"loss": 0.9974,
"step": 961
},
{
"epoch": 0.9232245681381958,
"grad_norm": 0.103515625,
"learning_rate": 7.677543186180422e-07,
"loss": 0.9056,
"step": 962
},
{
"epoch": 0.9241842610364683,
"grad_norm": 0.1611328125,
"learning_rate": 7.581573896353168e-07,
"loss": 1.0843,
"step": 963
},
{
"epoch": 0.9251439539347409,
"grad_norm": 0.0966796875,
"learning_rate": 7.485604606525912e-07,
"loss": 0.9126,
"step": 964
},
{
"epoch": 0.9261036468330134,
"grad_norm": 0.1103515625,
"learning_rate": 7.389635316698656e-07,
"loss": 0.8867,
"step": 965
},
{
"epoch": 0.927063339731286,
"grad_norm": 0.1025390625,
"learning_rate": 7.293666026871402e-07,
"loss": 0.881,
"step": 966
},
{
"epoch": 0.9280230326295585,
"grad_norm": 0.10205078125,
"learning_rate": 7.197696737044146e-07,
"loss": 0.8719,
"step": 967
},
{
"epoch": 0.9289827255278311,
"grad_norm": 0.0927734375,
"learning_rate": 7.101727447216891e-07,
"loss": 0.8987,
"step": 968
},
{
"epoch": 0.9299424184261037,
"grad_norm": 0.1015625,
"learning_rate": 7.005758157389636e-07,
"loss": 0.905,
"step": 969
},
{
"epoch": 0.9309021113243762,
"grad_norm": 0.091796875,
"learning_rate": 6.909788867562381e-07,
"loss": 0.838,
"step": 970
},
{
"epoch": 0.9318618042226487,
"grad_norm": 0.099609375,
"learning_rate": 6.813819577735125e-07,
"loss": 0.939,
"step": 971
},
{
"epoch": 0.9328214971209213,
"grad_norm": 0.103515625,
"learning_rate": 6.717850287907871e-07,
"loss": 0.9673,
"step": 972
},
{
"epoch": 0.9337811900191939,
"grad_norm": 0.09521484375,
"learning_rate": 6.621880998080615e-07,
"loss": 0.8974,
"step": 973
},
{
"epoch": 0.9347408829174664,
"grad_norm": 0.1005859375,
"learning_rate": 6.525911708253358e-07,
"loss": 0.8756,
"step": 974
},
{
"epoch": 0.935700575815739,
"grad_norm": 0.1103515625,
"learning_rate": 6.429942418426104e-07,
"loss": 0.9782,
"step": 975
},
{
"epoch": 0.9366602687140115,
"grad_norm": 0.10107421875,
"learning_rate": 6.333973128598848e-07,
"loss": 0.8836,
"step": 976
},
{
"epoch": 0.9376199616122841,
"grad_norm": 0.1083984375,
"learning_rate": 6.238003838771593e-07,
"loss": 0.9716,
"step": 977
},
{
"epoch": 0.9385796545105566,
"grad_norm": 0.08984375,
"learning_rate": 6.142034548944338e-07,
"loss": 0.861,
"step": 978
},
{
"epoch": 0.9395393474088292,
"grad_norm": 0.12353515625,
"learning_rate": 6.046065259117083e-07,
"loss": 0.9611,
"step": 979
},
{
"epoch": 0.9404990403071017,
"grad_norm": 0.10205078125,
"learning_rate": 5.950095969289827e-07,
"loss": 0.9402,
"step": 980
},
{
"epoch": 0.9414587332053743,
"grad_norm": 0.08447265625,
"learning_rate": 5.854126679462573e-07,
"loss": 0.8861,
"step": 981
},
{
"epoch": 0.9424184261036468,
"grad_norm": 0.1025390625,
"learning_rate": 5.758157389635317e-07,
"loss": 0.9267,
"step": 982
},
{
"epoch": 0.9433781190019194,
"grad_norm": 0.1064453125,
"learning_rate": 5.662188099808063e-07,
"loss": 0.9603,
"step": 983
},
{
"epoch": 0.944337811900192,
"grad_norm": 0.095703125,
"learning_rate": 5.566218809980807e-07,
"loss": 0.8911,
"step": 984
},
{
"epoch": 0.9452975047984645,
"grad_norm": 0.08837890625,
"learning_rate": 5.470249520153551e-07,
"loss": 0.8627,
"step": 985
},
{
"epoch": 0.946257197696737,
"grad_norm": 0.10107421875,
"learning_rate": 5.374280230326296e-07,
"loss": 0.9447,
"step": 986
},
{
"epoch": 0.9472168905950096,
"grad_norm": 0.10205078125,
"learning_rate": 5.278310940499041e-07,
"loss": 0.9111,
"step": 987
},
{
"epoch": 0.9481765834932822,
"grad_norm": 0.09716796875,
"learning_rate": 5.182341650671785e-07,
"loss": 0.9145,
"step": 988
},
{
"epoch": 0.9491362763915547,
"grad_norm": 0.1005859375,
"learning_rate": 5.086372360844531e-07,
"loss": 0.9212,
"step": 989
},
{
"epoch": 0.9500959692898272,
"grad_norm": 0.08349609375,
"learning_rate": 4.990403071017275e-07,
"loss": 0.9028,
"step": 990
},
{
"epoch": 0.9510556621880998,
"grad_norm": 0.09375,
"learning_rate": 4.894433781190019e-07,
"loss": 0.9005,
"step": 991
},
{
"epoch": 0.9520153550863724,
"grad_norm": 0.1044921875,
"learning_rate": 4.798464491362765e-07,
"loss": 0.915,
"step": 992
},
{
"epoch": 0.9529750479846449,
"grad_norm": 0.095703125,
"learning_rate": 4.702495201535509e-07,
"loss": 0.8614,
"step": 993
},
{
"epoch": 0.9539347408829175,
"grad_norm": 0.1025390625,
"learning_rate": 4.606525911708254e-07,
"loss": 0.8747,
"step": 994
},
{
"epoch": 0.95489443378119,
"grad_norm": 0.0888671875,
"learning_rate": 4.5105566218809987e-07,
"loss": 0.8996,
"step": 995
},
{
"epoch": 0.9558541266794626,
"grad_norm": 0.11572265625,
"learning_rate": 4.414587332053743e-07,
"loss": 0.8711,
"step": 996
},
{
"epoch": 0.9568138195777351,
"grad_norm": 0.09423828125,
"learning_rate": 4.318618042226488e-07,
"loss": 0.8874,
"step": 997
},
{
"epoch": 0.9577735124760077,
"grad_norm": 0.08740234375,
"learning_rate": 4.2226487523992327e-07,
"loss": 0.8792,
"step": 998
},
{
"epoch": 0.9587332053742802,
"grad_norm": 0.12158203125,
"learning_rate": 4.1266794625719776e-07,
"loss": 0.9438,
"step": 999
},
{
"epoch": 0.9596928982725528,
"grad_norm": 0.095703125,
"learning_rate": 4.0307101727447224e-07,
"loss": 0.8969,
"step": 1000
},
{
"epoch": 0.9606525911708254,
"grad_norm": 0.09765625,
"learning_rate": 3.934740882917467e-07,
"loss": 0.9036,
"step": 1001
},
{
"epoch": 0.9616122840690979,
"grad_norm": 0.095703125,
"learning_rate": 3.838771593090211e-07,
"loss": 0.8347,
"step": 1002
},
{
"epoch": 0.9625719769673704,
"grad_norm": 0.11669921875,
"learning_rate": 3.742802303262956e-07,
"loss": 0.9452,
"step": 1003
},
{
"epoch": 0.963531669865643,
"grad_norm": 0.099609375,
"learning_rate": 3.646833013435701e-07,
"loss": 0.9136,
"step": 1004
},
{
"epoch": 0.9644913627639156,
"grad_norm": 0.0986328125,
"learning_rate": 3.5508637236084457e-07,
"loss": 0.8792,
"step": 1005
},
{
"epoch": 0.9654510556621881,
"grad_norm": 0.111328125,
"learning_rate": 3.4548944337811905e-07,
"loss": 0.916,
"step": 1006
},
{
"epoch": 0.9664107485604606,
"grad_norm": 0.10546875,
"learning_rate": 3.3589251439539354e-07,
"loss": 0.8992,
"step": 1007
},
{
"epoch": 0.9673704414587332,
"grad_norm": 0.09765625,
"learning_rate": 3.262955854126679e-07,
"loss": 0.9242,
"step": 1008
},
{
"epoch": 0.9683301343570058,
"grad_norm": 0.09912109375,
"learning_rate": 3.166986564299424e-07,
"loss": 0.91,
"step": 1009
},
{
"epoch": 0.9692898272552783,
"grad_norm": 0.1005859375,
"learning_rate": 3.071017274472169e-07,
"loss": 0.873,
"step": 1010
},
{
"epoch": 0.9702495201535508,
"grad_norm": 0.1015625,
"learning_rate": 2.9750479846449137e-07,
"loss": 0.9169,
"step": 1011
},
{
"epoch": 0.9712092130518234,
"grad_norm": 0.11279296875,
"learning_rate": 2.8790786948176586e-07,
"loss": 0.9147,
"step": 1012
},
{
"epoch": 0.972168905950096,
"grad_norm": 0.1220703125,
"learning_rate": 2.7831094049904034e-07,
"loss": 0.925,
"step": 1013
},
{
"epoch": 0.9731285988483686,
"grad_norm": 0.0908203125,
"learning_rate": 2.687140115163148e-07,
"loss": 0.9201,
"step": 1014
},
{
"epoch": 0.974088291746641,
"grad_norm": 0.09130859375,
"learning_rate": 2.5911708253358926e-07,
"loss": 0.8828,
"step": 1015
},
{
"epoch": 0.9750479846449136,
"grad_norm": 0.119140625,
"learning_rate": 2.4952015355086375e-07,
"loss": 0.8624,
"step": 1016
},
{
"epoch": 0.9760076775431862,
"grad_norm": 0.0859375,
"learning_rate": 2.3992322456813823e-07,
"loss": 0.8561,
"step": 1017
},
{
"epoch": 0.9769673704414588,
"grad_norm": 0.0869140625,
"learning_rate": 2.303262955854127e-07,
"loss": 0.8079,
"step": 1018
},
{
"epoch": 0.9779270633397313,
"grad_norm": 0.1240234375,
"learning_rate": 2.2072936660268715e-07,
"loss": 0.9034,
"step": 1019
},
{
"epoch": 0.9788867562380038,
"grad_norm": 0.09130859375,
"learning_rate": 2.1113243761996164e-07,
"loss": 0.8095,
"step": 1020
},
{
"epoch": 0.9798464491362764,
"grad_norm": 0.09814453125,
"learning_rate": 2.0153550863723612e-07,
"loss": 0.9089,
"step": 1021
},
{
"epoch": 0.980806142034549,
"grad_norm": 0.0849609375,
"learning_rate": 1.9193857965451055e-07,
"loss": 0.8832,
"step": 1022
},
{
"epoch": 0.9817658349328215,
"grad_norm": 0.11962890625,
"learning_rate": 1.8234165067178504e-07,
"loss": 0.9369,
"step": 1023
},
{
"epoch": 0.982725527831094,
"grad_norm": 0.1748046875,
"learning_rate": 1.7274472168905953e-07,
"loss": 1.0612,
"step": 1024
},
{
"epoch": 0.9836852207293666,
"grad_norm": 0.1015625,
"learning_rate": 1.6314779270633396e-07,
"loss": 0.9416,
"step": 1025
},
{
"epoch": 0.9846449136276392,
"grad_norm": 0.1162109375,
"learning_rate": 1.5355086372360844e-07,
"loss": 0.9809,
"step": 1026
},
{
"epoch": 0.9856046065259118,
"grad_norm": 0.10791015625,
"learning_rate": 1.4395393474088293e-07,
"loss": 0.842,
"step": 1027
},
{
"epoch": 0.9865642994241842,
"grad_norm": 0.09130859375,
"learning_rate": 1.343570057581574e-07,
"loss": 0.9277,
"step": 1028
},
{
"epoch": 0.9875239923224568,
"grad_norm": 0.1103515625,
"learning_rate": 1.2476007677543187e-07,
"loss": 0.8711,
"step": 1029
},
{
"epoch": 0.9884836852207294,
"grad_norm": 0.08740234375,
"learning_rate": 1.1516314779270635e-07,
"loss": 0.8977,
"step": 1030
},
{
"epoch": 0.989443378119002,
"grad_norm": 0.09912109375,
"learning_rate": 1.0556621880998082e-07,
"loss": 0.8632,
"step": 1031
},
{
"epoch": 0.9904030710172744,
"grad_norm": 0.10107421875,
"learning_rate": 9.596928982725528e-08,
"loss": 0.9557,
"step": 1032
},
{
"epoch": 0.991362763915547,
"grad_norm": 0.09765625,
"learning_rate": 8.637236084452976e-08,
"loss": 0.8898,
"step": 1033
},
{
"epoch": 0.9923224568138196,
"grad_norm": 0.146484375,
"learning_rate": 7.677543186180422e-08,
"loss": 0.9666,
"step": 1034
},
{
"epoch": 0.9932821497120922,
"grad_norm": 0.1015625,
"learning_rate": 6.71785028790787e-08,
"loss": 0.8335,
"step": 1035
},
{
"epoch": 0.9942418426103646,
"grad_norm": 0.095703125,
"learning_rate": 5.758157389635317e-08,
"loss": 0.9066,
"step": 1036
},
{
"epoch": 0.9952015355086372,
"grad_norm": 0.08740234375,
"learning_rate": 4.798464491362764e-08,
"loss": 0.8566,
"step": 1037
},
{
"epoch": 0.9961612284069098,
"grad_norm": 0.09033203125,
"learning_rate": 3.838771593090211e-08,
"loss": 0.8087,
"step": 1038
},
{
"epoch": 0.9971209213051824,
"grad_norm": 0.111328125,
"learning_rate": 2.8790786948176586e-08,
"loss": 0.8638,
"step": 1039
},
{
"epoch": 0.9980806142034548,
"grad_norm": 0.09912109375,
"learning_rate": 1.9193857965451055e-08,
"loss": 0.9796,
"step": 1040
},
{
"epoch": 0.9990403071017274,
"grad_norm": 0.11767578125,
"learning_rate": 9.596928982725528e-09,
"loss": 0.8987,
"step": 1041
},
{
"epoch": 1.0,
"grad_norm": 0.09228515625,
"learning_rate": 0.0,
"loss": 0.897,
"step": 1042
}
],
"logging_steps": 1.0,
"max_steps": 1042,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.362875479576019e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}