WeatherSynSFT / trainer_state.json
compasszzn's picture
Upload 14 files
987ec5c verified
Raw
History Blame Contribute Delete
35.2 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 199,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005037783375314861,
"grad_norm": 45.14028549194336,
"learning_rate": 8.333333333333333e-07,
"loss": 1.8127,
"step": 1
},
{
"epoch": 0.010075566750629723,
"grad_norm": 41.275089263916016,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.8862,
"step": 2
},
{
"epoch": 0.015113350125944584,
"grad_norm": 45.339202880859375,
"learning_rate": 2.5e-06,
"loss": 1.8741,
"step": 3
},
{
"epoch": 0.020151133501259445,
"grad_norm": 38.83050537109375,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.7667,
"step": 4
},
{
"epoch": 0.02518891687657431,
"grad_norm": 33.30706024169922,
"learning_rate": 4.166666666666667e-06,
"loss": 1.5813,
"step": 5
},
{
"epoch": 0.030226700251889168,
"grad_norm": 26.171358108520508,
"learning_rate": 5e-06,
"loss": 1.2841,
"step": 6
},
{
"epoch": 0.03526448362720403,
"grad_norm": 19.258102416992188,
"learning_rate": 4.99966880364306e-06,
"loss": 1.1433,
"step": 7
},
{
"epoch": 0.04030226700251889,
"grad_norm": 9.452253341674805,
"learning_rate": 4.998675302325061e-06,
"loss": 1.042,
"step": 8
},
{
"epoch": 0.04534005037783375,
"grad_norm": 11.662043571472168,
"learning_rate": 4.997019759281217e-06,
"loss": 1.0348,
"step": 9
},
{
"epoch": 0.05037783375314862,
"grad_norm": 15.509767532348633,
"learning_rate": 4.994702613159386e-06,
"loss": 1.0352,
"step": 10
},
{
"epoch": 0.055415617128463476,
"grad_norm": 9.309309959411621,
"learning_rate": 4.991724477903854e-06,
"loss": 1.0574,
"step": 11
},
{
"epoch": 0.060453400503778336,
"grad_norm": 6.886810779571533,
"learning_rate": 4.988086142592658e-06,
"loss": 0.9625,
"step": 12
},
{
"epoch": 0.0654911838790932,
"grad_norm": 6.271539211273193,
"learning_rate": 4.983788571228516e-06,
"loss": 0.944,
"step": 13
},
{
"epoch": 0.07052896725440806,
"grad_norm": 6.019953727722168,
"learning_rate": 4.978832902483415e-06,
"loss": 0.9427,
"step": 14
},
{
"epoch": 0.07556675062972293,
"grad_norm": 4.751285552978516,
"learning_rate": 4.9732204493969e-06,
"loss": 0.9923,
"step": 15
},
{
"epoch": 0.08060453400503778,
"grad_norm": 7.569280624389648,
"learning_rate": 4.9669526990281855e-06,
"loss": 1.0161,
"step": 16
},
{
"epoch": 0.08564231738035265,
"grad_norm": 8.2361421585083,
"learning_rate": 4.960031312062141e-06,
"loss": 0.8856,
"step": 17
},
{
"epoch": 0.0906801007556675,
"grad_norm": 8.086812973022461,
"learning_rate": 4.952458122369286e-06,
"loss": 1.0242,
"step": 18
},
{
"epoch": 0.09571788413098237,
"grad_norm": 4.911264896392822,
"learning_rate": 4.944235136519888e-06,
"loss": 0.88,
"step": 19
},
{
"epoch": 0.10075566750629723,
"grad_norm": 5.140178203582764,
"learning_rate": 4.935364533252314e-06,
"loss": 0.9321,
"step": 20
},
{
"epoch": 0.10579345088161209,
"grad_norm": 4.421811580657959,
"learning_rate": 4.925848662895753e-06,
"loss": 0.9492,
"step": 21
},
{
"epoch": 0.11083123425692695,
"grad_norm": 5.252143859863281,
"learning_rate": 4.9156900467474785e-06,
"loss": 0.8854,
"step": 22
},
{
"epoch": 0.11586901763224182,
"grad_norm": 5.303826808929443,
"learning_rate": 4.904891376404822e-06,
"loss": 0.9771,
"step": 23
},
{
"epoch": 0.12090680100755667,
"grad_norm": 4.818070888519287,
"learning_rate": 4.893455513052003e-06,
"loss": 0.9085,
"step": 24
},
{
"epoch": 0.12594458438287154,
"grad_norm": 4.050323486328125,
"learning_rate": 4.881385486702047e-06,
"loss": 0.8741,
"step": 25
},
{
"epoch": 0.1309823677581864,
"grad_norm": 4.825730800628662,
"learning_rate": 4.868684495393958e-06,
"loss": 0.8568,
"step": 26
},
{
"epoch": 0.13602015113350127,
"grad_norm": 4.85552978515625,
"learning_rate": 4.855355904345377e-06,
"loss": 0.9123,
"step": 27
},
{
"epoch": 0.14105793450881612,
"grad_norm": 3.755789041519165,
"learning_rate": 4.841403245060943e-06,
"loss": 0.9049,
"step": 28
},
{
"epoch": 0.14609571788413098,
"grad_norm": 4.334013938903809,
"learning_rate": 4.826830214396594e-06,
"loss": 0.8393,
"step": 29
},
{
"epoch": 0.15113350125944586,
"grad_norm": 4.264854907989502,
"learning_rate": 4.8116406735800645e-06,
"loss": 0.8636,
"step": 30
},
{
"epoch": 0.1561712846347607,
"grad_norm": 4.067491054534912,
"learning_rate": 4.7958386471878185e-06,
"loss": 0.9626,
"step": 31
},
{
"epoch": 0.16120906801007556,
"grad_norm": 3.913526773452759,
"learning_rate": 4.779428322078716e-06,
"loss": 0.8366,
"step": 32
},
{
"epoch": 0.16624685138539042,
"grad_norm": 4.512166500091553,
"learning_rate": 4.76241404628467e-06,
"loss": 0.8766,
"step": 33
},
{
"epoch": 0.1712846347607053,
"grad_norm": 3.469944477081299,
"learning_rate": 4.744800327858608e-06,
"loss": 0.8931,
"step": 34
},
{
"epoch": 0.17632241813602015,
"grad_norm": 3.761395215988159,
"learning_rate": 4.726591833680031e-06,
"loss": 0.9317,
"step": 35
},
{
"epoch": 0.181360201511335,
"grad_norm": 3.780844211578369,
"learning_rate": 4.7077933882184864e-06,
"loss": 0.8701,
"step": 36
},
{
"epoch": 0.18639798488664988,
"grad_norm": 3.842653751373291,
"learning_rate": 4.688409972255299e-06,
"loss": 0.8502,
"step": 37
},
{
"epoch": 0.19143576826196473,
"grad_norm": 3.1387386322021484,
"learning_rate": 4.6684467215638694e-06,
"loss": 0.8606,
"step": 38
},
{
"epoch": 0.1964735516372796,
"grad_norm": 5.945749282836914,
"learning_rate": 4.647908925548918e-06,
"loss": 0.8836,
"step": 39
},
{
"epoch": 0.20151133501259447,
"grad_norm": 3.8087077140808105,
"learning_rate": 4.626802025845023e-06,
"loss": 0.8729,
"step": 40
},
{
"epoch": 0.20654911838790932,
"grad_norm": 4.801278591156006,
"learning_rate": 4.605131614874813e-06,
"loss": 0.8895,
"step": 41
},
{
"epoch": 0.21158690176322417,
"grad_norm": 3.3816072940826416,
"learning_rate": 4.582903434367222e-06,
"loss": 0.875,
"step": 42
},
{
"epoch": 0.21662468513853905,
"grad_norm": 3.7405855655670166,
"learning_rate": 4.560123373836174e-06,
"loss": 0.8679,
"step": 43
},
{
"epoch": 0.2216624685138539,
"grad_norm": 3.2052128314971924,
"learning_rate": 4.536797469020116e-06,
"loss": 0.8453,
"step": 44
},
{
"epoch": 0.22670025188916876,
"grad_norm": 3.2078239917755127,
"learning_rate": 4.512931900282807e-06,
"loss": 0.8965,
"step": 45
},
{
"epoch": 0.23173803526448364,
"grad_norm": 3.318209409713745,
"learning_rate": 4.4885329909757836e-06,
"loss": 0.7962,
"step": 46
},
{
"epoch": 0.2367758186397985,
"grad_norm": 4.799398899078369,
"learning_rate": 4.463607205762948e-06,
"loss": 0.8804,
"step": 47
},
{
"epoch": 0.24181360201511334,
"grad_norm": 2.9974491596221924,
"learning_rate": 4.438161148907703e-06,
"loss": 0.8118,
"step": 48
},
{
"epoch": 0.24685138539042822,
"grad_norm": 2.7640256881713867,
"learning_rate": 4.4122015625231125e-06,
"loss": 0.8798,
"step": 49
},
{
"epoch": 0.2518891687657431,
"grad_norm": 3.4179089069366455,
"learning_rate": 4.385735324785528e-06,
"loss": 0.8466,
"step": 50
},
{
"epoch": 0.25692695214105793,
"grad_norm": 3.160508632659912,
"learning_rate": 4.3587694481121664e-06,
"loss": 0.8293,
"step": 51
},
{
"epoch": 0.2619647355163728,
"grad_norm": 3.2087128162384033,
"learning_rate": 4.331311077303119e-06,
"loss": 0.8523,
"step": 52
},
{
"epoch": 0.26700251889168763,
"grad_norm": 2.897143840789795,
"learning_rate": 4.303367487648289e-06,
"loss": 0.8541,
"step": 53
},
{
"epoch": 0.27204030226700254,
"grad_norm": 3.211749792098999,
"learning_rate": 4.274946082999753e-06,
"loss": 0.8348,
"step": 54
},
{
"epoch": 0.2770780856423174,
"grad_norm": 3.2269697189331055,
"learning_rate": 4.246054393810053e-06,
"loss": 0.8849,
"step": 55
},
{
"epoch": 0.28211586901763225,
"grad_norm": 3.219303607940674,
"learning_rate": 4.2167000751369535e-06,
"loss": 0.7562,
"step": 56
},
{
"epoch": 0.2871536523929471,
"grad_norm": 2.986050844192505,
"learning_rate": 4.186890904615178e-06,
"loss": 0.7716,
"step": 57
},
{
"epoch": 0.29219143576826195,
"grad_norm": 3.107534885406494,
"learning_rate": 4.156634780395672e-06,
"loss": 0.8897,
"step": 58
},
{
"epoch": 0.2972292191435768,
"grad_norm": 3.1411337852478027,
"learning_rate": 4.125939719052927e-06,
"loss": 0.8865,
"step": 59
},
{
"epoch": 0.3022670025188917,
"grad_norm": 3.5057530403137207,
"learning_rate": 4.094813853460938e-06,
"loss": 0.8619,
"step": 60
},
{
"epoch": 0.30730478589420657,
"grad_norm": 3.040756940841675,
"learning_rate": 4.063265430638338e-06,
"loss": 0.8592,
"step": 61
},
{
"epoch": 0.3123425692695214,
"grad_norm": 2.7552523612976074,
"learning_rate": 4.031302809563292e-06,
"loss": 0.7954,
"step": 62
},
{
"epoch": 0.31738035264483627,
"grad_norm": 2.9611120223999023,
"learning_rate": 3.998934458958726e-06,
"loss": 0.8734,
"step": 63
},
{
"epoch": 0.3224181360201511,
"grad_norm": 4.978094577789307,
"learning_rate": 3.96616895504848e-06,
"loss": 0.8818,
"step": 64
},
{
"epoch": 0.327455919395466,
"grad_norm": 4.14245080947876,
"learning_rate": 3.933014979284978e-06,
"loss": 0.8783,
"step": 65
},
{
"epoch": 0.33249370277078083,
"grad_norm": 2.922802209854126,
"learning_rate": 3.899481316049012e-06,
"loss": 0.8348,
"step": 66
},
{
"epoch": 0.33753148614609574,
"grad_norm": 2.9351940155029297,
"learning_rate": 3.86557685032226e-06,
"loss": 0.7839,
"step": 67
},
{
"epoch": 0.3425692695214106,
"grad_norm": 3.6585469245910645,
"learning_rate": 3.83131056533315e-06,
"loss": 0.9167,
"step": 68
},
{
"epoch": 0.34760705289672544,
"grad_norm": 2.93361496925354,
"learning_rate": 3.7966915401766845e-06,
"loss": 0.8308,
"step": 69
},
{
"epoch": 0.3526448362720403,
"grad_norm": 3.2033188343048096,
"learning_rate": 3.7617289474088725e-06,
"loss": 0.8435,
"step": 70
},
{
"epoch": 0.35768261964735515,
"grad_norm": 3.1813461780548096,
"learning_rate": 3.726432050616399e-06,
"loss": 0.9147,
"step": 71
},
{
"epoch": 0.36272040302267,
"grad_norm": 13.234034538269043,
"learning_rate": 3.6908102019621667e-06,
"loss": 0.8469,
"step": 72
},
{
"epoch": 0.3677581863979849,
"grad_norm": 4.6746392250061035,
"learning_rate": 3.6548728397073756e-06,
"loss": 0.8532,
"step": 73
},
{
"epoch": 0.37279596977329976,
"grad_norm": 2.943939447402954,
"learning_rate": 3.6186294857107933e-06,
"loss": 0.8466,
"step": 74
},
{
"epoch": 0.3778337531486146,
"grad_norm": 3.160301923751831,
"learning_rate": 3.582089742905864e-06,
"loss": 0.9207,
"step": 75
},
{
"epoch": 0.38287153652392947,
"grad_norm": 3.0454745292663574,
"learning_rate": 3.545263292756348e-06,
"loss": 0.8529,
"step": 76
},
{
"epoch": 0.3879093198992443,
"grad_norm": 2.6905884742736816,
"learning_rate": 3.5081598926911487e-06,
"loss": 0.823,
"step": 77
},
{
"epoch": 0.3929471032745592,
"grad_norm": 3.0475916862487793,
"learning_rate": 3.470789373519012e-06,
"loss": 0.8753,
"step": 78
},
{
"epoch": 0.3979848866498741,
"grad_norm": 3.0917882919311523,
"learning_rate": 3.433161636823782e-06,
"loss": 0.8456,
"step": 79
},
{
"epoch": 0.40302267002518893,
"grad_norm": 3.0129034519195557,
"learning_rate": 3.39528665234091e-06,
"loss": 0.8902,
"step": 80
},
{
"epoch": 0.4080604534005038,
"grad_norm": 2.6000654697418213,
"learning_rate": 3.3571744553159e-06,
"loss": 0.8188,
"step": 81
},
{
"epoch": 0.41309823677581864,
"grad_norm": 3.156890392303467,
"learning_rate": 3.3188351438454e-06,
"loss": 0.7446,
"step": 82
},
{
"epoch": 0.4181360201511335,
"grad_norm": 3.192892074584961,
"learning_rate": 3.2802788762016385e-06,
"loss": 0.9118,
"step": 83
},
{
"epoch": 0.42317380352644834,
"grad_norm": 5.0082621574401855,
"learning_rate": 3.2415158681409215e-06,
"loss": 0.832,
"step": 84
},
{
"epoch": 0.4282115869017632,
"grad_norm": 3.4209702014923096,
"learning_rate": 3.2025563901968903e-06,
"loss": 0.8107,
"step": 85
},
{
"epoch": 0.4332493702770781,
"grad_norm": 2.90742564201355,
"learning_rate": 3.1634107649592772e-06,
"loss": 0.8065,
"step": 86
},
{
"epoch": 0.43828715365239296,
"grad_norm": 3.0818216800689697,
"learning_rate": 3.1240893643388558e-06,
"loss": 0.8992,
"step": 87
},
{
"epoch": 0.4433249370277078,
"grad_norm": 2.713526725769043,
"learning_rate": 3.0846026068193354e-06,
"loss": 0.8297,
"step": 88
},
{
"epoch": 0.44836272040302266,
"grad_norm": 2.930645704269409,
"learning_rate": 3.044960954696906e-06,
"loss": 0.8479,
"step": 89
},
{
"epoch": 0.4534005037783375,
"grad_norm": 2.680250644683838,
"learning_rate": 3.00517491130818e-06,
"loss": 0.8159,
"step": 90
},
{
"epoch": 0.45843828715365237,
"grad_norm": 2.9555230140686035,
"learning_rate": 2.96525501824726e-06,
"loss": 0.9478,
"step": 91
},
{
"epoch": 0.4634760705289673,
"grad_norm": 4.101602554321289,
"learning_rate": 2.925211852572667e-06,
"loss": 0.8625,
"step": 92
},
{
"epoch": 0.46851385390428213,
"grad_norm": 5.067833423614502,
"learning_rate": 2.8850560240048737e-06,
"loss": 0.8574,
"step": 93
},
{
"epoch": 0.473551637279597,
"grad_norm": 2.771819591522217,
"learning_rate": 2.844798172115185e-06,
"loss": 0.871,
"step": 94
},
{
"epoch": 0.47858942065491183,
"grad_norm": 2.993462085723877,
"learning_rate": 2.80444896350671e-06,
"loss": 0.9423,
"step": 95
},
{
"epoch": 0.4836272040302267,
"grad_norm": 2.8279411792755127,
"learning_rate": 2.764019088988165e-06,
"loss": 0.8155,
"step": 96
},
{
"epoch": 0.48866498740554154,
"grad_norm": 2.840853452682495,
"learning_rate": 2.723519260741271e-06,
"loss": 0.8262,
"step": 97
},
{
"epoch": 0.49370277078085645,
"grad_norm": 3.424471378326416,
"learning_rate": 2.6829602094824864e-06,
"loss": 0.8527,
"step": 98
},
{
"epoch": 0.4987405541561713,
"grad_norm": 3.408869981765747,
"learning_rate": 2.6423526816198253e-06,
"loss": 0.8253,
"step": 99
},
{
"epoch": 0.5037783375314862,
"grad_norm": 5.029043674468994,
"learning_rate": 2.601707436405521e-06,
"loss": 0.8504,
"step": 100
},
{
"epoch": 0.5088161209068011,
"grad_norm": 3.400862216949463,
"learning_rate": 2.5610352430852888e-06,
"loss": 0.8509,
"step": 101
},
{
"epoch": 0.5138539042821159,
"grad_norm": 3.2282321453094482,
"learning_rate": 2.5203468780449324e-06,
"loss": 0.8145,
"step": 102
},
{
"epoch": 0.5188916876574308,
"grad_norm": 3.0265862941741943,
"learning_rate": 2.4796531219550684e-06,
"loss": 0.8479,
"step": 103
},
{
"epoch": 0.5239294710327456,
"grad_norm": 3.553727388381958,
"learning_rate": 2.438964756914712e-06,
"loss": 0.8581,
"step": 104
},
{
"epoch": 0.5289672544080605,
"grad_norm": 3.089478015899658,
"learning_rate": 2.398292563594479e-06,
"loss": 0.8849,
"step": 105
},
{
"epoch": 0.5340050377833753,
"grad_norm": 2.7626805305480957,
"learning_rate": 2.357647318380176e-06,
"loss": 0.8332,
"step": 106
},
{
"epoch": 0.5390428211586902,
"grad_norm": 3.013871192932129,
"learning_rate": 2.3170397905175144e-06,
"loss": 0.8032,
"step": 107
},
{
"epoch": 0.5440806045340051,
"grad_norm": 3.2517004013061523,
"learning_rate": 2.2764807392587303e-06,
"loss": 0.798,
"step": 108
},
{
"epoch": 0.5491183879093199,
"grad_norm": 14.13714599609375,
"learning_rate": 2.2359809110118358e-06,
"loss": 0.8714,
"step": 109
},
{
"epoch": 0.5541561712846348,
"grad_norm": 3.096505641937256,
"learning_rate": 2.1955510364932904e-06,
"loss": 0.7631,
"step": 110
},
{
"epoch": 0.5591939546599496,
"grad_norm": 2.820859432220459,
"learning_rate": 2.1552018278848145e-06,
"loss": 0.8005,
"step": 111
},
{
"epoch": 0.5642317380352645,
"grad_norm": 6.947901248931885,
"learning_rate": 2.1149439759951276e-06,
"loss": 0.799,
"step": 112
},
{
"epoch": 0.5692695214105793,
"grad_norm": 2.9445698261260986,
"learning_rate": 2.074788147427334e-06,
"loss": 0.806,
"step": 113
},
{
"epoch": 0.5743073047858942,
"grad_norm": 2.744227409362793,
"learning_rate": 2.034744981752741e-06,
"loss": 0.8305,
"step": 114
},
{
"epoch": 0.5793450881612091,
"grad_norm": 2.6451659202575684,
"learning_rate": 1.9948250886918204e-06,
"loss": 0.7354,
"step": 115
},
{
"epoch": 0.5843828715365239,
"grad_norm": 2.812647819519043,
"learning_rate": 1.955039045303095e-06,
"loss": 0.7877,
"step": 116
},
{
"epoch": 0.5894206549118388,
"grad_norm": 3.6654205322265625,
"learning_rate": 1.915397393180665e-06,
"loss": 0.8502,
"step": 117
},
{
"epoch": 0.5944584382871536,
"grad_norm": 3.1642322540283203,
"learning_rate": 1.8759106356611453e-06,
"loss": 0.8214,
"step": 118
},
{
"epoch": 0.5994962216624685,
"grad_norm": 2.7508504390716553,
"learning_rate": 1.8365892350407238e-06,
"loss": 0.8778,
"step": 119
},
{
"epoch": 0.6045340050377834,
"grad_norm": 2.832481622695923,
"learning_rate": 1.7974436098031105e-06,
"loss": 0.7971,
"step": 120
},
{
"epoch": 0.6095717884130982,
"grad_norm": 3.0751426219940186,
"learning_rate": 1.7584841318590796e-06,
"loss": 0.8394,
"step": 121
},
{
"epoch": 0.6146095717884131,
"grad_norm": 2.66140079498291,
"learning_rate": 1.719721123798362e-06,
"loss": 0.7793,
"step": 122
},
{
"epoch": 0.6196473551637279,
"grad_norm": 3.033729314804077,
"learning_rate": 1.6811648561546003e-06,
"loss": 0.7719,
"step": 123
},
{
"epoch": 0.6246851385390428,
"grad_norm": 24.4138240814209,
"learning_rate": 1.642825544684101e-06,
"loss": 0.8482,
"step": 124
},
{
"epoch": 0.6297229219143576,
"grad_norm": 3.2902424335479736,
"learning_rate": 1.6047133476590908e-06,
"loss": 0.8713,
"step": 125
},
{
"epoch": 0.6347607052896725,
"grad_norm": 2.7968010902404785,
"learning_rate": 1.566838363176219e-06,
"loss": 0.7406,
"step": 126
},
{
"epoch": 0.6397984886649875,
"grad_norm": 3.296938419342041,
"learning_rate": 1.5292106264809888e-06,
"loss": 0.7567,
"step": 127
},
{
"epoch": 0.6448362720403022,
"grad_norm": 2.7469353675842285,
"learning_rate": 1.4918401073088517e-06,
"loss": 0.8542,
"step": 128
},
{
"epoch": 0.6498740554156172,
"grad_norm": 2.775825262069702,
"learning_rate": 1.4547367072436519e-06,
"loss": 0.7611,
"step": 129
},
{
"epoch": 0.654911838790932,
"grad_norm": 3.243407726287842,
"learning_rate": 1.4179102570941368e-06,
"loss": 0.8393,
"step": 130
},
{
"epoch": 0.6599496221662469,
"grad_norm": 2.9354608058929443,
"learning_rate": 1.3813705142892082e-06,
"loss": 0.8897,
"step": 131
},
{
"epoch": 0.6649874055415617,
"grad_norm": 2.8261704444885254,
"learning_rate": 1.3451271602926248e-06,
"loss": 0.8167,
"step": 132
},
{
"epoch": 0.6700251889168766,
"grad_norm": 2.76542067527771,
"learning_rate": 1.309189798037834e-06,
"loss": 0.8355,
"step": 133
},
{
"epoch": 0.6750629722921915,
"grad_norm": 2.8934812545776367,
"learning_rate": 1.273567949383601e-06,
"loss": 0.8766,
"step": 134
},
{
"epoch": 0.6801007556675063,
"grad_norm": 3.0498292446136475,
"learning_rate": 1.238271052591127e-06,
"loss": 0.861,
"step": 135
},
{
"epoch": 0.6851385390428212,
"grad_norm": 2.881106376647949,
"learning_rate": 1.2033084598233163e-06,
"loss": 0.7946,
"step": 136
},
{
"epoch": 0.690176322418136,
"grad_norm": 2.83734130859375,
"learning_rate": 1.1686894346668512e-06,
"loss": 0.7484,
"step": 137
},
{
"epoch": 0.6952141057934509,
"grad_norm": 2.4155306816101074,
"learning_rate": 1.1344231496777406e-06,
"loss": 0.8025,
"step": 138
},
{
"epoch": 0.7002518891687658,
"grad_norm": 2.696739435195923,
"learning_rate": 1.1005186839509887e-06,
"loss": 0.7984,
"step": 139
},
{
"epoch": 0.7052896725440806,
"grad_norm": 2.9532501697540283,
"learning_rate": 1.066985020715022e-06,
"loss": 0.8436,
"step": 140
},
{
"epoch": 0.7103274559193955,
"grad_norm": 2.7825965881347656,
"learning_rate": 1.0338310449515197e-06,
"loss": 0.8718,
"step": 141
},
{
"epoch": 0.7153652392947103,
"grad_norm": 2.916924238204956,
"learning_rate": 1.0010655410412745e-06,
"loss": 0.7749,
"step": 142
},
{
"epoch": 0.7204030226700252,
"grad_norm": 2.610424757003784,
"learning_rate": 9.68697190436709e-07,
"loss": 0.7614,
"step": 143
},
{
"epoch": 0.72544080604534,
"grad_norm": 2.67132830619812,
"learning_rate": 9.367345693616625e-07,
"loss": 0.821,
"step": 144
},
{
"epoch": 0.7304785894206549,
"grad_norm": 2.876641035079956,
"learning_rate": 9.051861465390624e-07,
"loss": 0.7234,
"step": 145
},
{
"epoch": 0.7355163727959698,
"grad_norm": 2.9514031410217285,
"learning_rate": 8.740602809470736e-07,
"loss": 0.7861,
"step": 146
},
{
"epoch": 0.7405541561712846,
"grad_norm": 2.6026699542999268,
"learning_rate": 8.433652196043288e-07,
"loss": 0.8253,
"step": 147
},
{
"epoch": 0.7455919395465995,
"grad_norm": 2.879054069519043,
"learning_rate": 8.131090953848228e-07,
"loss": 0.7749,
"step": 148
},
{
"epoch": 0.7506297229219143,
"grad_norm": 2.8631882667541504,
"learning_rate": 7.832999248630479e-07,
"loss": 0.9273,
"step": 149
},
{
"epoch": 0.7556675062972292,
"grad_norm": 2.6118288040161133,
"learning_rate": 7.53945606189948e-07,
"loss": 0.8117,
"step": 150
},
{
"epoch": 0.760705289672544,
"grad_norm": 2.8277623653411865,
"learning_rate": 7.250539170002477e-07,
"loss": 0.7862,
"step": 151
},
{
"epoch": 0.7657430730478589,
"grad_norm": 5.133010387420654,
"learning_rate": 6.96632512351711e-07,
"loss": 0.8225,
"step": 152
},
{
"epoch": 0.7707808564231738,
"grad_norm": 2.7790756225585938,
"learning_rate": 6.686889226968815e-07,
"loss": 0.7539,
"step": 153
},
{
"epoch": 0.7758186397984886,
"grad_norm": 2.755314350128174,
"learning_rate": 6.412305518878343e-07,
"loss": 0.8238,
"step": 154
},
{
"epoch": 0.7808564231738035,
"grad_norm": 2.6783859729766846,
"learning_rate": 6.142646752144724e-07,
"loss": 0.7837,
"step": 155
},
{
"epoch": 0.7858942065491183,
"grad_norm": 2.760148763656616,
"learning_rate": 5.877984374768878e-07,
"loss": 0.8798,
"step": 156
},
{
"epoch": 0.7909319899244333,
"grad_norm": 2.8512697219848633,
"learning_rate": 5.618388510922979e-07,
"loss": 0.8237,
"step": 157
},
{
"epoch": 0.7959697732997482,
"grad_norm": 2.719089984893799,
"learning_rate": 5.363927942370528e-07,
"loss": 0.8031,
"step": 158
},
{
"epoch": 0.801007556675063,
"grad_norm": 7.449169158935547,
"learning_rate": 5.11467009024216e-07,
"loss": 0.7838,
"step": 159
},
{
"epoch": 0.8060453400503779,
"grad_norm": 3.0624005794525146,
"learning_rate": 4.870680997171934e-07,
"loss": 0.7585,
"step": 160
},
{
"epoch": 0.8110831234256927,
"grad_norm": 2.621262788772583,
"learning_rate": 4.6320253097988486e-07,
"loss": 0.8421,
"step": 161
},
{
"epoch": 0.8161209068010076,
"grad_norm": 3.4929068088531494,
"learning_rate": 4.398766261638271e-07,
"loss": 0.7814,
"step": 162
},
{
"epoch": 0.8211586901763224,
"grad_norm": 3.319396734237671,
"learning_rate": 4.170965656327791e-07,
"loss": 0.8786,
"step": 163
},
{
"epoch": 0.8261964735516373,
"grad_norm": 3.4810092449188232,
"learning_rate": 3.9486838512518777e-07,
"loss": 0.7662,
"step": 164
},
{
"epoch": 0.8312342569269522,
"grad_norm": 2.9049723148345947,
"learning_rate": 3.7319797415497737e-07,
"loss": 0.801,
"step": 165
},
{
"epoch": 0.836272040302267,
"grad_norm": 3.931922674179077,
"learning_rate": 3.5209107445108195e-07,
"loss": 0.8094,
"step": 166
},
{
"epoch": 0.8413098236775819,
"grad_norm": 2.9314591884613037,
"learning_rate": 3.3155327843613166e-07,
"loss": 0.7802,
"step": 167
},
{
"epoch": 0.8463476070528967,
"grad_norm": 2.7644076347351074,
"learning_rate": 3.1159002774470146e-07,
"loss": 0.8319,
"step": 168
},
{
"epoch": 0.8513853904282116,
"grad_norm": 2.9492383003234863,
"learning_rate": 2.9220661178151366e-07,
"loss": 0.7787,
"step": 169
},
{
"epoch": 0.8564231738035264,
"grad_norm": 2.547168254852295,
"learning_rate": 2.734081663199695e-07,
"loss": 0.7846,
"step": 170
},
{
"epoch": 0.8614609571788413,
"grad_norm": 3.1694071292877197,
"learning_rate": 2.551996721413916e-07,
"loss": 0.8104,
"step": 171
},
{
"epoch": 0.8664987405541562,
"grad_norm": 3.047084331512451,
"learning_rate": 2.375859537153302e-07,
"loss": 0.7466,
"step": 172
},
{
"epoch": 0.871536523929471,
"grad_norm": 2.832005262374878,
"learning_rate": 2.2057167792128493e-07,
"loss": 0.8075,
"step": 173
},
{
"epoch": 0.8765743073047859,
"grad_norm": 2.647271156311035,
"learning_rate": 2.0416135281218218e-07,
"loss": 0.8217,
"step": 174
},
{
"epoch": 0.8816120906801007,
"grad_norm": 2.717848539352417,
"learning_rate": 1.8835932641993627e-07,
"loss": 0.7855,
"step": 175
},
{
"epoch": 0.8866498740554156,
"grad_norm": 2.9493443965911865,
"learning_rate": 1.7316978560340647e-07,
"loss": 0.8201,
"step": 176
},
{
"epoch": 0.8916876574307305,
"grad_norm": 5.114314079284668,
"learning_rate": 1.5859675493905769e-07,
"loss": 0.7904,
"step": 177
},
{
"epoch": 0.8967254408060453,
"grad_norm": 2.8451216220855713,
"learning_rate": 1.4464409565462328e-07,
"loss": 0.805,
"step": 178
},
{
"epoch": 0.9017632241813602,
"grad_norm": 2.497908592224121,
"learning_rate": 1.3131550460604242e-07,
"loss": 0.7827,
"step": 179
},
{
"epoch": 0.906801007556675,
"grad_norm": 3.690392017364502,
"learning_rate": 1.1861451329795326e-07,
"loss": 0.7521,
"step": 180
},
{
"epoch": 0.9118387909319899,
"grad_norm": 3.2087209224700928,
"learning_rate": 1.065444869479973e-07,
"loss": 0.7203,
"step": 181
},
{
"epoch": 0.9168765743073047,
"grad_norm": 2.854456901550293,
"learning_rate": 9.510862359517815e-08,
"loss": 0.819,
"step": 182
},
{
"epoch": 0.9219143576826196,
"grad_norm": 2.9602978229522705,
"learning_rate": 8.430995325252128e-08,
"loss": 0.798,
"step": 183
},
{
"epoch": 0.9269521410579346,
"grad_norm": 2.870439052581787,
"learning_rate": 7.415133710424794e-08,
"loss": 0.79,
"step": 184
},
{
"epoch": 0.9319899244332494,
"grad_norm": 2.710660219192505,
"learning_rate": 6.463546674768644e-08,
"loss": 0.7406,
"step": 185
},
{
"epoch": 0.9370277078085643,
"grad_norm": 3.1572258472442627,
"learning_rate": 5.5764863480112233e-08,
"loss": 0.8467,
"step": 186
},
{
"epoch": 0.9420654911838791,
"grad_norm": 2.8733699321746826,
"learning_rate": 4.754187763071488e-08,
"loss": 0.7988,
"step": 187
},
{
"epoch": 0.947103274559194,
"grad_norm": 3.7968437671661377,
"learning_rate": 3.996868793785913e-08,
"loss": 0.7472,
"step": 188
},
{
"epoch": 0.9521410579345088,
"grad_norm": 2.6690640449523926,
"learning_rate": 3.304730097181463e-08,
"loss": 0.7358,
"step": 189
},
{
"epoch": 0.9571788413098237,
"grad_norm": 2.642486333847046,
"learning_rate": 2.6779550603100168e-08,
"loss": 0.865,
"step": 190
},
{
"epoch": 0.9622166246851386,
"grad_norm": 3.4199740886688232,
"learning_rate": 2.116709751658591e-08,
"loss": 0.7681,
"step": 191
},
{
"epoch": 0.9672544080604534,
"grad_norm": 2.9348366260528564,
"learning_rate": 1.6211428771484295e-08,
"loss": 0.9291,
"step": 192
},
{
"epoch": 0.9722921914357683,
"grad_norm": 2.5914723873138428,
"learning_rate": 1.1913857407343244e-08,
"loss": 0.7852,
"step": 193
},
{
"epoch": 0.9773299748110831,
"grad_norm": 2.9415173530578613,
"learning_rate": 8.275522096146404e-09,
"loss": 0.7209,
"step": 194
},
{
"epoch": 0.982367758186398,
"grad_norm": 2.919996976852417,
"learning_rate": 5.297386840614205e-09,
"loss": 0.8159,
"step": 195
},
{
"epoch": 0.9874055415617129,
"grad_norm": 3.5275845527648926,
"learning_rate": 2.9802407187842773e-09,
"loss": 0.7429,
"step": 196
},
{
"epoch": 0.9924433249370277,
"grad_norm": 2.8735129833221436,
"learning_rate": 1.3246976749395346e-09,
"loss": 0.8319,
"step": 197
},
{
"epoch": 0.9974811083123426,
"grad_norm": 2.514472246170044,
"learning_rate": 3.3119635694023324e-10,
"loss": 0.7523,
"step": 198
},
{
"epoch": 1.0,
"grad_norm": 3.6349916458129883,
"learning_rate": 0.0,
"loss": 0.8407,
"step": 199
},
{
"epoch": 1.0,
"step": 199,
"total_flos": 258447595339776.0,
"train_loss": 0.8690116378530186,
"train_runtime": 2505.9387,
"train_samples_per_second": 2.532,
"train_steps_per_second": 0.079
}
],
"logging_steps": 1.0,
"max_steps": 199,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 258447595339776.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}