{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9983579638752053,
"eval_steps": 500,
"global_step": 171,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0058383506659368724,
"grad_norm": 23.346039635432966,
"learning_rate": 0.0,
"loss": 1.0216,
"step": 1
},
{
"epoch": 0.011676701331873745,
"grad_norm": 18.69440800650612,
"learning_rate": 5.555555555555555e-07,
"loss": 1.0159,
"step": 2
},
{
"epoch": 0.01751505199781062,
"grad_norm": 14.923328604146274,
"learning_rate": 1.111111111111111e-06,
"loss": 1.0972,
"step": 3
},
{
"epoch": 0.02335340266374749,
"grad_norm": 25.385980327121796,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.295,
"step": 4
},
{
"epoch": 0.029191753329684364,
"grad_norm": 18.16503547733233,
"learning_rate": 2.222222222222222e-06,
"loss": 0.9012,
"step": 5
},
{
"epoch": 0.03503010399562124,
"grad_norm": 53.35453317315207,
"learning_rate": 2.7777777777777783e-06,
"loss": 1.0288,
"step": 6
},
{
"epoch": 0.04086845466155811,
"grad_norm": 3.9479906393452455,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.8768,
"step": 7
},
{
"epoch": 0.04670680532749498,
"grad_norm": 3.526815627243875,
"learning_rate": 3.88888888888889e-06,
"loss": 1.0274,
"step": 8
},
{
"epoch": 0.052545155993431854,
"grad_norm": 2.9003505340702893,
"learning_rate": 4.444444444444444e-06,
"loss": 0.9328,
"step": 9
},
{
"epoch": 0.05838350665936873,
"grad_norm": 3.6872295301025293,
"learning_rate": 5e-06,
"loss": 1.1684,
"step": 10
},
{
"epoch": 0.0642218573253056,
"grad_norm": 4.385584615681111,
"learning_rate": 5.555555555555557e-06,
"loss": 1.0925,
"step": 11
},
{
"epoch": 0.07006020799124248,
"grad_norm": 3.013525965990989,
"learning_rate": 6.111111111111112e-06,
"loss": 0.8918,
"step": 12
},
{
"epoch": 0.07589855865717934,
"grad_norm": 2.7201676256210483,
"learning_rate": 6.666666666666667e-06,
"loss": 0.9572,
"step": 13
},
{
"epoch": 0.08173690932311622,
"grad_norm": 2.823242289265648,
"learning_rate": 7.222222222222223e-06,
"loss": 0.8408,
"step": 14
},
{
"epoch": 0.08757525998905309,
"grad_norm": 2.0560673624265435,
"learning_rate": 7.77777777777778e-06,
"loss": 0.7294,
"step": 15
},
{
"epoch": 0.09341361065498996,
"grad_norm": 2.827884964059297,
"learning_rate": 8.333333333333334e-06,
"loss": 0.9351,
"step": 16
},
{
"epoch": 0.09925196132092684,
"grad_norm": 2.353407743397884,
"learning_rate": 8.888888888888888e-06,
"loss": 0.8883,
"step": 17
},
{
"epoch": 0.10509031198686371,
"grad_norm": 2.5476305018155885,
"learning_rate": 9.444444444444445e-06,
"loss": 0.8612,
"step": 18
},
{
"epoch": 0.11092866265280059,
"grad_norm": 2.5549072265785018,
"learning_rate": 1e-05,
"loss": 0.8514,
"step": 19
},
{
"epoch": 0.11676701331873746,
"grad_norm": 3.374243976367514,
"learning_rate": 9.998945997517957e-06,
"loss": 1.0582,
"step": 20
},
{
"epoch": 0.12260536398467432,
"grad_norm": 3.1687760719813345,
"learning_rate": 9.99578443444032e-06,
"loss": 0.8147,
"step": 21
},
{
"epoch": 0.1284437146506112,
"grad_norm": 1.917729903158416,
"learning_rate": 9.990516643685222e-06,
"loss": 0.7036,
"step": 22
},
{
"epoch": 0.13428206531654807,
"grad_norm": 3.080654926506411,
"learning_rate": 9.983144846158472e-06,
"loss": 1.1692,
"step": 23
},
{
"epoch": 0.14012041598248495,
"grad_norm": 2.1635641035874635,
"learning_rate": 9.973672149817232e-06,
"loss": 0.7105,
"step": 24
},
{
"epoch": 0.14595876664842183,
"grad_norm": 1.8290404225487313,
"learning_rate": 9.96210254835968e-06,
"loss": 0.7225,
"step": 25
},
{
"epoch": 0.1517971173143587,
"grad_norm": 2.3343564258608533,
"learning_rate": 9.948440919541277e-06,
"loss": 0.938,
"step": 26
},
{
"epoch": 0.15763546798029557,
"grad_norm": 2.2111438984922045,
"learning_rate": 9.932693023118299e-06,
"loss": 0.9262,
"step": 27
},
{
"epoch": 0.16347381864623245,
"grad_norm": 2.1134981775332373,
"learning_rate": 9.91486549841951e-06,
"loss": 0.8067,
"step": 28
},
{
"epoch": 0.1693121693121693,
"grad_norm": 1.8305245997733364,
"learning_rate": 9.894965861547023e-06,
"loss": 0.8317,
"step": 29
},
{
"epoch": 0.17515051997810618,
"grad_norm": 2.236853620521483,
"learning_rate": 9.873002502207502e-06,
"loss": 0.7304,
"step": 30
},
{
"epoch": 0.18098887064404307,
"grad_norm": 2.3611233354670347,
"learning_rate": 9.848984680175049e-06,
"loss": 0.8728,
"step": 31
},
{
"epoch": 0.18682722130997992,
"grad_norm": 4.000809003005407,
"learning_rate": 9.822922521387277e-06,
"loss": 0.7416,
"step": 32
},
{
"epoch": 0.1926655719759168,
"grad_norm": 1.8812162065176985,
"learning_rate": 9.794827013676206e-06,
"loss": 0.7018,
"step": 33
},
{
"epoch": 0.19850392264185368,
"grad_norm": 1.6923974873335865,
"learning_rate": 9.764710002135784e-06,
"loss": 0.6455,
"step": 34
},
{
"epoch": 0.20434227330779056,
"grad_norm": 2.8035506463841533,
"learning_rate": 9.732584184127973e-06,
"loss": 1.0517,
"step": 35
},
{
"epoch": 0.21018062397372742,
"grad_norm": 1.5840802349672924,
"learning_rate": 9.698463103929542e-06,
"loss": 0.6771,
"step": 36
},
{
"epoch": 0.2160189746396643,
"grad_norm": 2.3024892626964544,
"learning_rate": 9.66236114702178e-06,
"loss": 0.8554,
"step": 37
},
{
"epoch": 0.22185732530560118,
"grad_norm": 2.0003910939871803,
"learning_rate": 9.62429353402556e-06,
"loss": 0.8013,
"step": 38
},
{
"epoch": 0.22769567597153803,
"grad_norm": 1.9517674635934668,
"learning_rate": 9.584276314284316e-06,
"loss": 0.8374,
"step": 39
},
{
"epoch": 0.2335340266374749,
"grad_norm": 2.131535143692049,
"learning_rate": 9.542326359097619e-06,
"loss": 0.8691,
"step": 40
},
{
"epoch": 0.2393723773034118,
"grad_norm": 1.791753421500237,
"learning_rate": 9.498461354608228e-06,
"loss": 0.6558,
"step": 41
},
{
"epoch": 0.24521072796934865,
"grad_norm": 1.7285954496576958,
"learning_rate": 9.452699794345583e-06,
"loss": 0.7824,
"step": 42
},
{
"epoch": 0.2510490786352855,
"grad_norm": 1.5567965071977108,
"learning_rate": 9.405060971428924e-06,
"loss": 0.639,
"step": 43
},
{
"epoch": 0.2568874293012224,
"grad_norm": 1.4375142273921275,
"learning_rate": 9.355564970433288e-06,
"loss": 0.6775,
"step": 44
},
{
"epoch": 0.2627257799671593,
"grad_norm": 1.67716239348873,
"learning_rate": 9.30423265892184e-06,
"loss": 0.7687,
"step": 45
},
{
"epoch": 0.26856413063309614,
"grad_norm": 2.1560741267757697,
"learning_rate": 9.251085678648072e-06,
"loss": 0.9605,
"step": 46
},
{
"epoch": 0.274402481299033,
"grad_norm": 2.4945601087320424,
"learning_rate": 9.196146436431635e-06,
"loss": 1.0254,
"step": 47
},
{
"epoch": 0.2802408319649699,
"grad_norm": 1.9131551064591947,
"learning_rate": 9.13943809471159e-06,
"loss": 0.8508,
"step": 48
},
{
"epoch": 0.28607918263090676,
"grad_norm": 1.4757864890468595,
"learning_rate": 9.08098456178111e-06,
"loss": 0.6386,
"step": 49
},
{
"epoch": 0.29191753329684367,
"grad_norm": 2.0882336434748994,
"learning_rate": 9.020810481707709e-06,
"loss": 0.8752,
"step": 50
},
{
"epoch": 0.2977558839627805,
"grad_norm": 1.4257729762626692,
"learning_rate": 8.958941223943292e-06,
"loss": 0.5883,
"step": 51
},
{
"epoch": 0.3035942346287174,
"grad_norm": 2.0617578833829646,
"learning_rate": 8.895402872628352e-06,
"loss": 0.8631,
"step": 52
},
{
"epoch": 0.3094325852946543,
"grad_norm": 2.0387352545759696,
"learning_rate": 8.83022221559489e-06,
"loss": 0.7665,
"step": 53
},
{
"epoch": 0.31527093596059114,
"grad_norm": 1.6825440700596306,
"learning_rate": 8.763426733072624e-06,
"loss": 0.7121,
"step": 54
},
{
"epoch": 0.321109286626528,
"grad_norm": 1.7039787938294297,
"learning_rate": 8.695044586103297e-06,
"loss": 0.7544,
"step": 55
},
{
"epoch": 0.3269476372924649,
"grad_norm": 1.747764255109329,
"learning_rate": 8.625104604667965e-06,
"loss": 0.7664,
"step": 56
},
{
"epoch": 0.33278598795840175,
"grad_norm": 1.4189105312955703,
"learning_rate": 8.553636275532236e-06,
"loss": 0.5921,
"step": 57
},
{
"epoch": 0.3386243386243386,
"grad_norm": 1.7728898990705366,
"learning_rate": 8.480669729814635e-06,
"loss": 0.6721,
"step": 58
},
{
"epoch": 0.3444626892902755,
"grad_norm": 1.6614414969326399,
"learning_rate": 8.40623573028327e-06,
"loss": 0.7133,
"step": 59
},
{
"epoch": 0.35030103995621237,
"grad_norm": 1.8451715614266795,
"learning_rate": 8.330365658386252e-06,
"loss": 0.8457,
"step": 60
},
{
"epoch": 0.3561393906221492,
"grad_norm": 1.569948888332489,
"learning_rate": 8.25309150102121e-06,
"loss": 0.7208,
"step": 61
},
{
"epoch": 0.36197774128808613,
"grad_norm": 1.556596504048036,
"learning_rate": 8.174445837049614e-06,
"loss": 0.7143,
"step": 62
},
{
"epoch": 0.367816091954023,
"grad_norm": 1.792684008076691,
"learning_rate": 8.094461823561473e-06,
"loss": 0.7932,
"step": 63
},
{
"epoch": 0.37365444261995984,
"grad_norm": 1.8602705054299433,
"learning_rate": 8.013173181896283e-06,
"loss": 0.805,
"step": 64
},
{
"epoch": 0.37949279328589675,
"grad_norm": 1.971836720979613,
"learning_rate": 7.930614183426074e-06,
"loss": 0.7688,
"step": 65
},
{
"epoch": 0.3853311439518336,
"grad_norm": 1.4036541372691438,
"learning_rate": 7.846819635106569e-06,
"loss": 0.701,
"step": 66
},
{
"epoch": 0.39116949461777045,
"grad_norm": 1.3144866978901222,
"learning_rate": 7.76182486480253e-06,
"loss": 0.5913,
"step": 67
},
{
"epoch": 0.39700784528370736,
"grad_norm": 1.7802596312579286,
"learning_rate": 7.675665706393502e-06,
"loss": 0.8198,
"step": 68
},
{
"epoch": 0.4028461959496442,
"grad_norm": 2.4200518553212413,
"learning_rate": 7.588378484666214e-06,
"loss": 0.9622,
"step": 69
},
{
"epoch": 0.4086845466155811,
"grad_norm": 2.0689999772599594,
"learning_rate": 7.500000000000001e-06,
"loss": 0.8649,
"step": 70
},
{
"epoch": 0.414522897281518,
"grad_norm": 1.454146433084145,
"learning_rate": 7.4105675128517456e-06,
"loss": 0.6499,
"step": 71
},
{
"epoch": 0.42036124794745483,
"grad_norm": 1.9735117012184902,
"learning_rate": 7.320118728046818e-06,
"loss": 0.8629,
"step": 72
},
{
"epoch": 0.42619959861339174,
"grad_norm": 1.3269967533943117,
"learning_rate": 7.2286917788826926e-06,
"loss": 0.6302,
"step": 73
},
{
"epoch": 0.4320379492793286,
"grad_norm": 2.1156499352177467,
"learning_rate": 7.136325211051905e-06,
"loss": 0.953,
"step": 74
},
{
"epoch": 0.43787629994526545,
"grad_norm": 1.6661986473128974,
"learning_rate": 7.043057966391158e-06,
"loss": 0.7642,
"step": 75
},
{
"epoch": 0.44371465061120235,
"grad_norm": 1.3168655506535973,
"learning_rate": 6.948929366463397e-06,
"loss": 0.5953,
"step": 76
},
{
"epoch": 0.4495530012771392,
"grad_norm": 1.542487982540137,
"learning_rate": 6.8539790959798045e-06,
"loss": 0.6802,
"step": 77
},
{
"epoch": 0.45539135194307606,
"grad_norm": 1.9826739527814456,
"learning_rate": 6.758247186068684e-06,
"loss": 0.87,
"step": 78
},
{
"epoch": 0.46122970260901297,
"grad_norm": 1.6743878429099177,
"learning_rate": 6.6617739973982985e-06,
"loss": 0.7126,
"step": 79
},
{
"epoch": 0.4670680532749498,
"grad_norm": 1.6060875322085453,
"learning_rate": 6.5646002031607726e-06,
"loss": 0.7116,
"step": 80
},
{
"epoch": 0.4729064039408867,
"grad_norm": 1.4970094698253724,
"learning_rate": 6.466766771924231e-06,
"loss": 0.7887,
"step": 81
},
{
"epoch": 0.4787447546068236,
"grad_norm": 1.4205978407297999,
"learning_rate": 6.368314950360416e-06,
"loss": 0.6496,
"step": 82
},
{
"epoch": 0.48458310527276044,
"grad_norm": 1.8875915736340494,
"learning_rate": 6.269286245855039e-06,
"loss": 0.9423,
"step": 83
},
{
"epoch": 0.4904214559386973,
"grad_norm": 1.551811827102774,
"learning_rate": 6.169722409008244e-06,
"loss": 0.7458,
"step": 84
},
{
"epoch": 0.4962598066046342,
"grad_norm": 1.5216093240427255,
"learning_rate": 6.0696654160324875e-06,
"loss": 0.7234,
"step": 85
},
{
"epoch": 0.502098157270571,
"grad_norm": 2.0664851747739608,
"learning_rate": 5.9691574510553505e-06,
"loss": 0.8706,
"step": 86
},
{
"epoch": 0.5079365079365079,
"grad_norm": 1.3755000837322797,
"learning_rate": 5.8682408883346535e-06,
"loss": 0.6907,
"step": 87
},
{
"epoch": 0.5137748586024448,
"grad_norm": 2.0452056254353668,
"learning_rate": 5.766958274393428e-06,
"loss": 0.9291,
"step": 88
},
{
"epoch": 0.5196132092683817,
"grad_norm": 2.0322847397035653,
"learning_rate": 5.66535231008227e-06,
"loss": 0.9572,
"step": 89
},
{
"epoch": 0.5254515599343186,
"grad_norm": 1.4742200753435941,
"learning_rate": 5.5634658325766066e-06,
"loss": 0.6378,
"step": 90
},
{
"epoch": 0.5312899106002554,
"grad_norm": 1.4173205118801524,
"learning_rate": 5.46134179731651e-06,
"loss": 0.7323,
"step": 91
},
{
"epoch": 0.5371282612661923,
"grad_norm": 2.1798164249882626,
"learning_rate": 5.359023259896638e-06,
"loss": 1.0747,
"step": 92
},
{
"epoch": 0.5429666119321291,
"grad_norm": 1.1947784867302391,
"learning_rate": 5.2565533579139484e-06,
"loss": 0.596,
"step": 93
},
{
"epoch": 0.548804962598066,
"grad_norm": 1.351691353184327,
"learning_rate": 5.153975292780852e-06,
"loss": 0.7037,
"step": 94
},
{
"epoch": 0.554643313264003,
"grad_norm": 1.821832384011939,
"learning_rate": 5.05133231151145e-06,
"loss": 0.8998,
"step": 95
},
{
"epoch": 0.5604816639299398,
"grad_norm": 1.5639613554205625,
"learning_rate": 4.948667688488552e-06,
"loss": 0.7998,
"step": 96
},
{
"epoch": 0.5663200145958767,
"grad_norm": 1.4103317767531058,
"learning_rate": 4.846024707219149e-06,
"loss": 0.6575,
"step": 97
},
{
"epoch": 0.5721583652618135,
"grad_norm": 1.7200282821490869,
"learning_rate": 4.7434466420860515e-06,
"loss": 0.8301,
"step": 98
},
{
"epoch": 0.5779967159277504,
"grad_norm": 1.9713093143708447,
"learning_rate": 4.640976740103363e-06,
"loss": 0.9142,
"step": 99
},
{
"epoch": 0.5838350665936873,
"grad_norm": 1.7128758244015903,
"learning_rate": 4.53865820268349e-06,
"loss": 0.7895,
"step": 100
},
{
"epoch": 0.5896734172596242,
"grad_norm": 1.799486256542215,
"learning_rate": 4.436534167423395e-06,
"loss": 0.8077,
"step": 101
},
{
"epoch": 0.595511767925561,
"grad_norm": 1.4390066868776004,
"learning_rate": 4.334647689917734e-06,
"loss": 0.7538,
"step": 102
},
{
"epoch": 0.6013501185914979,
"grad_norm": 1.562773521063493,
"learning_rate": 4.233041725606573e-06,
"loss": 0.7073,
"step": 103
},
{
"epoch": 0.6071884692574347,
"grad_norm": 1.3659053235335727,
"learning_rate": 4.131759111665349e-06,
"loss": 0.6003,
"step": 104
},
{
"epoch": 0.6130268199233716,
"grad_norm": 1.8759691152378297,
"learning_rate": 4.03084254894465e-06,
"loss": 0.8311,
"step": 105
},
{
"epoch": 0.6188651705893086,
"grad_norm": 2.0594639991765367,
"learning_rate": 3.930334583967514e-06,
"loss": 1.1216,
"step": 106
},
{
"epoch": 0.6247035212552454,
"grad_norm": 1.317441590353052,
"learning_rate": 3.8302775909917585e-06,
"loss": 0.7016,
"step": 107
},
{
"epoch": 0.6305418719211823,
"grad_norm": 1.604492101282993,
"learning_rate": 3.730713754144961e-06,
"loss": 0.7752,
"step": 108
},
{
"epoch": 0.6363802225871191,
"grad_norm": 1.2737569729242313,
"learning_rate": 3.6316850496395863e-06,
"loss": 0.7015,
"step": 109
},
{
"epoch": 0.642218573253056,
"grad_norm": 2.1188338693809214,
"learning_rate": 3.5332332280757706e-06,
"loss": 0.8263,
"step": 110
},
{
"epoch": 0.6480569239189928,
"grad_norm": 1.5348448926667313,
"learning_rate": 3.4353997968392295e-06,
"loss": 0.7388,
"step": 111
},
{
"epoch": 0.6538952745849298,
"grad_norm": 1.3745471248589747,
"learning_rate": 3.3382260026017027e-06,
"loss": 0.6576,
"step": 112
},
{
"epoch": 0.6597336252508667,
"grad_norm": 1.5116712690331373,
"learning_rate": 3.241752813931316e-06,
"loss": 0.8259,
"step": 113
},
{
"epoch": 0.6655719759168035,
"grad_norm": 1.420711782360687,
"learning_rate": 3.1460209040201967e-06,
"loss": 0.7073,
"step": 114
},
{
"epoch": 0.6714103265827404,
"grad_norm": 1.1843506681593685,
"learning_rate": 3.0510706335366034e-06,
"loss": 0.5862,
"step": 115
},
{
"epoch": 0.6772486772486772,
"grad_norm": 1.4452829978036497,
"learning_rate": 2.956942033608843e-06,
"loss": 0.7392,
"step": 116
},
{
"epoch": 0.6830870279146142,
"grad_norm": 1.6148556395277947,
"learning_rate": 2.863674788948097e-06,
"loss": 0.7822,
"step": 117
},
{
"epoch": 0.688925378580551,
"grad_norm": 1.6171370218368475,
"learning_rate": 2.771308221117309e-06,
"loss": 0.774,
"step": 118
},
{
"epoch": 0.6947637292464879,
"grad_norm": 1.3721101508521718,
"learning_rate": 2.6798812719531843e-06,
"loss": 0.6439,
"step": 119
},
{
"epoch": 0.7006020799124247,
"grad_norm": 1.5513076884934305,
"learning_rate": 2.5894324871482557e-06,
"loss": 0.7712,
"step": 120
},
{
"epoch": 0.7064404305783616,
"grad_norm": 1.3608399248886607,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.6666,
"step": 121
},
{
"epoch": 0.7122787812442984,
"grad_norm": 1.4546065360955336,
"learning_rate": 2.411621515333788e-06,
"loss": 0.7305,
"step": 122
},
{
"epoch": 0.7181171319102354,
"grad_norm": 1.50756294259041,
"learning_rate": 2.324334293606499e-06,
"loss": 0.8454,
"step": 123
},
{
"epoch": 0.7239554825761723,
"grad_norm": 1.430596947679965,
"learning_rate": 2.238175135197471e-06,
"loss": 0.6996,
"step": 124
},
{
"epoch": 0.7297938332421091,
"grad_norm": 1.336144492021404,
"learning_rate": 2.1531803648934333e-06,
"loss": 0.692,
"step": 125
},
{
"epoch": 0.735632183908046,
"grad_norm": 1.7715157306940374,
"learning_rate": 2.069385816573928e-06,
"loss": 0.8395,
"step": 126
},
{
"epoch": 0.7414705345739828,
"grad_norm": 1.5456925788355012,
"learning_rate": 1.9868268181037186e-06,
"loss": 0.7025,
"step": 127
},
{
"epoch": 0.7473088852399197,
"grad_norm": 1.3679386356577312,
"learning_rate": 1.9055381764385272e-06,
"loss": 0.6619,
"step": 128
},
{
"epoch": 0.7531472359058566,
"grad_norm": 1.5918300174062097,
"learning_rate": 1.8255541629503865e-06,
"loss": 0.8029,
"step": 129
},
{
"epoch": 0.7589855865717935,
"grad_norm": 1.469146037966741,
"learning_rate": 1.746908498978791e-06,
"loss": 0.6717,
"step": 130
},
{
"epoch": 0.7648239372377303,
"grad_norm": 1.328289890211526,
"learning_rate": 1.6696343416137495e-06,
"loss": 0.6729,
"step": 131
},
{
"epoch": 0.7706622879036672,
"grad_norm": 1.6152433920164602,
"learning_rate": 1.5937642697167288e-06,
"loss": 0.8082,
"step": 132
},
{
"epoch": 0.776500638569604,
"grad_norm": 1.6972098871026293,
"learning_rate": 1.5193302701853674e-06,
"loss": 0.8128,
"step": 133
},
{
"epoch": 0.7823389892355409,
"grad_norm": 1.8868147982818937,
"learning_rate": 1.4463637244677648e-06,
"loss": 1.0077,
"step": 134
},
{
"epoch": 0.7881773399014779,
"grad_norm": 1.533794614028789,
"learning_rate": 1.374895395332037e-06,
"loss": 0.7204,
"step": 135
},
{
"epoch": 0.7940156905674147,
"grad_norm": 1.1485993201785478,
"learning_rate": 1.3049554138967052e-06,
"loss": 0.5837,
"step": 136
},
{
"epoch": 0.7998540412333516,
"grad_norm": 1.9049649848237078,
"learning_rate": 1.2365732669273778e-06,
"loss": 0.7,
"step": 137
},
{
"epoch": 0.8056923918992884,
"grad_norm": 1.5878122929966123,
"learning_rate": 1.1697777844051105e-06,
"loss": 0.7382,
"step": 138
},
{
"epoch": 0.8115307425652253,
"grad_norm": 1.4709273722338752,
"learning_rate": 1.1045971273716476e-06,
"loss": 0.7135,
"step": 139
},
{
"epoch": 0.8173690932311622,
"grad_norm": 1.433559790573233,
"learning_rate": 1.0410587760567104e-06,
"loss": 0.7693,
"step": 140
},
{
"epoch": 0.8232074438970991,
"grad_norm": 1.52758209016128,
"learning_rate": 9.791895182922911e-07,
"loss": 0.7034,
"step": 141
},
{
"epoch": 0.829045794563036,
"grad_norm": 1.9400739394519702,
"learning_rate": 9.190154382188921e-07,
"loss": 0.8852,
"step": 142
},
{
"epoch": 0.8348841452289728,
"grad_norm": 1.958208722988338,
"learning_rate": 8.605619052884106e-07,
"loss": 0.9601,
"step": 143
},
{
"epoch": 0.8407224958949097,
"grad_norm": 1.4807510002946493,
"learning_rate": 8.03853563568367e-07,
"loss": 0.7411,
"step": 144
},
{
"epoch": 0.8465608465608465,
"grad_norm": 1.3793677468152326,
"learning_rate": 7.489143213519301e-07,
"loss": 0.7921,
"step": 145
},
{
"epoch": 0.8523991972267835,
"grad_norm": 1.4895621691570022,
"learning_rate": 6.957673410781617e-07,
"loss": 0.7503,
"step": 146
},
{
"epoch": 0.8582375478927203,
"grad_norm": 1.7116219153256789,
"learning_rate": 6.444350295667112e-07,
"loss": 0.7971,
"step": 147
},
{
"epoch": 0.8640758985586572,
"grad_norm": 1.5797223164404173,
"learning_rate": 5.949390285710777e-07,
"loss": 0.8277,
"step": 148
},
{
"epoch": 0.869914249224594,
"grad_norm": 1.3148568307618735,
"learning_rate": 5.473002056544191e-07,
"loss": 0.6685,
"step": 149
},
{
"epoch": 0.8757525998905309,
"grad_norm": 1.4107080603490212,
"learning_rate": 5.015386453917742e-07,
"loss": 0.6838,
"step": 150
},
{
"epoch": 0.8815909505564677,
"grad_norm": 1.5629187051898064,
"learning_rate": 4.576736409023813e-07,
"loss": 0.7233,
"step": 151
},
{
"epoch": 0.8874293012224047,
"grad_norm": 1.4669164529598444,
"learning_rate": 4.15723685715686e-07,
"loss": 0.836,
"step": 152
},
{
"epoch": 0.8932676518883416,
"grad_norm": 1.454872607521009,
"learning_rate": 3.7570646597444196e-07,
"loss": 0.7642,
"step": 153
},
{
"epoch": 0.8991060025542784,
"grad_norm": 1.6759220531326142,
"learning_rate": 3.3763885297822153e-07,
"loss": 0.8544,
"step": 154
},
{
"epoch": 0.9049443532202153,
"grad_norm": 1.7087176753461515,
"learning_rate": 3.015368960704584e-07,
"loss": 0.7322,
"step": 155
},
{
"epoch": 0.9107827038861521,
"grad_norm": 1.6388339629990647,
"learning_rate": 2.6741581587202747e-07,
"loss": 0.8009,
"step": 156
},
{
"epoch": 0.9166210545520891,
"grad_norm": 1.4635363517587032,
"learning_rate": 2.3528999786421758e-07,
"loss": 0.7706,
"step": 157
},
{
"epoch": 0.9224594052180259,
"grad_norm": 1.6021455462502143,
"learning_rate": 2.0517298632379445e-07,
"loss": 0.7973,
"step": 158
},
{
"epoch": 0.9282977558839628,
"grad_norm": 1.5367883855881703,
"learning_rate": 1.770774786127244e-07,
"loss": 0.7287,
"step": 159
},
{
"epoch": 0.9341361065498996,
"grad_norm": 1.2425259572983562,
"learning_rate": 1.510153198249531e-07,
"loss": 0.6761,
"step": 160
},
{
"epoch": 0.9399744572158365,
"grad_norm": 1.359745226364094,
"learning_rate": 1.2699749779249926e-07,
"loss": 0.6421,
"step": 161
},
{
"epoch": 0.9458128078817734,
"grad_norm": 1.4246913880107532,
"learning_rate": 1.0503413845297739e-07,
"loss": 0.8001,
"step": 162
},
{
"epoch": 0.9516511585477103,
"grad_norm": 1.2590352425153586,
"learning_rate": 8.513450158049109e-08,
"loss": 0.723,
"step": 163
},
{
"epoch": 0.9574895092136472,
"grad_norm": 1.694689146111032,
"learning_rate": 6.730697688170251e-08,
"loss": 0.8005,
"step": 164
},
{
"epoch": 0.963327859879584,
"grad_norm": 1.3191005186230464,
"learning_rate": 5.155908045872349e-08,
"loss": 0.667,
"step": 165
},
{
"epoch": 0.9691662105455209,
"grad_norm": 1.4117265002070536,
"learning_rate": 3.7897451640321326e-08,
"loss": 0.7146,
"step": 166
},
{
"epoch": 0.9750045612114577,
"grad_norm": 1.2794859494757669,
"learning_rate": 2.6327850182769065e-08,
"loss": 0.6562,
"step": 167
},
{
"epoch": 0.9808429118773946,
"grad_norm": 1.4089880813633835,
"learning_rate": 1.6855153841527915e-08,
"loss": 0.6582,
"step": 168
},
{
"epoch": 0.9866812625433315,
"grad_norm": 1.4165079720131926,
"learning_rate": 9.48335631477948e-09,
"loss": 0.7843,
"step": 169
},
{
"epoch": 0.9925196132092684,
"grad_norm": 1.2128283609191615,
"learning_rate": 4.2155655596809455e-09,
"loss": 0.6164,
"step": 170
},
{
"epoch": 0.9983579638752053,
"grad_norm": 1.2569650088824407,
"learning_rate": 1.054002482043237e-09,
"loss": 0.6706,
"step": 171
}
],
"logging_steps": 1,
"max_steps": 171,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 4050,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 14533949767680.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}