{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 180,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0055768560474032764,
"grad_norm": 46.09545673843412,
"learning_rate": 0.0,
"loss": 1.7007,
"step": 1
},
{
"epoch": 0.011153712094806553,
"grad_norm": 44.46037462061874,
"learning_rate": 2.7777777777777776e-07,
"loss": 1.6776,
"step": 2
},
{
"epoch": 0.01673056814220983,
"grad_norm": 40.73202594632966,
"learning_rate": 5.555555555555555e-07,
"loss": 1.5002,
"step": 3
},
{
"epoch": 0.022307424189613106,
"grad_norm": 42.88149317876023,
"learning_rate": 8.333333333333333e-07,
"loss": 1.6238,
"step": 4
},
{
"epoch": 0.027884280237016383,
"grad_norm": 42.513311455243326,
"learning_rate": 1.111111111111111e-06,
"loss": 1.678,
"step": 5
},
{
"epoch": 0.03346113628441966,
"grad_norm": 43.24081118630191,
"learning_rate": 1.3888888888888892e-06,
"loss": 1.7263,
"step": 6
},
{
"epoch": 0.03903799233182294,
"grad_norm": 25.77384950516741,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.272,
"step": 7
},
{
"epoch": 0.04461484837922621,
"grad_norm": 24.137113597713526,
"learning_rate": 1.944444444444445e-06,
"loss": 1.1886,
"step": 8
},
{
"epoch": 0.050191704426629485,
"grad_norm": 11.629783818575346,
"learning_rate": 2.222222222222222e-06,
"loss": 1.2781,
"step": 9
},
{
"epoch": 0.055768560474032766,
"grad_norm": 8.00658343314912,
"learning_rate": 2.5e-06,
"loss": 1.0554,
"step": 10
},
{
"epoch": 0.06134541652143604,
"grad_norm": 7.16421136479377,
"learning_rate": 2.7777777777777783e-06,
"loss": 1.172,
"step": 11
},
{
"epoch": 0.06692227256883931,
"grad_norm": 4.137822360489128,
"learning_rate": 3.055555555555556e-06,
"loss": 1.0006,
"step": 12
},
{
"epoch": 0.0724991286162426,
"grad_norm": 2.9965048555106204,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.8589,
"step": 13
},
{
"epoch": 0.07807598466364588,
"grad_norm": 3.1452698509974435,
"learning_rate": 3.6111111111111115e-06,
"loss": 1.0973,
"step": 14
},
{
"epoch": 0.08365284071104914,
"grad_norm": 2.8688258039939702,
"learning_rate": 3.88888888888889e-06,
"loss": 1.2093,
"step": 15
},
{
"epoch": 0.08922969675845242,
"grad_norm": 2.0484453697226055,
"learning_rate": 4.166666666666667e-06,
"loss": 0.9244,
"step": 16
},
{
"epoch": 0.0948065528058557,
"grad_norm": 2.4395289504901303,
"learning_rate": 4.444444444444444e-06,
"loss": 0.9074,
"step": 17
},
{
"epoch": 0.10038340885325897,
"grad_norm": 3.3112350184991084,
"learning_rate": 4.722222222222222e-06,
"loss": 0.9056,
"step": 18
},
{
"epoch": 0.10596026490066225,
"grad_norm": 2.2238505337460017,
"learning_rate": 5e-06,
"loss": 0.9156,
"step": 19
},
{
"epoch": 0.11153712094806553,
"grad_norm": 1.6023122355959452,
"learning_rate": 4.999529926121254e-06,
"loss": 0.7145,
"step": 20
},
{
"epoch": 0.1171139769954688,
"grad_norm": 1.5747014721624342,
"learning_rate": 4.998119881260576e-06,
"loss": 0.9797,
"step": 21
},
{
"epoch": 0.12269083304287208,
"grad_norm": 1.3008124678483608,
"learning_rate": 4.995770395678171e-06,
"loss": 0.8005,
"step": 22
},
{
"epoch": 0.12826768909027536,
"grad_norm": 1.3341513132245302,
"learning_rate": 4.99248235291948e-06,
"loss": 0.9707,
"step": 23
},
{
"epoch": 0.13384454513767863,
"grad_norm": 1.2836163377989422,
"learning_rate": 4.9882569894829146e-06,
"loss": 0.8303,
"step": 24
},
{
"epoch": 0.13942140118508192,
"grad_norm": 1.5625132921969171,
"learning_rate": 4.983095894354858e-06,
"loss": 1.012,
"step": 25
},
{
"epoch": 0.1449982572324852,
"grad_norm": 1.3279470446330688,
"learning_rate": 4.977001008412113e-06,
"loss": 0.8311,
"step": 26
},
{
"epoch": 0.15057511327988846,
"grad_norm": 1.1984169005450507,
"learning_rate": 4.969974623692023e-06,
"loss": 0.701,
"step": 27
},
{
"epoch": 0.15615196932729175,
"grad_norm": 1.2333632916915551,
"learning_rate": 4.962019382530521e-06,
"loss": 0.8799,
"step": 28
},
{
"epoch": 0.16172882537469502,
"grad_norm": 1.1133972779802328,
"learning_rate": 4.953138276568462e-06,
"loss": 0.75,
"step": 29
},
{
"epoch": 0.16730568142209828,
"grad_norm": 1.0517214425691086,
"learning_rate": 4.943334645626589e-06,
"loss": 0.7046,
"step": 30
},
{
"epoch": 0.17288253746950158,
"grad_norm": 0.9434629605801058,
"learning_rate": 4.93261217644956e-06,
"loss": 0.6824,
"step": 31
},
{
"epoch": 0.17845939351690485,
"grad_norm": 1.1847049987820477,
"learning_rate": 4.9209749013195155e-06,
"loss": 0.8257,
"step": 32
},
{
"epoch": 0.1840362495643081,
"grad_norm": 1.0458338052848428,
"learning_rate": 4.908427196539701e-06,
"loss": 0.8103,
"step": 33
},
{
"epoch": 0.1896131056117114,
"grad_norm": 1.0107452620708213,
"learning_rate": 4.894973780788722e-06,
"loss": 0.8077,
"step": 34
},
{
"epoch": 0.19518996165911467,
"grad_norm": 1.0732542601644082,
"learning_rate": 4.8806197133460385e-06,
"loss": 0.83,
"step": 35
},
{
"epoch": 0.20076681770651794,
"grad_norm": 1.111873065908315,
"learning_rate": 4.865370392189377e-06,
"loss": 0.8261,
"step": 36
},
{
"epoch": 0.20634367375392124,
"grad_norm": 0.9469124459097857,
"learning_rate": 4.849231551964771e-06,
"loss": 0.7354,
"step": 37
},
{
"epoch": 0.2119205298013245,
"grad_norm": 0.9890209354758053,
"learning_rate": 4.832209261830002e-06,
"loss": 0.7614,
"step": 38
},
{
"epoch": 0.21749738584872777,
"grad_norm": 0.9617476508331165,
"learning_rate": 4.814309923172227e-06,
"loss": 0.6634,
"step": 39
},
{
"epoch": 0.22307424189613106,
"grad_norm": 0.8693853556809209,
"learning_rate": 4.7955402672006855e-06,
"loss": 0.6524,
"step": 40
},
{
"epoch": 0.22865109794353433,
"grad_norm": 1.1164427987030467,
"learning_rate": 4.775907352415367e-06,
"loss": 0.9437,
"step": 41
},
{
"epoch": 0.2342279539909376,
"grad_norm": 1.0342203325734225,
"learning_rate": 4.755418561952595e-06,
"loss": 0.7833,
"step": 42
},
{
"epoch": 0.2398048100383409,
"grad_norm": 1.0381130170634878,
"learning_rate": 4.734081600808531e-06,
"loss": 0.8537,
"step": 43
},
{
"epoch": 0.24538166608574416,
"grad_norm": 0.980064465437933,
"learning_rate": 4.711904492941644e-06,
"loss": 0.7711,
"step": 44
},
{
"epoch": 0.25095852213314745,
"grad_norm": 2.4464904323304255,
"learning_rate": 4.688895578255228e-06,
"loss": 0.8071,
"step": 45
},
{
"epoch": 0.2565353781805507,
"grad_norm": 0.9568068666237374,
"learning_rate": 4.665063509461098e-06,
"loss": 0.7486,
"step": 46
},
{
"epoch": 0.262112234227954,
"grad_norm": 0.9773834471749688,
"learning_rate": 4.640417248825667e-06,
"loss": 0.6626,
"step": 47
},
{
"epoch": 0.26768909027535726,
"grad_norm": 0.9467873226243072,
"learning_rate": 4.614966064799603e-06,
"loss": 0.7737,
"step": 48
},
{
"epoch": 0.2732659463227605,
"grad_norm": 0.929090416098371,
"learning_rate": 4.588719528532342e-06,
"loss": 0.7288,
"step": 49
},
{
"epoch": 0.27884280237016384,
"grad_norm": 0.9558116358409616,
"learning_rate": 4.561687510272767e-06,
"loss": 0.7173,
"step": 50
},
{
"epoch": 0.2844196584175671,
"grad_norm": 1.0358016086386252,
"learning_rate": 4.533880175657419e-06,
"loss": 0.8558,
"step": 51
},
{
"epoch": 0.2899965144649704,
"grad_norm": 0.9268356298097264,
"learning_rate": 4.50530798188761e-06,
"loss": 0.6913,
"step": 52
},
{
"epoch": 0.29557337051237365,
"grad_norm": 0.9026491423974176,
"learning_rate": 4.475981673796899e-06,
"loss": 0.6573,
"step": 53
},
{
"epoch": 0.3011502265597769,
"grad_norm": 0.9341984552378991,
"learning_rate": 4.445912279810401e-06,
"loss": 0.7178,
"step": 54
},
{
"epoch": 0.3067270826071802,
"grad_norm": 0.963698636099554,
"learning_rate": 4.415111107797445e-06,
"loss": 0.672,
"step": 55
},
{
"epoch": 0.3123039386545835,
"grad_norm": 1.0123866280188825,
"learning_rate": 4.3835897408191515e-06,
"loss": 0.7958,
"step": 56
},
{
"epoch": 0.31788079470198677,
"grad_norm": 1.0536090394015367,
"learning_rate": 4.351360032772512e-06,
"loss": 0.8384,
"step": 57
},
{
"epoch": 0.32345765074939004,
"grad_norm": 1.005997664037765,
"learning_rate": 4.318434103932622e-06,
"loss": 0.8511,
"step": 58
},
{
"epoch": 0.3290345067967933,
"grad_norm": 0.9082165011415732,
"learning_rate": 4.284824336394748e-06,
"loss": 0.6731,
"step": 59
},
{
"epoch": 0.33461136284419657,
"grad_norm": 0.9309889749980601,
"learning_rate": 4.250543369417921e-06,
"loss": 0.7276,
"step": 60
},
{
"epoch": 0.34018821889159984,
"grad_norm": 1.7819442287605565,
"learning_rate": 4.215604094671835e-06,
"loss": 0.7831,
"step": 61
},
{
"epoch": 0.34576507493900316,
"grad_norm": 0.9434498292744896,
"learning_rate": 4.180019651388807e-06,
"loss": 0.7503,
"step": 62
},
{
"epoch": 0.3513419309864064,
"grad_norm": 1.0424541071897502,
"learning_rate": 4.14380342142266e-06,
"loss": 0.9153,
"step": 63
},
{
"epoch": 0.3569187870338097,
"grad_norm": 0.9753148306326342,
"learning_rate": 4.106969024216348e-06,
"loss": 0.6555,
"step": 64
},
{
"epoch": 0.36249564308121296,
"grad_norm": 0.964467277240988,
"learning_rate": 4.069530311680247e-06,
"loss": 0.7372,
"step": 65
},
{
"epoch": 0.3680724991286162,
"grad_norm": 1.1418604292487458,
"learning_rate": 4.031501362983007e-06,
"loss": 0.7585,
"step": 66
},
{
"epoch": 0.3736493551760195,
"grad_norm": 0.9125259121774503,
"learning_rate": 3.992896479256966e-06,
"loss": 0.6934,
"step": 67
},
{
"epoch": 0.3792262112234228,
"grad_norm": 0.9183210408388318,
"learning_rate": 3.953730178220067e-06,
"loss": 0.6741,
"step": 68
},
{
"epoch": 0.3848030672708261,
"grad_norm": 0.9841348365534922,
"learning_rate": 3.914017188716347e-06,
"loss": 0.7713,
"step": 69
},
{
"epoch": 0.39037992331822935,
"grad_norm": 0.9397953829688301,
"learning_rate": 3.8737724451770155e-06,
"loss": 0.6898,
"step": 70
},
{
"epoch": 0.3959567793656326,
"grad_norm": 0.9360968357631917,
"learning_rate": 3.833011082004229e-06,
"loss": 0.7356,
"step": 71
},
{
"epoch": 0.4015336354130359,
"grad_norm": 1.0490874310335498,
"learning_rate": 3.7917484278796578e-06,
"loss": 0.8834,
"step": 72
},
{
"epoch": 0.40711049146043915,
"grad_norm": 0.8729992951101984,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.6581,
"step": 73
},
{
"epoch": 0.4126873475078425,
"grad_norm": 0.8526096559762923,
"learning_rate": 3.7077814982415966e-06,
"loss": 0.6796,
"step": 74
},
{
"epoch": 0.41826420355524574,
"grad_norm": 0.9123193201559427,
"learning_rate": 3.665108799256348e-06,
"loss": 0.6747,
"step": 75
},
{
"epoch": 0.423841059602649,
"grad_norm": 0.8692473859121223,
"learning_rate": 3.621997950501156e-06,
"loss": 0.6573,
"step": 76
},
{
"epoch": 0.4294179156500523,
"grad_norm": 0.9141140630813748,
"learning_rate": 3.578465164203134e-06,
"loss": 0.6013,
"step": 77
},
{
"epoch": 0.43499477169745554,
"grad_norm": 1.0607296974506348,
"learning_rate": 3.5345268112628485e-06,
"loss": 0.897,
"step": 78
},
{
"epoch": 0.44057162774485886,
"grad_norm": 1.027907705537933,
"learning_rate": 3.4901994150978926e-06,
"loss": 0.8139,
"step": 79
},
{
"epoch": 0.44614848379226213,
"grad_norm": 0.8964908897959806,
"learning_rate": 3.4454996454291066e-06,
"loss": 0.682,
"step": 80
},
{
"epoch": 0.4517253398396654,
"grad_norm": 0.9696808820116304,
"learning_rate": 3.400444312011776e-06,
"loss": 0.7677,
"step": 81
},
{
"epoch": 0.45730219588706866,
"grad_norm": 0.9538347785493502,
"learning_rate": 3.3550503583141726e-06,
"loss": 0.7707,
"step": 82
},
{
"epoch": 0.46287905193447193,
"grad_norm": 0.8834434922285562,
"learning_rate": 3.3093348551458033e-06,
"loss": 0.7254,
"step": 83
},
{
"epoch": 0.4684559079818752,
"grad_norm": 0.9664583802329054,
"learning_rate": 3.2633149942377835e-06,
"loss": 0.6009,
"step": 84
},
{
"epoch": 0.4740327640292785,
"grad_norm": 0.9989900249921821,
"learning_rate": 3.217008081777726e-06,
"loss": 0.7277,
"step": 85
},
{
"epoch": 0.4796096200766818,
"grad_norm": 0.9641224372984417,
"learning_rate": 3.1704315319015936e-06,
"loss": 0.7693,
"step": 86
},
{
"epoch": 0.48518647612408505,
"grad_norm": 0.8563738741272415,
"learning_rate": 3.1236028601449534e-06,
"loss": 0.5502,
"step": 87
},
{
"epoch": 0.4907633321714883,
"grad_norm": 0.9814456923681252,
"learning_rate": 3.0765396768561005e-06,
"loss": 0.7591,
"step": 88
},
{
"epoch": 0.4963401882188916,
"grad_norm": 1.0031797145449588,
"learning_rate": 3.0292596805735275e-06,
"loss": 0.7336,
"step": 89
},
{
"epoch": 0.5019170442662949,
"grad_norm": 1.0307972932412588,
"learning_rate": 2.9817806513702247e-06,
"loss": 0.8728,
"step": 90
},
{
"epoch": 0.5074939003136981,
"grad_norm": 0.9121639360068265,
"learning_rate": 2.9341204441673267e-06,
"loss": 0.7234,
"step": 91
},
{
"epoch": 0.5130707563611014,
"grad_norm": 0.9266082791067043,
"learning_rate": 2.8862969820196017e-06,
"loss": 0.6637,
"step": 92
},
{
"epoch": 0.5186476124085047,
"grad_norm": 0.9817424775807924,
"learning_rate": 2.8383282493753282e-06,
"loss": 0.818,
"step": 93
},
{
"epoch": 0.524224468455908,
"grad_norm": 0.9290655450825701,
"learning_rate": 2.7902322853130758e-06,
"loss": 0.7372,
"step": 94
},
{
"epoch": 0.5298013245033113,
"grad_norm": 0.9630341129207757,
"learning_rate": 2.742027176757948e-06,
"loss": 0.786,
"step": 95
},
{
"epoch": 0.5353781805507145,
"grad_norm": 0.8736837998218376,
"learning_rate": 2.6937310516798276e-06,
"loss": 0.6546,
"step": 96
},
{
"epoch": 0.5409550365981178,
"grad_norm": 0.970695862336814,
"learning_rate": 2.6453620722761897e-06,
"loss": 0.6716,
"step": 97
},
{
"epoch": 0.546531892645521,
"grad_norm": 0.9311590205600201,
"learning_rate": 2.5969384281420425e-06,
"loss": 0.6955,
"step": 98
},
{
"epoch": 0.5521087486929244,
"grad_norm": 0.9022156745489164,
"learning_rate": 2.548478329429561e-06,
"loss": 0.6765,
"step": 99
},
{
"epoch": 0.5576856047403277,
"grad_norm": 0.9321595225209163,
"learning_rate": 2.5e-06,
"loss": 0.7459,
"step": 100
},
{
"epoch": 0.5632624607877309,
"grad_norm": 0.8642803835512484,
"learning_rate": 2.4515216705704396e-06,
"loss": 0.6295,
"step": 101
},
{
"epoch": 0.5688393168351342,
"grad_norm": 0.8770432551457372,
"learning_rate": 2.403061571857958e-06,
"loss": 0.5756,
"step": 102
},
{
"epoch": 0.5744161728825374,
"grad_norm": 0.986644075495802,
"learning_rate": 2.3546379277238107e-06,
"loss": 0.7433,
"step": 103
},
{
"epoch": 0.5799930289299408,
"grad_norm": 0.9151759455914666,
"learning_rate": 2.3062689483201732e-06,
"loss": 0.6835,
"step": 104
},
{
"epoch": 0.585569884977344,
"grad_norm": 1.0566592626672804,
"learning_rate": 2.2579728232420524e-06,
"loss": 0.7049,
"step": 105
},
{
"epoch": 0.5911467410247473,
"grad_norm": 0.8911833907323385,
"learning_rate": 2.2097677146869242e-06,
"loss": 0.6252,
"step": 106
},
{
"epoch": 0.5967235970721506,
"grad_norm": 3.583992947348307,
"learning_rate": 2.161671750624673e-06,
"loss": 0.7175,
"step": 107
},
{
"epoch": 0.6023004531195538,
"grad_norm": 0.8470926199178831,
"learning_rate": 2.113703017980399e-06,
"loss": 0.6039,
"step": 108
},
{
"epoch": 0.6078773091669571,
"grad_norm": 0.9202011923092919,
"learning_rate": 2.0658795558326745e-06,
"loss": 0.675,
"step": 109
},
{
"epoch": 0.6134541652143604,
"grad_norm": 0.991676719260929,
"learning_rate": 2.0182193486297757e-06,
"loss": 0.8416,
"step": 110
},
{
"epoch": 0.6190310212617637,
"grad_norm": 0.9920121314417771,
"learning_rate": 1.970740319426474e-06,
"loss": 0.7869,
"step": 111
},
{
"epoch": 0.624607877309167,
"grad_norm": 0.9147914111270489,
"learning_rate": 1.9234603231439e-06,
"loss": 0.6715,
"step": 112
},
{
"epoch": 0.6301847333565702,
"grad_norm": 0.9960901454429568,
"learning_rate": 1.876397139855047e-06,
"loss": 0.8459,
"step": 113
},
{
"epoch": 0.6357615894039735,
"grad_norm": 0.8398771049626784,
"learning_rate": 1.8295684680984064e-06,
"loss": 0.6327,
"step": 114
},
{
"epoch": 0.6413384454513767,
"grad_norm": 0.8848277584910325,
"learning_rate": 1.7829919182222752e-06,
"loss": 0.6674,
"step": 115
},
{
"epoch": 0.6469153014987801,
"grad_norm": 0.9599403253441103,
"learning_rate": 1.7366850057622176e-06,
"loss": 0.7381,
"step": 116
},
{
"epoch": 0.6524921575461834,
"grad_norm": 0.8633918886732347,
"learning_rate": 1.6906651448541977e-06,
"loss": 0.5713,
"step": 117
},
{
"epoch": 0.6580690135935866,
"grad_norm": 0.9564730887800509,
"learning_rate": 1.6449496416858285e-06,
"loss": 0.7964,
"step": 118
},
{
"epoch": 0.6636458696409899,
"grad_norm": 0.9220167090814314,
"learning_rate": 1.5995556879882246e-06,
"loss": 0.7074,
"step": 119
},
{
"epoch": 0.6692227256883931,
"grad_norm": 0.9412177200866909,
"learning_rate": 1.5545003545708942e-06,
"loss": 0.6798,
"step": 120
},
{
"epoch": 0.6747995817357965,
"grad_norm": 0.8799708797193134,
"learning_rate": 1.509800584902108e-06,
"loss": 0.636,
"step": 121
},
{
"epoch": 0.6803764377831997,
"grad_norm": 0.8746008313087483,
"learning_rate": 1.4654731887371524e-06,
"loss": 0.6517,
"step": 122
},
{
"epoch": 0.685953293830603,
"grad_norm": 0.8622866280586909,
"learning_rate": 1.421534835796867e-06,
"loss": 0.5685,
"step": 123
},
{
"epoch": 0.6915301498780063,
"grad_norm": 0.9161469277312331,
"learning_rate": 1.3780020494988447e-06,
"loss": 0.7142,
"step": 124
},
{
"epoch": 0.6971070059254095,
"grad_norm": 0.8834589774119394,
"learning_rate": 1.3348912007436538e-06,
"loss": 0.6794,
"step": 125
},
{
"epoch": 0.7026838619728129,
"grad_norm": 0.8420150113363432,
"learning_rate": 1.2922185017584038e-06,
"loss": 0.5548,
"step": 126
},
{
"epoch": 0.7082607180202161,
"grad_norm": 0.9186571271373966,
"learning_rate": 1.2500000000000007e-06,
"loss": 0.7093,
"step": 127
},
{
"epoch": 0.7138375740676194,
"grad_norm": 0.9024224044680166,
"learning_rate": 1.2082515721203429e-06,
"loss": 0.601,
"step": 128
},
{
"epoch": 0.7194144301150227,
"grad_norm": 0.8730115171814332,
"learning_rate": 1.1669889179957725e-06,
"loss": 0.6485,
"step": 129
},
{
"epoch": 0.7249912861624259,
"grad_norm": 2.560680193595368,
"learning_rate": 1.1262275548229852e-06,
"loss": 0.681,
"step": 130
},
{
"epoch": 0.7305681422098292,
"grad_norm": 0.8433376742578463,
"learning_rate": 1.085982811283654e-06,
"loss": 0.6025,
"step": 131
},
{
"epoch": 0.7361449982572325,
"grad_norm": 0.9000811460890688,
"learning_rate": 1.0462698217799333e-06,
"loss": 0.7098,
"step": 132
},
{
"epoch": 0.7417218543046358,
"grad_norm": 0.9015484414513791,
"learning_rate": 1.0071035207430352e-06,
"loss": 0.6939,
"step": 133
},
{
"epoch": 0.747298710352039,
"grad_norm": 0.924647234962446,
"learning_rate": 9.68498637016993e-07,
"loss": 0.7219,
"step": 134
},
{
"epoch": 0.7528755663994423,
"grad_norm": 0.9622683692067883,
"learning_rate": 9.304696883197542e-07,
"loss": 0.7445,
"step": 135
},
{
"epoch": 0.7584524224468456,
"grad_norm": 1.0018020723323282,
"learning_rate": 8.930309757836517e-07,
"loss": 0.7285,
"step": 136
},
{
"epoch": 0.7640292784942488,
"grad_norm": 1.0034101578791559,
"learning_rate": 8.561965785773413e-07,
"loss": 0.647,
"step": 137
},
{
"epoch": 0.7696061345416522,
"grad_norm": 0.865650213772322,
"learning_rate": 8.19980348611194e-07,
"loss": 0.6588,
"step": 138
},
{
"epoch": 0.7751829905890554,
"grad_norm": 0.9237880174335488,
"learning_rate": 7.843959053281663e-07,
"loss": 0.738,
"step": 139
},
{
"epoch": 0.7807598466364587,
"grad_norm": 0.992180072952141,
"learning_rate": 7.494566305820788e-07,
"loss": 0.7533,
"step": 140
},
{
"epoch": 0.786336702683862,
"grad_norm": 0.8845919723729968,
"learning_rate": 7.151756636052529e-07,
"loss": 0.6062,
"step": 141
},
{
"epoch": 0.7919135587312652,
"grad_norm": 0.9575278544789321,
"learning_rate": 6.815658960673782e-07,
"loss": 0.7661,
"step": 142
},
{
"epoch": 0.7974904147786686,
"grad_norm": 1.060097465810906,
"learning_rate": 6.48639967227489e-07,
"loss": 0.7093,
"step": 143
},
{
"epoch": 0.8030672708260718,
"grad_norm": 1.1137158210751135,
"learning_rate": 6.164102591808482e-07,
"loss": 0.6516,
"step": 144
},
{
"epoch": 0.8086441268734751,
"grad_norm": 0.9467474421643487,
"learning_rate": 5.848888922025553e-07,
"loss": 0.7106,
"step": 145
},
{
"epoch": 0.8142209829208783,
"grad_norm": 0.977712019438005,
"learning_rate": 5.540877201896e-07,
"loss": 0.6485,
"step": 146
},
{
"epoch": 0.8197978389682816,
"grad_norm": 1.2725548643418227,
"learning_rate": 5.240183262031021e-07,
"loss": 0.7106,
"step": 147
},
{
"epoch": 0.825374695015685,
"grad_norm": 0.9531375340902994,
"learning_rate": 4.946920181123904e-07,
"loss": 0.6352,
"step": 148
},
{
"epoch": 0.8309515510630882,
"grad_norm": 0.8239496228158747,
"learning_rate": 4.661198243425813e-07,
"loss": 0.5812,
"step": 149
},
{
"epoch": 0.8365284071104915,
"grad_norm": 1.4768742600927571,
"learning_rate": 4.383124897272331e-07,
"loss": 0.825,
"step": 150
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.8936174846742114,
"learning_rate": 4.1128047146765936e-07,
"loss": 0.7137,
"step": 151
},
{
"epoch": 0.847682119205298,
"grad_norm": 0.868290680328461,
"learning_rate": 3.8503393520039734e-07,
"loss": 0.646,
"step": 152
},
{
"epoch": 0.8532589752527013,
"grad_norm": 0.9170777878398306,
"learning_rate": 3.595827511743341e-07,
"loss": 0.6338,
"step": 153
},
{
"epoch": 0.8588358313001045,
"grad_norm": 0.8709479986895221,
"learning_rate": 3.3493649053890325e-07,
"loss": 0.6942,
"step": 154
},
{
"epoch": 0.8644126873475079,
"grad_norm": 1.015273029250277,
"learning_rate": 3.111044217447731e-07,
"loss": 0.8455,
"step": 155
},
{
"epoch": 0.8699895433949111,
"grad_norm": 1.8953728977728321,
"learning_rate": 2.880955070583555e-07,
"loss": 0.8089,
"step": 156
},
{
"epoch": 0.8755663994423144,
"grad_norm": 1.0266628340926212,
"learning_rate": 2.6591839919146963e-07,
"loss": 0.6747,
"step": 157
},
{
"epoch": 0.8811432554897177,
"grad_norm": 1.0638475327851682,
"learning_rate": 2.445814380474057e-07,
"loss": 0.6979,
"step": 158
},
{
"epoch": 0.8867201115371209,
"grad_norm": 0.9287481759641896,
"learning_rate": 2.240926475846336e-07,
"loss": 0.7963,
"step": 159
},
{
"epoch": 0.8922969675845243,
"grad_norm": 0.8785430203193436,
"learning_rate": 2.044597327993153e-07,
"loss": 0.6534,
"step": 160
},
{
"epoch": 0.8978738236319275,
"grad_norm": 0.953415594296327,
"learning_rate": 1.8569007682777417e-07,
"loss": 0.7474,
"step": 161
},
{
"epoch": 0.9034506796793308,
"grad_norm": 0.9294478803532011,
"learning_rate": 1.6779073816999864e-07,
"loss": 0.7906,
"step": 162
},
{
"epoch": 0.909027535726734,
"grad_norm": 0.8384613909869523,
"learning_rate": 1.507684480352292e-07,
"loss": 0.6377,
"step": 163
},
{
"epoch": 0.9146043917741373,
"grad_norm": 0.8835908163025235,
"learning_rate": 1.3462960781062433e-07,
"loss": 0.6392,
"step": 164
},
{
"epoch": 0.9201812478215406,
"grad_norm": 0.8449058090284122,
"learning_rate": 1.1938028665396172e-07,
"loss": 0.5656,
"step": 165
},
{
"epoch": 0.9257581038689439,
"grad_norm": 0.9392723321505706,
"learning_rate": 1.0502621921127776e-07,
"loss": 0.7239,
"step": 166
},
{
"epoch": 0.9313349599163472,
"grad_norm": 0.8758199104024277,
"learning_rate": 9.157280346029918e-08,
"loss": 0.6666,
"step": 167
},
{
"epoch": 0.9369118159637504,
"grad_norm": 0.8830370044979928,
"learning_rate": 7.902509868048552e-08,
"loss": 0.6846,
"step": 168
},
{
"epoch": 0.9424886720111537,
"grad_norm": 0.8870739682602913,
"learning_rate": 6.738782355044048e-08,
"loss": 0.7071,
"step": 169
},
{
"epoch": 0.948065528058557,
"grad_norm": 0.8732031194960946,
"learning_rate": 5.6665354373411085e-08,
"loss": 0.7037,
"step": 170
},
{
"epoch": 0.9536423841059603,
"grad_norm": 0.9251119129299412,
"learning_rate": 4.6861723431538273e-08,
"loss": 0.6949,
"step": 171
},
{
"epoch": 0.9592192401533636,
"grad_norm": 0.8254069704702977,
"learning_rate": 3.798061746947995e-08,
"loss": 0.5747,
"step": 172
},
{
"epoch": 0.9647960962007668,
"grad_norm": 0.8556828230063313,
"learning_rate": 3.0025376307977474e-08,
"loss": 0.6367,
"step": 173
},
{
"epoch": 0.9703729522481701,
"grad_norm": 0.8964787284563904,
"learning_rate": 2.299899158788671e-08,
"loss": 0.6943,
"step": 174
},
{
"epoch": 0.9759498082955733,
"grad_norm": 0.8529908965639459,
"learning_rate": 1.6904105645142443e-08,
"loss": 0.6373,
"step": 175
},
{
"epoch": 0.9815266643429766,
"grad_norm": 0.8934738534392561,
"learning_rate": 1.1743010517085428e-08,
"loss": 0.6968,
"step": 176
},
{
"epoch": 0.98710352039038,
"grad_norm": 0.9302061985515149,
"learning_rate": 7.517647080519941e-09,
"loss": 0.7773,
"step": 177
},
{
"epoch": 0.9926803764377832,
"grad_norm": 0.9858177539703182,
"learning_rate": 4.229604321829561e-09,
"loss": 0.7393,
"step": 178
},
{
"epoch": 0.9982572324851865,
"grad_norm": 0.9326264974645483,
"learning_rate": 1.8801187394248966e-09,
"loss": 0.7246,
"step": 179
},
{
"epoch": 1.0,
"grad_norm": 0.9326264974645483,
"learning_rate": 4.700738787466463e-10,
"loss": 0.9,
"step": 180
}
],
"logging_steps": 1,
"max_steps": 180,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 27643248377856.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}