Qwen2.5-Math-7B-QwQMath8K-SFT / trainer_state.json
1t4chi's picture
Upload folder using huggingface_hub
e766173 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.870544090056285,
"eval_steps": 500,
"global_step": 165,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0300187617260788,
"grad_norm": 19.618404854139,
"learning_rate": 1e-05,
"loss": 0.6154,
"mean_token_accuracy": 0.8398024253547192,
"step": 1
},
{
"epoch": 0.0600375234521576,
"grad_norm": 21.798338409796745,
"learning_rate": 2e-05,
"loss": 0.6416,
"mean_token_accuracy": 0.8340235594660044,
"step": 2
},
{
"epoch": 0.0900562851782364,
"grad_norm": 13.833687232901854,
"learning_rate": 3e-05,
"loss": 0.5896,
"mean_token_accuracy": 0.8433804120868444,
"step": 3
},
{
"epoch": 0.1200750469043152,
"grad_norm": 5.505910810820941,
"learning_rate": 4e-05,
"loss": 0.5319,
"mean_token_accuracy": 0.8556831870228052,
"step": 4
},
{
"epoch": 0.150093808630394,
"grad_norm": 5.278702397056334,
"learning_rate": 5e-05,
"loss": 0.4437,
"mean_token_accuracy": 0.8703144080936909,
"step": 5
},
{
"epoch": 0.1801125703564728,
"grad_norm": 2.1162182135646033,
"learning_rate": 4.9995181012051625e-05,
"loss": 0.4193,
"mean_token_accuracy": 0.878010880202055,
"step": 6
},
{
"epoch": 0.2101313320825516,
"grad_norm": 14.208326816182495,
"learning_rate": 4.9980725906018074e-05,
"loss": 0.4096,
"mean_token_accuracy": 0.8749048858880997,
"step": 7
},
{
"epoch": 0.2401500938086304,
"grad_norm": 2.162621083920564,
"learning_rate": 4.9956640254617906e-05,
"loss": 0.3978,
"mean_token_accuracy": 0.8762698639184237,
"step": 8
},
{
"epoch": 0.2701688555347092,
"grad_norm": 2.034935016310286,
"learning_rate": 4.99229333433282e-05,
"loss": 0.374,
"mean_token_accuracy": 0.8837966062128544,
"step": 9
},
{
"epoch": 0.300187617260788,
"grad_norm": 1.2368690799465214,
"learning_rate": 4.987961816680492e-05,
"loss": 0.3545,
"mean_token_accuracy": 0.8879855256527662,
"step": 10
},
{
"epoch": 0.3302063789868668,
"grad_norm": 0.8933826446995154,
"learning_rate": 4.982671142387316e-05,
"loss": 0.3527,
"mean_token_accuracy": 0.8875276073813438,
"step": 11
},
{
"epoch": 0.3602251407129456,
"grad_norm": 0.8267017608965835,
"learning_rate": 4.976423351108943e-05,
"loss": 0.3186,
"mean_token_accuracy": 0.8965118452906609,
"step": 12
},
{
"epoch": 0.3902439024390244,
"grad_norm": 0.6967492468619846,
"learning_rate": 4.9692208514878444e-05,
"loss": 0.3016,
"mean_token_accuracy": 0.9023277424275875,
"step": 13
},
{
"epoch": 0.4202626641651032,
"grad_norm": 0.626201960051008,
"learning_rate": 4.9610664202247294e-05,
"loss": 0.3189,
"mean_token_accuracy": 0.8961522448807955,
"step": 14
},
{
"epoch": 0.450281425891182,
"grad_norm": 0.519219193366074,
"learning_rate": 4.951963201008076e-05,
"loss": 0.3031,
"mean_token_accuracy": 0.9007221981883049,
"step": 15
},
{
"epoch": 0.4803001876172608,
"grad_norm": 0.5430064582418314,
"learning_rate": 4.9419147033021814e-05,
"loss": 0.2963,
"mean_token_accuracy": 0.9018692336976528,
"step": 16
},
{
"epoch": 0.5103189493433395,
"grad_norm": 0.45295446404829903,
"learning_rate": 4.9309248009941914e-05,
"loss": 0.2945,
"mean_token_accuracy": 0.9022990744560957,
"step": 17
},
{
"epoch": 0.5403377110694184,
"grad_norm": 0.39810793732783883,
"learning_rate": 4.9189977309006495e-05,
"loss": 0.2867,
"mean_token_accuracy": 0.9044581968337297,
"step": 18
},
{
"epoch": 0.5703564727954972,
"grad_norm": 0.29279224179883323,
"learning_rate": 4.906138091134118e-05,
"loss": 0.2817,
"mean_token_accuracy": 0.9055654220283031,
"step": 19
},
{
"epoch": 0.600375234521576,
"grad_norm": 0.26896824294508065,
"learning_rate": 4.892350839330522e-05,
"loss": 0.2921,
"mean_token_accuracy": 0.9025575239211321,
"step": 20
},
{
"epoch": 0.6303939962476548,
"grad_norm": 0.2400534443678901,
"learning_rate": 4.877641290737884e-05,
"loss": 0.2831,
"mean_token_accuracy": 0.9051281735301018,
"step": 21
},
{
"epoch": 0.6604127579737336,
"grad_norm": 0.2249165746946966,
"learning_rate": 4.862015116167196e-05,
"loss": 0.2698,
"mean_token_accuracy": 0.9093952961266041,
"step": 22
},
{
"epoch": 0.6904315196998124,
"grad_norm": 0.264247809537063,
"learning_rate": 4.8454783398062106e-05,
"loss": 0.2686,
"mean_token_accuracy": 0.9091940615326166,
"step": 23
},
{
"epoch": 0.7204502814258912,
"grad_norm": 0.20930943221019285,
"learning_rate": 4.828037336897009e-05,
"loss": 0.2687,
"mean_token_accuracy": 0.9089630376547575,
"step": 24
},
{
"epoch": 0.7504690431519699,
"grad_norm": 0.23889395502942187,
"learning_rate": 4.8096988312782174e-05,
"loss": 0.2871,
"mean_token_accuracy": 0.9030982349067926,
"step": 25
},
{
"epoch": 0.7804878048780488,
"grad_norm": 0.21055564681809716,
"learning_rate": 4.7904698927928406e-05,
"loss": 0.272,
"mean_token_accuracy": 0.9078760109841824,
"step": 26
},
{
"epoch": 0.8105065666041276,
"grad_norm": 0.21681199372541698,
"learning_rate": 4.7703579345627035e-05,
"loss": 0.2619,
"mean_token_accuracy": 0.9109147116541862,
"step": 27
},
{
"epoch": 0.8405253283302064,
"grad_norm": 0.21247193653216784,
"learning_rate": 4.749370710130554e-05,
"loss": 0.2721,
"mean_token_accuracy": 0.9074795469641685,
"step": 28
},
{
"epoch": 0.8705440900562852,
"grad_norm": 0.20525916333687041,
"learning_rate": 4.72751631047092e-05,
"loss": 0.2539,
"mean_token_accuracy": 0.9133741557598114,
"step": 29
},
{
"epoch": 0.900562851782364,
"grad_norm": 0.21529654405923737,
"learning_rate": 4.7048031608708876e-05,
"loss": 0.2603,
"mean_token_accuracy": 0.9109627865254879,
"step": 30
},
{
"epoch": 0.9305816135084428,
"grad_norm": 0.20791794762620378,
"learning_rate": 4.681240017681993e-05,
"loss": 0.2593,
"mean_token_accuracy": 0.9111653957515955,
"step": 31
},
{
"epoch": 0.9606003752345216,
"grad_norm": 0.20774824517485244,
"learning_rate": 4.65683596494448e-05,
"loss": 0.2719,
"mean_token_accuracy": 0.9068219736218452,
"step": 32
},
{
"epoch": 0.9906191369606003,
"grad_norm": 0.28582938868285823,
"learning_rate": 4.6316004108852305e-05,
"loss": 0.2645,
"mean_token_accuracy": 0.9088481441140175,
"step": 33
},
{
"epoch": 1.0,
"grad_norm": 0.28582938868285823,
"learning_rate": 4.6055430842907167e-05,
"loss": 0.2564,
"mean_token_accuracy": 0.9133941173553467,
"step": 34
},
{
"epoch": 1.0300187617260788,
"grad_norm": 0.3589081697155284,
"learning_rate": 4.5786740307563636e-05,
"loss": 0.2082,
"mean_token_accuracy": 0.9285639356821775,
"step": 35
},
{
"epoch": 1.0600375234521575,
"grad_norm": 0.19449792688035672,
"learning_rate": 4.551003608813784e-05,
"loss": 0.2047,
"mean_token_accuracy": 0.9296260979026556,
"step": 36
},
{
"epoch": 1.0900562851782365,
"grad_norm": 0.23233991689426617,
"learning_rate": 4.522542485937369e-05,
"loss": 0.1979,
"mean_token_accuracy": 0.9314604848623276,
"step": 37
},
{
"epoch": 1.1200750469043153,
"grad_norm": 0.21035371628271216,
"learning_rate": 4.493301634431768e-05,
"loss": 0.2014,
"mean_token_accuracy": 0.9298410974442959,
"step": 38
},
{
"epoch": 1.150093808630394,
"grad_norm": 0.20990624713625997,
"learning_rate": 4.463292327201862e-05,
"loss": 0.1913,
"mean_token_accuracy": 0.933486595749855,
"step": 39
},
{
"epoch": 1.1801125703564728,
"grad_norm": 0.2156272816847033,
"learning_rate": 4.4325261334068426e-05,
"loss": 0.2031,
"mean_token_accuracy": 0.9307098593562841,
"step": 40
},
{
"epoch": 1.2101313320825515,
"grad_norm": 0.21696878272059866,
"learning_rate": 4.401014914000078e-05,
"loss": 0.1915,
"mean_token_accuracy": 0.9335418920964003,
"step": 41
},
{
"epoch": 1.2401500938086305,
"grad_norm": 0.1818612765558643,
"learning_rate": 4.3687708171564925e-05,
"loss": 0.1791,
"mean_token_accuracy": 0.9380327388644218,
"step": 42
},
{
"epoch": 1.2701688555347093,
"grad_norm": 0.18129814277988898,
"learning_rate": 4.335806273589214e-05,
"loss": 0.1931,
"mean_token_accuracy": 0.9324233587831259,
"step": 43
},
{
"epoch": 1.300187617260788,
"grad_norm": 0.18921071728690822,
"learning_rate": 4.302133991757297e-05,
"loss": 0.1861,
"mean_token_accuracy": 0.9347784202545881,
"step": 44
},
{
"epoch": 1.3302063789868668,
"grad_norm": 0.1846346124739407,
"learning_rate": 4.267766952966369e-05,
"loss": 0.1978,
"mean_token_accuracy": 0.9310048930346966,
"step": 45
},
{
"epoch": 1.3602251407129455,
"grad_norm": 0.18689120002736795,
"learning_rate": 4.23271840636409e-05,
"loss": 0.1931,
"mean_token_accuracy": 0.9321947041898966,
"step": 46
},
{
"epoch": 1.3902439024390243,
"grad_norm": 0.18301258133692994,
"learning_rate": 4.197001863832355e-05,
"loss": 0.1991,
"mean_token_accuracy": 0.9307528082281351,
"step": 47
},
{
"epoch": 1.4202626641651033,
"grad_norm": 0.20071944245709974,
"learning_rate": 4.1606310947782044e-05,
"loss": 0.1883,
"mean_token_accuracy": 0.9341552760452032,
"step": 48
},
{
"epoch": 1.450281425891182,
"grad_norm": 0.21531485697866234,
"learning_rate": 4.123620120825459e-05,
"loss": 0.1793,
"mean_token_accuracy": 0.9380034245550632,
"step": 49
},
{
"epoch": 1.4803001876172608,
"grad_norm": 0.17040701196766744,
"learning_rate": 4.085983210409114e-05,
"loss": 0.17,
"mean_token_accuracy": 0.9408059008419514,
"step": 50
},
{
"epoch": 1.5103189493433395,
"grad_norm": 0.17082023776864208,
"learning_rate": 4.047734873274586e-05,
"loss": 0.1777,
"mean_token_accuracy": 0.9373182617127895,
"step": 51
},
{
"epoch": 1.5403377110694185,
"grad_norm": 0.18880547525592725,
"learning_rate": 4.008889854883929e-05,
"loss": 0.1905,
"mean_token_accuracy": 0.9339997190982103,
"step": 52
},
{
"epoch": 1.5703564727954973,
"grad_norm": 0.2003270144688197,
"learning_rate": 3.969463130731183e-05,
"loss": 0.1829,
"mean_token_accuracy": 0.9364625960588455,
"step": 53
},
{
"epoch": 1.600375234521576,
"grad_norm": 0.16248574881358357,
"learning_rate": 3.9294699005690305e-05,
"loss": 0.187,
"mean_token_accuracy": 0.9349782522767782,
"step": 54
},
{
"epoch": 1.6303939962476548,
"grad_norm": 0.16268952077579069,
"learning_rate": 3.888925582549006e-05,
"loss": 0.1806,
"mean_token_accuracy": 0.9380554854869843,
"step": 55
},
{
"epoch": 1.6604127579737336,
"grad_norm": 0.16260973286493194,
"learning_rate": 3.847845807277502e-05,
"loss": 0.1756,
"mean_token_accuracy": 0.9381309170275927,
"step": 56
},
{
"epoch": 1.6904315196998123,
"grad_norm": 0.18849387268876527,
"learning_rate": 3.8062464117898724e-05,
"loss": 0.1905,
"mean_token_accuracy": 0.933776805177331,
"step": 57
},
{
"epoch": 1.720450281425891,
"grad_norm": 0.1812480467627804,
"learning_rate": 3.764143433444962e-05,
"loss": 0.1845,
"mean_token_accuracy": 0.9354843944311142,
"step": 58
},
{
"epoch": 1.7504690431519698,
"grad_norm": 0.19727408903046884,
"learning_rate": 3.721553103742388e-05,
"loss": 0.1839,
"mean_token_accuracy": 0.9353628680109978,
"step": 59
},
{
"epoch": 1.7804878048780488,
"grad_norm": 0.16881751417638702,
"learning_rate": 3.678491842064995e-05,
"loss": 0.1847,
"mean_token_accuracy": 0.9353015590459108,
"step": 60
},
{
"epoch": 1.8105065666041276,
"grad_norm": 0.1805153593928837,
"learning_rate": 3.634976249348867e-05,
"loss": 0.189,
"mean_token_accuracy": 0.9340192507952452,
"step": 61
},
{
"epoch": 1.8405253283302065,
"grad_norm": 0.16744864978079732,
"learning_rate": 3.591023101683355e-05,
"loss": 0.1873,
"mean_token_accuracy": 0.9332233294844627,
"step": 62
},
{
"epoch": 1.8705440900562853,
"grad_norm": 0.20943512548005347,
"learning_rate": 3.54664934384357e-05,
"loss": 0.1833,
"mean_token_accuracy": 0.9361728671938181,
"step": 63
},
{
"epoch": 1.900562851782364,
"grad_norm": 0.15100109107147408,
"learning_rate": 3.5018720827578524e-05,
"loss": 0.177,
"mean_token_accuracy": 0.9376390129327774,
"step": 64
},
{
"epoch": 1.9305816135084428,
"grad_norm": 0.20136076678950812,
"learning_rate": 3.456708580912725e-05,
"loss": 0.1847,
"mean_token_accuracy": 0.9356410764157772,
"step": 65
},
{
"epoch": 1.9606003752345216,
"grad_norm": 0.16935110772638642,
"learning_rate": 3.411176249697875e-05,
"loss": 0.1882,
"mean_token_accuracy": 0.9341955110430717,
"step": 66
},
{
"epoch": 1.9906191369606003,
"grad_norm": 0.17801077092117232,
"learning_rate": 3.365292642693732e-05,
"loss": 0.1791,
"mean_token_accuracy": 0.9368807151913643,
"step": 67
},
{
"epoch": 2.0,
"grad_norm": 0.17801077092117232,
"learning_rate": 3.319075448904234e-05,
"loss": 0.1817,
"mean_token_accuracy": 0.9353618502616883,
"step": 68
},
{
"epoch": 2.0300187617260788,
"grad_norm": 0.34283977157187906,
"learning_rate": 3.272542485937369e-05,
"loss": 0.1162,
"mean_token_accuracy": 0.9604951441287994,
"step": 69
},
{
"epoch": 2.0600375234521575,
"grad_norm": 0.25353133352641416,
"learning_rate": 3.225711693136156e-05,
"loss": 0.1155,
"mean_token_accuracy": 0.9606517199426889,
"step": 70
},
{
"epoch": 2.0900562851782363,
"grad_norm": 0.36813345733413727,
"learning_rate": 3.178601124662686e-05,
"loss": 0.1092,
"mean_token_accuracy": 0.9620461780577898,
"step": 71
},
{
"epoch": 2.120075046904315,
"grad_norm": 0.20837522140479256,
"learning_rate": 3.131228942537895e-05,
"loss": 0.1064,
"mean_token_accuracy": 0.9636496491730213,
"step": 72
},
{
"epoch": 2.150093808630394,
"grad_norm": 0.2546796945935164,
"learning_rate": 3.083613409639764e-05,
"loss": 0.1082,
"mean_token_accuracy": 0.9626397844403982,
"step": 73
},
{
"epoch": 2.180112570356473,
"grad_norm": 0.2517042958600063,
"learning_rate": 3.035772882662627e-05,
"loss": 0.1024,
"mean_token_accuracy": 0.9642387926578522,
"step": 74
},
{
"epoch": 2.2101313320825517,
"grad_norm": 0.16863389096389939,
"learning_rate": 2.9877258050403212e-05,
"loss": 0.1011,
"mean_token_accuracy": 0.964973971247673,
"step": 75
},
{
"epoch": 2.2401500938086305,
"grad_norm": 0.2256068322542817,
"learning_rate": 2.9394906998358868e-05,
"loss": 0.0979,
"mean_token_accuracy": 0.9662024211138487,
"step": 76
},
{
"epoch": 2.2701688555347093,
"grad_norm": 0.19130902536055486,
"learning_rate": 2.8910861626005776e-05,
"loss": 0.101,
"mean_token_accuracy": 0.9646210763603449,
"step": 77
},
{
"epoch": 2.300187617260788,
"grad_norm": 0.18029622833908,
"learning_rate": 2.8425308542049206e-05,
"loss": 0.0943,
"mean_token_accuracy": 0.9668951816856861,
"step": 78
},
{
"epoch": 2.3302063789868668,
"grad_norm": 0.1715983987427455,
"learning_rate": 2.7938434936445945e-05,
"loss": 0.1025,
"mean_token_accuracy": 0.9641035441309214,
"step": 79
},
{
"epoch": 2.3602251407129455,
"grad_norm": 0.17151947074238844,
"learning_rate": 2.7450428508239024e-05,
"loss": 0.0993,
"mean_token_accuracy": 0.9651761185377836,
"step": 80
},
{
"epoch": 2.3902439024390243,
"grad_norm": 0.17762362563985393,
"learning_rate": 2.6961477393196126e-05,
"loss": 0.1016,
"mean_token_accuracy": 0.9645342864096165,
"step": 81
},
{
"epoch": 2.420262664165103,
"grad_norm": 0.17493795219201744,
"learning_rate": 2.6471770091279724e-05,
"loss": 0.1032,
"mean_token_accuracy": 0.965608624741435,
"step": 82
},
{
"epoch": 2.450281425891182,
"grad_norm": 0.21622340080905333,
"learning_rate": 2.598149539397672e-05,
"loss": 0.1056,
"mean_token_accuracy": 0.9633868020027876,
"step": 83
},
{
"epoch": 2.480300187617261,
"grad_norm": 0.18325655719580544,
"learning_rate": 2.5490842311515707e-05,
"loss": 0.1003,
"mean_token_accuracy": 0.9652356337755919,
"step": 84
},
{
"epoch": 2.5103189493433398,
"grad_norm": 0.16079654454953773,
"learning_rate": 2.5e-05,
"loss": 0.0951,
"mean_token_accuracy": 0.9671048391610384,
"step": 85
},
{
"epoch": 2.5403377110694185,
"grad_norm": 0.177885663467419,
"learning_rate": 2.4509157688484295e-05,
"loss": 0.1019,
"mean_token_accuracy": 0.9652324616909027,
"step": 86
},
{
"epoch": 2.5703564727954973,
"grad_norm": 0.16463009515777124,
"learning_rate": 2.4018504606023293e-05,
"loss": 0.0983,
"mean_token_accuracy": 0.9660285171121359,
"step": 87
},
{
"epoch": 2.600375234521576,
"grad_norm": 0.14988401935266468,
"learning_rate": 2.3528229908720272e-05,
"loss": 0.0973,
"mean_token_accuracy": 0.9662998840212822,
"step": 88
},
{
"epoch": 2.630393996247655,
"grad_norm": 0.1714584031856408,
"learning_rate": 2.303852260680388e-05,
"loss": 0.0993,
"mean_token_accuracy": 0.9654844384640455,
"step": 89
},
{
"epoch": 2.6604127579737336,
"grad_norm": 0.15655630724758532,
"learning_rate": 2.2549571491760986e-05,
"loss": 0.1044,
"mean_token_accuracy": 0.9633280653506517,
"step": 90
},
{
"epoch": 2.6904315196998123,
"grad_norm": 0.15989111678931958,
"learning_rate": 2.2061565063554064e-05,
"loss": 0.0962,
"mean_token_accuracy": 0.9662177134305239,
"step": 91
},
{
"epoch": 2.720450281425891,
"grad_norm": 0.1612719262065956,
"learning_rate": 2.1574691457950803e-05,
"loss": 0.1,
"mean_token_accuracy": 0.9648805633187294,
"step": 92
},
{
"epoch": 2.75046904315197,
"grad_norm": 0.1447218929697437,
"learning_rate": 2.1089138373994223e-05,
"loss": 0.097,
"mean_token_accuracy": 0.9660444520413876,
"step": 93
},
{
"epoch": 2.7804878048780486,
"grad_norm": 0.15448044912912087,
"learning_rate": 2.0605093001641138e-05,
"loss": 0.1037,
"mean_token_accuracy": 0.9642052594572306,
"step": 94
},
{
"epoch": 2.8105065666041273,
"grad_norm": 0.14976483567215834,
"learning_rate": 2.0122741949596797e-05,
"loss": 0.103,
"mean_token_accuracy": 0.9642070364207029,
"step": 95
},
{
"epoch": 2.8405253283302065,
"grad_norm": 0.15397846138230065,
"learning_rate": 1.9642271173373737e-05,
"loss": 0.1024,
"mean_token_accuracy": 0.9642751514911652,
"step": 96
},
{
"epoch": 2.8705440900562853,
"grad_norm": 0.16533125622570222,
"learning_rate": 1.9163865903602374e-05,
"loss": 0.0983,
"mean_token_accuracy": 0.9661570060998201,
"step": 97
},
{
"epoch": 2.900562851782364,
"grad_norm": 0.14567827324511498,
"learning_rate": 1.868771057462105e-05,
"loss": 0.0895,
"mean_token_accuracy": 0.9689803905785084,
"step": 98
},
{
"epoch": 2.930581613508443,
"grad_norm": 0.13721507889257023,
"learning_rate": 1.8213988753373146e-05,
"loss": 0.1018,
"mean_token_accuracy": 0.9658490009605885,
"step": 99
},
{
"epoch": 2.9606003752345216,
"grad_norm": 0.18558487132226667,
"learning_rate": 1.7742883068638447e-05,
"loss": 0.0975,
"mean_token_accuracy": 0.9673260115087032,
"step": 100
},
{
"epoch": 2.9906191369606003,
"grad_norm": 0.14278892649537844,
"learning_rate": 1.7274575140626318e-05,
"loss": 0.0945,
"mean_token_accuracy": 0.9672219399362803,
"step": 101
},
{
"epoch": 3.0,
"grad_norm": 0.14278892649537844,
"learning_rate": 1.6809245510957665e-05,
"loss": 0.104,
"mean_token_accuracy": 0.9641202390193939,
"step": 102
},
{
"epoch": 3.0300187617260788,
"grad_norm": 0.29206855231690615,
"learning_rate": 1.6347073573062672e-05,
"loss": 0.052,
"mean_token_accuracy": 0.9840696156024933,
"step": 103
},
{
"epoch": 3.0600375234521575,
"grad_norm": 0.21146610857781498,
"learning_rate": 1.588823750302126e-05,
"loss": 0.0506,
"mean_token_accuracy": 0.9837026111781597,
"step": 104
},
{
"epoch": 3.0900562851782363,
"grad_norm": 0.1728680637000517,
"learning_rate": 1.5432914190872757e-05,
"loss": 0.0492,
"mean_token_accuracy": 0.9842210356146097,
"step": 105
},
{
"epoch": 3.120075046904315,
"grad_norm": 0.137716977630954,
"learning_rate": 1.498127917242148e-05,
"loss": 0.0493,
"mean_token_accuracy": 0.9839507173746824,
"step": 106
},
{
"epoch": 3.150093808630394,
"grad_norm": 0.14551903804275892,
"learning_rate": 1.4533506561564306e-05,
"loss": 0.0544,
"mean_token_accuracy": 0.9822139292955399,
"step": 107
},
{
"epoch": 3.180112570356473,
"grad_norm": 0.16669835535632535,
"learning_rate": 1.4089768983166444e-05,
"loss": 0.0489,
"mean_token_accuracy": 0.9840298742055893,
"step": 108
},
{
"epoch": 3.2101313320825517,
"grad_norm": 0.18300271784408872,
"learning_rate": 1.3650237506511331e-05,
"loss": 0.0497,
"mean_token_accuracy": 0.983882175758481,
"step": 109
},
{
"epoch": 3.2401500938086305,
"grad_norm": 0.1843234481043501,
"learning_rate": 1.3215081579350058e-05,
"loss": 0.0485,
"mean_token_accuracy": 0.9843094442039728,
"step": 110
},
{
"epoch": 3.2701688555347093,
"grad_norm": 0.3461827490875774,
"learning_rate": 1.2784468962576136e-05,
"loss": 0.047,
"mean_token_accuracy": 0.9847969133406878,
"step": 111
},
{
"epoch": 3.300187617260788,
"grad_norm": 0.15632977455270483,
"learning_rate": 1.235856566555039e-05,
"loss": 0.049,
"mean_token_accuracy": 0.9837981257587671,
"step": 112
},
{
"epoch": 3.3302063789868668,
"grad_norm": 0.14640471914964392,
"learning_rate": 1.1937535882101281e-05,
"loss": 0.0458,
"mean_token_accuracy": 0.9851204100996256,
"step": 113
},
{
"epoch": 3.3602251407129455,
"grad_norm": 0.13729939899053178,
"learning_rate": 1.1521541927224994e-05,
"loss": 0.0456,
"mean_token_accuracy": 0.9848766028881073,
"step": 114
},
{
"epoch": 3.3902439024390243,
"grad_norm": 0.13806503349144675,
"learning_rate": 1.1110744174509952e-05,
"loss": 0.049,
"mean_token_accuracy": 0.9844018053263426,
"step": 115
},
{
"epoch": 3.420262664165103,
"grad_norm": 0.1677329902297057,
"learning_rate": 1.0705300994309697e-05,
"loss": 0.0509,
"mean_token_accuracy": 0.9836404304951429,
"step": 116
},
{
"epoch": 3.450281425891182,
"grad_norm": 0.1363456396457925,
"learning_rate": 1.0305368692688174e-05,
"loss": 0.0489,
"mean_token_accuracy": 0.9842113871127367,
"step": 117
},
{
"epoch": 3.480300187617261,
"grad_norm": 0.14670430283357652,
"learning_rate": 9.911101451160715e-06,
"loss": 0.0476,
"mean_token_accuracy": 0.9845409169793129,
"step": 118
},
{
"epoch": 3.5103189493433398,
"grad_norm": 0.13593290922974113,
"learning_rate": 9.522651267254149e-06,
"loss": 0.0498,
"mean_token_accuracy": 0.9841745216399431,
"step": 119
},
{
"epoch": 3.5403377110694185,
"grad_norm": 0.1405157943110022,
"learning_rate": 9.140167895908867e-06,
"loss": 0.0515,
"mean_token_accuracy": 0.9838052876293659,
"step": 120
},
{
"epoch": 3.5703564727954973,
"grad_norm": 0.13398507397046694,
"learning_rate": 8.763798791745411e-06,
"loss": 0.044,
"mean_token_accuracy": 0.985531248152256,
"step": 121
},
{
"epoch": 3.600375234521576,
"grad_norm": 0.12595342205919996,
"learning_rate": 8.393689052217966e-06,
"loss": 0.0443,
"mean_token_accuracy": 0.9851887430995703,
"step": 122
},
{
"epoch": 3.630393996247655,
"grad_norm": 0.12802288754577185,
"learning_rate": 8.029981361676456e-06,
"loss": 0.0477,
"mean_token_accuracy": 0.9847091306000948,
"step": 123
},
{
"epoch": 3.6604127579737336,
"grad_norm": 0.13540249038634009,
"learning_rate": 7.672815936359107e-06,
"loss": 0.0437,
"mean_token_accuracy": 0.9858846813440323,
"step": 124
},
{
"epoch": 3.6904315196998123,
"grad_norm": 0.1272358814553976,
"learning_rate": 7.3223304703363135e-06,
"loss": 0.0472,
"mean_token_accuracy": 0.9844079315662384,
"step": 125
},
{
"epoch": 3.720450281425891,
"grad_norm": 0.13634273240990136,
"learning_rate": 6.9786600824270296e-06,
"loss": 0.0427,
"mean_token_accuracy": 0.9858784638345242,
"step": 126
},
{
"epoch": 3.75046904315197,
"grad_norm": 0.15771802380242175,
"learning_rate": 6.641937264107867e-06,
"loss": 0.0469,
"mean_token_accuracy": 0.9847830552607775,
"step": 127
},
{
"epoch": 3.7804878048780486,
"grad_norm": 0.13754448160952976,
"learning_rate": 6.312291828435077e-06,
"loss": 0.0462,
"mean_token_accuracy": 0.9851614981889725,
"step": 128
},
{
"epoch": 3.8105065666041273,
"grad_norm": 0.14734425443158122,
"learning_rate": 5.989850859999227e-06,
"loss": 0.0422,
"mean_token_accuracy": 0.9861964080482721,
"step": 129
},
{
"epoch": 3.8405253283302065,
"grad_norm": 0.12037178039604896,
"learning_rate": 5.674738665931575e-06,
"loss": 0.0445,
"mean_token_accuracy": 0.9854839760810137,
"step": 130
},
{
"epoch": 3.8705440900562853,
"grad_norm": 0.13270114277633968,
"learning_rate": 5.367076727981382e-06,
"loss": 0.046,
"mean_token_accuracy": 0.98503128439188,
"step": 131
},
{
"epoch": 3.900562851782364,
"grad_norm": 0.12186409904443084,
"learning_rate": 5.066983655682325e-06,
"loss": 0.0413,
"mean_token_accuracy": 0.9866250548511744,
"step": 132
},
{
"epoch": 3.930581613508443,
"grad_norm": 0.11572178677884377,
"learning_rate": 4.7745751406263165e-06,
"loss": 0.0455,
"mean_token_accuracy": 0.9853598214685917,
"step": 133
},
{
"epoch": 3.9606003752345216,
"grad_norm": 0.1285035698798016,
"learning_rate": 4.48996391186216e-06,
"loss": 0.0446,
"mean_token_accuracy": 0.9853111784905195,
"step": 134
},
{
"epoch": 3.9906191369606003,
"grad_norm": 0.12237492704812947,
"learning_rate": 4.213259692436367e-06,
"loss": 0.0472,
"mean_token_accuracy": 0.9846988655626774,
"step": 135
},
{
"epoch": 4.0,
"grad_norm": 0.17459270329168236,
"learning_rate": 3.944569157092839e-06,
"loss": 0.0397,
"mean_token_accuracy": 0.9869139909744262,
"step": 136
},
{
"epoch": 4.030018761726079,
"grad_norm": 0.21688322258057258,
"learning_rate": 3.6839958911476957e-06,
"loss": 0.0316,
"mean_token_accuracy": 0.9908953290432692,
"step": 137
},
{
"epoch": 4.0600375234521575,
"grad_norm": 0.12510412863095888,
"learning_rate": 3.431640350555204e-06,
"loss": 0.0298,
"mean_token_accuracy": 0.9912976007908583,
"step": 138
},
{
"epoch": 4.090056285178236,
"grad_norm": 0.11532401288217803,
"learning_rate": 3.187599823180071e-06,
"loss": 0.0291,
"mean_token_accuracy": 0.9916361309587955,
"step": 139
},
{
"epoch": 4.120075046904315,
"grad_norm": 0.11662866353878451,
"learning_rate": 2.9519683912911266e-06,
"loss": 0.0316,
"mean_token_accuracy": 0.9906173534691334,
"step": 140
},
{
"epoch": 4.150093808630394,
"grad_norm": 0.10785269578007366,
"learning_rate": 2.7248368952908053e-06,
"loss": 0.0278,
"mean_token_accuracy": 0.9918341338634491,
"step": 141
},
{
"epoch": 4.1801125703564725,
"grad_norm": 0.10540687517578978,
"learning_rate": 2.506292898694468e-06,
"loss": 0.0304,
"mean_token_accuracy": 0.9909927677363157,
"step": 142
},
{
"epoch": 4.210131332082551,
"grad_norm": 0.10795341728368958,
"learning_rate": 2.296420654372966e-06,
"loss": 0.0292,
"mean_token_accuracy": 0.9913486260920763,
"step": 143
},
{
"epoch": 4.24015009380863,
"grad_norm": 0.09918400957702202,
"learning_rate": 2.0953010720716037e-06,
"loss": 0.0285,
"mean_token_accuracy": 0.991315545514226,
"step": 144
},
{
"epoch": 4.270168855534709,
"grad_norm": 0.1028689650543891,
"learning_rate": 1.9030116872178316e-06,
"loss": 0.0268,
"mean_token_accuracy": 0.9919464886188507,
"step": 145
},
{
"epoch": 4.300187617260788,
"grad_norm": 0.0934195965967741,
"learning_rate": 1.7196266310299108e-06,
"loss": 0.0271,
"mean_token_accuracy": 0.9918058719485998,
"step": 146
},
{
"epoch": 4.330206378986867,
"grad_norm": 0.09146571370676639,
"learning_rate": 1.5452166019378989e-06,
"loss": 0.0273,
"mean_token_accuracy": 0.9917649105191231,
"step": 147
},
{
"epoch": 4.360225140712946,
"grad_norm": 0.09753557772930677,
"learning_rate": 1.379848838328049e-06,
"loss": 0.0286,
"mean_token_accuracy": 0.9913905151188374,
"step": 148
},
{
"epoch": 4.390243902439025,
"grad_norm": 0.11653668851139358,
"learning_rate": 1.2235870926211619e-06,
"loss": 0.0277,
"mean_token_accuracy": 0.9916701205074787,
"step": 149
},
{
"epoch": 4.4202626641651035,
"grad_norm": 0.09067908471373788,
"learning_rate": 1.0764916066947794e-06,
"loss": 0.0258,
"mean_token_accuracy": 0.9922576006501913,
"step": 150
},
{
"epoch": 4.450281425891182,
"grad_norm": 0.09653433513408423,
"learning_rate": 9.386190886588208e-07,
"loss": 0.0271,
"mean_token_accuracy": 0.9919117372483015,
"step": 151
},
{
"epoch": 4.480300187617261,
"grad_norm": 0.0987084116462941,
"learning_rate": 8.10022690993506e-07,
"loss": 0.028,
"mean_token_accuracy": 0.9915720969438553,
"step": 152
},
{
"epoch": 4.51031894934334,
"grad_norm": 0.10457824343264062,
"learning_rate": 6.907519900580861e-07,
"loss": 0.0302,
"mean_token_accuracy": 0.9909002613276243,
"step": 153
},
{
"epoch": 4.5403377110694185,
"grad_norm": 0.10340772315470596,
"learning_rate": 5.808529669781904e-07,
"loss": 0.0264,
"mean_token_accuracy": 0.9919101018458605,
"step": 154
},
{
"epoch": 4.570356472795497,
"grad_norm": 0.09744615982408229,
"learning_rate": 4.803679899192392e-07,
"loss": 0.0285,
"mean_token_accuracy": 0.9909517038613558,
"step": 155
},
{
"epoch": 4.600375234521576,
"grad_norm": 0.09254462095652977,
"learning_rate": 3.8933579775271013e-07,
"loss": 0.0263,
"mean_token_accuracy": 0.9920994155108929,
"step": 156
},
{
"epoch": 4.630393996247655,
"grad_norm": 0.09530189373391666,
"learning_rate": 3.077914851215585e-07,
"loss": 0.0283,
"mean_token_accuracy": 0.9914026968181133,
"step": 157
},
{
"epoch": 4.6604127579737336,
"grad_norm": 0.09988980276705559,
"learning_rate": 2.3576648891056875e-07,
"loss": 0.027,
"mean_token_accuracy": 0.9920587744563818,
"step": 158
},
{
"epoch": 4.690431519699812,
"grad_norm": 0.09117032881835743,
"learning_rate": 1.732885761268427e-07,
"loss": 0.0262,
"mean_token_accuracy": 0.991992175579071,
"step": 159
},
{
"epoch": 4.720450281425891,
"grad_norm": 0.09146419854434591,
"learning_rate": 1.2038183319507955e-07,
"loss": 0.0264,
"mean_token_accuracy": 0.9921840745955706,
"step": 160
},
{
"epoch": 4.75046904315197,
"grad_norm": 0.0941692484126693,
"learning_rate": 7.706665667180091e-08,
"loss": 0.0262,
"mean_token_accuracy": 0.992155384272337,
"step": 161
},
{
"epoch": 4.780487804878049,
"grad_norm": 0.09868857104545,
"learning_rate": 4.335974538210441e-08,
"loss": 0.0286,
"mean_token_accuracy": 0.9914395287632942,
"step": 162
},
{
"epoch": 4.810506566604127,
"grad_norm": 0.098281670570088,
"learning_rate": 1.9274093981927478e-08,
"loss": 0.0269,
"mean_token_accuracy": 0.9919413533061743,
"step": 163
},
{
"epoch": 4.840525328330206,
"grad_norm": 0.08992856114449468,
"learning_rate": 4.818987948379539e-09,
"loss": 0.0266,
"mean_token_accuracy": 0.9920829199254513,
"step": 164
},
{
"epoch": 4.870544090056285,
"grad_norm": 0.09146022503539022,
"learning_rate": 0.0,
"loss": 0.0274,
"mean_token_accuracy": 0.9918729793280363,
"step": 165
},
{
"epoch": 4.870544090056285,
"step": 165,
"total_flos": 195199345459200.0,
"train_loss": 0.14440994993077985,
"train_runtime": 12814.9616,
"train_samples_per_second": 3.325,
"train_steps_per_second": 0.013
}
],
"logging_steps": 1,
"max_steps": 165,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 195199345459200.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}