{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 2060,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009708737864077669,
"grad_norm": 8.540091514587402,
"learning_rate": 3.883495145631068e-07,
"loss": 2.734,
"step": 10
},
{
"epoch": 0.019417475728155338,
"grad_norm": 8.14619255065918,
"learning_rate": 1.359223300970874e-06,
"loss": 2.6711,
"step": 20
},
{
"epoch": 0.02912621359223301,
"grad_norm": 6.858204364776611,
"learning_rate": 2.330097087378641e-06,
"loss": 2.6055,
"step": 30
},
{
"epoch": 0.038834951456310676,
"grad_norm": 6.228450298309326,
"learning_rate": 3.300970873786408e-06,
"loss": 2.5322,
"step": 40
},
{
"epoch": 0.04854368932038835,
"grad_norm": 5.512765407562256,
"learning_rate": 4.271844660194175e-06,
"loss": 2.466,
"step": 50
},
{
"epoch": 0.05825242718446602,
"grad_norm": 5.114351272583008,
"learning_rate": 5.242718446601942e-06,
"loss": 2.4828,
"step": 60
},
{
"epoch": 0.06796116504854369,
"grad_norm": 4.749820232391357,
"learning_rate": 6.213592233009709e-06,
"loss": 2.4529,
"step": 70
},
{
"epoch": 0.07766990291262135,
"grad_norm": 4.962618827819824,
"learning_rate": 7.184466019417476e-06,
"loss": 2.3377,
"step": 80
},
{
"epoch": 0.08737864077669903,
"grad_norm": 4.963841915130615,
"learning_rate": 8.155339805825243e-06,
"loss": 2.3914,
"step": 90
},
{
"epoch": 0.0970873786407767,
"grad_norm": 5.020650386810303,
"learning_rate": 9.12621359223301e-06,
"loss": 2.2748,
"step": 100
},
{
"epoch": 0.10679611650485436,
"grad_norm": 4.788837909698486,
"learning_rate": 1.0097087378640778e-05,
"loss": 2.2975,
"step": 110
},
{
"epoch": 0.11650485436893204,
"grad_norm": 5.005253791809082,
"learning_rate": 1.1067961165048544e-05,
"loss": 2.3025,
"step": 120
},
{
"epoch": 0.1262135922330097,
"grad_norm": 5.113918304443359,
"learning_rate": 1.2038834951456311e-05,
"loss": 2.2121,
"step": 130
},
{
"epoch": 0.13592233009708737,
"grad_norm": 5.089141368865967,
"learning_rate": 1.300970873786408e-05,
"loss": 2.233,
"step": 140
},
{
"epoch": 0.14563106796116504,
"grad_norm": 5.0798749923706055,
"learning_rate": 1.3980582524271846e-05,
"loss": 2.1688,
"step": 150
},
{
"epoch": 0.1553398058252427,
"grad_norm": 5.230806827545166,
"learning_rate": 1.4951456310679614e-05,
"loss": 2.2232,
"step": 160
},
{
"epoch": 0.1650485436893204,
"grad_norm": 5.30760383605957,
"learning_rate": 1.592233009708738e-05,
"loss": 2.1506,
"step": 170
},
{
"epoch": 0.17475728155339806,
"grad_norm": 5.008656978607178,
"learning_rate": 1.6893203883495145e-05,
"loss": 2.1866,
"step": 180
},
{
"epoch": 0.18446601941747573,
"grad_norm": 4.95796537399292,
"learning_rate": 1.7864077669902916e-05,
"loss": 2.2156,
"step": 190
},
{
"epoch": 0.1941747572815534,
"grad_norm": 4.717769145965576,
"learning_rate": 1.883495145631068e-05,
"loss": 2.164,
"step": 200
},
{
"epoch": 0.20388349514563106,
"grad_norm": 4.858338832855225,
"learning_rate": 1.9805825242718447e-05,
"loss": 2.1291,
"step": 210
},
{
"epoch": 0.21359223300970873,
"grad_norm": 5.218167781829834,
"learning_rate": 1.9913700107874866e-05,
"loss": 2.1617,
"step": 220
},
{
"epoch": 0.22330097087378642,
"grad_norm": 5.097916126251221,
"learning_rate": 1.9805825242718447e-05,
"loss": 2.1721,
"step": 230
},
{
"epoch": 0.23300970873786409,
"grad_norm": 5.860560417175293,
"learning_rate": 1.969795037756203e-05,
"loss": 2.1412,
"step": 240
},
{
"epoch": 0.24271844660194175,
"grad_norm": 5.395883560180664,
"learning_rate": 1.959007551240561e-05,
"loss": 2.1397,
"step": 250
},
{
"epoch": 0.2524271844660194,
"grad_norm": 5.043527126312256,
"learning_rate": 1.9482200647249193e-05,
"loss": 2.1314,
"step": 260
},
{
"epoch": 0.2621359223300971,
"grad_norm": 4.853712558746338,
"learning_rate": 1.9374325782092775e-05,
"loss": 2.118,
"step": 270
},
{
"epoch": 0.27184466019417475,
"grad_norm": 5.681634902954102,
"learning_rate": 1.9266450916936353e-05,
"loss": 2.1189,
"step": 280
},
{
"epoch": 0.2815533980582524,
"grad_norm": 5.401227951049805,
"learning_rate": 1.9158576051779935e-05,
"loss": 2.1148,
"step": 290
},
{
"epoch": 0.2912621359223301,
"grad_norm": 5.208418369293213,
"learning_rate": 1.905070118662352e-05,
"loss": 2.128,
"step": 300
},
{
"epoch": 0.30097087378640774,
"grad_norm": 5.307718276977539,
"learning_rate": 1.89428263214671e-05,
"loss": 2.0858,
"step": 310
},
{
"epoch": 0.3106796116504854,
"grad_norm": 5.279410362243652,
"learning_rate": 1.883495145631068e-05,
"loss": 2.1371,
"step": 320
},
{
"epoch": 0.32038834951456313,
"grad_norm": 5.151274681091309,
"learning_rate": 1.8727076591154262e-05,
"loss": 2.0932,
"step": 330
},
{
"epoch": 0.3300970873786408,
"grad_norm": 5.083354473114014,
"learning_rate": 1.8619201725997844e-05,
"loss": 2.0623,
"step": 340
},
{
"epoch": 0.33980582524271846,
"grad_norm": 5.1322550773620605,
"learning_rate": 1.8511326860841425e-05,
"loss": 2.05,
"step": 350
},
{
"epoch": 0.34951456310679613,
"grad_norm": 4.970919609069824,
"learning_rate": 1.8403451995685007e-05,
"loss": 2.0901,
"step": 360
},
{
"epoch": 0.3592233009708738,
"grad_norm": 5.15512752532959,
"learning_rate": 1.829557713052859e-05,
"loss": 2.0883,
"step": 370
},
{
"epoch": 0.36893203883495146,
"grad_norm": 5.088575839996338,
"learning_rate": 1.818770226537217e-05,
"loss": 2.1119,
"step": 380
},
{
"epoch": 0.3786407766990291,
"grad_norm": 6.092918872833252,
"learning_rate": 1.807982740021575e-05,
"loss": 2.0979,
"step": 390
},
{
"epoch": 0.3883495145631068,
"grad_norm": 4.909801483154297,
"learning_rate": 1.797195253505933e-05,
"loss": 2.0283,
"step": 400
},
{
"epoch": 0.39805825242718446,
"grad_norm": 5.128530025482178,
"learning_rate": 1.7864077669902916e-05,
"loss": 2.023,
"step": 410
},
{
"epoch": 0.4077669902912621,
"grad_norm": 4.998912811279297,
"learning_rate": 1.7756202804746498e-05,
"loss": 2.0207,
"step": 420
},
{
"epoch": 0.4174757281553398,
"grad_norm": 5.182358264923096,
"learning_rate": 1.7648327939590076e-05,
"loss": 1.9837,
"step": 430
},
{
"epoch": 0.42718446601941745,
"grad_norm": 5.3191022872924805,
"learning_rate": 1.7540453074433658e-05,
"loss": 2.058,
"step": 440
},
{
"epoch": 0.4368932038834951,
"grad_norm": 5.306585788726807,
"learning_rate": 1.743257820927724e-05,
"loss": 2.0832,
"step": 450
},
{
"epoch": 0.44660194174757284,
"grad_norm": 5.278446197509766,
"learning_rate": 1.732470334412082e-05,
"loss": 2.0594,
"step": 460
},
{
"epoch": 0.4563106796116505,
"grad_norm": 5.484086990356445,
"learning_rate": 1.7216828478964403e-05,
"loss": 2.0763,
"step": 470
},
{
"epoch": 0.46601941747572817,
"grad_norm": 5.767387866973877,
"learning_rate": 1.7108953613807985e-05,
"loss": 2.0634,
"step": 480
},
{
"epoch": 0.47572815533980584,
"grad_norm": 4.96846342086792,
"learning_rate": 1.7001078748651563e-05,
"loss": 2.0769,
"step": 490
},
{
"epoch": 0.4854368932038835,
"grad_norm": 5.264239311218262,
"learning_rate": 1.6893203883495145e-05,
"loss": 2.0604,
"step": 500
},
{
"epoch": 0.49514563106796117,
"grad_norm": 5.036663055419922,
"learning_rate": 1.6785329018338727e-05,
"loss": 2.1031,
"step": 510
},
{
"epoch": 0.5048543689320388,
"grad_norm": 4.875285625457764,
"learning_rate": 1.6677454153182312e-05,
"loss": 2.0457,
"step": 520
},
{
"epoch": 0.5145631067961165,
"grad_norm": 4.933873653411865,
"learning_rate": 1.6569579288025894e-05,
"loss": 2.0312,
"step": 530
},
{
"epoch": 0.5242718446601942,
"grad_norm": 5.284345626831055,
"learning_rate": 1.6461704422869472e-05,
"loss": 2.0656,
"step": 540
},
{
"epoch": 0.5339805825242718,
"grad_norm": 5.3404998779296875,
"learning_rate": 1.6353829557713054e-05,
"loss": 2.1049,
"step": 550
},
{
"epoch": 0.5436893203883495,
"grad_norm": 5.243639945983887,
"learning_rate": 1.6245954692556636e-05,
"loss": 2.0382,
"step": 560
},
{
"epoch": 0.5533980582524272,
"grad_norm": 5.110634803771973,
"learning_rate": 1.6138079827400217e-05,
"loss": 2.004,
"step": 570
},
{
"epoch": 0.5631067961165048,
"grad_norm": 5.063004493713379,
"learning_rate": 1.60302049622438e-05,
"loss": 2.0207,
"step": 580
},
{
"epoch": 0.5728155339805825,
"grad_norm": 4.7647271156311035,
"learning_rate": 1.592233009708738e-05,
"loss": 2.0534,
"step": 590
},
{
"epoch": 0.5825242718446602,
"grad_norm": 5.176267147064209,
"learning_rate": 1.581445523193096e-05,
"loss": 2.0627,
"step": 600
},
{
"epoch": 0.5922330097087378,
"grad_norm": 5.2062225341796875,
"learning_rate": 1.570658036677454e-05,
"loss": 2.0063,
"step": 610
},
{
"epoch": 0.6019417475728155,
"grad_norm": 5.044838905334473,
"learning_rate": 1.5598705501618123e-05,
"loss": 1.9475,
"step": 620
},
{
"epoch": 0.6116504854368932,
"grad_norm": 5.029117584228516,
"learning_rate": 1.5490830636461708e-05,
"loss": 2.0673,
"step": 630
},
{
"epoch": 0.6213592233009708,
"grad_norm": 5.190179347991943,
"learning_rate": 1.5382955771305286e-05,
"loss": 2.0176,
"step": 640
},
{
"epoch": 0.6310679611650486,
"grad_norm": 4.956365585327148,
"learning_rate": 1.5275080906148868e-05,
"loss": 1.9984,
"step": 650
},
{
"epoch": 0.6407766990291263,
"grad_norm": 4.972413539886475,
"learning_rate": 1.516720604099245e-05,
"loss": 1.9919,
"step": 660
},
{
"epoch": 0.6504854368932039,
"grad_norm": 5.319215297698975,
"learning_rate": 1.5059331175836032e-05,
"loss": 1.984,
"step": 670
},
{
"epoch": 0.6601941747572816,
"grad_norm": 5.120510578155518,
"learning_rate": 1.4951456310679614e-05,
"loss": 1.9838,
"step": 680
},
{
"epoch": 0.6699029126213593,
"grad_norm": 4.868938446044922,
"learning_rate": 1.4843581445523194e-05,
"loss": 2.006,
"step": 690
},
{
"epoch": 0.6796116504854369,
"grad_norm": 5.22821044921875,
"learning_rate": 1.4735706580366775e-05,
"loss": 2.0099,
"step": 700
},
{
"epoch": 0.6893203883495146,
"grad_norm": 4.970730781555176,
"learning_rate": 1.4627831715210357e-05,
"loss": 2.0078,
"step": 710
},
{
"epoch": 0.6990291262135923,
"grad_norm": 4.913213729858398,
"learning_rate": 1.4519956850053937e-05,
"loss": 2.0198,
"step": 720
},
{
"epoch": 0.7087378640776699,
"grad_norm": 5.104898452758789,
"learning_rate": 1.4412081984897519e-05,
"loss": 2.0224,
"step": 730
},
{
"epoch": 0.7184466019417476,
"grad_norm": 4.992263317108154,
"learning_rate": 1.4304207119741102e-05,
"loss": 2.0013,
"step": 740
},
{
"epoch": 0.7281553398058253,
"grad_norm": 5.0994038581848145,
"learning_rate": 1.4196332254584684e-05,
"loss": 1.9542,
"step": 750
},
{
"epoch": 0.7378640776699029,
"grad_norm": 5.849913120269775,
"learning_rate": 1.4088457389428264e-05,
"loss": 1.9941,
"step": 760
},
{
"epoch": 0.7475728155339806,
"grad_norm": 5.09085750579834,
"learning_rate": 1.3980582524271846e-05,
"loss": 2.0384,
"step": 770
},
{
"epoch": 0.7572815533980582,
"grad_norm": 5.28529167175293,
"learning_rate": 1.3872707659115428e-05,
"loss": 2.0251,
"step": 780
},
{
"epoch": 0.7669902912621359,
"grad_norm": 5.162165641784668,
"learning_rate": 1.3764832793959008e-05,
"loss": 1.9806,
"step": 790
},
{
"epoch": 0.7766990291262136,
"grad_norm": 5.865965843200684,
"learning_rate": 1.365695792880259e-05,
"loss": 2.0359,
"step": 800
},
{
"epoch": 0.7864077669902912,
"grad_norm": 4.936879634857178,
"learning_rate": 1.3549083063646171e-05,
"loss": 1.993,
"step": 810
},
{
"epoch": 0.7961165048543689,
"grad_norm": 5.331514358520508,
"learning_rate": 1.3441208198489753e-05,
"loss": 1.9685,
"step": 820
},
{
"epoch": 0.8058252427184466,
"grad_norm": 5.171398639678955,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.9885,
"step": 830
},
{
"epoch": 0.8155339805825242,
"grad_norm": 4.853579998016357,
"learning_rate": 1.3225458468176915e-05,
"loss": 1.9818,
"step": 840
},
{
"epoch": 0.8252427184466019,
"grad_norm": 5.196751117706299,
"learning_rate": 1.3117583603020499e-05,
"loss": 1.946,
"step": 850
},
{
"epoch": 0.8349514563106796,
"grad_norm": 4.931100845336914,
"learning_rate": 1.300970873786408e-05,
"loss": 1.9898,
"step": 860
},
{
"epoch": 0.8446601941747572,
"grad_norm": 5.232204437255859,
"learning_rate": 1.290183387270766e-05,
"loss": 2.0314,
"step": 870
},
{
"epoch": 0.8543689320388349,
"grad_norm": 5.175143718719482,
"learning_rate": 1.2793959007551242e-05,
"loss": 1.9958,
"step": 880
},
{
"epoch": 0.8640776699029126,
"grad_norm": 5.501524925231934,
"learning_rate": 1.2686084142394824e-05,
"loss": 1.969,
"step": 890
},
{
"epoch": 0.8737864077669902,
"grad_norm": 5.200106620788574,
"learning_rate": 1.2578209277238404e-05,
"loss": 1.9624,
"step": 900
},
{
"epoch": 0.883495145631068,
"grad_norm": 5.435555934906006,
"learning_rate": 1.2470334412081986e-05,
"loss": 2.0057,
"step": 910
},
{
"epoch": 0.8932038834951457,
"grad_norm": 5.041926860809326,
"learning_rate": 1.2362459546925568e-05,
"loss": 1.9604,
"step": 920
},
{
"epoch": 0.9029126213592233,
"grad_norm": 5.86530065536499,
"learning_rate": 1.2254584681769148e-05,
"loss": 1.9904,
"step": 930
},
{
"epoch": 0.912621359223301,
"grad_norm": 5.039781093597412,
"learning_rate": 1.214670981661273e-05,
"loss": 1.9681,
"step": 940
},
{
"epoch": 0.9223300970873787,
"grad_norm": 5.195461273193359,
"learning_rate": 1.2038834951456311e-05,
"loss": 2.0042,
"step": 950
},
{
"epoch": 0.9320388349514563,
"grad_norm": 5.229151725769043,
"learning_rate": 1.1930960086299891e-05,
"loss": 2.0039,
"step": 960
},
{
"epoch": 0.941747572815534,
"grad_norm": 4.973319053649902,
"learning_rate": 1.1823085221143475e-05,
"loss": 2.0065,
"step": 970
},
{
"epoch": 0.9514563106796117,
"grad_norm": 5.273900032043457,
"learning_rate": 1.1715210355987056e-05,
"loss": 1.9896,
"step": 980
},
{
"epoch": 0.9611650485436893,
"grad_norm": 5.269063472747803,
"learning_rate": 1.1607335490830638e-05,
"loss": 1.9592,
"step": 990
},
{
"epoch": 0.970873786407767,
"grad_norm": 5.100589752197266,
"learning_rate": 1.149946062567422e-05,
"loss": 1.9354,
"step": 1000
},
{
"epoch": 0.9805825242718447,
"grad_norm": 5.232608318328857,
"learning_rate": 1.1413160733549084e-05,
"loss": 2.0395,
"step": 1010
},
{
"epoch": 0.9902912621359223,
"grad_norm": 5.08132791519165,
"learning_rate": 1.1305285868392666e-05,
"loss": 1.9437,
"step": 1020
},
{
"epoch": 1.0,
"grad_norm": 5.696261405944824,
"learning_rate": 1.1197411003236248e-05,
"loss": 1.9665,
"step": 1030
},
{
"epoch": 1.0097087378640777,
"grad_norm": 5.359734058380127,
"learning_rate": 1.1089536138079828e-05,
"loss": 1.841,
"step": 1040
},
{
"epoch": 1.0194174757281553,
"grad_norm": 5.353382587432861,
"learning_rate": 1.098166127292341e-05,
"loss": 1.8354,
"step": 1050
},
{
"epoch": 1.029126213592233,
"grad_norm": 5.595281600952148,
"learning_rate": 1.0873786407766991e-05,
"loss": 1.8011,
"step": 1060
},
{
"epoch": 1.0388349514563107,
"grad_norm": 5.603349685668945,
"learning_rate": 1.0765911542610571e-05,
"loss": 1.8443,
"step": 1070
},
{
"epoch": 1.0485436893203883,
"grad_norm": 5.549112319946289,
"learning_rate": 1.0658036677454153e-05,
"loss": 1.8022,
"step": 1080
},
{
"epoch": 1.058252427184466,
"grad_norm": 5.524362087249756,
"learning_rate": 1.0550161812297735e-05,
"loss": 1.8361,
"step": 1090
},
{
"epoch": 1.0679611650485437,
"grad_norm": 5.287837028503418,
"learning_rate": 1.0442286947141318e-05,
"loss": 1.7831,
"step": 1100
},
{
"epoch": 1.0776699029126213,
"grad_norm": 5.503448486328125,
"learning_rate": 1.03344120819849e-05,
"loss": 1.816,
"step": 1110
},
{
"epoch": 1.087378640776699,
"grad_norm": 5.447495460510254,
"learning_rate": 1.022653721682848e-05,
"loss": 1.7967,
"step": 1120
},
{
"epoch": 1.0970873786407767,
"grad_norm": 5.651370525360107,
"learning_rate": 1.0118662351672062e-05,
"loss": 1.767,
"step": 1130
},
{
"epoch": 1.1067961165048543,
"grad_norm": 5.455685138702393,
"learning_rate": 1.0010787486515644e-05,
"loss": 1.7986,
"step": 1140
},
{
"epoch": 1.116504854368932,
"grad_norm": 5.773884296417236,
"learning_rate": 9.902912621359224e-06,
"loss": 1.8061,
"step": 1150
},
{
"epoch": 1.1262135922330097,
"grad_norm": 5.654228687286377,
"learning_rate": 9.795037756202806e-06,
"loss": 1.8051,
"step": 1160
},
{
"epoch": 1.1359223300970873,
"grad_norm": 5.353296756744385,
"learning_rate": 9.687162891046387e-06,
"loss": 1.8157,
"step": 1170
},
{
"epoch": 1.145631067961165,
"grad_norm": 5.371259689331055,
"learning_rate": 9.579288025889967e-06,
"loss": 1.7957,
"step": 1180
},
{
"epoch": 1.1553398058252426,
"grad_norm": 5.551179885864258,
"learning_rate": 9.47141316073355e-06,
"loss": 1.8099,
"step": 1190
},
{
"epoch": 1.1650485436893203,
"grad_norm": 5.763803958892822,
"learning_rate": 9.363538295577131e-06,
"loss": 1.8243,
"step": 1200
},
{
"epoch": 1.174757281553398,
"grad_norm": 5.4469313621521,
"learning_rate": 9.255663430420713e-06,
"loss": 1.7813,
"step": 1210
},
{
"epoch": 1.1844660194174756,
"grad_norm": 5.914862155914307,
"learning_rate": 9.147788565264294e-06,
"loss": 1.8308,
"step": 1220
},
{
"epoch": 1.1941747572815533,
"grad_norm": 5.619472980499268,
"learning_rate": 9.039913700107874e-06,
"loss": 1.8566,
"step": 1230
},
{
"epoch": 1.203883495145631,
"grad_norm": 5.747879981994629,
"learning_rate": 8.932038834951458e-06,
"loss": 1.831,
"step": 1240
},
{
"epoch": 1.2135922330097086,
"grad_norm": 5.515039443969727,
"learning_rate": 8.824163969795038e-06,
"loss": 1.8279,
"step": 1250
},
{
"epoch": 1.2233009708737863,
"grad_norm": 5.6780171394348145,
"learning_rate": 8.71628910463862e-06,
"loss": 1.7496,
"step": 1260
},
{
"epoch": 1.233009708737864,
"grad_norm": 5.678586006164551,
"learning_rate": 8.608414239482202e-06,
"loss": 1.7862,
"step": 1270
},
{
"epoch": 1.2427184466019416,
"grad_norm": 5.727756977081299,
"learning_rate": 8.500539374325782e-06,
"loss": 1.8364,
"step": 1280
},
{
"epoch": 1.2524271844660193,
"grad_norm": 5.593883037567139,
"learning_rate": 8.392664509169363e-06,
"loss": 1.7775,
"step": 1290
},
{
"epoch": 1.262135922330097,
"grad_norm": 5.856795310974121,
"learning_rate": 8.284789644012947e-06,
"loss": 1.8652,
"step": 1300
},
{
"epoch": 1.2718446601941746,
"grad_norm": 5.66147518157959,
"learning_rate": 8.176914778856527e-06,
"loss": 1.7747,
"step": 1310
},
{
"epoch": 1.2815533980582523,
"grad_norm": 5.7044291496276855,
"learning_rate": 8.069039913700109e-06,
"loss": 1.7781,
"step": 1320
},
{
"epoch": 1.29126213592233,
"grad_norm": 5.8010149002075195,
"learning_rate": 7.96116504854369e-06,
"loss": 1.812,
"step": 1330
},
{
"epoch": 1.3009708737864076,
"grad_norm": 5.601301670074463,
"learning_rate": 7.85329018338727e-06,
"loss": 1.8109,
"step": 1340
},
{
"epoch": 1.3106796116504853,
"grad_norm": 5.59577751159668,
"learning_rate": 7.745415318230854e-06,
"loss": 1.794,
"step": 1350
},
{
"epoch": 1.3203883495145632,
"grad_norm": 6.064187526702881,
"learning_rate": 7.637540453074434e-06,
"loss": 1.7978,
"step": 1360
},
{
"epoch": 1.3300970873786409,
"grad_norm": 5.477755069732666,
"learning_rate": 7.529665587918016e-06,
"loss": 1.7732,
"step": 1370
},
{
"epoch": 1.3398058252427185,
"grad_norm": 5.672438144683838,
"learning_rate": 7.421790722761597e-06,
"loss": 1.8356,
"step": 1380
},
{
"epoch": 1.3495145631067962,
"grad_norm": 5.968810558319092,
"learning_rate": 7.3139158576051786e-06,
"loss": 1.8513,
"step": 1390
},
{
"epoch": 1.3592233009708738,
"grad_norm": 5.984207630157471,
"learning_rate": 7.2060409924487595e-06,
"loss": 1.7821,
"step": 1400
},
{
"epoch": 1.3689320388349515,
"grad_norm": 6.006514072418213,
"learning_rate": 7.098166127292342e-06,
"loss": 1.756,
"step": 1410
},
{
"epoch": 1.3786407766990292,
"grad_norm": 5.548986911773682,
"learning_rate": 6.990291262135923e-06,
"loss": 1.8,
"step": 1420
},
{
"epoch": 1.3883495145631068,
"grad_norm": 5.688983917236328,
"learning_rate": 6.882416396979504e-06,
"loss": 1.7601,
"step": 1430
},
{
"epoch": 1.3980582524271845,
"grad_norm": 5.5633225440979,
"learning_rate": 6.774541531823086e-06,
"loss": 1.7876,
"step": 1440
},
{
"epoch": 1.4077669902912622,
"grad_norm": 5.3327226638793945,
"learning_rate": 6.666666666666667e-06,
"loss": 1.7388,
"step": 1450
},
{
"epoch": 1.4174757281553398,
"grad_norm": 5.563536643981934,
"learning_rate": 6.558791801510249e-06,
"loss": 1.7894,
"step": 1460
},
{
"epoch": 1.4271844660194175,
"grad_norm": 5.413880825042725,
"learning_rate": 6.45091693635383e-06,
"loss": 1.8045,
"step": 1470
},
{
"epoch": 1.4368932038834952,
"grad_norm": 5.559710502624512,
"learning_rate": 6.343042071197412e-06,
"loss": 1.7388,
"step": 1480
},
{
"epoch": 1.4466019417475728,
"grad_norm": 5.674643039703369,
"learning_rate": 6.235167206040993e-06,
"loss": 1.7521,
"step": 1490
},
{
"epoch": 1.4563106796116505,
"grad_norm": 6.015341758728027,
"learning_rate": 6.127292340884574e-06,
"loss": 1.7567,
"step": 1500
},
{
"epoch": 1.4660194174757282,
"grad_norm": 5.763010025024414,
"learning_rate": 6.0194174757281556e-06,
"loss": 1.7494,
"step": 1510
},
{
"epoch": 1.4757281553398058,
"grad_norm": 5.727349758148193,
"learning_rate": 5.911542610571737e-06,
"loss": 1.8048,
"step": 1520
},
{
"epoch": 1.4854368932038835,
"grad_norm": 5.473784923553467,
"learning_rate": 5.803667745415319e-06,
"loss": 1.7469,
"step": 1530
},
{
"epoch": 1.4951456310679612,
"grad_norm": 5.847958087921143,
"learning_rate": 5.6957928802589e-06,
"loss": 1.7803,
"step": 1540
},
{
"epoch": 1.5048543689320388,
"grad_norm": 6.08969259262085,
"learning_rate": 5.587918015102482e-06,
"loss": 1.796,
"step": 1550
},
{
"epoch": 1.5145631067961165,
"grad_norm": 5.455092430114746,
"learning_rate": 5.480043149946063e-06,
"loss": 1.7495,
"step": 1560
},
{
"epoch": 1.5242718446601942,
"grad_norm": 5.9275031089782715,
"learning_rate": 5.372168284789644e-06,
"loss": 1.814,
"step": 1570
},
{
"epoch": 1.5339805825242718,
"grad_norm": 5.613204002380371,
"learning_rate": 5.264293419633226e-06,
"loss": 1.8095,
"step": 1580
},
{
"epoch": 1.5436893203883495,
"grad_norm": 5.575292110443115,
"learning_rate": 5.156418554476807e-06,
"loss": 1.794,
"step": 1590
},
{
"epoch": 1.5533980582524272,
"grad_norm": 6.1768107414245605,
"learning_rate": 5.048543689320389e-06,
"loss": 1.7858,
"step": 1600
},
{
"epoch": 1.5631067961165048,
"grad_norm": 5.83579158782959,
"learning_rate": 4.94066882416397e-06,
"loss": 1.7229,
"step": 1610
},
{
"epoch": 1.5728155339805825,
"grad_norm": 5.574371814727783,
"learning_rate": 4.832793959007552e-06,
"loss": 1.784,
"step": 1620
},
{
"epoch": 1.5825242718446602,
"grad_norm": 5.452093124389648,
"learning_rate": 4.724919093851133e-06,
"loss": 1.7639,
"step": 1630
},
{
"epoch": 1.5922330097087378,
"grad_norm": 5.442083358764648,
"learning_rate": 4.617044228694714e-06,
"loss": 1.8157,
"step": 1640
},
{
"epoch": 1.6019417475728155,
"grad_norm": 5.7211079597473145,
"learning_rate": 4.509169363538296e-06,
"loss": 1.7338,
"step": 1650
},
{
"epoch": 1.6116504854368932,
"grad_norm": 6.113297462463379,
"learning_rate": 4.401294498381877e-06,
"loss": 1.7044,
"step": 1660
},
{
"epoch": 1.6213592233009708,
"grad_norm": 5.693146705627441,
"learning_rate": 4.293419633225459e-06,
"loss": 1.7722,
"step": 1670
},
{
"epoch": 1.6310679611650487,
"grad_norm": 5.956842422485352,
"learning_rate": 4.1855447680690406e-06,
"loss": 1.7618,
"step": 1680
},
{
"epoch": 1.6407766990291264,
"grad_norm": 5.646125793457031,
"learning_rate": 4.0776699029126215e-06,
"loss": 1.7872,
"step": 1690
},
{
"epoch": 1.650485436893204,
"grad_norm": 5.913788795471191,
"learning_rate": 3.969795037756203e-06,
"loss": 1.7913,
"step": 1700
},
{
"epoch": 1.6601941747572817,
"grad_norm": 6.05329704284668,
"learning_rate": 3.861920172599784e-06,
"loss": 1.7771,
"step": 1710
},
{
"epoch": 1.6699029126213594,
"grad_norm": 6.139546871185303,
"learning_rate": 3.754045307443366e-06,
"loss": 1.7966,
"step": 1720
},
{
"epoch": 1.679611650485437,
"grad_norm": 6.158768653869629,
"learning_rate": 3.6461704422869477e-06,
"loss": 1.8097,
"step": 1730
},
{
"epoch": 1.6893203883495147,
"grad_norm": 5.726659774780273,
"learning_rate": 3.5382955771305286e-06,
"loss": 1.7568,
"step": 1740
},
{
"epoch": 1.6990291262135924,
"grad_norm": 5.508258819580078,
"learning_rate": 3.43042071197411e-06,
"loss": 1.8406,
"step": 1750
},
{
"epoch": 1.70873786407767,
"grad_norm": 6.076147556304932,
"learning_rate": 3.3225458468176918e-06,
"loss": 1.7246,
"step": 1760
},
{
"epoch": 1.7184466019417477,
"grad_norm": 5.596787452697754,
"learning_rate": 3.214670981661273e-06,
"loss": 1.8189,
"step": 1770
},
{
"epoch": 1.7281553398058254,
"grad_norm": 6.001366138458252,
"learning_rate": 3.1067961165048544e-06,
"loss": 1.7681,
"step": 1780
},
{
"epoch": 1.737864077669903,
"grad_norm": 5.889746189117432,
"learning_rate": 2.9989212513484362e-06,
"loss": 1.7634,
"step": 1790
},
{
"epoch": 1.7475728155339807,
"grad_norm": 5.484528541564941,
"learning_rate": 2.8910463861920176e-06,
"loss": 1.7928,
"step": 1800
},
{
"epoch": 1.7572815533980584,
"grad_norm": 5.366055011749268,
"learning_rate": 2.7831715210355993e-06,
"loss": 1.7284,
"step": 1810
},
{
"epoch": 1.766990291262136,
"grad_norm": 6.179636478424072,
"learning_rate": 2.6752966558791803e-06,
"loss": 1.7973,
"step": 1820
},
{
"epoch": 1.7766990291262137,
"grad_norm": 5.786418437957764,
"learning_rate": 2.5674217907227616e-06,
"loss": 1.7271,
"step": 1830
},
{
"epoch": 1.7864077669902914,
"grad_norm": 5.728253364562988,
"learning_rate": 2.4595469255663434e-06,
"loss": 1.7947,
"step": 1840
},
{
"epoch": 1.796116504854369,
"grad_norm": 5.840207576751709,
"learning_rate": 2.3516720604099247e-06,
"loss": 1.8411,
"step": 1850
},
{
"epoch": 1.8058252427184467,
"grad_norm": 6.026117324829102,
"learning_rate": 2.243797195253506e-06,
"loss": 1.7801,
"step": 1860
},
{
"epoch": 1.8155339805825244,
"grad_norm": 5.574731826782227,
"learning_rate": 2.1359223300970874e-06,
"loss": 1.8296,
"step": 1870
},
{
"epoch": 1.825242718446602,
"grad_norm": 5.741345405578613,
"learning_rate": 2.0280474649406688e-06,
"loss": 1.8048,
"step": 1880
},
{
"epoch": 1.8349514563106797,
"grad_norm": 5.989925384521484,
"learning_rate": 1.9201725997842505e-06,
"loss": 1.7559,
"step": 1890
},
{
"epoch": 1.8446601941747574,
"grad_norm": 5.998227119445801,
"learning_rate": 1.812297734627832e-06,
"loss": 1.7678,
"step": 1900
},
{
"epoch": 1.854368932038835,
"grad_norm": 5.9274420738220215,
"learning_rate": 1.7044228694714132e-06,
"loss": 1.7886,
"step": 1910
},
{
"epoch": 1.8640776699029127,
"grad_norm": 5.719155788421631,
"learning_rate": 1.5965480043149948e-06,
"loss": 1.7921,
"step": 1920
},
{
"epoch": 1.8737864077669903,
"grad_norm": 5.4220147132873535,
"learning_rate": 1.4886731391585763e-06,
"loss": 1.7813,
"step": 1930
},
{
"epoch": 1.883495145631068,
"grad_norm": 5.772354602813721,
"learning_rate": 1.3807982740021575e-06,
"loss": 1.7606,
"step": 1940
},
{
"epoch": 1.8932038834951457,
"grad_norm": 5.864536762237549,
"learning_rate": 1.272923408845739e-06,
"loss": 1.7354,
"step": 1950
},
{
"epoch": 1.9029126213592233,
"grad_norm": 5.453779697418213,
"learning_rate": 1.1650485436893206e-06,
"loss": 1.7403,
"step": 1960
},
{
"epoch": 1.912621359223301,
"grad_norm": 5.6637492179870605,
"learning_rate": 1.057173678532902e-06,
"loss": 1.7649,
"step": 1970
},
{
"epoch": 1.9223300970873787,
"grad_norm": 5.886834621429443,
"learning_rate": 9.492988133764834e-07,
"loss": 1.8095,
"step": 1980
},
{
"epoch": 1.9320388349514563,
"grad_norm": 6.255226135253906,
"learning_rate": 8.414239482200648e-07,
"loss": 1.7987,
"step": 1990
},
{
"epoch": 1.941747572815534,
"grad_norm": 5.694814205169678,
"learning_rate": 7.335490830636462e-07,
"loss": 1.7715,
"step": 2000
},
{
"epoch": 1.9514563106796117,
"grad_norm": 5.4670257568359375,
"learning_rate": 6.256742179072277e-07,
"loss": 1.737,
"step": 2010
},
{
"epoch": 1.9611650485436893,
"grad_norm": 5.776082992553711,
"learning_rate": 5.393743257820928e-07,
"loss": 1.7518,
"step": 2020
},
{
"epoch": 1.970873786407767,
"grad_norm": 5.826039791107178,
"learning_rate": 4.314994606256743e-07,
"loss": 1.8036,
"step": 2030
},
{
"epoch": 1.9805825242718447,
"grad_norm": 5.991348743438721,
"learning_rate": 3.2362459546925565e-07,
"loss": 1.756,
"step": 2040
},
{
"epoch": 1.9902912621359223,
"grad_norm": 5.450629234313965,
"learning_rate": 2.1574973031283715e-07,
"loss": 1.7953,
"step": 2050
},
{
"epoch": 2.0,
"grad_norm": 5.462096214294434,
"learning_rate": 1.0787486515641857e-07,
"loss": 1.7721,
"step": 2060
}
],
"logging_steps": 10,
"max_steps": 2060,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}