{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 2000.0, "global_step": 412, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009708737864077669, "grad_norm": 5.575473016079204, "kl": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 0.4845, "step": 1, "step_loss": 0.490234375 }, { "epoch": 0.019417475728155338, "grad_norm": 9.7143780359321, "kl": 0.0020599365234375, "learning_rate": 2.438044511330269e-06, "loss": 0.4845, "step": 2, "step_loss": 0.48046875 }, { "epoch": 0.02912621359223301, "grad_norm": 7.3775865577265884, "kl": 0.002471923828125, "learning_rate": 3.5717278751869343e-06, "loss": 0.4945, "step": 3, "step_loss": 0.546875 }, { "epoch": 0.038834951456310676, "grad_norm": 15.596262146563706, "kl": 0.002349853515625, "learning_rate": 4.376089022660538e-06, "loss": 0.4586, "step": 4, "step_loss": 0.32421875 }, { "epoch": 0.04854368932038835, "grad_norm": 6.642139360876633, "kl": 0.021240234375, "learning_rate": 5e-06, "loss": 0.4483, "step": 5, "step_loss": 0.71875 }, { "epoch": 0.05825242718446602, "grad_norm": 10.4028109346367, "kl": 0.037353515625, "learning_rate": 4.999957311534636e-06, "loss": 0.4224, "step": 6, "step_loss": 0.306640625 }, { "epoch": 0.06796116504854369, "grad_norm": 8.965364568204704, "kl": 0.043212890625, "learning_rate": 4.99982924775837e-06, "loss": 0.3771, "step": 7, "step_loss": 0.361328125 }, { "epoch": 0.07766990291262135, "grad_norm": 5.86329979229663, "kl": 0.06396484375, "learning_rate": 4.999615813530619e-06, "loss": 0.372, "step": 8, "step_loss": 0.345703125 }, { "epoch": 0.08737864077669903, "grad_norm": 7.680252752913332, "kl": 0.07763671875, "learning_rate": 4.999317016950212e-06, "loss": 0.4717, "step": 9, "step_loss": 0.482421875 }, { "epoch": 0.0970873786407767, "grad_norm": 7.28382768968269, "kl": 0.12060546875, "learning_rate": 4.998932869355074e-06, "loss": 0.3988, "step": 10, "step_loss": 0.259765625 
}, { "epoch": 0.10679611650485436, "grad_norm": 3.742924436507147, "kl": 0.07958984375, "learning_rate": 4.998463385321802e-06, "loss": 0.3597, "step": 11, "step_loss": 0.42578125 }, { "epoch": 0.11650485436893204, "grad_norm": 2.077338507259391, "kl": 0.0986328125, "learning_rate": 4.997908582665111e-06, "loss": 0.3828, "step": 12, "step_loss": 0.2197265625 }, { "epoch": 0.1262135922330097, "grad_norm": 2.0936762099287187, "kl": 0.2578125, "learning_rate": 4.997268482437153e-06, "loss": 0.3686, "step": 13, "step_loss": 0.40234375 }, { "epoch": 0.13592233009708737, "grad_norm": 1.5539690974487896, "kl": 0.11572265625, "learning_rate": 4.9965431089267265e-06, "loss": 0.3481, "step": 14, "step_loss": 0.318359375 }, { "epoch": 0.14563106796116504, "grad_norm": 11.763990149324268, "kl": 0.19921875, "learning_rate": 4.9957324896583495e-06, "loss": 0.3518, "step": 15, "step_loss": 0.419921875 }, { "epoch": 0.1553398058252427, "grad_norm": 1.6021280918226926, "kl": 0.2080078125, "learning_rate": 4.9948366553912146e-06, "loss": 0.3468, "step": 16, "step_loss": 0.498046875 }, { "epoch": 0.1650485436893204, "grad_norm": 2.243950658979944, "kl": 0.1279296875, "learning_rate": 4.993855640118024e-06, "loss": 0.3336, "step": 17, "step_loss": 0.267578125 }, { "epoch": 0.17475728155339806, "grad_norm": 1.5344029876580454, "kl": 0.2216796875, "learning_rate": 4.992789481063699e-06, "loss": 0.3456, "step": 18, "step_loss": 0.3671875 }, { "epoch": 0.18446601941747573, "grad_norm": 1.7390327115176747, "kl": 0.1259765625, "learning_rate": 4.9916382186839665e-06, "loss": 0.3427, "step": 19, "step_loss": 0.298828125 }, { "epoch": 0.1941747572815534, "grad_norm": 5.2081774675570855, "kl": 0.1591796875, "learning_rate": 4.990401896663829e-06, "loss": 0.2956, "step": 20, "step_loss": 0.2353515625 }, { "epoch": 0.20388349514563106, "grad_norm": 2.087334097477202, "kl": 0.169921875, "learning_rate": 4.989080561915895e-06, "loss": 0.3336, "step": 21, "step_loss": 0.2109375 }, { "epoch": 
0.21359223300970873, "grad_norm": 1.8894907173358744, "kl": 0.1845703125, "learning_rate": 4.987674264578615e-06, "loss": 0.2974, "step": 22, "step_loss": 0.2314453125 }, { "epoch": 0.22330097087378642, "grad_norm": 3.672752113284032, "kl": 0.1748046875, "learning_rate": 4.9861830580143665e-06, "loss": 0.3722, "step": 23, "step_loss": 0.255859375 }, { "epoch": 0.23300970873786409, "grad_norm": 1.0485056233907877, "kl": 0.1494140625, "learning_rate": 4.984606998807432e-06, "loss": 0.2997, "step": 24, "step_loss": 0.396484375 }, { "epoch": 0.24271844660194175, "grad_norm": 1.3694786710161069, "kl": 0.11474609375, "learning_rate": 4.982946146761856e-06, "loss": 0.2984, "step": 25, "step_loss": 0.1396484375 }, { "epoch": 0.2524271844660194, "grad_norm": 1.0149380047661418, "kl": 0.2314453125, "learning_rate": 4.981200564899172e-06, "loss": 0.2926, "step": 26, "step_loss": 0.34375 }, { "epoch": 0.2621359223300971, "grad_norm": 3.3346882609517627, "kl": 0.1630859375, "learning_rate": 4.979370319456011e-06, "loss": 0.325, "step": 27, "step_loss": 0.2197265625 }, { "epoch": 0.27184466019417475, "grad_norm": 1.1026809101068904, "kl": 0.1171875, "learning_rate": 4.977455479881591e-06, "loss": 0.3124, "step": 28, "step_loss": 0.42578125 }, { "epoch": 0.2815533980582524, "grad_norm": 1.4388631908391791, "kl": 0.2470703125, "learning_rate": 4.975456118835079e-06, "loss": 0.2766, "step": 29, "step_loss": 0.2275390625 }, { "epoch": 0.2912621359223301, "grad_norm": 1.654229929536635, "kl": 0.23828125, "learning_rate": 4.973372312182835e-06, "loss": 0.3192, "step": 30, "step_loss": 0.5546875 }, { "epoch": 0.30097087378640774, "grad_norm": 1.0784118934051907, "kl": 0.138671875, "learning_rate": 4.971204138995531e-06, "loss": 0.2877, "step": 31, "step_loss": 0.26171875 }, { "epoch": 0.3106796116504854, "grad_norm": 1.789773991131577, "kl": 0.2138671875, "learning_rate": 4.968951681545156e-06, "loss": 0.3597, "step": 32, "step_loss": 0.23828125 }, { "epoch": 0.32038834951456313, 
"grad_norm": 1.3349882623963718, "kl": 0.1240234375, "learning_rate": 4.96661502530189e-06, "loss": 0.2909, "step": 33, "step_loss": 0.2431640625 }, { "epoch": 0.3300970873786408, "grad_norm": 0.8694048190676366, "kl": 0.12109375, "learning_rate": 4.96419425893086e-06, "loss": 0.2964, "step": 34, "step_loss": 0.25 }, { "epoch": 0.33980582524271846, "grad_norm": 1.247182149510005, "kl": 0.1650390625, "learning_rate": 4.96168947428878e-06, "loss": 0.3136, "step": 35, "step_loss": 0.25 }, { "epoch": 0.34951456310679613, "grad_norm": 0.9312530287110918, "kl": 0.1630859375, "learning_rate": 4.959100766420458e-06, "loss": 0.3164, "step": 36, "step_loss": 0.306640625 }, { "epoch": 0.3592233009708738, "grad_norm": 0.9175318225986004, "kl": 0.22265625, "learning_rate": 4.9564282335552e-06, "loss": 0.2954, "step": 37, "step_loss": 0.259765625 }, { "epoch": 0.36893203883495146, "grad_norm": 0.993550543291835, "kl": 0.193359375, "learning_rate": 4.953671977103074e-06, "loss": 0.2802, "step": 38, "step_loss": 0.28125 }, { "epoch": 0.3786407766990291, "grad_norm": 1.0512485528634326, "kl": 0.7578125, "learning_rate": 4.950832101651063e-06, "loss": 0.2843, "step": 39, "step_loss": 0.58203125 }, { "epoch": 0.3883495145631068, "grad_norm": 0.6686286343760834, "kl": 0.11376953125, "learning_rate": 4.947908714959102e-06, "loss": 0.2512, "step": 40, "step_loss": 0.142578125 }, { "epoch": 0.39805825242718446, "grad_norm": 0.8469829082363365, "kl": 0.1650390625, "learning_rate": 4.944901927955983e-06, "loss": 0.3058, "step": 41, "step_loss": 0.5546875 }, { "epoch": 0.4077669902912621, "grad_norm": 0.7813325456880021, "kl": 0.2421875, "learning_rate": 4.941811854735148e-06, "loss": 0.2823, "step": 42, "step_loss": 0.193359375 }, { "epoch": 0.4174757281553398, "grad_norm": 0.9078321786346385, "kl": 0.11865234375, "learning_rate": 4.938638612550361e-06, "loss": 0.2902, "step": 43, "step_loss": 0.22265625 }, { "epoch": 0.42718446601941745, "grad_norm": 0.8942167570500859, "kl": 0.138671875, 
"learning_rate": 4.935382321811256e-06, "loss": 0.3012, "step": 44, "step_loss": 0.3828125 }, { "epoch": 0.4368932038834951, "grad_norm": 0.8140639878037927, "kl": 0.166015625, "learning_rate": 4.932043106078772e-06, "loss": 0.2861, "step": 45, "step_loss": 0.197265625 }, { "epoch": 0.44660194174757284, "grad_norm": 1.4604431784813239, "kl": 0.302734375, "learning_rate": 4.928621092060457e-06, "loss": 0.3029, "step": 46, "step_loss": 0.49609375 }, { "epoch": 0.4563106796116505, "grad_norm": 0.7529571567774104, "kl": 0.1318359375, "learning_rate": 4.925116409605672e-06, "loss": 0.2834, "step": 47, "step_loss": 0.275390625 }, { "epoch": 0.46601941747572817, "grad_norm": 0.7826578425297724, "kl": 0.267578125, "learning_rate": 4.92152919170065e-06, "loss": 0.3034, "step": 48, "step_loss": 0.5234375 }, { "epoch": 0.47572815533980584, "grad_norm": 1.066438164425578, "kl": 0.2109375, "learning_rate": 4.917859574463462e-06, "loss": 0.3071, "step": 49, "step_loss": 0.2236328125 }, { "epoch": 0.4854368932038835, "grad_norm": 1.7914744281397004, "kl": 0.1728515625, "learning_rate": 4.9141076971388435e-06, "loss": 0.289, "step": 50, "step_loss": 0.3046875 }, { "epoch": 0.49514563106796117, "grad_norm": 0.7820108904555855, "kl": 0.126953125, "learning_rate": 4.9102737020929135e-06, "loss": 0.2661, "step": 51, "step_loss": 0.21875 }, { "epoch": 0.5048543689320388, "grad_norm": 0.8477559536427011, "kl": 0.1875, "learning_rate": 4.906357734807776e-06, "loss": 0.3048, "step": 52, "step_loss": 0.2578125 }, { "epoch": 0.5145631067961165, "grad_norm": 0.781267699994807, "kl": 0.150390625, "learning_rate": 4.902359943875992e-06, "loss": 0.2837, "step": 53, "step_loss": 0.2734375 }, { "epoch": 0.5242718446601942, "grad_norm": 0.7373706510227717, "kl": 0.146484375, "learning_rate": 4.89828048099495e-06, "loss": 0.2632, "step": 54, "step_loss": 0.16015625 }, { "epoch": 0.5339805825242718, "grad_norm": 0.7396378743294025, "kl": 0.2421875, "learning_rate": 4.894119500961103e-06, "loss": 
0.2848, "step": 55, "step_loss": 0.458984375 }, { "epoch": 0.5436893203883495, "grad_norm": 0.8384171545494072, "kl": 0.1728515625, "learning_rate": 4.889877161664096e-06, "loss": 0.311, "step": 56, "step_loss": 0.4609375 }, { "epoch": 0.5533980582524272, "grad_norm": 0.7847312422227414, "kl": 0.205078125, "learning_rate": 4.885553624080778e-06, "loss": 0.2971, "step": 57, "step_loss": 0.203125 }, { "epoch": 0.5631067961165048, "grad_norm": 0.791715637164775, "kl": 0.1494140625, "learning_rate": 4.881149052269091e-06, "loss": 0.2983, "step": 58, "step_loss": 0.197265625 }, { "epoch": 0.5728155339805825, "grad_norm": 0.6776733370951615, "kl": 0.158203125, "learning_rate": 4.876663613361844e-06, "loss": 0.2871, "step": 59, "step_loss": 0.2490234375 }, { "epoch": 0.5825242718446602, "grad_norm": 0.6974646233573695, "kl": 0.20703125, "learning_rate": 4.8720974775603745e-06, "loss": 0.2983, "step": 60, "step_loss": 0.203125 }, { "epoch": 0.5922330097087378, "grad_norm": 1.1133484937202898, "kl": 1.1171875, "learning_rate": 4.867450818128086e-06, "loss": 0.2969, "step": 61, "step_loss": 0.89453125 }, { "epoch": 0.6019417475728155, "grad_norm": 0.7147826288493739, "kl": 0.1279296875, "learning_rate": 4.862723811383878e-06, "loss": 0.2819, "step": 62, "step_loss": 0.341796875 }, { "epoch": 0.6116504854368932, "grad_norm": 0.762521447990066, "kl": 0.1591796875, "learning_rate": 4.857916636695449e-06, "loss": 0.2812, "step": 63, "step_loss": 0.1533203125 }, { "epoch": 0.6213592233009708, "grad_norm": 0.6945241251902946, "kl": 0.2294921875, "learning_rate": 4.853029476472499e-06, "loss": 0.2986, "step": 64, "step_loss": 0.275390625 }, { "epoch": 0.6310679611650486, "grad_norm": 0.7038147827740143, "kl": 0.1708984375, "learning_rate": 4.848062516159801e-06, "loss": 0.3015, "step": 65, "step_loss": 0.400390625 }, { "epoch": 0.6407766990291263, "grad_norm": 0.8016050985516795, "kl": 0.19140625, "learning_rate": 4.843015944230166e-06, "loss": 0.2873, "step": 66, "step_loss": 
0.279296875 }, { "epoch": 0.6504854368932039, "grad_norm": 0.7769984336135177, "kl": 0.2451171875, "learning_rate": 4.837889952177294e-06, "loss": 0.2899, "step": 67, "step_loss": 0.56640625 }, { "epoch": 0.6601941747572816, "grad_norm": 0.6998891333931025, "kl": 0.138671875, "learning_rate": 4.832684734508502e-06, "loss": 0.2643, "step": 68, "step_loss": 0.330078125 }, { "epoch": 0.6699029126213593, "grad_norm": 1.0392663511926639, "kl": 0.1640625, "learning_rate": 4.827400488737351e-06, "loss": 0.2863, "step": 69, "step_loss": 0.287109375 }, { "epoch": 0.6796116504854369, "grad_norm": 0.8630658049176575, "kl": 0.40234375, "learning_rate": 4.822037415376147e-06, "loss": 0.2894, "step": 70, "step_loss": 0.435546875 }, { "epoch": 0.6893203883495146, "grad_norm": 0.7810137963853107, "kl": 0.271484375, "learning_rate": 4.816595717928327e-06, "loss": 0.2842, "step": 71, "step_loss": 0.2451171875 }, { "epoch": 0.6990291262135923, "grad_norm": 0.7611975818745967, "kl": 0.181640625, "learning_rate": 4.8110756028807506e-06, "loss": 0.2618, "step": 72, "step_loss": 0.3359375 }, { "epoch": 0.7087378640776699, "grad_norm": 0.9039894718510777, "kl": 0.17578125, "learning_rate": 4.805477279695852e-06, "loss": 0.304, "step": 73, "step_loss": 0.369140625 }, { "epoch": 0.7184466019417476, "grad_norm": 0.6848452637621099, "kl": 0.154296875, "learning_rate": 4.799800960803699e-06, "loss": 0.2583, "step": 74, "step_loss": 0.265625 }, { "epoch": 0.7281553398058253, "grad_norm": 0.7706941057886834, "kl": 0.11962890625, "learning_rate": 4.794046861593929e-06, "loss": 0.2913, "step": 75, "step_loss": 0.19921875 }, { "epoch": 0.7378640776699029, "grad_norm": 0.7340458477237756, "kl": 0.1953125, "learning_rate": 4.788215200407576e-06, "loss": 0.2725, "step": 76, "step_loss": 0.357421875 }, { "epoch": 0.7475728155339806, "grad_norm": 1.0806357960026565, "kl": 0.23046875, "learning_rate": 4.7823061985287906e-06, "loss": 0.2781, "step": 77, "step_loss": 0.193359375 }, { "epoch": 
0.7572815533980582, "grad_norm": 0.7704916059097427, "kl": 0.34375, "learning_rate": 4.776320080176434e-06, "loss": 0.2742, "step": 78, "step_loss": 0.3203125 }, { "epoch": 0.7669902912621359, "grad_norm": 0.7492040543213823, "kl": 0.18359375, "learning_rate": 4.770257072495581e-06, "loss": 0.2776, "step": 79, "step_loss": 0.2109375 }, { "epoch": 0.7766990291262136, "grad_norm": 0.7467369782034328, "kl": 0.51171875, "learning_rate": 4.764117405548891e-06, "loss": 0.2377, "step": 80, "step_loss": 0.365234375 }, { "epoch": 0.7864077669902912, "grad_norm": 0.9406968288941178, "kl": 0.1513671875, "learning_rate": 4.757901312307882e-06, "loss": 0.2708, "step": 81, "step_loss": 0.162109375 }, { "epoch": 0.7961165048543689, "grad_norm": 0.7660157121486119, "kl": 0.2333984375, "learning_rate": 4.751609028644097e-06, "loss": 0.2877, "step": 82, "step_loss": 0.30078125 }, { "epoch": 0.8058252427184466, "grad_norm": 0.6957183682004972, "kl": 0.1806640625, "learning_rate": 4.7452407933201395e-06, "loss": 0.2403, "step": 83, "step_loss": 0.265625 }, { "epoch": 0.8155339805825242, "grad_norm": 0.7043740093288546, "kl": 0.1767578125, "learning_rate": 4.738796847980627e-06, "loss": 0.2735, "step": 84, "step_loss": 0.3359375 }, { "epoch": 0.8252427184466019, "grad_norm": 0.6782132306690141, "kl": 0.1572265625, "learning_rate": 4.732277437143015e-06, "loss": 0.2517, "step": 85, "step_loss": 0.2119140625 }, { "epoch": 0.8349514563106796, "grad_norm": 0.7479330827900228, "kl": 0.21484375, "learning_rate": 4.725682808188321e-06, "loss": 0.2641, "step": 86, "step_loss": 0.232421875 }, { "epoch": 0.8446601941747572, "grad_norm": 0.7620154952078262, "kl": 0.10791015625, "learning_rate": 4.719013211351734e-06, "loss": 0.2572, "step": 87, "step_loss": 0.146484375 }, { "epoch": 0.8543689320388349, "grad_norm": 0.6678822109983321, "kl": 0.2060546875, "learning_rate": 4.712268899713125e-06, "loss": 0.2878, "step": 88, "step_loss": 0.275390625 }, { "epoch": 0.8640776699029126, "grad_norm": 
0.7456708178708048, "kl": 0.1298828125, "learning_rate": 4.705450129187439e-06, "loss": 0.2746, "step": 89, "step_loss": 0.2275390625 }, { "epoch": 0.8737864077669902, "grad_norm": 0.6177231600838415, "kl": 0.2236328125, "learning_rate": 4.698557158514988e-06, "loss": 0.2491, "step": 90, "step_loss": 0.412109375 }, { "epoch": 0.883495145631068, "grad_norm": 0.8797248927244058, "kl": 0.1513671875, "learning_rate": 4.691590249251627e-06, "loss": 0.2745, "step": 91, "step_loss": 0.12353515625 }, { "epoch": 0.8932038834951457, "grad_norm": 0.7015383302055267, "kl": 0.1162109375, "learning_rate": 4.684549665758839e-06, "loss": 0.2778, "step": 92, "step_loss": 0.189453125 }, { "epoch": 0.9029126213592233, "grad_norm": 0.7073373711435095, "kl": 0.2490234375, "learning_rate": 4.677435675193692e-06, "loss": 0.2862, "step": 93, "step_loss": 0.26171875 }, { "epoch": 0.912621359223301, "grad_norm": 0.6637944746473083, "kl": 0.14453125, "learning_rate": 4.670248547498712e-06, "loss": 0.2734, "step": 94, "step_loss": 0.1376953125 }, { "epoch": 0.9223300970873787, "grad_norm": 0.720119043217982, "kl": 0.1865234375, "learning_rate": 4.662988555391632e-06, "loss": 0.2899, "step": 95, "step_loss": 0.423828125 }, { "epoch": 0.9320388349514563, "grad_norm": 0.6938324080314517, "kl": 0.16796875, "learning_rate": 4.655655974355051e-06, "loss": 0.2714, "step": 96, "step_loss": 0.255859375 }, { "epoch": 0.941747572815534, "grad_norm": 0.6610518480589428, "kl": 0.1630859375, "learning_rate": 4.648251082625975e-06, "loss": 0.2837, "step": 97, "step_loss": 0.29296875 }, { "epoch": 0.9514563106796117, "grad_norm": 0.6877843962165877, "kl": 0.1748046875, "learning_rate": 4.640774161185259e-06, "loss": 0.3036, "step": 98, "step_loss": 0.515625 }, { "epoch": 0.9611650485436893, "grad_norm": 0.6332816751225963, "kl": 0.1533203125, "learning_rate": 4.633225493746951e-06, "loss": 0.2489, "step": 99, "step_loss": 0.1826171875 }, { "epoch": 0.970873786407767, "grad_norm": 0.7362793924005384, "kl": 
0.123046875, "learning_rate": 4.625605366747519e-06, "loss": 0.2622, "step": 100, "step_loss": 0.12890625 }, { "epoch": 0.9805825242718447, "grad_norm": 0.6742913243561754, "kl": 0.2119140625, "learning_rate": 4.617914069334989e-06, "loss": 0.2544, "step": 101, "step_loss": 0.2578125 }, { "epoch": 0.9902912621359223, "grad_norm": 0.6047332480224725, "kl": 0.259765625, "learning_rate": 4.610151893357968e-06, "loss": 0.2546, "step": 102, "step_loss": 0.37890625 }, { "epoch": 1.0, "grad_norm": 0.6546589091756503, "kl": 0.2138671875, "learning_rate": 4.602319133354571e-06, "loss": 0.2566, "step": 103, "step_loss": 0.2177734375 }, { "epoch": 1.0, "eval_test_transformed.json_loss": null, "eval_test_transformed.json_runtime": 8.816, "eval_test_transformed.json_samples_per_second": 56.715, "eval_test_transformed.json_steps_per_second": 1.815, "step": 103 }, { "epoch": 1.0097087378640777, "grad_norm": 0.639387377398209, "kl": null, "learning_rate": 4.594416086541248e-06, "loss": 0.2493, "step": 104, "step_loss": null }, { "epoch": 1.0194174757281553, "grad_norm": 0.7392491046589192, "kl": 0.1640625, "learning_rate": 4.5864430528014996e-06, "loss": 0.2413, "step": 105, "step_loss": 0.1396484375 }, { "epoch": 1.029126213592233, "grad_norm": 0.5649339416591039, "kl": 0.2001953125, "learning_rate": 4.578400334674503e-06, "loss": 0.2223, "step": 106, "step_loss": 0.318359375 }, { "epoch": 1.0388349514563107, "grad_norm": 0.6022576613202164, "kl": 0.1708984375, "learning_rate": 4.570288237343632e-06, "loss": 0.2368, "step": 107, "step_loss": 0.24609375 }, { "epoch": 1.0485436893203883, "grad_norm": 0.6973100963269709, "kl": 0.474609375, "learning_rate": 4.562107068624874e-06, "loss": 0.2363, "step": 108, "step_loss": 0.255859375 }, { "epoch": 1.058252427184466, "grad_norm": 0.6281000265214681, "kl": 0.16796875, "learning_rate": 4.55385713895515e-06, "loss": 0.2271, "step": 109, "step_loss": 0.2578125 }, { "epoch": 1.0679611650485437, "grad_norm": 0.6251998581820843, "kl": 
0.19921875, "learning_rate": 4.545538761380539e-06, "loss": 0.2444, "step": 110, "step_loss": 0.1533203125 }, { "epoch": 1.0776699029126213, "grad_norm": 0.6499920789529428, "kl": 0.140625, "learning_rate": 4.537152251544394e-06, "loss": 0.2575, "step": 111, "step_loss": 0.21484375 }, { "epoch": 1.087378640776699, "grad_norm": 0.6196063483230071, "kl": 0.34765625, "learning_rate": 4.5286979276753675e-06, "loss": 0.2216, "step": 112, "step_loss": 0.24609375 }, { "epoch": 1.0970873786407767, "grad_norm": 0.6519876067572186, "kl": 0.2265625, "learning_rate": 4.520176110575338e-06, "loss": 0.2209, "step": 113, "step_loss": 0.1748046875 }, { "epoch": 1.1067961165048543, "grad_norm": 0.6464673181225589, "kl": 0.2041015625, "learning_rate": 4.511587123607232e-06, "loss": 0.2242, "step": 114, "step_loss": 0.271484375 }, { "epoch": 1.116504854368932, "grad_norm": 0.5620248695246799, "kl": 0.23046875, "learning_rate": 4.502931292682759e-06, "loss": 0.225, "step": 115, "step_loss": 0.255859375 }, { "epoch": 1.1262135922330097, "grad_norm": 0.6212910752562149, "kl": 0.3203125, "learning_rate": 4.494208946250042e-06, "loss": 0.2322, "step": 116, "step_loss": 0.166015625 }, { "epoch": 1.1359223300970873, "grad_norm": 0.6102956517360447, "kl": 0.134765625, "learning_rate": 4.485420415281157e-06, "loss": 0.246, "step": 117, "step_loss": 0.2099609375 }, { "epoch": 1.145631067961165, "grad_norm": 0.8959975572620882, "kl": 0.341796875, "learning_rate": 4.4765660332595686e-06, "loss": 0.2519, "step": 118, "step_loss": 0.154296875 }, { "epoch": 1.1553398058252426, "grad_norm": 0.5883698395469736, "kl": 0.30078125, "learning_rate": 4.467646136167482e-06, "loss": 0.2548, "step": 119, "step_loss": 0.22265625 }, { "epoch": 1.1650485436893203, "grad_norm": 0.5899290313710992, "kl": 0.1669921875, "learning_rate": 4.458661062473091e-06, "loss": 0.2327, "step": 120, "step_loss": 0.1533203125 }, { "epoch": 1.174757281553398, "grad_norm": 0.7434899396388887, "kl": 0.294921875, "learning_rate": 
4.449611153117736e-06, "loss": 0.2415, "step": 121, "step_loss": 0.16796875 }, { "epoch": 1.1844660194174756, "grad_norm": 0.5645672375917218, "kl": 0.21875, "learning_rate": 4.4404967515029655e-06, "loss": 0.198, "step": 122, "step_loss": 0.27734375 }, { "epoch": 1.1941747572815533, "grad_norm": 0.61620433580154, "kl": 0.28125, "learning_rate": 4.431318203477505e-06, "loss": 0.214, "step": 123, "step_loss": 0.18359375 }, { "epoch": 1.203883495145631, "grad_norm": 0.5927124243417978, "kl": 0.29296875, "learning_rate": 4.422075857324138e-06, "loss": 0.2182, "step": 124, "step_loss": 0.25 }, { "epoch": 1.2135922330097086, "grad_norm": 0.6201091855952103, "kl": 0.18359375, "learning_rate": 4.412770063746483e-06, "loss": 0.2207, "step": 125, "step_loss": 0.1865234375 }, { "epoch": 1.2233009708737863, "grad_norm": 0.911604159360823, "kl": 0.162109375, "learning_rate": 4.403401175855695e-06, "loss": 0.2471, "step": 126, "step_loss": 0.1474609375 }, { "epoch": 1.233009708737864, "grad_norm": 0.6792470228209272, "kl": 0.0927734375, "learning_rate": 4.3939695491570596e-06, "loss": 0.2351, "step": 127, "step_loss": 0.146484375 }, { "epoch": 1.2427184466019416, "grad_norm": 0.6012469615978469, "kl": 0.212890625, "learning_rate": 4.384475541536505e-06, "loss": 0.2434, "step": 128, "step_loss": 0.2041015625 }, { "epoch": 1.2524271844660193, "grad_norm": 0.589209563012478, "kl": 0.2041015625, "learning_rate": 4.374919513247021e-06, "loss": 0.2129, "step": 129, "step_loss": 0.109375 }, { "epoch": 1.262135922330097, "grad_norm": 0.5987454630415056, "kl": 0.22265625, "learning_rate": 4.3653018268949945e-06, "loss": 0.2179, "step": 130, "step_loss": 0.166015625 }, { "epoch": 1.2718446601941746, "grad_norm": 0.5813161566217064, "kl": 0.2041015625, "learning_rate": 4.355622847426444e-06, "loss": 0.2183, "step": 131, "step_loss": 0.431640625 }, { "epoch": 1.2815533980582523, "grad_norm": 0.644992270460315, "kl": 0.25, "learning_rate": 4.345882942113171e-06, "loss": 0.2412, "step": 132, 
"step_loss": 0.1474609375 }, { "epoch": 1.29126213592233, "grad_norm": 0.5838255650379847, "kl": 0.4140625, "learning_rate": 4.336082480538832e-06, "loss": 0.2223, "step": 133, "step_loss": 0.171875 }, { "epoch": 1.3009708737864076, "grad_norm": 0.7975539886352869, "kl": 0.3125, "learning_rate": 4.326221834584905e-06, "loss": 0.2218, "step": 134, "step_loss": 0.47265625 }, { "epoch": 1.3106796116504853, "grad_norm": 0.5800964403953691, "kl": 0.181640625, "learning_rate": 4.316301378416585e-06, "loss": 0.239, "step": 135, "step_loss": 0.1552734375 }, { "epoch": 1.3203883495145632, "grad_norm": 0.5247261922814125, "kl": 0.1923828125, "learning_rate": 4.306321488468583e-06, "loss": 0.2337, "step": 136, "step_loss": 0.119140625 }, { "epoch": 1.3300970873786409, "grad_norm": 0.6297520228191021, "kl": 0.1435546875, "learning_rate": 4.296282543430841e-06, "loss": 0.24, "step": 137, "step_loss": 0.29296875 }, { "epoch": 1.3398058252427185, "grad_norm": 0.5734205082343037, "kl": 0.216796875, "learning_rate": 4.286184924234168e-06, "loss": 0.2218, "step": 138, "step_loss": 0.15234375 }, { "epoch": 1.3495145631067962, "grad_norm": 0.5442629148889834, "kl": 0.2255859375, "learning_rate": 4.276029014035777e-06, "loss": 0.2189, "step": 139, "step_loss": 0.2265625 }, { "epoch": 1.3592233009708738, "grad_norm": 0.5288732036595232, "kl": 0.12060546875, "learning_rate": 4.265815198204754e-06, "loss": 0.2428, "step": 140, "step_loss": 0.169921875 }, { "epoch": 1.3689320388349515, "grad_norm": 0.6546290678837918, "kl": 0.2451171875, "learning_rate": 4.2555438643074315e-06, "loss": 0.2422, "step": 141, "step_loss": 0.21875 }, { "epoch": 1.3786407766990292, "grad_norm": 0.585078334332048, "kl": 0.2041015625, "learning_rate": 4.245215402092681e-06, "loss": 0.2455, "step": 142, "step_loss": 0.205078125 }, { "epoch": 1.3883495145631068, "grad_norm": 0.556943790797957, "kl": 0.1748046875, "learning_rate": 4.234830203477126e-06, "loss": 0.2064, "step": 143, "step_loss": 0.201171875 }, { 
"epoch": 1.3980582524271845, "grad_norm": 0.5946911580488923, "kl": 0.14453125, "learning_rate": 4.224388662530271e-06, "loss": 0.2218, "step": 144, "step_loss": 0.2431640625 }, { "epoch": 1.4077669902912622, "grad_norm": 0.5810717668347283, "kl": 0.326171875, "learning_rate": 4.213891175459545e-06, "loss": 0.2442, "step": 145, "step_loss": 0.1796875 }, { "epoch": 1.4174757281553398, "grad_norm": 0.6115382263091327, "kl": 0.1865234375, "learning_rate": 4.203338140595272e-06, "loss": 0.2202, "step": 146, "step_loss": 0.26171875 }, { "epoch": 1.4271844660194175, "grad_norm": 0.5465729015270102, "kl": 0.248046875, "learning_rate": 4.192729958375552e-06, "loss": 0.2129, "step": 147, "step_loss": 0.28125 }, { "epoch": 1.4368932038834952, "grad_norm": 0.5834391929303593, "kl": 0.162109375, "learning_rate": 4.1820670313310684e-06, "loss": 0.2328, "step": 148, "step_loss": 0.1103515625 }, { "epoch": 1.4466019417475728, "grad_norm": 0.6239303256729286, "kl": 0.19921875, "learning_rate": 4.171349764069815e-06, "loss": 0.2468, "step": 149, "step_loss": 0.2275390625 }, { "epoch": 1.4563106796116505, "grad_norm": 0.5464914339167269, "kl": 0.154296875, "learning_rate": 4.16057856326174e-06, "loss": 0.2145, "step": 150, "step_loss": 0.21484375 }, { "epoch": 1.4660194174757282, "grad_norm": 0.5714192820756144, "kl": 0.34765625, "learning_rate": 4.149753837623317e-06, "loss": 0.2293, "step": 151, "step_loss": 0.1142578125 }, { "epoch": 1.4757281553398058, "grad_norm": 0.6222270965621777, "kl": 0.20703125, "learning_rate": 4.1388759979020346e-06, "loss": 0.2378, "step": 152, "step_loss": 0.2158203125 }, { "epoch": 1.4854368932038835, "grad_norm": 0.6459648372732245, "kl": 0.1728515625, "learning_rate": 4.127945456860813e-06, "loss": 0.2301, "step": 153, "step_loss": 0.154296875 }, { "epoch": 1.4951456310679612, "grad_norm": 0.613782544176392, "kl": 0.21875, "learning_rate": 4.116962629262339e-06, "loss": 0.2155, "step": 154, "step_loss": 0.2099609375 }, { "epoch": 
1.5048543689320388, "grad_norm": 0.569655314094469, "kl": 0.220703125, "learning_rate": 4.105927931853327e-06, "loss": 0.231, "step": 155, "step_loss": 0.26953125 }, { "epoch": 1.5145631067961165, "grad_norm": 0.6309412616695066, "kl": 0.369140625, "learning_rate": 4.094841783348711e-06, "loss": 0.2435, "step": 156, "step_loss": 0.1689453125 }, { "epoch": 1.5242718446601942, "grad_norm": 0.6105234133068124, "kl": 0.2138671875, "learning_rate": 4.083704604415749e-06, "loss": 0.2052, "step": 157, "step_loss": 0.2412109375 }, { "epoch": 1.5339805825242718, "grad_norm": 0.5804176749635781, "kl": 0.2197265625, "learning_rate": 4.072516817658065e-06, "loss": 0.2508, "step": 158, "step_loss": 0.171875 }, { "epoch": 1.5436893203883495, "grad_norm": 0.55765374326049, "kl": 0.306640625, "learning_rate": 4.0612788475996125e-06, "loss": 0.2164, "step": 159, "step_loss": 0.2001953125 }, { "epoch": 1.5533980582524272, "grad_norm": 0.6074565129354751, "kl": 0.7265625, "learning_rate": 4.049991120668566e-06, "loss": 0.229, "step": 160, "step_loss": 0.40625 }, { "epoch": 1.5631067961165048, "grad_norm": 0.7748587294998153, "kl": 1.328125, "learning_rate": 4.038654065181137e-06, "loss": 0.281, "step": 161, "step_loss": 0.8984375 }, { "epoch": 1.5728155339805825, "grad_norm": 0.5644659021112801, "kl": 0.205078125, "learning_rate": 4.027268111325328e-06, "loss": 0.2397, "step": 162, "step_loss": 0.26953125 }, { "epoch": 1.5825242718446602, "grad_norm": 0.5648454404940179, "kl": 0.1533203125, "learning_rate": 4.015833691144603e-06, "loss": 0.2157, "step": 163, "step_loss": 0.1416015625 }, { "epoch": 1.5922330097087378, "grad_norm": 0.5903127460469126, "kl": 0.224609375, "learning_rate": 4.0043512385214924e-06, "loss": 0.2308, "step": 164, "step_loss": 0.1669921875 }, { "epoch": 1.6019417475728155, "grad_norm": 0.5951640957860757, "kl": 0.26953125, "learning_rate": 3.9928211891611385e-06, "loss": 0.2136, "step": 165, "step_loss": 0.1875 }, { "epoch": 1.6116504854368932, "grad_norm": 
0.5261435237006341, "kl": 0.17578125, "learning_rate": 3.981243980574751e-06, "loss": 0.2382, "step": 166, "step_loss": 0.234375 }, { "epoch": 1.6213592233009708, "grad_norm": 0.5887020505829587, "kl": 0.2255859375, "learning_rate": 3.969620052063012e-06, "loss": 0.2352, "step": 167, "step_loss": 0.2236328125 }, { "epoch": 1.6310679611650487, "grad_norm": 0.6254868280303059, "kl": 0.2001953125, "learning_rate": 3.957949844699405e-06, "loss": 0.2235, "step": 168, "step_loss": 0.162109375 }, { "epoch": 1.6407766990291264, "grad_norm": 0.5478297482531744, "kl": 0.23046875, "learning_rate": 3.946233801313482e-06, "loss": 0.2201, "step": 169, "step_loss": 0.30859375 }, { "epoch": 1.650485436893204, "grad_norm": 0.5691412826879887, "kl": 0.2177734375, "learning_rate": 3.934472366474051e-06, "loss": 0.2014, "step": 170, "step_loss": 0.2001953125 }, { "epoch": 1.6601941747572817, "grad_norm": 0.5604014457233358, "kl": 0.16796875, "learning_rate": 3.922665986472316e-06, "loss": 0.2348, "step": 171, "step_loss": 0.27734375 }, { "epoch": 1.6699029126213594, "grad_norm": 0.5166321657054415, "kl": 0.1708984375, "learning_rate": 3.91081510930494e-06, "loss": 0.2125, "step": 172, "step_loss": 0.263671875 }, { "epoch": 1.679611650485437, "grad_norm": 0.567633227241871, "kl": 0.2197265625, "learning_rate": 3.8989201846570405e-06, "loss": 0.2257, "step": 173, "step_loss": 0.16796875 }, { "epoch": 1.6893203883495147, "grad_norm": 0.6483568118346154, "kl": 0.185546875, "learning_rate": 3.886981663885134e-06, "loss": 0.2426, "step": 174, "step_loss": 0.25390625 }, { "epoch": 1.6990291262135924, "grad_norm": 0.6784974565198278, "kl": 0.28125, "learning_rate": 3.875e-06, "loss": 0.2508, "step": 175, "step_loss": 0.453125 }, { "epoch": 1.70873786407767, "grad_norm": 0.5532390595417498, "kl": 0.20703125, "learning_rate": 3.862975647649503e-06, "loss": 0.2183, "step": 176, "step_loss": 0.12890625 }, { "epoch": 1.7184466019417477, "grad_norm": 0.5636800996705107, "kl": 0.228515625, 
"learning_rate": 3.850909063101328e-06, "loss": 0.2107, "step": 177, "step_loss": 0.1845703125 }, { "epoch": 1.7281553398058254, "grad_norm": 0.5703475366568808, "kl": 0.1337890625, "learning_rate": 3.838800704225679e-06, "loss": 0.2135, "step": 178, "step_loss": 0.1708984375 }, { "epoch": 1.737864077669903, "grad_norm": 0.6101234925448211, "kl": 0.1904296875, "learning_rate": 3.826651030477896e-06, "loss": 0.2385, "step": 179, "step_loss": 0.177734375 }, { "epoch": 1.7475728155339807, "grad_norm": 0.5393184109703284, "kl": 0.1708984375, "learning_rate": 3.8144605028810233e-06, "loss": 0.2215, "step": 180, "step_loss": 0.18359375 }, { "epoch": 1.7572815533980584, "grad_norm": 0.560735690651583, "kl": 0.322265625, "learning_rate": 3.802229584008321e-06, "loss": 0.2235, "step": 181, "step_loss": 0.2353515625 }, { "epoch": 1.766990291262136, "grad_norm": 0.5687197299908796, "kl": 0.318359375, "learning_rate": 3.789958737965705e-06, "loss": 0.2377, "step": 182, "step_loss": 0.42578125 }, { "epoch": 1.7766990291262137, "grad_norm": 0.517105808317797, "kl": 0.2265625, "learning_rate": 3.777648430374142e-06, "loss": 0.2247, "step": 183, "step_loss": 0.423828125 }, { "epoch": 1.7864077669902914, "grad_norm": 0.5846543567424347, "kl": 0.150390625, "learning_rate": 3.765299128351977e-06, "loss": 0.2233, "step": 184, "step_loss": 0.14453125 }, { "epoch": 1.796116504854369, "grad_norm": 0.550349343500017, "kl": 0.5078125, "learning_rate": 3.7529113004972117e-06, "loss": 0.2296, "step": 185, "step_loss": 0.32421875 }, { "epoch": 1.8058252427184467, "grad_norm": 0.6054819413255546, "kl": 0.2890625, "learning_rate": 3.740485416869722e-06, "loss": 0.2266, "step": 186, "step_loss": 0.2373046875 }, { "epoch": 1.8155339805825244, "grad_norm": 0.5882865611694021, "kl": 0.2001953125, "learning_rate": 3.7280219489734214e-06, "loss": 0.2265, "step": 187, "step_loss": 0.1259765625 }, { "epoch": 1.825242718446602, "grad_norm": 0.6392508865666573, "kl": 0.328125, "learning_rate": 
3.7155213697383702e-06, "loss": 0.2463, "step": 188, "step_loss": 0.173828125 }, { "epoch": 1.8349514563106797, "grad_norm": 0.5196239704198046, "kl": 0.1923828125, "learning_rate": 3.70298415350283e-06, "loss": 0.2359, "step": 189, "step_loss": 0.123046875 }, { "epoch": 1.8446601941747574, "grad_norm": 0.6494083949250458, "kl": 1.0859375, "learning_rate": 3.690410775995263e-06, "loss": 0.2307, "step": 190, "step_loss": 0.44921875 }, { "epoch": 1.854368932038835, "grad_norm": 0.5435542784787667, "kl": 0.3359375, "learning_rate": 3.677801714316283e-06, "loss": 0.2252, "step": 191, "step_loss": 0.2265625 }, { "epoch": 1.8640776699029127, "grad_norm": 0.6511666190719619, "kl": 0.248046875, "learning_rate": 3.665157446920551e-06, "loss": 0.2525, "step": 192, "step_loss": 0.189453125 }, { "epoch": 1.8737864077669903, "grad_norm": 0.6128430106715034, "kl": 0.306640625, "learning_rate": 3.6524784535986175e-06, "loss": 0.2723, "step": 193, "step_loss": 0.298828125 }, { "epoch": 1.883495145631068, "grad_norm": 0.5849812397434826, "kl": 0.1875, "learning_rate": 3.639765215458721e-06, "loss": 0.2487, "step": 194, "step_loss": 0.33984375 }, { "epoch": 1.8932038834951457, "grad_norm": 0.5998143147199343, "kl": 0.2060546875, "learning_rate": 3.6270182149085288e-06, "loss": 0.2265, "step": 195, "step_loss": 0.1162109375 }, { "epoch": 1.9029126213592233, "grad_norm": 0.5559548956050891, "kl": 0.2041015625, "learning_rate": 3.6142379356368334e-06, "loss": 0.2266, "step": 196, "step_loss": 0.12109375 }, { "epoch": 1.912621359223301, "grad_norm": 0.6722163508708774, "kl": 0.2265625, "learning_rate": 3.6014248625951987e-06, "loss": 0.2349, "step": 197, "step_loss": 0.296875 }, { "epoch": 1.9223300970873787, "grad_norm": 0.5488054807070938, "kl": 0.0947265625, "learning_rate": 3.5885794819795566e-06, "loss": 0.216, "step": 198, "step_loss": 0.09326171875 }, { "epoch": 1.9320388349514563, "grad_norm": 0.5550608922872593, "kl": 0.423828125, "learning_rate": 3.5757022812117625e-06, 
"loss": 0.2304, "step": 199, "step_loss": 0.49609375 }, { "epoch": 1.941747572815534, "grad_norm": 0.5964742703384457, "kl": 0.341796875, "learning_rate": 3.562793748921095e-06, "loss": 0.2283, "step": 200, "step_loss": 0.32421875 }, { "epoch": 1.9514563106796117, "grad_norm": 0.5752160371372506, "kl": 0.263671875, "learning_rate": 3.5498543749257164e-06, "loss": 0.2166, "step": 201, "step_loss": 0.2041015625 }, { "epoch": 1.9611650485436893, "grad_norm": 0.547718813991668, "kl": 0.1904296875, "learning_rate": 3.536884650214088e-06, "loss": 0.2374, "step": 202, "step_loss": 0.193359375 }, { "epoch": 1.970873786407767, "grad_norm": 0.5834419512668307, "kl": 0.20703125, "learning_rate": 3.5238850669263386e-06, "loss": 0.2419, "step": 203, "step_loss": 0.404296875 }, { "epoch": 1.9805825242718447, "grad_norm": 0.5492481511414377, "kl": 0.171875, "learning_rate": 3.510856118335589e-06, "loss": 0.2304, "step": 204, "step_loss": 0.26953125 }, { "epoch": 1.9902912621359223, "grad_norm": 0.6611396642693328, "kl": 0.34765625, "learning_rate": 3.497798298829234e-06, "loss": 0.2235, "step": 205, "step_loss": 0.158203125 }, { "epoch": 2.0, "grad_norm": 0.6014585529041143, "kl": 0.19140625, "learning_rate": 3.484712103890188e-06, "loss": 0.2067, "step": 206, "step_loss": 0.23828125 }, { "epoch": 2.0, "eval_test_transformed.json_loss": null, "eval_test_transformed.json_runtime": 8.5733, "eval_test_transformed.json_samples_per_second": 58.321, "eval_test_transformed.json_steps_per_second": 1.866, "step": 206 }, { "epoch": 2.0097087378640777, "grad_norm": 0.5791202947407729, "kl": null, "learning_rate": 3.471598030078074e-06, "loss": 0.1895, "step": 207, "step_loss": null }, { "epoch": 2.0194174757281553, "grad_norm": 0.5095592680348658, "kl": 0.2373046875, "learning_rate": 3.4584565750103932e-06, "loss": 0.1777, "step": 208, "step_loss": 0.25390625 }, { "epoch": 2.029126213592233, "grad_norm": 0.52090270216247, "kl": 0.2041015625, "learning_rate": 3.445288237343632e-06, "loss": 
0.1885, "step": 209, "step_loss": 0.240234375 }, { "epoch": 2.0388349514563107, "grad_norm": 0.4976045327134996, "kl": 0.2431640625, "learning_rate": 3.432093516754348e-06, "loss": 0.1863, "step": 210, "step_loss": 0.1064453125 }, { "epoch": 2.0485436893203883, "grad_norm": 0.47846076806594573, "kl": 0.25390625, "learning_rate": 3.4188729139202063e-06, "loss": 0.1806, "step": 211, "step_loss": 0.138671875 }, { "epoch": 2.058252427184466, "grad_norm": 0.4911716210346588, "kl": 0.1787109375, "learning_rate": 3.4056269305009807e-06, "loss": 0.1963, "step": 212, "step_loss": 0.1650390625 }, { "epoch": 2.0679611650485437, "grad_norm": 0.5097950309369077, "kl": 0.1708984375, "learning_rate": 3.3923560691195194e-06, "loss": 0.1745, "step": 213, "step_loss": 0.232421875 }, { "epoch": 2.0776699029126213, "grad_norm": 0.49796056560568963, "kl": 0.390625, "learning_rate": 3.379060833342673e-06, "loss": 0.1956, "step": 214, "step_loss": 0.134765625 }, { "epoch": 2.087378640776699, "grad_norm": 0.5533404313296716, "kl": 0.1796875, "learning_rate": 3.3657417276621867e-06, "loss": 0.1977, "step": 215, "step_loss": 0.2890625 }, { "epoch": 2.0970873786407767, "grad_norm": 0.5296091938932913, "kl": 0.251953125, "learning_rate": 3.352399257475553e-06, "loss": 0.1799, "step": 216, "step_loss": 0.296875 }, { "epoch": 2.1067961165048543, "grad_norm": 0.6482221636505494, "kl": 0.435546875, "learning_rate": 3.339033929066841e-06, "loss": 0.1938, "step": 217, "step_loss": 0.134765625 }, { "epoch": 2.116504854368932, "grad_norm": 0.5671573525815887, "kl": 0.28515625, "learning_rate": 3.3256462495874804e-06, "loss": 0.2086, "step": 218, "step_loss": 0.259765625 }, { "epoch": 2.1262135922330097, "grad_norm": 0.5195068124172081, "kl": 0.1962890625, "learning_rate": 3.3122367270370193e-06, "loss": 0.1836, "step": 219, "step_loss": 0.33984375 }, { "epoch": 2.1359223300970873, "grad_norm": 0.5586205055924973, "kl": 0.2373046875, "learning_rate": 3.2988058702438493e-06, "loss": 0.2151, "step": 
220, "step_loss": 0.1455078125 }, { "epoch": 2.145631067961165, "grad_norm": 0.4963165143500007, "kl": 0.3046875, "learning_rate": 3.285354188845892e-06, "loss": 0.1908, "step": 221, "step_loss": 0.140625 }, { "epoch": 2.1553398058252426, "grad_norm": 0.5444372065813423, "kl": 0.296875, "learning_rate": 3.271882193271271e-06, "loss": 0.2084, "step": 222, "step_loss": 0.18359375 }, { "epoch": 2.1650485436893203, "grad_norm": 0.5399029855207185, "kl": 1.0546875, "learning_rate": 3.258390394718933e-06, "loss": 0.2045, "step": 223, "step_loss": 0.3125 }, { "epoch": 2.174757281553398, "grad_norm": 0.5319895925471867, "kl": 0.345703125, "learning_rate": 3.2448793051392546e-06, "loss": 0.1971, "step": 224, "step_loss": 0.109375 }, { "epoch": 2.1844660194174756, "grad_norm": 0.6543149803275666, "kl": 0.162109375, "learning_rate": 3.231349437214619e-06, "loss": 0.1733, "step": 225, "step_loss": 0.267578125 }, { "epoch": 2.1941747572815533, "grad_norm": 0.5510963728874484, "kl": 0.2021484375, "learning_rate": 3.2178013043399588e-06, "loss": 0.172, "step": 226, "step_loss": 0.2109375 }, { "epoch": 2.203883495145631, "grad_norm": 0.5155354624371081, "kl": 0.435546875, "learning_rate": 3.2042354206032733e-06, "loss": 0.1785, "step": 227, "step_loss": 0.1484375 }, { "epoch": 2.2135922330097086, "grad_norm": 0.5053157167916185, "kl": 0.341796875, "learning_rate": 3.190652300766126e-06, "loss": 0.1685, "step": 228, "step_loss": 0.0595703125 }, { "epoch": 2.2233009708737863, "grad_norm": 0.5412285451182728, "kl": 0.294921875, "learning_rate": 3.1770524602441085e-06, "loss": 0.1945, "step": 229, "step_loss": 0.171875 }, { "epoch": 2.233009708737864, "grad_norm": 0.7645124019667211, "kl": 0.337890625, "learning_rate": 3.163436415087283e-06, "loss": 0.1786, "step": 230, "step_loss": 0.0947265625 }, { "epoch": 2.2427184466019416, "grad_norm": 0.5902838675490253, "kl": 0.177734375, "learning_rate": 3.1498046819606046e-06, "loss": 0.1995, "step": 231, "step_loss": 0.1884765625 }, { 
"epoch": 2.2524271844660193, "grad_norm": 0.6016854494954839, "kl": 0.416015625, "learning_rate": 3.13615777812431e-06, "loss": 0.2059, "step": 232, "step_loss": 0.189453125 }, { "epoch": 2.262135922330097, "grad_norm": 0.49283185288305054, "kl": 0.169921875, "learning_rate": 3.122496221414293e-06, "loss": 0.1833, "step": 233, "step_loss": 0.1328125 }, { "epoch": 2.2718446601941746, "grad_norm": 0.5176674531829142, "kl": 0.1953125, "learning_rate": 3.108820530222458e-06, "loss": 0.2065, "step": 234, "step_loss": 0.2734375 }, { "epoch": 2.2815533980582523, "grad_norm": 0.571466923601719, "kl": 0.234375, "learning_rate": 3.0951312234770427e-06, "loss": 0.1995, "step": 235, "step_loss": 0.1318359375 }, { "epoch": 2.29126213592233, "grad_norm": 0.46009947680771285, "kl": 0.283203125, "learning_rate": 3.081428820622935e-06, "loss": 0.1821, "step": 236, "step_loss": 0.1689453125 }, { "epoch": 2.3009708737864076, "grad_norm": 0.4656876349084823, "kl": 0.2158203125, "learning_rate": 3.067713841601956e-06, "loss": 0.1744, "step": 237, "step_loss": 0.1328125 }, { "epoch": 2.3106796116504853, "grad_norm": 0.5238872133477445, "kl": 0.1767578125, "learning_rate": 3.0539868068331345e-06, "loss": 0.21, "step": 238, "step_loss": 0.353515625 }, { "epoch": 2.320388349514563, "grad_norm": 0.5445354007250789, "kl": 0.240234375, "learning_rate": 3.040248237192958e-06, "loss": 0.1729, "step": 239, "step_loss": 0.12109375 }, { "epoch": 2.3300970873786406, "grad_norm": 0.5420652323121445, "kl": 0.193359375, "learning_rate": 3.026498653995607e-06, "loss": 0.2176, "step": 240, "step_loss": 0.330078125 }, { "epoch": 2.3398058252427183, "grad_norm": 0.5088097390569825, "kl": 0.361328125, "learning_rate": 3.0127385789731773e-06, "loss": 0.199, "step": 241, "step_loss": 0.107421875 }, { "epoch": 2.349514563106796, "grad_norm": 0.5272400930748196, "kl": 0.1708984375, "learning_rate": 2.9989685342558776e-06, "loss": 0.1906, "step": 242, "step_loss": 0.21484375 }, { "epoch": 2.3592233009708736, 
"grad_norm": 0.5369149150820804, "kl": 0.16015625, "learning_rate": 2.9851890423522214e-06, "loss": 0.1698, "step": 243, "step_loss": 0.2080078125 }, { "epoch": 2.3689320388349513, "grad_norm": 0.48739367878418577, "kl": 0.18359375, "learning_rate": 2.9714006261291967e-06, "loss": 0.1903, "step": 244, "step_loss": 0.123046875 }, { "epoch": 2.378640776699029, "grad_norm": 0.5086942793116336, "kl": 0.28515625, "learning_rate": 2.9576038087924304e-06, "loss": 0.1919, "step": 245, "step_loss": 0.1181640625 }, { "epoch": 2.3883495145631066, "grad_norm": 0.511491215329325, "kl": 0.287109375, "learning_rate": 2.943799113866329e-06, "loss": 0.17, "step": 246, "step_loss": 0.1376953125 }, { "epoch": 2.3980582524271843, "grad_norm": 0.4862420492331248, "kl": 0.2109375, "learning_rate": 2.929987065174219e-06, "loss": 0.1858, "step": 247, "step_loss": 0.0888671875 }, { "epoch": 2.407766990291262, "grad_norm": 0.5342355311910822, "kl": 0.408203125, "learning_rate": 2.9161681868184673e-06, "loss": 0.1747, "step": 248, "step_loss": 0.103515625 }, { "epoch": 2.4174757281553396, "grad_norm": 0.48746202027489255, "kl": 0.359375, "learning_rate": 2.9023430031605928e-06, "loss": 0.1628, "step": 249, "step_loss": 0.12890625 }, { "epoch": 2.4271844660194173, "grad_norm": 0.5273774877988189, "kl": 0.30078125, "learning_rate": 2.888512038801372e-06, "loss": 0.1826, "step": 250, "step_loss": 0.1435546875 }, { "epoch": 2.436893203883495, "grad_norm": 0.5621617942900121, "kl": 0.2373046875, "learning_rate": 2.874675818560933e-06, "loss": 0.2022, "step": 251, "step_loss": 0.1572265625 }, { "epoch": 2.4466019417475726, "grad_norm": 0.5789885643882243, "kl": 0.208984375, "learning_rate": 2.8608348674588383e-06, "loss": 0.1643, "step": 252, "step_loss": 0.1279296875 }, { "epoch": 2.4563106796116507, "grad_norm": 0.5470908147857678, "kl": 0.12060546875, "learning_rate": 2.8469897106941657e-06, "loss": 0.1879, "step": 253, "step_loss": 0.1455078125 }, { "epoch": 2.466019417475728, "grad_norm": 
0.5350773885496009, "kl": 0.24609375, "learning_rate": 2.8331408736255766e-06, "loss": 0.1936, "step": 254, "step_loss": 0.2197265625 }, { "epoch": 2.475728155339806, "grad_norm": 0.5529985369495677, "kl": 0.251953125, "learning_rate": 2.8192888817513844e-06, "loss": 0.1665, "step": 255, "step_loss": 0.1728515625 }, { "epoch": 2.4854368932038833, "grad_norm": 0.5432455331659981, "kl": 0.10888671875, "learning_rate": 2.8054342606896102e-06, "loss": 0.1795, "step": 256, "step_loss": 0.09912109375 }, { "epoch": 2.4951456310679614, "grad_norm": 0.6472916647922633, "kl": 0.306640625, "learning_rate": 2.7915775361580427e-06, "loss": 0.178, "step": 257, "step_loss": 0.1982421875 }, { "epoch": 2.5048543689320386, "grad_norm": 0.5736017610369268, "kl": 0.349609375, "learning_rate": 2.7777192339542867e-06, "loss": 0.22, "step": 258, "step_loss": 0.248046875 }, { "epoch": 2.5145631067961167, "grad_norm": 0.5113226835633047, "kl": 0.19140625, "learning_rate": 2.7638598799358123e-06, "loss": 0.1816, "step": 259, "step_loss": 0.12109375 }, { "epoch": 2.524271844660194, "grad_norm": 0.4926253445141819, "kl": 0.333984375, "learning_rate": 2.7500000000000004e-06, "loss": 0.187, "step": 260, "step_loss": 0.1181640625 }, { "epoch": 2.533980582524272, "grad_norm": 0.5196079398321654, "kl": 0.15625, "learning_rate": 2.7361401200641884e-06, "loss": 0.2047, "step": 261, "step_loss": 0.16796875 }, { "epoch": 2.5436893203883493, "grad_norm": 0.5367574475148829, "kl": 0.134765625, "learning_rate": 2.722280766045714e-06, "loss": 0.1701, "step": 262, "step_loss": 0.2275390625 }, { "epoch": 2.5533980582524274, "grad_norm": 0.6249660005858519, "kl": 0.298828125, "learning_rate": 2.708422463841958e-06, "loss": 0.2044, "step": 263, "step_loss": 0.3203125 }, { "epoch": 2.5631067961165046, "grad_norm": 0.5255108947368616, "kl": 0.142578125, "learning_rate": 2.6945657393103913e-06, "loss": 0.2174, "step": 264, "step_loss": 0.267578125 }, { "epoch": 2.5728155339805827, "grad_norm": 
0.5000574555764202, "kl": 0.1171875, "learning_rate": 2.680711118248617e-06, "loss": 0.1692, "step": 265, "step_loss": 0.10302734375 }, { "epoch": 2.58252427184466, "grad_norm": 0.5928812196080804, "kl": 0.1923828125, "learning_rate": 2.666859126374425e-06, "loss": 0.2014, "step": 266, "step_loss": 0.3203125 }, { "epoch": 2.592233009708738, "grad_norm": 0.520932366174803, "kl": 0.2490234375, "learning_rate": 2.653010289305835e-06, "loss": 0.1857, "step": 267, "step_loss": 0.134765625 }, { "epoch": 2.6019417475728153, "grad_norm": 0.5199064176512976, "kl": 0.255859375, "learning_rate": 2.639165132541162e-06, "loss": 0.1805, "step": 268, "step_loss": 0.234375 }, { "epoch": 2.6116504854368934, "grad_norm": 0.5189304175947423, "kl": 0.259765625, "learning_rate": 2.625324181439068e-06, "loss": 0.1857, "step": 269, "step_loss": 0.333984375 }, { "epoch": 2.6213592233009706, "grad_norm": 0.5477820911661071, "kl": 0.1962890625, "learning_rate": 2.611487961198629e-06, "loss": 0.1705, "step": 270, "step_loss": 0.267578125 }, { "epoch": 2.6310679611650487, "grad_norm": 0.5144960586834338, "kl": 0.3515625, "learning_rate": 2.5976569968394084e-06, "loss": 0.1824, "step": 271, "step_loss": 0.12890625 }, { "epoch": 2.6407766990291264, "grad_norm": 0.5129225163884132, "kl": 0.1611328125, "learning_rate": 2.583831813181534e-06, "loss": 0.1936, "step": 272, "step_loss": 0.240234375 }, { "epoch": 2.650485436893204, "grad_norm": 0.5747902549826789, "kl": 0.376953125, "learning_rate": 2.570012934825782e-06, "loss": 0.1803, "step": 273, "step_loss": 0.171875 }, { "epoch": 2.6601941747572817, "grad_norm": 0.5303667804765286, "kl": 0.2353515625, "learning_rate": 2.556200886133672e-06, "loss": 0.1638, "step": 274, "step_loss": 0.353515625 }, { "epoch": 2.6699029126213594, "grad_norm": 0.552737610069486, "kl": 0.166015625, "learning_rate": 2.542396191207571e-06, "loss": 0.1896, "step": 275, "step_loss": 0.212890625 }, { "epoch": 2.679611650485437, "grad_norm": 0.5138863525528505, "kl": 
0.267578125, "learning_rate": 2.528599373870804e-06, "loss": 0.1647, "step": 276, "step_loss": 0.1650390625 }, { "epoch": 2.6893203883495147, "grad_norm": 0.5816837784109217, "kl": 0.3203125, "learning_rate": 2.5148109576477798e-06, "loss": 0.2022, "step": 277, "step_loss": 0.2734375 }, { "epoch": 2.6990291262135924, "grad_norm": 0.5946206686019718, "kl": 0.296875, "learning_rate": 2.501031465744123e-06, "loss": 0.1793, "step": 278, "step_loss": 0.1650390625 }, { "epoch": 2.70873786407767, "grad_norm": 0.5505786008910999, "kl": 0.26171875, "learning_rate": 2.487261421026823e-06, "loss": 0.1869, "step": 279, "step_loss": 0.189453125 }, { "epoch": 2.7184466019417477, "grad_norm": 0.5607618391578882, "kl": 0.396484375, "learning_rate": 2.4735013460043933e-06, "loss": 0.1782, "step": 280, "step_loss": 0.140625 }, { "epoch": 2.7281553398058254, "grad_norm": 0.532008359083315, "kl": 0.443359375, "learning_rate": 2.4597517628070427e-06, "loss": 0.2004, "step": 281, "step_loss": 0.1953125 }, { "epoch": 2.737864077669903, "grad_norm": 0.5409775868618988, "kl": 0.412109375, "learning_rate": 2.446013193166866e-06, "loss": 0.1968, "step": 282, "step_loss": 0.10302734375 }, { "epoch": 2.7475728155339807, "grad_norm": 0.5494599421845879, "kl": 0.291015625, "learning_rate": 2.4322861583980445e-06, "loss": 0.1882, "step": 283, "step_loss": 0.138671875 }, { "epoch": 2.7572815533980584, "grad_norm": 0.5270774451468452, "kl": 0.275390625, "learning_rate": 2.4185711793770662e-06, "loss": 0.1773, "step": 284, "step_loss": 0.1728515625 }, { "epoch": 2.766990291262136, "grad_norm": 0.556579747133581, "kl": 0.177734375, "learning_rate": 2.4048687765229584e-06, "loss": 0.1875, "step": 285, "step_loss": 0.12109375 }, { "epoch": 2.7766990291262137, "grad_norm": 0.503176078809862, "kl": 0.189453125, "learning_rate": 2.3911794697775437e-06, "loss": 0.1847, "step": 286, "step_loss": 0.328125 }, { "epoch": 2.7864077669902914, "grad_norm": 0.5914318608290587, "kl": 0.251953125, "learning_rate": 
2.377503778585708e-06, "loss": 0.2007, "step": 287, "step_loss": 0.197265625 }, { "epoch": 2.796116504854369, "grad_norm": 0.47771283753849275, "kl": 0.365234375, "learning_rate": 2.3638422218756905e-06, "loss": 0.1968, "step": 288, "step_loss": 0.13671875 }, { "epoch": 2.8058252427184467, "grad_norm": 0.544279916739223, "kl": 0.1630859375, "learning_rate": 2.350195318039396e-06, "loss": 0.1844, "step": 289, "step_loss": 0.1279296875 }, { "epoch": 2.8155339805825244, "grad_norm": 0.7140740591904084, "kl": 0.2080078125, "learning_rate": 2.336563584912717e-06, "loss": 0.2153, "step": 290, "step_loss": 0.294921875 }, { "epoch": 2.825242718446602, "grad_norm": 0.5978474529315786, "kl": 0.1943359375, "learning_rate": 2.322947539755892e-06, "loss": 0.2018, "step": 291, "step_loss": 0.25390625 }, { "epoch": 2.8349514563106797, "grad_norm": 0.5338842771148811, "kl": 0.4296875, "learning_rate": 2.3093476992338747e-06, "loss": 0.2051, "step": 292, "step_loss": 0.142578125 }, { "epoch": 2.8446601941747574, "grad_norm": 0.5434580731788923, "kl": 0.2021484375, "learning_rate": 2.295764579396727e-06, "loss": 0.1833, "step": 293, "step_loss": 0.1259765625 }, { "epoch": 2.854368932038835, "grad_norm": 0.48934430178338256, "kl": 0.1484375, "learning_rate": 2.2821986956600415e-06, "loss": 0.1602, "step": 294, "step_loss": 0.10595703125 }, { "epoch": 2.8640776699029127, "grad_norm": 0.5408973977811805, "kl": 0.169921875, "learning_rate": 2.2686505627853812e-06, "loss": 0.1594, "step": 295, "step_loss": 0.1103515625 }, { "epoch": 2.8737864077669903, "grad_norm": 0.5283594028868295, "kl": 0.36328125, "learning_rate": 2.255120694860746e-06, "loss": 0.1916, "step": 296, "step_loss": 0.126953125 }, { "epoch": 2.883495145631068, "grad_norm": 0.5035651183385199, "kl": 0.2177734375, "learning_rate": 2.2416096052810683e-06, "loss": 0.1824, "step": 297, "step_loss": 0.220703125 }, { "epoch": 2.8932038834951457, "grad_norm": 0.5459862981254268, "kl": 0.2470703125, "learning_rate": 
2.2281178067287294e-06, "loss": 0.2043, "step": 298, "step_loss": 0.205078125 }, { "epoch": 2.9029126213592233, "grad_norm": 0.5238184894583012, "kl": 0.30078125, "learning_rate": 2.214645811154108e-06, "loss": 0.1867, "step": 299, "step_loss": 0.2119140625 }, { "epoch": 2.912621359223301, "grad_norm": 0.5030938038128431, "kl": 0.251953125, "learning_rate": 2.201194129756152e-06, "loss": 0.1743, "step": 300, "step_loss": 0.1611328125 }, { "epoch": 2.9223300970873787, "grad_norm": 0.6344887467056015, "kl": 0.1796875, "learning_rate": 2.187763272962981e-06, "loss": 0.1986, "step": 301, "step_loss": 0.1328125 }, { "epoch": 2.9320388349514563, "grad_norm": 0.5310139131483351, "kl": 0.189453125, "learning_rate": 2.1743537504125208e-06, "loss": 0.1998, "step": 302, "step_loss": 0.287109375 }, { "epoch": 2.941747572815534, "grad_norm": 0.5172075780561182, "kl": 0.4375, "learning_rate": 2.1609660709331598e-06, "loss": 0.2207, "step": 303, "step_loss": 0.31640625 }, { "epoch": 2.9514563106796117, "grad_norm": 0.5624076449552908, "kl": 0.1748046875, "learning_rate": 2.1476007425244476e-06, "loss": 0.1955, "step": 304, "step_loss": 0.1015625 }, { "epoch": 2.9611650485436893, "grad_norm": 0.5232258269061456, "kl": 0.306640625, "learning_rate": 2.134258272337814e-06, "loss": 0.1884, "step": 305, "step_loss": 0.21484375 }, { "epoch": 2.970873786407767, "grad_norm": 0.5856686801966252, "kl": 0.291015625, "learning_rate": 2.120939166657327e-06, "loss": 0.202, "step": 306, "step_loss": 0.2021484375 }, { "epoch": 2.9805825242718447, "grad_norm": 0.5360709399599322, "kl": 0.29296875, "learning_rate": 2.1076439308804813e-06, "loss": 0.1946, "step": 307, "step_loss": 0.150390625 }, { "epoch": 2.9902912621359223, "grad_norm": 0.5186377564757945, "kl": 0.1923828125, "learning_rate": 2.0943730694990204e-06, "loss": 0.1776, "step": 308, "step_loss": 0.1015625 }, { "epoch": 3.0, "grad_norm": 0.5769108575358282, "kl": 0.48828125, "learning_rate": 2.081127086079795e-06, "loss": 0.1656, 
"step": 309, "step_loss": 0.240234375 }, { "epoch": 3.0, "eval_test_transformed.json_loss": null, "eval_test_transformed.json_runtime": 8.3956, "eval_test_transformed.json_samples_per_second": 59.555, "eval_test_transformed.json_steps_per_second": 1.906, "step": 309 }, { "epoch": 3.0097087378640777, "grad_norm": 0.5711934327513365, "kl": null, "learning_rate": 2.0679064832456523e-06, "loss": 0.1695, "step": 310, "step_loss": null }, { "epoch": 3.0194174757281553, "grad_norm": 0.5080776402736003, "kl": 0.2255859375, "learning_rate": 2.054711762656369e-06, "loss": 0.1599, "step": 311, "step_loss": 0.2373046875 }, { "epoch": 3.029126213592233, "grad_norm": 0.460829560524234, "kl": 0.154296875, "learning_rate": 2.0415434249896075e-06, "loss": 0.1554, "step": 312, "step_loss": 0.08984375 }, { "epoch": 3.0388349514563107, "grad_norm": 0.532111983258558, "kl": 0.259765625, "learning_rate": 2.0284019699219265e-06, "loss": 0.1561, "step": 313, "step_loss": 0.1689453125 }, { "epoch": 3.0485436893203883, "grad_norm": 0.5807336256828239, "kl": 0.369140625, "learning_rate": 2.0152878961098133e-06, "loss": 0.1513, "step": 314, "step_loss": 0.06201171875 }, { "epoch": 3.058252427184466, "grad_norm": 0.5177785233543187, "kl": 0.1982421875, "learning_rate": 2.0022017011707663e-06, "loss": 0.1816, "step": 315, "step_loss": 0.2236328125 }, { "epoch": 3.0679611650485437, "grad_norm": 0.5969479421413822, "kl": 0.333984375, "learning_rate": 1.989143881664412e-06, "loss": 0.1782, "step": 316, "step_loss": 0.240234375 }, { "epoch": 3.0776699029126213, "grad_norm": 0.489029791228728, "kl": 0.59765625, "learning_rate": 1.9761149330736625e-06, "loss": 0.1308, "step": 317, "step_loss": 0.064453125 }, { "epoch": 3.087378640776699, "grad_norm": 0.7131615528898937, "kl": 0.177734375, "learning_rate": 1.9631153497859127e-06, "loss": 0.179, "step": 318, "step_loss": 0.07861328125 }, { "epoch": 3.0970873786407767, "grad_norm": 0.4640669864219324, "kl": 0.390625, "learning_rate": 1.950145625074285e-06, 
"loss": 0.1331, "step": 319, "step_loss": 0.06494140625 }, { "epoch": 3.1067961165048543, "grad_norm": 0.4889269717181464, "kl": 0.2138671875, "learning_rate": 1.9372062510789064e-06, "loss": 0.1696, "step": 320, "step_loss": 0.07861328125 }, { "epoch": 3.116504854368932, "grad_norm": 0.4894140759210115, "kl": 0.294921875, "learning_rate": 1.924297718788238e-06, "loss": 0.1532, "step": 321, "step_loss": 0.1728515625 }, { "epoch": 3.1262135922330097, "grad_norm": 0.4851533726900198, "kl": 0.271484375, "learning_rate": 1.9114205180204437e-06, "loss": 0.1563, "step": 322, "step_loss": 0.08984375 }, { "epoch": 3.1359223300970873, "grad_norm": 0.4812749750914589, "kl": 0.3359375, "learning_rate": 1.8985751374048022e-06, "loss": 0.1597, "step": 323, "step_loss": 0.083984375 }, { "epoch": 3.145631067961165, "grad_norm": 0.4735366094358067, "kl": 0.57421875, "learning_rate": 1.8857620643631675e-06, "loss": 0.164, "step": 324, "step_loss": 0.19140625 }, { "epoch": 3.1553398058252426, "grad_norm": 0.4616311559837366, "kl": 0.3203125, "learning_rate": 1.8729817850914717e-06, "loss": 0.169, "step": 325, "step_loss": 0.2294921875 }, { "epoch": 3.1650485436893203, "grad_norm": 0.5563217588279268, "kl": 0.224609375, "learning_rate": 1.8602347845412799e-06, "loss": 0.1502, "step": 326, "step_loss": 0.28515625 }, { "epoch": 3.174757281553398, "grad_norm": 0.45499622442963633, "kl": 0.330078125, "learning_rate": 1.847521546401383e-06, "loss": 0.1547, "step": 327, "step_loss": 0.062255859375 }, { "epoch": 3.1844660194174756, "grad_norm": 0.5222912664403042, "kl": 0.33984375, "learning_rate": 1.83484255307945e-06, "loss": 0.1694, "step": 328, "step_loss": 0.1083984375 }, { "epoch": 3.1941747572815533, "grad_norm": 0.4698877989732488, "kl": 0.1611328125, "learning_rate": 1.8221982856837177e-06, "loss": 0.1616, "step": 329, "step_loss": 0.203125 }, { "epoch": 3.203883495145631, "grad_norm": 0.5161358585670528, "kl": 0.2578125, "learning_rate": 1.8095892240047375e-06, "loss": 0.1483, 
"step": 330, "step_loss": 0.193359375 }, { "epoch": 3.2135922330097086, "grad_norm": 0.5707034508549506, "kl": 0.384765625, "learning_rate": 1.7970158464971704e-06, "loss": 0.1764, "step": 331, "step_loss": 0.1435546875 }, { "epoch": 3.2233009708737863, "grad_norm": 0.6845693547436684, "kl": 0.30078125, "learning_rate": 1.78447863026163e-06, "loss": 0.1686, "step": 332, "step_loss": 0.146484375 }, { "epoch": 3.233009708737864, "grad_norm": 0.539242866306224, "kl": 0.279296875, "learning_rate": 1.77197805102658e-06, "loss": 0.1602, "step": 333, "step_loss": 0.30078125 }, { "epoch": 3.2427184466019416, "grad_norm": 0.5376628174473539, "kl": 0.318359375, "learning_rate": 1.759514583130279e-06, "loss": 0.1596, "step": 334, "step_loss": 0.0859375 }, { "epoch": 3.2524271844660193, "grad_norm": 0.488395082556356, "kl": 0.421875, "learning_rate": 1.7470886995027902e-06, "loss": 0.1437, "step": 335, "step_loss": 0.07080078125 }, { "epoch": 3.262135922330097, "grad_norm": 0.5055787180594609, "kl": 0.2099609375, "learning_rate": 1.734700871648024e-06, "loss": 0.1565, "step": 336, "step_loss": 0.1953125 }, { "epoch": 3.2718446601941746, "grad_norm": 0.4884350496305089, "kl": 0.2431640625, "learning_rate": 1.722351569625859e-06, "loss": 0.1445, "step": 337, "step_loss": 0.07666015625 }, { "epoch": 3.2815533980582523, "grad_norm": 0.5055878842267785, "kl": 0.1669921875, "learning_rate": 1.710041262034296e-06, "loss": 0.1662, "step": 338, "step_loss": 0.1298828125 }, { "epoch": 3.29126213592233, "grad_norm": 0.6568055753017068, "kl": 0.27734375, "learning_rate": 1.6977704159916801e-06, "loss": 0.1607, "step": 339, "step_loss": 0.13671875 }, { "epoch": 3.3009708737864076, "grad_norm": 0.46693217339820037, "kl": 0.326171875, "learning_rate": 1.6855394971189779e-06, "loss": 0.1519, "step": 340, "step_loss": 0.0751953125 }, { "epoch": 3.3106796116504853, "grad_norm": 0.46750716114679314, "kl": 0.224609375, "learning_rate": 1.6733489695221056e-06, "loss": 0.165, "step": 341, 
"step_loss": 0.25 }, { "epoch": 3.320388349514563, "grad_norm": 0.46663445875307946, "kl": 0.359375, "learning_rate": 1.6611992957743217e-06, "loss": 0.151, "step": 342, "step_loss": 0.1318359375 }, { "epoch": 3.3300970873786406, "grad_norm": 0.48545644382954584, "kl": 0.1982421875, "learning_rate": 1.6490909368986725e-06, "loss": 0.1624, "step": 343, "step_loss": 0.1748046875 }, { "epoch": 3.3398058252427183, "grad_norm": 0.4692569851993047, "kl": 0.232421875, "learning_rate": 1.637024352350498e-06, "loss": 0.1414, "step": 344, "step_loss": 0.271484375 }, { "epoch": 3.349514563106796, "grad_norm": 0.4758629432786183, "kl": 0.2490234375, "learning_rate": 1.6250000000000007e-06, "loss": 0.1427, "step": 345, "step_loss": 0.150390625 }, { "epoch": 3.3592233009708736, "grad_norm": 0.4871466615503954, "kl": 0.29296875, "learning_rate": 1.6130183361148675e-06, "loss": 0.1593, "step": 346, "step_loss": 0.1748046875 }, { "epoch": 3.3689320388349513, "grad_norm": 0.5474079532412777, "kl": 0.32421875, "learning_rate": 1.60107981534296e-06, "loss": 0.178, "step": 347, "step_loss": 0.267578125 }, { "epoch": 3.378640776699029, "grad_norm": 2.4760425876288865, "kl": 0.25390625, "learning_rate": 1.5891848906950602e-06, "loss": 0.1768, "step": 348, "step_loss": 0.3515625 }, { "epoch": 3.3883495145631066, "grad_norm": 0.499334607836154, "kl": 0.3359375, "learning_rate": 1.5773340135276843e-06, "loss": 0.1309, "step": 349, "step_loss": 0.07470703125 }, { "epoch": 3.3980582524271843, "grad_norm": 0.5121888774785807, "kl": 0.3046875, "learning_rate": 1.5655276335259495e-06, "loss": 0.146, "step": 350, "step_loss": 0.126953125 }, { "epoch": 3.407766990291262, "grad_norm": 0.5311085568794023, "kl": 0.427734375, "learning_rate": 1.5537661986865196e-06, "loss": 0.1675, "step": 351, "step_loss": 0.1103515625 }, { "epoch": 3.4174757281553396, "grad_norm": 0.5128192279567273, "kl": 0.271484375, "learning_rate": 1.542050155300595e-06, "loss": 0.1672, "step": 352, "step_loss": 0.2197265625 }, 
{ "epoch": 3.4271844660194173, "grad_norm": 0.49745836081284944, "kl": 0.26171875, "learning_rate": 1.53037994793699e-06, "loss": 0.1568, "step": 353, "step_loss": 0.18359375 }, { "epoch": 3.436893203883495, "grad_norm": 0.48051427609991737, "kl": 0.263671875, "learning_rate": 1.51875601942525e-06, "loss": 0.1625, "step": 354, "step_loss": 0.2265625 }, { "epoch": 3.4466019417475726, "grad_norm": 0.48409810919704643, "kl": 0.29296875, "learning_rate": 1.507178810838863e-06, "loss": 0.1706, "step": 355, "step_loss": 0.1845703125 }, { "epoch": 3.4563106796116507, "grad_norm": 0.537826558649176, "kl": 0.349609375, "learning_rate": 1.4956487614785076e-06, "loss": 0.16, "step": 356, "step_loss": 0.15625 }, { "epoch": 3.466019417475728, "grad_norm": 0.507752429637045, "kl": 0.345703125, "learning_rate": 1.4841663088553992e-06, "loss": 0.1625, "step": 357, "step_loss": 0.12890625 }, { "epoch": 3.475728155339806, "grad_norm": 0.5130052193009929, "kl": 0.341796875, "learning_rate": 1.4727318886746725e-06, "loss": 0.1741, "step": 358, "step_loss": 0.11181640625 }, { "epoch": 3.4854368932038833, "grad_norm": 0.5556488303190134, "kl": 0.46484375, "learning_rate": 1.4613459348188635e-06, "loss": 0.1242, "step": 359, "step_loss": 0.0869140625 }, { "epoch": 3.4951456310679614, "grad_norm": 0.477620232983351, "kl": 0.39453125, "learning_rate": 1.4500088793314351e-06, "loss": 0.1439, "step": 360, "step_loss": 0.0703125 }, { "epoch": 3.5048543689320386, "grad_norm": 0.5136494794829761, "kl": 0.2734375, "learning_rate": 1.438721152400388e-06, "loss": 0.1511, "step": 361, "step_loss": 0.326171875 }, { "epoch": 3.5145631067961167, "grad_norm": 0.4992556694121463, "kl": 0.26171875, "learning_rate": 1.427483182341936e-06, "loss": 0.1641, "step": 362, "step_loss": 0.0810546875 }, { "epoch": 3.524271844660194, "grad_norm": 0.4965898781365568, "kl": 0.291015625, "learning_rate": 1.4162953955842518e-06, "loss": 0.1717, "step": 363, "step_loss": 0.06640625 }, { "epoch": 3.533980582524272, 
"grad_norm": 0.4885949309810252, "kl": 0.2490234375, "learning_rate": 1.4051582166512895e-06, "loss": 0.1377, "step": 364, "step_loss": 0.0830078125 }, { "epoch": 3.5436893203883493, "grad_norm": 0.7644545502958816, "kl": 0.3203125, "learning_rate": 1.3940720681466735e-06, "loss": 0.1581, "step": 365, "step_loss": 0.17578125 }, { "epoch": 3.5533980582524274, "grad_norm": 0.549140542595152, "kl": 0.26953125, "learning_rate": 1.3830373707376623e-06, "loss": 0.1455, "step": 366, "step_loss": 0.1533203125 }, { "epoch": 3.5631067961165046, "grad_norm": 0.7512378808438823, "kl": 0.333984375, "learning_rate": 1.372054543139188e-06, "loss": 0.15, "step": 367, "step_loss": 0.1103515625 }, { "epoch": 3.5728155339805827, "grad_norm": 0.5020029362406481, "kl": 0.19921875, "learning_rate": 1.3611240020979655e-06, "loss": 0.1427, "step": 368, "step_loss": 0.06494140625 }, { "epoch": 3.58252427184466, "grad_norm": 0.46369749478886507, "kl": 0.1650390625, "learning_rate": 1.3502461623766842e-06, "loss": 0.1668, "step": 369, "step_loss": 0.1728515625 }, { "epoch": 3.592233009708738, "grad_norm": 0.527596307252248, "kl": 0.404296875, "learning_rate": 1.3394214367382602e-06, "loss": 0.1543, "step": 370, "step_loss": 0.19921875 }, { "epoch": 3.6019417475728153, "grad_norm": 0.5072354485956617, "kl": 0.2578125, "learning_rate": 1.3286502359301863e-06, "loss": 0.1599, "step": 371, "step_loss": 0.1904296875 }, { "epoch": 3.6116504854368934, "grad_norm": 0.5160691631303242, "kl": 0.439453125, "learning_rate": 1.3179329686689318e-06, "loss": 0.1498, "step": 372, "step_loss": 0.169921875 }, { "epoch": 3.6213592233009706, "grad_norm": 0.6554485149692931, "kl": 0.2412109375, "learning_rate": 1.3072700416244494e-06, "loss": 0.1542, "step": 373, "step_loss": 0.228515625 }, { "epoch": 3.6310679611650487, "grad_norm": 0.4910381460677276, "kl": 0.240234375, "learning_rate": 1.2966618594047285e-06, "loss": 0.1407, "step": 374, "step_loss": 0.322265625 }, { "epoch": 3.6407766990291264, "grad_norm": 
0.5460180925997682, "kl": 0.25, "learning_rate": 1.286108824540456e-06, "loss": 0.1558, "step": 375, "step_loss": 0.2236328125 }, { "epoch": 3.650485436893204, "grad_norm": 0.584914369592297, "kl": 0.53125, "learning_rate": 1.2756113374697294e-06, "loss": 0.1594, "step": 376, "step_loss": 0.1318359375 }, { "epoch": 3.6601941747572817, "grad_norm": 0.5256624617590849, "kl": 0.189453125, "learning_rate": 1.2651697965228748e-06, "loss": 0.1696, "step": 377, "step_loss": 0.1123046875 }, { "epoch": 3.6699029126213594, "grad_norm": 0.5033922457112555, "kl": 0.216796875, "learning_rate": 1.2547845979073194e-06, "loss": 0.1567, "step": 378, "step_loss": 0.2412109375 }, { "epoch": 3.679611650485437, "grad_norm": 0.5191896792873844, "kl": 0.1494140625, "learning_rate": 1.2444561356925692e-06, "loss": 0.1413, "step": 379, "step_loss": 0.10205078125 }, { "epoch": 3.6893203883495147, "grad_norm": 0.5166370978149198, "kl": 0.46484375, "learning_rate": 1.2341848017952464e-06, "loss": 0.1658, "step": 380, "step_loss": 0.1005859375 }, { "epoch": 3.6990291262135924, "grad_norm": 0.5574979370660633, "kl": 0.251953125, "learning_rate": 1.2239709859642237e-06, "loss": 0.1484, "step": 381, "step_loss": 0.2119140625 }, { "epoch": 3.70873786407767, "grad_norm": 0.5625584889838852, "kl": 0.5390625, "learning_rate": 1.2138150757658328e-06, "loss": 0.1802, "step": 382, "step_loss": 0.17578125 }, { "epoch": 3.7184466019417477, "grad_norm": 0.47927966720420184, "kl": 0.283203125, "learning_rate": 1.2037174565691591e-06, "loss": 0.1506, "step": 383, "step_loss": 0.146484375 }, { "epoch": 3.7281553398058254, "grad_norm": 0.5194167964748884, "kl": 0.4453125, "learning_rate": 1.1936785115314176e-06, "loss": 0.1395, "step": 384, "step_loss": 0.10498046875 }, { "epoch": 3.737864077669903, "grad_norm": 0.5468254611874791, "kl": 0.306640625, "learning_rate": 1.1836986215834153e-06, "loss": 0.1423, "step": 385, "step_loss": 0.12158203125 }, { "epoch": 3.7475728155339807, "grad_norm": 
0.5071633518038373, "kl": 0.11865234375, "learning_rate": 1.1737781654150955e-06, "loss": 0.162, "step": 386, "step_loss": 0.12451171875 }, { "epoch": 3.7572815533980584, "grad_norm": 0.48157726901389747, "kl": 0.2353515625, "learning_rate": 1.1639175194611693e-06, "loss": 0.1547, "step": 387, "step_loss": 0.1328125 }, { "epoch": 3.766990291262136, "grad_norm": 0.541213622511581, "kl": 0.251953125, "learning_rate": 1.15411705788683e-06, "loss": 0.1655, "step": 388, "step_loss": 0.107421875 }, { "epoch": 3.7766990291262137, "grad_norm": 0.5615405005474776, "kl": 0.212890625, "learning_rate": 1.1443771525735577e-06, "loss": 0.1576, "step": 389, "step_loss": 0.1767578125 }, { "epoch": 3.7864077669902914, "grad_norm": 0.4950987080693749, "kl": 0.169921875, "learning_rate": 1.1346981731050051e-06, "loss": 0.1565, "step": 390, "step_loss": 0.2578125 }, { "epoch": 3.796116504854369, "grad_norm": 0.49370094007294774, "kl": 0.375, "learning_rate": 1.1250804867529794e-06, "loss": 0.17, "step": 391, "step_loss": 0.0888671875 }, { "epoch": 3.8058252427184467, "grad_norm": 0.5639925863204843, "kl": 0.251953125, "learning_rate": 1.1155244584634953e-06, "loss": 0.1395, "step": 392, "step_loss": 0.08984375 }, { "epoch": 3.8155339805825244, "grad_norm": 0.5187019670608374, "kl": 0.173828125, "learning_rate": 1.1060304508429407e-06, "loss": 0.1556, "step": 393, "step_loss": 0.154296875 }, { "epoch": 3.825242718446602, "grad_norm": 0.4683311180263721, "kl": 0.23046875, "learning_rate": 1.0965988241443043e-06, "loss": 0.1584, "step": 394, "step_loss": 0.1357421875 }, { "epoch": 3.8349514563106797, "grad_norm": 0.46054186247992845, "kl": 0.2412109375, "learning_rate": 1.0872299362535175e-06, "loss": 0.1232, "step": 395, "step_loss": 0.0966796875 }, { "epoch": 3.8446601941747574, "grad_norm": 0.514740300530401, "kl": 0.404296875, "learning_rate": 1.0779241426758628e-06, "loss": 0.1634, "step": 396, "step_loss": 0.09912109375 }, { "epoch": 3.854368932038835, "grad_norm": 
0.5249887226800867, "kl": 0.2421875, "learning_rate": 1.068681796522496e-06, "loss": 0.1515, "step": 397, "step_loss": 0.078125 }, { "epoch": 3.8640776699029127, "grad_norm": 0.4882053926671313, "kl": 0.2734375, "learning_rate": 1.0595032484970354e-06, "loss": 0.1623, "step": 398, "step_loss": 0.06787109375 }, { "epoch": 3.8737864077669903, "grad_norm": 0.5046889857220114, "kl": 0.1982421875, "learning_rate": 1.0503888468822648e-06, "loss": 0.1485, "step": 399, "step_loss": 0.2275390625 }, { "epoch": 3.883495145631068, "grad_norm": 0.5742122623829672, "kl": 0.291015625, "learning_rate": 1.0413389375269098e-06, "loss": 0.1645, "step": 400, "step_loss": 0.109375 }, { "epoch": 3.8932038834951457, "grad_norm": 0.4869305965525205, "kl": 0.373046875, "learning_rate": 1.0323538638325185e-06, "loss": 0.1506, "step": 401, "step_loss": 0.06298828125 }, { "epoch": 3.9029126213592233, "grad_norm": 0.4895770160456859, "kl": 0.1748046875, "learning_rate": 1.0234339667404326e-06, "loss": 0.1535, "step": 402, "step_loss": 0.12255859375 }, { "epoch": 3.912621359223301, "grad_norm": 0.4718475674872005, "kl": 0.2138671875, "learning_rate": 1.0145795847188435e-06, "loss": 0.1563, "step": 403, "step_loss": 0.244140625 }, { "epoch": 3.9223300970873787, "grad_norm": 0.46310640287447863, "kl": 0.28125, "learning_rate": 1.0057910537499585e-06, "loss": 0.1502, "step": 404, "step_loss": 0.08642578125 }, { "epoch": 3.9320388349514563, "grad_norm": 0.5464878892001633, "kl": 0.2275390625, "learning_rate": 9.970687073172416e-07, "loss": 0.1648, "step": 405, "step_loss": 0.1259765625 }, { "epoch": 3.941747572815534, "grad_norm": 0.5101772415025155, "kl": 0.1318359375, "learning_rate": 9.884128763927692e-07, "loss": 0.1584, "step": 406, "step_loss": 0.0927734375 }, { "epoch": 3.9514563106796117, "grad_norm": 0.5262085741097766, "kl": 0.3359375, "learning_rate": 9.798238894246628e-07, "loss": 0.1498, "step": 407, "step_loss": 0.134765625 }, { "epoch": 3.9611650485436893, "grad_norm": 
0.5133055914879928, "kl": 0.16015625, "learning_rate": 9.713020723246332e-07, "loss": 0.1707, "step": 408, "step_loss": 0.1513671875 }, { "epoch": 3.970873786407767, "grad_norm": 0.5499931811863684, "kl": 0.306640625, "learning_rate": 9.628477484556066e-07, "loss": 0.1547, "step": 409, "step_loss": 0.11474609375 }, { "epoch": 3.9805825242718447, "grad_norm": 0.5051010516067944, "kl": 0.162109375, "learning_rate": 9.54461238619462e-07, "loss": 0.1586, "step": 410, "step_loss": 0.109375 }, { "epoch": 3.9902912621359223, "grad_norm": 0.49187747334266807, "kl": 0.2333984375, "learning_rate": 9.461428610448503e-07, "loss": 0.1461, "step": 411, "step_loss": 0.1416015625 }, { "epoch": 4.0, "grad_norm": 0.44028016997121006, "kl": 0.2734375, "learning_rate": 9.378929313751267e-07, "loss": 0.1499, "step": 412, "step_loss": 0.08056640625 }, { "epoch": 4.0, "eval_test_transformed.json_loss": null, "eval_test_transformed.json_runtime": 8.4799, "eval_test_transformed.json_samples_per_second": 58.963, "eval_test_transformed.json_steps_per_second": 1.887, "step": 412 } ], "logging_steps": 1.0, "max_steps": 515, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 43413797257216.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }