| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 2060, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009708737864077669, | |
| "grad_norm": 8.540091514587402, | |
| "learning_rate": 3.883495145631068e-07, | |
| "loss": 2.734, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.019417475728155338, | |
| "grad_norm": 8.14619255065918, | |
| "learning_rate": 1.359223300970874e-06, | |
| "loss": 2.6711, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02912621359223301, | |
| "grad_norm": 6.858204364776611, | |
| "learning_rate": 2.330097087378641e-06, | |
| "loss": 2.6055, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.038834951456310676, | |
| "grad_norm": 6.228450298309326, | |
| "learning_rate": 3.300970873786408e-06, | |
| "loss": 2.5322, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04854368932038835, | |
| "grad_norm": 5.512765407562256, | |
| "learning_rate": 4.271844660194175e-06, | |
| "loss": 2.466, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05825242718446602, | |
| "grad_norm": 5.114351272583008, | |
| "learning_rate": 5.242718446601942e-06, | |
| "loss": 2.4828, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06796116504854369, | |
| "grad_norm": 4.749820232391357, | |
| "learning_rate": 6.213592233009709e-06, | |
| "loss": 2.4529, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07766990291262135, | |
| "grad_norm": 4.962618827819824, | |
| "learning_rate": 7.184466019417476e-06, | |
| "loss": 2.3377, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08737864077669903, | |
| "grad_norm": 4.963841915130615, | |
| "learning_rate": 8.155339805825243e-06, | |
| "loss": 2.3914, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0970873786407767, | |
| "grad_norm": 5.020650386810303, | |
| "learning_rate": 9.12621359223301e-06, | |
| "loss": 2.2748, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10679611650485436, | |
| "grad_norm": 4.788837909698486, | |
| "learning_rate": 1.0097087378640778e-05, | |
| "loss": 2.2975, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11650485436893204, | |
| "grad_norm": 5.005253791809082, | |
| "learning_rate": 1.1067961165048544e-05, | |
| "loss": 2.3025, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1262135922330097, | |
| "grad_norm": 5.113918304443359, | |
| "learning_rate": 1.2038834951456311e-05, | |
| "loss": 2.2121, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13592233009708737, | |
| "grad_norm": 5.089141368865967, | |
| "learning_rate": 1.300970873786408e-05, | |
| "loss": 2.233, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14563106796116504, | |
| "grad_norm": 5.0798749923706055, | |
| "learning_rate": 1.3980582524271846e-05, | |
| "loss": 2.1688, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1553398058252427, | |
| "grad_norm": 5.230806827545166, | |
| "learning_rate": 1.4951456310679614e-05, | |
| "loss": 2.2232, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1650485436893204, | |
| "grad_norm": 5.30760383605957, | |
| "learning_rate": 1.592233009708738e-05, | |
| "loss": 2.1506, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.17475728155339806, | |
| "grad_norm": 5.008656978607178, | |
| "learning_rate": 1.6893203883495145e-05, | |
| "loss": 2.1866, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.18446601941747573, | |
| "grad_norm": 4.95796537399292, | |
| "learning_rate": 1.7864077669902916e-05, | |
| "loss": 2.2156, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1941747572815534, | |
| "grad_norm": 4.717769145965576, | |
| "learning_rate": 1.883495145631068e-05, | |
| "loss": 2.164, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.20388349514563106, | |
| "grad_norm": 4.858338832855225, | |
| "learning_rate": 1.9805825242718447e-05, | |
| "loss": 2.1291, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.21359223300970873, | |
| "grad_norm": 5.218167781829834, | |
| "learning_rate": 1.9913700107874866e-05, | |
| "loss": 2.1617, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.22330097087378642, | |
| "grad_norm": 5.097916126251221, | |
| "learning_rate": 1.9805825242718447e-05, | |
| "loss": 2.1721, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.23300970873786409, | |
| "grad_norm": 5.860560417175293, | |
| "learning_rate": 1.969795037756203e-05, | |
| "loss": 2.1412, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.24271844660194175, | |
| "grad_norm": 5.395883560180664, | |
| "learning_rate": 1.959007551240561e-05, | |
| "loss": 2.1397, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2524271844660194, | |
| "grad_norm": 5.043527126312256, | |
| "learning_rate": 1.9482200647249193e-05, | |
| "loss": 2.1314, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2621359223300971, | |
| "grad_norm": 4.853712558746338, | |
| "learning_rate": 1.9374325782092775e-05, | |
| "loss": 2.118, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.27184466019417475, | |
| "grad_norm": 5.681634902954102, | |
| "learning_rate": 1.9266450916936353e-05, | |
| "loss": 2.1189, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2815533980582524, | |
| "grad_norm": 5.401227951049805, | |
| "learning_rate": 1.9158576051779935e-05, | |
| "loss": 2.1148, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2912621359223301, | |
| "grad_norm": 5.208418369293213, | |
| "learning_rate": 1.905070118662352e-05, | |
| "loss": 2.128, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.30097087378640774, | |
| "grad_norm": 5.307718276977539, | |
| "learning_rate": 1.89428263214671e-05, | |
| "loss": 2.0858, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3106796116504854, | |
| "grad_norm": 5.279410362243652, | |
| "learning_rate": 1.883495145631068e-05, | |
| "loss": 2.1371, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.32038834951456313, | |
| "grad_norm": 5.151274681091309, | |
| "learning_rate": 1.8727076591154262e-05, | |
| "loss": 2.0932, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3300970873786408, | |
| "grad_norm": 5.083354473114014, | |
| "learning_rate": 1.8619201725997844e-05, | |
| "loss": 2.0623, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.33980582524271846, | |
| "grad_norm": 5.1322550773620605, | |
| "learning_rate": 1.8511326860841425e-05, | |
| "loss": 2.05, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.34951456310679613, | |
| "grad_norm": 4.970919609069824, | |
| "learning_rate": 1.8403451995685007e-05, | |
| "loss": 2.0901, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3592233009708738, | |
| "grad_norm": 5.15512752532959, | |
| "learning_rate": 1.829557713052859e-05, | |
| "loss": 2.0883, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.36893203883495146, | |
| "grad_norm": 5.088575839996338, | |
| "learning_rate": 1.818770226537217e-05, | |
| "loss": 2.1119, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3786407766990291, | |
| "grad_norm": 6.092918872833252, | |
| "learning_rate": 1.807982740021575e-05, | |
| "loss": 2.0979, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3883495145631068, | |
| "grad_norm": 4.909801483154297, | |
| "learning_rate": 1.797195253505933e-05, | |
| "loss": 2.0283, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.39805825242718446, | |
| "grad_norm": 5.128530025482178, | |
| "learning_rate": 1.7864077669902916e-05, | |
| "loss": 2.023, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4077669902912621, | |
| "grad_norm": 4.998912811279297, | |
| "learning_rate": 1.7756202804746498e-05, | |
| "loss": 2.0207, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4174757281553398, | |
| "grad_norm": 5.182358264923096, | |
| "learning_rate": 1.7648327939590076e-05, | |
| "loss": 1.9837, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.42718446601941745, | |
| "grad_norm": 5.3191022872924805, | |
| "learning_rate": 1.7540453074433658e-05, | |
| "loss": 2.058, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4368932038834951, | |
| "grad_norm": 5.306585788726807, | |
| "learning_rate": 1.743257820927724e-05, | |
| "loss": 2.0832, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.44660194174757284, | |
| "grad_norm": 5.278446197509766, | |
| "learning_rate": 1.732470334412082e-05, | |
| "loss": 2.0594, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4563106796116505, | |
| "grad_norm": 5.484086990356445, | |
| "learning_rate": 1.7216828478964403e-05, | |
| "loss": 2.0763, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.46601941747572817, | |
| "grad_norm": 5.767387866973877, | |
| "learning_rate": 1.7108953613807985e-05, | |
| "loss": 2.0634, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.47572815533980584, | |
| "grad_norm": 4.96846342086792, | |
| "learning_rate": 1.7001078748651563e-05, | |
| "loss": 2.0769, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4854368932038835, | |
| "grad_norm": 5.264239311218262, | |
| "learning_rate": 1.6893203883495145e-05, | |
| "loss": 2.0604, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.49514563106796117, | |
| "grad_norm": 5.036663055419922, | |
| "learning_rate": 1.6785329018338727e-05, | |
| "loss": 2.1031, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5048543689320388, | |
| "grad_norm": 4.875285625457764, | |
| "learning_rate": 1.6677454153182312e-05, | |
| "loss": 2.0457, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5145631067961165, | |
| "grad_norm": 4.933873653411865, | |
| "learning_rate": 1.6569579288025894e-05, | |
| "loss": 2.0312, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5242718446601942, | |
| "grad_norm": 5.284345626831055, | |
| "learning_rate": 1.6461704422869472e-05, | |
| "loss": 2.0656, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5339805825242718, | |
| "grad_norm": 5.3404998779296875, | |
| "learning_rate": 1.6353829557713054e-05, | |
| "loss": 2.1049, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5436893203883495, | |
| "grad_norm": 5.243639945983887, | |
| "learning_rate": 1.6245954692556636e-05, | |
| "loss": 2.0382, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5533980582524272, | |
| "grad_norm": 5.110634803771973, | |
| "learning_rate": 1.6138079827400217e-05, | |
| "loss": 2.004, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5631067961165048, | |
| "grad_norm": 5.063004493713379, | |
| "learning_rate": 1.60302049622438e-05, | |
| "loss": 2.0207, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5728155339805825, | |
| "grad_norm": 4.7647271156311035, | |
| "learning_rate": 1.592233009708738e-05, | |
| "loss": 2.0534, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5825242718446602, | |
| "grad_norm": 5.176267147064209, | |
| "learning_rate": 1.581445523193096e-05, | |
| "loss": 2.0627, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5922330097087378, | |
| "grad_norm": 5.2062225341796875, | |
| "learning_rate": 1.570658036677454e-05, | |
| "loss": 2.0063, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6019417475728155, | |
| "grad_norm": 5.044838905334473, | |
| "learning_rate": 1.5598705501618123e-05, | |
| "loss": 1.9475, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6116504854368932, | |
| "grad_norm": 5.029117584228516, | |
| "learning_rate": 1.5490830636461708e-05, | |
| "loss": 2.0673, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6213592233009708, | |
| "grad_norm": 5.190179347991943, | |
| "learning_rate": 1.5382955771305286e-05, | |
| "loss": 2.0176, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6310679611650486, | |
| "grad_norm": 4.956365585327148, | |
| "learning_rate": 1.5275080906148868e-05, | |
| "loss": 1.9984, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6407766990291263, | |
| "grad_norm": 4.972413539886475, | |
| "learning_rate": 1.516720604099245e-05, | |
| "loss": 1.9919, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6504854368932039, | |
| "grad_norm": 5.319215297698975, | |
| "learning_rate": 1.5059331175836032e-05, | |
| "loss": 1.984, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6601941747572816, | |
| "grad_norm": 5.120510578155518, | |
| "learning_rate": 1.4951456310679614e-05, | |
| "loss": 1.9838, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6699029126213593, | |
| "grad_norm": 4.868938446044922, | |
| "learning_rate": 1.4843581445523194e-05, | |
| "loss": 2.006, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6796116504854369, | |
| "grad_norm": 5.22821044921875, | |
| "learning_rate": 1.4735706580366775e-05, | |
| "loss": 2.0099, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6893203883495146, | |
| "grad_norm": 4.970730781555176, | |
| "learning_rate": 1.4627831715210357e-05, | |
| "loss": 2.0078, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6990291262135923, | |
| "grad_norm": 4.913213729858398, | |
| "learning_rate": 1.4519956850053937e-05, | |
| "loss": 2.0198, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7087378640776699, | |
| "grad_norm": 5.104898452758789, | |
| "learning_rate": 1.4412081984897519e-05, | |
| "loss": 2.0224, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7184466019417476, | |
| "grad_norm": 4.992263317108154, | |
| "learning_rate": 1.4304207119741102e-05, | |
| "loss": 2.0013, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7281553398058253, | |
| "grad_norm": 5.0994038581848145, | |
| "learning_rate": 1.4196332254584684e-05, | |
| "loss": 1.9542, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7378640776699029, | |
| "grad_norm": 5.849913120269775, | |
| "learning_rate": 1.4088457389428264e-05, | |
| "loss": 1.9941, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7475728155339806, | |
| "grad_norm": 5.09085750579834, | |
| "learning_rate": 1.3980582524271846e-05, | |
| "loss": 2.0384, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7572815533980582, | |
| "grad_norm": 5.28529167175293, | |
| "learning_rate": 1.3872707659115428e-05, | |
| "loss": 2.0251, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7669902912621359, | |
| "grad_norm": 5.162165641784668, | |
| "learning_rate": 1.3764832793959008e-05, | |
| "loss": 1.9806, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7766990291262136, | |
| "grad_norm": 5.865965843200684, | |
| "learning_rate": 1.365695792880259e-05, | |
| "loss": 2.0359, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7864077669902912, | |
| "grad_norm": 4.936879634857178, | |
| "learning_rate": 1.3549083063646171e-05, | |
| "loss": 1.993, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7961165048543689, | |
| "grad_norm": 5.331514358520508, | |
| "learning_rate": 1.3441208198489753e-05, | |
| "loss": 1.9685, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8058252427184466, | |
| "grad_norm": 5.171398639678955, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 1.9885, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8155339805825242, | |
| "grad_norm": 4.853579998016357, | |
| "learning_rate": 1.3225458468176915e-05, | |
| "loss": 1.9818, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8252427184466019, | |
| "grad_norm": 5.196751117706299, | |
| "learning_rate": 1.3117583603020499e-05, | |
| "loss": 1.946, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8349514563106796, | |
| "grad_norm": 4.931100845336914, | |
| "learning_rate": 1.300970873786408e-05, | |
| "loss": 1.9898, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8446601941747572, | |
| "grad_norm": 5.232204437255859, | |
| "learning_rate": 1.290183387270766e-05, | |
| "loss": 2.0314, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8543689320388349, | |
| "grad_norm": 5.175143718719482, | |
| "learning_rate": 1.2793959007551242e-05, | |
| "loss": 1.9958, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8640776699029126, | |
| "grad_norm": 5.501524925231934, | |
| "learning_rate": 1.2686084142394824e-05, | |
| "loss": 1.969, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8737864077669902, | |
| "grad_norm": 5.200106620788574, | |
| "learning_rate": 1.2578209277238404e-05, | |
| "loss": 1.9624, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.883495145631068, | |
| "grad_norm": 5.435555934906006, | |
| "learning_rate": 1.2470334412081986e-05, | |
| "loss": 2.0057, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8932038834951457, | |
| "grad_norm": 5.041926860809326, | |
| "learning_rate": 1.2362459546925568e-05, | |
| "loss": 1.9604, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9029126213592233, | |
| "grad_norm": 5.86530065536499, | |
| "learning_rate": 1.2254584681769148e-05, | |
| "loss": 1.9904, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.912621359223301, | |
| "grad_norm": 5.039781093597412, | |
| "learning_rate": 1.214670981661273e-05, | |
| "loss": 1.9681, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9223300970873787, | |
| "grad_norm": 5.195461273193359, | |
| "learning_rate": 1.2038834951456311e-05, | |
| "loss": 2.0042, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9320388349514563, | |
| "grad_norm": 5.229151725769043, | |
| "learning_rate": 1.1930960086299891e-05, | |
| "loss": 2.0039, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.941747572815534, | |
| "grad_norm": 4.973319053649902, | |
| "learning_rate": 1.1823085221143475e-05, | |
| "loss": 2.0065, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9514563106796117, | |
| "grad_norm": 5.273900032043457, | |
| "learning_rate": 1.1715210355987056e-05, | |
| "loss": 1.9896, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9611650485436893, | |
| "grad_norm": 5.269063472747803, | |
| "learning_rate": 1.1607335490830638e-05, | |
| "loss": 1.9592, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.970873786407767, | |
| "grad_norm": 5.100589752197266, | |
| "learning_rate": 1.149946062567422e-05, | |
| "loss": 1.9354, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9805825242718447, | |
| "grad_norm": 5.232608318328857, | |
| "learning_rate": 1.1413160733549084e-05, | |
| "loss": 2.0395, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.9902912621359223, | |
| "grad_norm": 5.08132791519165, | |
| "learning_rate": 1.1305285868392666e-05, | |
| "loss": 1.9437, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.696261405944824, | |
| "learning_rate": 1.1197411003236248e-05, | |
| "loss": 1.9665, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.0097087378640777, | |
| "grad_norm": 5.359734058380127, | |
| "learning_rate": 1.1089536138079828e-05, | |
| "loss": 1.841, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.0194174757281553, | |
| "grad_norm": 5.353382587432861, | |
| "learning_rate": 1.098166127292341e-05, | |
| "loss": 1.8354, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.029126213592233, | |
| "grad_norm": 5.595281600952148, | |
| "learning_rate": 1.0873786407766991e-05, | |
| "loss": 1.8011, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.0388349514563107, | |
| "grad_norm": 5.603349685668945, | |
| "learning_rate": 1.0765911542610571e-05, | |
| "loss": 1.8443, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.0485436893203883, | |
| "grad_norm": 5.549112319946289, | |
| "learning_rate": 1.0658036677454153e-05, | |
| "loss": 1.8022, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.058252427184466, | |
| "grad_norm": 5.524362087249756, | |
| "learning_rate": 1.0550161812297735e-05, | |
| "loss": 1.8361, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.0679611650485437, | |
| "grad_norm": 5.287837028503418, | |
| "learning_rate": 1.0442286947141318e-05, | |
| "loss": 1.7831, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.0776699029126213, | |
| "grad_norm": 5.503448486328125, | |
| "learning_rate": 1.03344120819849e-05, | |
| "loss": 1.816, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.087378640776699, | |
| "grad_norm": 5.447495460510254, | |
| "learning_rate": 1.022653721682848e-05, | |
| "loss": 1.7967, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.0970873786407767, | |
| "grad_norm": 5.651370525360107, | |
| "learning_rate": 1.0118662351672062e-05, | |
| "loss": 1.767, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.1067961165048543, | |
| "grad_norm": 5.455685138702393, | |
| "learning_rate": 1.0010787486515644e-05, | |
| "loss": 1.7986, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.116504854368932, | |
| "grad_norm": 5.773884296417236, | |
| "learning_rate": 9.902912621359224e-06, | |
| "loss": 1.8061, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.1262135922330097, | |
| "grad_norm": 5.654228687286377, | |
| "learning_rate": 9.795037756202806e-06, | |
| "loss": 1.8051, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.1359223300970873, | |
| "grad_norm": 5.353296756744385, | |
| "learning_rate": 9.687162891046387e-06, | |
| "loss": 1.8157, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.145631067961165, | |
| "grad_norm": 5.371259689331055, | |
| "learning_rate": 9.579288025889967e-06, | |
| "loss": 1.7957, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.1553398058252426, | |
| "grad_norm": 5.551179885864258, | |
| "learning_rate": 9.47141316073355e-06, | |
| "loss": 1.8099, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.1650485436893203, | |
| "grad_norm": 5.763803958892822, | |
| "learning_rate": 9.363538295577131e-06, | |
| "loss": 1.8243, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.174757281553398, | |
| "grad_norm": 5.4469313621521, | |
| "learning_rate": 9.255663430420713e-06, | |
| "loss": 1.7813, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.1844660194174756, | |
| "grad_norm": 5.914862155914307, | |
| "learning_rate": 9.147788565264294e-06, | |
| "loss": 1.8308, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.1941747572815533, | |
| "grad_norm": 5.619472980499268, | |
| "learning_rate": 9.039913700107874e-06, | |
| "loss": 1.8566, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.203883495145631, | |
| "grad_norm": 5.747879981994629, | |
| "learning_rate": 8.932038834951458e-06, | |
| "loss": 1.831, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.2135922330097086, | |
| "grad_norm": 5.515039443969727, | |
| "learning_rate": 8.824163969795038e-06, | |
| "loss": 1.8279, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.2233009708737863, | |
| "grad_norm": 5.6780171394348145, | |
| "learning_rate": 8.71628910463862e-06, | |
| "loss": 1.7496, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.233009708737864, | |
| "grad_norm": 5.678586006164551, | |
| "learning_rate": 8.608414239482202e-06, | |
| "loss": 1.7862, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.2427184466019416, | |
| "grad_norm": 5.727756977081299, | |
| "learning_rate": 8.500539374325782e-06, | |
| "loss": 1.8364, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.2524271844660193, | |
| "grad_norm": 5.593883037567139, | |
| "learning_rate": 8.392664509169363e-06, | |
| "loss": 1.7775, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.262135922330097, | |
| "grad_norm": 5.856795310974121, | |
| "learning_rate": 8.284789644012947e-06, | |
| "loss": 1.8652, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.2718446601941746, | |
| "grad_norm": 5.66147518157959, | |
| "learning_rate": 8.176914778856527e-06, | |
| "loss": 1.7747, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.2815533980582523, | |
| "grad_norm": 5.7044291496276855, | |
| "learning_rate": 8.069039913700109e-06, | |
| "loss": 1.7781, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.29126213592233, | |
| "grad_norm": 5.8010149002075195, | |
| "learning_rate": 7.96116504854369e-06, | |
| "loss": 1.812, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.3009708737864076, | |
| "grad_norm": 5.601301670074463, | |
| "learning_rate": 7.85329018338727e-06, | |
| "loss": 1.8109, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.3106796116504853, | |
| "grad_norm": 5.59577751159668, | |
| "learning_rate": 7.745415318230854e-06, | |
| "loss": 1.794, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.3203883495145632, | |
| "grad_norm": 6.064187526702881, | |
| "learning_rate": 7.637540453074434e-06, | |
| "loss": 1.7978, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.3300970873786409, | |
| "grad_norm": 5.477755069732666, | |
| "learning_rate": 7.529665587918016e-06, | |
| "loss": 1.7732, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.3398058252427185, | |
| "grad_norm": 5.672438144683838, | |
| "learning_rate": 7.421790722761597e-06, | |
| "loss": 1.8356, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.3495145631067962, | |
| "grad_norm": 5.968810558319092, | |
| "learning_rate": 7.3139158576051786e-06, | |
| "loss": 1.8513, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.3592233009708738, | |
| "grad_norm": 5.984207630157471, | |
| "learning_rate": 7.2060409924487595e-06, | |
| "loss": 1.7821, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.3689320388349515, | |
| "grad_norm": 6.006514072418213, | |
| "learning_rate": 7.098166127292342e-06, | |
| "loss": 1.756, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.3786407766990292, | |
| "grad_norm": 5.548986911773682, | |
| "learning_rate": 6.990291262135923e-06, | |
| "loss": 1.8, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.3883495145631068, | |
| "grad_norm": 5.688983917236328, | |
| "learning_rate": 6.882416396979504e-06, | |
| "loss": 1.7601, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.3980582524271845, | |
| "grad_norm": 5.5633225440979, | |
| "learning_rate": 6.774541531823086e-06, | |
| "loss": 1.7876, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.4077669902912622, | |
| "grad_norm": 5.3327226638793945, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.7388, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.4174757281553398, | |
| "grad_norm": 5.563536643981934, | |
| "learning_rate": 6.558791801510249e-06, | |
| "loss": 1.7894, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.4271844660194175, | |
| "grad_norm": 5.413880825042725, | |
| "learning_rate": 6.45091693635383e-06, | |
| "loss": 1.8045, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.4368932038834952, | |
| "grad_norm": 5.559710502624512, | |
| "learning_rate": 6.343042071197412e-06, | |
| "loss": 1.7388, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.4466019417475728, | |
| "grad_norm": 5.674643039703369, | |
| "learning_rate": 6.235167206040993e-06, | |
| "loss": 1.7521, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.4563106796116505, | |
| "grad_norm": 6.015341758728027, | |
| "learning_rate": 6.127292340884574e-06, | |
| "loss": 1.7567, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.4660194174757282, | |
| "grad_norm": 5.763010025024414, | |
| "learning_rate": 6.0194174757281556e-06, | |
| "loss": 1.7494, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.4757281553398058, | |
| "grad_norm": 5.727349758148193, | |
| "learning_rate": 5.911542610571737e-06, | |
| "loss": 1.8048, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.4854368932038835, | |
| "grad_norm": 5.473784923553467, | |
| "learning_rate": 5.803667745415319e-06, | |
| "loss": 1.7469, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.4951456310679612, | |
| "grad_norm": 5.847958087921143, | |
| "learning_rate": 5.6957928802589e-06, | |
| "loss": 1.7803, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.5048543689320388, | |
| "grad_norm": 6.08969259262085, | |
| "learning_rate": 5.587918015102482e-06, | |
| "loss": 1.796, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.5145631067961165, | |
| "grad_norm": 5.455092430114746, | |
| "learning_rate": 5.480043149946063e-06, | |
| "loss": 1.7495, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.5242718446601942, | |
| "grad_norm": 5.9275031089782715, | |
| "learning_rate": 5.372168284789644e-06, | |
| "loss": 1.814, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.5339805825242718, | |
| "grad_norm": 5.613204002380371, | |
| "learning_rate": 5.264293419633226e-06, | |
| "loss": 1.8095, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.5436893203883495, | |
| "grad_norm": 5.575292110443115, | |
| "learning_rate": 5.156418554476807e-06, | |
| "loss": 1.794, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.5533980582524272, | |
| "grad_norm": 6.1768107414245605, | |
| "learning_rate": 5.048543689320389e-06, | |
| "loss": 1.7858, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.5631067961165048, | |
| "grad_norm": 5.83579158782959, | |
| "learning_rate": 4.94066882416397e-06, | |
| "loss": 1.7229, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.5728155339805825, | |
| "grad_norm": 5.574371814727783, | |
| "learning_rate": 4.832793959007552e-06, | |
| "loss": 1.784, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.5825242718446602, | |
| "grad_norm": 5.452093124389648, | |
| "learning_rate": 4.724919093851133e-06, | |
| "loss": 1.7639, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.5922330097087378, | |
| "grad_norm": 5.442083358764648, | |
| "learning_rate": 4.617044228694714e-06, | |
| "loss": 1.8157, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.6019417475728155, | |
| "grad_norm": 5.7211079597473145, | |
| "learning_rate": 4.509169363538296e-06, | |
| "loss": 1.7338, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.6116504854368932, | |
| "grad_norm": 6.113297462463379, | |
| "learning_rate": 4.401294498381877e-06, | |
| "loss": 1.7044, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.6213592233009708, | |
| "grad_norm": 5.693146705627441, | |
| "learning_rate": 4.293419633225459e-06, | |
| "loss": 1.7722, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.6310679611650487, | |
| "grad_norm": 5.956842422485352, | |
| "learning_rate": 4.1855447680690406e-06, | |
| "loss": 1.7618, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.6407766990291264, | |
| "grad_norm": 5.646125793457031, | |
| "learning_rate": 4.0776699029126215e-06, | |
| "loss": 1.7872, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.650485436893204, | |
| "grad_norm": 5.913788795471191, | |
| "learning_rate": 3.969795037756203e-06, | |
| "loss": 1.7913, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.6601941747572817, | |
| "grad_norm": 6.05329704284668, | |
| "learning_rate": 3.861920172599784e-06, | |
| "loss": 1.7771, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.6699029126213594, | |
| "grad_norm": 6.139546871185303, | |
| "learning_rate": 3.754045307443366e-06, | |
| "loss": 1.7966, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.679611650485437, | |
| "grad_norm": 6.158768653869629, | |
| "learning_rate": 3.6461704422869477e-06, | |
| "loss": 1.8097, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.6893203883495147, | |
| "grad_norm": 5.726659774780273, | |
| "learning_rate": 3.5382955771305286e-06, | |
| "loss": 1.7568, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.6990291262135924, | |
| "grad_norm": 5.508258819580078, | |
| "learning_rate": 3.43042071197411e-06, | |
| "loss": 1.8406, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.70873786407767, | |
| "grad_norm": 6.076147556304932, | |
| "learning_rate": 3.3225458468176918e-06, | |
| "loss": 1.7246, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.7184466019417477, | |
| "grad_norm": 5.596787452697754, | |
| "learning_rate": 3.214670981661273e-06, | |
| "loss": 1.8189, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.7281553398058254, | |
| "grad_norm": 6.001366138458252, | |
| "learning_rate": 3.1067961165048544e-06, | |
| "loss": 1.7681, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.737864077669903, | |
| "grad_norm": 5.889746189117432, | |
| "learning_rate": 2.9989212513484362e-06, | |
| "loss": 1.7634, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.7475728155339807, | |
| "grad_norm": 5.484528541564941, | |
| "learning_rate": 2.8910463861920176e-06, | |
| "loss": 1.7928, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.7572815533980584, | |
| "grad_norm": 5.366055011749268, | |
| "learning_rate": 2.7831715210355993e-06, | |
| "loss": 1.7284, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.766990291262136, | |
| "grad_norm": 6.179636478424072, | |
| "learning_rate": 2.6752966558791803e-06, | |
| "loss": 1.7973, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.7766990291262137, | |
| "grad_norm": 5.786418437957764, | |
| "learning_rate": 2.5674217907227616e-06, | |
| "loss": 1.7271, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.7864077669902914, | |
| "grad_norm": 5.728253364562988, | |
| "learning_rate": 2.4595469255663434e-06, | |
| "loss": 1.7947, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.796116504854369, | |
| "grad_norm": 5.840207576751709, | |
| "learning_rate": 2.3516720604099247e-06, | |
| "loss": 1.8411, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.8058252427184467, | |
| "grad_norm": 6.026117324829102, | |
| "learning_rate": 2.243797195253506e-06, | |
| "loss": 1.7801, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.8155339805825244, | |
| "grad_norm": 5.574731826782227, | |
| "learning_rate": 2.1359223300970874e-06, | |
| "loss": 1.8296, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.825242718446602, | |
| "grad_norm": 5.741345405578613, | |
| "learning_rate": 2.0280474649406688e-06, | |
| "loss": 1.8048, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.8349514563106797, | |
| "grad_norm": 5.989925384521484, | |
| "learning_rate": 1.9201725997842505e-06, | |
| "loss": 1.7559, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.8446601941747574, | |
| "grad_norm": 5.998227119445801, | |
| "learning_rate": 1.812297734627832e-06, | |
| "loss": 1.7678, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.854368932038835, | |
| "grad_norm": 5.9274420738220215, | |
| "learning_rate": 1.7044228694714132e-06, | |
| "loss": 1.7886, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.8640776699029127, | |
| "grad_norm": 5.719155788421631, | |
| "learning_rate": 1.5965480043149948e-06, | |
| "loss": 1.7921, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.8737864077669903, | |
| "grad_norm": 5.4220147132873535, | |
| "learning_rate": 1.4886731391585763e-06, | |
| "loss": 1.7813, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.883495145631068, | |
| "grad_norm": 5.772354602813721, | |
| "learning_rate": 1.3807982740021575e-06, | |
| "loss": 1.7606, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.8932038834951457, | |
| "grad_norm": 5.864536762237549, | |
| "learning_rate": 1.272923408845739e-06, | |
| "loss": 1.7354, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.9029126213592233, | |
| "grad_norm": 5.453779697418213, | |
| "learning_rate": 1.1650485436893206e-06, | |
| "loss": 1.7403, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.912621359223301, | |
| "grad_norm": 5.6637492179870605, | |
| "learning_rate": 1.057173678532902e-06, | |
| "loss": 1.7649, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.9223300970873787, | |
| "grad_norm": 5.886834621429443, | |
| "learning_rate": 9.492988133764834e-07, | |
| "loss": 1.8095, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.9320388349514563, | |
| "grad_norm": 6.255226135253906, | |
| "learning_rate": 8.414239482200648e-07, | |
| "loss": 1.7987, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.941747572815534, | |
| "grad_norm": 5.694814205169678, | |
| "learning_rate": 7.335490830636462e-07, | |
| "loss": 1.7715, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.9514563106796117, | |
| "grad_norm": 5.4670257568359375, | |
| "learning_rate": 6.256742179072277e-07, | |
| "loss": 1.737, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.9611650485436893, | |
| "grad_norm": 5.776082992553711, | |
| "learning_rate": 5.393743257820928e-07, | |
| "loss": 1.7518, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.970873786407767, | |
| "grad_norm": 5.826039791107178, | |
| "learning_rate": 4.314994606256743e-07, | |
| "loss": 1.8036, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.9805825242718447, | |
| "grad_norm": 5.991348743438721, | |
| "learning_rate": 3.2362459546925565e-07, | |
| "loss": 1.756, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.9902912621359223, | |
| "grad_norm": 5.450629234313965, | |
| "learning_rate": 2.1574973031283715e-07, | |
| "loss": 1.7953, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 5.462096214294434, | |
| "learning_rate": 1.0787486515641857e-07, | |
| "loss": 1.7721, | |
| "step": 2060 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2060, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |