{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1131,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.026525198938992044,
      "grad_norm": 2.780399544451733,
      "learning_rate": 5e-06,
      "loss": 0.8608,
      "step": 10
    },
    {
      "epoch": 0.05305039787798409,
      "grad_norm": 1.606321904164578,
      "learning_rate": 5e-06,
      "loss": 0.7691,
      "step": 20
    },
    {
      "epoch": 0.07957559681697612,
      "grad_norm": 1.4733236101897926,
      "learning_rate": 5e-06,
      "loss": 0.731,
      "step": 30
    },
    {
      "epoch": 0.10610079575596817,
      "grad_norm": 1.0466130239794242,
      "learning_rate": 5e-06,
      "loss": 0.7107,
      "step": 40
    },
    {
      "epoch": 0.13262599469496023,
      "grad_norm": 0.9068095404759675,
      "learning_rate": 5e-06,
      "loss": 0.6977,
      "step": 50
    },
    {
      "epoch": 0.15915119363395225,
      "grad_norm": 0.9537695534962456,
      "learning_rate": 5e-06,
      "loss": 0.6882,
      "step": 60
    },
    {
      "epoch": 0.1856763925729443,
      "grad_norm": 0.6703381757321946,
      "learning_rate": 5e-06,
      "loss": 0.6835,
      "step": 70
    },
    {
      "epoch": 0.21220159151193635,
      "grad_norm": 1.00882019650449,
      "learning_rate": 5e-06,
      "loss": 0.6724,
      "step": 80
    },
    {
      "epoch": 0.23872679045092837,
      "grad_norm": 1.0611084342807717,
      "learning_rate": 5e-06,
      "loss": 0.6764,
      "step": 90
    },
    {
      "epoch": 0.26525198938992045,
      "grad_norm": 0.8654567865290824,
      "learning_rate": 5e-06,
      "loss": 0.663,
      "step": 100
    },
    {
      "epoch": 0.2917771883289125,
      "grad_norm": 0.8547675860872707,
      "learning_rate": 5e-06,
      "loss": 0.6602,
      "step": 110
    },
    {
      "epoch": 0.3183023872679045,
      "grad_norm": 0.6764470147171867,
      "learning_rate": 5e-06,
      "loss": 0.6511,
      "step": 120
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.4996824578122701,
      "learning_rate": 5e-06,
      "loss": 0.649,
      "step": 130
    },
    {
      "epoch": 0.3713527851458886,
      "grad_norm": 0.5555661729991889,
      "learning_rate": 5e-06,
      "loss": 0.6499,
      "step": 140
    },
    {
      "epoch": 0.3978779840848806,
      "grad_norm": 0.662354493585409,
      "learning_rate": 5e-06,
      "loss": 0.6428,
      "step": 150
    },
    {
      "epoch": 0.4244031830238727,
      "grad_norm": 0.47582415706089126,
      "learning_rate": 5e-06,
      "loss": 0.6527,
      "step": 160
    },
    {
      "epoch": 0.4509283819628647,
      "grad_norm": 1.3542813303379895,
      "learning_rate": 5e-06,
      "loss": 0.6423,
      "step": 170
    },
    {
      "epoch": 0.47745358090185674,
      "grad_norm": 0.5202115494812097,
      "learning_rate": 5e-06,
      "loss": 0.6379,
      "step": 180
    },
    {
      "epoch": 0.5039787798408488,
      "grad_norm": 0.5270125702790065,
      "learning_rate": 5e-06,
      "loss": 0.6523,
      "step": 190
    },
    {
      "epoch": 0.5305039787798409,
      "grad_norm": 0.4787233056340413,
      "learning_rate": 5e-06,
      "loss": 0.6375,
      "step": 200
    },
    {
      "epoch": 0.5570291777188329,
      "grad_norm": 0.5279042698103554,
      "learning_rate": 5e-06,
      "loss": 0.643,
      "step": 210
    },
    {
      "epoch": 0.583554376657825,
      "grad_norm": 0.7066879723483964,
      "learning_rate": 5e-06,
      "loss": 0.6424,
      "step": 220
    },
    {
      "epoch": 0.610079575596817,
      "grad_norm": 0.7948472968894127,
      "learning_rate": 5e-06,
      "loss": 0.6341,
      "step": 230
    },
    {
      "epoch": 0.636604774535809,
      "grad_norm": 0.5390839840399401,
      "learning_rate": 5e-06,
      "loss": 0.64,
      "step": 240
    },
    {
      "epoch": 0.6631299734748011,
      "grad_norm": 0.6650275203652714,
      "learning_rate": 5e-06,
      "loss": 0.6325,
      "step": 250
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.5941647978617659,
      "learning_rate": 5e-06,
      "loss": 0.6373,
      "step": 260
    },
    {
      "epoch": 0.7161803713527851,
      "grad_norm": 0.5582608177644663,
      "learning_rate": 5e-06,
      "loss": 0.6385,
      "step": 270
    },
    {
      "epoch": 0.7427055702917772,
      "grad_norm": 0.5805453901667452,
      "learning_rate": 5e-06,
      "loss": 0.638,
      "step": 280
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.4514952002612208,
      "learning_rate": 5e-06,
      "loss": 0.6321,
      "step": 290
    },
    {
      "epoch": 0.7957559681697612,
      "grad_norm": 0.5044213856224046,
      "learning_rate": 5e-06,
      "loss": 0.6355,
      "step": 300
    },
    {
      "epoch": 0.8222811671087533,
      "grad_norm": 0.5268464944312369,
      "learning_rate": 5e-06,
      "loss": 0.6374,
      "step": 310
    },
    {
      "epoch": 0.8488063660477454,
      "grad_norm": 0.47822319615489534,
      "learning_rate": 5e-06,
      "loss": 0.6409,
      "step": 320
    },
    {
      "epoch": 0.8753315649867374,
      "grad_norm": 0.6797410044487332,
      "learning_rate": 5e-06,
      "loss": 0.6278,
      "step": 330
    },
    {
      "epoch": 0.9018567639257294,
      "grad_norm": 0.5170899802801728,
      "learning_rate": 5e-06,
      "loss": 0.6318,
      "step": 340
    },
    {
      "epoch": 0.9283819628647215,
      "grad_norm": 0.6765922367059989,
      "learning_rate": 5e-06,
      "loss": 0.6314,
      "step": 350
    },
    {
      "epoch": 0.9549071618037135,
      "grad_norm": 0.6459795123201603,
      "learning_rate": 5e-06,
      "loss": 0.6274,
      "step": 360
    },
    {
      "epoch": 0.9814323607427056,
      "grad_norm": 0.44017798959016546,
      "learning_rate": 5e-06,
      "loss": 0.6282,
      "step": 370
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.622368574142456,
      "eval_runtime": 203.4748,
      "eval_samples_per_second": 49.928,
      "eval_steps_per_second": 0.393,
      "step": 377
    },
    {
      "epoch": 1.0079575596816976,
      "grad_norm": 0.8095066775828493,
      "learning_rate": 5e-06,
      "loss": 0.6139,
      "step": 380
    },
    {
      "epoch": 1.0344827586206897,
      "grad_norm": 0.5482836222129767,
      "learning_rate": 5e-06,
      "loss": 0.5899,
      "step": 390
    },
    {
      "epoch": 1.0610079575596818,
      "grad_norm": 0.5390710199271481,
      "learning_rate": 5e-06,
      "loss": 0.5918,
      "step": 400
    },
    {
      "epoch": 1.0875331564986737,
      "grad_norm": 0.5745480856709776,
      "learning_rate": 5e-06,
      "loss": 0.5836,
      "step": 410
    },
    {
      "epoch": 1.1140583554376657,
      "grad_norm": 0.5786473175674526,
      "learning_rate": 5e-06,
      "loss": 0.5942,
      "step": 420
    },
    {
      "epoch": 1.1405835543766578,
      "grad_norm": 0.4754533900180701,
      "learning_rate": 5e-06,
      "loss": 0.5894,
      "step": 430
    },
    {
      "epoch": 1.16710875331565,
      "grad_norm": 0.626662902645802,
      "learning_rate": 5e-06,
      "loss": 0.5792,
      "step": 440
    },
    {
      "epoch": 1.193633952254642,
      "grad_norm": 0.4520996771126915,
      "learning_rate": 5e-06,
      "loss": 0.5877,
      "step": 450
    },
    {
      "epoch": 1.2201591511936338,
      "grad_norm": 0.48945343637681654,
      "learning_rate": 5e-06,
      "loss": 0.5859,
      "step": 460
    },
    {
      "epoch": 1.246684350132626,
      "grad_norm": 0.464325446341802,
      "learning_rate": 5e-06,
      "loss": 0.5915,
      "step": 470
    },
    {
      "epoch": 1.273209549071618,
      "grad_norm": 0.46045963914114574,
      "learning_rate": 5e-06,
      "loss": 0.5807,
      "step": 480
    },
    {
      "epoch": 1.29973474801061,
      "grad_norm": 0.49722516653390847,
      "learning_rate": 5e-06,
      "loss": 0.5816,
      "step": 490
    },
    {
      "epoch": 1.3262599469496021,
      "grad_norm": 0.5455802289844164,
      "learning_rate": 5e-06,
      "loss": 0.5887,
      "step": 500
    },
    {
      "epoch": 1.3527851458885942,
      "grad_norm": 0.4427674181447264,
      "learning_rate": 5e-06,
      "loss": 0.5875,
      "step": 510
    },
    {
      "epoch": 1.3793103448275863,
      "grad_norm": 0.6045736193729062,
      "learning_rate": 5e-06,
      "loss": 0.5891,
      "step": 520
    },
    {
      "epoch": 1.4058355437665782,
      "grad_norm": 0.45208550166771494,
      "learning_rate": 5e-06,
      "loss": 0.5876,
      "step": 530
    },
    {
      "epoch": 1.4323607427055702,
      "grad_norm": 0.5217117204135557,
      "learning_rate": 5e-06,
      "loss": 0.5831,
      "step": 540
    },
    {
      "epoch": 1.4588859416445623,
      "grad_norm": 0.5393363462032307,
      "learning_rate": 5e-06,
      "loss": 0.579,
      "step": 550
    },
    {
      "epoch": 1.4854111405835544,
      "grad_norm": 0.6515785188868304,
      "learning_rate": 5e-06,
      "loss": 0.5867,
      "step": 560
    },
    {
      "epoch": 1.5119363395225465,
      "grad_norm": 0.6394000137643647,
      "learning_rate": 5e-06,
      "loss": 0.5939,
      "step": 570
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.5074512553300523,
      "learning_rate": 5e-06,
      "loss": 0.5839,
      "step": 580
    },
    {
      "epoch": 1.5649867374005306,
      "grad_norm": 0.5172554765605915,
      "learning_rate": 5e-06,
      "loss": 0.5886,
      "step": 590
    },
    {
      "epoch": 1.5915119363395225,
      "grad_norm": 0.4946796812261883,
      "learning_rate": 5e-06,
      "loss": 0.5837,
      "step": 600
    },
    {
      "epoch": 1.6180371352785146,
      "grad_norm": 0.45275057698879645,
      "learning_rate": 5e-06,
      "loss": 0.5875,
      "step": 610
    },
    {
      "epoch": 1.6445623342175066,
      "grad_norm": 0.6141728231437198,
      "learning_rate": 5e-06,
      "loss": 0.5863,
      "step": 620
    },
    {
      "epoch": 1.6710875331564987,
      "grad_norm": 0.5764229361365383,
      "learning_rate": 5e-06,
      "loss": 0.5818,
      "step": 630
    },
    {
      "epoch": 1.6976127320954908,
      "grad_norm": 0.43948836202023195,
      "learning_rate": 5e-06,
      "loss": 0.5854,
      "step": 640
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 0.6212110766567985,
      "learning_rate": 5e-06,
      "loss": 0.5899,
      "step": 650
    },
    {
      "epoch": 1.750663129973475,
      "grad_norm": 0.5230421236325133,
      "learning_rate": 5e-06,
      "loss": 0.5809,
      "step": 660
    },
    {
      "epoch": 1.7771883289124668,
      "grad_norm": 0.42596513265710756,
      "learning_rate": 5e-06,
      "loss": 0.581,
      "step": 670
    },
    {
      "epoch": 1.8037135278514589,
      "grad_norm": 0.5019437503087736,
      "learning_rate": 5e-06,
      "loss": 0.5811,
      "step": 680
    },
    {
      "epoch": 1.830238726790451,
      "grad_norm": 0.5695097062913548,
      "learning_rate": 5e-06,
      "loss": 0.5858,
      "step": 690
    },
    {
      "epoch": 1.8567639257294428,
      "grad_norm": 0.48071601140752834,
      "learning_rate": 5e-06,
      "loss": 0.584,
      "step": 700
    },
    {
      "epoch": 1.8832891246684351,
      "grad_norm": 0.6322423860046313,
      "learning_rate": 5e-06,
      "loss": 0.5901,
      "step": 710
    },
    {
      "epoch": 1.909814323607427,
      "grad_norm": 0.504386977138495,
      "learning_rate": 5e-06,
      "loss": 0.5878,
      "step": 720
    },
    {
      "epoch": 1.936339522546419,
      "grad_norm": 0.5132407169571725,
      "learning_rate": 5e-06,
      "loss": 0.5939,
      "step": 730
    },
    {
      "epoch": 1.9628647214854111,
      "grad_norm": 0.49747549542012004,
      "learning_rate": 5e-06,
      "loss": 0.5824,
      "step": 740
    },
    {
      "epoch": 1.9893899204244032,
      "grad_norm": 0.6053057729936807,
      "learning_rate": 5e-06,
      "loss": 0.5895,
      "step": 750
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.6123443841934204,
      "eval_runtime": 203.0015,
      "eval_samples_per_second": 50.044,
      "eval_steps_per_second": 0.394,
      "step": 754
    },
    {
      "epoch": 2.0159151193633953,
      "grad_norm": 0.6355273791432589,
      "learning_rate": 5e-06,
      "loss": 0.5558,
      "step": 760
    },
    {
      "epoch": 2.042440318302387,
      "grad_norm": 0.5643951304254625,
      "learning_rate": 5e-06,
      "loss": 0.5439,
      "step": 770
    },
    {
      "epoch": 2.0689655172413794,
      "grad_norm": 0.4960175423111283,
      "learning_rate": 5e-06,
      "loss": 0.5434,
      "step": 780
    },
    {
      "epoch": 2.0954907161803713,
      "grad_norm": 0.5284981337718996,
      "learning_rate": 5e-06,
      "loss": 0.5441,
      "step": 790
    },
    {
      "epoch": 2.1220159151193636,
      "grad_norm": 0.5187766913446101,
      "learning_rate": 5e-06,
      "loss": 0.5457,
      "step": 800
    },
    {
      "epoch": 2.1485411140583555,
      "grad_norm": 0.4676393193911655,
      "learning_rate": 5e-06,
      "loss": 0.5453,
      "step": 810
    },
    {
      "epoch": 2.1750663129973473,
      "grad_norm": 0.447245777123748,
      "learning_rate": 5e-06,
      "loss": 0.5454,
      "step": 820
    },
    {
      "epoch": 2.2015915119363396,
      "grad_norm": 0.4803013114072548,
      "learning_rate": 5e-06,
      "loss": 0.5427,
      "step": 830
    },
    {
      "epoch": 2.2281167108753315,
      "grad_norm": 0.6305967900882212,
      "learning_rate": 5e-06,
      "loss": 0.5512,
      "step": 840
    },
    {
      "epoch": 2.2546419098143238,
      "grad_norm": 0.4660714087302693,
      "learning_rate": 5e-06,
      "loss": 0.5337,
      "step": 850
    },
    {
      "epoch": 2.2811671087533156,
      "grad_norm": 0.5499183474925715,
      "learning_rate": 5e-06,
      "loss": 0.539,
      "step": 860
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 0.6029739750306148,
      "learning_rate": 5e-06,
      "loss": 0.5423,
      "step": 870
    },
    {
      "epoch": 2.3342175066313,
      "grad_norm": 0.5459822127048733,
      "learning_rate": 5e-06,
      "loss": 0.5462,
      "step": 880
    },
    {
      "epoch": 2.3607427055702916,
      "grad_norm": 0.4843657467722299,
      "learning_rate": 5e-06,
      "loss": 0.5504,
      "step": 890
    },
    {
      "epoch": 2.387267904509284,
      "grad_norm": 0.6497699445469315,
      "learning_rate": 5e-06,
      "loss": 0.5436,
      "step": 900
    },
    {
      "epoch": 2.413793103448276,
      "grad_norm": 0.4716521124138746,
      "learning_rate": 5e-06,
      "loss": 0.537,
      "step": 910
    },
    {
      "epoch": 2.4403183023872677,
      "grad_norm": 0.5180881653993036,
      "learning_rate": 5e-06,
      "loss": 0.5412,
      "step": 920
    },
    {
      "epoch": 2.46684350132626,
      "grad_norm": 0.6139310358311121,
      "learning_rate": 5e-06,
      "loss": 0.5563,
      "step": 930
    },
    {
      "epoch": 2.493368700265252,
      "grad_norm": 0.5199070076942287,
      "learning_rate": 5e-06,
      "loss": 0.547,
      "step": 940
    },
    {
      "epoch": 2.519893899204244,
      "grad_norm": 0.47708958463613943,
      "learning_rate": 5e-06,
      "loss": 0.5419,
      "step": 950
    },
    {
      "epoch": 2.546419098143236,
      "grad_norm": 0.6389465583810601,
      "learning_rate": 5e-06,
      "loss": 0.5433,
      "step": 960
    },
    {
      "epoch": 2.5729442970822283,
      "grad_norm": 0.46642283536695006,
      "learning_rate": 5e-06,
      "loss": 0.5476,
      "step": 970
    },
    {
      "epoch": 2.59946949602122,
      "grad_norm": 0.5177895417014805,
      "learning_rate": 5e-06,
      "loss": 0.549,
      "step": 980
    },
    {
      "epoch": 2.6259946949602124,
      "grad_norm": 0.5889398648766363,
      "learning_rate": 5e-06,
      "loss": 0.5497,
      "step": 990
    },
    {
      "epoch": 2.6525198938992043,
      "grad_norm": 0.49922378313664745,
      "learning_rate": 5e-06,
      "loss": 0.5451,
      "step": 1000
    },
    {
      "epoch": 2.679045092838196,
      "grad_norm": 0.5386720121866165,
      "learning_rate": 5e-06,
      "loss": 0.5415,
      "step": 1010
    },
    {
      "epoch": 2.7055702917771884,
      "grad_norm": 0.500783203019607,
      "learning_rate": 5e-06,
      "loss": 0.5428,
      "step": 1020
    },
    {
      "epoch": 2.7320954907161803,
      "grad_norm": 0.4931032038157372,
      "learning_rate": 5e-06,
      "loss": 0.5416,
      "step": 1030
    },
    {
      "epoch": 2.7586206896551726,
      "grad_norm": 0.5391229756876829,
      "learning_rate": 5e-06,
      "loss": 0.5487,
      "step": 1040
    },
    {
      "epoch": 2.7851458885941645,
      "grad_norm": 0.5489614966946841,
      "learning_rate": 5e-06,
      "loss": 0.5499,
      "step": 1050
    },
    {
      "epoch": 2.8116710875331563,
      "grad_norm": 0.4779229991518947,
      "learning_rate": 5e-06,
      "loss": 0.5457,
      "step": 1060
    },
    {
      "epoch": 2.8381962864721486,
      "grad_norm": 0.52170064853256,
      "learning_rate": 5e-06,
      "loss": 0.549,
      "step": 1070
    },
    {
      "epoch": 2.8647214854111405,
      "grad_norm": 0.5091309568444623,
      "learning_rate": 5e-06,
      "loss": 0.552,
      "step": 1080
    },
    {
      "epoch": 2.8912466843501328,
      "grad_norm": 0.45335922455307415,
      "learning_rate": 5e-06,
      "loss": 0.5453,
      "step": 1090
    },
    {
      "epoch": 2.9177718832891246,
      "grad_norm": 0.565478737240957,
      "learning_rate": 5e-06,
      "loss": 0.546,
      "step": 1100
    },
    {
      "epoch": 2.9442970822281165,
      "grad_norm": 0.5673285594805738,
      "learning_rate": 5e-06,
      "loss": 0.5517,
      "step": 1110
    },
    {
      "epoch": 2.970822281167109,
      "grad_norm": 0.503611299440423,
      "learning_rate": 5e-06,
      "loss": 0.5444,
      "step": 1120
    },
    {
      "epoch": 2.9973474801061006,
      "grad_norm": 0.5499097253524401,
      "learning_rate": 5e-06,
      "loss": 0.5447,
      "step": 1130
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.6133805513381958,
      "eval_runtime": 203.8433,
      "eval_samples_per_second": 49.837,
      "eval_steps_per_second": 0.392,
      "step": 1131
    },
    {
      "epoch": 3.0,
      "step": 1131,
      "total_flos": 1894048365281280.0,
      "train_loss": 0.5970700682000076,
      "train_runtime": 33949.4437,
      "train_samples_per_second": 17.055,
      "train_steps_per_second": 0.033
    }
  ],
  "logging_steps": 10,
  "max_steps": 1131,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1894048365281280.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}