| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9980830670926517, |
| "eval_steps": 500, |
| "global_step": 1173, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.025559105431309903, |
| "grad_norm": 2.488937021121503, |
| "learning_rate": 5e-06, |
| "loss": 0.874, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.051118210862619806, |
| "grad_norm": 1.0134122408123283, |
| "learning_rate": 5e-06, |
| "loss": 0.7768, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07667731629392971, |
| "grad_norm": 0.7252743172722608, |
| "learning_rate": 5e-06, |
| "loss": 0.739, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10223642172523961, |
| "grad_norm": 0.9369862288990177, |
| "learning_rate": 5e-06, |
| "loss": 0.719, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12779552715654952, |
| "grad_norm": 1.067983364712023, |
| "learning_rate": 5e-06, |
| "loss": 0.7105, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.15335463258785942, |
| "grad_norm": 0.757709510092403, |
| "learning_rate": 5e-06, |
| "loss": 0.7034, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17891373801916932, |
| "grad_norm": 0.6165712807429302, |
| "learning_rate": 5e-06, |
| "loss": 0.6975, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20447284345047922, |
| "grad_norm": 0.6390453689605463, |
| "learning_rate": 5e-06, |
| "loss": 0.6871, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23003194888178913, |
| "grad_norm": 0.6401806321502969, |
| "learning_rate": 5e-06, |
| "loss": 0.6847, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.25559105431309903, |
| "grad_norm": 0.5772096727061063, |
| "learning_rate": 5e-06, |
| "loss": 0.6815, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.28115015974440893, |
| "grad_norm": 0.5767370315949822, |
| "learning_rate": 5e-06, |
| "loss": 0.6751, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.30670926517571884, |
| "grad_norm": 0.6484027615221676, |
| "learning_rate": 5e-06, |
| "loss": 0.6744, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.33226837060702874, |
| "grad_norm": 0.45793202087348756, |
| "learning_rate": 5e-06, |
| "loss": 0.6677, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.35782747603833864, |
| "grad_norm": 0.6809828594933767, |
| "learning_rate": 5e-06, |
| "loss": 0.6611, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.38338658146964855, |
| "grad_norm": 0.50255443636339, |
| "learning_rate": 5e-06, |
| "loss": 0.67, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.40894568690095845, |
| "grad_norm": 0.5051546709708246, |
| "learning_rate": 5e-06, |
| "loss": 0.6688, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.43450479233226835, |
| "grad_norm": 0.5158935047871976, |
| "learning_rate": 5e-06, |
| "loss": 0.666, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.46006389776357826, |
| "grad_norm": 0.5115877840507708, |
| "learning_rate": 5e-06, |
| "loss": 0.6658, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.48562300319488816, |
| "grad_norm": 0.6309545106607486, |
| "learning_rate": 5e-06, |
| "loss": 0.6548, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5111821086261981, |
| "grad_norm": 0.6018638447566027, |
| "learning_rate": 5e-06, |
| "loss": 0.6605, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.536741214057508, |
| "grad_norm": 0.5200148356112437, |
| "learning_rate": 5e-06, |
| "loss": 0.655, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5623003194888179, |
| "grad_norm": 0.7314253896655762, |
| "learning_rate": 5e-06, |
| "loss": 0.655, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5878594249201278, |
| "grad_norm": 0.7982233157649096, |
| "learning_rate": 5e-06, |
| "loss": 0.6583, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6134185303514377, |
| "grad_norm": 0.6943352778908869, |
| "learning_rate": 5e-06, |
| "loss": 0.6554, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6389776357827476, |
| "grad_norm": 0.554018633223486, |
| "learning_rate": 5e-06, |
| "loss": 0.654, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6645367412140575, |
| "grad_norm": 0.43212557887545755, |
| "learning_rate": 5e-06, |
| "loss": 0.6568, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6900958466453674, |
| "grad_norm": 0.4865016028697348, |
| "learning_rate": 5e-06, |
| "loss": 0.6506, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7156549520766773, |
| "grad_norm": 0.5429929192321034, |
| "learning_rate": 5e-06, |
| "loss": 0.6502, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7412140575079872, |
| "grad_norm": 0.47771937619296945, |
| "learning_rate": 5e-06, |
| "loss": 0.6494, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7667731629392971, |
| "grad_norm": 0.4577925482968175, |
| "learning_rate": 5e-06, |
| "loss": 0.6506, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.792332268370607, |
| "grad_norm": 0.46309534099607036, |
| "learning_rate": 5e-06, |
| "loss": 0.6462, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8178913738019169, |
| "grad_norm": 0.4567909579930465, |
| "learning_rate": 5e-06, |
| "loss": 0.6422, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8434504792332268, |
| "grad_norm": 0.5205456435682532, |
| "learning_rate": 5e-06, |
| "loss": 0.6418, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8690095846645367, |
| "grad_norm": 0.6410066183707821, |
| "learning_rate": 5e-06, |
| "loss": 0.6443, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8945686900958466, |
| "grad_norm": 0.45817253558977566, |
| "learning_rate": 5e-06, |
| "loss": 0.6401, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9201277955271565, |
| "grad_norm": 0.4863261215394659, |
| "learning_rate": 5e-06, |
| "loss": 0.6465, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9456869009584664, |
| "grad_norm": 0.4920600022672814, |
| "learning_rate": 5e-06, |
| "loss": 0.6463, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9712460063897763, |
| "grad_norm": 0.43198500585334393, |
| "learning_rate": 5e-06, |
| "loss": 0.6526, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9968051118210862, |
| "grad_norm": 0.5601512259650758, |
| "learning_rate": 5e-06, |
| "loss": 0.6447, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9993610223642172, |
| "eval_loss": 0.6407531499862671, |
| "eval_runtime": 211.2232, |
| "eval_samples_per_second": 49.89, |
| "eval_steps_per_second": 0.393, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.0223642172523961, |
| "grad_norm": 0.5234653038754594, |
| "learning_rate": 5e-06, |
| "loss": 0.6093, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0479233226837061, |
| "grad_norm": 0.45536515216509915, |
| "learning_rate": 5e-06, |
| "loss": 0.6035, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.073482428115016, |
| "grad_norm": 0.5158433981271974, |
| "learning_rate": 5e-06, |
| "loss": 0.6011, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.099041533546326, |
| "grad_norm": 0.5570560091916129, |
| "learning_rate": 5e-06, |
| "loss": 0.6033, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1246006389776357, |
| "grad_norm": 0.6165930669771302, |
| "learning_rate": 5e-06, |
| "loss": 0.6066, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.1501597444089458, |
| "grad_norm": 0.4737189308117872, |
| "learning_rate": 5e-06, |
| "loss": 0.6062, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1757188498402555, |
| "grad_norm": 0.4707731622353862, |
| "learning_rate": 5e-06, |
| "loss": 0.5996, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.2012779552715656, |
| "grad_norm": 0.6291217339170796, |
| "learning_rate": 5e-06, |
| "loss": 0.599, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.2268370607028753, |
| "grad_norm": 0.4368414320470577, |
| "learning_rate": 5e-06, |
| "loss": 0.601, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2523961661341854, |
| "grad_norm": 0.5763882217045183, |
| "learning_rate": 5e-06, |
| "loss": 0.6033, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2779552715654952, |
| "grad_norm": 0.5673744099871547, |
| "learning_rate": 5e-06, |
| "loss": 0.6006, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3035143769968052, |
| "grad_norm": 0.5044673911911107, |
| "learning_rate": 5e-06, |
| "loss": 0.6034, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.329073482428115, |
| "grad_norm": 0.48842610397801645, |
| "learning_rate": 5e-06, |
| "loss": 0.5979, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.354632587859425, |
| "grad_norm": 0.45337151740937665, |
| "learning_rate": 5e-06, |
| "loss": 0.6072, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3801916932907348, |
| "grad_norm": 0.5155646630680047, |
| "learning_rate": 5e-06, |
| "loss": 0.5995, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4057507987220448, |
| "grad_norm": 0.5588384029093506, |
| "learning_rate": 5e-06, |
| "loss": 0.597, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.4313099041533546, |
| "grad_norm": 0.5368327302641925, |
| "learning_rate": 5e-06, |
| "loss": 0.6028, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.4568690095846646, |
| "grad_norm": 0.45657989741381694, |
| "learning_rate": 5e-06, |
| "loss": 0.5994, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4824281150159744, |
| "grad_norm": 0.6868054044486551, |
| "learning_rate": 5e-06, |
| "loss": 0.6037, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5079872204472844, |
| "grad_norm": 0.5471957185900231, |
| "learning_rate": 5e-06, |
| "loss": 0.6013, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5335463258785942, |
| "grad_norm": 0.49701536962833187, |
| "learning_rate": 5e-06, |
| "loss": 0.5979, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5591054313099042, |
| "grad_norm": 0.5437091666091665, |
| "learning_rate": 5e-06, |
| "loss": 0.603, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.5846645367412142, |
| "grad_norm": 0.46174917062296694, |
| "learning_rate": 5e-06, |
| "loss": 0.603, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.610223642172524, |
| "grad_norm": 0.42809667027774384, |
| "learning_rate": 5e-06, |
| "loss": 0.6063, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6357827476038338, |
| "grad_norm": 0.6924625585548914, |
| "learning_rate": 5e-06, |
| "loss": 0.6079, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.6613418530351438, |
| "grad_norm": 0.5010760817874803, |
| "learning_rate": 5e-06, |
| "loss": 0.6045, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.6869009584664538, |
| "grad_norm": 0.481770021248738, |
| "learning_rate": 5e-06, |
| "loss": 0.602, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.7124600638977636, |
| "grad_norm": 0.5077296945019126, |
| "learning_rate": 5e-06, |
| "loss": 0.6013, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.7380191693290734, |
| "grad_norm": 0.5159453593847776, |
| "learning_rate": 5e-06, |
| "loss": 0.5952, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.7635782747603834, |
| "grad_norm": 0.5752680538208508, |
| "learning_rate": 5e-06, |
| "loss": 0.6034, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.7891373801916934, |
| "grad_norm": 0.4356053466798036, |
| "learning_rate": 5e-06, |
| "loss": 0.6046, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.8146964856230032, |
| "grad_norm": 0.5263361152578077, |
| "learning_rate": 5e-06, |
| "loss": 0.6029, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.840255591054313, |
| "grad_norm": 0.47376505871455504, |
| "learning_rate": 5e-06, |
| "loss": 0.6052, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.865814696485623, |
| "grad_norm": 0.48964997558770895, |
| "learning_rate": 5e-06, |
| "loss": 0.59, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.891373801916933, |
| "grad_norm": 0.5040275348886807, |
| "learning_rate": 5e-06, |
| "loss": 0.6019, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.9169329073482428, |
| "grad_norm": 0.6636559820039077, |
| "learning_rate": 5e-06, |
| "loss": 0.5996, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.9424920127795526, |
| "grad_norm": 0.6339897456829262, |
| "learning_rate": 5e-06, |
| "loss": 0.6021, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.9680511182108626, |
| "grad_norm": 0.4545322012239174, |
| "learning_rate": 5e-06, |
| "loss": 0.6015, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.9936102236421727, |
| "grad_norm": 0.6398078551059936, |
| "learning_rate": 5e-06, |
| "loss": 0.5993, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9987220447284346, |
| "eval_loss": 0.6304081082344055, |
| "eval_runtime": 210.5877, |
| "eval_samples_per_second": 50.041, |
| "eval_steps_per_second": 0.394, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.0191693290734825, |
| "grad_norm": 0.6722191426438802, |
| "learning_rate": 5e-06, |
| "loss": 0.5698, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.0447284345047922, |
| "grad_norm": 0.5523925945985377, |
| "learning_rate": 5e-06, |
| "loss": 0.5532, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.070287539936102, |
| "grad_norm": 0.6989635266051605, |
| "learning_rate": 5e-06, |
| "loss": 0.5505, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.0958466453674123, |
| "grad_norm": 0.6949623311244769, |
| "learning_rate": 5e-06, |
| "loss": 0.5548, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.121405750798722, |
| "grad_norm": 0.4721879733757925, |
| "learning_rate": 5e-06, |
| "loss": 0.5577, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.146964856230032, |
| "grad_norm": 0.6036119451801016, |
| "learning_rate": 5e-06, |
| "loss": 0.5544, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.1725239616613417, |
| "grad_norm": 0.5244338637812003, |
| "learning_rate": 5e-06, |
| "loss": 0.5548, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.198083067092652, |
| "grad_norm": 0.510738895984618, |
| "learning_rate": 5e-06, |
| "loss": 0.5618, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.2236421725239617, |
| "grad_norm": 0.6565162833337977, |
| "learning_rate": 5e-06, |
| "loss": 0.5611, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.2492012779552715, |
| "grad_norm": 0.5490046418008897, |
| "learning_rate": 5e-06, |
| "loss": 0.5542, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.2747603833865817, |
| "grad_norm": 0.5598655787056185, |
| "learning_rate": 5e-06, |
| "loss": 0.554, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.3003194888178915, |
| "grad_norm": 0.5724536974958164, |
| "learning_rate": 5e-06, |
| "loss": 0.5586, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.3258785942492013, |
| "grad_norm": 0.7038695500738883, |
| "learning_rate": 5e-06, |
| "loss": 0.5626, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.351437699680511, |
| "grad_norm": 0.4943055706887088, |
| "learning_rate": 5e-06, |
| "loss": 0.5613, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.376996805111821, |
| "grad_norm": 0.5473864748939261, |
| "learning_rate": 5e-06, |
| "loss": 0.5592, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.402555910543131, |
| "grad_norm": 0.5463031663747152, |
| "learning_rate": 5e-06, |
| "loss": 0.5605, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.428115015974441, |
| "grad_norm": 0.5764574450021644, |
| "learning_rate": 5e-06, |
| "loss": 0.5638, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.4536741214057507, |
| "grad_norm": 0.5966409155859831, |
| "learning_rate": 5e-06, |
| "loss": 0.561, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.479233226837061, |
| "grad_norm": 0.5006171543606123, |
| "learning_rate": 5e-06, |
| "loss": 0.5647, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.5047923322683707, |
| "grad_norm": 0.4996230204708687, |
| "learning_rate": 5e-06, |
| "loss": 0.5653, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.5303514376996805, |
| "grad_norm": 0.46846989809783296, |
| "learning_rate": 5e-06, |
| "loss": 0.5598, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.5559105431309903, |
| "grad_norm": 0.509368116376272, |
| "learning_rate": 5e-06, |
| "loss": 0.5637, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.5814696485623, |
| "grad_norm": 0.49787659029146086, |
| "learning_rate": 5e-06, |
| "loss": 0.5644, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.6070287539936103, |
| "grad_norm": 0.556553122489043, |
| "learning_rate": 5e-06, |
| "loss": 0.562, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.63258785942492, |
| "grad_norm": 0.5184228442491099, |
| "learning_rate": 5e-06, |
| "loss": 0.5629, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.65814696485623, |
| "grad_norm": 0.4561739299086491, |
| "learning_rate": 5e-06, |
| "loss": 0.5648, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.68370607028754, |
| "grad_norm": 0.5769831449344406, |
| "learning_rate": 5e-06, |
| "loss": 0.5636, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.70926517571885, |
| "grad_norm": 0.4428248860326066, |
| "learning_rate": 5e-06, |
| "loss": 0.5629, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.7348242811501597, |
| "grad_norm": 0.5156130562903302, |
| "learning_rate": 5e-06, |
| "loss": 0.5686, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.7603833865814695, |
| "grad_norm": 0.5540658232297233, |
| "learning_rate": 5e-06, |
| "loss": 0.5627, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.7859424920127793, |
| "grad_norm": 0.5069347052283188, |
| "learning_rate": 5e-06, |
| "loss": 0.5612, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.8115015974440896, |
| "grad_norm": 0.6261687359343319, |
| "learning_rate": 5e-06, |
| "loss": 0.556, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.8370607028753994, |
| "grad_norm": 0.47449299632556347, |
| "learning_rate": 5e-06, |
| "loss": 0.5729, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.862619808306709, |
| "grad_norm": 0.4540284354232125, |
| "learning_rate": 5e-06, |
| "loss": 0.5633, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.8881789137380194, |
| "grad_norm": 0.5570870387952495, |
| "learning_rate": 5e-06, |
| "loss": 0.5628, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.913738019169329, |
| "grad_norm": 0.4545794478265982, |
| "learning_rate": 5e-06, |
| "loss": 0.5602, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.939297124600639, |
| "grad_norm": 0.4528099150088049, |
| "learning_rate": 5e-06, |
| "loss": 0.5641, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.9648562300319488, |
| "grad_norm": 0.5465749868700104, |
| "learning_rate": 5e-06, |
| "loss": 0.5613, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.9904153354632586, |
| "grad_norm": 0.4856753038168024, |
| "learning_rate": 5e-06, |
| "loss": 0.5618, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.9980830670926517, |
| "eval_loss": 0.6310843229293823, |
| "eval_runtime": 212.4336, |
| "eval_samples_per_second": 49.606, |
| "eval_steps_per_second": 0.391, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.9980830670926517, |
| "step": 1173, |
| "total_flos": 1964399929589760.0, |
| "train_loss": 0.612151123272082, |
| "train_runtime": 35048.9763, |
| "train_samples_per_second": 17.137, |
| "train_steps_per_second": 0.033 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1173, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1964399929589760.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |