{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.43873012004143563,
  "eval_steps": 500,
  "global_step": 4500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004874779111571507,
      "grad_norm": 581.0810546875,
      "learning_rate": 1.6233766233766232e-07,
      "loss": 42.9948,
      "step": 50
    },
    {
      "epoch": 0.009749558223143015,
      "grad_norm": 331.4380798339844,
      "learning_rate": 3.2467532467532465e-07,
      "loss": 36.4433,
      "step": 100
    },
    {
      "epoch": 0.014624337334714521,
      "grad_norm": 399.9220886230469,
      "learning_rate": 4.87012987012987e-07,
      "loss": 33.9757,
      "step": 150
    },
    {
      "epoch": 0.01949911644628603,
      "grad_norm": 227.4508514404297,
      "learning_rate": 6.493506493506493e-07,
      "loss": 24.5654,
      "step": 200
    },
    {
      "epoch": 0.024373895557857534,
      "grad_norm": 169.74786376953125,
      "learning_rate": 8.116883116883116e-07,
      "loss": 18.221,
      "step": 250
    },
    {
      "epoch": 0.029248674669429042,
      "grad_norm": 151.637451171875,
      "learning_rate": 9.74025974025974e-07,
      "loss": 15.1792,
      "step": 300
    },
    {
      "epoch": 0.03412345378100055,
      "grad_norm": 140.70602416992188,
      "learning_rate": 9.99956019474448e-07,
      "loss": 13.3062,
      "step": 350
    },
    {
      "epoch": 0.03899823289257206,
      "grad_norm": 163.72286987304688,
      "learning_rate": 9.997889850109673e-07,
      "loss": 12.1289,
      "step": 400
    },
    {
      "epoch": 0.04387301200414356,
      "grad_norm": 205.9079132080078,
      "learning_rate": 9.994973425669175e-07,
      "loss": 11.2206,
      "step": 450
    },
    {
      "epoch": 0.04874779111571507,
      "grad_norm": 190.47525024414062,
      "learning_rate": 9.990811648549374e-07,
      "loss": 10.3846,
      "step": 500
    },
    {
      "epoch": 0.05362257022728657,
      "grad_norm": 63.44021224975586,
      "learning_rate": 9.98540555636946e-07,
      "loss": 10.0807,
      "step": 550
    },
    {
      "epoch": 0.058497349338858085,
      "grad_norm": 134.91310119628906,
      "learning_rate": 9.978756496982724e-07,
      "loss": 9.2068,
      "step": 600
    },
    {
      "epoch": 0.0633721284504296,
      "grad_norm": 125.61437225341797,
      "learning_rate": 9.97086612814052e-07,
      "loss": 8.8829,
      "step": 650
    },
    {
      "epoch": 0.0682469075620011,
      "grad_norm": 148.3848876953125,
      "learning_rate": 9.961736417078928e-07,
      "loss": 8.8043,
      "step": 700
    },
    {
      "epoch": 0.0731216866735726,
      "grad_norm": 360.50970458984375,
      "learning_rate": 9.951369640028304e-07,
      "loss": 9.8165,
      "step": 750
    },
    {
      "epoch": 0.07799646578514412,
      "grad_norm": 89.09446716308594,
      "learning_rate": 9.939768381645761e-07,
      "loss": 8.9056,
      "step": 800
    },
    {
      "epoch": 0.08287124489671562,
      "grad_norm": 163.72415161132812,
      "learning_rate": 9.92693553437075e-07,
      "loss": 9.5952,
      "step": 850
    },
    {
      "epoch": 0.08774602400828713,
      "grad_norm": 147.0449981689453,
      "learning_rate": 9.912874297703925e-07,
      "loss": 9.0044,
      "step": 900
    },
    {
      "epoch": 0.09262080311985862,
      "grad_norm": 171.43394470214844,
      "learning_rate": 9.897588177409434e-07,
      "loss": 9.1141,
      "step": 950
    },
    {
      "epoch": 0.09749558223143014,
      "grad_norm": 206.17628479003906,
      "learning_rate": 9.88108098464086e-07,
      "loss": 8.1566,
      "step": 1000
    },
    {
      "epoch": 0.10237036134300165,
      "grad_norm": 131.0735321044922,
      "learning_rate": 9.863356834991016e-07,
      "loss": 8.4912,
      "step": 1050
    },
    {
      "epoch": 0.10724514045457315,
      "grad_norm": 140.00730895996094,
      "learning_rate": 9.844420147465848e-07,
      "loss": 8.1491,
      "step": 1100
    },
    {
      "epoch": 0.11211991956614466,
      "grad_norm": 163.5810089111328,
      "learning_rate": 9.824275643382676e-07,
      "loss": 8.6904,
      "step": 1150
    },
    {
      "epoch": 0.11699469867771617,
      "grad_norm": 129.7275848388672,
      "learning_rate": 9.802928345193068e-07,
      "loss": 8.1686,
      "step": 1200
    },
    {
      "epoch": 0.12186947778928767,
      "grad_norm": 115.78919219970703,
      "learning_rate": 9.780383575230648e-07,
      "loss": 7.6378,
      "step": 1250
    },
    {
      "epoch": 0.1267442569008592,
      "grad_norm": 74.11811828613281,
      "learning_rate": 9.756646954384115e-07,
      "loss": 7.8103,
      "step": 1300
    },
    {
      "epoch": 0.1316190360124307,
      "grad_norm": 93.70452880859375,
      "learning_rate": 9.731724400695836e-07,
      "loss": 8.122,
      "step": 1350
    },
    {
      "epoch": 0.1364938151240022,
      "grad_norm": 110.20355987548828,
      "learning_rate": 9.70562212788636e-07,
      "loss": 7.8767,
      "step": 1400
    },
    {
      "epoch": 0.1413685942355737,
      "grad_norm": 96.42841339111328,
      "learning_rate": 9.6783466438052e-07,
      "loss": 8.0516,
      "step": 1450
    },
    {
      "epoch": 0.1462433733471452,
      "grad_norm": 98.32221221923828,
      "learning_rate": 9.649904748808292e-07,
      "loss": 7.6941,
      "step": 1500
    },
    {
      "epoch": 0.1511181524587167,
      "grad_norm": 107.42027282714844,
      "learning_rate": 9.620303534062518e-07,
      "loss": 8.0057,
      "step": 1550
    },
    {
      "epoch": 0.15599293157028823,
      "grad_norm": 55.05694580078125,
      "learning_rate": 9.589550379777732e-07,
      "loss": 7.4756,
      "step": 1600
    },
    {
      "epoch": 0.16086771068185973,
      "grad_norm": 117.27649688720703,
      "learning_rate": 9.557652953366717e-07,
      "loss": 6.8833,
      "step": 1650
    },
    {
      "epoch": 0.16574248979343123,
      "grad_norm": 174.34263610839844,
      "learning_rate": 9.52461920753353e-07,
      "loss": 7.4795,
      "step": 1700
    },
    {
      "epoch": 0.17061726890500276,
      "grad_norm": 119.58318328857422,
      "learning_rate": 9.490457378290737e-07,
      "loss": 7.7871,
      "step": 1750
    },
    {
      "epoch": 0.17549204801657425,
      "grad_norm": 142.45582580566406,
      "learning_rate": 9.455175982905988e-07,
      "loss": 8.1505,
      "step": 1800
    },
    {
      "epoch": 0.18036682712814575,
      "grad_norm": 122.0265884399414,
      "learning_rate": 9.418783817778484e-07,
      "loss": 7.6914,
      "step": 1850
    },
    {
      "epoch": 0.18524160623971725,
      "grad_norm": 96.58927917480469,
      "learning_rate": 9.381289956245861e-07,
      "loss": 7.5846,
      "step": 1900
    },
    {
      "epoch": 0.19011638535128877,
      "grad_norm": 238.81964111328125,
      "learning_rate": 9.342703746321997e-07,
      "loss": 7.7886,
      "step": 1950
    },
    {
      "epoch": 0.19499116446286027,
      "grad_norm": 61.6027946472168,
      "learning_rate": 9.303034808366366e-07,
      "loss": 7.2491,
      "step": 2000
    },
    {
      "epoch": 0.19986594357443177,
      "grad_norm": 96.19196319580078,
      "learning_rate": 9.262293032685475e-07,
      "loss": 6.8776,
      "step": 2050
    },
    {
      "epoch": 0.2047407226860033,
      "grad_norm": 72.44068908691406,
      "learning_rate": 9.220488577066996e-07,
      "loss": 7.2714,
      "step": 2100
    },
    {
      "epoch": 0.2096155017975748,
      "grad_norm": 160.9955291748047,
      "learning_rate": 9.177631864247226e-07,
      "loss": 7.4344,
      "step": 2150
    },
    {
      "epoch": 0.2144902809091463,
      "grad_norm": 78.55003356933594,
      "learning_rate": 9.133733579312468e-07,
      "loss": 7.2211,
      "step": 2200
    },
    {
      "epoch": 0.21936506002071782,
      "grad_norm": 141.0493621826172,
      "learning_rate": 9.088804667035016e-07,
      "loss": 7.3533,
      "step": 2250
    },
    {
      "epoch": 0.22423983913228931,
      "grad_norm": 106.55406951904297,
      "learning_rate": 9.042856329144392e-07,
      "loss": 7.526,
      "step": 2300
    },
    {
      "epoch": 0.2291146182438608,
      "grad_norm": 116.47844696044922,
      "learning_rate": 8.995900021534517e-07,
      "loss": 6.5839,
      "step": 2350
    },
    {
      "epoch": 0.23398939735543234,
      "grad_norm": 87.43718719482422,
      "learning_rate": 8.947947451407512e-07,
      "loss": 7.2284,
      "step": 2400
    },
    {
      "epoch": 0.23886417646700384,
      "grad_norm": 135.8431854248047,
      "learning_rate": 8.89901057435485e-07,
      "loss": 7.6484,
      "step": 2450
    },
    {
      "epoch": 0.24373895557857533,
      "grad_norm": 87.54926300048828,
      "learning_rate": 8.849101591376568e-07,
      "loss": 7.2991,
      "step": 2500
    },
    {
      "epoch": 0.24861373469014686,
      "grad_norm": 109.77268981933594,
      "learning_rate": 8.798232945839304e-07,
      "loss": 6.9895,
      "step": 2550
    },
    {
      "epoch": 0.2534885138017184,
      "grad_norm": 118.82627868652344,
      "learning_rate": 8.746417320373896e-07,
      "loss": 7.4786,
      "step": 2600
    },
    {
      "epoch": 0.25836329291328985,
      "grad_norm": 175.50918579101562,
      "learning_rate": 8.693667633713338e-07,
      "loss": 6.6877,
      "step": 2650
    },
    {
      "epoch": 0.2632380720248614,
      "grad_norm": 190.9921875,
      "learning_rate": 8.639997037471867e-07,
      "loss": 6.8118,
      "step": 2700
    },
    {
      "epoch": 0.2681128511364329,
      "grad_norm": 163.4025115966797,
      "learning_rate": 8.585418912865986e-07,
      "loss": 6.9759,
      "step": 2750
    },
    {
      "epoch": 0.2729876302480044,
      "grad_norm": 149.16815185546875,
      "learning_rate": 8.529946867378241e-07,
      "loss": 7.2147,
      "step": 2800
    },
    {
      "epoch": 0.2778624093595759,
      "grad_norm": 97.56202697753906,
      "learning_rate": 8.473594731364587e-07,
      "loss": 7.1163,
      "step": 2850
    },
    {
      "epoch": 0.2827371884711474,
      "grad_norm": 104.49602508544922,
      "learning_rate": 8.416376554606195e-07,
      "loss": 7.5656,
      "step": 2900
    },
    {
      "epoch": 0.2876119675827189,
      "grad_norm": 130.78414916992188,
      "learning_rate": 8.358306602806534e-07,
      "loss": 6.901,
      "step": 2950
    },
    {
      "epoch": 0.2924867466942904,
      "grad_norm": 152.71543884277344,
      "learning_rate": 8.299399354034633e-07,
      "loss": 7.0534,
      "step": 3000
    },
    {
      "epoch": 0.29736152580586195,
      "grad_norm": 128.23233032226562,
      "learning_rate": 8.239669495115393e-07,
      "loss": 7.2949,
      "step": 3050
    },
    {
      "epoch": 0.3022363049174334,
      "grad_norm": 212.286865234375,
      "learning_rate": 8.179131917967852e-07,
      "loss": 7.1819,
      "step": 3100
    },
    {
      "epoch": 0.30711108402900494,
      "grad_norm": 146.38832092285156,
      "learning_rate": 8.117801715892306e-07,
      "loss": 7.3945,
      "step": 3150
    },
    {
      "epoch": 0.31198586314057647,
      "grad_norm": 98.95829010009766,
      "learning_rate": 8.05569417980724e-07,
      "loss": 7.0111,
      "step": 3200
    },
    {
      "epoch": 0.31686064225214794,
      "grad_norm": 147.78128051757812,
      "learning_rate": 7.992824794436971e-07,
      "loss": 7.1754,
      "step": 3250
    },
    {
      "epoch": 0.32173542136371946,
      "grad_norm": 137.8080596923828,
      "learning_rate": 7.92920923445098e-07,
      "loss": 7.2801,
      "step": 3300
    },
    {
      "epoch": 0.326610200475291,
      "grad_norm": 156.37132263183594,
      "learning_rate": 7.864863360555886e-07,
      "loss": 7.1625,
      "step": 3350
    },
    {
      "epoch": 0.33148497958686246,
      "grad_norm": 176.6288299560547,
      "learning_rate": 7.799803215541036e-07,
      "loss": 7.5386,
      "step": 3400
    },
    {
      "epoch": 0.336359758698434,
      "grad_norm": 150.56468200683594,
      "learning_rate": 7.734045020278694e-07,
      "loss": 6.9751,
      "step": 3450
    },
    {
      "epoch": 0.3412345378100055,
      "grad_norm": 138.31723022460938,
      "learning_rate": 7.667605169679842e-07,
      "loss": 6.8245,
      "step": 3500
    },
    {
      "epoch": 0.346109316921577,
      "grad_norm": 100.9895248413086,
      "learning_rate": 7.600500228606573e-07,
      "loss": 6.947,
      "step": 3550
    },
    {
      "epoch": 0.3509840960331485,
      "grad_norm": 142.1031036376953,
      "learning_rate": 7.532746927742119e-07,
      "loss": 6.9751,
      "step": 3600
    },
    {
      "epoch": 0.35585887514472,
      "grad_norm": 182.1207275390625,
      "learning_rate": 7.464362159419551e-07,
      "loss": 7.1473,
      "step": 3650
    },
    {
      "epoch": 0.3607336542562915,
      "grad_norm": 162.8542938232422,
      "learning_rate": 7.395362973410145e-07,
      "loss": 7.7815,
      "step": 3700
    },
    {
      "epoch": 0.36560843336786303,
      "grad_norm": 168.93276977539062,
      "learning_rate": 7.325766572672528e-07,
      "loss": 7.7646,
      "step": 3750
    },
    {
      "epoch": 0.3704832124794345,
      "grad_norm": 159.7053985595703,
      "learning_rate": 7.255590309063604e-07,
      "loss": 6.4885,
      "step": 3800
    },
    {
      "epoch": 0.375357991591006,
      "grad_norm": 96.07608795166016,
      "learning_rate": 7.184851679012374e-07,
      "loss": 6.9556,
      "step": 3850
    },
    {
      "epoch": 0.38023277070257755,
      "grad_norm": 158.5734100341797,
      "learning_rate": 7.113568319157707e-07,
      "loss": 6.9754,
      "step": 3900
    },
    {
      "epoch": 0.385107549814149,
      "grad_norm": 151.5749969482422,
      "learning_rate": 7.041758001951149e-07,
      "loss": 6.6478,
      "step": 3950
    },
    {
      "epoch": 0.38998232892572054,
      "grad_norm": 118.84528350830078,
      "learning_rate": 6.969438631225877e-07,
      "loss": 6.3464,
      "step": 4000
    },
    {
      "epoch": 0.39485710803729207,
      "grad_norm": 130.48410034179688,
      "learning_rate": 6.896628237732894e-07,
      "loss": 7.0122,
      "step": 4050
    },
    {
      "epoch": 0.39973188714886354,
      "grad_norm": 92.12020874023438,
      "learning_rate": 6.823344974645576e-07,
      "loss": 7.2089,
      "step": 4100
    },
    {
      "epoch": 0.40460666626043507,
      "grad_norm": 113.18313598632812,
      "learning_rate": 6.749607113033709e-07,
      "loss": 7.2546,
      "step": 4150
    },
    {
      "epoch": 0.4094814453720066,
      "grad_norm": 106.96129608154297,
      "learning_rate": 6.675433037308119e-07,
      "loss": 7.3078,
      "step": 4200
    },
    {
      "epoch": 0.41435622448357806,
      "grad_norm": 150.419677734375,
      "learning_rate": 6.600841240637052e-07,
      "loss": 7.1537,
      "step": 4250
    },
    {
      "epoch": 0.4192310035951496,
      "grad_norm": 88.62400817871094,
      "learning_rate": 6.525850320335433e-07,
      "loss": 7.0714,
      "step": 4300
    },
    {
      "epoch": 0.4241057827067211,
      "grad_norm": 83.91474151611328,
      "learning_rate": 6.450478973228162e-07,
      "loss": 6.9181,
      "step": 4350
    },
    {
      "epoch": 0.4289805618182926,
      "grad_norm": 135.38629150390625,
      "learning_rate": 6.374745990988598e-07,
      "loss": 7.1421,
      "step": 4400
    },
    {
      "epoch": 0.4338553409298641,
      "grad_norm": 101.2317123413086,
      "learning_rate": 6.298670255453404e-07,
      "loss": 6.6926,
      "step": 4450
    },
    {
      "epoch": 0.43873012004143563,
      "grad_norm": 112.95464324951172,
      "learning_rate": 6.222270733914895e-07,
      "loss": 6.7252,
      "step": 4500
    }
  ],
  "logging_steps": 50,
  "max_steps": 10256,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.8045681436358345e+19,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}