{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.7346053772766696,
  "eval_steps": 200,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03469210754553339,
      "eval_loss": 1.8023786544799805,
      "eval_runtime": 572.3832,
      "eval_samples_per_second": 10.074,
      "eval_steps_per_second": 1.26,
      "step": 200
    },
    {
      "epoch": 0.06938421509106678,
      "eval_loss": 1.7305923700332642,
      "eval_runtime": 572.203,
      "eval_samples_per_second": 10.077,
      "eval_steps_per_second": 1.26,
      "step": 400
    },
    {
      "epoch": 0.08673026886383348,
      "grad_norm": 6.57196569442749,
      "learning_rate": 2.4566348655680836e-05,
      "loss": 1.8068,
      "step": 500
    },
    {
      "epoch": 0.10407632263660017,
      "eval_loss": 1.7001726627349854,
      "eval_runtime": 572.0907,
      "eval_samples_per_second": 10.079,
      "eval_steps_per_second": 1.26,
      "step": 600
    },
    {
      "epoch": 0.13876843018213356,
      "eval_loss": 1.6665369272232056,
      "eval_runtime": 572.5138,
      "eval_samples_per_second": 10.071,
      "eval_steps_per_second": 1.259,
      "step": 800
    },
    {
      "epoch": 0.17346053772766695,
      "grad_norm": 6.407934665679932,
      "learning_rate": 2.4132697311361666e-05,
      "loss": 1.6773,
      "step": 1000
    },
    {
      "epoch": 0.17346053772766695,
      "eval_loss": 1.645666480064392,
      "eval_runtime": 572.4669,
      "eval_samples_per_second": 10.072,
      "eval_steps_per_second": 1.259,
      "step": 1000
    },
    {
      "epoch": 0.20815264527320035,
      "eval_loss": 1.6295970678329468,
      "eval_runtime": 572.6705,
      "eval_samples_per_second": 10.069,
      "eval_steps_per_second": 1.259,
      "step": 1200
    },
    {
      "epoch": 0.24284475281873374,
      "eval_loss": 1.6119849681854248,
      "eval_runtime": 572.6602,
      "eval_samples_per_second": 10.069,
      "eval_steps_per_second": 1.259,
      "step": 1400
    },
    {
      "epoch": 0.26019080659150046,
      "grad_norm": 6.23416805267334,
      "learning_rate": 2.36990459670425e-05,
      "loss": 1.6291,
      "step": 1500
    },
    {
      "epoch": 0.2775368603642671,
      "eval_loss": 1.5977734327316284,
      "eval_runtime": 572.802,
      "eval_samples_per_second": 10.066,
      "eval_steps_per_second": 1.259,
      "step": 1600
    },
    {
      "epoch": 0.31222896790980054,
      "eval_loss": 1.5906885862350464,
      "eval_runtime": 572.7238,
      "eval_samples_per_second": 10.068,
      "eval_steps_per_second": 1.259,
      "step": 1800
    },
    {
      "epoch": 0.3469210754553339,
      "grad_norm": 5.846036434173584,
      "learning_rate": 2.326539462272333e-05,
      "loss": 1.6032,
      "step": 2000
    },
    {
      "epoch": 0.3469210754553339,
      "eval_loss": 1.5792902708053589,
      "eval_runtime": 572.6421,
      "eval_samples_per_second": 10.069,
      "eval_steps_per_second": 1.259,
      "step": 2000
    },
    {
      "epoch": 0.38161318300086733,
      "eval_loss": 1.5674443244934082,
      "eval_runtime": 572.94,
      "eval_samples_per_second": 10.064,
      "eval_steps_per_second": 1.258,
      "step": 2200
    },
    {
      "epoch": 0.4163052905464007,
      "eval_loss": 1.5650794506072998,
      "eval_runtime": 573.1561,
      "eval_samples_per_second": 10.06,
      "eval_steps_per_second": 1.258,
      "step": 2400
    },
    {
      "epoch": 0.4336513443191674,
      "grad_norm": 6.9578962326049805,
      "learning_rate": 2.2831743278404163e-05,
      "loss": 1.5699,
      "step": 2500
    },
    {
      "epoch": 0.45099739809193407,
      "eval_loss": 1.5550028085708618,
      "eval_runtime": 572.973,
      "eval_samples_per_second": 10.063,
      "eval_steps_per_second": 1.258,
      "step": 2600
    },
    {
      "epoch": 0.4856895056374675,
      "eval_loss": 1.539338231086731,
      "eval_runtime": 573.1731,
      "eval_samples_per_second": 10.06,
      "eval_steps_per_second": 1.258,
      "step": 2800
    },
    {
      "epoch": 0.5203816131830009,
      "grad_norm": 6.062795639038086,
      "learning_rate": 2.2398091934084997e-05,
      "loss": 1.5555,
      "step": 3000
    },
    {
      "epoch": 0.5203816131830009,
      "eval_loss": 1.533992886543274,
      "eval_runtime": 573.3108,
      "eval_samples_per_second": 10.057,
      "eval_steps_per_second": 1.258,
      "step": 3000
    },
    {
      "epoch": 0.5550737207285342,
      "eval_loss": 1.5279603004455566,
      "eval_runtime": 573.3234,
      "eval_samples_per_second": 10.057,
      "eval_steps_per_second": 1.258,
      "step": 3200
    },
    {
      "epoch": 0.5897658282740676,
      "eval_loss": 1.5221937894821167,
      "eval_runtime": 573.2462,
      "eval_samples_per_second": 10.059,
      "eval_steps_per_second": 1.258,
      "step": 3400
    },
    {
      "epoch": 0.6071118820468343,
      "grad_norm": 5.474059581756592,
      "learning_rate": 2.196444058976583e-05,
      "loss": 1.5258,
      "step": 3500
    },
    {
      "epoch": 0.6244579358196011,
      "eval_loss": 1.5145606994628906,
      "eval_runtime": 573.1527,
      "eval_samples_per_second": 10.06,
      "eval_steps_per_second": 1.258,
      "step": 3600
    },
    {
      "epoch": 0.6591500433651344,
      "eval_loss": 1.5087436437606812,
      "eval_runtime": 573.3236,
      "eval_samples_per_second": 10.057,
      "eval_steps_per_second": 1.258,
      "step": 3800
    },
    {
      "epoch": 0.6938421509106678,
      "grad_norm": 4.400829315185547,
      "learning_rate": 2.1530789245446662e-05,
      "loss": 1.5145,
      "step": 4000
    },
    {
      "epoch": 0.6938421509106678,
      "eval_loss": 1.501986026763916,
      "eval_runtime": 572.9788,
      "eval_samples_per_second": 10.063,
      "eval_steps_per_second": 1.258,
      "step": 4000
    },
    {
      "epoch": 0.7285342584562012,
      "eval_loss": 1.4961259365081787,
      "eval_runtime": 572.9318,
      "eval_samples_per_second": 10.064,
      "eval_steps_per_second": 1.258,
      "step": 4200
    },
    {
      "epoch": 0.7632263660017347,
      "eval_loss": 1.4921443462371826,
      "eval_runtime": 573.2197,
      "eval_samples_per_second": 10.059,
      "eval_steps_per_second": 1.258,
      "step": 4400
    },
    {
      "epoch": 0.7805724197745013,
      "grad_norm": 5.124959945678711,
      "learning_rate": 2.1097137901127496e-05,
      "loss": 1.4981,
      "step": 4500
    },
    {
      "epoch": 0.797918473547268,
      "eval_loss": 1.48764967918396,
      "eval_runtime": 573.3463,
      "eval_samples_per_second": 10.057,
      "eval_steps_per_second": 1.258,
      "step": 4600
    },
    {
      "epoch": 0.8326105810928014,
      "eval_loss": 1.4827669858932495,
      "eval_runtime": 573.3276,
      "eval_samples_per_second": 10.057,
      "eval_steps_per_second": 1.258,
      "step": 4800
    },
    {
      "epoch": 0.8673026886383348,
      "grad_norm": 5.631836414337158,
      "learning_rate": 2.0663486556808327e-05,
      "loss": 1.4758,
      "step": 5000
    },
    {
      "epoch": 0.8673026886383348,
      "eval_loss": 1.4766356945037842,
      "eval_runtime": 573.3049,
      "eval_samples_per_second": 10.057,
      "eval_steps_per_second": 1.258,
      "step": 5000
    },
    {
      "epoch": 0.9019947961838681,
      "eval_loss": 1.4708250761032104,
      "eval_runtime": 573.3902,
      "eval_samples_per_second": 10.056,
      "eval_steps_per_second": 1.257,
      "step": 5200
    },
    {
      "epoch": 0.9366869037294016,
      "eval_loss": 1.4667783975601196,
      "eval_runtime": 573.338,
      "eval_samples_per_second": 10.057,
      "eval_steps_per_second": 1.258,
      "step": 5400
    },
    {
      "epoch": 0.9540329575021682,
      "grad_norm": 4.832674980163574,
      "learning_rate": 2.0229835212489158e-05,
      "loss": 1.4818,
      "step": 5500
    },
    {
      "epoch": 0.971379011274935,
      "eval_loss": 1.4649358987808228,
      "eval_runtime": 573.5907,
      "eval_samples_per_second": 10.052,
      "eval_steps_per_second": 1.257,
      "step": 5600
    },
    {
      "epoch": 1.0060711188204683,
      "eval_loss": 1.4911904335021973,
      "eval_runtime": 573.8034,
      "eval_samples_per_second": 10.049,
      "eval_steps_per_second": 1.257,
      "step": 5800
    },
    {
      "epoch": 1.0407632263660018,
      "grad_norm": 6.181447982788086,
      "learning_rate": 1.9796183868169993e-05,
      "loss": 1.3108,
      "step": 6000
    },
    {
      "epoch": 1.0407632263660018,
      "eval_loss": 1.5114498138427734,
      "eval_runtime": 573.9439,
      "eval_samples_per_second": 10.046,
      "eval_steps_per_second": 1.256,
      "step": 6000
    },
    {
      "epoch": 1.0754553339115351,
      "eval_loss": 1.5078836679458618,
      "eval_runtime": 573.7341,
      "eval_samples_per_second": 10.05,
      "eval_steps_per_second": 1.257,
      "step": 6200
    },
    {
      "epoch": 1.1101474414570685,
      "eval_loss": 1.512686848640442,
      "eval_runtime": 573.5532,
      "eval_samples_per_second": 10.053,
      "eval_steps_per_second": 1.257,
      "step": 6400
    },
    {
      "epoch": 1.1274934952298352,
      "grad_norm": 6.276436805725098,
      "learning_rate": 1.9362532523850823e-05,
      "loss": 1.1338,
      "step": 6500
    },
    {
      "epoch": 1.144839549002602,
      "eval_loss": 1.5086950063705444,
      "eval_runtime": 573.502,
      "eval_samples_per_second": 10.054,
      "eval_steps_per_second": 1.257,
      "step": 6600
    },
    {
      "epoch": 1.1795316565481353,
      "eval_loss": 1.5138036012649536,
      "eval_runtime": 573.4778,
      "eval_samples_per_second": 10.054,
      "eval_steps_per_second": 1.257,
      "step": 6800
    },
    {
      "epoch": 1.2142237640936686,
      "grad_norm": 5.294378280639648,
      "learning_rate": 1.8928881179531658e-05,
      "loss": 1.1411,
      "step": 7000
    },
    {
      "epoch": 1.2142237640936686,
      "eval_loss": 1.5119119882583618,
      "eval_runtime": 573.2773,
      "eval_samples_per_second": 10.058,
      "eval_steps_per_second": 1.258,
      "step": 7000
    },
    {
      "epoch": 1.2489158716392021,
      "eval_loss": 1.5059071779251099,
      "eval_runtime": 573.2436,
      "eval_samples_per_second": 10.059,
      "eval_steps_per_second": 1.258,
      "step": 7200
    },
    {
      "epoch": 1.2836079791847355,
      "eval_loss": 1.4931423664093018,
      "eval_runtime": 573.2431,
      "eval_samples_per_second": 10.059,
      "eval_steps_per_second": 1.258,
      "step": 7400
    },
    {
      "epoch": 1.3009540329575022,
      "grad_norm": 5.875624179840088,
      "learning_rate": 1.8495229835212492e-05,
      "loss": 1.1482,
      "step": 7500
    },
    {
      "epoch": 1.318300086730269,
      "eval_loss": 1.4929821491241455,
      "eval_runtime": 572.8059,
      "eval_samples_per_second": 10.066,
      "eval_steps_per_second": 1.259,
      "step": 7600
    },
    {
      "epoch": 1.3529921942758023,
      "eval_loss": 1.490503191947937,
      "eval_runtime": 572.7436,
      "eval_samples_per_second": 10.067,
      "eval_steps_per_second": 1.259,
      "step": 7800
    },
    {
      "epoch": 1.3876843018213356,
      "grad_norm": 5.962628364562988,
      "learning_rate": 1.8061578490893323e-05,
      "loss": 1.1534,
      "step": 8000
    },
    {
      "epoch": 1.3876843018213356,
      "eval_loss": 1.4796279668807983,
      "eval_runtime": 572.5741,
      "eval_samples_per_second": 10.07,
      "eval_steps_per_second": 1.259,
      "step": 8000
    },
    {
      "epoch": 1.4223764093668692,
      "eval_loss": 1.4942739009857178,
      "eval_runtime": 572.7895,
      "eval_samples_per_second": 10.067,
      "eval_steps_per_second": 1.259,
      "step": 8200
    },
    {
      "epoch": 1.4570685169124025,
      "eval_loss": 1.478100299835205,
      "eval_runtime": 574.02,
      "eval_samples_per_second": 10.045,
      "eval_steps_per_second": 1.256,
      "step": 8400
    },
    {
      "epoch": 1.4744145706851692,
      "grad_norm": 5.818081855773926,
      "learning_rate": 1.7627927146574154e-05,
      "loss": 1.1493,
      "step": 8500
    },
    {
      "epoch": 1.4917606244579358,
      "eval_loss": 1.4706262350082397,
      "eval_runtime": 573.645,
      "eval_samples_per_second": 10.052,
      "eval_steps_per_second": 1.257,
      "step": 8600
    },
    {
      "epoch": 1.5264527320034693,
      "eval_loss": 1.4702831506729126,
      "eval_runtime": 573.6402,
      "eval_samples_per_second": 10.052,
      "eval_steps_per_second": 1.257,
      "step": 8800
    },
    {
      "epoch": 1.5611448395490026,
      "grad_norm": 6.020638465881348,
      "learning_rate": 1.7194275802254988e-05,
      "loss": 1.1517,
      "step": 9000
    },
    {
      "epoch": 1.5611448395490026,
      "eval_loss": 1.4639151096343994,
      "eval_runtime": 573.5071,
      "eval_samples_per_second": 10.054,
      "eval_steps_per_second": 1.257,
      "step": 9000
    },
    {
      "epoch": 1.595836947094536,
      "eval_loss": 1.4722236394882202,
      "eval_runtime": 573.4545,
      "eval_samples_per_second": 10.055,
      "eval_steps_per_second": 1.257,
      "step": 9200
    },
    {
      "epoch": 1.6305290546400695,
      "eval_loss": 1.4613826274871826,
      "eval_runtime": 573.2765,
      "eval_samples_per_second": 10.058,
      "eval_steps_per_second": 1.258,
      "step": 9400
    },
    {
      "epoch": 1.647875108412836,
      "grad_norm": 5.535754680633545,
      "learning_rate": 1.676062445793582e-05,
      "loss": 1.1428,
      "step": 9500
    },
    {
      "epoch": 1.6652211621856028,
      "eval_loss": 1.4539824724197388,
      "eval_runtime": 573.1598,
      "eval_samples_per_second": 10.06,
      "eval_steps_per_second": 1.258,
      "step": 9600
    },
    {
      "epoch": 1.699913269731136,
      "eval_loss": 1.457112431526184,
      "eval_runtime": 573.1778,
      "eval_samples_per_second": 10.06,
      "eval_steps_per_second": 1.258,
      "step": 9800
    },
    {
      "epoch": 1.7346053772766696,
      "grad_norm": 6.019700527191162,
      "learning_rate": 1.6326973113616653e-05,
      "loss": 1.1466,
      "step": 10000
    },
    {
      "epoch": 1.7346053772766696,
      "eval_loss": 1.4443352222442627,
      "eval_runtime": 573.1133,
      "eval_samples_per_second": 10.061,
      "eval_steps_per_second": 1.258,
      "step": 10000
    }
  ],
  "logging_steps": 500,
  "max_steps": 28825,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.74751582519296e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}