{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8298755186721992, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02074688796680498, "grad_norm": 0.4129277765750885, "learning_rate": 8.264462809917356e-05, "loss": 1.7066, "step": 50 }, { "epoch": 0.04149377593360996, "grad_norm": 0.2623195946216583, "learning_rate": 0.00016528925619834712, "loss": 0.7931, "step": 100 }, { "epoch": 0.06224066390041494, "grad_norm": 0.29180774092674255, "learning_rate": 0.00019746614242027087, "loss": 0.6638, "step": 150 }, { "epoch": 0.08298755186721991, "grad_norm": 0.2945658564567566, "learning_rate": 0.0001930974224552206, "loss": 0.6448, "step": 200 }, { "epoch": 0.1037344398340249, "grad_norm": 0.28438836336135864, "learning_rate": 0.00018872870249017038, "loss": 0.6229, "step": 250 }, { "epoch": 0.12448132780082988, "grad_norm": 0.27639704942703247, "learning_rate": 0.00018435998252512014, "loss": 0.6064, "step": 300 }, { "epoch": 0.14522821576763487, "grad_norm": 0.2640627920627594, "learning_rate": 0.0001799912625600699, "loss": 0.591, "step": 350 }, { "epoch": 0.16597510373443983, "grad_norm": 0.258998304605484, "learning_rate": 0.00017562254259501968, "loss": 0.5753, "step": 400 }, { "epoch": 0.18672199170124482, "grad_norm": 0.24890367686748505, "learning_rate": 0.00017125382262996944, "loss": 0.5734, "step": 450 }, { "epoch": 0.2074688796680498, "grad_norm": 0.2385823130607605, "learning_rate": 0.00016688510266491918, "loss": 0.5721, "step": 500 }, { "epoch": 0.2074688796680498, "eval_loss": 0.573611319065094, "eval_runtime": 141.3666, "eval_samples_per_second": 7.166, "eval_steps_per_second": 0.898, "step": 500 }, { "epoch": 0.22821576763485477, "grad_norm": 0.21996833384037018, "learning_rate": 0.00016251638269986895, "loss": 0.5688, "step": 550 }, { "epoch": 0.24896265560165975, "grad_norm": 0.23547005653381348, "learning_rate": 0.0001581476627348187, "loss": 0.554, "step": 600 }, { "epoch": 0.2697095435684647, "grad_norm": 0.23831422626972198, "learning_rate": 0.00015377894276976848, "loss": 0.5488, "step": 650 }, { "epoch": 0.29045643153526973, "grad_norm": 0.24010932445526123, "learning_rate": 0.00014941022280471822, "loss": 0.5445, "step": 700 }, { "epoch": 0.3112033195020747, "grad_norm": 0.22254310548305511, "learning_rate": 0.00014504150283966798, "loss": 0.5491, "step": 750 }, { "epoch": 0.33195020746887965, "grad_norm": 0.23563440144062042, "learning_rate": 0.00014067278287461772, "loss": 0.5428, "step": 800 }, { "epoch": 0.35269709543568467, "grad_norm": 0.23261123895645142, "learning_rate": 0.0001363040629095675, "loss": 0.5444, "step": 850 }, { "epoch": 0.37344398340248963, "grad_norm": 0.2282126247882843, "learning_rate": 0.00013193534294451726, "loss": 0.5298, "step": 900 }, { "epoch": 0.3941908713692946, "grad_norm": 0.28895512223243713, "learning_rate": 0.00012756662297946702, "loss": 0.5368, "step": 950 }, { "epoch": 0.4149377593360996, "grad_norm": 0.2174956202507019, "learning_rate": 0.0001231979030144168, "loss": 0.5333, "step": 1000 }, { "epoch": 0.4149377593360996, "eval_loss": 0.5404650568962097, "eval_runtime": 140.5485, "eval_samples_per_second": 7.207, "eval_steps_per_second": 0.904, "step": 1000 }, { "epoch": 0.43568464730290457, "grad_norm": 0.22959038615226746, "learning_rate": 0.00011882918304936654, "loss": 0.5254, "step": 1050 }, { "epoch": 0.45643153526970953, "grad_norm": 0.20930084586143494, "learning_rate": 0.00011446046308431631, "loss": 0.53, "step": 1100 }, { "epoch": 0.47717842323651455, "grad_norm": 0.2505868077278137, "learning_rate": 0.00011009174311926606, "loss": 0.5344, "step": 1150 }, { "epoch": 0.4979253112033195, "grad_norm": 0.24351277947425842, "learning_rate": 0.00010572302315421583, "loss": 0.5317, "step": 1200 }, { "epoch": 0.5186721991701245, "grad_norm": 0.24290521442890167, "learning_rate": 0.00010135430318916559, "loss": 0.5265, "step": 1250 }, { "epoch": 0.5394190871369294, "grad_norm": 0.2370923012495041, "learning_rate": 9.698558322411535e-05, "loss": 0.5255, "step": 1300 }, { "epoch": 0.5601659751037344, "grad_norm": 0.2208719551563263, "learning_rate": 9.26168632590651e-05, "loss": 0.5182, "step": 1350 }, { "epoch": 0.5809128630705395, "grad_norm": 0.24646012485027313, "learning_rate": 8.824814329401486e-05, "loss": 0.5117, "step": 1400 }, { "epoch": 0.6016597510373444, "grad_norm": 0.21846014261245728, "learning_rate": 8.387942332896462e-05, "loss": 0.5275, "step": 1450 }, { "epoch": 0.6224066390041494, "grad_norm": 0.23680146038532257, "learning_rate": 7.951070336391437e-05, "loss": 0.5252, "step": 1500 }, { "epoch": 0.6224066390041494, "eval_loss": 0.5271425247192383, "eval_runtime": 232.8953, "eval_samples_per_second": 4.35, "eval_steps_per_second": 0.545, "step": 1500 }, { "epoch": 0.6431535269709544, "grad_norm": 0.24285145103931427, "learning_rate": 7.514198339886414e-05, "loss": 0.5186, "step": 1550 }, { "epoch": 0.6639004149377593, "grad_norm": 0.25104212760925293, "learning_rate": 7.07732634338139e-05, "loss": 0.5154, "step": 1600 }, { "epoch": 0.6846473029045643, "grad_norm": 0.23628434538841248, "learning_rate": 6.640454346876365e-05, "loss": 0.5127, "step": 1650 }, { "epoch": 0.7053941908713693, "grad_norm": 0.25255104899406433, "learning_rate": 6.203582350371342e-05, "loss": 0.5174, "step": 1700 }, { "epoch": 0.7261410788381742, "grad_norm": 0.23285475373268127, "learning_rate": 5.766710353866317e-05, "loss": 0.5059, "step": 1750 }, { "epoch": 0.7468879668049793, "grad_norm": 0.24090896546840668, "learning_rate": 5.329838357361293e-05, "loss": 0.5179, "step": 1800 }, { "epoch": 0.7676348547717843, "grad_norm": 0.23537760972976685, "learning_rate": 4.892966360856269e-05, "loss": 0.5164, "step": 1850 }, { "epoch": 0.7883817427385892, "grad_norm": 0.23769009113311768, "learning_rate": 4.456094364351245e-05, "loss": 0.5103, "step": 1900 }, { "epoch": 0.8091286307053942, "grad_norm": 0.2349609136581421, "learning_rate": 4.019222367846222e-05, "loss": 0.5108, "step": 1950 }, { "epoch": 0.8298755186721992, "grad_norm": 0.232425257563591, "learning_rate": 3.582350371341197e-05, "loss": 0.5074, "step": 2000 }, { "epoch": 0.8298755186721992, "eval_loss": 0.5192467570304871, "eval_runtime": 206.8865, "eval_samples_per_second": 4.896, "eval_steps_per_second": 0.614, "step": 2000 } ], "logging_steps": 50, "max_steps": 2410, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8341028691968e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }