{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 3751,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.026659557451346308,
      "grad_norm": 7.0857120383857755,
      "learning_rate": 2.6329787234042554e-06,
      "loss": 1.0624,
      "step": 100
    },
    {
      "epoch": 0.053319114902692616,
      "grad_norm": 6.581000668568491,
      "learning_rate": 5.292553191489362e-06,
      "loss": 0.7422,
      "step": 200
    },
    {
      "epoch": 0.07997867235403892,
      "grad_norm": 7.4656274686679085,
      "learning_rate": 7.95212765957447e-06,
      "loss": 0.7649,
      "step": 300
    },
    {
      "epoch": 0.10663822980538523,
      "grad_norm": 4.554989486482939,
      "learning_rate": 9.998854140728647e-06,
      "loss": 0.7662,
      "step": 400
    },
    {
      "epoch": 0.13329778725673153,
      "grad_norm": 4.023821679311659,
      "learning_rate": 9.967263823916638e-06,
      "loss": 0.784,
      "step": 500
    },
    {
      "epoch": 0.15995734470807785,
      "grad_norm": 4.221747608519819,
      "learning_rate": 9.892664857121854e-06,
      "loss": 0.79,
      "step": 600
    },
    {
      "epoch": 0.18661690215942414,
      "grad_norm": 3.838998761862227,
      "learning_rate": 9.775703149433419e-06,
      "loss": 0.7811,
      "step": 700
    },
    {
      "epoch": 0.21327645961077046,
      "grad_norm": 4.346355062560185,
      "learning_rate": 9.617391404288412e-06,
      "loss": 0.8124,
      "step": 800
    },
    {
      "epoch": 0.23993601706211676,
      "grad_norm": 4.985043330591158,
      "learning_rate": 9.41910035106149e-06,
      "loss": 0.7959,
      "step": 900
    },
    {
      "epoch": 0.26659557451346305,
      "grad_norm": 4.43851554256146,
      "learning_rate": 9.18254687671603e-06,
      "loss": 0.8065,
      "step": 1000
    },
    {
      "epoch": 0.2932551319648094,
      "grad_norm": 4.6639168705369425,
      "learning_rate": 8.909779160277951e-06,
      "loss": 0.7854,
      "step": 1100
    },
    {
      "epoch": 0.3199146894161557,
      "grad_norm": 3.7379632914857375,
      "learning_rate": 8.603158938844122e-06,
      "loss": 0.7729,
      "step": 1200
    },
    {
      "epoch": 0.346574246867502,
      "grad_norm": 4.820935299394203,
      "learning_rate": 8.265341058673722e-06,
      "loss": 0.7831,
      "step": 1300
    },
    {
      "epoch": 0.3732338043188483,
      "grad_norm": 3.6917931938034148,
      "learning_rate": 7.899250488417746e-06,
      "loss": 0.7967,
      "step": 1400
    },
    {
      "epoch": 0.39989336177019463,
      "grad_norm": 4.773594475659506,
      "learning_rate": 7.5080569935157375e-06,
      "loss": 0.7979,
      "step": 1500
    },
    {
      "epoch": 0.4265529192215409,
      "grad_norm": 4.447857956208416,
      "learning_rate": 7.095147691039425e-06,
      "loss": 0.7843,
      "step": 1600
    },
    {
      "epoch": 0.4532124766728872,
      "grad_norm": 4.389149312164256,
      "learning_rate": 6.664097722614934e-06,
      "loss": 0.7721,
      "step": 1700
    },
    {
      "epoch": 0.4798720341242335,
      "grad_norm": 3.915206329218288,
      "learning_rate": 6.218639299349676e-06,
      "loss": 0.7526,
      "step": 1800
    },
    {
      "epoch": 0.5065315915755798,
      "grad_norm": 5.180289969908035,
      "learning_rate": 5.7626293867858985e-06,
      "loss": 0.7854,
      "step": 1900
    },
    {
      "epoch": 0.5331911490269261,
      "grad_norm": 4.139436495632767,
      "learning_rate": 5.300016309678104e-06,
      "loss": 0.7381,
      "step": 2000
    },
    {
      "epoch": 0.5598507064782725,
      "grad_norm": 4.936713870803515,
      "learning_rate": 4.834805565744173e-06,
      "loss": 0.7471,
      "step": 2100
    },
    {
      "epoch": 0.5865102639296188,
      "grad_norm": 4.5976706617804,
      "learning_rate": 4.371025144389e-06,
      "loss": 0.7611,
      "step": 2200
    },
    {
      "epoch": 0.6131698213809651,
      "grad_norm": 5.820707950034038,
      "learning_rate": 3.912690650685726e-06,
      "loss": 0.7374,
      "step": 2300
    },
    {
      "epoch": 0.6398293788323114,
      "grad_norm": 3.951105419131897,
      "learning_rate": 3.4637705365856666e-06,
      "loss": 0.7444,
      "step": 2400
    },
    {
      "epoch": 0.6664889362836577,
      "grad_norm": 4.529901890166711,
      "learning_rate": 3.0281517403997245e-06,
      "loss": 0.7452,
      "step": 2500
    },
    {
      "epoch": 0.693148493735004,
      "grad_norm": 2.851635488548725,
      "learning_rate": 2.6096060320590393e-06,
      "loss": 0.7345,
      "step": 2600
    },
    {
      "epoch": 0.7198080511863503,
      "grad_norm": 3.6802521221701947,
      "learning_rate": 2.2117573555516774e-06,
      "loss": 0.7348,
      "step": 2700
    },
    {
      "epoch": 0.7464676086376966,
      "grad_norm": 3.6788331738320075,
      "learning_rate": 1.8380504512982329e-06,
      "loss": 0.7352,
      "step": 2800
    },
    {
      "epoch": 0.773127166089043,
      "grad_norm": 3.664797807064228,
      "learning_rate": 1.491721030146963e-06,
      "loss": 0.7299,
      "step": 2900
    },
    {
      "epoch": 0.7997867235403893,
      "grad_norm": 3.9751097792558783,
      "learning_rate": 1.1757677572344577e-06,
      "loss": 0.7353,
      "step": 3000
    },
    {
      "epoch": 0.8264462809917356,
      "grad_norm": 3.7953764140303985,
      "learning_rate": 8.929262882873524e-07,
      "loss": 0.7297,
      "step": 3100
    },
    {
      "epoch": 0.8531058384430819,
      "grad_norm": 3.4294084377480814,
      "learning_rate": 6.456455831696234e-07,
      "loss": 0.7151,
      "step": 3200
    },
    {
      "epoch": 0.8797653958944281,
      "grad_norm": 4.751586217200114,
      "learning_rate": 4.3606670176271014e-07,
      "loss": 0.695,
      "step": 3300
    },
    {
      "epoch": 0.9064249533457744,
      "grad_norm": 4.643644454272238,
      "learning_rate": 2.660042657725931e-07,
      "loss": 0.7423,
      "step": 3400
    },
    {
      "epoch": 0.9330845107971207,
      "grad_norm": 4.371360024084083,
      "learning_rate": 1.3693074697528231e-07,
      "loss": 0.7068,
      "step": 3500
    },
    {
      "epoch": 0.959744068248467,
      "grad_norm": 5.224648142017166,
      "learning_rate": 4.996371793965837e-08,
      "loss": 0.7121,
      "step": 3600
    },
    {
      "epoch": 0.9864036256998134,
      "grad_norm": 4.815750775150759,
      "learning_rate": 5.8561756162400785e-09,
      "loss": 0.7437,
      "step": 3700
    },
    {
      "epoch": 1.0,
      "step": 3751,
      "total_flos": 84620534874112.0,
      "train_loss": 0.7639887226768826,
      "train_runtime": 3882.0564,
      "train_samples_per_second": 7.728,
      "train_steps_per_second": 0.966
    }
  ],
  "logging_steps": 100,
  "max_steps": 3751,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 84620534874112.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}