{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03203640500568828, "eval_steps": 500, "global_step": 44, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0029124004550625713, "importance_ratio": 1.0, "kl_div_avg": 6.202674558153376e-05, "learning_rate": 0.0, "loss_func": "stage2", "step": 1, "total_loss": -0.30124378204345703 }, { "epoch": 0.0029124004550625713, "importance_ratio": 0.9999282956123352, "kl_div_avg": 0.0013916800962761045, "learning_rate": 2.153382790366965e-07, "loss_func": "stage2", "step": 2, "total_loss": -0.36082643270492554 }, { "epoch": 0.0029124004550625713, "importance_ratio": 0.9998849630355835, "kl_div_avg": 2.4411560843873303e-06, "learning_rate": 3.4130309724299266e-07, "loss_func": "stage2", "step": 3, "total_loss": -1.0000114440917969 }, { "epoch": 0.0029124004550625713, "importance_ratio": 1.0000627040863037, "kl_div_avg": 5.5745240388205275e-05, "learning_rate": 4.30676558073393e-07, "loss_func": "stage2", "step": 4, "total_loss": 0.44314149022102356 }, { "epoch": 0.0058248009101251426, "importance_ratio": 1.0, "kl_div_avg": 0.0006268564611673355, "learning_rate": 5e-07, "loss_func": "stage2", "step": 5, "total_loss": 0.060124993324279785 }, { "epoch": 0.0058248009101251426, "importance_ratio": 1.0000600814819336, "kl_div_avg": 0.0005689397221431136, "learning_rate": 5e-07, "loss_func": "stage2", "step": 6, "total_loss": 0.058914512395858765 }, { "epoch": 0.0058248009101251426, "importance_ratio": 0.9999627470970154, "kl_div_avg": 0.0004537358181551099, "learning_rate": 4.999271455631648e-07, "loss_func": "stage2", "step": 7, "total_loss": 0.19876566529273987 }, { "epoch": 0.0058248009101251426, "importance_ratio": 1.0000057220458984, "kl_div_avg": 0.000579233281314373, "learning_rate": 4.998542911263296e-07, "loss_func": "stage2", "step": 8, "total_loss": 0.05980253219604492 }, { "epoch": 0.008737201365187713, "importance_ratio": 1.0, "kl_div_avg": 0.0005276197334751487, "learning_rate": 4.997814366894943e-07, "loss_func": "stage2", "step": 9, "total_loss": 0.3613969683647156 }, { "epoch": 0.008737201365187713, "importance_ratio": 1.000020146369934, "kl_div_avg": 0.000614392978604883, "learning_rate": 4.997085822526592e-07, "loss_func": "stage2", "step": 10, "total_loss": -0.7252892255783081 }, { "epoch": 0.008737201365187713, "importance_ratio": 0.9998898506164551, "kl_div_avg": 0.0005164016038179398, "learning_rate": 4.99635727815824e-07, "loss_func": "stage2", "step": 11, "total_loss": 0.32872042059898376 }, { "epoch": 0.008737201365187713, "importance_ratio": 1.0001676082611084, "kl_div_avg": 0.000510960235260427, "learning_rate": 4.995628733789887e-07, "loss_func": "stage2", "step": 12, "total_loss": -0.8001887798309326 }, { "epoch": 0.011649601820250285, "importance_ratio": 0.999980092048645, "kl_div_avg": 0.0013118372298777103, "learning_rate": 4.994900189421535e-07, "loss_func": "stage2", "step": 13, "total_loss": 0.4664098024368286 }, { "epoch": 0.011649601820250285, "importance_ratio": 1.0000075101852417, "kl_div_avg": 0.0005553055088967085, "learning_rate": 4.994171645053183e-07, "loss_func": "stage2", "step": 14, "total_loss": -0.3387294113636017 }, { "epoch": 0.011649601820250285, "importance_ratio": 0.9999791979789734, "kl_div_avg": 0.0006344152498058975, "learning_rate": 4.993443100684832e-07, "loss_func": "stage2", "step": 15, "total_loss": -0.7034344673156738 }, { "epoch": 0.011649601820250285, "importance_ratio": 0.9996992349624634, "kl_div_avg": 0.0013188815210014582, "learning_rate": 4.99271455631648e-07, "loss_func": "stage2", "step": 16, "total_loss": 0.6023236513137817 }, { "epoch": 0.014562002275312855, "importance_ratio": 1.0, "kl_div_avg": 0.0015903799794614315, "learning_rate": 4.991986011948127e-07, "loss_func": "stage2", "step": 17, "total_loss": 0.9130043983459473 }, { "epoch": 0.014562002275312855, "importance_ratio": 1.0000156164169312, "kl_div_avg": 0.0008485906291753054, "learning_rate": 4.991257467579775e-07, "loss_func": "stage2", "step": 18, "total_loss": 0.05914050340652466 }, { "epoch": 0.014562002275312855, "importance_ratio": 1.0000320672988892, "kl_div_avg": 0.0009148549288511276, "learning_rate": 4.990528923211423e-07, "loss_func": "stage2", "step": 19, "total_loss": -0.6230961084365845 }, { "epoch": 0.014562002275312855, "importance_ratio": 0.9999111890792847, "kl_div_avg": 0.0016965724062174559, "learning_rate": 4.989800378843072e-07, "loss_func": "stage2", "step": 20, "total_loss": 0.5814720988273621 }, { "epoch": 0.017474402730375427, "importance_ratio": 1.0, "kl_div_avg": 0.0012479191645979881, "learning_rate": 4.98907183447472e-07, "loss_func": "stage2", "step": 21, "total_loss": 0.291412353515625 }, { "epoch": 0.017474402730375427, "importance_ratio": 0.9998515248298645, "kl_div_avg": 0.0008866681600920856, "learning_rate": 4.988343290106367e-07, "loss_func": "stage2", "step": 22, "total_loss": 0.069973886013031 }, { "epoch": 0.017474402730375427, "importance_ratio": 0.9997187852859497, "kl_div_avg": 0.0029276064597070217, "learning_rate": 4.987614745738015e-07, "loss_func": "stage2", "step": 23, "total_loss": 0.14661093056201935 }, { "epoch": 0.017474402730375427, "importance_ratio": 0.9996069669723511, "kl_div_avg": 0.0028409322258085012, "learning_rate": 4.986886201369663e-07, "loss_func": "stage2", "step": 24, "total_loss": 0.23758070170879364 }, { "epoch": 0.020386803185437997, "importance_ratio": 1.0000464916229248, "kl_div_avg": 0.003729865886271, "learning_rate": 4.986157657001312e-07, "loss_func": "stage2", "step": 25, "total_loss": 0.3280088007450104 }, { "epoch": 0.020386803185437997, "importance_ratio": 0.9997869729995728, "kl_div_avg": 0.0012685225810855627, "learning_rate": 4.985429112632959e-07, "loss_func": "stage2", "step": 26, "total_loss": 0.5303494930267334 }, { "epoch": 0.020386803185437997, "importance_ratio": 0.9990458488464355, "kl_div_avg": 0.0042372471652925014, "learning_rate": 4.984700568264607e-07, "loss_func": "stage2", "step": 27, "total_loss": 0.6485586166381836 }, { "epoch": 0.020386803185437997, "importance_ratio": 0.9997897148132324, "kl_div_avg": 0.000950633198954165, "learning_rate": 4.983972023896255e-07, "loss_func": "stage2", "step": 28, "total_loss": 0.9602090120315552 }, { "epoch": 0.02329920364050057, "importance_ratio": 1.0, "kl_div_avg": 0.00110217509791255, "learning_rate": 4.983243479527903e-07, "loss_func": "stage2", "step": 29, "total_loss": 0.056760966777801514 }, { "epoch": 0.02329920364050057, "importance_ratio": 0.9995359778404236, "kl_div_avg": 0.002028511371463537, "learning_rate": 4.98251493515955e-07, "loss_func": "stage2", "step": 30, "total_loss": -0.34580060839653015 }, { "epoch": 0.02329920364050057, "importance_ratio": 0.999755859375, "kl_div_avg": 0.0011557539692148566, "learning_rate": 4.981786390791199e-07, "loss_func": "stage2", "step": 31, "total_loss": 0.05999104678630829 }, { "epoch": 0.02329920364050057, "importance_ratio": 0.999870240688324, "kl_div_avg": 0.0008296141168102622, "learning_rate": 4.981057846422847e-07, "loss_func": "stage2", "step": 32, "total_loss": -0.2040196657180786 }, { "epoch": 0.02621160409556314, "importance_ratio": 1.0, "kl_div_avg": 0.0052886055782437325, "learning_rate": 4.980329302054495e-07, "loss_func": "stage2", "step": 33, "total_loss": 0.3385615050792694 }, { "epoch": 0.02621160409556314, "importance_ratio": 0.9999160766601562, "kl_div_avg": 0.00105857546441257, "learning_rate": 4.979600757686143e-07, "loss_func": "stage2", "step": 34, "total_loss": 0.8041456937789917 }, { "epoch": 0.02621160409556314, "importance_ratio": 1.0000337362289429, "kl_div_avg": 0.001299469848163426, "learning_rate": 4.97887221331779e-07, "loss_func": "stage2", "step": 35, "total_loss": 0.059677302837371826 }, { "epoch": 0.02621160409556314, "importance_ratio": 1.0000333786010742, "kl_div_avg": 0.0009728098521009088, "learning_rate": 4.978143668949439e-07, "loss_func": "stage2", "step": 36, "total_loss": 0.9722646474838257 }, { "epoch": 0.02912400455062571, "importance_ratio": 0.9999969005584717, "kl_div_avg": 0.004802822135388851, "learning_rate": 4.977415124581087e-07, "loss_func": "stage2", "step": 37, "total_loss": -0.5127509236335754 }, { "epoch": 0.02912400455062571, "importance_ratio": 0.9998223781585693, "kl_div_avg": 0.001794470939785242, "learning_rate": 4.976686580212735e-07, "loss_func": "stage2", "step": 38, "total_loss": 0.29614874720573425 }, { "epoch": 0.02912400455062571, "importance_ratio": 0.9999631643295288, "kl_div_avg": 0.0012322800466790795, "learning_rate": 4.975958035844383e-07, "loss_func": "stage2", "step": 39, "total_loss": -0.6838780045509338 }, { "epoch": 0.02912400455062571, "importance_ratio": 1.0000174045562744, "kl_div_avg": 0.0013172460021451116, "learning_rate": 4.97522949147603e-07, "loss_func": "stage2", "step": 40, "total_loss": 0.31415513157844543 }, { "epoch": 0.03203640500568828, "importance_ratio": 1.0, "kl_div_avg": 0.0018465688917785883, "learning_rate": 4.974500947107679e-07, "loss_func": "stage2", "step": 41, "total_loss": -0.6754190921783447 }, { "epoch": 0.03203640500568828, "importance_ratio": 0.9999680519104004, "kl_div_avg": 0.001666294177994132, "learning_rate": 4.973772402739327e-07, "loss_func": "stage2", "step": 42, "total_loss": -0.6812421083450317 }, { "epoch": 0.03203640500568828, "importance_ratio": 1.000089168548584, "kl_div_avg": 0.0016971167642623186, "learning_rate": 4.973043858370975e-07, "loss_func": "stage2", "step": 43, "total_loss": -0.764133095741272 }, { "epoch": 0.03203640500568828, "importance_ratio": 0.9997596740722656, "kl_div_avg": 0.001568423816934228, "learning_rate": 4.972315314002623e-07, "loss_func": "stage2", "step": 44, "total_loss": -0.6741525530815125 } ], "logging_steps": 1.0, "max_steps": 6868, "num_input_tokens_seen": 0, "num_train_epochs": 20.0, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0, "train_batch_size": 1, "trial_name": null, "trial_params": null }