{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0012675024260788625,
  "eval_steps": 500,
  "global_step": 8,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001584378032598578,
      "grad_norm": 6.941703031770885e-05,
      "learning_rate": 2.9998415465061005e-05,
      "loss": 0.0,
      "loss/policy_avg": 9.534414857625961e-08,
      "objective/entropy": 66.7407455444336,
      "objective/kl": 0.0,
      "objective/rlhf_reward": 1.9394512176513672,
      "objective/scores": 1.9393310546875,
      "policy/approxkl_avg": 0.0,
      "policy/clipfrac_avg": 0.0,
      "policy/entropy_avg": 0.501089334487915,
      "step": 1,
      "timer/calc_advantages": 1.838552713394165,
      "timer/calc_loss": 0.718826174736023,
      "timer/get_reward": 0.5057681202888489,
      "timer/training_step": 5.023805141448975,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0003168756065197156,
      "grad_norm": 8.962108612060547,
      "learning_rate": 2.999683093012201e-05,
      "loss": 0.0079,
      "loss/policy_avg": 0.007912077941000462,
      "objective/entropy": 58.23992919921875,
      "objective/kl": 0.006753697991371155,
      "objective/rlhf_reward": 2.58967924118042,
      "objective/scores": 2.59033203125,
      "policy/approxkl_avg": 0.143972247838974,
      "policy/clipfrac_avg": 0.376953125,
      "policy/entropy_avg": 0.437211811542511,
      "step": 2,
      "timer/calc_advantages": 1.693800687789917,
      "timer/calc_loss": 0.6173452734947205,
      "timer/get_reward": 0.431838721036911,
      "timer/training_step": 4.488475322723389,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0006933212280273,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0004753134097795734,
      "grad_norm": 8.80933666229248,
      "learning_rate": 2.9995246395183014e-05,
      "loss": 0.0087,
      "loss/policy_avg": 0.008671796880662441,
      "objective/entropy": 60.747886657714844,
      "objective/kl": 0.19268979132175446,
      "objective/rlhf_reward": 2.797236442565918,
      "objective/scores": 2.8162841796875,
      "policy/approxkl_avg": 0.22207684814929962,
      "policy/clipfrac_avg": 0.369140625,
      "policy/entropy_avg": 0.47127196192741394,
      "step": 3,
      "timer/calc_advantages": 2.117703437805176,
      "timer/calc_loss": 0.8592336773872375,
      "timer/get_reward": 0.5989187955856323,
      "timer/training_step": 5.804616928100586,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 0.9997344613075256,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0006337512130394313,
      "grad_norm": 15.397004127502441,
      "learning_rate": 2.999366186024402e-05,
      "loss": 0.0181,
      "loss/policy_avg": 0.018090050667524338,
      "objective/entropy": 59.366294860839844,
      "objective/kl": 0.16402865946292877,
      "objective/rlhf_reward": 2.7970707416534424,
      "objective/scores": 2.8134765625,
      "policy/approxkl_avg": 0.30915290117263794,
      "policy/clipfrac_avg": 0.41796875,
      "policy/entropy_avg": 0.4661800265312195,
      "step": 4,
      "timer/calc_advantages": 2.018901824951172,
      "timer/calc_loss": 0.8049512505531311,
      "timer/get_reward": 0.5547392964363098,
      "timer/training_step": 5.506021022796631,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0003275871276855,
      "val/ratio_var": null
    },
    {
      "epoch": 0.000792189016299289,
      "grad_norm": 6.137847231002524e-05,
      "learning_rate": 2.9992077325305024e-05,
      "loss": 0.0,
      "loss/policy_avg": 1.0040821507573128e-07,
      "objective/entropy": 61.850738525390625,
      "objective/kl": 0.0,
      "objective/rlhf_reward": 2.3658785820007324,
      "objective/scores": 2.36578369140625,
      "policy/approxkl_avg": 0.0,
      "policy/clipfrac_avg": 0.0,
      "policy/entropy_avg": 0.48403242230415344,
      "step": 5,
      "timer/calc_advantages": 1.889434576034546,
      "timer/calc_loss": 0.7489800453186035,
      "timer/get_reward": 0.5218558311462402,
      "timer/training_step": 5.19569206237793,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0009506268195591468,
      "grad_norm": 3.129560947418213,
      "learning_rate": 2.9990492790366028e-05,
      "loss": 0.002,
      "loss/policy_avg": 0.002019597217440605,
      "objective/entropy": 64.70206451416016,
      "objective/kl": -0.011846143752336502,
      "objective/rlhf_reward": 2.4429714679718018,
      "objective/scores": 2.44146728515625,
      "policy/approxkl_avg": 0.05796036496758461,
      "policy/clipfrac_avg": 0.224609375,
      "policy/entropy_avg": 0.4975966215133667,
      "step": 6,
      "timer/calc_advantages": 1.939255714416504,
      "timer/calc_loss": 0.7629643082618713,
      "timer/get_reward": 0.5286913514137268,
      "timer/training_step": 5.261943340301514,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0005271434783936,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0011090646228190046,
      "grad_norm": 4.28013277053833,
      "learning_rate": 2.9988908255427033e-05,
      "loss": 0.0037,
      "loss/policy_avg": 0.0036908092442899942,
      "objective/entropy": 68.02294158935547,
      "objective/kl": 0.15080219507217407,
      "objective/rlhf_reward": 3.149240016937256,
      "objective/scores": 3.1646728515625,
      "policy/approxkl_avg": 0.11497651040554047,
      "policy/clipfrac_avg": 0.25,
      "policy/entropy_avg": 0.5161693692207336,
      "step": 7,
      "timer/calc_advantages": 2.2946014404296875,
      "timer/calc_loss": 0.96665358543396,
      "timer/get_reward": 0.6638086438179016,
      "timer/training_step": 6.369439125061035,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 0.99953293800354,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0012675024260788625,
      "grad_norm": 8.370040893554688,
      "learning_rate": 2.9987323720488037e-05,
      "loss": 0.0078,
      "loss/policy_avg": 0.00783943198621273,
      "objective/entropy": 66.24089813232422,
      "objective/kl": 0.0030039921402931213,
      "objective/rlhf_reward": 3.1140072345733643,
      "objective/scores": 3.1142501831054688,
      "policy/approxkl_avg": 0.13499276340007782,
      "policy/clipfrac_avg": 0.345703125,
      "policy/entropy_avg": 0.4966353476047516,
      "step": 8,
      "timer/calc_advantages": 2.4299309253692627,
      "timer/calc_loss": 1.046256422996521,
      "timer/get_reward": 0.7162653207778931,
      "timer/training_step": 6.773098945617676,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0011930465698242,
      "val/ratio_var": null
    }
  ],
  "logging_steps": 1,
  "max_steps": 18933,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 8,
  "total_flos": 4025485044940800.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}