{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0012675024260788625,
  "eval_steps": 500,
  "global_step": 8,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001584378032598578,
      "grad_norm": 7.086519326549023e-05,
      "learning_rate": 2.9998415465061005e-05,
      "loss": 0.0,
      "loss/policy_avg": 9.534414857625961e-08,
      "objective/entropy": 66.7407455444336,
      "objective/kl": 0.0,
      "objective/rlhf_reward": 1.9394512176513672,
      "objective/scores": 1.9393310546875,
      "policy/approxkl_avg": 0.0,
      "policy/clipfrac_avg": 0.0,
      "policy/entropy_avg": 0.501089334487915,
      "step": 1,
      "timer/calc_advantages": 1.9189459085464478,
      "timer/calc_loss": 0.7234929800033569,
      "timer/get_reward": 0.5044295787811279,
      "timer/training_step": 5.130897045135498,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0003168756065197156,
      "grad_norm": 9.20624828338623,
      "learning_rate": 2.999683093012201e-05,
      "loss": 0.0082,
      "loss/policy_avg": 0.008180337026715279,
      "objective/entropy": 58.23992919921875,
      "objective/kl": 0.04112038388848305,
      "objective/rlhf_reward": 2.586242914199829,
      "objective/scores": 2.59033203125,
      "policy/approxkl_avg": 0.13751985132694244,
      "policy/clipfrac_avg": 0.353515625,
      "policy/entropy_avg": 0.4371030628681183,
      "step": 2,
      "timer/calc_advantages": 1.77765953540802,
      "timer/calc_loss": 0.620478630065918,
      "timer/get_reward": 0.43380415439605713,
      "timer/training_step": 4.58284854888916,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0005027055740356,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0004753134097795734,
      "grad_norm": 11.192896842956543,
      "learning_rate": 2.9995246395183014e-05,
      "loss": 0.0083,
      "loss/policy_avg": 0.008318130858242512,
      "objective/entropy": 60.747886657714844,
      "objective/kl": 0.1742033064365387,
      "objective/rlhf_reward": 2.7990851402282715,
      "objective/scores": 2.8162841796875,
      "policy/approxkl_avg": 0.1877279430627823,
      "policy/clipfrac_avg": 0.345703125,
      "policy/entropy_avg": 0.47007349133491516,
      "step": 3,
      "timer/calc_advantages": 2.159975528717041,
      "timer/calc_loss": 0.8658402562141418,
      "timer/get_reward": 0.5989301204681396,
      "timer/training_step": 5.865015029907227,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 0.9998154640197754,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0006337512130394313,
      "grad_norm": 10.267037391662598,
      "learning_rate": 2.999366186024402e-05,
      "loss": 0.0119,
      "loss/policy_avg": 0.011887951754033566,
      "objective/entropy": 59.366294860839844,
      "objective/kl": 0.12045621126890182,
      "objective/rlhf_reward": 2.8014278411865234,
      "objective/scores": 2.8134765625,
      "policy/approxkl_avg": 0.2340196967124939,
      "policy/clipfrac_avg": 0.369140625,
      "policy/entropy_avg": 0.4660375118255615,
      "step": 4,
      "timer/calc_advantages": 1.9160572290420532,
      "timer/calc_loss": 0.8047055602073669,
      "timer/get_reward": 0.5554770231246948,
      "timer/training_step": 5.4731550216674805,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.000678539276123,
      "val/ratio_var": null
    },
    {
      "epoch": 0.000792189016299289,
      "grad_norm": 5.658022200805135e-05,
      "learning_rate": 2.9992077325305024e-05,
      "loss": 0.0,
      "loss/policy_avg": 9.505311027169228e-08,
      "objective/entropy": 63.056705474853516,
      "objective/kl": 0.0,
      "objective/rlhf_reward": 1.961860179901123,
      "objective/scores": 1.961883544921875,
      "policy/approxkl_avg": 0.0,
      "policy/clipfrac_avg": 0.0,
      "policy/entropy_avg": 0.49685803055763245,
      "step": 5,
      "timer/calc_advantages": 1.9551451206207275,
      "timer/calc_loss": 0.7570784091949463,
      "timer/get_reward": 0.5253068208694458,
      "timer/training_step": 5.274328231811523,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0009506268195591468,
      "grad_norm": 5.630626201629639,
      "learning_rate": 2.9990492790366028e-05,
      "loss": 0.0036,
      "loss/policy_avg": 0.0035882075317204,
      "objective/entropy": 64.0039291381836,
      "objective/kl": -0.06272067129611969,
      "objective/rlhf_reward": 2.5381531715393066,
      "objective/scores": 2.53204345703125,
      "policy/approxkl_avg": 0.07097644358873367,
      "policy/clipfrac_avg": 0.244140625,
      "policy/entropy_avg": 0.49359244108200073,
      "step": 6,
      "timer/calc_advantages": 1.9771534204483032,
      "timer/calc_loss": 0.7805941700935364,
      "timer/get_reward": 0.5281075835227966,
      "timer/training_step": 5.320864677429199,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.000971794128418,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0011090646228190046,
      "grad_norm": 7.572779178619385,
      "learning_rate": 2.9988908255427033e-05,
      "loss": 0.0058,
      "loss/policy_avg": 0.005815813317894936,
      "objective/entropy": 69.00190734863281,
      "objective/kl": 0.07886971533298492,
      "objective/rlhf_reward": 3.3839988708496094,
      "objective/scores": 3.39208984375,
      "policy/approxkl_avg": 0.1876513808965683,
      "policy/clipfrac_avg": 0.34765625,
      "policy/entropy_avg": 0.520235538482666,
      "step": 7,
      "timer/calc_advantages": 2.296631336212158,
      "timer/calc_loss": 0.9676017761230469,
      "timer/get_reward": 0.6649996042251587,
      "timer/training_step": 6.399840831756592,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.000232458114624,
      "val/ratio_var": null
    },
    {
      "epoch": 0.0012675024260788625,
      "grad_norm": 11.984278678894043,
      "learning_rate": 2.9987323720488037e-05,
      "loss": 0.0093,
      "loss/policy_avg": 0.009322328492999077,
      "objective/entropy": 66.09063720703125,
      "objective/kl": 0.015054229646921158,
      "objective/rlhf_reward": 2.489471673965454,
      "objective/scores": 2.491485595703125,
      "policy/approxkl_avg": 0.18275578320026398,
      "policy/clipfrac_avg": 0.3046875,
      "policy/entropy_avg": 0.5070106387138367,
      "step": 8,
      "timer/calc_advantages": 2.2399752140045166,
      "timer/calc_loss": 1.0477569103240967,
      "timer/get_reward": 0.7243468761444092,
      "timer/training_step": 6.686029434204102,
      "val/num_eos_tokens": 0.0,
      "val/ratio": 1.0009795427322388,
      "val/ratio_var": null
    }
  ],
  "logging_steps": 1,
  "max_steps": 18933,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 8,
  "total_flos": 4025485044940800.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}