Invalid JSON: Unexpected token 'N', ..."tio_var": NaN
},"... is not valid JSON
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.002535004852157725, | |
| "eval_steps": 500, | |
| "global_step": 16, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0001584378032598578, | |
| "grad_norm": 7.086519326549023e-05, | |
| "learning_rate": 2.9998415465061005e-05, | |
| "loss": 0.0, | |
| "loss/policy_avg": 9.534414857625961e-08, | |
| "objective/entropy": 66.7407455444336, | |
| "objective/kl": 0.0, | |
| "objective/rlhf_reward": 1.9394512176513672, | |
| "objective/scores": 1.9393310546875, | |
| "policy/approxkl_avg": 0.0, | |
| "policy/clipfrac_avg": 0.0, | |
| "policy/entropy_avg": 0.501089334487915, | |
| "step": 1, | |
| "timer/calc_advantages": 1.9189459085464478, | |
| "timer/calc_loss": 0.7234929800033569, | |
| "timer/get_reward": 0.5044295787811279, | |
| "timer/training_step": 5.130897045135498, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0003168756065197156, | |
| "grad_norm": 9.20624828338623, | |
| "learning_rate": 2.999683093012201e-05, | |
| "loss": 0.0082, | |
| "loss/policy_avg": 0.008180337026715279, | |
| "objective/entropy": 58.23992919921875, | |
| "objective/kl": 0.04112038388848305, | |
| "objective/rlhf_reward": 2.586242914199829, | |
| "objective/scores": 2.59033203125, | |
| "policy/approxkl_avg": 0.13751985132694244, | |
| "policy/clipfrac_avg": 0.353515625, | |
| "policy/entropy_avg": 0.4371030628681183, | |
| "step": 2, | |
| "timer/calc_advantages": 1.77765953540802, | |
| "timer/calc_loss": 0.620478630065918, | |
| "timer/get_reward": 0.43380415439605713, | |
| "timer/training_step": 4.58284854888916, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0005027055740356, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0004753134097795734, | |
| "grad_norm": 11.192896842956543, | |
| "learning_rate": 2.9995246395183014e-05, | |
| "loss": 0.0083, | |
| "loss/policy_avg": 0.008318130858242512, | |
| "objective/entropy": 60.747886657714844, | |
| "objective/kl": 0.1742033064365387, | |
| "objective/rlhf_reward": 2.7990851402282715, | |
| "objective/scores": 2.8162841796875, | |
| "policy/approxkl_avg": 0.1877279430627823, | |
| "policy/clipfrac_avg": 0.345703125, | |
| "policy/entropy_avg": 0.47007349133491516, | |
| "step": 3, | |
| "timer/calc_advantages": 2.159975528717041, | |
| "timer/calc_loss": 0.8658402562141418, | |
| "timer/get_reward": 0.5989301204681396, | |
| "timer/training_step": 5.865015029907227, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 0.9998154640197754, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0006337512130394313, | |
| "grad_norm": 10.267037391662598, | |
| "learning_rate": 2.999366186024402e-05, | |
| "loss": 0.0119, | |
| "loss/policy_avg": 0.011887951754033566, | |
| "objective/entropy": 59.366294860839844, | |
| "objective/kl": 0.12045621126890182, | |
| "objective/rlhf_reward": 2.8014278411865234, | |
| "objective/scores": 2.8134765625, | |
| "policy/approxkl_avg": 0.2340196967124939, | |
| "policy/clipfrac_avg": 0.369140625, | |
| "policy/entropy_avg": 0.4660375118255615, | |
| "step": 4, | |
| "timer/calc_advantages": 1.9160572290420532, | |
| "timer/calc_loss": 0.8047055602073669, | |
| "timer/get_reward": 0.5554770231246948, | |
| "timer/training_step": 5.4731550216674805, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.000678539276123, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.000792189016299289, | |
| "grad_norm": 5.658022200805135e-05, | |
| "learning_rate": 2.9992077325305024e-05, | |
| "loss": 0.0, | |
| "loss/policy_avg": 9.505311027169228e-08, | |
| "objective/entropy": 63.056705474853516, | |
| "objective/kl": 0.0, | |
| "objective/rlhf_reward": 1.961860179901123, | |
| "objective/scores": 1.961883544921875, | |
| "policy/approxkl_avg": 0.0, | |
| "policy/clipfrac_avg": 0.0, | |
| "policy/entropy_avg": 0.49685803055763245, | |
| "step": 5, | |
| "timer/calc_advantages": 1.9551451206207275, | |
| "timer/calc_loss": 0.7570784091949463, | |
| "timer/get_reward": 0.5253068208694458, | |
| "timer/training_step": 5.274328231811523, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0009506268195591468, | |
| "grad_norm": 5.630626201629639, | |
| "learning_rate": 2.9990492790366028e-05, | |
| "loss": 0.0036, | |
| "loss/policy_avg": 0.0035882075317204, | |
| "objective/entropy": 64.0039291381836, | |
| "objective/kl": -0.06272067129611969, | |
| "objective/rlhf_reward": 2.5381531715393066, | |
| "objective/scores": 2.53204345703125, | |
| "policy/approxkl_avg": 0.07097644358873367, | |
| "policy/clipfrac_avg": 0.244140625, | |
| "policy/entropy_avg": 0.49359244108200073, | |
| "step": 6, | |
| "timer/calc_advantages": 1.9771534204483032, | |
| "timer/calc_loss": 0.7805941700935364, | |
| "timer/get_reward": 0.5281075835227966, | |
| "timer/training_step": 5.320864677429199, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.000971794128418, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0011090646228190046, | |
| "grad_norm": 7.572779178619385, | |
| "learning_rate": 2.9988908255427033e-05, | |
| "loss": 0.0058, | |
| "loss/policy_avg": 0.005815813317894936, | |
| "objective/entropy": 69.00190734863281, | |
| "objective/kl": 0.07886971533298492, | |
| "objective/rlhf_reward": 3.3839988708496094, | |
| "objective/scores": 3.39208984375, | |
| "policy/approxkl_avg": 0.1876513808965683, | |
| "policy/clipfrac_avg": 0.34765625, | |
| "policy/entropy_avg": 0.520235538482666, | |
| "step": 7, | |
| "timer/calc_advantages": 2.296631336212158, | |
| "timer/calc_loss": 0.9676017761230469, | |
| "timer/get_reward": 0.6649996042251587, | |
| "timer/training_step": 6.399840831756592, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.000232458114624, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0012675024260788625, | |
| "grad_norm": 11.984278678894043, | |
| "learning_rate": 2.9987323720488037e-05, | |
| "loss": 0.0093, | |
| "loss/policy_avg": 0.009322328492999077, | |
| "objective/entropy": 66.09063720703125, | |
| "objective/kl": 0.015054229646921158, | |
| "objective/rlhf_reward": 2.489471673965454, | |
| "objective/scores": 2.491485595703125, | |
| "policy/approxkl_avg": 0.18275578320026398, | |
| "policy/clipfrac_avg": 0.3046875, | |
| "policy/entropy_avg": 0.5070106387138367, | |
| "step": 8, | |
| "timer/calc_advantages": 2.2399752140045166, | |
| "timer/calc_loss": 1.0477569103240967, | |
| "timer/get_reward": 0.7243468761444092, | |
| "timer/training_step": 6.686029434204102, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0009795427322388, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0014259402293387202, | |
| "grad_norm": 7.540817750850692e-05, | |
| "learning_rate": 2.9985739185549042e-05, | |
| "loss": 0.0, | |
| "loss/policy_avg": 1.367880031466484e-07, | |
| "objective/entropy": 58.587005615234375, | |
| "objective/kl": 0.0, | |
| "objective/rlhf_reward": 2.611638069152832, | |
| "objective/scores": 2.61175537109375, | |
| "policy/approxkl_avg": 0.0, | |
| "policy/clipfrac_avg": 0.0, | |
| "policy/entropy_avg": 0.42249488830566406, | |
| "step": 9, | |
| "timer/calc_advantages": 2.2680113315582275, | |
| "timer/calc_loss": 0.9395630359649658, | |
| "timer/get_reward": 0.6489749550819397, | |
| "timer/training_step": 6.220600128173828, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.001584378032598578, | |
| "grad_norm": 1.935127854347229, | |
| "learning_rate": 2.9984154650610046e-05, | |
| "loss": 0.0007, | |
| "loss/policy_avg": 0.0006855675601400435, | |
| "objective/entropy": 55.21089172363281, | |
| "objective/kl": 0.0106657724827528, | |
| "objective/rlhf_reward": 2.033531427383423, | |
| "objective/scores": 2.0347900390625, | |
| "policy/approxkl_avg": 0.02970672771334648, | |
| "policy/clipfrac_avg": 0.197265625, | |
| "policy/entropy_avg": 0.4399160146713257, | |
| "step": 10, | |
| "timer/calc_advantages": 1.9682772159576416, | |
| "timer/calc_loss": 0.7604256272315979, | |
| "timer/get_reward": 0.5244199633598328, | |
| "timer/training_step": 5.279101371765137, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0002206563949585, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0017428158358584358, | |
| "grad_norm": 2.560812473297119, | |
| "learning_rate": 2.998257011567105e-05, | |
| "loss": 0.0011, | |
| "loss/policy_avg": 0.0011356198228895664, | |
| "objective/entropy": 64.66464233398438, | |
| "objective/kl": 0.009169694036245346, | |
| "objective/rlhf_reward": 2.824911594390869, | |
| "objective/scores": 2.82574462890625, | |
| "policy/approxkl_avg": 0.03749970346689224, | |
| "policy/clipfrac_avg": 0.21484375, | |
| "policy/entropy_avg": 0.5036777257919312, | |
| "step": 11, | |
| "timer/calc_advantages": 2.0788302421569824, | |
| "timer/calc_loss": 0.8143972754478455, | |
| "timer/get_reward": 0.5581433176994324, | |
| "timer/training_step": 5.590597629547119, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.000451683998108, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0019012536391182935, | |
| "grad_norm": 4.297569751739502, | |
| "learning_rate": 2.9980985580732055e-05, | |
| "loss": 0.0034, | |
| "loss/policy_avg": 0.003396428655833006, | |
| "objective/entropy": 69.60541534423828, | |
| "objective/kl": 0.10166570544242859, | |
| "objective/rlhf_reward": 1.655887246131897, | |
| "objective/scores": 1.6659393310546875, | |
| "policy/approxkl_avg": 0.09505846351385117, | |
| "policy/clipfrac_avg": 0.318359375, | |
| "policy/entropy_avg": 0.534148633480072, | |
| "step": 12, | |
| "timer/calc_advantages": 1.8084050416946411, | |
| "timer/calc_loss": 0.6441062092781067, | |
| "timer/get_reward": 0.44651317596435547, | |
| "timer/training_step": 4.796171188354492, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 0.9999833106994629, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.0020596914423781513, | |
| "grad_norm": 7.657416426809505e-05, | |
| "learning_rate": 2.997940104579306e-05, | |
| "loss": 0.0, | |
| "loss/policy_avg": 1.4971010386943817e-07, | |
| "objective/entropy": 58.742462158203125, | |
| "objective/kl": 0.0, | |
| "objective/rlhf_reward": 3.092315673828125, | |
| "objective/scores": 3.0924072265625, | |
| "policy/approxkl_avg": 0.0, | |
| "policy/clipfrac_avg": 0.0, | |
| "policy/entropy_avg": 0.4520529806613922, | |
| "step": 13, | |
| "timer/calc_advantages": 2.120788335800171, | |
| "timer/calc_loss": 0.8547518849372864, | |
| "timer/get_reward": 0.5879230499267578, | |
| "timer/training_step": 5.762821197509766, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.002218129245638009, | |
| "grad_norm": 2.661646604537964, | |
| "learning_rate": 2.9977816510854064e-05, | |
| "loss": 0.001, | |
| "loss/policy_avg": 0.001029975712299347, | |
| "objective/entropy": 68.30726623535156, | |
| "objective/kl": 0.03951136767864227, | |
| "objective/rlhf_reward": 2.7953004837036133, | |
| "objective/scores": 2.7994384765625, | |
| "policy/approxkl_avg": 0.040699079632759094, | |
| "policy/clipfrac_avg": 0.2265625, | |
| "policy/entropy_avg": 0.5269634127616882, | |
| "step": 14, | |
| "timer/calc_advantages": 1.9552991390228271, | |
| "timer/calc_loss": 0.7335948944091797, | |
| "timer/get_reward": 0.5061210989952087, | |
| "timer/training_step": 5.143916130065918, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.000035047531128, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.002376567048897867, | |
| "grad_norm": 3.1387367248535156, | |
| "learning_rate": 2.997623197591507e-05, | |
| "loss": 0.0025, | |
| "loss/policy_avg": 0.00247659208253026, | |
| "objective/entropy": 56.77816390991211, | |
| "objective/kl": 0.006408168934285641, | |
| "objective/rlhf_reward": 2.673807144165039, | |
| "objective/scores": 2.6744384765625, | |
| "policy/approxkl_avg": 0.06710580736398697, | |
| "policy/clipfrac_avg": 0.283203125, | |
| "policy/entropy_avg": 0.44521695375442505, | |
| "step": 15, | |
| "timer/calc_advantages": 2.5635008811950684, | |
| "timer/calc_loss": 1.1095852851867676, | |
| "timer/get_reward": 0.7531031966209412, | |
| "timer/training_step": 7.173662185668945, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 1.0003986358642578, | |
| "val/ratio_var": NaN | |
| }, | |
| { | |
| "epoch": 0.002535004852157725, | |
| "grad_norm": 3.675922155380249, | |
| "learning_rate": 2.9974647440976074e-05, | |
| "loss": 0.0035, | |
| "loss/policy_avg": 0.0034786476753652096, | |
| "objective/entropy": 55.65203094482422, | |
| "objective/kl": 0.0999540314078331, | |
| "objective/rlhf_reward": 2.479569435119629, | |
| "objective/scores": 2.489501953125, | |
| "policy/approxkl_avg": 0.10638778656721115, | |
| "policy/clipfrac_avg": 0.359375, | |
| "policy/entropy_avg": 0.4279758334159851, | |
| "step": 16, | |
| "timer/calc_advantages": 2.1651806831359863, | |
| "timer/calc_loss": 0.8663696646690369, | |
| "timer/get_reward": 0.5983204245567322, | |
| "timer/training_step": 5.955389022827148, | |
| "val/num_eos_tokens": 0.0, | |
| "val/ratio": 0.9999206066131592, | |
| "val/ratio_var": NaN | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 18933, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 8, | |
| "total_flos": 8202719718014976.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |