| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9893390191897654, | |
| "eval_steps": 500, | |
| "global_step": 58, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.3348236083984, | |
| "epoch": 0.017057569296375266, | |
| "grad_norm": 295.6246337890625, | |
| "kl": 0.0, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0007, | |
| "reward": 1.3604911267757416, | |
| "reward_std": 0.43993261456489563, | |
| "rewards/accuracy_reward": 0.4196428768336773, | |
| "rewards/format_reward": 0.9408482611179352, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.4154644012451, | |
| "epoch": 0.08528784648187633, | |
| "grad_norm": 0.2173105925321579, | |
| "kl": 0.0015696585178375244, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0003, | |
| "reward": 1.36104916036129, | |
| "reward_std": 0.4599178032949567, | |
| "rewards/accuracy_reward": 0.4065290354192257, | |
| "rewards/format_reward": 0.9545201268047094, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.4203186035156, | |
| "epoch": 0.17057569296375266, | |
| "grad_norm": 0.3614032566547394, | |
| "kl": 80.7047048330307, | |
| "learning_rate": 2.956412726139078e-06, | |
| "loss": 3.2442, | |
| "reward": 1.4700893461704254, | |
| "reward_std": 0.42430560290813446, | |
| "rewards/accuracy_reward": 0.5145089522004127, | |
| "rewards/format_reward": 0.9555803969502449, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.5118347167969, | |
| "epoch": 0.255863539445629, | |
| "grad_norm": 5230.3447265625, | |
| "kl": 0.45456657409667967, | |
| "learning_rate": 2.7836719084521715e-06, | |
| "loss": 0.0182, | |
| "reward": 1.5975447058677674, | |
| "reward_std": 0.3535365674644709, | |
| "rewards/accuracy_reward": 0.6444196701049805, | |
| "rewards/format_reward": 0.9531250447034836, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.7593780517578, | |
| "epoch": 0.3411513859275053, | |
| "grad_norm": 0.2595553398132324, | |
| "kl": 3.4332809448242188, | |
| "learning_rate": 2.4946839873611927e-06, | |
| "loss": 0.1369, | |
| "reward": 1.663169714808464, | |
| "reward_std": 0.30231964513659476, | |
| "rewards/accuracy_reward": 0.7095982447266579, | |
| "rewards/format_reward": 0.9535714656114578, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.8439743041993, | |
| "epoch": 0.42643923240938164, | |
| "grad_norm": 0.1941014677286148, | |
| "kl": 0.010175323486328125, | |
| "learning_rate": 2.1156192081791355e-06, | |
| "loss": 0.0004, | |
| "reward": 1.632812574505806, | |
| "reward_std": 0.3012897618114948, | |
| "rewards/accuracy_reward": 0.6939732454717159, | |
| "rewards/format_reward": 0.9388393223285675, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.8616088867187, | |
| "epoch": 0.511727078891258, | |
| "grad_norm": 0.18191584944725037, | |
| "kl": 0.021715545654296876, | |
| "learning_rate": 1.6808050203829845e-06, | |
| "loss": 0.0008, | |
| "reward": 1.6042411476373672, | |
| "reward_std": 0.3133077774196863, | |
| "rewards/accuracy_reward": 0.6660714574158192, | |
| "rewards/format_reward": 0.9381696850061416, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.3654098510742, | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.32974478602409363, | |
| "kl": 0.02928009033203125, | |
| "learning_rate": 1.2296174432791415e-06, | |
| "loss": 0.001, | |
| "reward": 1.5727679282426834, | |
| "reward_std": 0.3634680099785328, | |
| "rewards/accuracy_reward": 0.6535714581608772, | |
| "rewards/format_reward": 0.9191964700818062, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1023.0618392944336, | |
| "epoch": 0.6823027718550106, | |
| "grad_norm": 0.38136547803878784, | |
| "kl": 0.0487884521484375, | |
| "learning_rate": 8.029152419343472e-07, | |
| "loss": 0.0015, | |
| "reward": 1.5879464954137803, | |
| "reward_std": 0.3696904189884663, | |
| "rewards/accuracy_reward": 0.6725446730852127, | |
| "rewards/format_reward": 0.9154018253087998, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1021.6725723266602, | |
| "epoch": 0.767590618336887, | |
| "grad_norm": 0.5596392750740051, | |
| "kl": 0.0637298583984375, | |
| "learning_rate": 4.3933982822017883e-07, | |
| "loss": 0.0009, | |
| "reward": 1.5805804252624511, | |
| "reward_std": 0.3978343676775694, | |
| "rewards/accuracy_reward": 0.6763393178582191, | |
| "rewards/format_reward": 0.9042411163449288, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1020.4614181518555, | |
| "epoch": 0.8528784648187633, | |
| "grad_norm": 0.46670985221862793, | |
| "kl": 0.07415771484375, | |
| "learning_rate": 1.718159615201853e-07, | |
| "loss": 0.0019, | |
| "reward": 1.5774554371833802, | |
| "reward_std": 0.40023380927741525, | |
| "rewards/accuracy_reward": 0.6667411029338837, | |
| "rewards/format_reward": 0.9107143297791481, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1020.0493682861328, | |
| "epoch": 0.9381663113006397, | |
| "grad_norm": 0.28767386078834534, | |
| "kl": 0.143035888671875, | |
| "learning_rate": 2.4570139579284723e-08, | |
| "loss": 0.0027, | |
| "reward": 1.6111607879400254, | |
| "reward_std": 0.3957536414265633, | |
| "rewards/accuracy_reward": 0.6975446730852127, | |
| "rewards/format_reward": 0.9136161133646965, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1021.3299789428711, | |
| "epoch": 0.9893390191897654, | |
| "kl": 0.07426961263020833, | |
| "reward": 1.6130953133106232, | |
| "reward_std": 0.3950556789835294, | |
| "rewards/accuracy_reward": 0.691592293481032, | |
| "rewards/format_reward": 0.9215030198295912, | |
| "step": 58, | |
| "total_flos": 0.0, | |
| "train_loss": 0.294493970670938, | |
| "train_runtime": 20554.7381, | |
| "train_samples_per_second": 0.365, | |
| "train_steps_per_second": 0.003 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 58, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |