| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.927536231884058, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 131.3125, | |
| "epoch": 0.0966183574879227, | |
| "grad_norm": 1.5448709726333618, | |
| "kl": 0.00043543790525291115, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0, | |
| "reward": 0.6250000067055226, | |
| "reward_std": 0.29020712375640867, | |
| "rewards/cbt_technique_reward_func": 0.1375000011175871, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.10625000018626451, | |
| "rewards/question_asking_reward_func": 0.38124999925494196, | |
| "step": 5 | |
| }, | |
| { | |
| "completion_length": 116.3875, | |
| "epoch": 0.1932367149758454, | |
| "grad_norm": 1.3723770380020142, | |
| "kl": 0.0007617953233420849, | |
| "learning_rate": 9e-07, | |
| "loss": 0.0, | |
| "reward": 0.7187500111758709, | |
| "reward_std": 0.3299859084188938, | |
| "rewards/cbt_technique_reward_func": 0.1400000021792948, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.18000000081956385, | |
| "rewards/question_asking_reward_func": 0.39874999821186063, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 127.5, | |
| "epoch": 0.2898550724637681, | |
| "grad_norm": 1.2058902978897095, | |
| "kl": 0.0008091913536190986, | |
| "learning_rate": 9.555555555555556e-07, | |
| "loss": 0.0, | |
| "reward": 0.7375000104308128, | |
| "reward_std": 0.29069150872528554, | |
| "rewards/cbt_technique_reward_func": 0.14625000171363353, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.20250000078231095, | |
| "rewards/question_asking_reward_func": 0.3887499958276749, | |
| "step": 15 | |
| }, | |
| { | |
| "completion_length": 118.6875, | |
| "epoch": 0.3864734299516908, | |
| "grad_norm": 1.1809375286102295, | |
| "kl": 0.000778093043481931, | |
| "learning_rate": 9e-07, | |
| "loss": 0.0, | |
| "reward": 0.8087500005960464, | |
| "reward_std": 0.2881319634616375, | |
| "rewards/cbt_technique_reward_func": 0.15375000424683094, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.21750000212341547, | |
| "rewards/question_asking_reward_func": 0.4374999970197678, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 130.5375, | |
| "epoch": 0.4830917874396135, | |
| "grad_norm": 0.8987158536911011, | |
| "kl": 0.0007172481535235419, | |
| "learning_rate": 8.444444444444444e-07, | |
| "loss": 0.0, | |
| "reward": 0.690000006556511, | |
| "reward_std": 0.22531789541244507, | |
| "rewards/cbt_technique_reward_func": 0.13875000309199095, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.15124999973922967, | |
| "rewards/question_asking_reward_func": 0.39999999701976774, | |
| "step": 25 | |
| }, | |
| { | |
| "completion_length": 135.575, | |
| "epoch": 0.5797101449275363, | |
| "grad_norm": 1.636861801147461, | |
| "kl": 0.0007032989873550832, | |
| "learning_rate": 7.888888888888889e-07, | |
| "loss": 0.0, | |
| "reward": 0.657500009611249, | |
| "reward_std": 0.24160839468240738, | |
| "rewards/cbt_technique_reward_func": 0.1337500031106174, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.11499999947845936, | |
| "rewards/question_asking_reward_func": 0.40874999612569807, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 130.85, | |
| "epoch": 0.6763285024154589, | |
| "grad_norm": 1.004942536354065, | |
| "kl": 0.0006958791636861861, | |
| "learning_rate": 7.333333333333332e-07, | |
| "loss": 0.0, | |
| "reward": 0.6937500074505806, | |
| "reward_std": 0.25641300678253176, | |
| "rewards/cbt_technique_reward_func": 0.12000000299885868, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.17000000029802323, | |
| "rewards/question_asking_reward_func": 0.4037499949336052, | |
| "step": 35 | |
| }, | |
| { | |
| "completion_length": 109.175, | |
| "epoch": 0.7729468599033816, | |
| "grad_norm": 1.200303077697754, | |
| "kl": 0.0006988899374846369, | |
| "learning_rate": 6.777777777777778e-07, | |
| "loss": 0.0, | |
| "reward": 0.5937500014901161, | |
| "reward_std": 0.2189602989703417, | |
| "rewards/cbt_technique_reward_func": 0.11375000216066837, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.10499999951571226, | |
| "rewards/question_asking_reward_func": 0.37499999776482584, | |
| "step": 40 | |
| }, | |
| { | |
| "completion_length": 130.275, | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 1.5182042121887207, | |
| "kl": 0.000668867253989447, | |
| "learning_rate": 6.222222222222223e-07, | |
| "loss": 0.0, | |
| "reward": 0.6749999985098839, | |
| "reward_std": 0.256393301486969, | |
| "rewards/cbt_technique_reward_func": 0.14625000339001418, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.12875000070780515, | |
| "rewards/question_asking_reward_func": 0.3999999985098839, | |
| "step": 45 | |
| }, | |
| { | |
| "completion_length": 116.8, | |
| "epoch": 0.966183574879227, | |
| "grad_norm": 1.9656929969787598, | |
| "kl": 0.0007837369499611669, | |
| "learning_rate": 5.666666666666666e-07, | |
| "loss": 0.0, | |
| "reward": 0.687499999254942, | |
| "reward_std": 0.2334746764972806, | |
| "rewards/cbt_technique_reward_func": 0.11000000266358256, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.19375000055879354, | |
| "rewards/question_asking_reward_func": 0.38374999538064003, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 130.02631578947367, | |
| "epoch": 1.0579710144927537, | |
| "grad_norm": 1.276727557182312, | |
| "kl": 0.0007787851224604406, | |
| "learning_rate": 5.111111111111111e-07, | |
| "loss": 0.0, | |
| "reward": 0.7000000014116889, | |
| "reward_std": 0.27186120026989985, | |
| "rewards/cbt_technique_reward_func": 0.15789473929295414, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.1684210531805691, | |
| "rewards/question_asking_reward_func": 0.37368421099687876, | |
| "step": 55 | |
| }, | |
| { | |
| "completion_length": 124.0375, | |
| "epoch": 1.1545893719806763, | |
| "grad_norm": 1.4976823329925537, | |
| "kl": 0.0006855996092781424, | |
| "learning_rate": 4.555555555555555e-07, | |
| "loss": 0.0, | |
| "reward": 0.731249999627471, | |
| "reward_std": 0.2586277686059475, | |
| "rewards/cbt_technique_reward_func": 0.1425000037997961, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.19749999791383743, | |
| "rewards/question_asking_reward_func": 0.39124999567866325, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 136.325, | |
| "epoch": 1.251207729468599, | |
| "grad_norm": 2.217841863632202, | |
| "kl": 0.0007303371050511487, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0, | |
| "reward": 0.7212500154972077, | |
| "reward_std": 0.23724802657961847, | |
| "rewards/cbt_technique_reward_func": 0.11375000402331352, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.17875000182539225, | |
| "rewards/question_asking_reward_func": 0.4287499986588955, | |
| "step": 65 | |
| }, | |
| { | |
| "completion_length": 127.1, | |
| "epoch": 1.3478260869565217, | |
| "grad_norm": 1.2720558643341064, | |
| "kl": 0.0008214380504796281, | |
| "learning_rate": 3.4444444444444444e-07, | |
| "loss": 0.0, | |
| "reward": 0.6512500032782554, | |
| "reward_std": 0.24096153806895018, | |
| "rewards/cbt_technique_reward_func": 0.12250000247731804, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.13250000029802322, | |
| "rewards/question_asking_reward_func": 0.39624999538064004, | |
| "step": 70 | |
| }, | |
| { | |
| "completion_length": 123.4, | |
| "epoch": 1.4444444444444444, | |
| "grad_norm": 1.5885432958602905, | |
| "kl": 0.0008428851724602282, | |
| "learning_rate": 2.8888888888888885e-07, | |
| "loss": 0.0, | |
| "reward": 0.6575000032782554, | |
| "reward_std": 0.3075646057724953, | |
| "rewards/cbt_technique_reward_func": 0.14500000271946192, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.14625000059604645, | |
| "rewards/question_asking_reward_func": 0.3662499986588955, | |
| "step": 75 | |
| }, | |
| { | |
| "completion_length": 122.525, | |
| "epoch": 1.541062801932367, | |
| "grad_norm": 1.6414885520935059, | |
| "kl": 0.0007202147302450612, | |
| "learning_rate": 2.3333333333333333e-07, | |
| "loss": 0.0, | |
| "reward": 0.6725000083446503, | |
| "reward_std": 0.2713221043348312, | |
| "rewards/cbt_technique_reward_func": 0.1287500030361116, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.13625000175088645, | |
| "rewards/question_asking_reward_func": 0.40749999806284903, | |
| "step": 80 | |
| }, | |
| { | |
| "completion_length": 113.65, | |
| "epoch": 1.6376811594202898, | |
| "grad_norm": 1.197077751159668, | |
| "kl": 0.0007829821581253782, | |
| "learning_rate": 1.7777777777777776e-07, | |
| "loss": 0.0, | |
| "reward": 0.6862500173039734, | |
| "reward_std": 0.227858448587358, | |
| "rewards/cbt_technique_reward_func": 0.12375000417232514, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.1512499988079071, | |
| "rewards/question_asking_reward_func": 0.4112499952316284, | |
| "step": 85 | |
| }, | |
| { | |
| "completion_length": 121.9, | |
| "epoch": 1.7342995169082127, | |
| "grad_norm": 0.9502215385437012, | |
| "kl": 0.000955963070737198, | |
| "learning_rate": 1.2222222222222222e-07, | |
| "loss": 0.0, | |
| "reward": 0.6025000005960465, | |
| "reward_std": 0.22266108132898807, | |
| "rewards/cbt_technique_reward_func": 0.11750000119209289, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.09875000026077033, | |
| "rewards/question_asking_reward_func": 0.3862499982118607, | |
| "step": 90 | |
| }, | |
| { | |
| "completion_length": 123.5125, | |
| "epoch": 1.8309178743961354, | |
| "grad_norm": 1.4943199157714844, | |
| "kl": 0.0007219786857604049, | |
| "learning_rate": 6.666666666666667e-08, | |
| "loss": 0.0, | |
| "reward": 0.6962499976158142, | |
| "reward_std": 0.23088937066495419, | |
| "rewards/cbt_technique_reward_func": 0.1312500026077032, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.14625000022351742, | |
| "rewards/question_asking_reward_func": 0.41874999478459357, | |
| "step": 95 | |
| }, | |
| { | |
| "completion_length": 125.6, | |
| "epoch": 1.927536231884058, | |
| "grad_norm": 1.0279828310012817, | |
| "kl": 0.000812371401116252, | |
| "learning_rate": 1.111111111111111e-08, | |
| "loss": 0.0, | |
| "reward": 0.7137500122189522, | |
| "reward_std": 0.27089230343699455, | |
| "rewards/cbt_technique_reward_func": 0.12625000337138773, | |
| "rewards/mmkay_speech_pattern_reward_func": 0.2049999987706542, | |
| "rewards/question_asking_reward_func": 0.38249999582767485, | |
| "step": 100 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |