| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 500, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.064, |
| "grad_norm": 47.54409408569336, |
| "learning_rate": 6.249999999999999e-07, |
| "logits/chosen": -1.7506301403045654, |
| "logits/rejected": -0.9330015778541565, |
| "logps/chosen": -326.6182861328125, |
| "logps/rejected": -795.388916015625, |
| "loss": 0.7092, |
| "rewards/accuracies": 0.49687498807907104, |
| "rewards/chosen": 0.004627525806427002, |
| "rewards/margins": 0.044374678283929825, |
| "rewards/rejected": -0.039747148752212524, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 11.836691856384277, |
| "learning_rate": 9.979871469976195e-07, |
| "logits/chosen": -1.7661733627319336, |
| "logits/rejected": -0.9799120426177979, |
| "logps/chosen": -345.76220703125, |
| "logps/rejected": -836.4850463867188, |
| "loss": 0.3981, |
| "rewards/accuracies": 0.9468749761581421, |
| "rewards/chosen": 0.05132237449288368, |
| "rewards/margins": 1.2102007865905762, |
| "rewards/rejected": -1.1588784456253052, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.3670778274536133, |
| "learning_rate": 9.755282581475767e-07, |
| "logits/chosen": -1.934342861175537, |
| "logits/rejected": -1.3244361877441406, |
| "logps/chosen": -345.7969970703125, |
| "logps/rejected": -941.4720458984375, |
| "loss": 0.1481, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -3.1422524452209473, |
| "rewards/margins": 11.108638763427734, |
| "rewards/rejected": -14.250890731811523, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.37081119418144226, |
| "learning_rate": 9.29224396800933e-07, |
| "logits/chosen": -1.9896260499954224, |
| "logits/rejected": -1.5620503425598145, |
| "logps/chosen": -398.2611999511719, |
| "logps/rejected": -1067.803466796875, |
| "loss": 0.0628, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -8.123555183410645, |
| "rewards/margins": 22.457536697387695, |
| "rewards/rejected": -30.58109474182129, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.2008218616247177, |
| "learning_rate": 8.613974319136957e-07, |
| "logits/chosen": -1.8343955278396606, |
| "logits/rejected": -1.3990973234176636, |
| "logps/chosen": -393.0896301269531, |
| "logps/rejected": -1073.05419921875, |
| "loss": 0.0482, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -5.675015449523926, |
| "rewards/margins": 21.024158477783203, |
| "rewards/rejected": -26.699174880981445, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.2783186733722687, |
| "learning_rate": 7.754484907260512e-07, |
| "logits/chosen": -1.7005574703216553, |
| "logits/rejected": -1.2351994514465332, |
| "logps/chosen": -363.7461853027344, |
| "logps/rejected": -956.2418212890625, |
| "loss": 0.0509, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.303299903869629, |
| "rewards/margins": 15.75185775756836, |
| "rewards/rejected": -19.055158615112305, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.19658127427101135, |
| "learning_rate": 6.756874120406714e-07, |
| "logits/chosen": -1.6647199392318726, |
| "logits/rejected": -1.1417269706726074, |
| "logps/chosen": -306.51519775390625, |
| "logps/rejected": -924.2443237304688, |
| "loss": 0.0512, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -1.8024471998214722, |
| "rewards/margins": 14.316963195800781, |
| "rewards/rejected": -16.119409561157227, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.15499065816402435, |
| "learning_rate": 5.671166329088277e-07, |
| "logits/chosen": -1.6030125617980957, |
| "logits/rejected": -1.0880917310714722, |
| "logps/chosen": -331.5353088378906, |
| "logps/rejected": -947.468017578125, |
| "loss": 0.0486, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -1.8866089582443237, |
| "rewards/margins": 14.326324462890625, |
| "rewards/rejected": -16.212932586669922, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.15497474372386932, |
| "learning_rate": 4.5518034554828327e-07, |
| "logits/chosen": -1.6156357526779175, |
| "logits/rejected": -1.0863291025161743, |
| "logps/chosen": -340.2659912109375, |
| "logps/rejected": -988.78564453125, |
| "loss": 0.0554, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -1.919586420059204, |
| "rewards/margins": 14.997438430786133, |
| "rewards/rejected": -16.917024612426758, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.16324351727962494, |
| "learning_rate": 3.454915028125263e-07, |
| "logits/chosen": -1.586004376411438, |
| "logits/rejected": -1.047084093093872, |
| "logps/chosen": -323.78350830078125, |
| "logps/rejected": -967.1044311523438, |
| "loss": 0.0436, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -1.772637963294983, |
| "rewards/margins": 15.62191390991211, |
| "rewards/rejected": -17.39455223083496, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.31694385409355164, |
| "learning_rate": 2.4355036129704696e-07, |
| "logits/chosen": -1.5823489427566528, |
| "logits/rejected": -1.0539071559906006, |
| "logps/chosen": -335.7318420410156, |
| "logps/rejected": -982.3804321289062, |
| "loss": 0.0536, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -1.8680168390274048, |
| "rewards/margins": 15.72433853149414, |
| "rewards/rejected": -17.592355728149414, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.4332602918148041, |
| "learning_rate": 1.5446867550656767e-07, |
| "logits/chosen": -1.5331312417984009, |
| "logits/rejected": -1.0537359714508057, |
| "logps/chosen": -365.61004638671875, |
| "logps/rejected": -955.177978515625, |
| "loss": 0.0621, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -2.1784939765930176, |
| "rewards/margins": 14.849874496459961, |
| "rewards/rejected": -17.02836799621582, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.3648674786090851, |
| "learning_rate": 8.271337313934867e-08, |
| "logits/chosen": -1.572448968887329, |
| "logits/rejected": -1.061173677444458, |
| "logps/chosen": -352.5264892578125, |
| "logps/rejected": -974.0443115234375, |
| "loss": 0.0502, |
| "rewards/accuracies": 0.9906249642372131, |
| "rewards/chosen": -1.944822072982788, |
| "rewards/margins": 15.244119644165039, |
| "rewards/rejected": -17.188941955566406, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.1386958509683609, |
| "learning_rate": 3.188256468013139e-08, |
| "logits/chosen": -1.604827642440796, |
| "logits/rejected": -1.0495269298553467, |
| "logps/chosen": -310.2612609863281, |
| "logps/rejected": -985.6607055664062, |
| "loss": 0.0457, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -1.420556902885437, |
| "rewards/margins": 16.280969619750977, |
| "rewards/rejected": -17.701526641845703, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.13034382462501526, |
| "learning_rate": 4.5251191160326495e-09, |
| "logits/chosen": -1.5691853761672974, |
| "logits/rejected": -1.0549018383026123, |
| "logps/chosen": -345.1254577636719, |
| "logps/rejected": -989.0575561523438, |
| "loss": 0.0507, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -1.7863011360168457, |
| "rewards/margins": 15.717473983764648, |
| "rewards/rejected": -17.503774642944336, |
| "step": 150 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1530176818095063e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|