| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9958330532965927, | |
| "eval_steps": 100, | |
| "global_step": 464, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004301364339001277, | |
| "grad_norm": 73.13473510742188, | |
| "learning_rate": 1.0638297872340425e-08, | |
| "logits/chosen": -0.302734375, | |
| "logits/rejected": -0.3828125, | |
| "logps/chosen": -202.0, | |
| "logps/rejected": -172.0, | |
| "loss": 44.25, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.04301364339001277, | |
| "grad_norm": 71.8135986328125, | |
| "learning_rate": 1.0638297872340425e-07, | |
| "logits/chosen": -0.3203125, | |
| "logits/rejected": -0.337890625, | |
| "logps/chosen": -198.0, | |
| "logps/rejected": -165.0, | |
| "loss": 44.3581, | |
| "rewards/accuracies": 0.2395833283662796, | |
| "rewards/chosen": -0.0023651123046875, | |
| "rewards/margins": -0.00262451171875, | |
| "rewards/rejected": 0.0002613067626953125, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08602728678002554, | |
| "grad_norm": 65.34407806396484, | |
| "learning_rate": 2.127659574468085e-07, | |
| "logits/chosen": -0.296875, | |
| "logits/rejected": -0.3359375, | |
| "logps/chosen": -192.0, | |
| "logps/rejected": -167.0, | |
| "loss": 44.3793, | |
| "rewards/accuracies": 0.29218751192092896, | |
| "rewards/chosen": -0.00238037109375, | |
| "rewards/margins": -0.00157928466796875, | |
| "rewards/rejected": -0.000804901123046875, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.12904093017003831, | |
| "grad_norm": 79.51785278320312, | |
| "learning_rate": 3.1914893617021275e-07, | |
| "logits/chosen": -0.3125, | |
| "logits/rejected": -0.337890625, | |
| "logps/chosen": -206.0, | |
| "logps/rejected": -159.0, | |
| "loss": 44.3758, | |
| "rewards/accuracies": 0.28593748807907104, | |
| "rewards/chosen": 0.00067138671875, | |
| "rewards/margins": -0.00144195556640625, | |
| "rewards/rejected": 0.002105712890625, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.17205457356005108, | |
| "grad_norm": 73.9058837890625, | |
| "learning_rate": 4.25531914893617e-07, | |
| "logits/chosen": -0.33984375, | |
| "logits/rejected": -0.361328125, | |
| "logps/chosen": -197.0, | |
| "logps/rejected": -159.0, | |
| "loss": 44.3793, | |
| "rewards/accuracies": 0.2953124940395355, | |
| "rewards/chosen": -0.00061798095703125, | |
| "rewards/margins": -0.001556396484375, | |
| "rewards/rejected": 0.000934600830078125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.21506821695006384, | |
| "grad_norm": 90.93192291259766, | |
| "learning_rate": 4.999361498869529e-07, | |
| "logits/chosen": -0.3359375, | |
| "logits/rejected": -0.36328125, | |
| "logps/chosen": -196.0, | |
| "logps/rejected": -166.0, | |
| "loss": 44.3191, | |
| "rewards/accuracies": 0.30781251192092896, | |
| "rewards/chosen": 0.001434326171875, | |
| "rewards/margins": 0.00069427490234375, | |
| "rewards/rejected": 0.000743865966796875, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.25808186034007663, | |
| "grad_norm": 69.464599609375, | |
| "learning_rate": 4.988019438437758e-07, | |
| "logits/chosen": -0.3046875, | |
| "logits/rejected": -0.318359375, | |
| "logps/chosen": -189.0, | |
| "logps/rejected": -168.0, | |
| "loss": 44.3687, | |
| "rewards/accuracies": 0.29374998807907104, | |
| "rewards/chosen": 0.0023193359375, | |
| "rewards/margins": -0.0009918212890625, | |
| "rewards/rejected": 0.0033111572265625, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.30109550373008936, | |
| "grad_norm": 73.38098907470703, | |
| "learning_rate": 4.962562537324176e-07, | |
| "logits/chosen": -0.30859375, | |
| "logits/rejected": -0.333984375, | |
| "logps/chosen": -189.0, | |
| "logps/rejected": -167.0, | |
| "loss": 44.2961, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": 0.00762939453125, | |
| "rewards/margins": 0.00148773193359375, | |
| "rewards/rejected": 0.006134033203125, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.34410914712010215, | |
| "grad_norm": 78.60209655761719, | |
| "learning_rate": 4.923135215663896e-07, | |
| "logits/chosen": -0.302734375, | |
| "logits/rejected": -0.322265625, | |
| "logps/chosen": -190.0, | |
| "logps/rejected": -164.0, | |
| "loss": 44.2723, | |
| "rewards/accuracies": 0.3031249940395355, | |
| "rewards/chosen": 0.00958251953125, | |
| "rewards/margins": 0.00165557861328125, | |
| "rewards/rejected": 0.0079345703125, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.38712279051011494, | |
| "grad_norm": 72.0979232788086, | |
| "learning_rate": 4.8699611495083e-07, | |
| "logits/chosen": -0.298828125, | |
| "logits/rejected": -0.326171875, | |
| "logps/chosen": -192.0, | |
| "logps/rejected": -160.0, | |
| "loss": 44.318, | |
| "rewards/accuracies": 0.3296875059604645, | |
| "rewards/chosen": 0.01177978515625, | |
| "rewards/margins": 0.000476837158203125, | |
| "rewards/rejected": 0.01129150390625, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4301364339001277, | |
| "grad_norm": 80.73914337158203, | |
| "learning_rate": 4.803342001883246e-07, | |
| "logits/chosen": -0.3125, | |
| "logits/rejected": -0.33203125, | |
| "logps/chosen": -199.0, | |
| "logps/rejected": -173.0, | |
| "loss": 44.2637, | |
| "rewards/accuracies": 0.3109374940395355, | |
| "rewards/chosen": 0.0164794921875, | |
| "rewards/margins": 0.0026397705078125, | |
| "rewards/rejected": 0.01385498046875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4301364339001277, | |
| "eval_logits/chosen": -0.30078125, | |
| "eval_logits/rejected": -0.302734375, | |
| "eval_logps/chosen": -184.0, | |
| "eval_logps/rejected": -174.0, | |
| "eval_loss": 0.6918657422065735, | |
| "eval_rewards/accuracies": 0.3381776511669159, | |
| "eval_rewards/chosen": 0.01611328125, | |
| "eval_rewards/margins": 0.00162506103515625, | |
| "eval_rewards/rejected": 0.0145263671875, | |
| "eval_runtime": 2092.6368, | |
| "eval_samples_per_second": 2.925, | |
| "eval_steps_per_second": 0.732, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.47315007729014047, | |
| "grad_norm": 75.33040618896484, | |
| "learning_rate": 4.72365571141757e-07, | |
| "logits/chosen": -0.341796875, | |
| "logits/rejected": -0.359375, | |
| "logps/chosen": -193.0, | |
| "logps/rejected": -170.0, | |
| "loss": 44.3273, | |
| "rewards/accuracies": 0.34687501192092896, | |
| "rewards/chosen": 0.017333984375, | |
| "rewards/margins": 0.00176239013671875, | |
| "rewards/rejected": 0.0155029296875, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5161637206801533, | |
| "grad_norm": 87.9056625366211, | |
| "learning_rate": 4.6313543482507056e-07, | |
| "logits/chosen": -0.34375, | |
| "logits/rejected": -0.34375, | |
| "logps/chosen": -187.0, | |
| "logps/rejected": -163.0, | |
| "loss": 44.2031, | |
| "rewards/accuracies": 0.3531250059604645, | |
| "rewards/chosen": 0.01953125, | |
| "rewards/margins": 0.00360107421875, | |
| "rewards/rejected": 0.015869140625, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.559177364070166, | |
| "grad_norm": 80.84737396240234, | |
| "learning_rate": 4.526961549383108e-07, | |
| "logits/chosen": -0.314453125, | |
| "logits/rejected": -0.326171875, | |
| "logps/chosen": -203.0, | |
| "logps/rejected": -166.0, | |
| "loss": 44.1418, | |
| "rewards/accuracies": 0.3765625059604645, | |
| "rewards/chosen": 0.024658203125, | |
| "rewards/margins": 0.005950927734375, | |
| "rewards/rejected": 0.0186767578125, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6021910074601787, | |
| "grad_norm": 76.54241180419922, | |
| "learning_rate": 4.4110695480190597e-07, | |
| "logits/chosen": -0.3359375, | |
| "logits/rejected": -0.357421875, | |
| "logps/chosen": -189.0, | |
| "logps/rejected": -161.0, | |
| "loss": 44.0926, | |
| "rewards/accuracies": 0.40312498807907104, | |
| "rewards/chosen": 0.029296875, | |
| "rewards/margins": 0.00738525390625, | |
| "rewards/rejected": 0.02197265625, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6452046508501915, | |
| "grad_norm": 69.73096466064453, | |
| "learning_rate": 4.284335813754769e-07, | |
| "logits/chosen": -0.318359375, | |
| "logits/rejected": -0.341796875, | |
| "logps/chosen": -194.0, | |
| "logps/rejected": -168.0, | |
| "loss": 44.1789, | |
| "rewards/accuracies": 0.40312498807907104, | |
| "rewards/chosen": 0.031982421875, | |
| "rewards/margins": 0.00555419921875, | |
| "rewards/rejected": 0.0264892578125, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6882182942402043, | |
| "grad_norm": 79.0284423828125, | |
| "learning_rate": 4.1474793226723825e-07, | |
| "logits/chosen": -0.314453125, | |
| "logits/rejected": -0.333984375, | |
| "logps/chosen": -182.0, | |
| "logps/rejected": -164.0, | |
| "loss": 44.3586, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": 0.0306396484375, | |
| "rewards/margins": -0.00011539459228515625, | |
| "rewards/rejected": 0.03076171875, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7312319376302171, | |
| "grad_norm": 77.1414794921875, | |
| "learning_rate": 4.001276478500126e-07, | |
| "logits/chosen": -0.330078125, | |
| "logits/rejected": -0.345703125, | |
| "logps/chosen": -194.0, | |
| "logps/rejected": -165.0, | |
| "loss": 44.2418, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": 0.03662109375, | |
| "rewards/margins": 0.004119873046875, | |
| "rewards/rejected": 0.032470703125, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7742455810202299, | |
| "grad_norm": 98.8071517944336, | |
| "learning_rate": 3.846556707978337e-07, | |
| "logits/chosen": -0.3359375, | |
| "logits/rejected": -0.357421875, | |
| "logps/chosen": -187.0, | |
| "logps/rejected": -154.0, | |
| "loss": 44.0305, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": 0.044921875, | |
| "rewards/margins": 0.0101318359375, | |
| "rewards/rejected": 0.034912109375, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.8172592244102426, | |
| "grad_norm": 72.15998840332031, | |
| "learning_rate": 3.684197755419419e-07, | |
| "logits/chosen": -0.3203125, | |
| "logits/rejected": -0.369140625, | |
| "logps/chosen": -193.0, | |
| "logps/rejected": -166.0, | |
| "loss": 44.027, | |
| "rewards/accuracies": 0.4468750059604645, | |
| "rewards/chosen": 0.047119140625, | |
| "rewards/margins": 0.01068115234375, | |
| "rewards/rejected": 0.036376953125, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8602728678002554, | |
| "grad_norm": 86.6330795288086, | |
| "learning_rate": 3.5151207031562633e-07, | |
| "logits/chosen": -0.32421875, | |
| "logits/rejected": -0.333984375, | |
| "logps/chosen": -197.0, | |
| "logps/rejected": -166.0, | |
| "loss": 44.0617, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": 0.0556640625, | |
| "rewards/margins": 0.00958251953125, | |
| "rewards/rejected": 0.046142578125, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8602728678002554, | |
| "eval_logits/chosen": -0.298828125, | |
| "eval_logits/rejected": -0.302734375, | |
| "eval_logps/chosen": -184.0, | |
| "eval_logps/rejected": -173.0, | |
| "eval_loss": 0.6901950240135193, | |
| "eval_rewards/accuracies": 0.42161986231803894, | |
| "eval_rewards/chosen": 0.055419921875, | |
| "eval_rewards/margins": 0.00567626953125, | |
| "eval_rewards/rejected": 0.049560546875, | |
| "eval_runtime": 2098.8198, | |
| "eval_samples_per_second": 2.916, | |
| "eval_steps_per_second": 0.729, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.9032865111902681, | |
| "grad_norm": 77.22724151611328, | |
| "learning_rate": 3.34028474612874e-07, | |
| "logits/chosen": -0.302734375, | |
| "logits/rejected": -0.345703125, | |
| "logps/chosen": -197.0, | |
| "logps/rejected": -172.0, | |
| "loss": 44.2062, | |
| "rewards/accuracies": 0.3968749940395355, | |
| "rewards/chosen": 0.059814453125, | |
| "rewards/margins": 0.00506591796875, | |
| "rewards/rejected": 0.0546875, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9463001545802809, | |
| "grad_norm": 74.18938446044922, | |
| "learning_rate": 3.1606817502526736e-07, | |
| "logits/chosen": -0.302734375, | |
| "logits/rejected": -0.318359375, | |
| "logps/chosen": -193.0, | |
| "logps/rejected": -168.0, | |
| "loss": 44.0441, | |
| "rewards/accuracies": 0.43437498807907104, | |
| "rewards/chosen": 0.064453125, | |
| "rewards/margins": 0.0096435546875, | |
| "rewards/rejected": 0.0546875, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9893137979702937, | |
| "grad_norm": 111.19681549072266, | |
| "learning_rate": 2.9773306254423513e-07, | |
| "logits/chosen": -0.3203125, | |
| "logits/rejected": -0.33984375, | |
| "logps/chosen": -199.0, | |
| "logps/rejected": -168.0, | |
| "loss": 44.1539, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.06884765625, | |
| "rewards/margins": 0.0081787109375, | |
| "rewards/rejected": 0.060791015625, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0323274413603065, | |
| "grad_norm": 85.9101333618164, | |
| "learning_rate": 2.791271545209101e-07, | |
| "logits/chosen": -0.333984375, | |
| "logits/rejected": -0.3515625, | |
| "logps/chosen": -203.0, | |
| "logps/rejected": -173.0, | |
| "loss": 44.1109, | |
| "rewards/accuracies": 0.4390625059604645, | |
| "rewards/chosen": 0.0712890625, | |
| "rewards/margins": 0.00860595703125, | |
| "rewards/rejected": 0.06298828125, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0753410847503193, | |
| "grad_norm": 75.81817626953125, | |
| "learning_rate": 2.603560045628857e-07, | |
| "logits/chosen": -0.322265625, | |
| "logits/rejected": -0.337890625, | |
| "logps/chosen": -184.0, | |
| "logps/rejected": -160.0, | |
| "loss": 43.9852, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.07373046875, | |
| "rewards/margins": 0.0123291015625, | |
| "rewards/rejected": 0.061279296875, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.118354728140332, | |
| "grad_norm": 86.1444320678711, | |
| "learning_rate": 2.4152610371560093e-07, | |
| "logits/chosen": -0.33203125, | |
| "logits/rejected": -0.357421875, | |
| "logps/chosen": -183.0, | |
| "logps/rejected": -160.0, | |
| "loss": 43.9813, | |
| "rewards/accuracies": 0.45781248807907104, | |
| "rewards/chosen": 0.07666015625, | |
| "rewards/margins": 0.0120849609375, | |
| "rewards/rejected": 0.064453125, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1613683715303447, | |
| "grad_norm": 82.82176208496094, | |
| "learning_rate": 2.2274427632552503e-07, | |
| "logits/chosen": -0.330078125, | |
| "logits/rejected": -0.333984375, | |
| "logps/chosen": -199.0, | |
| "logps/rejected": -169.0, | |
| "loss": 43.8387, | |
| "rewards/accuracies": 0.5078125, | |
| "rewards/chosen": 0.083984375, | |
| "rewards/margins": 0.01611328125, | |
| "rewards/rejected": 0.06787109375, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.2043820149203575, | |
| "grad_norm": 78.16357421875, | |
| "learning_rate": 2.0411707401248403e-07, | |
| "logits/chosen": -0.318359375, | |
| "logits/rejected": -0.330078125, | |
| "logps/chosen": -181.0, | |
| "logps/rejected": -164.0, | |
| "loss": 44.0895, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": 0.08349609375, | |
| "rewards/margins": 0.0096435546875, | |
| "rewards/rejected": 0.07373046875, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2473956583103702, | |
| "grad_norm": 83.6078109741211, | |
| "learning_rate": 1.8575017118919928e-07, | |
| "logits/chosen": -0.310546875, | |
| "logits/rejected": -0.32421875, | |
| "logps/chosen": -202.0, | |
| "logps/rejected": -170.0, | |
| "loss": 44.0609, | |
| "rewards/accuracies": 0.4546875059604645, | |
| "rewards/chosen": 0.0869140625, | |
| "rewards/margins": 0.01104736328125, | |
| "rewards/rejected": 0.076171875, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.290409301700383, | |
| "grad_norm": 69.07476043701172, | |
| "learning_rate": 1.6774776555733028e-07, | |
| "logits/chosen": -0.3125, | |
| "logits/rejected": -0.33203125, | |
| "logps/chosen": -188.0, | |
| "logps/rejected": -167.0, | |
| "loss": 43.8176, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": 0.09228515625, | |
| "rewards/margins": 0.0174560546875, | |
| "rewards/rejected": 0.07470703125, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.290409301700383, | |
| "eval_logits/chosen": -0.298828125, | |
| "eval_logits/rejected": -0.30078125, | |
| "eval_logps/chosen": -183.0, | |
| "eval_logps/rejected": -173.0, | |
| "eval_loss": 0.6887055039405823, | |
| "eval_rewards/accuracies": 0.45101240277290344, | |
| "eval_rewards/chosen": 0.09033203125, | |
| "eval_rewards/margins": 0.009033203125, | |
| "eval_rewards/rejected": 0.0810546875, | |
| "eval_runtime": 2095.2015, | |
| "eval_samples_per_second": 2.921, | |
| "eval_steps_per_second": 0.731, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.3334229450903958, | |
| "grad_norm": 82.81778717041016, | |
| "learning_rate": 1.5021198698108036e-07, | |
| "logits/chosen": -0.310546875, | |
| "logits/rejected": -0.3125, | |
| "logps/chosen": -204.0, | |
| "logps/rejected": -178.0, | |
| "loss": 43.9469, | |
| "rewards/accuracies": 0.43437498807907104, | |
| "rewards/chosen": 0.0966796875, | |
| "rewards/margins": 0.012939453125, | |
| "rewards/rejected": 0.083984375, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.3764365884804086, | |
| "grad_norm": 94.56529998779297, | |
| "learning_rate": 1.3324231809189983e-07, | |
| "logits/chosen": -0.31640625, | |
| "logits/rejected": -0.33984375, | |
| "logps/chosen": -204.0, | |
| "logps/rejected": -176.0, | |
| "loss": 43.9461, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.09765625, | |
| "rewards/margins": 0.0133056640625, | |
| "rewards/rejected": 0.083984375, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.4194502318704214, | |
| "grad_norm": 80.80192565917969, | |
| "learning_rate": 1.1693502991126608e-07, | |
| "logits/chosen": -0.30859375, | |
| "logits/rejected": -0.3515625, | |
| "logps/chosen": -193.0, | |
| "logps/rejected": -161.0, | |
| "loss": 43.9762, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": 0.09765625, | |
| "rewards/margins": 0.012939453125, | |
| "rewards/rejected": 0.08447265625, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4624638752604342, | |
| "grad_norm": 74.29733276367188, | |
| "learning_rate": 1.0138263569332267e-07, | |
| "logits/chosen": -0.302734375, | |
| "logits/rejected": -0.326171875, | |
| "logps/chosen": -200.0, | |
| "logps/rejected": -161.0, | |
| "loss": 43.7793, | |
| "rewards/accuracies": 0.5140625238418579, | |
| "rewards/chosen": 0.10595703125, | |
| "rewards/margins": 0.0184326171875, | |
| "rewards/rejected": 0.08740234375, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.505477518650447, | |
| "grad_norm": 77.55677795410156, | |
| "learning_rate": 8.667336608579487e-08, | |
| "logits/chosen": -0.333984375, | |
| "logits/rejected": -0.365234375, | |
| "logps/chosen": -187.0, | |
| "logps/rejected": -160.0, | |
| "loss": 43.8336, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": 0.1015625, | |
| "rewards/margins": 0.01708984375, | |
| "rewards/rejected": 0.08447265625, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.5484911620404596, | |
| "grad_norm": 79.69268798828125, | |
| "learning_rate": 7.28906685866599e-08, | |
| "logits/chosen": -0.32421875, | |
| "logits/rejected": -0.345703125, | |
| "logps/chosen": -192.0, | |
| "logps/rejected": -169.0, | |
| "loss": 43.9348, | |
| "rewards/accuracies": 0.47968751192092896, | |
| "rewards/chosen": 0.1015625, | |
| "rewards/margins": 0.01409912109375, | |
| "rewards/rejected": 0.08740234375, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.5915048054304726, | |
| "grad_norm": 74.55278015136719, | |
| "learning_rate": 6.01127341362138e-08, | |
| "logits/chosen": -0.3046875, | |
| "logits/rejected": -0.33203125, | |
| "logps/chosen": -185.0, | |
| "logps/rejected": -160.0, | |
| "loss": 43.8746, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": 0.10205078125, | |
| "rewards/margins": 0.0162353515625, | |
| "rewards/rejected": 0.0859375, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6345184488204851, | |
| "grad_norm": 73.96065521240234, | |
| "learning_rate": 4.841205353023714e-08, | |
| "logits/chosen": -0.318359375, | |
| "logits/rejected": -0.33984375, | |
| "logps/chosen": -185.0, | |
| "logps/rejected": -165.0, | |
| "loss": 43.8695, | |
| "rewards/accuracies": 0.504687488079071, | |
| "rewards/chosen": 0.10498046875, | |
| "rewards/margins": 0.0164794921875, | |
| "rewards/rejected": 0.0888671875, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6775320922104981, | |
| "grad_norm": 72.94511413574219, | |
| "learning_rate": 3.785500617078424e-08, | |
| "logits/chosen": -0.298828125, | |
| "logits/rejected": -0.33203125, | |
| "logps/chosen": -191.0, | |
| "logps/rejected": -159.0, | |
| "loss": 43.8547, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": 0.10498046875, | |
| "rewards/margins": 0.016845703125, | |
| "rewards/rejected": 0.087890625, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7205457356005107, | |
| "grad_norm": 72.55449676513672, | |
| "learning_rate": 2.850148348765921e-08, | |
| "logits/chosen": -0.337890625, | |
| "logits/rejected": -0.361328125, | |
| "logps/chosen": -183.0, | |
| "logps/rejected": -159.0, | |
| "loss": 43.9145, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": 0.1064453125, | |
| "rewards/margins": 0.0150146484375, | |
| "rewards/rejected": 0.09130859375, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7205457356005107, | |
| "eval_logits/chosen": -0.296875, | |
| "eval_logits/rejected": -0.30078125, | |
| "eval_logps/chosen": -183.0, | |
| "eval_logps/rejected": -172.0, | |
| "eval_loss": 0.6879779696464539, | |
| "eval_rewards/accuracies": 0.4557478725910187, | |
| "eval_rewards/chosen": 0.10498046875, | |
| "eval_rewards/margins": 0.010986328125, | |
| "eval_rewards/rejected": 0.09423828125, | |
| "eval_runtime": 2088.408, | |
| "eval_samples_per_second": 2.931, | |
| "eval_steps_per_second": 0.733, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7635593789905235, | |
| "grad_norm": 80.52165985107422, | |
| "learning_rate": 2.0404549166959718e-08, | |
| "logits/chosen": -0.30078125, | |
| "logits/rejected": -0.330078125, | |
| "logps/chosen": -196.0, | |
| "logps/rejected": -158.0, | |
| "loss": 43.7961, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": 0.1083984375, | |
| "rewards/margins": 0.01806640625, | |
| "rewards/rejected": 0.09033203125, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.8065730223805363, | |
| "grad_norm": 75.35971069335938, | |
| "learning_rate": 1.3610138114250519e-08, | |
| "logits/chosen": -0.333984375, | |
| "logits/rejected": -0.373046875, | |
| "logps/chosen": -185.0, | |
| "logps/rejected": -165.0, | |
| "loss": 43.9035, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": 0.1064453125, | |
| "rewards/margins": 0.0159912109375, | |
| "rewards/rejected": 0.0908203125, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.849586665770549, | |
| "grad_norm": 78.83647155761719, | |
| "learning_rate": 8.156795860187027e-09, | |
| "logits/chosen": -0.302734375, | |
| "logits/rejected": -0.328125, | |
| "logps/chosen": -197.0, | |
| "logps/rejected": -168.0, | |
| "loss": 44.0375, | |
| "rewards/accuracies": 0.4609375, | |
| "rewards/chosen": 0.10595703125, | |
| "rewards/margins": 0.01165771484375, | |
| "rewards/rejected": 0.09423828125, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.8926003091605619, | |
| "grad_norm": 78.35073852539062, | |
| "learning_rate": 4.075459886973082e-09, | |
| "logits/chosen": -0.318359375, | |
| "logits/rejected": -0.3515625, | |
| "logps/chosen": -192.0, | |
| "logps/rejected": -154.0, | |
| "loss": 43.7273, | |
| "rewards/accuracies": 0.5140625238418579, | |
| "rewards/chosen": 0.109375, | |
| "rewards/margins": 0.0205078125, | |
| "rewards/rejected": 0.0888671875, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9356139525505747, | |
| "grad_norm": 71.64038848876953, | |
| "learning_rate": 1.3892841162143899e-09, | |
| "logits/chosen": -0.29296875, | |
| "logits/rejected": -0.326171875, | |
| "logps/chosen": -196.0, | |
| "logps/rejected": -157.0, | |
| "loss": 43.7586, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": 0.10888671875, | |
| "rewards/margins": 0.020263671875, | |
| "rewards/rejected": 0.08837890625, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9786275959405875, | |
| "grad_norm": 77.12970733642578, | |
| "learning_rate": 1.1350755386951849e-10, | |
| "logits/chosen": -0.3203125, | |
| "logits/rejected": -0.34765625, | |
| "logps/chosen": -194.0, | |
| "logps/rejected": -165.0, | |
| "loss": 43.8836, | |
| "rewards/accuracies": 0.48906248807907104, | |
| "rewards/chosen": 0.107421875, | |
| "rewards/margins": 0.0159912109375, | |
| "rewards/rejected": 0.09130859375, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.9958330532965927, | |
| "step": 464, | |
| "total_flos": 0.0, | |
| "train_loss": 44.06969793911638, | |
| "train_runtime": 23064.0827, | |
| "train_samples_per_second": 1.29, | |
| "train_steps_per_second": 0.02 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 464, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |