| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9945, |
| "eval_steps": 500, |
| "global_step": 153, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 14.745374712777657, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -1.480354905128479, |
| "logits/rejected": -1.5607078075408936, |
| "logps/chosen": -113.47530364990234, |
| "logps/pi_response": -223.8134002685547, |
| "logps/ref_response": -223.8134002685547, |
| "logps/rejected": -112.02357482910156, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07, |
| "eta": 0.0009999999310821295, |
| "grad_norm": 13.943252568407653, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -1.861205816268921, |
| "logits/rejected": -1.8467286825180054, |
| "logps/chosen": -159.73291015625, |
| "logps/pi_response": -273.9164733886719, |
| "logps/ref_response": -273.0810852050781, |
| "logps/rejected": -158.10842895507812, |
| "loss": 0.693, |
| "rewards/accuracies": 0.46581196784973145, |
| "rewards/chosen": -0.0030446185264736414, |
| "rewards/margins": 0.0008683722116984427, |
| "rewards/rejected": -0.003912990912795067, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 14.636770074330482, |
| "learning_rate": 4.989490450759331e-07, |
| "logits/chosen": -1.6749669313430786, |
| "logits/rejected": -1.6249001026153564, |
| "logps/chosen": -181.44686889648438, |
| "logps/pi_response": -299.26806640625, |
| "logps/ref_response": -269.5531921386719, |
| "logps/rejected": -184.55027770996094, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.5461538434028625, |
| "rewards/chosen": -0.15622131526470184, |
| "rewards/margins": 0.010419250465929508, |
| "rewards/rejected": -0.16664059460163116, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 18.84883262757341, |
| "learning_rate": 4.872270441827174e-07, |
| "logits/chosen": -1.5554673671722412, |
| "logits/rejected": -1.4832121133804321, |
| "logps/chosen": -187.2523956298828, |
| "logps/pi_response": -331.72064208984375, |
| "logps/ref_response": -270.0771484375, |
| "logps/rejected": -184.25772094726562, |
| "loss": 0.694, |
| "rewards/accuracies": 0.4961538314819336, |
| "rewards/chosen": -0.3374003469944, |
| "rewards/margins": 0.01481544878333807, |
| "rewards/rejected": -0.35221579670906067, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 19.234325247102298, |
| "learning_rate": 4.6308512113530063e-07, |
| "logits/chosen": -1.6449016332626343, |
| "logits/rejected": -1.708786129951477, |
| "logps/chosen": -188.21363830566406, |
| "logps/pi_response": -333.604736328125, |
| "logps/ref_response": -275.1395263671875, |
| "logps/rejected": -197.63232421875, |
| "loss": 0.6861, |
| "rewards/accuracies": 0.5269230604171753, |
| "rewards/chosen": -0.23007477819919586, |
| "rewards/margins": 0.013713112100958824, |
| "rewards/rejected": -0.24378788471221924, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.33, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 17.138306112556428, |
| "learning_rate": 4.277872161641681e-07, |
| "logits/chosen": -1.726422667503357, |
| "logits/rejected": -1.659047245979309, |
| "logps/chosen": -173.61680603027344, |
| "logps/pi_response": -315.35235595703125, |
| "logps/ref_response": -265.1530456542969, |
| "logps/rejected": -175.0764617919922, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.5461538434028625, |
| "rewards/chosen": -0.1452452391386032, |
| "rewards/margins": 0.029706543311476707, |
| "rewards/rejected": -0.17495179176330566, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 15.457021507203054, |
| "learning_rate": 3.8318133624280046e-07, |
| "logits/chosen": -1.712626338005066, |
| "logits/rejected": -1.722040057182312, |
| "logps/chosen": -179.5359649658203, |
| "logps/pi_response": -311.54986572265625, |
| "logps/ref_response": -265.8667907714844, |
| "logps/rejected": -191.751708984375, |
| "loss": 0.6849, |
| "rewards/accuracies": 0.5653846263885498, |
| "rewards/chosen": -0.16358985006809235, |
| "rewards/margins": 0.03767317533493042, |
| "rewards/rejected": -0.20126302540302277, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 25.055768196503276, |
| "learning_rate": 3.316028034595861e-07, |
| "logits/chosen": -1.7020467519760132, |
| "logits/rejected": -1.7025904655456543, |
| "logps/chosen": -185.90699768066406, |
| "logps/pi_response": -315.3182067871094, |
| "logps/ref_response": -276.3799743652344, |
| "logps/rejected": -189.57199096679688, |
| "loss": 0.6889, |
| "rewards/accuracies": 0.5076923370361328, |
| "rewards/chosen": -0.18907414376735687, |
| "rewards/margins": -0.0005933608626946807, |
| "rewards/rejected": -0.18848079442977905, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.52, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 20.33794636752797, |
| "learning_rate": 2.7575199021178855e-07, |
| "logits/chosen": -1.3984944820404053, |
| "logits/rejected": -1.3162914514541626, |
| "logps/chosen": -193.8706817626953, |
| "logps/pi_response": -336.3244323730469, |
| "logps/ref_response": -272.45025634765625, |
| "logps/rejected": -203.4906005859375, |
| "loss": 0.6834, |
| "rewards/accuracies": 0.5692307949066162, |
| "rewards/chosen": -0.31467124819755554, |
| "rewards/margins": 0.06254380196332932, |
| "rewards/rejected": -0.37721511721611023, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 17.59187813851558, |
| "learning_rate": 2.1855294234408068e-07, |
| "logits/chosen": -1.2510021924972534, |
| "logits/rejected": -1.2377079725265503, |
| "logps/chosen": -209.71649169921875, |
| "logps/pi_response": -354.3184509277344, |
| "logps/ref_response": -265.3123474121094, |
| "logps/rejected": -218.32000732421875, |
| "loss": 0.686, |
| "rewards/accuracies": 0.5538461804389954, |
| "rewards/chosen": -0.5378236770629883, |
| "rewards/margins": 0.04319743812084198, |
| "rewards/rejected": -0.5810210704803467, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.65, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 15.830895593862458, |
| "learning_rate": 1.6300029195778453e-07, |
| "logits/chosen": -1.2602006196975708, |
| "logits/rejected": -1.3293911218643188, |
| "logps/chosen": -204.08714294433594, |
| "logps/pi_response": -333.2199401855469, |
| "logps/ref_response": -267.4874572753906, |
| "logps/rejected": -209.34811401367188, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.5269230604171753, |
| "rewards/chosen": -0.438109815120697, |
| "rewards/margins": 0.006122402846813202, |
| "rewards/rejected": -0.44423219561576843, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.71, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 16.101030751126824, |
| "learning_rate": 1.1200247470632392e-07, |
| "logits/chosen": -1.4597405195236206, |
| "logits/rejected": -1.4979974031448364, |
| "logps/chosen": -195.9895782470703, |
| "logps/pi_response": -342.63812255859375, |
| "logps/ref_response": -289.2621765136719, |
| "logps/rejected": -196.60757446289062, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.5307692289352417, |
| "rewards/chosen": -0.37045371532440186, |
| "rewards/margins": 0.006542083341628313, |
| "rewards/rejected": -0.3769958019256592, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.78, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 14.757730514557847, |
| "learning_rate": 6.822945986946385e-08, |
| "logits/chosen": -1.2902206182479858, |
| "logits/rejected": -1.3999152183532715, |
| "logps/chosen": -195.00039672851562, |
| "logps/pi_response": -312.9175720214844, |
| "logps/ref_response": -265.36669921875, |
| "logps/rejected": -204.87124633789062, |
| "loss": 0.6806, |
| "rewards/accuracies": 0.6153846383094788, |
| "rewards/chosen": -0.32740989327430725, |
| "rewards/margins": 0.04114748165011406, |
| "rewards/rejected": -0.368557333946228, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.84, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 15.906731544868482, |
| "learning_rate": 3.397296523427806e-08, |
| "logits/chosen": -1.066477656364441, |
| "logits/rejected": -1.102446436882019, |
| "logps/chosen": -194.1769561767578, |
| "logps/pi_response": -325.8312683105469, |
| "logps/ref_response": -258.591552734375, |
| "logps/rejected": -202.23509216308594, |
| "loss": 0.682, |
| "rewards/accuracies": 0.5153846144676208, |
| "rewards/chosen": -0.433013916015625, |
| "rewards/margins": 0.026987465098500252, |
| "rewards/rejected": -0.4600013792514801, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.91, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 14.158010485809152, |
| "learning_rate": 1.1026475173977978e-08, |
| "logits/chosen": -1.430467963218689, |
| "logits/rejected": -1.3737958669662476, |
| "logps/chosen": -206.46617126464844, |
| "logps/pi_response": -334.0456848144531, |
| "logps/ref_response": -264.9248352050781, |
| "logps/rejected": -205.56634521484375, |
| "loss": 0.6856, |
| "rewards/accuracies": 0.5269230604171753, |
| "rewards/chosen": -0.42677730321884155, |
| "rewards/margins": 0.021916242316365242, |
| "rewards/rejected": -0.44869354367256165, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.97, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 15.33322379707595, |
| "learning_rate": 5.913435276374834e-10, |
| "logits/chosen": -1.2695854902267456, |
| "logits/rejected": -1.342061996459961, |
| "logps/chosen": -197.76773071289062, |
| "logps/pi_response": -341.5782165527344, |
| "logps/ref_response": -275.605224609375, |
| "logps/rejected": -205.83673095703125, |
| "loss": 0.681, |
| "rewards/accuracies": 0.5730769038200378, |
| "rewards/chosen": -0.42948493361473083, |
| "rewards/margins": 0.03518623486161232, |
| "rewards/rejected": -0.46467119455337524, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.99, |
| "step": 153, |
| "total_flos": 0.0, |
| "train_loss": 0.687004194540136, |
| "train_runtime": 23329.5349, |
| "train_samples_per_second": 0.857, |
| "train_steps_per_second": 0.007 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 153, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|